diff options
author | Donald Sharp <sharpd@cumulusnetworks.com> | 2015-05-20 02:40:34 +0200 |
---|---|---|
committer | Donald Sharp <sharpd@cumulusnetworks.com> | 2015-05-20 02:40:34 +0200 |
commit | fb018d251e5fc2fee0814a63c4e0a44274c98bb9 (patch) | |
tree | 59351ede3cd497fbb05c2b6906de549f17686f55 | |
parent | bgpd: bgpd-update-delay.patch (diff) | |
download | frr-fb018d251e5fc2fee0814a63c4e0a44274c98bb9.tar.xz frr-fb018d251e5fc2fee0814a63c4e0a44274c98bb9.zip |
nexthop-tracking.patch
quagga: nexthop-tracking.patch
Add next-hop tracking support to Quagga. Complete documentation is in doc/next-hop-tracking.txt.
Signed-off-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com>
Signed-off-by: Daniel Walton <dwalton@cumulusnetworks.com>
Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
-rw-r--r-- | bgpd/Makefile.am | 5 | ||||
-rw-r--r-- | bgpd/bgp_debug.c | 57 | ||||
-rw-r--r-- | bgpd/bgp_debug.h | 3 | ||||
-rw-r--r-- | bgpd/bgp_nexthop.c | 221 | ||||
-rw-r--r-- | bgpd/bgp_nexthop.h | 20 | ||||
-rw-r--r-- | bgpd/bgp_nht.c | 473 | ||||
-rw-r--r-- | bgpd/bgp_nht.h | 62 | ||||
-rw-r--r-- | bgpd/bgp_route.c | 103 | ||||
-rw-r--r-- | bgpd/bgp_route.h | 14 | ||||
-rw-r--r-- | bgpd/bgp_zebra.c | 11 | ||||
-rw-r--r-- | doc/next-hop-tracking.txt | 326 | ||||
-rw-r--r-- | lib/Makefile.am | 4 | ||||
-rw-r--r-- | lib/log.c | 3 | ||||
-rw-r--r-- | lib/memtypes.c | 1 | ||||
-rw-r--r-- | lib/nexthop.c | 100 | ||||
-rw-r--r-- | lib/nexthop.h | 89 | ||||
-rw-r--r-- | lib/zclient.c | 6 | ||||
-rw-r--r-- | lib/zclient.h | 1 | ||||
-rw-r--r-- | lib/zebra.h | 6 | ||||
-rw-r--r-- | zebra/Makefile.am | 6 | ||||
-rw-r--r-- | zebra/debug.c | 30 | ||||
-rw-r--r-- | zebra/debug.h | 3 | ||||
-rw-r--r-- | zebra/rib.h | 62 | ||||
-rw-r--r-- | zebra/rt_netlink.c | 1 | ||||
-rw-r--r-- | zebra/zebra_fpm_netlink.c | 1 | ||||
-rw-r--r-- | zebra/zebra_rib.c | 50 | ||||
-rw-r--r-- | zebra/zebra_rnh.c | 603 | ||||
-rw-r--r-- | zebra/zebra_rnh.h | 48 | ||||
-rw-r--r-- | zebra/zebra_rnh_null.c | 10 | ||||
-rw-r--r-- | zebra/zebra_routemap.c | 1 | ||||
-rw-r--r-- | zebra/zebra_vty.c | 28 | ||||
-rw-r--r-- | zebra/zserv.c | 81 | ||||
-rw-r--r-- | zebra/zserv.h | 6 |
33 files changed, 2224 insertions, 211 deletions
diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am index 9928734ee..63836a1bd 100644 --- a/bgpd/Makefile.am +++ b/bgpd/Makefile.am @@ -15,14 +15,15 @@ libbgp_a_SOURCES = \ bgp_debug.c bgp_route.c bgp_zebra.c bgp_open.c bgp_routemap.c \ bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \ bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \ - bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c + bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c \ + bgp_nht.c noinst_HEADERS = \ bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \ bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \ bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \ bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \ - bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h + bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgp_nht.h bgpd_SOURCES = bgp_main.c bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@ diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c index 1d0976977..91e6ad177 100644 --- a/bgpd/bgp_debug.c +++ b/bgpd/bgp_debug.c @@ -45,6 +45,7 @@ unsigned long conf_bgp_debug_keepalive; unsigned long conf_bgp_debug_update; unsigned long conf_bgp_debug_normal; unsigned long conf_bgp_debug_zebra; +unsigned long conf_bgp_debug_nht; unsigned long term_bgp_debug_as4; unsigned long term_bgp_debug_fsm; @@ -55,6 +56,7 @@ unsigned long term_bgp_debug_keepalive; unsigned long term_bgp_debug_update; unsigned long term_bgp_debug_normal; unsigned long term_bgp_debug_zebra; +unsigned long term_bgp_debug_nht; /* messages for BGP-4 status */ const struct message bgp_status_msg[] = @@ -471,6 +473,48 @@ ALIAS (no_debug_bgp_events, BGP_STR "BGP events\n") +DEFUN (debug_bgp_nht, + debug_bgp_nht_cmd, + "debug bgp nht", + DEBUG_STR + BGP_STR + "BGP nexthop tracking events\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_ON (nht, NHT); + else + { + TERM_DEBUG_ON (nht, NHT); + vty_out (vty, "BGP nexthop tracking debugging is 
on%s", VTY_NEWLINE); + } + return CMD_SUCCESS; +} + +DEFUN (no_debug_bgp_nht, + no_debug_bgp_nht_cmd, + "no debug bgp nht", + NO_STR + DEBUG_STR + BGP_STR + "BGP nexthop tracking events\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_OFF (nht, NHT); + else + { + TERM_DEBUG_OFF (nht, NHT); + vty_out (vty, "BGP nexthop tracking debugging is off%s", VTY_NEWLINE); + } + return CMD_SUCCESS; +} + +ALIAS (no_debug_bgp_nht, + undebug_bgp_nht_cmd, + "undebug bgp nht", + UNDEBUG_STR + BGP_STR + "BGP next-hop tracking updates\n") + DEFUN (debug_bgp_filter, debug_bgp_filter_cmd, "debug bgp filters", @@ -787,6 +831,8 @@ DEFUN (show_debugging_bgp, vty_out (vty, " BGP as4 debugging is on%s", VTY_NEWLINE); if (BGP_DEBUG (as4, AS4_SEGMENT)) vty_out (vty, " BGP as4 aspath segment debugging is on%s", VTY_NEWLINE); + if (BGP_DEBUG (nht, NHT)) + vty_out (vty, " BGP next-hop tracking debugging is on%s", VTY_NEWLINE); vty_out (vty, "%s", VTY_NEWLINE); return CMD_SUCCESS; } @@ -860,6 +906,12 @@ bgp_config_write_debug (struct vty *vty) write++; } + if (CONF_BGP_DEBUG (nht, NHT)) + { + vty_out (vty, "debug bgp nht%s", VTY_NEWLINE); + write++; + } + return write; } @@ -886,6 +938,8 @@ bgp_debug_init (void) install_element (CONFIG_NODE, &debug_bgp_fsm_cmd); install_element (ENABLE_NODE, &debug_bgp_events_cmd); install_element (CONFIG_NODE, &debug_bgp_events_cmd); + install_element (ENABLE_NODE, &debug_bgp_nht_cmd); + install_element (CONFIG_NODE, &debug_bgp_nht_cmd); install_element (ENABLE_NODE, &debug_bgp_filter_cmd); install_element (CONFIG_NODE, &debug_bgp_filter_cmd); install_element (ENABLE_NODE, &debug_bgp_keepalive_cmd); @@ -912,6 +966,9 @@ bgp_debug_init (void) install_element (ENABLE_NODE, &no_debug_bgp_events_cmd); install_element (ENABLE_NODE, &undebug_bgp_events_cmd); install_element (CONFIG_NODE, &no_debug_bgp_events_cmd); + install_element (ENABLE_NODE, &no_debug_bgp_nht_cmd); + install_element (ENABLE_NODE, &undebug_bgp_nht_cmd); + install_element (CONFIG_NODE, 
&no_debug_bgp_nht_cmd); install_element (ENABLE_NODE, &no_debug_bgp_filter_cmd); install_element (ENABLE_NODE, &undebug_bgp_filter_cmd); install_element (CONFIG_NODE, &no_debug_bgp_filter_cmd); diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h index ce8547b04..0090f814a 100644 --- a/bgpd/bgp_debug.h +++ b/bgpd/bgp_debug.h @@ -67,6 +67,7 @@ extern unsigned long conf_bgp_debug_keepalive; extern unsigned long conf_bgp_debug_update; extern unsigned long conf_bgp_debug_normal; extern unsigned long conf_bgp_debug_zebra; +extern unsigned long conf_bgp_debug_nht; extern unsigned long term_bgp_debug_as4; extern unsigned long term_bgp_debug_fsm; @@ -77,6 +78,7 @@ extern unsigned long term_bgp_debug_keepalive; extern unsigned long term_bgp_debug_update; extern unsigned long term_bgp_debug_normal; extern unsigned long term_bgp_debug_zebra; +extern unsigned long term_bgp_debug_nht; #define BGP_DEBUG_AS4 0x01 #define BGP_DEBUG_AS4_SEGMENT 0x02 @@ -90,6 +92,7 @@ extern unsigned long term_bgp_debug_zebra; #define BGP_DEBUG_UPDATE_OUT 0x02 #define BGP_DEBUG_NORMAL 0x01 #define BGP_DEBUG_ZEBRA 0x01 +#define BGP_DEBUG_NHT 0x01 #define BGP_DEBUG_PACKET_SEND 0x01 #define BGP_DEBUG_PACKET_SEND_DETAIL 0x02 diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index 5b1d13ac8..f048ae289 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -30,17 +30,21 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "memory.h" #include "hash.h" #include "jhash.h" +#include "nexthop.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" #include "bgpd/bgp_route.h" #include "bgpd/bgp_attr.h" #include "bgpd/bgp_nexthop.h" +#include "bgpd/bgp_nht.h" #include "bgpd/bgp_debug.h" #include "bgpd/bgp_damp.h" #include "zebra/rib.h" #include "zebra/zserv.h" /* For ZEBRA_SERV_PATH. 
*/ +extern struct zclient *zclient; + struct bgp_nexthop_cache *zlookup_query (struct in_addr); #ifdef HAVE_IPV6 struct bgp_nexthop_cache *zlookup_query_ipv6 (struct in6_addr *); @@ -59,7 +63,7 @@ static int bgp_scan_interval; static int bgp_import_interval; /* Route table for next-hop lookup cache. */ -static struct bgp_table *bgp_nexthop_cache_table[AFI_MAX]; +struct bgp_table *bgp_nexthop_cache_table[AFI_MAX]; static struct bgp_table *cache1_table[AFI_MAX]; static struct bgp_table *cache2_table[AFI_MAX]; @@ -69,6 +73,13 @@ static struct bgp_table *bgp_connected_table[AFI_MAX]; /* BGP nexthop lookup query client. */ struct zclient *zlookup = NULL; +char * +bnc_str (struct bgp_nexthop_cache *bnc, char *buf, int size) +{ + prefix2str(&(bnc->node->p), buf, size); + return buf; +} + /* Add nexthop to the end of the list. */ static void bnc_nexthop_add (struct bgp_nexthop_cache *bnc, struct nexthop *nexthop) @@ -84,7 +95,7 @@ bnc_nexthop_add (struct bgp_nexthop_cache *bnc, struct nexthop *nexthop) nexthop->prev = last; } -static void +void bnc_nexthop_free (struct bgp_nexthop_cache *bnc) { struct nexthop *nexthop; @@ -97,13 +108,17 @@ bnc_nexthop_free (struct bgp_nexthop_cache *bnc) } } -static struct bgp_nexthop_cache * -bnc_new (void) +struct bgp_nexthop_cache * +bnc_new () { - return XCALLOC (MTYPE_BGP_NEXTHOP_CACHE, sizeof (struct bgp_nexthop_cache)); + struct bgp_nexthop_cache *bnc; + + bnc = XCALLOC (MTYPE_BGP_NEXTHOP_CACHE, sizeof (struct bgp_nexthop_cache)); + LIST_INIT(&(bnc->paths)); + return bnc; } -static void +void bnc_free (struct bgp_nexthop_cache *bnc) { bnc_nexthop_free (bnc); @@ -111,48 +126,6 @@ bnc_free (struct bgp_nexthop_cache *bnc) } static int -bgp_nexthop_same (struct nexthop *next1, struct nexthop *next2) -{ - if (next1->type != next2->type) - return 0; - - switch (next1->type) - { - case ZEBRA_NEXTHOP_IPV4: - if (! IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4)) - return 0; - break; - case ZEBRA_NEXTHOP_IPV4_IFINDEX: - if (! 
IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4) - || next1->ifindex != next2->ifindex) - return 0; - break; - case ZEBRA_NEXTHOP_IFINDEX: - case ZEBRA_NEXTHOP_IFNAME: - if (next1->ifindex != next2->ifindex) - return 0; - break; -#ifdef HAVE_IPV6 - case ZEBRA_NEXTHOP_IPV6: - if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6)) - return 0; - break; - case ZEBRA_NEXTHOP_IPV6_IFINDEX: - case ZEBRA_NEXTHOP_IPV6_IFNAME: - if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6)) - return 0; - if (next1->ifindex != next2->ifindex) - return 0; - break; -#endif /* HAVE_IPV6 */ - default: - /* do nothing */ - break; - } - return 1; -} - -static int bgp_nexthop_cache_different (struct bgp_nexthop_cache *bnc1, struct bgp_nexthop_cache *bnc2) { @@ -167,7 +140,7 @@ bgp_nexthop_cache_different (struct bgp_nexthop_cache *bnc1, for (i = 0; i < bnc1->nexthop_num; i++) { - if (! bgp_nexthop_same (next1, next2)) + if (! nexthop_same_no_recurse (next1, next2)) return 1; next1 = next1->next; @@ -416,6 +389,7 @@ bgp_scan (afi_t afi, safi_t safi) struct bgp_info *next; struct peer *peer; struct listnode *node, *nnode; +#if BGP_SCAN_NEXTHOP int valid; int current; int changed; @@ -426,6 +400,7 @@ bgp_scan (afi_t afi, safi_t safi) bgp_nexthop_cache_table[afi] = cache2_table[afi]; else bgp_nexthop_cache_table[afi] = cache1_table[afi]; +#endif /* Get default bgp. */ bgp = bgp_get_default (); @@ -455,6 +430,7 @@ bgp_scan (afi_t afi, safi_t safi) if (bi->type == ZEBRA_ROUTE_BGP && bi->sub_type == BGP_ROUTE_NORMAL) { +#if BGP_SCAN_NEXTHOP changed = 0; metricchanged = 0; @@ -487,6 +463,7 @@ bgp_scan (afi_t afi, safi_t safi) afi, SAFI_UNICAST); } } +#endif if (CHECK_FLAG (bgp->af_flags[afi][SAFI_UNICAST], BGP_CONFIG_DAMPENING) @@ -499,11 +476,13 @@ bgp_scan (afi_t afi, safi_t safi) bgp_process (bgp, rn, afi, SAFI_UNICAST); } +#if BGP_SCAN_NEXTHOP /* Flash old cache. 
*/ if (bgp_nexthop_cache_table[afi] == cache1_table[afi]) bgp_nexthop_cache_reset (cache2_table[afi]); else bgp_nexthop_cache_reset (cache1_table[afi]); +#endif if (BGP_DEBUG (events, EVENTS)) { @@ -1294,9 +1273,7 @@ static int show_ip_bgp_scan_tables (struct vty *vty, const char detail) { struct bgp_node *rn; - struct bgp_nexthop_cache *bnc; char buf[INET6_ADDRSTRLEN]; - u_char i; if (bgp_scan_thread) vty_out (vty, "BGP scan is running%s", VTY_NEWLINE); @@ -1304,6 +1281,7 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail) vty_out (vty, "BGP scan is not running%s", VTY_NEWLINE); vty_out (vty, "BGP scan interval is %d%s", bgp_scan_interval, VTY_NEWLINE); +#if BGP_SCAN_NEXTHOP vty_out (vty, "Current BGP nexthop cache:%s", VTY_NEWLINE); for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP]); rn; rn = bgp_route_next (rn)) if ((bnc = rn->info) != NULL) @@ -1368,7 +1346,9 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail) } } #endif /* HAVE_IPV6 */ - +#else + vty_out (vty, "BGP next-hop tracking is on%s", VTY_NEWLINE); +#endif vty_out (vty, "BGP connected route:%s", VTY_NEWLINE); for (rn = bgp_table_top (bgp_connected_table[AFI_IP]); rn; @@ -1393,6 +1373,117 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail) return CMD_SUCCESS; } +static int +show_ip_bgp_nexthop_table (struct vty *vty, int detail) +{ + struct bgp_node *rn; + struct bgp_nexthop_cache *bnc; + char buf[INET6_ADDRSTRLEN]; + time_t tbuf; + u_char i; + + vty_out (vty, "Current BGP nexthop cache:%s", VTY_NEWLINE); + for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP]); rn; rn = bgp_route_next (rn)) + if ((bnc = rn->info) != NULL) + { + if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) + { + vty_out (vty, " %s valid [IGP metric %d], #paths %d%s", + inet_ntop (AF_INET, &rn->p.u.prefix4, buf, INET6_ADDRSTRLEN), + bnc->metric, bnc->path_count, VTY_NEWLINE); + if (detail) + for (i = 0; i < bnc->nexthop_num; i++) + switch (bnc->nexthop[i].type) + { + case NEXTHOP_TYPE_IPV4: 
+ vty_out (vty, " gate %s%s", + inet_ntop (AF_INET, &bnc->nexthop[i].gate.ipv4, buf, + INET6_ADDRSTRLEN), VTY_NEWLINE); + break; + case NEXTHOP_TYPE_IFINDEX: + vty_out (vty, " if %s%s", + ifindex2ifname(bnc->nexthop[i].ifindex), VTY_NEWLINE); + break; + case NEXTHOP_TYPE_IPV4_IFINDEX: + vty_out (vty, " gate %s, if %s%s", + inet_ntop(AF_INET, &bnc->nexthop[i].gate.ipv4, buf, + INET6_ADDRSTRLEN), + ifindex2ifname(bnc->nexthop[i].ifindex), VTY_NEWLINE); + break; + default: + vty_out (vty, " invalid nexthop type %u%s", + bnc->nexthop[i].type, VTY_NEWLINE); + } + } + else + vty_out (vty, " %s invalid%s", + inet_ntop (AF_INET, &rn->p.u.prefix4, buf, INET6_ADDRSTRLEN), VTY_NEWLINE); +#ifdef HAVE_CLOCK_MONOTONIC + tbuf = time(NULL) - (bgp_clock() - bnc->last_update); + vty_out (vty, " Last update: %s", ctime(&tbuf)); +#else + vty_out (vty, " Last update: %s", ctime(&bnc->uptime)); +#endif /* HAVE_CLOCK_MONOTONIC */ + + vty_out(vty, "%s", VTY_NEWLINE); + } + +#ifdef HAVE_IPV6 + { + for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP6]); + rn; + rn = bgp_route_next (rn)) + if ((bnc = rn->info) != NULL) + { + if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)) + { + vty_out (vty, " %s valid [IGP metric %d]%s", + inet_ntop (AF_INET6, &rn->p.u.prefix6, buf, + INET6_ADDRSTRLEN), + bnc->metric, VTY_NEWLINE); + if (detail) + for (i = 0; i < bnc->nexthop_num; i++) + switch (bnc->nexthop[i].type) + { + case NEXTHOP_TYPE_IPV6: + vty_out (vty, " gate %s%s", + inet_ntop (AF_INET6, &bnc->nexthop[i].gate.ipv6, + buf, INET6_ADDRSTRLEN), VTY_NEWLINE); + break; + case NEXTHOP_TYPE_IPV6_IFINDEX: + vty_out(vty, " gate %s, if %s%s", + inet_ntop(AF_INET6, &bnc->nexthop[i].gate.ipv6, buf, + INET6_ADDRSTRLEN), + ifindex2ifname(bnc->nexthop[i].ifindex), + VTY_NEWLINE); + break; + case NEXTHOP_TYPE_IFINDEX: + vty_out (vty, " ifidx %u%s", bnc->nexthop[i].ifindex, + VTY_NEWLINE); + break; + default: + vty_out (vty, " invalid nexthop type %u%s", + bnc->nexthop[i].type, VTY_NEWLINE); + } + } + else + 
vty_out (vty, " %s invalid%s", + inet_ntop (AF_INET6, &rn->p.u.prefix6, buf, INET6_ADDRSTRLEN), + VTY_NEWLINE); +#ifdef HAVE_CLOCK_MONOTONIC + tbuf = time(NULL) - (bgp_clock() - bnc->last_update); + vty_out (vty, " Last update: %s", ctime(&tbuf)); +#else + vty_out (vty, " Last update: %s", ctime(&bnc->uptime)); +#endif /* HAVE_CLOCK_MONOTONIC */ + + vty_out(vty, "%s", VTY_NEWLINE); + } + } +#endif /* HAVE_IPV6 */ + return CMD_SUCCESS; +} + DEFUN (show_ip_bgp_scan, show_ip_bgp_scan_cmd, "show ip bgp scan", @@ -1416,6 +1507,28 @@ DEFUN (show_ip_bgp_scan_detail, return show_ip_bgp_scan_tables (vty, 1); } +DEFUN (show_ip_bgp_nexthop, + show_ip_bgp_nexthop_cmd, + "show ip bgp nexthop", + SHOW_STR + IP_STR + BGP_STR + "BGP nexthop table\n") +{ + return show_ip_bgp_nexthop_table (vty, 0); +} + +DEFUN (show_ip_bgp_nexthop_detail, + show_ip_bgp_nexthop_detail_cmd, + "show ip bgp nexthop detail", + SHOW_STR + IP_STR + BGP_STR + "BGP nexthop table\n") +{ + return show_ip_bgp_nexthop_table (vty, 1); +} + int bgp_config_write_scan_time (struct vty *vty) { @@ -1458,8 +1571,12 @@ bgp_scan_init (void) install_element (BGP_NODE, &no_bgp_scan_time_val_cmd); install_element (VIEW_NODE, &show_ip_bgp_scan_cmd); install_element (VIEW_NODE, &show_ip_bgp_scan_detail_cmd); + install_element (VIEW_NODE, &show_ip_bgp_nexthop_cmd); + install_element (VIEW_NODE, &show_ip_bgp_nexthop_detail_cmd); install_element (RESTRICTED_NODE, &show_ip_bgp_scan_cmd); install_element (ENABLE_NODE, &show_ip_bgp_scan_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_nexthop_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_nexthop_detail_cmd); install_element (ENABLE_NODE, &show_ip_bgp_scan_detail_cmd); } diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h index 6e5350ead..abcf24263 100644 --- a/bgpd/bgp_nexthop.h +++ b/bgpd/bgp_nexthop.h @@ -22,6 +22,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define _QUAGGA_BGP_NEXTHOP_H #include "if.h" +#include "queue.h" +#include "prefix.h" 
#define BGP_SCAN_INTERVAL_DEFAULT 60 #define BGP_IMPORT_INTERVAL_DEFAULT 15 @@ -44,6 +46,20 @@ struct bgp_nexthop_cache /* Nexthop number and nexthop linked list.*/ u_char nexthop_num; struct nexthop *nexthop; + time_t last_update; + u_int16_t flags; + +#define BGP_NEXTHOP_VALID (1 << 0) +#define BGP_NEXTHOP_REGISTERED (1 << 1) + + u_int16_t change_flags; + +#define BGP_NEXTHOP_CHANGED (1 << 0) +#define BGP_NEXTHOP_METRIC_CHANGED (1 << 1) + + struct bgp_node *node; + LIST_HEAD(path_list, bgp_info) paths; + unsigned int path_count; }; extern void bgp_scan_init (void); @@ -57,5 +73,9 @@ extern int bgp_config_write_scan_time (struct vty *); extern int bgp_nexthop_onlink (afi_t, struct attr *); extern int bgp_nexthop_self (struct attr *); extern void bgp_address_init (void); +extern struct bgp_nexthop_cache *bnc_new(); +extern void bnc_free(struct bgp_nexthop_cache *bnc); +extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc); +extern char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size); #endif /* _QUAGGA_BGP_NEXTHOP_H */ diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c new file mode 100644 index 000000000..8f5f1af96 --- /dev/null +++ b/bgpd/bgp_nht.c @@ -0,0 +1,473 @@ +/* BGP Nexthop tracking + * Copyright (C) 2013 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. 
If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "thread.h"
+#include "prefix.h"
+#include "zclient.h"
+#include "stream.h"
+#include "network.h"
+#include "log.h"
+#include "memory.h"
+#include "nexthop.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_nht.h"
+
+extern struct zclient *zclient;
+extern struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
+
+static void register_nexthop(struct bgp_nexthop_cache *bnc);
+static void unregister_nexthop (struct bgp_nexthop_cache *bnc);
+static void evaluate_paths(struct bgp_nexthop_cache *bnc);
+static int make_prefix(int afi, struct bgp_info *ri, struct prefix *p);
+static void path_nh_map(struct bgp_info *path, struct bgp_nexthop_cache *bnc,
+                        int keep);
+
+/**
+ * bgp_find_nexthop - report the state of the nexthop cache entry already
+ * linked to a path.
+ * ARGUMENTS:
+ *   path - path whose linked nexthop cache entry is inspected
+ *   changed - if non-NULL, receives the BGP_NEXTHOP_CHANGED bit
+ *   metricchanged - if non-NULL, receives the BGP_NEXTHOP_METRIC_CHANGED bit
+ * RETURNS:
+ *   0 if the path has no linked entry, otherwise the BGP_NEXTHOP_VALID bit
+ *   of the entry's flags.
+ */
+int
+bgp_find_nexthop (struct bgp_info *path, int *changed, int *metricchanged)
+{
+  struct bgp_nexthop_cache *bnc = path->nexthop;
+
+  if (!bnc)
+    return 0;
+
+  if (changed)
+    *changed = CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+
+  if (metricchanged)
+    *metricchanged = CHECK_FLAG(bnc->change_flags,
+                                BGP_NEXTHOP_METRIC_CHANGED);
+
+  return (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
+}
+
+/**
+ * bgp_unlink_nexthop - detach a path from its nexthop cache entry. When the
+ * entry's path list becomes empty the entry is unregistered from Zebra,
+ * removed from its table node and freed.
+ */
+void
+bgp_unlink_nexthop (struct bgp_info *path)
+{
+  struct bgp_nexthop_cache *bnc = path->nexthop;
+
+  if (!bnc)
+    return;
+
+  path_nh_map(path, NULL, 0);
+
+  if (LIST_EMPTY(&(bnc->paths)))
+    {
+      if (BGP_DEBUG(nht, NHT))
+        {
+          char buf[INET6_ADDRSTRLEN];
+          zlog_debug("bgp_unlink_nexthop: freeing bnc %s",
+                     bnc_str(bnc, buf, INET6_ADDRSTRLEN));
+        }
+      unregister_nexthop(bnc);
+      bnc->node->info = NULL;
+      bgp_unlock_node(bnc->node);
+      bnc_free(bnc);
+    }
+}
+
+/**
+ * bgp_find_or_add_nexthop - look up (creating and registering if needed)
+ * the nexthop cache entry for a path's nexthop and link the path to it.
+ * RETURNS:
+ *   1 when no trackable prefix can be built for the path (make_prefix()
+ *   failed), otherwise the BGP_NEXTHOP_VALID bit of the entry's flags.
+ */
+int
+bgp_find_or_add_nexthop (afi_t afi, struct bgp_info *ri, int *changed,
+                         int *metricchanged)
+{
+  struct bgp_node *rn;
+  struct bgp_nexthop_cache *bnc;
+  struct prefix p;
+
+  if (make_prefix(afi, ri, &p) < 0)
+    return 1;
+  rn = bgp_node_get (bgp_nexthop_cache_table[afi], &p);
+
+  if (!rn->info)
+    {
+      bnc = bnc_new();
+      rn->info = bnc;
+      bnc->node = rn;
+      /* Extra lock held on behalf of the cache entry; it is dropped in
+       * bgp_unlink_nexthop() when the entry is freed.
+       */
+      bgp_lock_node(rn);
+      register_nexthop(bnc);
+    }
+  bnc = rn->info;
+  bgp_unlock_node (rn);
+  path_nh_map(ri, bnc, 1);
+
+  if (changed)
+    *changed = CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+
+  if (metricchanged)
+    *metricchanged = CHECK_FLAG(bnc->change_flags,
+                                BGP_NEXTHOP_METRIC_CHANGED);
+
+  if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
+    (bgp_info_extra_get(ri))->igpmetric = bnc->metric;
+  else if (ri->extra)
+    ri->extra->igpmetric = 0;
+
+  return (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
+}
+
+/**
+ * bgp_parse_nexthop_update - parse a nexthop update message from
+ * zclient->ibuf, refresh the matching nexthop cache entry (validity,
+ * metric, resolving nexthop list, change flags) and re-evaluate the paths
+ * threaded on it.
+ */
+void
+bgp_parse_nexthop_update (void)
+{
+  struct stream *s;
+  struct bgp_node *rn;
+  struct bgp_nexthop_cache *bnc;
+  struct nexthop *nexthop;
+  struct nexthop *oldnh;
+  struct nexthop *nhlist_head = NULL;
+  struct nexthop *nhlist_tail = NULL;
+  uint32_t metric;
+  u_char nexthop_num;
+  struct prefix p;
+  int i;
+
+  s = zclient->ibuf;
+
+  memset(&p, 0, sizeof(struct prefix));
+  p.family = stream_getw(s);
+  p.prefixlen = stream_getc(s);
+  switch (p.family)
+    {
+    case AF_INET:
+      p.u.prefix4.s_addr = stream_get_ipv4 (s);
+      break;
+#ifdef HAVE_IPV6
+    case AF_INET6:
+      stream_get(&p.u.prefix6, s, 16);
+      break;
+#endif
+    default:
+      /* Only AF_INET/AF_INET6 nexthops are ever registered (see
+       * make_prefix()), so bail out instead of indexing
+       * bgp_nexthop_cache_table with whatever family2afi() yields for an
+       * unrecognised family.
+       */
+      zlog_warn("parse nexthop update: unsupported address family %d",
+                p.family);
+      return;
+    }
+
+  rn = bgp_node_lookup(bgp_nexthop_cache_table[family2afi(p.family)], &p);
+  if (!rn || !rn->info)
+    {
+      if (BGP_DEBUG(nht, NHT))
+        {
+          char buf[INET6_ADDRSTRLEN];
+          prefix2str(&p, buf, INET6_ADDRSTRLEN);
+          zlog_debug("parse nexthop update(%s): rn not found", buf);
+        }
+      return;
+    }
+
+  bnc = rn->info;
+  bnc->last_update = bgp_clock();
+  bnc->change_flags = 0;
+  metric = stream_getl (s);
+  nexthop_num = stream_getc (s);
+
+  /* debug print the input */
+  if (BGP_DEBUG(nht, NHT))
+    {
+      char buf[INET6_ADDRSTRLEN];
+      prefix2str(&p, buf, INET6_ADDRSTRLEN);
+      zlog_debug("parse nexthop update(%s): metric=%u, #nexthop=%d", buf,
+                 metric, nexthop_num);
+    }
+
+  if (metric != bnc->metric)
+    bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
+
+  if(nexthop_num != bnc->nexthop_num)
+    bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+
+  if (nexthop_num)
+    {
+      bnc->flags |= BGP_NEXTHOP_VALID;
+      bnc->metric = metric;
+      bnc->nexthop_num = nexthop_num;
+
+      for (i = 0; i < nexthop_num; i++)
+        {
+          nexthop = nexthop_new();
+          nexthop->type = stream_getc (s);
+          switch (nexthop->type)
+            {
+            case ZEBRA_NEXTHOP_IPV4:
+              nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
+              break;
+            case ZEBRA_NEXTHOP_IFINDEX:
+            case ZEBRA_NEXTHOP_IFNAME:
+              nexthop->ifindex = stream_getl (s);
+              break;
+            case ZEBRA_NEXTHOP_IPV4_IFINDEX:
+            case ZEBRA_NEXTHOP_IPV4_IFNAME:
+              nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
+              nexthop->ifindex = stream_getl (s);
+              break;
+#ifdef HAVE_IPV6
+            case ZEBRA_NEXTHOP_IPV6:
+              stream_get (&nexthop->gate.ipv6, s, 16);
+              break;
+            case ZEBRA_NEXTHOP_IPV6_IFINDEX:
+            case ZEBRA_NEXTHOP_IPV6_IFNAME:
+              stream_get (&nexthop->gate.ipv6, s, 16);
+              nexthop->ifindex = stream_getl (s);
+              break;
+#endif
+            default:
+              /* do nothing */
+              break;
+            }
+
+          /* Append to the new list, preserving the order on the wire. */
+          if (nhlist_tail)
+            {
+              nhlist_tail->next = nexthop;
+              nhlist_tail = nexthop;
+            }
+          else
+            {
+              nhlist_tail = nexthop;
+              nhlist_head = nexthop;
+            }
+
+          /* No need to evaluate the nexthop if we have already determined
+           * that there has been a change.
+           */
+          if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
+            continue;
+
+          for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
+            if (nexthop_same_no_recurse(oldnh, nexthop))
+              break;
+
+          if (!oldnh)
+            bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+        }
+      bnc_nexthop_free(bnc);
+      bnc->nexthop = nhlist_head;
+    }
+  else
+    {
+      bnc->flags &= ~BGP_NEXTHOP_VALID;
+      /* Keep the count in step with the (now empty) nexthop list so the
+       * next update is compared against the real previous state.
+       */
+      bnc->nexthop_num = nexthop_num;
+      bnc_nexthop_free(bnc);
+      bnc->nexthop = NULL;
+    }
+
+  evaluate_paths(bnc);
+}
+
+/**
+ * make_prefix - build the host prefix of the path's BGP nexthop, i.e. the
+ * key under which the nexthop is stored in the nexthop cache table.
+ * ARGUMENTS:
+ *   afi - address family of the path (AFI_IP or AFI_IP6)
+ *   ri - path whose attribute nexthop is used
+ *   p - output prefix; always zeroed first
+ * RETURNS:
+ *   0 on success, -1 when the IPv6 nexthop cannot be tracked (no extended
+ *   attributes, nexthop length other than 16, or link-local address).
+ */
+static int
+make_prefix (int afi, struct bgp_info *ri, struct prefix *p)
+{
+  memset (p, 0, sizeof (struct prefix));
+  switch (afi)
+    {
+    case AFI_IP:
+      p->family = AF_INET;
+      p->prefixlen = IPV4_MAX_BITLEN;
+      p->u.prefix4 = ri->attr->nexthop;
+      break;
+#ifdef HAVE_IPV6
+    case AFI_IP6:
+      /* NOTE(review): attr->extra is assumed to be optional here; guard
+       * the dereference instead of crashing on a path without it.
+       */
+      if (!ri->attr->extra
+          || ri->attr->extra->mp_nexthop_len != 16
+          || IN6_IS_ADDR_LINKLOCAL (&ri->attr->extra->mp_nexthop_global))
+        return -1;
+
+      p->family = AF_INET6;
+      p->prefixlen = IPV6_MAX_BITLEN;
+      p->u.prefix6 = ri->attr->extra->mp_nexthop_global;
+      break;
+#endif
+    default:
+      break;
+    }
+  return 0;
+}
+
+/**
+ * sendmsg_nexthop -- Format and send a nexthop register/Unregister
+ *   command to Zebra.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ *   int command -- either ZEBRA_NEXTHOP_REGISTER or ZEBRA_NEXTHOP_UNREGISTER
+ * RETURNS:
+ *   void.
+ */
+static void
+sendmsg_nexthop (struct bgp_nexthop_cache *bnc, int command)
+{
+  struct stream *s;
+  struct prefix *p;
+  int ret;
+
+  /* Check socket. */
+  if (!zclient || zclient->sock < 0)
+    return;
+
+  p = &(bnc->node->p);
+  s = zclient->obuf;
+  stream_reset (s);
+  zclient_create_header (s, command);
+  stream_putw(s, PREFIX_FAMILY(p));
+  stream_putc(s, p->prefixlen);
+  switch (PREFIX_FAMILY(p))
+    {
+    case AF_INET:
+      stream_put_in_addr (s, &p->u.prefix4);
+      break;
+#ifdef HAVE_IPV6
+    case AF_INET6:
+      stream_put(s, &(p->u.prefix6), 16);
+      break;
+#endif
+    default:
+      break;
+    }
+  /* Patch the total message length into the first two bytes. */
+  stream_putw_at (s, 0, stream_get_endp (s));
+
+  ret = zclient_send_message(zclient);
+  /* TBD: handle the failure */
+  if (ret < 0)
+    zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
+  return;
+}
+
+/**
+ * register_nexthop - register a nexthop with Zebra for notification
+ *    when the route to the nexthop changes.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ *   void.
+ */
+static void
+register_nexthop (struct bgp_nexthop_cache *bnc)
+{
+  /* Check if we have already registered */
+  if (bnc->flags & BGP_NEXTHOP_REGISTERED)
+    return;
+  sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_REGISTER);
+  SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * unregister_nexthop -- Unregister the nexthop from Zebra.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ *   void.
+ */
+static void
+unregister_nexthop (struct bgp_nexthop_cache *bnc)
+{
+  /* Nothing to undo unless the nexthop is currently registered */
+  if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
+    return;
+
+  sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_UNREGISTER);
+  UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
+ *   For every normal BGP path threaded on the nexthop: sync the path's
+ *   BGP_INFO_VALID flag with the nexthop's validity, copy the IGP metric,
+ *   mark the path BGP_INFO_IGP_CHANGED when the last update changed the
+ *   nexthop or its metric, and queue the destination for bgp_process().
+ *   The nexthop's change_flags are cleared once all paths are handled.
+ * ARGUMENTS:
+ *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ *   void.
+ */
+static void
+evaluate_paths (struct bgp_nexthop_cache *bnc)
+{
+  struct bgp_node *rn;
+  struct bgp_info *path;
+  struct bgp *bgp = bgp_get_default();
+  int afi;
+
+  LIST_FOREACH(path, &(bnc->paths), nh_thread)
+    {
+      if (!(path->type == ZEBRA_ROUTE_BGP &&
+            path->sub_type == BGP_ROUTE_NORMAL))
+        continue;
+
+      rn = path->net;
+      afi = family2afi(rn->p.family);
+
+      /* Path becomes valid/invalid depending on whether the nexthop
+       * reachable/unreachable.
+       */
+      if ((CHECK_FLAG(path->flags, BGP_INFO_VALID) ? 1 : 0) !=
+          (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) ? 1 : 0))
+        {
+          if (CHECK_FLAG (path->flags, BGP_INFO_VALID))
+            {
+              bgp_aggregate_decrement (bgp, &rn->p, path,
+                                       afi, SAFI_UNICAST);
+              bgp_info_unset_flag (rn, path, BGP_INFO_VALID);
+            }
+          else
+            {
+              bgp_info_set_flag (rn, path, BGP_INFO_VALID);
+              bgp_aggregate_increment (bgp, &rn->p, path,
+                                       afi, SAFI_UNICAST);
+            }
+        }
+
+      /* Copy the metric to the path. Will be used for bestpath computation */
+      if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
+        (bgp_info_extra_get(path))->igpmetric = bnc->metric;
+      else if (path->extra)
+        path->extra->igpmetric = 0;
+
+      /* The *_CHANGED bits are recorded in change_flags by
+       * bgp_parse_nexthop_update(), not in flags.
+       */
+      if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED) ||
+          CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED))
+        SET_FLAG(path->flags, BGP_INFO_IGP_CHANGED);
+
+      bgp_process(bgp, rn, afi, SAFI_UNICAST);
+    }
+  RESET_FLAG(bnc->change_flags);
+}
+
+/**
+ * path_nh_map - make or break path-to-nexthop association.
+ * ARGUMENTS:
+ *   path - pointer to the path structure
+ *   bnc  - pointer to the nexthop structure
+ *   make - if set, make the association. if unset, just break the existing
+ *          association.
+ */
+static void
+path_nh_map (struct bgp_info *path, struct bgp_nexthop_cache *bnc, int make)
+{
+  /* Always drop any existing association first. */
+  if (path->nexthop)
+    {
+      LIST_REMOVE(path, nh_thread);
+      path->nexthop->path_count--;
+      path->nexthop = NULL;
+    }
+  if (make)
+    {
+      LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
+      path->nexthop = bnc;
+      path->nexthop->path_count++;
+    }
+}
diff --git a/bgpd/bgp_nht.h b/bgpd/bgp_nht.h
new file mode 100644
index 000000000..0e43f0a44
--- /dev/null
+++ b/bgpd/bgp_nht.h
@@ -0,0 +1,62 @@
+/* BGP Nexthop tracking
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.
If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _BGP_NHT_H
+#define _BGP_NHT_H
+
+/**
+ * bgp_parse_nexthop_update() - parse a nexthop update message from Zebra.
+ */
+extern void bgp_parse_nexthop_update(void);
+
+/**
+ * bgp_find_nexthop() - lookup the nexthop cache table for the bnc object
+ * ARGUMENTS:
+ *   p - path for which the nexthop object is being looked up
+ *   c - output variable that stores whether the nexthop object has changed
+ *       since last time.
+ *   m - output variable that stores whether the nexthop metric has changed
+ *       since last time.
+ */
+extern int bgp_find_nexthop(struct bgp_info *p, int *c, int *m);
+
+/**
+ * bgp_find_or_add_nexthop() - lookup the nexthop cache table for the bnc
+ *  object. If not found, create a new object and register with ZEBRA for
+ *  nexthop notification.
+ * ARGUMENTS:
+ *   a - afi: AFI_IP or AFI_IP6
+ *   p - path for which the nexthop object is being looked up
+ *   c - output variable that stores whether the nexthop object has changed
+ *       since last time.
+ *   m - output variable that stores whether the nexthop metric has changed
+ *       since last time.
+ */
+extern int bgp_find_or_add_nexthop(afi_t a, struct bgp_info *p, int *c, int *m);
+
+/**
+ * bgp_unlink_nexthop() - Unlink the nexthop object from the path structure.
+ * ARGUMENTS:
+ *   p - path structure.
+ */
+extern void bgp_unlink_nexthop(struct bgp_info *p);
+
+#endif /* _BGP_NHT_H */
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index 15c3b6eab..d55a1081e 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -55,6 +55,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include "bgpd/bgp_zebra.h"
 #include "bgpd/bgp_vty.h"
 #include "bgpd/bgp_mpath.h"
+#include "bgpd/bgp_nht.h"
 
 /* Extern from bgp_dump.c */
 extern const char *bgp_origin_str[];
@@ -126,20 +127,14 @@ bgp_info_extra_get (struct bgp_info *ri)
   return ri->extra;
 }
 
-/* Allocate new bgp info structure.
*/ -static struct bgp_info * -bgp_info_new (void) -{ - return XCALLOC (MTYPE_BGP_ROUTE, sizeof (struct bgp_info)); -} - /* Free bgp route information. */ static void bgp_info_free (struct bgp_info *binfo) { if (binfo->attr) bgp_attr_unintern (&binfo->attr); - + + bgp_unlink_nexthop(binfo); bgp_info_extra_free (&binfo->extra); bgp_info_mpath_free (&binfo->mpath); @@ -1857,6 +1852,23 @@ bgp_rib_withdraw (struct bgp_node *rn, struct bgp_info *ri, struct peer *peer, bgp_rib_remove (rn, ri, peer, afi, safi); } +static struct bgp_info * +info_make (int type, int sub_type, struct peer *peer, struct attr *attr, + struct bgp_node *rn) +{ + struct bgp_info *new; + + /* Make new BGP info. */ + new = XCALLOC (MTYPE_BGP_ROUTE, sizeof (struct bgp_info)); + new->type = type; + new->sub_type = sub_type; + new->peer = peer; + new->attr = attr; + new->uptime = bgp_clock (); + new->net = rn; + return new; +} + static void bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, struct attr *attr, struct peer *peer, struct prefix *p, int type, @@ -2004,13 +2016,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, p->prefixlen, rsclient->host); } - /* Make new BGP info. */ - new = bgp_info_new (); - new->type = type; - new->sub_type = sub_type; - new->peer = peer; - new->attr = attr_new; - new->uptime = bgp_clock (); + new = info_make(type, sub_type, peer, attr_new, rn); /* Update MPLS tag. */ if (safi == SAFI_MPLS_VPN) @@ -2316,7 +2322,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr, || (peer->sort == BGP_PEER_EBGP && peer->ttl != 1) || CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK))) { - if (bgp_nexthop_lookup (afi, peer, ri, NULL, NULL)) + if (bgp_find_or_add_nexthop (afi, ri, NULL, NULL)) bgp_info_set_flag (rn, ri, BGP_INFO_VALID); else bgp_info_unset_flag (rn, ri, BGP_INFO_VALID); @@ -2343,12 +2349,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr, } /* Make new BGP info. 
*/ - new = bgp_info_new (); - new->type = type; - new->sub_type = sub_type; - new->peer = peer; - new->attr = attr_new; - new->uptime = bgp_clock (); + new = info_make(type, sub_type, peer, attr_new, rn); /* Update MPLS tag. */ if (safi == SAFI_MPLS_VPN) @@ -2362,7 +2363,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr, || (peer->sort == BGP_PEER_EBGP && peer->ttl != 1) || CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK))) { - if (bgp_nexthop_lookup (afi, peer, new, NULL, NULL)) + if (bgp_find_or_add_nexthop (afi, new, NULL, NULL)) bgp_info_set_flag (rn, new, BGP_INFO_VALID); else bgp_info_unset_flag (rn, new, BGP_INFO_VALID); @@ -3450,15 +3451,11 @@ bgp_static_update_rsclient (struct peer *rsclient, struct prefix *p, return; } } - + /* Make new BGP info. */ - new = bgp_info_new (); - new->type = ZEBRA_ROUTE_BGP; - new->sub_type = BGP_ROUTE_STATIC; - new->peer = bgp->peer_self; + new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self, + attr_new, rn); SET_FLAG (new->flags, BGP_INFO_VALID); - new->attr = attr_new; - new->uptime = bgp_clock (); /* Register new BGP information. */ bgp_info_add (rn, new); @@ -3571,13 +3568,9 @@ bgp_static_update_main (struct bgp *bgp, struct prefix *p, } /* Make new BGP info. */ - new = bgp_info_new (); - new->type = ZEBRA_ROUTE_BGP; - new->sub_type = BGP_ROUTE_STATIC; - new->peer = bgp->peer_self; + new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self, attr_new, + rn); SET_FLAG (new->flags, BGP_INFO_VALID); - new->attr = attr_new; - new->uptime = bgp_clock (); /* Aggregate address increment. */ bgp_aggregate_increment (bgp, p, new, afi, safi); @@ -3622,13 +3615,10 @@ bgp_static_update_vpnv4 (struct bgp *bgp, struct prefix *p, afi_t afi, rn = bgp_afi_node_get (bgp->rib[afi][safi], afi, safi, p, prd); /* Make new BGP info. 
*/ - new = bgp_info_new (); - new->type = ZEBRA_ROUTE_BGP; - new->sub_type = BGP_ROUTE_STATIC; - new->peer = bgp->peer_self; - new->attr = bgp_attr_default_intern (BGP_ORIGIN_IGP); + new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self, + bgp_attr_default_intern(BGP_ORIGIN_IGP), rn); + SET_FLAG (new->flags, BGP_INFO_VALID); - new->uptime = bgp_clock (); new->extra = bgp_info_extra_new(); memcpy (new->extra->tag, tag, 3); @@ -4700,13 +4690,10 @@ bgp_aggregate_route (struct bgp *bgp, struct prefix *p, struct bgp_info *rinew, if (aggregate->count > 0) { rn = bgp_node_get (table, p); - new = bgp_info_new (); - new->type = ZEBRA_ROUTE_BGP; - new->sub_type = BGP_ROUTE_AGGREGATE; - new->peer = bgp->peer_self; + new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, bgp->peer_self, + bgp_attr_aggregate_intern(bgp, origin, aspath, community, + aggregate->as_set), rn); SET_FLAG (new->flags, BGP_INFO_VALID); - new->attr = bgp_attr_aggregate_intern (bgp, origin, aspath, community, aggregate->as_set); - new->uptime = bgp_clock (); bgp_info_add (rn, new); bgp_unlock_node (rn); @@ -4884,14 +4871,10 @@ bgp_aggregate_add (struct bgp *bgp, struct prefix *p, afi_t afi, safi_t safi, if (aggregate->count) { rn = bgp_node_get (table, p); - - new = bgp_info_new (); - new->type = ZEBRA_ROUTE_BGP; - new->sub_type = BGP_ROUTE_AGGREGATE; - new->peer = bgp->peer_self; + new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, bgp->peer_self, + bgp_attr_aggregate_intern(bgp, origin, aspath, community, + aggregate->as_set), rn); SET_FLAG (new->flags, BGP_INFO_VALID); - new->attr = bgp_attr_aggregate_intern (bgp, origin, aspath, community, aggregate->as_set); - new->uptime = bgp_clock (); bgp_info_add (rn, new); bgp_unlock_node (rn); @@ -5531,16 +5514,12 @@ bgp_redistribute_add (struct prefix *p, const struct in_addr *nexthop, aspath_unintern (&attr.aspath); bgp_attr_extra_free (&attr); return; - } + } } - new = bgp_info_new (); - new->type = type; - new->sub_type = 
BGP_ROUTE_REDISTRIBUTE; - new->peer = bgp->peer_self; + new = info_make(type, BGP_ROUTE_REDISTRIBUTE, bgp->peer_self, + new_attr, bn); SET_FLAG (new->flags, BGP_INFO_VALID); - new->attr = new_attr; - new->uptime = bgp_clock (); bgp_aggregate_increment (bgp, p, new, afi, SAFI_UNICAST); bgp_info_add (bn, new); diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index fea18dd29..f8cc34fc3 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -21,8 +21,11 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #ifndef _QUAGGA_BGP_ROUTE_H #define _QUAGGA_BGP_ROUTE_H +#include "queue.h" #include "bgp_table.h" +struct bgp_nexthop_cache; + /* Ancillary information to struct bgp_info, * used for uncommonly used data (aggregation, MPLS, etc.) * and lazily allocated to save memory. @@ -47,7 +50,16 @@ struct bgp_info /* For linked list. */ struct bgp_info *next; struct bgp_info *prev; - + + /* For nexthop linked list */ + LIST_ENTRY(bgp_info) nh_thread; + + /* Back pointer to the prefix node */ + struct bgp_node *net; + + /* Back pointer to the nexthop structure */ + struct bgp_nexthop_cache *nexthop; + /* Peer structure. */ struct peer *peer; diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 99a7be21b..6ba4f4bf2 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -38,6 +38,7 @@ Boston, MA 02111-1307, USA. */ #include "bgpd/bgp_fsm.h" #include "bgpd/bgp_debug.h" #include "bgpd/bgp_mpath.h" +#include "bgpd/bgp_nexthop.h" /* All information about zebra. */ struct zclient *zclient = NULL; @@ -75,6 +76,15 @@ bgp_router_id_update (int command, struct zclient *zclient, zebra_size_t length) return 0; } +/* Nexthop update message from zebra. */ +static int +bgp_read_nexthop_update (int command, struct zclient *zclient, + zebra_size_t length) +{ + bgp_parse_nexthop_update(); + return 0; +} + /* Inteface addition message from zebra. 
*/ static int bgp_interface_add (int command, struct zclient *zclient, zebra_size_t length) @@ -1187,6 +1197,7 @@ bgp_zebra_init (void) zclient->ipv6_route_add = zebra_read_ipv6; zclient->ipv6_route_delete = zebra_read_ipv6; #endif /* HAVE_IPV6 */ + zclient->nexthop_update = bgp_read_nexthop_update; /* Interface related init. */ if_init (); diff --git a/doc/next-hop-tracking.txt b/doc/next-hop-tracking.txt new file mode 100644 index 000000000..d157866e8 --- /dev/null +++ b/doc/next-hop-tracking.txt @@ -0,0 +1,326 @@ +0. Introduction + +This is the design specification for next hop tracking feature in +Quagga. + +1. Background + +Recursive routes are of the form: + + p/m --> n + [Ex: 1.1.0.0/16 --> 2.2.2.2] + +where 'n' itself is resolved through another route as follows: + + p2/m --> h, interface + [Ex: 2.2.2.0/24 --> 3.3.3.3, eth0] + +Usually, BGP routes are recursive in nature and BGP nexthops get +resolved through an IGP route. IGP usually adds its routes pointing to +an interface (these are called non-recursive routes). + +When BGP receives a recursive route from a peer, it needs to validate +the nexthop. The path is marked valid or invalid based on the +reachability status of the nexthop. Nexthop validation is also +important for BGP decision process as the metric to reach the nexthop +is a parameter to best path selection process. + +As it goes with routing, this is a dynamic process. Route to the +nexthop can change. The nexthop can become unreachable or +reachable. In the current BGP implementation, the nexthop validation +is done periodically in the scanner run. The default scanner run +interval is one minute. Every minute, the scanner task walks the +entire BGP table. It checks the validity of each nexthop with Zebra +(the routing table manager) through a request and response message +exchange between BGP and Zebra process. BGP process is blocked for +that duration. The mechanism has two major drawbacks: + +(1) The scanner task runs to completion. 
That can potentially starve + the other tasks for long periods of time, based on the BGP table + size and number of nexthops. + +(2) Convergence around routing changes that affect the nexthops can be + long (around a minute with the default intervals). The interval + can be shortened to achieve faster reaction time, but it makes the + first problem worse, with the scanner task consuming most of the + CPU resources. + +"Next hop tracking" feature makes this process event-driven. It +eliminates periodic nexthop validation and introduces an asynchronous +communication path between BGP and Zebra for route change notifications +that can then be acted upon. + +2. Goal + +Stating the obvious, the main goal is to remove the two limitations we +discussed in the previous section. The goals, in a constructive tone, +are the following: + +- fairness: the scanner run should not consume an unjustly high amount + of CPU time. This should give an overall good performance and + response time to other events (route changes, session events, + IO/user interface). + +- convergence: BGP must react to nexthop changes instantly and provide + sub-second convergence. This may involve diverting the routes from + one nexthop to another. + +3. Overview of the changes + +The changes are in both BGP and Zebra modules. The short summary is +the following: + +- Zebra implements a registration mechanism by which clients can + register for next hop notification. Consequently, it maintains a + separate table, per (VRF, AF) pair, of next hops and interested + client-list per next hop. + +- When the main routing table changes in Zebra, it evaluates the next + hop table: for each next hop, it checks if the route table + modifications have changed its state. If so, it notifies the + interested clients. + +- BGP is one such client. It registers the next hops corresponding to + all of its received routes/paths. It also threads the paths against + each nexthop structure. 
+ +- When BGP receives a next hop notification from Zebra, it walks the + corresponding path list. It makes them valid or invalid depending + on the next hop notification. It then re-computes best path for the + corresponding destination. This may result in re-announcing those + destinations to peers. + +4. Design + +4.1. Modules + +The core design introduces an "nht" (next hop tracking) module in BGP +and "rnh" (recursive nexthop) module in Zebra. The "nht" module +provides the following APIs: + +bgp_find_or_add_nexthop() : find or add a nexthop in BGP nexthop table +bgp_find_nexthop() : find a nexthop in BGP nexthop table +bgp_parse_nexthop_update() : parse a nexthop update message coming + from zebra + +The "rnh" module provides the following APIs: + +zebra_add_rnh() : add a recursive nexthop +zebra_delete_rnh() : delete a recursive nexthop +zebra_lookup_rnh() : lookup a recursive nexthop + +zebra_add_rnh_client() : register a client for nexthop notifications + against a recursive nexthop + +zebra_remove_rnh_client(): remove the client registration for a + recursive nexthop + +zebra_evaluate_rnh_table(): (re)evaluate the recursive nexthop table + (most probably because the main routing + table has changed). + +zebra_cleanup_rnh_client(): Cleanup a client from the "rnh" module + data structures (most probably because the + client is going away). + +4.2. 
Control flow + +The next hop registration control flow is the following: + +<==== BGP Process ====>|<==== Zebra Process ====> + | +receive module nht module | zserv module rnh module +---------------------------------------------------------------------- + | | | +bgp_update_ | | | + main() | bgp_find_or_add_ | | + | nexthop() | | + | | | + | | zserv_nexthop_ | + | | register() | + | | | zebra_add_rnh() + | | | + + +The next hop notification control flow is the following: + +<==== Zebra Process ====>|<==== BGP Process ====> + | +rib module rnh module | zebra module nht module +---------------------------------------------------------------------- + | | | +meta_queue_ | | | + process() | zebra_evaluate_ | | + | rnh_table() | | + | | | + | | bgp_read_nexthop_ | + | | update() | + | | | bgp_parse_ + | | | nexthop_update() + | | | + + +4.3. zclient message format + +ZEBRA_NEXTHOP_REGISTER and ZEBRA_NEXTHOP_UNREGISTER messages are +encoded in the following way: + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | AF | prefix len | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * . Nexthop prefix . + * . . + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * . . + * . . + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | AF | prefix len | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * . Nexthop prefix . + * . . + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +ZEBRA_NEXTHOP_UPDATE message is encoded as follows: + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | AF | prefix len | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * . Nexthop prefix getting resolved . + * . . 
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | metric | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | #nexthops | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | nexthop type | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * . resolving Nexthop details . + * . . + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * . . + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | nexthop type | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * . resolving Nexthop details . + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +4.4. BGP data structure + +Legend: + +/\ struct bgp_node: a BGP destination/route/prefix +\/ + +[ ] struct bgp_info: a BGP path (e.g. route received from a peer) + + _ +(_) struct bgp_nexthop_cache: a BGP nexthop + + + + /\ NULL + \/--+ ^ + | : + +--[ ]--[ ]--[ ]--> NULL + /\ : + \/--+ : + | : + +--[ ]--[ ]--> NULL + : + _ : + (_)............. + + +4.5. Zebra data structure + +rnh table: + + O + / \ + O O + / \ + O O + + struct rnh + { + u_char flags; + struct rib *state; + struct list *client_list; + struct route_node *node; + }; + +5. 
User interface changes + +quagga# show ip nht +3.3.3.3 + resolved via kernel + via 11.0.0.6, swp1 + Client list: bgp(fd 12) +11.0.0.10 + resolved via connected + is directly connected, swp2 + Client list: bgp(fd 12) +11.0.0.18 + resolved via connected + is directly connected, swp4 + Client list: bgp(fd 12) +11.11.11.11 + resolved via kernel + via 10.0.1.2, eth0 + Client list: bgp(fd 12) + +quagga# show ip bgp nexthop +Current BGP nexthop cache: + 3.3.3.3 valid [IGP metric 0], #paths 3 + Last update: Wed Oct 16 04:43:49 2013 + + 11.0.0.10 valid [IGP metric 1], #paths 1 + Last update: Wed Oct 16 04:43:51 2013 + + 11.0.0.18 valid [IGP metric 1], #paths 2 + Last update: Wed Oct 16 04:43:47 2013 + + 11.11.11.11 valid [IGP metric 0], #paths 1 + Last update: Wed Oct 16 04:43:47 2013 + +quagga# show ipv6 nht +quagga# show ip bgp nexthop detail + +quagga# debug bgp nht +quagga# debug zebra nht + +6. Sample test cases + + r2----r3 + / \ / + r1----r4 + +- Verify that a change in IGP cost triggers NHT + + shutdown the r1-r4 and r2-r4 links + + no shut the r1-r4 and r2-r4 links and wait for OSPF to come back + up + + We should be back to the original nexthop via r4 now +- Verify that a NH becoming unreachable triggers NHT + + Shutdown all links to r4 +- Verify that a NH becoming reachable triggers NHT + + no shut all links to r4 + +7. Future work + +- route-policy for next hop validation (e.g. ignore default route) +- damping for rapid next hop changes +- prioritized handling of nexthop changes ((un)reachability vs. metric + changes) +- handling recursion loop, e.g. 
+ 11.11.11.11/32 -> 12.12.12.12 + 12.12.12.12/32 -> 11.11.11.11 + 11.0.0.0/8 -> <interface> +- better statistics diff --git a/lib/Makefile.am b/lib/Makefile.am index bd2109292..f64972dd0 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -12,7 +12,7 @@ libzebra_la_SOURCES = \ sockunion.c prefix.c thread.c if.c memory.c buffer.c table.c hash.c \ filter.c routemap.c distribute.c stream.c str.c log.c plist.c \ zclient.c sockopt.c smux.c agentx.c snmp.c md5.c if_rmap.c keychain.c privs.c \ - sigevent.c pqueue.c jhash.c memtypes.c workqueue.c + sigevent.c pqueue.c jhash.c memtypes.c workqueue.c nexthop.c BUILT_SOURCES = memtypes.h route_types.h gitversion.h @@ -27,7 +27,7 @@ pkginclude_HEADERS = \ str.h stream.h table.h thread.h vector.h version.h vty.h zebra.h \ plist.h zclient.h sockopt.h smux.h md5.h if_rmap.h keychain.h \ privs.h sigevent.h pqueue.h jhash.h zassert.h memtypes.h \ - workqueue.h route_types.h libospf.h + workqueue.h route_types.h libospf.h nexthop.h EXTRA_DIST = \ regex.c regex-gnu.h \ @@ -845,6 +845,9 @@ static const struct zebra_desc_table command_types[] = { DESC_ENTRY (ZEBRA_ROUTER_ID_DELETE), DESC_ENTRY (ZEBRA_ROUTER_ID_UPDATE), DESC_ENTRY (ZEBRA_HELLO), + DESC_ENTRY (ZEBRA_NEXTHOP_REGISTER), + DESC_ENTRY (ZEBRA_NEXTHOP_UNREGISTER), + DESC_ENTRY (ZEBRA_NEXTHOP_UPDATE), }; #undef DESC_ENTRY diff --git a/lib/memtypes.c b/lib/memtypes.c index 47a343873..5af2642be 100644 --- a/lib/memtypes.c +++ b/lib/memtypes.c @@ -84,6 +84,7 @@ struct memory_list memory_list_zebra[] = { MTYPE_STATIC_IPV6, "Static IPv6 route" }, { MTYPE_RIB_DEST, "RIB destination" }, { MTYPE_RIB_TABLE_INFO, "RIB table info" }, + { MTYPE_RNH, "Nexthop tracking object" }, { -1, NULL }, }; diff --git a/lib/nexthop.c b/lib/nexthop.c new file mode 100644 index 000000000..2478dca7a --- /dev/null +++ b/lib/nexthop.c @@ -0,0 +1,100 @@ +/* A generic nexthop structure + * Copyright (C) 2013 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. 
+ * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ +#include <zebra.h> + +#include "prefix.h" +#include "table.h" +#include "memory.h" +#include "str.h" +#include "command.h" +#include "if.h" +#include "log.h" +#include "sockunion.h" +#include "linklist.h" +#include "thread.h" +#include "prefix.h" +#include "nexthop.h" + +/* check if nexthops are same, non-recursive */ +int +nexthop_same_no_recurse (struct nexthop *next1, struct nexthop *next2) +{ + if (next1->type != next2->type) + return 0; + + switch (next1->type) + { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + if (! IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4)) + return 0; + if (next1->ifindex && (next1->ifindex != next2->ifindex)) + return 0; + break; + case NEXTHOP_TYPE_IFINDEX: + case NEXTHOP_TYPE_IFNAME: + if (next1->ifindex != next2->ifindex) + return 0; + break; +#ifdef HAVE_IPV6 + case NEXTHOP_TYPE_IPV6: + if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6)) + return 0; + break; + case NEXTHOP_TYPE_IPV6_IFINDEX: + case NEXTHOP_TYPE_IPV6_IFNAME: + if (! 
IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6)) + return 0; + if (next1->ifindex != next2->ifindex) + return 0; + break; +#endif /* HAVE_IPV6 */ + default: + /* do nothing */ + break; + } + return 1; +} + +/* + * nexthop_type_to_str + */ +const char * +nexthop_type_to_str (enum nexthop_types_t nh_type) +{ + static const char *desc[] = { + "none", + "Directly connected", + "Interface route", + "IPv4 nexthop", + "IPv4 nexthop with ifindex", + "IPv4 nexthop with ifname", + "IPv6 nexthop", + "IPv6 nexthop with ifindex", + "IPv6 nexthop with ifname", + "Null0 nexthop", + }; + + if (nh_type >= ZEBRA_NUM_OF (desc)) + return "<Invalid nh type>"; + + return desc[nh_type]; +} diff --git a/lib/nexthop.h b/lib/nexthop.h new file mode 100644 index 000000000..bddac6554 --- /dev/null +++ b/lib/nexthop.h @@ -0,0 +1,89 @@ +/* + * Nexthop structure definition. + * Copyright (C) 2013 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _LIB_NEXTHOP_H +#define _LIB_NEXTHOP_H + +#include "prefix.h" + +union g_addr { + struct in_addr ipv4; +#ifdef HAVE_IPV6 + struct in6_addr ipv6; +#endif /* HAVE_IPV6 */ +}; + +enum nexthop_types_t +{ + NEXTHOP_TYPE_IFINDEX = 1, /* Directly connected. */ + NEXTHOP_TYPE_IFNAME, /* Interface route. 
*/ + NEXTHOP_TYPE_IPV4, /* IPv4 nexthop. */ + NEXTHOP_TYPE_IPV4_IFINDEX, /* IPv4 nexthop with ifindex. */ + NEXTHOP_TYPE_IPV4_IFNAME, /* IPv4 nexthop with ifname. */ + NEXTHOP_TYPE_IPV6, /* IPv6 nexthop. */ + NEXTHOP_TYPE_IPV6_IFINDEX, /* IPv6 nexthop with ifindex. */ + NEXTHOP_TYPE_IPV6_IFNAME, /* IPv6 nexthop with ifname. */ + NEXTHOP_TYPE_BLACKHOLE, /* Null0 nexthop. */ +}; + +/* Nexthop structure. */ +struct nexthop +{ + struct nexthop *next; + struct nexthop *prev; + + /* Interface index. */ + char *ifname; + unsigned int ifindex; + + enum nexthop_types_t type; + + u_char flags; +#define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */ +#define NEXTHOP_FLAG_FIB (1 << 1) /* FIB nexthop. */ +#define NEXTHOP_FLAG_RECURSIVE (1 << 2) /* Recursive nexthop. */ +#define NEXTHOP_FLAG_ONLINK (1 << 3) /* Nexthop should be installed onlink. */ +#define NEXTHOP_FLAG_MATCHED (1 << 4) /* Already matched vs a nexthop */ + + /* Nexthop address */ + union g_addr gate; + union g_addr src; + + /* Nexthops obtained by recursive resolution. + * + * If the nexthop struct needs to be resolved recursively, + * NEXTHOP_FLAG_RECURSIVE will be set in flags and the nexthops + * obtained by recursive resolution will be added to `resolved'. + * Only one level of recursive resolution is currently supported. 
*/ + struct nexthop *resolved; +}; + +#define nexthop_new() \ +({ \ + struct nexthop *n = XCALLOC (MTYPE_NEXTHOP, sizeof (struct nexthop)); \ + n; \ +}) + +extern const char *nexthop_type_to_str (enum nexthop_types_t nh_type); +extern int nexthop_same_no_recurse (struct nexthop *next1, struct nexthop *next2); + +#endif /*_LIB_NEXTHOP_H */ diff --git a/lib/zclient.c b/lib/zclient.c index 3b5477e90..b77fd3432 100644 --- a/lib/zclient.c +++ b/lib/zclient.c @@ -967,6 +967,12 @@ zclient_read (struct thread *thread) if (zclient->ipv6_route_delete) (*zclient->ipv6_route_delete) (command, zclient, length); break; + case ZEBRA_NEXTHOP_UPDATE: + if (zclient_debug) + zlog_debug("zclient rcvd nexthop update\n"); + if (zclient->nexthop_update) + (*zclient->nexthop_update) (command, zclient, length); + break; default: break; } diff --git a/lib/zclient.h b/lib/zclient.h index a660bbf19..2ece81ebc 100644 --- a/lib/zclient.h +++ b/lib/zclient.h @@ -82,6 +82,7 @@ struct zclient int (*ipv4_route_delete) (int, struct zclient *, uint16_t); int (*ipv6_route_add) (int, struct zclient *, uint16_t); int (*ipv6_route_delete) (int, struct zclient *, uint16_t); + int (*nexthop_update) (int, struct zclient *, uint16_t); }; /* Zebra API message flag. */ diff --git a/lib/zebra.h b/lib/zebra.h index 3715b342e..124431a85 100644 --- a/lib/zebra.h +++ b/lib/zebra.h @@ -424,7 +424,10 @@ struct in_pktinfo #define ZEBRA_ROUTER_ID_DELETE 21 #define ZEBRA_ROUTER_ID_UPDATE 22 #define ZEBRA_HELLO 23 -#define ZEBRA_MESSAGE_MAX 24 +#define ZEBRA_NEXTHOP_REGISTER 24 +#define ZEBRA_NEXTHOP_UNREGISTER 25 +#define ZEBRA_NEXTHOP_UPDATE 26 +#define ZEBRA_MESSAGE_MAX 27 /* Marker value used in new Zserv, in the byte location corresponding * the command value in the old zserv header. 
To allow old and new @@ -525,6 +528,7 @@ extern const char *zserv_command_string (unsigned int command); #define CHECK_FLAG(V,F) ((V) & (F)) #define SET_FLAG(V,F) (V) |= (F) #define UNSET_FLAG(V,F) (V) &= ~(F) +#define RESET_FLAG(V) (V) = 0 /* AFI and SAFI type. */ typedef u_int16_t afi_t; diff --git a/zebra/Makefile.am b/zebra/Makefile.am index 96f7bef30..9aa135236 100644 --- a/zebra/Makefile.am +++ b/zebra/Makefile.am @@ -34,16 +34,16 @@ zebra_SOURCES = \ zserv.c main.c interface.c connected.c zebra_rib.c zebra_routemap.c \ redistribute.c debug.c rtadv.c zebra_snmp.c zebra_vty.c \ irdp_main.c irdp_interface.c irdp_packet.c router-id.c zebra_fpm.c \ - $(othersrc) + $(othersrc) zebra_rnh.c testzebra_SOURCES = test_main.c zebra_rib.c interface.c connected.c debug.c \ zebra_vty.c \ - kernel_null.c redistribute_null.c ioctl_null.c misc_null.c + kernel_null.c redistribute_null.c ioctl_null.c misc_null.c zebra_rnh_null.c noinst_HEADERS = \ connected.h ioctl.h rib.h rt.h zserv.h redistribute.h debug.h rtadv.h \ interface.h ipforward.h irdp.h router-id.h kernel_socket.h \ - rt_netlink.h zebra_fpm.h zebra_fpm_private.h + rt_netlink.h zebra_fpm.h zebra_fpm_private.h zebra_rnh.h zebra_LDADD = $(otherobj) ../lib/libzebra.la $(LIBCAP) $(LIB_IPV6) diff --git a/zebra/debug.c b/zebra/debug.c index c3b00e0fa..10aba2d55 100644 --- a/zebra/debug.c +++ b/zebra/debug.c @@ -30,6 +30,7 @@ unsigned long zebra_debug_packet; unsigned long zebra_debug_kernel; unsigned long zebra_debug_rib; unsigned long zebra_debug_fpm; +unsigned long zebra_debug_nht; DEFUN (show_debugging_zebra, show_debugging_zebra_cmd, @@ -74,6 +75,8 @@ DEFUN (show_debugging_zebra, if (IS_ZEBRA_DEBUG_FPM) vty_out (vty, " Zebra FPM debugging is on%s", VTY_NEWLINE); + if (IS_ZEBRA_DEBUG_NHT) + vty_out (vty, " Zebra next-hop tracking debugging is on%s", VTY_NEWLINE); return CMD_SUCCESS; } @@ -89,6 +92,17 @@ DEFUN (debug_zebra_events, return CMD_WARNING; } +DEFUN (debug_zebra_nht, + debug_zebra_nht_cmd, + "debug zebra nht", 
+ DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra next hop tracking\n") +{ + zebra_debug_nht = ZEBRA_DEBUG_NHT; + return CMD_WARNING; +} + DEFUN (debug_zebra_packet, debug_zebra_packet_cmd, "debug zebra packet", @@ -196,6 +210,18 @@ DEFUN (no_debug_zebra_events, return CMD_SUCCESS; } +DEFUN (no_debug_zebra_nht, + no_debug_zebra_nht_cmd, + "no debug zebra nht", + NO_STR + DEBUG_STR + "Zebra configuration\n" + "Debug option set for zebra next hop tracking\n") +{ + zebra_debug_nht = 0; + return CMD_SUCCESS; +} + DEFUN (no_debug_zebra_packet, no_debug_zebra_packet_cmd, "no debug zebra packet", @@ -352,6 +378,7 @@ zebra_debug_init (void) install_element (ENABLE_NODE, &show_debugging_zebra_cmd); install_element (ENABLE_NODE, &debug_zebra_events_cmd); + install_element (ENABLE_NODE, &debug_zebra_nht_cmd); install_element (ENABLE_NODE, &debug_zebra_packet_cmd); install_element (ENABLE_NODE, &debug_zebra_packet_direct_cmd); install_element (ENABLE_NODE, &debug_zebra_packet_detail_cmd); @@ -360,6 +387,7 @@ zebra_debug_init (void) install_element (ENABLE_NODE, &debug_zebra_rib_q_cmd); install_element (ENABLE_NODE, &debug_zebra_fpm_cmd); install_element (ENABLE_NODE, &no_debug_zebra_events_cmd); + install_element (ENABLE_NODE, &no_debug_zebra_nht_cmd); install_element (ENABLE_NODE, &no_debug_zebra_packet_cmd); install_element (ENABLE_NODE, &no_debug_zebra_kernel_cmd); install_element (ENABLE_NODE, &no_debug_zebra_rib_cmd); @@ -367,6 +395,7 @@ zebra_debug_init (void) install_element (ENABLE_NODE, &no_debug_zebra_fpm_cmd); install_element (CONFIG_NODE, &debug_zebra_events_cmd); + install_element (CONFIG_NODE, &debug_zebra_nht_cmd); install_element (CONFIG_NODE, &debug_zebra_packet_cmd); install_element (CONFIG_NODE, &debug_zebra_packet_direct_cmd); install_element (CONFIG_NODE, &debug_zebra_packet_detail_cmd); @@ -375,6 +404,7 @@ zebra_debug_init (void) install_element (CONFIG_NODE, &debug_zebra_rib_q_cmd); install_element (CONFIG_NODE, &debug_zebra_fpm_cmd); 
install_element (CONFIG_NODE, &no_debug_zebra_events_cmd); + install_element (CONFIG_NODE, &no_debug_zebra_nht_cmd); install_element (CONFIG_NODE, &no_debug_zebra_packet_cmd); install_element (CONFIG_NODE, &no_debug_zebra_kernel_cmd); install_element (CONFIG_NODE, &no_debug_zebra_rib_cmd); diff --git a/zebra/debug.h b/zebra/debug.h index d9231a22b..0fb4dd9fe 100644 --- a/zebra/debug.h +++ b/zebra/debug.h @@ -37,6 +37,7 @@ #define ZEBRA_DEBUG_RIB_Q 0x02 #define ZEBRA_DEBUG_FPM 0x01 +#define ZEBRA_DEBUG_NHT 0x01 /* Debug related macro. */ #define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT) @@ -52,12 +53,14 @@ #define IS_ZEBRA_DEBUG_RIB_Q (zebra_debug_rib & ZEBRA_DEBUG_RIB_Q) #define IS_ZEBRA_DEBUG_FPM (zebra_debug_fpm & ZEBRA_DEBUG_FPM) +#define IS_ZEBRA_DEBUG_NHT (zebra_debug_nht & ZEBRA_DEBUG_NHT) extern unsigned long zebra_debug_event; extern unsigned long zebra_debug_packet; extern unsigned long zebra_debug_kernel; extern unsigned long zebra_debug_rib; extern unsigned long zebra_debug_fpm; +extern unsigned long zebra_debug_nht; extern void zebra_debug_init (void); diff --git a/zebra/rib.h b/zebra/rib.h index ad726c6cc..833b892cb 100644 --- a/zebra/rib.h +++ b/zebra/rib.h @@ -26,18 +26,10 @@ #include "prefix.h" #include "table.h" #include "queue.h" +#include "nexthop.h" #define DISTANCE_INFINITY 255 -/* Routing information base. */ - -union g_addr { - struct in_addr ipv4; -#ifdef HAVE_IPV6 - struct in6_addr ipv6; -#endif /* HAVE_IPV6 */ -}; - struct rib { /* Link list. */ @@ -224,50 +216,6 @@ struct static_ipv6 }; #endif /* HAVE_IPV6 */ -enum nexthop_types_t -{ - NEXTHOP_TYPE_IFINDEX = 1, /* Directly connected. */ - NEXTHOP_TYPE_IFNAME, /* Interface route. */ - NEXTHOP_TYPE_IPV4, /* IPv4 nexthop. */ - NEXTHOP_TYPE_IPV4_IFINDEX, /* IPv4 nexthop with ifindex. */ - NEXTHOP_TYPE_IPV4_IFNAME, /* IPv4 nexthop with ifname. */ - NEXTHOP_TYPE_IPV6, /* IPv6 nexthop. */ - NEXTHOP_TYPE_IPV6_IFINDEX, /* IPv6 nexthop with ifindex. 
*/ - NEXTHOP_TYPE_IPV6_IFNAME, /* IPv6 nexthop with ifname. */ - NEXTHOP_TYPE_BLACKHOLE, /* Null0 nexthop. */ -}; - -/* Nexthop structure. */ -struct nexthop -{ - struct nexthop *next; - struct nexthop *prev; - - /* Interface index. */ - char *ifname; - unsigned int ifindex; - - enum nexthop_types_t type; - - u_char flags; -#define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */ -#define NEXTHOP_FLAG_FIB (1 << 1) /* FIB nexthop. */ -#define NEXTHOP_FLAG_RECURSIVE (1 << 2) /* Recursive nexthop. */ -#define NEXTHOP_FLAG_ONLINK (1 << 3) /* Nexthop should be installed onlink. */ - - /* Nexthop address */ - union g_addr gate; - union g_addr src; - - /* Nexthops obtained by recursive resolution. - * - * If the nexthop struct needs to be resolved recursively, - * NEXTHOP_FLAG_RECURSIVE will be set in flags and the nexthops - * obtained by recursive resolution will be added to `resolved'. - * Only one level of recursive resolution is currently supported. */ - struct nexthop *resolved; -}; - /* The following for loop allows to iterate over the nexthop * structure of routes. * @@ -334,6 +282,9 @@ struct vrf /* Static route configuration. 
*/ struct route_table *stable[AFI_MAX][SAFI_MAX]; + + /* Recursive Nexthop table */ + struct route_table *rnh_table[AFI_MAX]; }; /* @@ -373,7 +324,6 @@ typedef struct rib_tables_iter_t_ rib_tables_iter_state_t state; } rib_tables_iter_t; -extern const char *nexthop_type_to_str (enum nexthop_types_t nh_type); extern struct nexthop *nexthop_ifindex_add (struct rib *, unsigned int); extern struct nexthop *nexthop_ifname_add (struct rib *, char *); extern struct nexthop *nexthop_blackhole_add (struct rib *); @@ -383,6 +333,10 @@ extern struct nexthop *nexthop_ipv4_ifindex_add (struct rib *, struct in_addr *, struct in_addr *, unsigned int); +extern void nexthop_free (struct nexthop *nexthop); +extern void nexthops_free (struct nexthop *nexthop); +extern void nexthop_add (struct rib *rib, struct nexthop *nexthop); + extern int nexthop_has_fib_child(struct nexthop *); extern void rib_lookup_and_dump (struct prefix_ipv4 *); extern void rib_lookup_and_pushup (struct prefix_ipv4 *); diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c index c242480f0..11f6f0ab2 100644 --- a/zebra/rt_netlink.c +++ b/zebra/rt_netlink.c @@ -36,6 +36,7 @@ #include "rib.h" #include "thread.h" #include "privs.h" +#include "nexthop.h" #include "zebra/zserv.h" #include "zebra/rt.h" diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c index b5f2b7607..1de75be4b 100644 --- a/zebra/zebra_fpm_netlink.c +++ b/zebra/zebra_fpm_netlink.c @@ -29,6 +29,7 @@ #include "rib.h" #include "rt_netlink.h" +#include "nexthop.h" #include "zebra_fpm_private.h" diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c index 02116b3d2..5cd81e3de 100644 --- a/zebra/zebra_rib.c +++ b/zebra/zebra_rib.c @@ -34,6 +34,7 @@ #include "workqueue.h" #include "prefix.h" #include "routemap.h" +#include "nexthop.h" #include "zebra/rib.h" #include "zebra/rt.h" @@ -41,6 +42,7 @@ #include "zebra/redistribute.h" #include "zebra/debug.h" #include "zebra/zebra_fpm.h" +#include "zebra/zebra_rnh.h" /* Default rtm_table for all clients 
*/ extern struct zebra_t zebrad; @@ -118,6 +120,8 @@ vrf_alloc (const char *name) vrf->stable[AFI_IP][SAFI_MULTICAST] = route_table_init (); vrf->stable[AFI_IP6][SAFI_MULTICAST] = route_table_init (); + vrf->rnh_table[AFI_IP] = route_table_init(); + vrf->rnh_table[AFI_IP6] = route_table_init(); return vrf; } @@ -177,31 +181,6 @@ vrf_static_table (afi_t afi, safi_t safi, u_int32_t id) return vrf->stable[afi][safi]; } -/* - * nexthop_type_to_str - */ -const char * -nexthop_type_to_str (enum nexthop_types_t nh_type) -{ - static const char *desc[] = { - "none", - "Directly connected", - "Interface route", - "IPv4 nexthop", - "IPv4 nexthop with ifindex", - "IPv4 nexthop with ifname", - "IPv6 nexthop", - "IPv6 nexthop with ifindex", - "IPv6 nexthop with ifname", - "Null0 nexthop", - }; - - if (nh_type >= ZEBRA_NUM_OF (desc)) - return "<Invalid nh type>"; - - return desc[nh_type]; -} - /* Add nexthop to the end of a nexthop list. */ static void _nexthop_add (struct nexthop **target, struct nexthop *nexthop) @@ -218,7 +197,7 @@ _nexthop_add (struct nexthop **target, struct nexthop *nexthop) } /* Add nexthop to the end of a rib node's nexthop list */ -static void +void nexthop_add (struct rib *rib, struct nexthop *nexthop) { _nexthop_add(&rib->nexthop, nexthop); @@ -238,10 +217,8 @@ nexthop_delete (struct rib *rib, struct nexthop *nexthop) rib->nexthop_num--; } -static void nexthops_free(struct nexthop *nexthop); - /* Free nexthop. */ -static void +void nexthop_free (struct nexthop *nexthop) { if (nexthop->ifname) @@ -252,7 +229,7 @@ nexthop_free (struct nexthop *nexthop) } /* Frees a list of nexthops */ -static void +void nexthops_free (struct nexthop *nexthop) { struct nexthop *nh, *next; @@ -1475,6 +1452,18 @@ process_subq (struct list * subq, u_char qindex) return 1; } +/* + * All meta queues have been processed. Trigger next-hop evaluation. 
+ */ +static void +meta_queue_process_complete (struct work_queue *dummy) +{ + zebra_evaluate_rnh_table(0, AF_INET); +#ifdef HAVE_IPV6 + zebra_evaluate_rnh_table(0, AF_INET6); +#endif /* HAVE_IPV6 */ +} + /* Dispatch the meta queue by picking, processing and unlocking the next RN from * a non-empty sub-queue with lowest priority. wq is equal to zebra->ribq and data * is pointed to the meta queue structure. @@ -1635,6 +1624,7 @@ rib_queue_init (struct zebra_t *zebra) /* fill in the work queue spec */ zebra->ribq->spec.workfunc = &meta_queue_process; zebra->ribq->spec.errorfunc = NULL; + zebra->ribq->spec.completion_func = &meta_queue_process_complete; /* XXX: TODO: These should be runtime configurable via vty */ zebra->ribq->spec.max_retries = 3; zebra->ribq->spec.hold = rib_process_hold_time; diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c new file mode 100644 index 000000000..117a2659c --- /dev/null +++ b/zebra/zebra_rnh.c @@ -0,0 +1,603 @@ +/* Zebra next hop tracking code + * Copyright (C) 2013 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include <zebra.h> + +#include "prefix.h" +#include "table.h" +#include "memory.h" +#include "str.h" +#include "command.h" +#include "if.h" +#include "log.h" +#include "sockunion.h" +#include "linklist.h" +#include "thread.h" +#include "workqueue.h" +#include "prefix.h" +#include "routemap.h" +#include "stream.h" +#include "nexthop.h" + +#include "zebra/rib.h" +#include "zebra/rt.h" +#include "zebra/zserv.h" +#include "zebra/redistribute.h" +#include "zebra/debug.h" +#include "zebra/zebra_rnh.h" + +#define lookup_rnh_table(v, f) \ +({ \ + struct vrf *vrf; \ + struct route_table *t = NULL; \ + vrf = vrf_lookup(v); \ + if (vrf) \ + t = vrf->rnh_table[family2afi(f)]; \ + t; \ +}) + +static void free_state(struct rib *rib); +static void copy_state(struct rnh *rnh, struct rib *rib); +static int compare_state(struct rib *r1, struct rib *r2); +static int send_client(struct rnh *rnh, struct zserv *client); +static void print_rnh(struct route_node *rn, struct vty *vty); + +char * +rnh_str (struct rnh *rnh, char *buf, int size) +{ + prefix2str(&(rnh->node->p), buf, size); + return buf; +} + +struct rnh * +zebra_add_rnh (struct prefix *p, u_int32_t vrfid) +{ + struct route_table *table; + struct route_node *rn; + struct rnh *rnh = NULL; + + if (IS_ZEBRA_DEBUG_NHT) + { + char buf[INET6_ADDRSTRLEN]; + prefix2str(p, buf, INET6_ADDRSTRLEN); + zlog_debug("add rnh %s in vrf %d", buf, vrfid); + } + table = lookup_rnh_table(vrfid, PREFIX_FAMILY(p)); + if (!table) + { + zlog_debug("add_rnh: rnh table not found\n"); + return NULL; + } + + /* Make it sure prefixlen is applied to the prefix. 
*/ + apply_mask (p); + + /* Lookup (or add) route node.*/ + rn = route_node_get (table, p); + + if (!rn->info) + { + rnh = XCALLOC(MTYPE_RNH, sizeof(struct rnh)); + rnh->client_list = list_new(); + route_lock_node (rn); + rn->info = rnh; + rnh->node = rn; + } + + route_unlock_node (rn); + return (rn->info); +} + +struct rnh * +zebra_lookup_rnh (struct prefix *p, u_int32_t vrfid) +{ + struct route_table *table; + struct route_node *rn; + + table = lookup_rnh_table(vrfid, PREFIX_FAMILY(p)); + if (!table) + return NULL; + + /* Make it sure prefixlen is applied to the prefix. */ + apply_mask (p); + + /* Lookup route node.*/ + rn = route_node_lookup (table, p); + if (!rn) + return NULL; + + route_unlock_node (rn); + return (rn->info); +} + +void +zebra_delete_rnh (struct rnh *rnh) +{ + struct route_node *rn; + + if (!rnh || !(rn = rnh->node)) + return; + + if (IS_ZEBRA_DEBUG_NHT) + { + char buf[INET6_ADDRSTRLEN]; + zlog_debug("delete rnh %s", rnh_str(rnh, buf, INET6_ADDRSTRLEN)); + } + + list_free(rnh->client_list); + free_state(rnh->state); + XFREE(MTYPE_RNH, rn->info); + rn->info = NULL; + route_unlock_node (rn); + return; +} + +void +zebra_add_rnh_client (struct rnh *rnh, struct zserv *client) +{ + if (IS_ZEBRA_DEBUG_NHT) + { + char buf[INET6_ADDRSTRLEN]; + zlog_debug("client %s registers rnh %s", + zebra_route_string(client->proto), + rnh_str(rnh, buf, INET6_ADDRSTRLEN)); + } + if (!listnode_lookup(rnh->client_list, client)) + { + listnode_add(rnh->client_list, client); + send_client(rnh, client); + } +} + +void +zebra_remove_rnh_client (struct rnh *rnh, struct zserv *client) +{ + if (IS_ZEBRA_DEBUG_NHT) + { + char buf[INET6_ADDRSTRLEN]; + zlog_debug("client %s unregisters rnh %s", + zebra_route_string(client->proto), + rnh_str(rnh, buf, INET6_ADDRSTRLEN)); + } + listnode_delete(rnh->client_list, client); + if (list_isempty(rnh->client_list)) + zebra_delete_rnh(rnh); +} + +int +zebra_evaluate_rnh_table (int vrfid, int family) +{ + struct route_table *ptable; + 
struct route_table *ntable; + struct route_node *prn; + struct route_node *nrn; + struct rnh *rnh; + struct zserv *client; + struct listnode *node; + struct rib *rib; + + ntable = lookup_rnh_table(vrfid, family); + if (!ntable) + { + zlog_debug("evaluate_rnh_table: rnh table not found\n"); + return -1; + } + + ptable = vrf_table(family2afi(family), SAFI_UNICAST, vrfid); + if (!ptable) + { + zlog_debug("evaluate_rnh_table: prefix table not found\n"); + return -1; + } + + for (nrn = route_top (ntable); nrn; nrn = route_next (nrn)) + { + if (!nrn->info) + continue; + + prn = route_node_match(ptable, &nrn->p); + if (!prn) + rib = NULL; + else + { + RNODE_FOREACH_RIB(prn, rib) + { + if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED)) + continue; + if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED)) + break; + } + } + + rnh = nrn->info; + if (compare_state(rib, rnh->state)) + { + if (IS_ZEBRA_DEBUG_NHT) + { + char bufn[INET6_ADDRSTRLEN]; + char bufp[INET6_ADDRSTRLEN]; + prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN); + if (prn) + prefix2str(&prn->p, bufp, INET6_ADDRSTRLEN); + else + strcpy(bufp, "null"); + zlog_debug("rnh %s resolved through route %s - sending " + "nexthop %s event to clients", bufn, bufp, + rib ? "reachable" : "unreachable"); + } + copy_state(rnh, rib); + for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client)) + send_client(rnh, client); + } + } + return 1; +} + +int +zebra_dispatch_rnh_table (int vrfid, int family, struct zserv *client) +{ + struct route_table *ntable; + struct route_node *nrn; + struct rnh *rnh; + + ntable = lookup_rnh_table(vrfid, family); + if (!ntable) + { + zlog_debug("dispatch_rnh_table: rnh table not found\n"); + return -1; + } + + for (nrn = route_top (ntable); nrn; nrn = route_next (nrn)) + { + if (!nrn->info) + continue; + + rnh = nrn->info; + if (IS_ZEBRA_DEBUG_NHT) + { + char bufn[INET6_ADDRSTRLEN]; + prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN); + zlog_debug("rnh %s - sending nexthop %s event to client %s", bufn, + rnh->state ? 
"reachable" : "unreachable", + zebra_route_string(client->proto)); + } + send_client(rnh, client); + } + return 1; +} + +void +zebra_print_rnh_table (int vrfid, int af, struct vty *vty) +{ + struct route_table *table; + struct route_node *rn; + + table = lookup_rnh_table(vrfid, af); + if (!table) + { + zlog_debug("print_rnhs: rnh table not found\n"); + return; + } + + for (rn = route_top(table); rn; rn = route_next(rn)) + if (rn->info) + print_rnh(rn, vty); +} + +int +zebra_cleanup_rnh_client (int vrfid, int family, struct zserv *client) +{ + struct route_table *ntable; + struct route_node *nrn; + struct rnh *rnh; + + ntable = lookup_rnh_table(vrfid, family); + if (!ntable) + { + zlog_debug("cleanup_rnh_client: rnh table not found\n"); + return -1; + } + + for (nrn = route_top (ntable); nrn; nrn = route_next (nrn)) + { + if (!nrn->info) + continue; + + rnh = nrn->info; + if (IS_ZEBRA_DEBUG_NHT) + { + char bufn[INET6_ADDRSTRLEN]; + prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN); + zlog_debug("rnh %s - cleaning state for client %s", bufn, + zebra_route_string(client->proto)); + } + zebra_remove_rnh_client(rnh, client); + } + return 1; +} + +/** + * free_state - free up the rib structure associated with the rnh. + */ +static void +free_state (struct rib *rib) +{ + struct nexthop *nexthop, *next; + + if (!rib) + return; + + /* free RIB and nexthops */ + for (nexthop = rib->nexthop; nexthop; nexthop = next) + { + next = nexthop->next; + nexthop_free (nexthop); + } + XFREE (MTYPE_RIB, rib); +} + +/** + * copy_nexthop - copy a nexthop to the rib structure. 
+ */ +static void +copy_nexthop (struct rib *state, struct nexthop *nh) +{ + struct nexthop *nexthop; + + nexthop = nexthop_new(); + nexthop->flags = nh->flags; + nexthop->type = nh->type; + nexthop->ifindex = nh->ifindex; + if (nh->ifname) + nexthop->ifname = XSTRDUP(0, nh->ifname); + memcpy(&(nexthop->gate), &(nh->gate), sizeof(union g_addr)); + memcpy(&(nexthop->src), &(nh->src), sizeof(union g_addr)); + + nexthop_add(state, nexthop); +} + +static void +copy_state (struct rnh *rnh, struct rib *rib) +{ + struct rib *state; + struct nexthop *nh; + + if (rnh->state) + { + free_state(rnh->state); + rnh->state = NULL; + } + + if (!rib) + return; + + state = XCALLOC (MTYPE_RIB, sizeof (struct rib)); + state->type = rib->type; + state->metric = rib->metric; + + for (nh = rib->nexthop; nh; nh = nh->next) + copy_nexthop(state, nh); + rnh->state = state; +} + +static int +compare_state (struct rib *r1, struct rib *r2) +{ + struct nexthop *nh1; + struct nexthop *nh2; + u_char found_nh = 0; + + if (!r1 && !r2) + return 0; + + if ((!r1 && r2) || (r1 && !r2)) + return 1; + + if (r1->metric != r2->metric) + return 1; + + if (r1->nexthop_num != r2->nexthop_num) + return 1; + + /* We need to verify that the nexthops for r1 match the nexthops for r2. + * Since it is possible for a rib entry to have the same nexthop multiple + * times (Example: [a,a]) we need to keep track of which r2 nexthops we have + * already used as a match against a r1 nexthop. We track this + * via NEXTHOP_FLAG_MATCHED. Clear this flag for all r2 nexthops when you + * are finished. 
+ * + * TRUE: r1 [a,b], r2 [a,b] + * TRUE: r1 [a,b], r2 [b,a] + * FALSE: r1 [a,b], r2 [a,c] + * FALSE: r1 [a,a], r2 [a,b] + */ + for (nh1 = r1->nexthop; nh1; nh1 = nh1->next) + { + found_nh = 0; + for (nh2 = r2->nexthop; nh2; nh2 = nh2->next) + { + if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED)) + continue; + + if (nexthop_same_no_recurse(nh1, nh2)) + { + SET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED); + found_nh = 1; + break; + } + } + + if (!found_nh) + { + for (nh2 = r2->nexthop; nh2; nh2 = nh2->next) + if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED)) + UNSET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED); + return 1; + } + } + + for (nh2 = r2->nexthop; nh2; nh2 = nh2->next) + if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED)) + UNSET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED); + + return 0; +} + +static int +send_client (struct rnh *rnh, struct zserv *client) +{ + struct stream *s; + struct rib *rib; + unsigned long nump; + u_char num; + struct nexthop *nexthop; + struct route_node *rn; + + rn = rnh->node; + rib = rnh->state; + + /* Get output stream. 
*/ + s = client->obuf; + stream_reset (s); + + zserv_create_header (s, ZEBRA_NEXTHOP_UPDATE); + + stream_putw(s, rn->p.family); + stream_put_prefix (s, &rn->p); + + if (rib) + { + stream_putl (s, rib->metric); + num = 0; + nump = stream_get_endp(s); + stream_putc (s, 0); + for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next) + if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB)) + { + stream_putc (s, nexthop->type); + switch (nexthop->type) + { + case ZEBRA_NEXTHOP_IPV4: + stream_put_in_addr (s, &nexthop->gate.ipv4); + break; + case ZEBRA_NEXTHOP_IFINDEX: + case ZEBRA_NEXTHOP_IFNAME: + stream_putl (s, nexthop->ifindex); + break; + case ZEBRA_NEXTHOP_IPV4_IFINDEX: + case ZEBRA_NEXTHOP_IPV4_IFNAME: + stream_put_in_addr (s, &nexthop->gate.ipv4); + stream_putl (s, nexthop->ifindex); + break; +#ifdef HAVE_IPV6 + case ZEBRA_NEXTHOP_IPV6: + stream_put (s, &nexthop->gate.ipv6, 16); + break; + case ZEBRA_NEXTHOP_IPV6_IFINDEX: + case ZEBRA_NEXTHOP_IPV6_IFNAME: + stream_put (s, &nexthop->gate.ipv6, 16); + stream_putl (s, nexthop->ifindex); + break; +#endif /* HAVE_IPV6 */ + default: + /* do nothing */ + break; + } + num++; + } + stream_putc_at (s, nump, num); + } + else + { + stream_putl (s, 0); + stream_putc (s, 0); + } + stream_putw_at (s, 0, stream_get_endp (s)); + return zebra_server_send_message(client); +} + +static void +print_nh (struct nexthop *nexthop, struct vty *vty) +{ + char buf[BUFSIZ]; + + switch (nexthop->type) + { + case NEXTHOP_TYPE_IPV4: + case NEXTHOP_TYPE_IPV4_IFINDEX: + vty_out (vty, " via %s", inet_ntoa (nexthop->gate.ipv4)); + if (nexthop->ifindex) + vty_out (vty, ", %s", ifindex2ifname (nexthop->ifindex)); + break; + case NEXTHOP_TYPE_IPV6: + case NEXTHOP_TYPE_IPV6_IFINDEX: + case NEXTHOP_TYPE_IPV6_IFNAME: + vty_out (vty, " %s", + inet_ntop (AF_INET6, &nexthop->gate.ipv6, buf, BUFSIZ)); + if (nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME) + vty_out (vty, ", %s", nexthop->ifname); + else if (nexthop->ifindex) + vty_out (vty, ", via %s", 
ifindex2ifname (nexthop->ifindex)); + break; + case NEXTHOP_TYPE_IFINDEX: + vty_out (vty, " is directly connected, %s", + ifindex2ifname (nexthop->ifindex)); + break; + case NEXTHOP_TYPE_IFNAME: + vty_out (vty, " is directly connected, %s", nexthop->ifname); + break; + case NEXTHOP_TYPE_BLACKHOLE: + vty_out (vty, " is directly connected, Null0"); + break; + default: + break; + } + vty_out(vty, "%s", VTY_NEWLINE); +} + +static void +print_rnh (struct route_node *rn, struct vty *vty) +{ + struct rnh *rnh; + struct nexthop *nexthop; + struct listnode *node; + struct zserv *client; + char buf[BUFSIZ]; + + rnh = rn->info; + vty_out(vty, "%s%s", inet_ntop(rn->p.family, &rn->p.u.prefix, buf, BUFSIZ), + VTY_NEWLINE); + if (rnh->state) + { + vty_out(vty, " resolved via %s%s", + zebra_route_string(rnh->state->type), VTY_NEWLINE); + for (nexthop = rnh->state->nexthop; nexthop; nexthop = nexthop->next) + print_nh(nexthop, vty); + } + else + vty_out(vty, " unresolved%s", VTY_NEWLINE); + + vty_out(vty, " Client list:"); + for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client)) + vty_out(vty, " %s(fd %d)", zebra_route_string(client->proto), + client->sock); + vty_out(vty, "%s", VTY_NEWLINE); +} diff --git a/zebra/zebra_rnh.h b/zebra/zebra_rnh.h new file mode 100644 index 000000000..212bab602 --- /dev/null +++ b/zebra/zebra_rnh.h @@ -0,0 +1,48 @@ +/* + * Zebra next hop tracking header + * Copyright (C) 2013 Cumulus Networks, Inc. + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _ZEBRA_RNH_H +#define _ZEBRA_RNH_H + +#include "prefix.h" +#include "vty.h" + +/* Tracked nexthop (rnh): resolving route state and registered clients. */ +struct rnh +{ + u_char flags; + struct rib *state; + struct list *client_list; + struct route_node *node; +}; + +extern struct rnh *zebra_add_rnh(struct prefix *p, u_int32_t vrfid); +extern struct rnh *zebra_lookup_rnh(struct prefix *p, u_int32_t vrfid); +extern void zebra_delete_rnh(struct rnh *rnh); +extern void zebra_add_rnh_client(struct rnh *rnh, struct zserv *client); +extern void zebra_remove_rnh_client(struct rnh *rnh, struct zserv *client); +extern int zebra_evaluate_rnh_table(int vrfid, int family); +extern int zebra_dispatch_rnh_table(int vrfid, int family, struct zserv *cl); +extern void zebra_print_rnh_table(int vrfid, int family, struct vty *vty); +extern char *rnh_str(struct rnh *rnh, char *buf, int size); +extern int zebra_cleanup_rnh_client(int vrf, int family, struct zserv *client); +#endif /*_ZEBRA_RNH_H */ diff --git a/zebra/zebra_rnh_null.c b/zebra/zebra_rnh_null.c new file mode 100644 index 000000000..68b58ce3e --- /dev/null +++ b/zebra/zebra_rnh_null.c @@ -0,0 +1,10 @@ +#include <zebra.h> +#include "zebra/rib.h" +#include "zebra/zserv.h" +#include "zebra/zebra_rnh.h" + +int zebra_evaluate_rnh_table (int vrfid, int family) +{ return 0; } + +void zebra_print_rnh_table (int vrfid, int family, struct vty *vty) +{} diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c index b0dca088b..7b4472488 100644 --- a/zebra/zebra_routemap.c +++ b/zebra/zebra_routemap.c @@ -28,6 +28,7 @@ #include "command.h" #include "filter.h" #include "plist.h" +#include "nexthop.h" #include "zebra/zserv.h" diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c index 254354c72..72095706e 100644 --- 
a/zebra/zebra_vty.c +++ b/zebra/zebra_vty.c @@ -27,8 +27,10 @@ #include "command.h" #include "table.h" #include "rib.h" +#include "nexthop.h" #include "zebra/zserv.h" +#include "zebra/zebra_rnh.h" /* General fucntion for static route. */ static int @@ -811,6 +813,28 @@ DEFUN (show_ip_route, return CMD_SUCCESS; } +DEFUN (show_ip_nht, + show_ip_nht_cmd, + "show ip nht", + SHOW_STR + IP_STR + "IP nexthop tracking table\n") +{ + zebra_print_rnh_table(0, AF_INET, vty); + return CMD_SUCCESS; +} + +DEFUN (show_ipv6_nht, + show_ipv6_nht_cmd, + "show ipv6 nht", + SHOW_STR + IP_STR + "IPv6 nexthop tracking table\n") +{ + zebra_print_rnh_table(0, AF_INET6, vty); + return CMD_SUCCESS; +} + DEFUN (show_ip_route_prefix_longer, show_ip_route_prefix_longer_cmd, "show ip route A.B.C.D/M longer-prefixes", @@ -2210,6 +2234,8 @@ zebra_vty_init (void) install_element (CONFIG_NODE, &no_ip_route_mask_flags_distance2_cmd); install_element (VIEW_NODE, &show_ip_route_cmd); + install_element (VIEW_NODE, &show_ip_nht_cmd); + install_element (VIEW_NODE, &show_ipv6_nht_cmd); install_element (VIEW_NODE, &show_ip_route_addr_cmd); install_element (VIEW_NODE, &show_ip_route_prefix_cmd); install_element (VIEW_NODE, &show_ip_route_prefix_longer_cmd); @@ -2218,6 +2244,8 @@ zebra_vty_init (void) install_element (VIEW_NODE, &show_ip_route_summary_cmd); install_element (VIEW_NODE, &show_ip_route_summary_prefix_cmd); install_element (ENABLE_NODE, &show_ip_route_cmd); + install_element (ENABLE_NODE, &show_ip_nht_cmd); + install_element (ENABLE_NODE, &show_ipv6_nht_cmd); install_element (ENABLE_NODE, &show_ip_route_addr_cmd); install_element (ENABLE_NODE, &show_ip_route_prefix_cmd); install_element (ENABLE_NODE, &show_ip_route_prefix_longer_cmd); diff --git a/zebra/zserv.c b/zebra/zserv.c index b245b7f7b..ecb3bd05d 100644 --- a/zebra/zserv.c +++ b/zebra/zserv.c @@ -36,12 +36,14 @@ #include "privs.h" #include "network.h" #include "buffer.h" +#include "nexthop.h" #include "zebra/zserv.h" #include 
"zebra/router-id.h" #include "zebra/redistribute.h" #include "zebra/debug.h" #include "zebra/ipforward.h" +#include "zebra/zebra_rnh.h" /* Event list of zebra. */ enum event { ZEBRA_SERV, ZEBRA_READ, ZEBRA_WRITE }; @@ -101,7 +103,7 @@ zserv_flush_data(struct thread *thread) return 0; } -static int +int zebra_server_send_message(struct zserv *client) { if (client->t_suicide) @@ -130,7 +132,7 @@ zebra_server_send_message(struct zserv *client) return 0; } -static void +void zserv_create_header (struct stream *s, uint16_t cmd) { /* length placeholder, caller can update */ @@ -599,6 +601,65 @@ zsend_ipv4_nexthop_lookup (struct zserv *client, struct in_addr addr) return zebra_server_send_message(client); } +/* Nexthop register */ +static int +zserv_nexthop_register (struct zserv *client, int sock, u_short length) +{ + struct rnh *rnh; + struct stream *s; + struct prefix p; + u_short l = 0; + + if (IS_ZEBRA_DEBUG_NHT) + zlog_debug("nexthop_register msg from client %s: length=%d\n", + zebra_route_string(client->proto), length); + + s = client->ibuf; + + while (l < length) + { + p.family = stream_getw(s); + p.prefixlen = stream_getc(s); + l += 3; + stream_get(&p.u.prefix, s, PSIZE(p.prefixlen)); + l += PSIZE(p.prefixlen); + rnh = zebra_add_rnh(&p, 0); + zebra_add_rnh_client(rnh, client); + } + zebra_evaluate_rnh_table(0, AF_INET); + zebra_evaluate_rnh_table(0, AF_INET6); + return 0; +} + +/* Nexthop unregister */ +static int +zserv_nexthop_unregister (struct zserv *client, int sock, u_short length) +{ + struct rnh *rnh; + struct stream *s; + struct prefix p; + u_short l = 0; + + if (IS_ZEBRA_DEBUG_NHT) + zlog_debug("nexthop_unregister msg from client %s: length=%d\n", + zebra_route_string(client->proto), length); + + s = client->ibuf; + + while (l < length) + { + p.family = stream_getw(s); + p.prefixlen = stream_getc(s); + l += 3; + stream_get(&p.u.prefix, s, PSIZE(p.prefixlen)); + l += PSIZE(p.prefixlen); + rnh = zebra_lookup_rnh(&p, 0); + if (rnh) + 
zebra_remove_rnh_client(rnh, client); + } + return 0; +} + static int zsend_ipv4_import_lookup (struct zserv *client, struct prefix_ipv4 *p) { @@ -1171,6 +1232,7 @@ zread_hello (struct zserv *client) client->sock); route_type_oaths[proto] = client->sock; + client->proto = proto; } } @@ -1196,6 +1258,9 @@ zebra_score_rib (int client_sock) static void zebra_client_close (struct zserv *client) { + zebra_cleanup_rnh_client(0, AF_INET, client); + zebra_cleanup_rnh_client(0, AF_INET6, client); + /* Close file descriptor. */ if (client->sock) { @@ -1408,6 +1473,12 @@ zebra_client_read (struct thread *thread) case ZEBRA_HELLO: zread_hello (client); break; + case ZEBRA_NEXTHOP_REGISTER: + zserv_nexthop_register(client, sock, length); + break; + case ZEBRA_NEXTHOP_UNREGISTER: + zserv_nexthop_unregister(client, sock, length); + break; default: zlog_info ("Zebra received unknown command %d", command); break; @@ -1681,8 +1752,10 @@ DEFUN (show_zebra_client, struct zserv *client; for (ALL_LIST_ELEMENTS_RO (zebrad.client_list, node, client)) - vty_out (vty, "Client fd %d%s", client->sock, VTY_NEWLINE); - + vty_out (vty, "Client %s fd %d%s", + zebra_route_string(client->proto), client->sock, + VTY_NEWLINE); + return CMD_SUCCESS; } diff --git a/zebra/zserv.h b/zebra/zserv.h index 5e8bccac3..92b8ba3a6 100644 --- a/zebra/zserv.h +++ b/zebra/zserv.h @@ -66,6 +66,9 @@ struct zserv /* Router-id information. */ u_char ridinfo; + + /* client's protocol */ + u_char proto; }; /* Zebra instance */ @@ -110,4 +113,7 @@ extern int zsend_router_id_update(struct zserv *, struct prefix *); extern pid_t pid; +extern void zserv_create_header(struct stream *s, uint16_t cmd); +extern int zebra_server_send_message(struct zserv *client); + #endif /* _ZEBRA_ZEBRA_H */ |