summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDonald Sharp <sharpd@cumulusnetworks.com>2015-05-20 02:40:34 +0200
committerDonald Sharp <sharpd@cumulusnetworks.com>2015-05-20 02:40:34 +0200
commitfb018d251e5fc2fee0814a63c4e0a44274c98bb9 (patch)
tree59351ede3cd497fbb05c2b6906de549f17686f55
parentbgpd: bgpd-update-delay.patch (diff)
downloadfrr-fb018d251e5fc2fee0814a63c4e0a44274c98bb9.tar.xz
frr-fb018d251e5fc2fee0814a63c4e0a44274c98bb9.zip
nexthop-tracking.patch
quagga: nexthop-tracking.patch Add next hop tracking support to Quagga. Complete documentation in doc/next-hop-tracking.txt. Signed-off-by: Pradosh Mohapatra <pmohapat@cumulusnetworks.com> Signed-off-by: Daniel Walton <dwalton@cumulusnetworks.com> Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
-rw-r--r--bgpd/Makefile.am5
-rw-r--r--bgpd/bgp_debug.c57
-rw-r--r--bgpd/bgp_debug.h3
-rw-r--r--bgpd/bgp_nexthop.c221
-rw-r--r--bgpd/bgp_nexthop.h20
-rw-r--r--bgpd/bgp_nht.c473
-rw-r--r--bgpd/bgp_nht.h62
-rw-r--r--bgpd/bgp_route.c103
-rw-r--r--bgpd/bgp_route.h14
-rw-r--r--bgpd/bgp_zebra.c11
-rw-r--r--doc/next-hop-tracking.txt326
-rw-r--r--lib/Makefile.am4
-rw-r--r--lib/log.c3
-rw-r--r--lib/memtypes.c1
-rw-r--r--lib/nexthop.c100
-rw-r--r--lib/nexthop.h89
-rw-r--r--lib/zclient.c6
-rw-r--r--lib/zclient.h1
-rw-r--r--lib/zebra.h6
-rw-r--r--zebra/Makefile.am6
-rw-r--r--zebra/debug.c30
-rw-r--r--zebra/debug.h3
-rw-r--r--zebra/rib.h62
-rw-r--r--zebra/rt_netlink.c1
-rw-r--r--zebra/zebra_fpm_netlink.c1
-rw-r--r--zebra/zebra_rib.c50
-rw-r--r--zebra/zebra_rnh.c603
-rw-r--r--zebra/zebra_rnh.h48
-rw-r--r--zebra/zebra_rnh_null.c10
-rw-r--r--zebra/zebra_routemap.c1
-rw-r--r--zebra/zebra_vty.c28
-rw-r--r--zebra/zserv.c81
-rw-r--r--zebra/zserv.h6
33 files changed, 2224 insertions, 211 deletions
diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am
index 9928734ee..63836a1bd 100644
--- a/bgpd/Makefile.am
+++ b/bgpd/Makefile.am
@@ -15,14 +15,15 @@ libbgp_a_SOURCES = \
bgp_debug.c bgp_route.c bgp_zebra.c bgp_open.c bgp_routemap.c \
bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \
bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \
- bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c
+ bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c \
+ bgp_nht.c
noinst_HEADERS = \
bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \
bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \
bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \
bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \
- bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h
+ bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgp_nht.h
bgpd_SOURCES = bgp_main.c
bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@
diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c
index 1d0976977..91e6ad177 100644
--- a/bgpd/bgp_debug.c
+++ b/bgpd/bgp_debug.c
@@ -45,6 +45,7 @@ unsigned long conf_bgp_debug_keepalive;
unsigned long conf_bgp_debug_update;
unsigned long conf_bgp_debug_normal;
unsigned long conf_bgp_debug_zebra;
+unsigned long conf_bgp_debug_nht;
unsigned long term_bgp_debug_as4;
unsigned long term_bgp_debug_fsm;
@@ -55,6 +56,7 @@ unsigned long term_bgp_debug_keepalive;
unsigned long term_bgp_debug_update;
unsigned long term_bgp_debug_normal;
unsigned long term_bgp_debug_zebra;
+unsigned long term_bgp_debug_nht;
/* messages for BGP-4 status */
const struct message bgp_status_msg[] =
@@ -471,6 +473,48 @@ ALIAS (no_debug_bgp_events,
BGP_STR
"BGP events\n")
+DEFUN (debug_bgp_nht,
+ debug_bgp_nht_cmd,
+ "debug bgp nht",
+ DEBUG_STR
+ BGP_STR
+ "BGP nexthop tracking events\n")
+{
+ if (vty->node == CONFIG_NODE)
+ DEBUG_ON (nht, NHT);
+ else
+ {
+ TERM_DEBUG_ON (nht, NHT);
+ vty_out (vty, "BGP nexthop tracking debugging is on%s", VTY_NEWLINE);
+ }
+ return CMD_SUCCESS;
+}
+
+DEFUN (no_debug_bgp_nht,
+ no_debug_bgp_nht_cmd,
+ "no debug bgp nht",
+ NO_STR
+ DEBUG_STR
+ BGP_STR
+ "BGP nexthop tracking events\n")
+{
+ if (vty->node == CONFIG_NODE)
+ DEBUG_OFF (nht, NHT);
+ else
+ {
+ TERM_DEBUG_OFF (nht, NHT);
+ vty_out (vty, "BGP nexthop tracking debugging is off%s", VTY_NEWLINE);
+ }
+ return CMD_SUCCESS;
+}
+
+ALIAS (no_debug_bgp_nht,
+ undebug_bgp_nht_cmd,
+ "undebug bgp nht",
+ UNDEBUG_STR
+ BGP_STR
+ "BGP next-hop tracking updates\n")
+
DEFUN (debug_bgp_filter,
debug_bgp_filter_cmd,
"debug bgp filters",
@@ -787,6 +831,8 @@ DEFUN (show_debugging_bgp,
vty_out (vty, " BGP as4 debugging is on%s", VTY_NEWLINE);
if (BGP_DEBUG (as4, AS4_SEGMENT))
vty_out (vty, " BGP as4 aspath segment debugging is on%s", VTY_NEWLINE);
+ if (BGP_DEBUG (nht, NHT))
+ vty_out (vty, " BGP next-hop tracking debugging is on%s", VTY_NEWLINE);
vty_out (vty, "%s", VTY_NEWLINE);
return CMD_SUCCESS;
}
@@ -860,6 +906,12 @@ bgp_config_write_debug (struct vty *vty)
write++;
}
+ if (CONF_BGP_DEBUG (nht, NHT))
+ {
+ vty_out (vty, "debug bgp nht%s", VTY_NEWLINE);
+ write++;
+ }
+
return write;
}
@@ -886,6 +938,8 @@ bgp_debug_init (void)
install_element (CONFIG_NODE, &debug_bgp_fsm_cmd);
install_element (ENABLE_NODE, &debug_bgp_events_cmd);
install_element (CONFIG_NODE, &debug_bgp_events_cmd);
+ install_element (ENABLE_NODE, &debug_bgp_nht_cmd);
+ install_element (CONFIG_NODE, &debug_bgp_nht_cmd);
install_element (ENABLE_NODE, &debug_bgp_filter_cmd);
install_element (CONFIG_NODE, &debug_bgp_filter_cmd);
install_element (ENABLE_NODE, &debug_bgp_keepalive_cmd);
@@ -912,6 +966,9 @@ bgp_debug_init (void)
install_element (ENABLE_NODE, &no_debug_bgp_events_cmd);
install_element (ENABLE_NODE, &undebug_bgp_events_cmd);
install_element (CONFIG_NODE, &no_debug_bgp_events_cmd);
+ install_element (ENABLE_NODE, &no_debug_bgp_nht_cmd);
+ install_element (ENABLE_NODE, &undebug_bgp_nht_cmd);
+ install_element (CONFIG_NODE, &no_debug_bgp_nht_cmd);
install_element (ENABLE_NODE, &no_debug_bgp_filter_cmd);
install_element (ENABLE_NODE, &undebug_bgp_filter_cmd);
install_element (CONFIG_NODE, &no_debug_bgp_filter_cmd);
diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h
index ce8547b04..0090f814a 100644
--- a/bgpd/bgp_debug.h
+++ b/bgpd/bgp_debug.h
@@ -67,6 +67,7 @@ extern unsigned long conf_bgp_debug_keepalive;
extern unsigned long conf_bgp_debug_update;
extern unsigned long conf_bgp_debug_normal;
extern unsigned long conf_bgp_debug_zebra;
+extern unsigned long conf_bgp_debug_nht;
extern unsigned long term_bgp_debug_as4;
extern unsigned long term_bgp_debug_fsm;
@@ -77,6 +78,7 @@ extern unsigned long term_bgp_debug_keepalive;
extern unsigned long term_bgp_debug_update;
extern unsigned long term_bgp_debug_normal;
extern unsigned long term_bgp_debug_zebra;
+extern unsigned long term_bgp_debug_nht;
#define BGP_DEBUG_AS4 0x01
#define BGP_DEBUG_AS4_SEGMENT 0x02
@@ -90,6 +92,7 @@ extern unsigned long term_bgp_debug_zebra;
#define BGP_DEBUG_UPDATE_OUT 0x02
#define BGP_DEBUG_NORMAL 0x01
#define BGP_DEBUG_ZEBRA 0x01
+#define BGP_DEBUG_NHT 0x01
#define BGP_DEBUG_PACKET_SEND 0x01
#define BGP_DEBUG_PACKET_SEND_DETAIL 0x02
diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c
index 5b1d13ac8..f048ae289 100644
--- a/bgpd/bgp_nexthop.c
+++ b/bgpd/bgp_nexthop.c
@@ -30,17 +30,21 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#include "memory.h"
#include "hash.h"
#include "jhash.h"
+#include "nexthop.h"
#include "bgpd/bgpd.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_route.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_nht.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_damp.h"
#include "zebra/rib.h"
#include "zebra/zserv.h" /* For ZEBRA_SERV_PATH. */
+extern struct zclient *zclient;
+
struct bgp_nexthop_cache *zlookup_query (struct in_addr);
#ifdef HAVE_IPV6
struct bgp_nexthop_cache *zlookup_query_ipv6 (struct in6_addr *);
@@ -59,7 +63,7 @@ static int bgp_scan_interval;
static int bgp_import_interval;
/* Route table for next-hop lookup cache. */
-static struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
+struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
static struct bgp_table *cache1_table[AFI_MAX];
static struct bgp_table *cache2_table[AFI_MAX];
@@ -69,6 +73,13 @@ static struct bgp_table *bgp_connected_table[AFI_MAX];
/* BGP nexthop lookup query client. */
struct zclient *zlookup = NULL;
+char *
+bnc_str (struct bgp_nexthop_cache *bnc, char *buf, int size)
+{
+ prefix2str(&(bnc->node->p), buf, size);
+ return buf;
+}
+
/* Add nexthop to the end of the list. */
static void
bnc_nexthop_add (struct bgp_nexthop_cache *bnc, struct nexthop *nexthop)
@@ -84,7 +95,7 @@ bnc_nexthop_add (struct bgp_nexthop_cache *bnc, struct nexthop *nexthop)
nexthop->prev = last;
}
-static void
+void
bnc_nexthop_free (struct bgp_nexthop_cache *bnc)
{
struct nexthop *nexthop;
@@ -97,13 +108,17 @@ bnc_nexthop_free (struct bgp_nexthop_cache *bnc)
}
}
-static struct bgp_nexthop_cache *
-bnc_new (void)
+struct bgp_nexthop_cache *
+bnc_new ()
{
- return XCALLOC (MTYPE_BGP_NEXTHOP_CACHE, sizeof (struct bgp_nexthop_cache));
+ struct bgp_nexthop_cache *bnc;
+
+ bnc = XCALLOC (MTYPE_BGP_NEXTHOP_CACHE, sizeof (struct bgp_nexthop_cache));
+ LIST_INIT(&(bnc->paths));
+ return bnc;
}
-static void
+void
bnc_free (struct bgp_nexthop_cache *bnc)
{
bnc_nexthop_free (bnc);
@@ -111,48 +126,6 @@ bnc_free (struct bgp_nexthop_cache *bnc)
}
static int
-bgp_nexthop_same (struct nexthop *next1, struct nexthop *next2)
-{
- if (next1->type != next2->type)
- return 0;
-
- switch (next1->type)
- {
- case ZEBRA_NEXTHOP_IPV4:
- if (! IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4))
- return 0;
- break;
- case ZEBRA_NEXTHOP_IPV4_IFINDEX:
- if (! IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4)
- || next1->ifindex != next2->ifindex)
- return 0;
- break;
- case ZEBRA_NEXTHOP_IFINDEX:
- case ZEBRA_NEXTHOP_IFNAME:
- if (next1->ifindex != next2->ifindex)
- return 0;
- break;
-#ifdef HAVE_IPV6
- case ZEBRA_NEXTHOP_IPV6:
- if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6))
- return 0;
- break;
- case ZEBRA_NEXTHOP_IPV6_IFINDEX:
- case ZEBRA_NEXTHOP_IPV6_IFNAME:
- if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6))
- return 0;
- if (next1->ifindex != next2->ifindex)
- return 0;
- break;
-#endif /* HAVE_IPV6 */
- default:
- /* do nothing */
- break;
- }
- return 1;
-}
-
-static int
bgp_nexthop_cache_different (struct bgp_nexthop_cache *bnc1,
struct bgp_nexthop_cache *bnc2)
{
@@ -167,7 +140,7 @@ bgp_nexthop_cache_different (struct bgp_nexthop_cache *bnc1,
for (i = 0; i < bnc1->nexthop_num; i++)
{
- if (! bgp_nexthop_same (next1, next2))
+ if (! nexthop_same_no_recurse (next1, next2))
return 1;
next1 = next1->next;
@@ -416,6 +389,7 @@ bgp_scan (afi_t afi, safi_t safi)
struct bgp_info *next;
struct peer *peer;
struct listnode *node, *nnode;
+#if BGP_SCAN_NEXTHOP
int valid;
int current;
int changed;
@@ -426,6 +400,7 @@ bgp_scan (afi_t afi, safi_t safi)
bgp_nexthop_cache_table[afi] = cache2_table[afi];
else
bgp_nexthop_cache_table[afi] = cache1_table[afi];
+#endif
/* Get default bgp. */
bgp = bgp_get_default ();
@@ -455,6 +430,7 @@ bgp_scan (afi_t afi, safi_t safi)
if (bi->type == ZEBRA_ROUTE_BGP && bi->sub_type == BGP_ROUTE_NORMAL)
{
+#if BGP_SCAN_NEXTHOP
changed = 0;
metricchanged = 0;
@@ -487,6 +463,7 @@ bgp_scan (afi_t afi, safi_t safi)
afi, SAFI_UNICAST);
}
}
+#endif
if (CHECK_FLAG (bgp->af_flags[afi][SAFI_UNICAST],
BGP_CONFIG_DAMPENING)
@@ -499,11 +476,13 @@ bgp_scan (afi_t afi, safi_t safi)
bgp_process (bgp, rn, afi, SAFI_UNICAST);
}
+#if BGP_SCAN_NEXTHOP
/* Flash old cache. */
if (bgp_nexthop_cache_table[afi] == cache1_table[afi])
bgp_nexthop_cache_reset (cache2_table[afi]);
else
bgp_nexthop_cache_reset (cache1_table[afi]);
+#endif
if (BGP_DEBUG (events, EVENTS))
{
@@ -1294,9 +1273,7 @@ static int
show_ip_bgp_scan_tables (struct vty *vty, const char detail)
{
struct bgp_node *rn;
- struct bgp_nexthop_cache *bnc;
char buf[INET6_ADDRSTRLEN];
- u_char i;
if (bgp_scan_thread)
vty_out (vty, "BGP scan is running%s", VTY_NEWLINE);
@@ -1304,6 +1281,7 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail)
vty_out (vty, "BGP scan is not running%s", VTY_NEWLINE);
vty_out (vty, "BGP scan interval is %d%s", bgp_scan_interval, VTY_NEWLINE);
+#if BGP_SCAN_NEXTHOP
vty_out (vty, "Current BGP nexthop cache:%s", VTY_NEWLINE);
for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP]); rn; rn = bgp_route_next (rn))
if ((bnc = rn->info) != NULL)
@@ -1368,7 +1346,9 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail)
}
}
#endif /* HAVE_IPV6 */
-
+#else
+ vty_out (vty, "BGP next-hop tracking is on%s", VTY_NEWLINE);
+#endif
vty_out (vty, "BGP connected route:%s", VTY_NEWLINE);
for (rn = bgp_table_top (bgp_connected_table[AFI_IP]);
rn;
@@ -1393,6 +1373,117 @@ show_ip_bgp_scan_tables (struct vty *vty, const char detail)
return CMD_SUCCESS;
}
+static int
+show_ip_bgp_nexthop_table (struct vty *vty, int detail)
+{
+ struct bgp_node *rn;
+ struct bgp_nexthop_cache *bnc;
+ char buf[INET6_ADDRSTRLEN];
+ time_t tbuf;
+ u_char i;
+
+ vty_out (vty, "Current BGP nexthop cache:%s", VTY_NEWLINE);
+ for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP]); rn; rn = bgp_route_next (rn))
+ if ((bnc = rn->info) != NULL)
+ {
+ if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID))
+ {
+ vty_out (vty, " %s valid [IGP metric %d], #paths %d%s",
+ inet_ntop (AF_INET, &rn->p.u.prefix4, buf, INET6_ADDRSTRLEN),
+ bnc->metric, bnc->path_count, VTY_NEWLINE);
+ if (detail)
+ for (i = 0; i < bnc->nexthop_num; i++)
+ switch (bnc->nexthop[i].type)
+ {
+ case NEXTHOP_TYPE_IPV4:
+ vty_out (vty, " gate %s%s",
+ inet_ntop (AF_INET, &bnc->nexthop[i].gate.ipv4, buf,
+ INET6_ADDRSTRLEN), VTY_NEWLINE);
+ break;
+ case NEXTHOP_TYPE_IFINDEX:
+ vty_out (vty, " if %s%s",
+ ifindex2ifname(bnc->nexthop[i].ifindex), VTY_NEWLINE);
+ break;
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ vty_out (vty, " gate %s, if %s%s",
+ inet_ntop(AF_INET, &bnc->nexthop[i].gate.ipv4, buf,
+ INET6_ADDRSTRLEN),
+ ifindex2ifname(bnc->nexthop[i].ifindex), VTY_NEWLINE);
+ break;
+ default:
+ vty_out (vty, " invalid nexthop type %u%s",
+ bnc->nexthop[i].type, VTY_NEWLINE);
+ }
+ }
+ else
+ vty_out (vty, " %s invalid%s",
+ inet_ntop (AF_INET, &rn->p.u.prefix4, buf, INET6_ADDRSTRLEN), VTY_NEWLINE);
+#ifdef HAVE_CLOCK_MONOTONIC
+ tbuf = time(NULL) - (bgp_clock() - bnc->last_update);
+ vty_out (vty, " Last update: %s", ctime(&tbuf));
+#else
+ vty_out (vty, " Last update: %s", ctime(&bnc->uptime));
+#endif /* HAVE_CLOCK_MONOTONIC */
+
+ vty_out(vty, "%s", VTY_NEWLINE);
+ }
+
+#ifdef HAVE_IPV6
+ {
+ for (rn = bgp_table_top (bgp_nexthop_cache_table[AFI_IP6]);
+ rn;
+ rn = bgp_route_next (rn))
+ if ((bnc = rn->info) != NULL)
+ {
+ if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID))
+ {
+ vty_out (vty, " %s valid [IGP metric %d]%s",
+ inet_ntop (AF_INET6, &rn->p.u.prefix6, buf,
+ INET6_ADDRSTRLEN),
+ bnc->metric, VTY_NEWLINE);
+ if (detail)
+ for (i = 0; i < bnc->nexthop_num; i++)
+ switch (bnc->nexthop[i].type)
+ {
+ case NEXTHOP_TYPE_IPV6:
+ vty_out (vty, " gate %s%s",
+ inet_ntop (AF_INET6, &bnc->nexthop[i].gate.ipv6,
+ buf, INET6_ADDRSTRLEN), VTY_NEWLINE);
+ break;
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+ vty_out(vty, " gate %s, if %s%s",
+ inet_ntop(AF_INET6, &bnc->nexthop[i].gate.ipv6, buf,
+ INET6_ADDRSTRLEN),
+ ifindex2ifname(bnc->nexthop[i].ifindex),
+ VTY_NEWLINE);
+ break;
+ case NEXTHOP_TYPE_IFINDEX:
+ vty_out (vty, " ifidx %u%s", bnc->nexthop[i].ifindex,
+ VTY_NEWLINE);
+ break;
+ default:
+ vty_out (vty, " invalid nexthop type %u%s",
+ bnc->nexthop[i].type, VTY_NEWLINE);
+ }
+ }
+ else
+ vty_out (vty, " %s invalid%s",
+ inet_ntop (AF_INET6, &rn->p.u.prefix6, buf, INET6_ADDRSTRLEN),
+ VTY_NEWLINE);
+#ifdef HAVE_CLOCK_MONOTONIC
+ tbuf = time(NULL) - (bgp_clock() - bnc->last_update);
+ vty_out (vty, " Last update: %s", ctime(&tbuf));
+#else
+ vty_out (vty, " Last update: %s", ctime(&bnc->uptime));
+#endif /* HAVE_CLOCK_MONOTONIC */
+
+ vty_out(vty, "%s", VTY_NEWLINE);
+ }
+ }
+#endif /* HAVE_IPV6 */
+ return CMD_SUCCESS;
+}
+
DEFUN (show_ip_bgp_scan,
show_ip_bgp_scan_cmd,
"show ip bgp scan",
@@ -1416,6 +1507,28 @@ DEFUN (show_ip_bgp_scan_detail,
return show_ip_bgp_scan_tables (vty, 1);
}
+DEFUN (show_ip_bgp_nexthop,
+ show_ip_bgp_nexthop_cmd,
+ "show ip bgp nexthop",
+ SHOW_STR
+ IP_STR
+ BGP_STR
+ "BGP nexthop table\n")
+{
+ return show_ip_bgp_nexthop_table (vty, 0);
+}
+
+DEFUN (show_ip_bgp_nexthop_detail,
+ show_ip_bgp_nexthop_detail_cmd,
+ "show ip bgp nexthop detail",
+ SHOW_STR
+ IP_STR
+ BGP_STR
+ "BGP nexthop table\n")
+{
+ return show_ip_bgp_nexthop_table (vty, 1);
+}
+
int
bgp_config_write_scan_time (struct vty *vty)
{
@@ -1458,8 +1571,12 @@ bgp_scan_init (void)
install_element (BGP_NODE, &no_bgp_scan_time_val_cmd);
install_element (VIEW_NODE, &show_ip_bgp_scan_cmd);
install_element (VIEW_NODE, &show_ip_bgp_scan_detail_cmd);
+ install_element (VIEW_NODE, &show_ip_bgp_nexthop_cmd);
+ install_element (VIEW_NODE, &show_ip_bgp_nexthop_detail_cmd);
install_element (RESTRICTED_NODE, &show_ip_bgp_scan_cmd);
install_element (ENABLE_NODE, &show_ip_bgp_scan_cmd);
+ install_element (ENABLE_NODE, &show_ip_bgp_nexthop_cmd);
+ install_element (ENABLE_NODE, &show_ip_bgp_nexthop_detail_cmd);
install_element (ENABLE_NODE, &show_ip_bgp_scan_detail_cmd);
}
diff --git a/bgpd/bgp_nexthop.h b/bgpd/bgp_nexthop.h
index 6e5350ead..abcf24263 100644
--- a/bgpd/bgp_nexthop.h
+++ b/bgpd/bgp_nexthop.h
@@ -22,6 +22,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#define _QUAGGA_BGP_NEXTHOP_H
#include "if.h"
+#include "queue.h"
+#include "prefix.h"
#define BGP_SCAN_INTERVAL_DEFAULT 60
#define BGP_IMPORT_INTERVAL_DEFAULT 15
@@ -44,6 +46,20 @@ struct bgp_nexthop_cache
/* Nexthop number and nexthop linked list.*/
u_char nexthop_num;
struct nexthop *nexthop;
+ time_t last_update;
+ u_int16_t flags;
+
+#define BGP_NEXTHOP_VALID (1 << 0)
+#define BGP_NEXTHOP_REGISTERED (1 << 1)
+
+ u_int16_t change_flags;
+
+#define BGP_NEXTHOP_CHANGED (1 << 0)
+#define BGP_NEXTHOP_METRIC_CHANGED (1 << 1)
+
+ struct bgp_node *node;
+ LIST_HEAD(path_list, bgp_info) paths;
+ unsigned int path_count;
};
extern void bgp_scan_init (void);
@@ -57,5 +73,9 @@ extern int bgp_config_write_scan_time (struct vty *);
extern int bgp_nexthop_onlink (afi_t, struct attr *);
extern int bgp_nexthop_self (struct attr *);
extern void bgp_address_init (void);
+extern struct bgp_nexthop_cache *bnc_new();
+extern void bnc_free(struct bgp_nexthop_cache *bnc);
+extern void bnc_nexthop_free(struct bgp_nexthop_cache *bnc);
+extern char *bnc_str(struct bgp_nexthop_cache *bnc, char *buf, int size);
#endif /* _QUAGGA_BGP_NEXTHOP_H */
diff --git a/bgpd/bgp_nht.c b/bgpd/bgp_nht.c
new file mode 100644
index 000000000..8f5f1af96
--- /dev/null
+++ b/bgpd/bgp_nht.c
@@ -0,0 +1,473 @@
+/* BGP Nexthop tracking
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "command.h"
+#include "thread.h"
+#include "prefix.h"
+#include "zclient.h"
+#include "stream.h"
+#include "network.h"
+#include "log.h"
+#include "memory.h"
+#include "nexthop.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_table.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_attr.h"
+#include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_nht.h"
+
+extern struct zclient *zclient;
+extern struct bgp_table *bgp_nexthop_cache_table[AFI_MAX];
+
+static void register_nexthop(struct bgp_nexthop_cache *bnc);
+static void unregister_nexthop (struct bgp_nexthop_cache *bnc);
+static void evaluate_paths(struct bgp_nexthop_cache *bnc);
+static int make_prefix(int afi, struct bgp_info *ri, struct prefix *p);
+static void path_nh_map(struct bgp_info *path, struct bgp_nexthop_cache *bnc,
+ int keep);
+
+int
+bgp_find_nexthop (struct bgp_info *path, int *changed, int *metricchanged)
+{
+ struct bgp_nexthop_cache *bnc = path->nexthop;
+
+ if (!bnc)
+ return 0;
+
+ if (changed)
+ *changed = CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+
+ if (metricchanged)
+ *metricchanged = CHECK_FLAG(bnc->change_flags,
+ BGP_NEXTHOP_METRIC_CHANGED);
+
+ return (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
+}
+
+void
+bgp_unlink_nexthop (struct bgp_info *path)
+{
+ struct bgp_nexthop_cache *bnc = path->nexthop;
+
+ if (!bnc)
+ return;
+
+ path_nh_map(path, NULL, 0);
+
+ if (LIST_EMPTY(&(bnc->paths)))
+ {
+ if (BGP_DEBUG(nht, NHT))
+ {
+ char buf[INET6_ADDRSTRLEN];
+ zlog_debug("bgp_unlink_nexthop: freeing bnc %s",
+ bnc_str(bnc, buf, INET6_ADDRSTRLEN));
+ }
+ unregister_nexthop(bnc);
+ bnc->node->info = NULL;
+ bgp_unlock_node(bnc->node);
+ bnc_free(bnc);
+ }
+}
+
+int
+bgp_find_or_add_nexthop (afi_t afi, struct bgp_info *ri, int *changed,
+ int *metricchanged)
+{
+ struct bgp_node *rn;
+ struct bgp_nexthop_cache *bnc;
+ struct prefix p;
+
+ if (make_prefix(afi, ri, &p) < 0)
+ return 1;
+ rn = bgp_node_get (bgp_nexthop_cache_table[afi], &p);
+
+ if (!rn->info)
+ {
+ bnc = bnc_new();
+ rn->info = bnc;
+ bnc->node = rn;
+ bgp_lock_node(rn);
+ register_nexthop(bnc);
+ }
+ bnc = rn->info;
+ bgp_unlock_node (rn);
+ path_nh_map(ri, bnc, 1);
+
+ if (changed)
+ *changed = CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
+
+ if (metricchanged)
+ *metricchanged = CHECK_FLAG(bnc->change_flags,
+ BGP_NEXTHOP_METRIC_CHANGED);
+
+ if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
+ (bgp_info_extra_get(ri))->igpmetric = bnc->metric;
+ else if (ri->extra)
+ ri->extra->igpmetric = 0;
+
+ return (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID));
+}
+
+void
+bgp_parse_nexthop_update (void)
+{
+ struct stream *s;
+ struct bgp_node *rn;
+ struct bgp_nexthop_cache *bnc;
+ struct nexthop *nexthop;
+ struct nexthop *oldnh;
+ struct nexthop *nhlist_head = NULL;
+ struct nexthop *nhlist_tail = NULL;
+ uint32_t metric;
+ u_char nexthop_num;
+ struct prefix p;
+ int i;
+
+ s = zclient->ibuf;
+
+ memset(&p, 0, sizeof(struct prefix));
+ p.family = stream_getw(s);
+ p.prefixlen = stream_getc(s);
+ switch (p.family)
+ {
+ case AF_INET:
+ p.u.prefix4.s_addr = stream_get_ipv4 (s);
+ break;
+ case AF_INET6:
+ stream_get(&p.u.prefix6, s, 16);
+ break;
+ default:
+ break;
+ }
+
+ rn = bgp_node_lookup(bgp_nexthop_cache_table[family2afi(p.family)], &p);
+ if (!rn || !rn->info)
+ {
+ if (BGP_DEBUG(nht, NHT))
+ {
+ char buf[INET6_ADDRSTRLEN];
+ prefix2str(&p, buf, INET6_ADDRSTRLEN);
+ zlog_debug("parse nexthop update(%s): rn not found", buf);
+ }
+ return;
+ }
+
+ bnc = rn->info;
+ bnc->last_update = bgp_clock();
+ bnc->change_flags = 0;
+ metric = stream_getl (s);
+ nexthop_num = stream_getc (s);
+
+ /* debug print the input */
+ if (BGP_DEBUG(nht, NHT))
+ {
+ char buf[INET6_ADDRSTRLEN];
+ prefix2str(&p, buf, INET6_ADDRSTRLEN);
+ zlog_debug("parse nexthop update(%s): metric=%d, #nexthop=%d", buf,
+ metric, nexthop_num);
+ }
+
+ if (metric != bnc->metric)
+ bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
+
+ if(nexthop_num != bnc->nexthop_num)
+ bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+
+ if (nexthop_num)
+ {
+ bnc->flags |= BGP_NEXTHOP_VALID;
+ bnc->metric = metric;
+ bnc->nexthop_num = nexthop_num;
+
+ for (i = 0; i < nexthop_num; i++)
+ {
+ nexthop = nexthop_new();
+ nexthop->type = stream_getc (s);
+ switch (nexthop->type)
+ {
+ case ZEBRA_NEXTHOP_IPV4:
+ nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
+ break;
+ case ZEBRA_NEXTHOP_IFINDEX:
+ case ZEBRA_NEXTHOP_IFNAME:
+ nexthop->ifindex = stream_getl (s);
+ break;
+ case ZEBRA_NEXTHOP_IPV4_IFINDEX:
+ case ZEBRA_NEXTHOP_IPV4_IFNAME:
+ nexthop->gate.ipv4.s_addr = stream_get_ipv4 (s);
+ nexthop->ifindex = stream_getl (s);
+ break;
+#ifdef HAVE_IPV6
+ case ZEBRA_NEXTHOP_IPV6:
+ stream_get (&nexthop->gate.ipv6, s, 16);
+ break;
+ case ZEBRA_NEXTHOP_IPV6_IFINDEX:
+ case ZEBRA_NEXTHOP_IPV6_IFNAME:
+ stream_get (&nexthop->gate.ipv6, s, 16);
+ nexthop->ifindex = stream_getl (s);
+ break;
+#endif
+ default:
+ /* do nothing */
+ break;
+ }
+
+ if (nhlist_tail)
+ {
+ nhlist_tail->next = nexthop;
+ nhlist_tail = nexthop;
+ }
+ else
+ {
+ nhlist_tail = nexthop;
+ nhlist_head = nexthop;
+ }
+
+ /* No need to evaluate the nexthop if we have already determined
+ * that there has been a change.
+ */
+ if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
+ continue;
+
+ for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
+ if (nexthop_same_no_recurse(oldnh, nexthop))
+ break;
+
+ if (!oldnh)
+ bnc->change_flags |= BGP_NEXTHOP_CHANGED;
+ }
+ bnc_nexthop_free(bnc);
+ bnc->nexthop = nhlist_head;
+ }
+ else
+ {
+ bnc->flags &= ~BGP_NEXTHOP_VALID;
+ bnc_nexthop_free(bnc);
+ bnc->nexthop = NULL;
+ }
+
+ evaluate_paths(bnc);
+}
+
+/**
+ * make_prefix - make a prefix structure from the path (essentially
+ * path's node.
+ */
+static int
+make_prefix (int afi, struct bgp_info *ri, struct prefix *p)
+{
+ memset (p, 0, sizeof (struct prefix));
+ switch (afi)
+ {
+ case AFI_IP:
+ p->family = AF_INET;
+ p->prefixlen = IPV4_MAX_BITLEN;
+ p->u.prefix4 = ri->attr->nexthop;
+ break;
+#ifdef HAVE_IPV6
+ case AFI_IP6:
+ if (ri->attr->extra->mp_nexthop_len != 16
+ || IN6_IS_ADDR_LINKLOCAL (&ri->attr->extra->mp_nexthop_global))
+ return -1;
+
+ p->family = AF_INET6;
+ p->prefixlen = IPV6_MAX_BITLEN;
+ p->u.prefix6 = ri->attr->extra->mp_nexthop_global;
+ break;
+#endif
+ default:
+ break;
+ }
+ return 0;
+}
+
+/**
+ * sendmsg_nexthop -- Format and send a nexthop register/Unregister
+ * command to Zebra.
+ * ARGUMENTS:
+ * struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * int command -- either ZEBRA_NEXTHOP_REGISTER or ZEBRA_NEXTHOP_UNREGISTER
+ * RETURNS:
+ * void.
+ */
+static void
+sendmsg_nexthop (struct bgp_nexthop_cache *bnc, int command)
+{
+ struct stream *s;
+ struct prefix *p;
+ int ret;
+
+ /* Check socket. */
+ if (!zclient || zclient->sock < 0)
+ return;
+
+ p = &(bnc->node->p);
+ s = zclient->obuf;
+ stream_reset (s);
+ zclient_create_header (s, command);
+ stream_putw(s, PREFIX_FAMILY(p));
+ stream_putc(s, p->prefixlen);
+ switch (PREFIX_FAMILY(p))
+ {
+ case AF_INET:
+ stream_put_in_addr (s, &p->u.prefix4);
+ break;
+#ifdef HAVE_IPV6
+ case AF_INET6:
+ stream_put(s, &(p->u.prefix6), 16);
+ break;
+#endif
+ default:
+ break;
+ }
+ stream_putw_at (s, 0, stream_get_endp (s));
+
+ ret = zclient_send_message(zclient);
+ /* TBD: handle the failure */
+ if (ret < 0)
+ zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
+ return;
+}
+
+/**
+ * register_nexthop - register a nexthop with Zebra for notification
+ * when the route to the nexthop changes.
+ * ARGUMENTS:
+ * struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ * void.
+ */
+static void
+register_nexthop (struct bgp_nexthop_cache *bnc)
+{
+ /* Check if we have already registered */
+ if (bnc->flags & BGP_NEXTHOP_REGISTERED)
+ return;
+ sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_REGISTER);
+ SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * unregister_nexthop -- Unregister the nexthop from Zebra.
+ * ARGUMENTS:
+ * struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ * void.
+ */
+static void
+unregister_nexthop (struct bgp_nexthop_cache *bnc)
+{
+ /* Check if we have already registered */
+ if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
+ return;
+
+ sendmsg_nexthop(bnc, ZEBRA_NEXTHOP_UNREGISTER);
+ UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
+}
+
+/**
+ * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
+ * ARGUMENTS:
+ * struct bgp_nexthop_cache *bnc -- the nexthop structure.
+ * RETURNS:
+ * void.
+ */
+static void
+evaluate_paths (struct bgp_nexthop_cache *bnc)
+{
+ struct bgp_node *rn;
+ struct bgp_info *path;
+ struct bgp *bgp = bgp_get_default();
+ int afi;
+
+ LIST_FOREACH(path, &(bnc->paths), nh_thread)
+ {
+ if (!(path->type == ZEBRA_ROUTE_BGP &&
+ path->sub_type == BGP_ROUTE_NORMAL))
+ continue;
+
+ rn = path->net;
+ afi = family2afi(rn->p.family);
+
+ /* Path becomes valid/invalid depending on whether the nexthop
+ * reachable/unreachable.
+ */
+ if ((CHECK_FLAG(path->flags, BGP_INFO_VALID) ? 1 : 0) !=
+ (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) ? 1 : 0))
+ {
+ if (CHECK_FLAG (path->flags, BGP_INFO_VALID))
+ {
+ bgp_aggregate_decrement (bgp, &rn->p, path,
+ afi, SAFI_UNICAST);
+ bgp_info_unset_flag (rn, path, BGP_INFO_VALID);
+ }
+ else
+ {
+ bgp_info_set_flag (rn, path, BGP_INFO_VALID);
+ bgp_aggregate_increment (bgp, &rn->p, path,
+ afi, SAFI_UNICAST);
+ }
+ }
+
+ /* Copy the metric to the path. Will be used for bestpath computation */
+ if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
+ (bgp_info_extra_get(path))->igpmetric = bnc->metric;
+ else if (path->extra)
+ path->extra->igpmetric = 0;
+
+ if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_METRIC_CHANGED) ||
+ CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CHANGED))
+ SET_FLAG(path->flags, BGP_INFO_IGP_CHANGED);
+
+ bgp_process(bgp, rn, afi, SAFI_UNICAST);
+ }
+ RESET_FLAG(bnc->change_flags);
+}
+
+/**
+ * path_nh_map - make or break path-to-nexthop association.
+ * ARGUMENTS:
+ * path - pointer to the path structure
+ * bnc - pointer to the nexthop structure
+ * make - if set, make the association. if unset, just break the existing
+ * association.
+ */
+static void
+path_nh_map (struct bgp_info *path, struct bgp_nexthop_cache *bnc, int make)
+{
+ if (path->nexthop)
+ {
+ LIST_REMOVE(path, nh_thread);
+ path->nexthop->path_count--;
+ path->nexthop = NULL;
+ }
+ if (make)
+ {
+ LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
+ path->nexthop = bnc;
+ path->nexthop->path_count++;
+ }
+}
diff --git a/bgpd/bgp_nht.h b/bgpd/bgp_nht.h
new file mode 100644
index 000000000..0e43f0a44
--- /dev/null
+++ b/bgpd/bgp_nht.h
@@ -0,0 +1,62 @@
+/* BGP Nexthop tracking
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _BGP_NHT_H
+#define _BGP_NHT_H
+
+/**
+ * bgp_parse_nexthop_update() - parse a nexthop update message from Zebra.
+ */
+extern void bgp_parse_nexthop_update();
+
+/**
+ * bgp_find_nexthop() - lookup the nexthop cache table for the bnc object
+ * ARGUMENTS:
+ * p - path for which the nexthop object is being looked up
+ * c - output variable that stores whether the nexthop object has changed
+ * since last time.
+ * m - output variable that stores whether the nexthop metric has changed
+ * since last time.
+ */
+extern int bgp_find_nexthop(struct bgp_info *p, int *c, int *m);
+
+/**
+ * bgp_find_or_add_nexthop() - lookup the nexthop cache table for the bnc
+ * object. If not found, create a new object and register with ZEBRA for
+ * nexthop notification.
+ * ARGUMENTS:
+ * a - afi: AFI_IP or AF_IP6
+ * p - path for which the nexthop object is being looked up
+ * c - output variable that stores whether the nexthop object has changed
+ * since last time.
+ * m - output variable that stores whether the nexthop metric has changed
+ * since last time.
+ */
+extern int bgp_find_or_add_nexthop(afi_t a, struct bgp_info *p, int *c, int *m);
+
+/**
+ * bgp_unlink_nexthop() - Unlink the nexthop object from the path structure.
+ * ARGUMENTS:
+ * p - path structure.
+ */
+extern void bgp_unlink_nexthop(struct bgp_info *p);
+
+#endif /* _BGP_NHT_H */
diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c
index 15c3b6eab..d55a1081e 100644
--- a/bgpd/bgp_route.c
+++ b/bgpd/bgp_route.c
@@ -55,6 +55,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#include "bgpd/bgp_zebra.h"
#include "bgpd/bgp_vty.h"
#include "bgpd/bgp_mpath.h"
+#include "bgpd/bgp_nht.c"
/* Extern from bgp_dump.c */
extern const char *bgp_origin_str[];
@@ -126,20 +127,14 @@ bgp_info_extra_get (struct bgp_info *ri)
return ri->extra;
}
-/* Allocate new bgp info structure. */
-static struct bgp_info *
-bgp_info_new (void)
-{
- return XCALLOC (MTYPE_BGP_ROUTE, sizeof (struct bgp_info));
-}
-
/* Free bgp route information. */
static void
bgp_info_free (struct bgp_info *binfo)
{
if (binfo->attr)
bgp_attr_unintern (&binfo->attr);
-
+
+ bgp_unlink_nexthop(binfo);
bgp_info_extra_free (&binfo->extra);
bgp_info_mpath_free (&binfo->mpath);
@@ -1857,6 +1852,23 @@ bgp_rib_withdraw (struct bgp_node *rn, struct bgp_info *ri, struct peer *peer,
bgp_rib_remove (rn, ri, peer, afi, safi);
}
+static struct bgp_info *
+info_make (int type, int sub_type, struct peer *peer, struct attr *attr,
+ struct bgp_node *rn)
+{
+ struct bgp_info *new;
+
+ /* Make new BGP info. */
+ new = XCALLOC (MTYPE_BGP_ROUTE, sizeof (struct bgp_info));
+ new->type = type;
+ new->sub_type = sub_type;
+ new->peer = peer;
+ new->attr = attr;
+ new->uptime = bgp_clock ();
+ new->net = rn;
+ return new;
+}
+
static void
bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi,
struct attr *attr, struct peer *peer, struct prefix *p, int type,
@@ -2004,13 +2016,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi,
p->prefixlen, rsclient->host);
}
- /* Make new BGP info. */
- new = bgp_info_new ();
- new->type = type;
- new->sub_type = sub_type;
- new->peer = peer;
- new->attr = attr_new;
- new->uptime = bgp_clock ();
+ new = info_make(type, sub_type, peer, attr_new, rn);
/* Update MPLS tag. */
if (safi == SAFI_MPLS_VPN)
@@ -2316,7 +2322,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr,
|| (peer->sort == BGP_PEER_EBGP && peer->ttl != 1)
|| CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)))
{
- if (bgp_nexthop_lookup (afi, peer, ri, NULL, NULL))
+ if (bgp_find_or_add_nexthop (afi, ri, NULL, NULL))
bgp_info_set_flag (rn, ri, BGP_INFO_VALID);
else
bgp_info_unset_flag (rn, ri, BGP_INFO_VALID);
@@ -2343,12 +2349,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr,
}
/* Make new BGP info. */
- new = bgp_info_new ();
- new->type = type;
- new->sub_type = sub_type;
- new->peer = peer;
- new->attr = attr_new;
- new->uptime = bgp_clock ();
+ new = info_make(type, sub_type, peer, attr_new, rn);
/* Update MPLS tag. */
if (safi == SAFI_MPLS_VPN)
@@ -2362,7 +2363,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, struct attr *attr,
|| (peer->sort == BGP_PEER_EBGP && peer->ttl != 1)
|| CHECK_FLAG (peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK)))
{
- if (bgp_nexthop_lookup (afi, peer, new, NULL, NULL))
+ if (bgp_find_or_add_nexthop (afi, new, NULL, NULL))
bgp_info_set_flag (rn, new, BGP_INFO_VALID);
else
bgp_info_unset_flag (rn, new, BGP_INFO_VALID);
@@ -3450,15 +3451,11 @@ bgp_static_update_rsclient (struct peer *rsclient, struct prefix *p,
return;
}
}
-
+
/* Make new BGP info. */
- new = bgp_info_new ();
- new->type = ZEBRA_ROUTE_BGP;
- new->sub_type = BGP_ROUTE_STATIC;
- new->peer = bgp->peer_self;
+ new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self,
+ attr_new, rn);
SET_FLAG (new->flags, BGP_INFO_VALID);
- new->attr = attr_new;
- new->uptime = bgp_clock ();
/* Register new BGP information. */
bgp_info_add (rn, new);
@@ -3571,13 +3568,9 @@ bgp_static_update_main (struct bgp *bgp, struct prefix *p,
}
/* Make new BGP info. */
- new = bgp_info_new ();
- new->type = ZEBRA_ROUTE_BGP;
- new->sub_type = BGP_ROUTE_STATIC;
- new->peer = bgp->peer_self;
+ new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self, attr_new,
+ rn);
SET_FLAG (new->flags, BGP_INFO_VALID);
- new->attr = attr_new;
- new->uptime = bgp_clock ();
/* Aggregate address increment. */
bgp_aggregate_increment (bgp, p, new, afi, safi);
@@ -3622,13 +3615,10 @@ bgp_static_update_vpnv4 (struct bgp *bgp, struct prefix *p, afi_t afi,
rn = bgp_afi_node_get (bgp->rib[afi][safi], afi, safi, p, prd);
/* Make new BGP info. */
- new = bgp_info_new ();
- new->type = ZEBRA_ROUTE_BGP;
- new->sub_type = BGP_ROUTE_STATIC;
- new->peer = bgp->peer_self;
- new->attr = bgp_attr_default_intern (BGP_ORIGIN_IGP);
+ new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, bgp->peer_self,
+ bgp_attr_default_intern(BGP_ORIGIN_IGP), rn);
+
SET_FLAG (new->flags, BGP_INFO_VALID);
- new->uptime = bgp_clock ();
new->extra = bgp_info_extra_new();
memcpy (new->extra->tag, tag, 3);
@@ -4700,13 +4690,10 @@ bgp_aggregate_route (struct bgp *bgp, struct prefix *p, struct bgp_info *rinew,
if (aggregate->count > 0)
{
rn = bgp_node_get (table, p);
- new = bgp_info_new ();
- new->type = ZEBRA_ROUTE_BGP;
- new->sub_type = BGP_ROUTE_AGGREGATE;
- new->peer = bgp->peer_self;
+ new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, bgp->peer_self,
+ bgp_attr_aggregate_intern(bgp, origin, aspath, community,
+ aggregate->as_set), rn);
SET_FLAG (new->flags, BGP_INFO_VALID);
- new->attr = bgp_attr_aggregate_intern (bgp, origin, aspath, community, aggregate->as_set);
- new->uptime = bgp_clock ();
bgp_info_add (rn, new);
bgp_unlock_node (rn);
@@ -4884,14 +4871,10 @@ bgp_aggregate_add (struct bgp *bgp, struct prefix *p, afi_t afi, safi_t safi,
if (aggregate->count)
{
rn = bgp_node_get (table, p);
-
- new = bgp_info_new ();
- new->type = ZEBRA_ROUTE_BGP;
- new->sub_type = BGP_ROUTE_AGGREGATE;
- new->peer = bgp->peer_self;
+ new = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_AGGREGATE, bgp->peer_self,
+ bgp_attr_aggregate_intern(bgp, origin, aspath, community,
+ aggregate->as_set), rn);
SET_FLAG (new->flags, BGP_INFO_VALID);
- new->attr = bgp_attr_aggregate_intern (bgp, origin, aspath, community, aggregate->as_set);
- new->uptime = bgp_clock ();
bgp_info_add (rn, new);
bgp_unlock_node (rn);
@@ -5531,16 +5514,12 @@ bgp_redistribute_add (struct prefix *p, const struct in_addr *nexthop,
aspath_unintern (&attr.aspath);
bgp_attr_extra_free (&attr);
return;
- }
+ }
}
- new = bgp_info_new ();
- new->type = type;
- new->sub_type = BGP_ROUTE_REDISTRIBUTE;
- new->peer = bgp->peer_self;
+ new = info_make(type, BGP_ROUTE_REDISTRIBUTE, bgp->peer_self,
+ new_attr, bn);
SET_FLAG (new->flags, BGP_INFO_VALID);
- new->attr = new_attr;
- new->uptime = bgp_clock ();
bgp_aggregate_increment (bgp, p, new, afi, SAFI_UNICAST);
bgp_info_add (bn, new);
diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h
index fea18dd29..f8cc34fc3 100644
--- a/bgpd/bgp_route.h
+++ b/bgpd/bgp_route.h
@@ -21,8 +21,11 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#ifndef _QUAGGA_BGP_ROUTE_H
#define _QUAGGA_BGP_ROUTE_H
+#include "queue.h"
#include "bgp_table.h"
+struct bgp_nexthop_cache;
+
/* Ancillary information to struct bgp_info,
* used for uncommonly used data (aggregation, MPLS, etc.)
* and lazily allocated to save memory.
@@ -47,7 +50,16 @@ struct bgp_info
/* For linked list. */
struct bgp_info *next;
struct bgp_info *prev;
-
+
+ /* For nexthop linked list */
+ LIST_ENTRY(bgp_info) nh_thread;
+
+ /* Back pointer to the prefix node */
+ struct bgp_node *net;
+
+ /* Back pointer to the nexthop structure */
+ struct bgp_nexthop_cache *nexthop;
+
/* Peer structure. */
struct peer *peer;
diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c
index 99a7be21b..6ba4f4bf2 100644
--- a/bgpd/bgp_zebra.c
+++ b/bgpd/bgp_zebra.c
@@ -38,6 +38,7 @@ Boston, MA 02111-1307, USA. */
#include "bgpd/bgp_fsm.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_mpath.h"
+#include "bgpd/bgp_nexthop.h"
/* All information about zebra. */
struct zclient *zclient = NULL;
@@ -75,6 +76,15 @@ bgp_router_id_update (int command, struct zclient *zclient, zebra_size_t length)
return 0;
}
+/* Nexthop update message from zebra. */
+static int
+bgp_read_nexthop_update (int command, struct zclient *zclient,
+ zebra_size_t length)
+{
+ bgp_parse_nexthop_update();
+ return 0;
+}
+
/* Inteface addition message from zebra. */
static int
bgp_interface_add (int command, struct zclient *zclient, zebra_size_t length)
@@ -1187,6 +1197,7 @@ bgp_zebra_init (void)
zclient->ipv6_route_add = zebra_read_ipv6;
zclient->ipv6_route_delete = zebra_read_ipv6;
#endif /* HAVE_IPV6 */
+ zclient->nexthop_update = bgp_read_nexthop_update;
/* Interface related init. */
if_init ();
diff --git a/doc/next-hop-tracking.txt b/doc/next-hop-tracking.txt
new file mode 100644
index 000000000..d157866e8
--- /dev/null
+++ b/doc/next-hop-tracking.txt
@@ -0,0 +1,326 @@
+0. Introduction
+
+This is the design specification for next hop tracking feature in
+Quagga.
+
+1. Background
+
+Recursive routes are of the form:
+
+ p/m --> n
+ [Ex: 1.1.0.0/16 --> 2.2.2.2]
+
+where 'n' itself is resolved through another route as follows:
+
+ p2/m --> h, interface
+ [Ex: 2.2.2.0/24 --> 3.3.3.3, eth0]
+
+Usually, BGP routes are recursive in nature and BGP nexthops get
+resolved through an IGP route. IGP usually adds its routes pointing to
+an interface (these are called non-recursive routes).
+
+When BGP receives a recursive route from a peer, it needs to validate
+the nexthop. The path is marked valid or invalid based on the
+reachability status of the nexthop. Nexthop validation is also
+important for BGP decision process as the metric to reach the nexthop
+is a parameter to best path selection process.
+
+As it goes with routing, this is a dynamic process. Route to the
+nexthop can change. The nexthop can become unreachable or
+reachable. In the current BGP implementation, the nexthop validation
+is done periodically in the scanner run. The default scanner run
+interval is one minute. Every minute, the scanner task walks the
+entire BGP table. It checks the validity of each nexthop with Zebra
+(the routing table manager) through a request and response message
+exchange between BGP and Zebra process. BGP process is blocked for
+that duration. The mechanism has two major drawbacks:
+
+(1) The scanner task runs to completion. That can potentially starve
+ the other tasks for long periods of time, based on the BGP table
+ size and number of nexthops.
+
+(2) Convergence around routing changes that affect the nexthops can be
+ long (around a minute with the default intervals). The interval
+ can be shortened to achieve faster reaction time, but it makes the
+ first problem worse, with the scanner task consuming most of the
+ CPU resources.
+
+"Next hop tracking" feature makes this process event-driven. It
+eliminates periodic nexthop validation and introduces an asynchronous
+communication path between BGP and Zebra for route change notifications
+that can then be acted upon.
+
+2. Goal
+
+Stating the obvious, the main goal is to remove the two limitations we
+discussed in the previous section. The goals, in a constructive tone,
+are the following:
+
+- fairness: the scanner run should not consume an unjustly high amount
+ of CPU time. This should give an overall good performance and
+ response time to other events (route changes, session events,
+ IO/user interface).
+
+- convergence: BGP must react to nexthop changes instantly and provide
+ sub-second convergence. This may involve diverting the routes from
+ one nexthop to another.
+
+3. Overview of the changes
+
+The changes are in both BGP and Zebra modules. The short summary is
+the following:
+
+- Zebra implements a registration mechanism by which clients can
+ register for next hop notification. Consequently, it maintains a
+ separate table, per (VRF, AF) pair, of next hops and interested
+ client-list per next hop.
+
+- When the main routing table changes in Zebra, it evaluates the next
+ hop table: for each next hop, it checks if the route table
+ modifications have changed its state. If so, it notifies the
+ interested clients.
+
+- BGP is one such client. It registers the next hops corresponding to
+ all of its received routes/paths. It also threads the paths against
+ each nexthop structure.
+
+- When BGP receives a next hop notification from Zebra, it walks the
+ corresponding path list. It makes them valid or invalid depending
+ on the next hop notification. It then re-computes best path for the
+ corresponding destination. This may result in re-announcing those
+ destinations to peers.
+
+4. Design
+
+4.1. Modules
+
+The core design introduces an "nht" (next hop tracking) module in BGP
+and "rnh" (recursive nexthop) module in Zebra. The "nht" module
+provides the following APIs:
+
+bgp_find_or_add_nexthop() : find or add a nexthop in BGP nexthop table
+bgp_find_nexthop() : find a nexthop in BGP nexthop table
+bgp_parse_nexthop_update() : parse a nexthop update message coming
+ from zebra
+
+The "rnh" module provides the following APIs:
+
+zebra_add_rnh() : add a recursive nexthop
+zebra_delete_rnh() : delete a recursive nexthop
+zebra_lookup_rnh() : lookup a recursive nexthop
+
+zebra_add_rnh_client() : register a client for nexthop notifications
+ against a recursive nexthop
+
+zebra_remove_rnh_client(): remove the client registration for a
+ recursive nexthop
+
+zebra_evaluate_rnh_table(): (re)evaluate the recursive nexthop table
+ (most probably because the main routing
+ table has changed).
+
+zebra_cleanup_rnh_client(): Cleanup a client from the "rnh" module
+ data structures (most probably because the
+ client is going away).
+
+4.2. Control flow
+
+The next hop registration control flow is the following:
+
+<==== BGP Process ====>|<==== Zebra Process ====>
+ |
+receive module nht module | zserv module rnh module
+----------------------------------------------------------------------
+ | | |
+bgp_update_ | | |
+ main() | bgp_find_or_add_ | |
+ | nexthop() | |
+ | | |
+ | | zserv_nexthop_ |
+ | | register() |
+ | | | zebra_add_rnh()
+ | | |
+
+
+The next hop notification control flow is the following:
+
+<==== Zebra Process ====>|<==== BGP Process ====>
+ |
+rib module rnh module | zebra module nht module
+----------------------------------------------------------------------
+ | | |
+meta_queue_ | | |
+ process() | zebra_evaluate_ | |
+ | rnh_table() | |
+ | | |
+ | | bgp_read_nexthop_ |
+ | | update() |
+ | | | bgp_parse_
+ | | | nexthop_update()
+ | | |
+
+
+4.3. zclient message format
+
+ZEBRA_NEXTHOP_REGISTER and ZEBRA_NEXTHOP_UNREGISTER messages are
+encoded in the following way:
+
+/*
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | AF | prefix len |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * . Nexthop prefix .
+ * . .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * . .
+ * . .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | AF | prefix len |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * . Nexthop prefix .
+ * . .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+ZEBRA_NEXTHOP_UPDATE message is encoded as follows:
+
+/*
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | AF | prefix len |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * . Nexthop prefix getting resolved .
+ * . .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | metric |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | #nexthops |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | nexthop type |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * . resolving Nexthop details .
+ * . .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * . .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | nexthop type |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * . resolving Nexthop details .
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+4.4. BGP data structure
+
+Legend:
+
+/\ struct bgp_node: a BGP destination/route/prefix
+\/
+
+[ ] struct bgp_info: a BGP path (e.g. route received from a peer)
+
+ _
+(_) struct bgp_nexthop_cache: a BGP nexthop
+
+
+
+ /\ NULL
+ \/--+ ^
+ | :
+ +--[ ]--[ ]--[ ]--> NULL
+ /\ :
+ \/--+ :
+ | :
+ +--[ ]--[ ]--> NULL
+ :
+ _ :
+ (_).............
+
+
+4.5. Zebra data structure
+
+rnh table:
+
+ O
+ / \
+ O O
+ / \
+ O O
+
+ struct rnh
+ {
+ u_char flags;
+ struct rib *state;
+ struct list *client_list;
+ struct route_node *node;
+ };
+
+5. User interface changes
+
+quagga# show ip nht
+3.3.3.3
+ resolved via kernel
+ via 11.0.0.6, swp1
+ Client list: bgp(fd 12)
+11.0.0.10
+ resolved via connected
+ is directly connected, swp2
+ Client list: bgp(fd 12)
+11.0.0.18
+ resolved via connected
+ is directly connected, swp4
+ Client list: bgp(fd 12)
+11.11.11.11
+ resolved via kernel
+ via 10.0.1.2, eth0
+ Client list: bgp(fd 12)
+
+quagga# show ip bgp nexthop
+Current BGP nexthop cache:
+ 3.3.3.3 valid [IGP metric 0], #paths 3
+ Last update: Wed Oct 16 04:43:49 2013
+
+ 11.0.0.10 valid [IGP metric 1], #paths 1
+ Last update: Wed Oct 16 04:43:51 2013
+
+ 11.0.0.18 valid [IGP metric 1], #paths 2
+ Last update: Wed Oct 16 04:43:47 2013
+
+ 11.11.11.11 valid [IGP metric 0], #paths 1
+ Last update: Wed Oct 16 04:43:47 2013
+
+quagga# show ipv6 nht
+quagga# show ip bgp nexthop detail
+
+quagga# debug bgp nht
+quagga# debug zebra nht
+
+6. Sample test cases
+
+ r2----r3
+ / \ /
+ r1----r4
+
+- Verify that a change in IGP cost triggers NHT
+ + shutdown the r1-r4 and r2-r4 links
+ + no shut the r1-r4 and r2-r4 links and wait for OSPF to come back
+ up
+ + We should be back to the original nexthop via r4 now
+- Verify that a NH becoming unreachable triggers NHT
+ + Shutdown all links to r4
+- Verify that a NH becoming reachable triggers NHT
+ + no shut all links to r4
+
+7. Future work
+
+- route-policy for next hop validation (e.g. ignore default route)
+- damping for rapid next hop changes
+- prioritized handling of nexthop changes ((un)reachability vs. metric
+ changes)
+- handling recursion loop, e.g.
+ 11.11.11.11/32 -> 12.12.12.12
+ 12.12.12.12/32 -> 11.11.11.11
+ 11.0.0.0/8 -> <interface>
+- better statistics
diff --git a/lib/Makefile.am b/lib/Makefile.am
index bd2109292..f64972dd0 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -12,7 +12,7 @@ libzebra_la_SOURCES = \
sockunion.c prefix.c thread.c if.c memory.c buffer.c table.c hash.c \
filter.c routemap.c distribute.c stream.c str.c log.c plist.c \
zclient.c sockopt.c smux.c agentx.c snmp.c md5.c if_rmap.c keychain.c privs.c \
- sigevent.c pqueue.c jhash.c memtypes.c workqueue.c
+ sigevent.c pqueue.c jhash.c memtypes.c workqueue.c nexthop.c
BUILT_SOURCES = memtypes.h route_types.h gitversion.h
@@ -27,7 +27,7 @@ pkginclude_HEADERS = \
str.h stream.h table.h thread.h vector.h version.h vty.h zebra.h \
plist.h zclient.h sockopt.h smux.h md5.h if_rmap.h keychain.h \
privs.h sigevent.h pqueue.h jhash.h zassert.h memtypes.h \
- workqueue.h route_types.h libospf.h
+ workqueue.h route_types.h libospf.h nexthop.h
EXTRA_DIST = \
regex.c regex-gnu.h \
diff --git a/lib/log.c b/lib/log.c
index 1058844b7..7e03aa9af 100644
--- a/lib/log.c
+++ b/lib/log.c
@@ -845,6 +845,9 @@ static const struct zebra_desc_table command_types[] = {
DESC_ENTRY (ZEBRA_ROUTER_ID_DELETE),
DESC_ENTRY (ZEBRA_ROUTER_ID_UPDATE),
DESC_ENTRY (ZEBRA_HELLO),
+ DESC_ENTRY (ZEBRA_NEXTHOP_REGISTER),
+ DESC_ENTRY (ZEBRA_NEXTHOP_UNREGISTER),
+ DESC_ENTRY (ZEBRA_NEXTHOP_UPDATE),
};
#undef DESC_ENTRY
diff --git a/lib/memtypes.c b/lib/memtypes.c
index 47a343873..5af2642be 100644
--- a/lib/memtypes.c
+++ b/lib/memtypes.c
@@ -84,6 +84,7 @@ struct memory_list memory_list_zebra[] =
{ MTYPE_STATIC_IPV6, "Static IPv6 route" },
{ MTYPE_RIB_DEST, "RIB destination" },
{ MTYPE_RIB_TABLE_INFO, "RIB table info" },
+ { MTYPE_RNH, "Nexthop tracking object" },
{ -1, NULL },
};
diff --git a/lib/nexthop.c b/lib/nexthop.c
new file mode 100644
index 000000000..2478dca7a
--- /dev/null
+++ b/lib/nexthop.c
@@ -0,0 +1,100 @@
+/* A generic nexthop structure
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+#include <zebra.h>
+
+#include "prefix.h"
+#include "table.h"
+#include "memory.h"
+#include "str.h"
+#include "command.h"
+#include "if.h"
+#include "log.h"
+#include "sockunion.h"
+#include "linklist.h"
+#include "thread.h"
+#include "prefix.h"
+#include "nexthop.h"
+
+/* check if nexthops are same, non-recursive */
+int
+nexthop_same_no_recurse (struct nexthop *next1, struct nexthop *next2)
+{
+ if (next1->type != next2->type)
+ return 0;
+
+ switch (next1->type)
+ {
+ case NEXTHOP_TYPE_IPV4:
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ if (! IPV4_ADDR_SAME (&next1->gate.ipv4, &next2->gate.ipv4))
+ return 0;
+ if (next1->ifindex && (next1->ifindex != next2->ifindex))
+ return 0;
+ break;
+ case NEXTHOP_TYPE_IFINDEX:
+ case NEXTHOP_TYPE_IFNAME:
+ if (next1->ifindex != next2->ifindex)
+ return 0;
+ break;
+#ifdef HAVE_IPV6
+ case NEXTHOP_TYPE_IPV6:
+ if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6))
+ return 0;
+ break;
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+ case NEXTHOP_TYPE_IPV6_IFNAME:
+ if (! IPV6_ADDR_SAME (&next1->gate.ipv6, &next2->gate.ipv6))
+ return 0;
+ if (next1->ifindex != next2->ifindex)
+ return 0;
+ break;
+#endif /* HAVE_IPV6 */
+ default:
+ /* do nothing */
+ break;
+ }
+ return 1;
+}
+
+/*
+ * nexthop_type_to_str
+ */
+const char *
+nexthop_type_to_str (enum nexthop_types_t nh_type)
+{
+ static const char *desc[] = {
+ "none",
+ "Directly connected",
+ "Interface route",
+ "IPv4 nexthop",
+ "IPv4 nexthop with ifindex",
+ "IPv4 nexthop with ifname",
+ "IPv6 nexthop",
+ "IPv6 nexthop with ifindex",
+ "IPv6 nexthop with ifname",
+ "Null0 nexthop",
+ };
+
+ if (nh_type >= ZEBRA_NUM_OF (desc))
+ return "<Invalid nh type>";
+
+ return desc[nh_type];
+}
diff --git a/lib/nexthop.h b/lib/nexthop.h
new file mode 100644
index 000000000..bddac6554
--- /dev/null
+++ b/lib/nexthop.h
@@ -0,0 +1,89 @@
+/*
+ * Nexthop structure definition.
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _LIB_NEXTHOP_H
+#define _LIB_NEXTHOP_H
+
+#include "prefix.h"
+
+union g_addr {
+ struct in_addr ipv4;
+#ifdef HAVE_IPV6
+ struct in6_addr ipv6;
+#endif /* HAVE_IPV6 */
+};
+
+enum nexthop_types_t
+{
+ NEXTHOP_TYPE_IFINDEX = 1, /* Directly connected. */
+ NEXTHOP_TYPE_IFNAME, /* Interface route. */
+ NEXTHOP_TYPE_IPV4, /* IPv4 nexthop. */
+ NEXTHOP_TYPE_IPV4_IFINDEX, /* IPv4 nexthop with ifindex. */
+ NEXTHOP_TYPE_IPV4_IFNAME, /* IPv4 nexthop with ifname. */
+ NEXTHOP_TYPE_IPV6, /* IPv6 nexthop. */
+ NEXTHOP_TYPE_IPV6_IFINDEX, /* IPv6 nexthop with ifindex. */
+ NEXTHOP_TYPE_IPV6_IFNAME, /* IPv6 nexthop with ifname. */
+ NEXTHOP_TYPE_BLACKHOLE, /* Null0 nexthop. */
+};
+
+/* Nexthop structure. */
+struct nexthop
+{
+ struct nexthop *next;
+ struct nexthop *prev;
+
+ /* Interface index. */
+ char *ifname;
+ unsigned int ifindex;
+
+ enum nexthop_types_t type;
+
+ u_char flags;
+#define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */
+#define NEXTHOP_FLAG_FIB (1 << 1) /* FIB nexthop. */
+#define NEXTHOP_FLAG_RECURSIVE (1 << 2) /* Recursive nexthop. */
+#define NEXTHOP_FLAG_ONLINK (1 << 3) /* Nexthop should be installed onlink. */
+#define NEXTHOP_FLAG_MATCHED (1 << 4) /* Already matched vs a nexthop */
+
+ /* Nexthop address */
+ union g_addr gate;
+ union g_addr src;
+
+ /* Nexthops obtained by recursive resolution.
+ *
+ * If the nexthop struct needs to be resolved recursively,
+ * NEXTHOP_FLAG_RECURSIVE will be set in flags and the nexthops
+ * obtained by recursive resolution will be added to `resolved'.
+ * Only one level of recursive resolution is currently supported. */
+ struct nexthop *resolved;
+};
+
+#define nexthop_new() \
+({ \
+ struct nexthop *n = XCALLOC (MTYPE_NEXTHOP, sizeof (struct nexthop)); \
+ n; \
+})
+
+extern const char *nexthop_type_to_str (enum nexthop_types_t nh_type);
+extern int nexthop_same_no_recurse (struct nexthop *next1, struct nexthop *next2);
+
+#endif /*_LIB_NEXTHOP_H */
diff --git a/lib/zclient.c b/lib/zclient.c
index 3b5477e90..b77fd3432 100644
--- a/lib/zclient.c
+++ b/lib/zclient.c
@@ -967,6 +967,12 @@ zclient_read (struct thread *thread)
if (zclient->ipv6_route_delete)
(*zclient->ipv6_route_delete) (command, zclient, length);
break;
+ case ZEBRA_NEXTHOP_UPDATE:
+ if (zclient_debug)
+ zlog_debug("zclient rcvd nexthop update\n");
+ if (zclient->nexthop_update)
+ (*zclient->nexthop_update) (command, zclient, length);
+ break;
default:
break;
}
diff --git a/lib/zclient.h b/lib/zclient.h
index a660bbf19..2ece81ebc 100644
--- a/lib/zclient.h
+++ b/lib/zclient.h
@@ -82,6 +82,7 @@ struct zclient
int (*ipv4_route_delete) (int, struct zclient *, uint16_t);
int (*ipv6_route_add) (int, struct zclient *, uint16_t);
int (*ipv6_route_delete) (int, struct zclient *, uint16_t);
+ int (*nexthop_update) (int, struct zclient *, uint16_t);
};
/* Zebra API message flag. */
diff --git a/lib/zebra.h b/lib/zebra.h
index 3715b342e..124431a85 100644
--- a/lib/zebra.h
+++ b/lib/zebra.h
@@ -424,7 +424,10 @@ struct in_pktinfo
#define ZEBRA_ROUTER_ID_DELETE 21
#define ZEBRA_ROUTER_ID_UPDATE 22
#define ZEBRA_HELLO 23
-#define ZEBRA_MESSAGE_MAX 24
+#define ZEBRA_NEXTHOP_REGISTER 24
+#define ZEBRA_NEXTHOP_UNREGISTER 25
+#define ZEBRA_NEXTHOP_UPDATE 26
+#define ZEBRA_MESSAGE_MAX 27
/* Marker value used in new Zserv, in the byte location corresponding
* the command value in the old zserv header. To allow old and new
@@ -525,6 +528,7 @@ extern const char *zserv_command_string (unsigned int command);
#define CHECK_FLAG(V,F) ((V) & (F))
#define SET_FLAG(V,F) (V) |= (F)
#define UNSET_FLAG(V,F) (V) &= ~(F)
+#define RESET_FLAG(V) (V) = 0
/* AFI and SAFI type. */
typedef u_int16_t afi_t;
diff --git a/zebra/Makefile.am b/zebra/Makefile.am
index 96f7bef30..9aa135236 100644
--- a/zebra/Makefile.am
+++ b/zebra/Makefile.am
@@ -34,16 +34,16 @@ zebra_SOURCES = \
zserv.c main.c interface.c connected.c zebra_rib.c zebra_routemap.c \
redistribute.c debug.c rtadv.c zebra_snmp.c zebra_vty.c \
irdp_main.c irdp_interface.c irdp_packet.c router-id.c zebra_fpm.c \
- $(othersrc)
+ $(othersrc) zebra_rnh.c
testzebra_SOURCES = test_main.c zebra_rib.c interface.c connected.c debug.c \
zebra_vty.c \
- kernel_null.c redistribute_null.c ioctl_null.c misc_null.c
+ kernel_null.c redistribute_null.c ioctl_null.c misc_null.c zebra_rnh_null.c
noinst_HEADERS = \
connected.h ioctl.h rib.h rt.h zserv.h redistribute.h debug.h rtadv.h \
interface.h ipforward.h irdp.h router-id.h kernel_socket.h \
- rt_netlink.h zebra_fpm.h zebra_fpm_private.h
+ rt_netlink.h zebra_fpm.h zebra_fpm_private.h zebra_rnh.h
zebra_LDADD = $(otherobj) ../lib/libzebra.la $(LIBCAP) $(LIB_IPV6)
diff --git a/zebra/debug.c b/zebra/debug.c
index c3b00e0fa..10aba2d55 100644
--- a/zebra/debug.c
+++ b/zebra/debug.c
@@ -30,6 +30,7 @@ unsigned long zebra_debug_packet;
unsigned long zebra_debug_kernel;
unsigned long zebra_debug_rib;
unsigned long zebra_debug_fpm;
+unsigned long zebra_debug_nht;
DEFUN (show_debugging_zebra,
show_debugging_zebra_cmd,
@@ -74,6 +75,8 @@ DEFUN (show_debugging_zebra,
if (IS_ZEBRA_DEBUG_FPM)
vty_out (vty, " Zebra FPM debugging is on%s", VTY_NEWLINE);
+ if (IS_ZEBRA_DEBUG_NHT)
+ vty_out (vty, " Zebra next-hop tracking debugging is on%s", VTY_NEWLINE);
return CMD_SUCCESS;
}
@@ -89,6 +92,17 @@ DEFUN (debug_zebra_events,
return CMD_WARNING;
}
+DEFUN (debug_zebra_nht,
+ debug_zebra_nht_cmd,
+ "debug zebra nht",
+ DEBUG_STR
+ "Zebra configuration\n"
+ "Debug option set for zebra next hop tracking\n")
+{
+ zebra_debug_nht = ZEBRA_DEBUG_NHT;
+ return CMD_WARNING;
+}
+
DEFUN (debug_zebra_packet,
debug_zebra_packet_cmd,
"debug zebra packet",
@@ -196,6 +210,18 @@ DEFUN (no_debug_zebra_events,
return CMD_SUCCESS;
}
+DEFUN (no_debug_zebra_nht,
+ no_debug_zebra_nht_cmd,
+ "no debug zebra nht",
+ NO_STR
+ DEBUG_STR
+ "Zebra configuration\n"
+ "Debug option set for zebra next hop tracking\n")
+{
+ zebra_debug_nht = 0;
+ return CMD_SUCCESS;
+}
+
DEFUN (no_debug_zebra_packet,
no_debug_zebra_packet_cmd,
"no debug zebra packet",
@@ -352,6 +378,7 @@ zebra_debug_init (void)
install_element (ENABLE_NODE, &show_debugging_zebra_cmd);
install_element (ENABLE_NODE, &debug_zebra_events_cmd);
+ install_element (ENABLE_NODE, &debug_zebra_nht_cmd);
install_element (ENABLE_NODE, &debug_zebra_packet_cmd);
install_element (ENABLE_NODE, &debug_zebra_packet_direct_cmd);
install_element (ENABLE_NODE, &debug_zebra_packet_detail_cmd);
@@ -360,6 +387,7 @@ zebra_debug_init (void)
install_element (ENABLE_NODE, &debug_zebra_rib_q_cmd);
install_element (ENABLE_NODE, &debug_zebra_fpm_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_events_cmd);
+ install_element (ENABLE_NODE, &no_debug_zebra_nht_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_packet_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_kernel_cmd);
install_element (ENABLE_NODE, &no_debug_zebra_rib_cmd);
@@ -367,6 +395,7 @@ zebra_debug_init (void)
install_element (ENABLE_NODE, &no_debug_zebra_fpm_cmd);
install_element (CONFIG_NODE, &debug_zebra_events_cmd);
+ install_element (CONFIG_NODE, &debug_zebra_nht_cmd);
install_element (CONFIG_NODE, &debug_zebra_packet_cmd);
install_element (CONFIG_NODE, &debug_zebra_packet_direct_cmd);
install_element (CONFIG_NODE, &debug_zebra_packet_detail_cmd);
@@ -375,6 +404,7 @@ zebra_debug_init (void)
install_element (CONFIG_NODE, &debug_zebra_rib_q_cmd);
install_element (CONFIG_NODE, &debug_zebra_fpm_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_events_cmd);
+ install_element (CONFIG_NODE, &no_debug_zebra_nht_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_packet_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_kernel_cmd);
install_element (CONFIG_NODE, &no_debug_zebra_rib_cmd);
diff --git a/zebra/debug.h b/zebra/debug.h
index d9231a22b..0fb4dd9fe 100644
--- a/zebra/debug.h
+++ b/zebra/debug.h
@@ -37,6 +37,7 @@
#define ZEBRA_DEBUG_RIB_Q 0x02
#define ZEBRA_DEBUG_FPM 0x01
+#define ZEBRA_DEBUG_NHT 0x01
/* Debug related macro. */
#define IS_ZEBRA_DEBUG_EVENT (zebra_debug_event & ZEBRA_DEBUG_EVENT)
@@ -52,12 +53,14 @@
#define IS_ZEBRA_DEBUG_RIB_Q (zebra_debug_rib & ZEBRA_DEBUG_RIB_Q)
#define IS_ZEBRA_DEBUG_FPM (zebra_debug_fpm & ZEBRA_DEBUG_FPM)
+#define IS_ZEBRA_DEBUG_NHT (zebra_debug_nht & ZEBRA_DEBUG_NHT)
extern unsigned long zebra_debug_event;
extern unsigned long zebra_debug_packet;
extern unsigned long zebra_debug_kernel;
extern unsigned long zebra_debug_rib;
extern unsigned long zebra_debug_fpm;
+extern unsigned long zebra_debug_nht;
extern void zebra_debug_init (void);
diff --git a/zebra/rib.h b/zebra/rib.h
index ad726c6cc..833b892cb 100644
--- a/zebra/rib.h
+++ b/zebra/rib.h
@@ -26,18 +26,10 @@
#include "prefix.h"
#include "table.h"
#include "queue.h"
+#include "nexthop.h"
#define DISTANCE_INFINITY 255
-/* Routing information base. */
-
-union g_addr {
- struct in_addr ipv4;
-#ifdef HAVE_IPV6
- struct in6_addr ipv6;
-#endif /* HAVE_IPV6 */
-};
-
struct rib
{
/* Link list. */
@@ -224,50 +216,6 @@ struct static_ipv6
};
#endif /* HAVE_IPV6 */
-enum nexthop_types_t
-{
- NEXTHOP_TYPE_IFINDEX = 1, /* Directly connected. */
- NEXTHOP_TYPE_IFNAME, /* Interface route. */
- NEXTHOP_TYPE_IPV4, /* IPv4 nexthop. */
- NEXTHOP_TYPE_IPV4_IFINDEX, /* IPv4 nexthop with ifindex. */
- NEXTHOP_TYPE_IPV4_IFNAME, /* IPv4 nexthop with ifname. */
- NEXTHOP_TYPE_IPV6, /* IPv6 nexthop. */
- NEXTHOP_TYPE_IPV6_IFINDEX, /* IPv6 nexthop with ifindex. */
- NEXTHOP_TYPE_IPV6_IFNAME, /* IPv6 nexthop with ifname. */
- NEXTHOP_TYPE_BLACKHOLE, /* Null0 nexthop. */
-};
-
-/* Nexthop structure. */
-struct nexthop
-{
- struct nexthop *next;
- struct nexthop *prev;
-
- /* Interface index. */
- char *ifname;
- unsigned int ifindex;
-
- enum nexthop_types_t type;
-
- u_char flags;
-#define NEXTHOP_FLAG_ACTIVE (1 << 0) /* This nexthop is alive. */
-#define NEXTHOP_FLAG_FIB (1 << 1) /* FIB nexthop. */
-#define NEXTHOP_FLAG_RECURSIVE (1 << 2) /* Recursive nexthop. */
-#define NEXTHOP_FLAG_ONLINK (1 << 3) /* Nexthop should be installed onlink. */
-
- /* Nexthop address */
- union g_addr gate;
- union g_addr src;
-
- /* Nexthops obtained by recursive resolution.
- *
- * If the nexthop struct needs to be resolved recursively,
- * NEXTHOP_FLAG_RECURSIVE will be set in flags and the nexthops
- * obtained by recursive resolution will be added to `resolved'.
- * Only one level of recursive resolution is currently supported. */
- struct nexthop *resolved;
-};
-
/* The following for loop allows to iterate over the nexthop
* structure of routes.
*
@@ -334,6 +282,9 @@ struct vrf
/* Static route configuration. */
struct route_table *stable[AFI_MAX][SAFI_MAX];
+
+ /* Recursive Nexthop table */
+ struct route_table *rnh_table[AFI_MAX];
};
/*
@@ -373,7 +324,6 @@ typedef struct rib_tables_iter_t_
rib_tables_iter_state_t state;
} rib_tables_iter_t;
-extern const char *nexthop_type_to_str (enum nexthop_types_t nh_type);
extern struct nexthop *nexthop_ifindex_add (struct rib *, unsigned int);
extern struct nexthop *nexthop_ifname_add (struct rib *, char *);
extern struct nexthop *nexthop_blackhole_add (struct rib *);
@@ -383,6 +333,10 @@ extern struct nexthop *nexthop_ipv4_ifindex_add (struct rib *,
struct in_addr *,
struct in_addr *,
unsigned int);
+extern void nexthop_free (struct nexthop *nexthop);
+extern void nexthops_free (struct nexthop *nexthop);
+extern void nexthop_add (struct rib *rib, struct nexthop *nexthop);
+
extern int nexthop_has_fib_child(struct nexthop *);
extern void rib_lookup_and_dump (struct prefix_ipv4 *);
extern void rib_lookup_and_pushup (struct prefix_ipv4 *);
diff --git a/zebra/rt_netlink.c b/zebra/rt_netlink.c
index c242480f0..11f6f0ab2 100644
--- a/zebra/rt_netlink.c
+++ b/zebra/rt_netlink.c
@@ -36,6 +36,7 @@
#include "rib.h"
#include "thread.h"
#include "privs.h"
+#include "nexthop.h"
#include "zebra/zserv.h"
#include "zebra/rt.h"
diff --git a/zebra/zebra_fpm_netlink.c b/zebra/zebra_fpm_netlink.c
index b5f2b7607..1de75be4b 100644
--- a/zebra/zebra_fpm_netlink.c
+++ b/zebra/zebra_fpm_netlink.c
@@ -29,6 +29,7 @@
#include "rib.h"
#include "rt_netlink.h"
+#include "nexthop.h"
#include "zebra_fpm_private.h"
diff --git a/zebra/zebra_rib.c b/zebra/zebra_rib.c
index 02116b3d2..5cd81e3de 100644
--- a/zebra/zebra_rib.c
+++ b/zebra/zebra_rib.c
@@ -34,6 +34,7 @@
#include "workqueue.h"
#include "prefix.h"
#include "routemap.h"
+#include "nexthop.h"
#include "zebra/rib.h"
#include "zebra/rt.h"
@@ -41,6 +42,7 @@
#include "zebra/redistribute.h"
#include "zebra/debug.h"
#include "zebra/zebra_fpm.h"
+#include "zebra/zebra_rnh.h"
/* Default rtm_table for all clients */
extern struct zebra_t zebrad;
@@ -118,6 +120,8 @@ vrf_alloc (const char *name)
vrf->stable[AFI_IP][SAFI_MULTICAST] = route_table_init ();
vrf->stable[AFI_IP6][SAFI_MULTICAST] = route_table_init ();
+ vrf->rnh_table[AFI_IP] = route_table_init();
+ vrf->rnh_table[AFI_IP6] = route_table_init();
return vrf;
}
@@ -177,31 +181,6 @@ vrf_static_table (afi_t afi, safi_t safi, u_int32_t id)
return vrf->stable[afi][safi];
}
-/*
- * nexthop_type_to_str
- */
-const char *
-nexthop_type_to_str (enum nexthop_types_t nh_type)
-{
- static const char *desc[] = {
- "none",
- "Directly connected",
- "Interface route",
- "IPv4 nexthop",
- "IPv4 nexthop with ifindex",
- "IPv4 nexthop with ifname",
- "IPv6 nexthop",
- "IPv6 nexthop with ifindex",
- "IPv6 nexthop with ifname",
- "Null0 nexthop",
- };
-
- if (nh_type >= ZEBRA_NUM_OF (desc))
- return "<Invalid nh type>";
-
- return desc[nh_type];
-}
-
/* Add nexthop to the end of a nexthop list. */
static void
_nexthop_add (struct nexthop **target, struct nexthop *nexthop)
@@ -218,7 +197,7 @@ _nexthop_add (struct nexthop **target, struct nexthop *nexthop)
}
/* Add nexthop to the end of a rib node's nexthop list */
-static void
+void
nexthop_add (struct rib *rib, struct nexthop *nexthop)
{
_nexthop_add(&rib->nexthop, nexthop);
@@ -238,10 +217,8 @@ nexthop_delete (struct rib *rib, struct nexthop *nexthop)
rib->nexthop_num--;
}
-static void nexthops_free(struct nexthop *nexthop);
-
/* Free nexthop. */
-static void
+void
nexthop_free (struct nexthop *nexthop)
{
if (nexthop->ifname)
@@ -252,7 +229,7 @@ nexthop_free (struct nexthop *nexthop)
}
/* Frees a list of nexthops */
-static void
+void
nexthops_free (struct nexthop *nexthop)
{
struct nexthop *nh, *next;
@@ -1475,6 +1452,18 @@ process_subq (struct list * subq, u_char qindex)
return 1;
}
+/*
+ * All meta queues have been processed. Trigger next-hop evaluation.
+ */
+static void
+meta_queue_process_complete (struct work_queue *dummy)
+{
+ zebra_evaluate_rnh_table(0, AF_INET);
+#ifdef HAVE_IPV6
+ zebra_evaluate_rnh_table(0, AF_INET6);
+#endif /* HAVE_IPV6 */
+}
+
/* Dispatch the meta queue by picking, processing and unlocking the next RN from
* a non-empty sub-queue with lowest priority. wq is equal to zebra->ribq and data
* is pointed to the meta queue structure.
@@ -1635,6 +1624,7 @@ rib_queue_init (struct zebra_t *zebra)
/* fill in the work queue spec */
zebra->ribq->spec.workfunc = &meta_queue_process;
zebra->ribq->spec.errorfunc = NULL;
+ zebra->ribq->spec.completion_func = &meta_queue_process_complete;
/* XXX: TODO: These should be runtime configurable via vty */
zebra->ribq->spec.max_retries = 3;
zebra->ribq->spec.hold = rib_process_hold_time;
diff --git a/zebra/zebra_rnh.c b/zebra/zebra_rnh.c
new file mode 100644
index 000000000..117a2659c
--- /dev/null
+++ b/zebra/zebra_rnh.c
@@ -0,0 +1,603 @@
+/* Zebra next hop tracking code
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "prefix.h"
+#include "table.h"
+#include "memory.h"
+#include "str.h"
+#include "command.h"
+#include "if.h"
+#include "log.h"
+#include "sockunion.h"
+#include "linklist.h"
+#include "thread.h"
+#include "workqueue.h"
+#include "prefix.h"
+#include "routemap.h"
+#include "stream.h"
+#include "nexthop.h"
+
+#include "zebra/rib.h"
+#include "zebra/rt.h"
+#include "zebra/zserv.h"
+#include "zebra/redistribute.h"
+#include "zebra/debug.h"
+#include "zebra/zebra_rnh.h"
+
+#define lookup_rnh_table(v, f) \
+({ \
+ struct vrf *vrf; \
+ struct route_table *t = NULL; \
+ vrf = vrf_lookup(v); \
+ if (vrf) \
+ t = vrf->rnh_table[family2afi(f)]; \
+ t; \
+})
+
+static void free_state(struct rib *rib);
+static void copy_state(struct rnh *rnh, struct rib *rib);
+static int compare_state(struct rib *r1, struct rib *r2);
+static int send_client(struct rnh *rnh, struct zserv *client);
+static void print_rnh(struct route_node *rn, struct vty *vty);
+
+char *
+rnh_str (struct rnh *rnh, char *buf, int size)
+{
+ prefix2str(&(rnh->node->p), buf, size);
+ return buf;
+}
+
+struct rnh *
+zebra_add_rnh (struct prefix *p, u_int32_t vrfid)
+{
+ struct route_table *table;
+ struct route_node *rn;
+ struct rnh *rnh = NULL;
+
+ if (IS_ZEBRA_DEBUG_NHT)
+ {
+ char buf[INET6_ADDRSTRLEN];
+ prefix2str(p, buf, INET6_ADDRSTRLEN);
+ zlog_debug("add rnh %s in vrf %d", buf, vrfid);
+ }
+ table = lookup_rnh_table(vrfid, PREFIX_FAMILY(p));
+ if (!table)
+ {
+ zlog_debug("add_rnh: rnh table not found\n");
+ return NULL;
+ }
+
+ /* Make it sure prefixlen is applied to the prefix. */
+ apply_mask (p);
+
+ /* Lookup (or add) route node.*/
+ rn = route_node_get (table, p);
+
+ if (!rn->info)
+ {
+ rnh = XCALLOC(MTYPE_RNH, sizeof(struct rnh));
+ rnh->client_list = list_new();
+ route_lock_node (rn);
+ rn->info = rnh;
+ rnh->node = rn;
+ }
+
+ route_unlock_node (rn);
+ return (rn->info);
+}
+
+struct rnh *
+zebra_lookup_rnh (struct prefix *p, u_int32_t vrfid)
+{
+ struct route_table *table;
+ struct route_node *rn;
+
+ table = lookup_rnh_table(vrfid, PREFIX_FAMILY(p));
+ if (!table)
+ return NULL;
+
+ /* Make it sure prefixlen is applied to the prefix. */
+ apply_mask (p);
+
+ /* Lookup route node.*/
+ rn = route_node_lookup (table, p);
+ if (!rn)
+ return NULL;
+
+ route_unlock_node (rn);
+ return (rn->info);
+}
+
+void
+zebra_delete_rnh (struct rnh *rnh)
+{
+ struct route_node *rn;
+
+ if (!rnh || !(rn = rnh->node))
+ return;
+
+ if (IS_ZEBRA_DEBUG_NHT)
+ {
+ char buf[INET6_ADDRSTRLEN];
+ zlog_debug("delete rnh %s", rnh_str(rnh, buf, INET6_ADDRSTRLEN));
+ }
+
+ list_free(rnh->client_list);
+ free_state(rnh->state);
+ XFREE(MTYPE_RNH, rn->info);
+ rn->info = NULL;
+ route_unlock_node (rn);
+ return;
+}
+
+void
+zebra_add_rnh_client (struct rnh *rnh, struct zserv *client)
+{
+ if (IS_ZEBRA_DEBUG_NHT)
+ {
+ char buf[INET6_ADDRSTRLEN];
+ zlog_debug("client %s registers rnh %s",
+ zebra_route_string(client->proto),
+ rnh_str(rnh, buf, INET6_ADDRSTRLEN));
+ }
+ if (!listnode_lookup(rnh->client_list, client))
+ {
+ listnode_add(rnh->client_list, client);
+ send_client(rnh, client);
+ }
+}
+
+void
+zebra_remove_rnh_client (struct rnh *rnh, struct zserv *client)
+{
+ if (IS_ZEBRA_DEBUG_NHT)
+ {
+ char buf[INET6_ADDRSTRLEN];
+ zlog_debug("client %s unregisters rnh %s",
+ zebra_route_string(client->proto),
+ rnh_str(rnh, buf, INET6_ADDRSTRLEN));
+ }
+ listnode_delete(rnh->client_list, client);
+ if (list_isempty(rnh->client_list))
+ zebra_delete_rnh(rnh);
+}
+
+int
+zebra_evaluate_rnh_table (int vrfid, int family)
+{
+ struct route_table *ptable;
+ struct route_table *ntable;
+ struct route_node *prn;
+ struct route_node *nrn;
+ struct rnh *rnh;
+ struct zserv *client;
+ struct listnode *node;
+ struct rib *rib;
+
+ ntable = lookup_rnh_table(vrfid, family);
+ if (!ntable)
+ {
+ zlog_debug("evaluate_rnh_table: rnh table not found\n");
+ return -1;
+ }
+
+ ptable = vrf_table(family2afi(family), SAFI_UNICAST, vrfid);
+ if (!ptable)
+ {
+ zlog_debug("evaluate_rnh_table: prefix table not found\n");
+ return -1;
+ }
+
+ for (nrn = route_top (ntable); nrn; nrn = route_next (nrn))
+ {
+ if (!nrn->info)
+ continue;
+
+ prn = route_node_match(ptable, &nrn->p);
+ if (!prn)
+ rib = NULL;
+ else
+ {
+ RNODE_FOREACH_RIB(prn, rib)
+ {
+ if (CHECK_FLAG (rib->status, RIB_ENTRY_REMOVED))
+ continue;
+ if (CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
+ break;
+ }
+ }
+
+ rnh = nrn->info;
+ if (compare_state(rib, rnh->state))
+ {
+ if (IS_ZEBRA_DEBUG_NHT)
+ {
+ char bufn[INET6_ADDRSTRLEN];
+ char bufp[INET6_ADDRSTRLEN];
+ prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN);
+ if (prn)
+ prefix2str(&prn->p, bufp, INET6_ADDRSTRLEN);
+ else
+ strcpy(bufp, "null");
+ zlog_debug("rnh %s resolved through route %s - sending "
+ "nexthop %s event to clients", bufn, bufp,
+ rib ? "reachable" : "unreachable");
+ }
+ copy_state(rnh, rib);
+ for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client))
+ send_client(rnh, client);
+ }
+ }
+ return 1;
+}
+
+int
+zebra_dispatch_rnh_table (int vrfid, int family, struct zserv *client)
+{
+ struct route_table *ntable;
+ struct route_node *nrn;
+ struct rnh *rnh;
+
+ ntable = lookup_rnh_table(vrfid, family);
+ if (!ntable)
+ {
+ zlog_debug("dispatch_rnh_table: rnh table not found\n");
+ return -1;
+ }
+
+ for (nrn = route_top (ntable); nrn; nrn = route_next (nrn))
+ {
+ if (!nrn->info)
+ continue;
+
+ rnh = nrn->info;
+ if (IS_ZEBRA_DEBUG_NHT)
+ {
+ char bufn[INET6_ADDRSTRLEN];
+ prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN);
+ zlog_debug("rnh %s - sending nexthop %s event to client %s", bufn,
+ rnh->state ? "reachable" : "unreachable",
+ zebra_route_string(client->proto));
+ }
+ send_client(rnh, client);
+ }
+ return 1;
+}
+
+void
+zebra_print_rnh_table (int vrfid, int af, struct vty *vty)
+{
+ struct route_table *table;
+ struct route_node *rn;
+
+ table = lookup_rnh_table(vrfid, af);
+ if (!table)
+ {
+ zlog_debug("print_rnhs: rnh table not found\n");
+ return;
+ }
+
+ for (rn = route_top(table); rn; rn = route_next(rn))
+ if (rn->info)
+ print_rnh(rn, vty);
+}
+
+int
+zebra_cleanup_rnh_client (int vrfid, int family, struct zserv *client)
+{
+ struct route_table *ntable;
+ struct route_node *nrn;
+ struct rnh *rnh;
+
+ ntable = lookup_rnh_table(vrfid, family);
+ if (!ntable)
+ {
+ zlog_debug("cleanup_rnh_client: rnh table not found\n");
+ return -1;
+ }
+
+ for (nrn = route_top (ntable); nrn; nrn = route_next (nrn))
+ {
+ if (!nrn->info)
+ continue;
+
+ rnh = nrn->info;
+ if (IS_ZEBRA_DEBUG_NHT)
+ {
+ char bufn[INET6_ADDRSTRLEN];
+ prefix2str(&nrn->p, bufn, INET6_ADDRSTRLEN);
+ zlog_debug("rnh %s - cleaning state for client %s", bufn,
+ zebra_route_string(client->proto));
+ }
+ zebra_remove_rnh_client(rnh, client);
+ }
+ return 1;
+}
+
+/**
+ * free_state - free up the rib structure associated with the rnh.
+ */
+static void
+free_state (struct rib *rib)
+{
+ struct nexthop *nexthop, *next;
+
+ if (!rib)
+ return;
+
+ /* free RIB and nexthops */
+ for (nexthop = rib->nexthop; nexthop; nexthop = next)
+ {
+ next = nexthop->next;
+ nexthop_free (nexthop);
+ }
+ XFREE (MTYPE_RIB, rib);
+}
+
+/**
+ * copy_nexthop - copy a nexthop to the rib structure.
+ */
+static void
+copy_nexthop (struct rib *state, struct nexthop *nh)
+{
+ struct nexthop *nexthop;
+
+ nexthop = nexthop_new();
+ nexthop->flags = nh->flags;
+ nexthop->type = nh->type;
+ nexthop->ifindex = nh->ifindex;
+ if (nh->ifname)
+ nexthop->ifname = XSTRDUP(0, nh->ifname);
+ memcpy(&(nexthop->gate), &(nh->gate), sizeof(union g_addr));
+ memcpy(&(nexthop->src), &(nh->src), sizeof(union g_addr));
+
+ nexthop_add(state, nexthop);
+}
+
+static void
+copy_state (struct rnh *rnh, struct rib *rib)
+{
+ struct rib *state;
+ struct nexthop *nh;
+
+ if (rnh->state)
+ {
+ free_state(rnh->state);
+ rnh->state = NULL;
+ }
+
+ if (!rib)
+ return;
+
+ state = XCALLOC (MTYPE_RIB, sizeof (struct rib));
+ state->type = rib->type;
+ state->metric = rib->metric;
+
+ for (nh = rib->nexthop; nh; nh = nh->next)
+ copy_nexthop(state, nh);
+ rnh->state = state;
+}
+
+static int
+compare_state (struct rib *r1, struct rib *r2)
+{
+ struct nexthop *nh1;
+ struct nexthop *nh2;
+ u_char found_nh = 0;
+
+ if (!r1 && !r2)
+ return 0;
+
+ if ((!r1 && r2) || (r1 && !r2))
+ return 1;
+
+ if (r1->metric != r2->metric)
+ return 1;
+
+ if (r1->nexthop_num != r2->nexthop_num)
+ return 1;
+
+ /* We need to verify that the nexthops for r1 match the nexthops for r2.
+ * Since it is possible for a rib entry to have the same nexthop multiple
+ * times (Example: [a,a]) we need to keep track of which r2 nexthops we have
+ * already used as a match against a r1 nexthop. We track this
+ * via NEXTHOP_FLAG_MATCHED. Clear this flag for all r2 nexthops when you
+ * are finished.
+ *
+ * TRUE: r1 [a,b], r2 [a,b]
+ * TRUE: r1 [a,b], r2 [b,a]
+ * FALSE: r1 [a,b], r2 [a,c]
+ * FALSE: r1 [a,a], r2 [a,b]
+ */
+ for (nh1 = r1->nexthop; nh1; nh1 = nh1->next)
+ {
+ found_nh = 0;
+ for (nh2 = r2->nexthop; nh2; nh2 = nh2->next)
+ {
+ if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED))
+ continue;
+
+ if (nexthop_same_no_recurse(nh1, nh2))
+ {
+ SET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED);
+ found_nh = 1;
+ break;
+ }
+ }
+
+ if (!found_nh)
+ {
+ for (nh2 = r2->nexthop; nh2; nh2 = nh2->next)
+ if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED))
+ UNSET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED);
+ return 1;
+ }
+ }
+
+ for (nh2 = r2->nexthop; nh2; nh2 = nh2->next)
+ if (CHECK_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED))
+ UNSET_FLAG (nh2->flags, NEXTHOP_FLAG_MATCHED);
+
+ return 0;
+}
+
+static int
+send_client (struct rnh *rnh, struct zserv *client)
+{
+ struct stream *s;
+ struct rib *rib;
+ unsigned long nump;
+ u_char num;
+ struct nexthop *nexthop;
+ struct route_node *rn;
+
+ rn = rnh->node;
+ rib = rnh->state;
+
+ /* Get output stream. */
+ s = client->obuf;
+ stream_reset (s);
+
+ zserv_create_header (s, ZEBRA_NEXTHOP_UPDATE);
+
+ stream_putw(s, rn->p.family);
+ stream_put_prefix (s, &rn->p);
+
+ if (rib)
+ {
+ stream_putl (s, rib->metric);
+ num = 0;
+ nump = stream_get_endp(s);
+ stream_putc (s, 0);
+ for (nexthop = rib->nexthop; nexthop; nexthop = nexthop->next)
+ if (CHECK_FLAG (nexthop->flags, NEXTHOP_FLAG_FIB))
+ {
+ stream_putc (s, nexthop->type);
+ switch (nexthop->type)
+ {
+ case ZEBRA_NEXTHOP_IPV4:
+ stream_put_in_addr (s, &nexthop->gate.ipv4);
+ break;
+ case ZEBRA_NEXTHOP_IFINDEX:
+ case ZEBRA_NEXTHOP_IFNAME:
+ stream_putl (s, nexthop->ifindex);
+ break;
+ case ZEBRA_NEXTHOP_IPV4_IFINDEX:
+ case ZEBRA_NEXTHOP_IPV4_IFNAME:
+ stream_put_in_addr (s, &nexthop->gate.ipv4);
+ stream_putl (s, nexthop->ifindex);
+ break;
+#ifdef HAVE_IPV6
+ case ZEBRA_NEXTHOP_IPV6:
+ stream_put (s, &nexthop->gate.ipv6, 16);
+ break;
+ case ZEBRA_NEXTHOP_IPV6_IFINDEX:
+ case ZEBRA_NEXTHOP_IPV6_IFNAME:
+ stream_put (s, &nexthop->gate.ipv6, 16);
+ stream_putl (s, nexthop->ifindex);
+ break;
+#endif /* HAVE_IPV6 */
+ default:
+ /* do nothing */
+ break;
+ }
+ num++;
+ }
+ stream_putc_at (s, nump, num);
+ }
+ else
+ {
+ stream_putl (s, 0);
+ stream_putc (s, 0);
+ }
+ stream_putw_at (s, 0, stream_get_endp (s));
+ return zebra_server_send_message(client);
+}
+
+static void
+print_nh (struct nexthop *nexthop, struct vty *vty)
+{
+ char buf[BUFSIZ];
+
+ switch (nexthop->type)
+ {
+ case NEXTHOP_TYPE_IPV4:
+ case NEXTHOP_TYPE_IPV4_IFINDEX:
+ vty_out (vty, " via %s", inet_ntoa (nexthop->gate.ipv4));
+ if (nexthop->ifindex)
+ vty_out (vty, ", %s", ifindex2ifname (nexthop->ifindex));
+ break;
+ case NEXTHOP_TYPE_IPV6:
+ case NEXTHOP_TYPE_IPV6_IFINDEX:
+ case NEXTHOP_TYPE_IPV6_IFNAME:
+ vty_out (vty, " %s",
+ inet_ntop (AF_INET6, &nexthop->gate.ipv6, buf, BUFSIZ));
+ if (nexthop->type == NEXTHOP_TYPE_IPV6_IFNAME)
+ vty_out (vty, ", %s", nexthop->ifname);
+ else if (nexthop->ifindex)
+ vty_out (vty, ", via %s", ifindex2ifname (nexthop->ifindex));
+ break;
+ case NEXTHOP_TYPE_IFINDEX:
+ vty_out (vty, " is directly connected, %s",
+ ifindex2ifname (nexthop->ifindex));
+ break;
+ case NEXTHOP_TYPE_IFNAME:
+ vty_out (vty, " is directly connected, %s", nexthop->ifname);
+ break;
+ case NEXTHOP_TYPE_BLACKHOLE:
+ vty_out (vty, " is directly connected, Null0");
+ break;
+ default:
+ break;
+ }
+ vty_out(vty, "%s", VTY_NEWLINE);
+}
+
+static void
+print_rnh (struct route_node *rn, struct vty *vty)
+{
+ struct rnh *rnh;
+ struct nexthop *nexthop;
+ struct listnode *node;
+ struct zserv *client;
+ char buf[BUFSIZ];
+
+ rnh = rn->info;
+ vty_out(vty, "%s%s", inet_ntop(rn->p.family, &rn->p.u.prefix, buf, BUFSIZ),
+ VTY_NEWLINE);
+ if (rnh->state)
+ {
+ vty_out(vty, " resolved via %s%s",
+ zebra_route_string(rnh->state->type), VTY_NEWLINE);
+ for (nexthop = rnh->state->nexthop; nexthop; nexthop = nexthop->next)
+ print_nh(nexthop, vty);
+ }
+ else
+ vty_out(vty, " unresolved%s", VTY_NEWLINE);
+
+ vty_out(vty, " Client list:");
+ for (ALL_LIST_ELEMENTS_RO(rnh->client_list, node, client))
+ vty_out(vty, " %s(fd %d)", zebra_route_string(client->proto),
+ client->sock);
+ vty_out(vty, "%s", VTY_NEWLINE);
+}
diff --git a/zebra/zebra_rnh.h b/zebra/zebra_rnh.h
new file mode 100644
index 000000000..212bab602
--- /dev/null
+++ b/zebra/zebra_rnh.h
@@ -0,0 +1,48 @@
+/*
+ * Zebra next hop tracking header
+ * Copyright (C) 2013 Cumulus Networks, Inc.
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING. If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#ifndef _ZEBRA_RNH_H
+#define _ZEBRA_RNH_H
+
+#include "prefix.h"
+#include "vty.h"
+
+/* Nexthop structure. */
+struct rnh
+{
+ u_char flags;
+ struct rib *state;
+ struct list *client_list;
+ struct route_node *node;
+};
+
+extern struct rnh *zebra_add_rnh(struct prefix *p, u_int32_t vrfid);
+extern struct rnh *zebra_lookup_rnh(struct prefix *p, u_int32_t vrfid);
+extern void zebra_delete_rnh(struct rnh *rnh);
+extern void zebra_add_rnh_client(struct rnh *rnh, struct zserv *client);
+extern void zebra_remove_rnh_client(struct rnh *rnh, struct zserv *client);
+extern int zebra_evaluate_rnh_table(int vrfid, int family);
+extern int zebra_dispatch_rnh_table(int vrfid, int family, struct zserv *cl);
+extern void zebra_print_rnh_table(int vrfid, int family, struct vty *vty);
+extern char *rnh_str(struct rnh *rnh, char *buf, int size);
+extern int zebra_cleanup_rnh_client(int vrf, int family, struct zserv *client);
+#endif /*_ZEBRA_RNH_H */
diff --git a/zebra/zebra_rnh_null.c b/zebra/zebra_rnh_null.c
new file mode 100644
index 000000000..68b58ce3e
--- /dev/null
+++ b/zebra/zebra_rnh_null.c
@@ -0,0 +1,10 @@
+#include <zebra.h>
+#include "zebra/rib.h"
+#include "zebra/zserv.h"
+#include "zebra/zebra_rnh.h"
+
+int zebra_evaluate_rnh_table (int vrfid, int family)
+{ return 0; }
+
+void zebra_print_rnh_table (int vrfid, int family, struct vty *vty)
+{}
diff --git a/zebra/zebra_routemap.c b/zebra/zebra_routemap.c
index b0dca088b..7b4472488 100644
--- a/zebra/zebra_routemap.c
+++ b/zebra/zebra_routemap.c
@@ -28,6 +28,7 @@
#include "command.h"
#include "filter.h"
#include "plist.h"
+#include "nexthop.h"
#include "zebra/zserv.h"
diff --git a/zebra/zebra_vty.c b/zebra/zebra_vty.c
index 254354c72..72095706e 100644
--- a/zebra/zebra_vty.c
+++ b/zebra/zebra_vty.c
@@ -27,8 +27,10 @@
#include "command.h"
#include "table.h"
#include "rib.h"
+#include "nexthop.h"
#include "zebra/zserv.h"
+#include "zebra/zebra_rnh.h"
/* General fucntion for static route. */
static int
@@ -811,6 +813,28 @@ DEFUN (show_ip_route,
return CMD_SUCCESS;
}
+DEFUN (show_ip_nht,
+ show_ip_nht_cmd,
+ "show ip nht",
+ SHOW_STR
+ IP_STR
+ "IP nexthop tracking table\n")
+{
+ zebra_print_rnh_table(0, AF_INET, vty);
+ return CMD_SUCCESS;
+}
+
+DEFUN (show_ipv6_nht,
+ show_ipv6_nht_cmd,
+ "show ipv6 nht",
+ SHOW_STR
+ IP_STR
+ "IPv6 nexthop tracking table\n")
+{
+ zebra_print_rnh_table(0, AF_INET6, vty);
+ return CMD_SUCCESS;
+}
+
DEFUN (show_ip_route_prefix_longer,
show_ip_route_prefix_longer_cmd,
"show ip route A.B.C.D/M longer-prefixes",
@@ -2210,6 +2234,8 @@ zebra_vty_init (void)
install_element (CONFIG_NODE, &no_ip_route_mask_flags_distance2_cmd);
install_element (VIEW_NODE, &show_ip_route_cmd);
+ install_element (VIEW_NODE, &show_ip_nht_cmd);
+ install_element (VIEW_NODE, &show_ipv6_nht_cmd);
install_element (VIEW_NODE, &show_ip_route_addr_cmd);
install_element (VIEW_NODE, &show_ip_route_prefix_cmd);
install_element (VIEW_NODE, &show_ip_route_prefix_longer_cmd);
@@ -2218,6 +2244,8 @@ zebra_vty_init (void)
install_element (VIEW_NODE, &show_ip_route_summary_cmd);
install_element (VIEW_NODE, &show_ip_route_summary_prefix_cmd);
install_element (ENABLE_NODE, &show_ip_route_cmd);
+ install_element (ENABLE_NODE, &show_ip_nht_cmd);
+ install_element (ENABLE_NODE, &show_ipv6_nht_cmd);
install_element (ENABLE_NODE, &show_ip_route_addr_cmd);
install_element (ENABLE_NODE, &show_ip_route_prefix_cmd);
install_element (ENABLE_NODE, &show_ip_route_prefix_longer_cmd);
diff --git a/zebra/zserv.c b/zebra/zserv.c
index b245b7f7b..ecb3bd05d 100644
--- a/zebra/zserv.c
+++ b/zebra/zserv.c
@@ -36,12 +36,14 @@
#include "privs.h"
#include "network.h"
#include "buffer.h"
+#include "nexthop.h"
#include "zebra/zserv.h"
#include "zebra/router-id.h"
#include "zebra/redistribute.h"
#include "zebra/debug.h"
#include "zebra/ipforward.h"
+#include "zebra/zebra_rnh.h"
/* Event list of zebra. */
enum event { ZEBRA_SERV, ZEBRA_READ, ZEBRA_WRITE };
@@ -101,7 +103,7 @@ zserv_flush_data(struct thread *thread)
return 0;
}
-static int
+int
zebra_server_send_message(struct zserv *client)
{
if (client->t_suicide)
@@ -130,7 +132,7 @@ zebra_server_send_message(struct zserv *client)
return 0;
}
-static void
+void
zserv_create_header (struct stream *s, uint16_t cmd)
{
/* length placeholder, caller can update */
@@ -599,6 +601,65 @@ zsend_ipv4_nexthop_lookup (struct zserv *client, struct in_addr addr)
return zebra_server_send_message(client);
}
+/* Nexthop register */
+static int
+zserv_nexthop_register (struct zserv *client, int sock, u_short length)
+{
+ struct rnh *rnh;
+ struct stream *s;
+ struct prefix p;
+ u_short l = 0;
+
+ if (IS_ZEBRA_DEBUG_NHT)
+ zlog_debug("nexthop_register msg from client %s: length=%d\n",
+ zebra_route_string(client->proto), length);
+
+ s = client->ibuf;
+
+ while (l < length)
+ {
+ p.family = stream_getw(s);
+ p.prefixlen = stream_getc(s);
+ l += 3;
+ stream_get(&p.u.prefix, s, PSIZE(p.prefixlen));
+ l += PSIZE(p.prefixlen);
+ rnh = zebra_add_rnh(&p, 0);
+ zebra_add_rnh_client(rnh, client);
+ }
+ zebra_evaluate_rnh_table(0, AF_INET);
+ zebra_evaluate_rnh_table(0, AF_INET6);
+ return 0;
+}
+
+/* Nexthop register */
+static int
+zserv_nexthop_unregister (struct zserv *client, int sock, u_short length)
+{
+ struct rnh *rnh;
+ struct stream *s;
+ struct prefix p;
+ u_short l = 0;
+
+ if (IS_ZEBRA_DEBUG_NHT)
+ zlog_debug("nexthop_unregister msg from client %s: length=%d\n",
+ zebra_route_string(client->proto), length);
+
+ s = client->ibuf;
+
+ while (l < length)
+ {
+ p.family = stream_getw(s);
+ p.prefixlen = stream_getc(s);
+ l += 3;
+ stream_get(&p.u.prefix, s, PSIZE(p.prefixlen));
+ l += PSIZE(p.prefixlen);
+ rnh = zebra_lookup_rnh(&p, 0);
+ if (rnh)
+ zebra_remove_rnh_client(rnh, client);
+ }
+ return 0;
+}
+
static int
zsend_ipv4_import_lookup (struct zserv *client, struct prefix_ipv4 *p)
{
@@ -1171,6 +1232,7 @@ zread_hello (struct zserv *client)
client->sock);
route_type_oaths[proto] = client->sock;
+ client->proto = proto;
}
}
@@ -1196,6 +1258,9 @@ zebra_score_rib (int client_sock)
static void
zebra_client_close (struct zserv *client)
{
+ zebra_cleanup_rnh_client(0, AF_INET, client);
+ zebra_cleanup_rnh_client(0, AF_INET6, client);
+
/* Close file descriptor. */
if (client->sock)
{
@@ -1408,6 +1473,12 @@ zebra_client_read (struct thread *thread)
case ZEBRA_HELLO:
zread_hello (client);
break;
+ case ZEBRA_NEXTHOP_REGISTER:
+ zserv_nexthop_register(client, sock, length);
+ break;
+ case ZEBRA_NEXTHOP_UNREGISTER:
+ zserv_nexthop_unregister(client, sock, length);
+ break;
default:
zlog_info ("Zebra received unknown command %d", command);
break;
@@ -1681,8 +1752,10 @@ DEFUN (show_zebra_client,
struct zserv *client;
for (ALL_LIST_ELEMENTS_RO (zebrad.client_list, node, client))
- vty_out (vty, "Client fd %d%s", client->sock, VTY_NEWLINE);
-
+ vty_out (vty, "Client %s fd %d%s",
+ zebra_route_string(client->proto), client->sock,
+ VTY_NEWLINE);
+
return CMD_SUCCESS;
}
diff --git a/zebra/zserv.h b/zebra/zserv.h
index 5e8bccac3..92b8ba3a6 100644
--- a/zebra/zserv.h
+++ b/zebra/zserv.h
@@ -66,6 +66,9 @@ struct zserv
/* Router-id information. */
u_char ridinfo;
+
+ /* client's protocol */
+ u_char proto;
};
/* Zebra instance */
@@ -110,4 +113,7 @@ extern int zsend_router_id_update(struct zserv *, struct prefix *);
extern pid_t pid;
+extern void zserv_create_header(struct stream *s, uint16_t cmd);
+extern int zebra_server_send_message(struct zserv *client);
+
#endif /* _ZEBRA_ZEBRA_H */