diff options
Diffstat (limited to 'bgpd/bgp_mpath.c')
-rw-r--r-- | bgpd/bgp_mpath.c | 438 |
1 files changed, 113 insertions, 325 deletions
diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c index e12d84b8..609afa42 100644 --- a/bgpd/bgp_mpath.c +++ b/bgpd/bgp_mpath.c @@ -2,8 +2,10 @@ /* * BGP Multipath * Copyright (C) 2010 Google Inc. + * 2024 Nvidia Corporation + * Donald Sharp * - * This file is part of Quagga + * This file is part of FRR */ #include <zebra.h> @@ -191,78 +193,6 @@ int bgp_path_info_nexthop_cmp(struct bgp_path_info *bpi1, } /* - * bgp_path_info_mpath_cmp - * - * This function determines our multipath list ordering. By ordering - * the list we can deterministically select which paths are included - * in the multipath set. The ordering also helps in detecting changes - * in the multipath selection so we can detect whether to send an - * update to zebra. - * - * The order of paths is determined first by received nexthop, and then - * by peer address if the nexthops are the same. - */ -static int bgp_path_info_mpath_cmp(void *val1, void *val2) -{ - struct bgp_path_info *bpi1, *bpi2; - int compare; - - bpi1 = val1; - bpi2 = val2; - - compare = bgp_path_info_nexthop_cmp(bpi1, bpi2); - - if (!compare) { - if (!bpi1->peer->su_remote && !bpi2->peer->su_remote) - compare = 0; - else if (!bpi1->peer->su_remote) - compare = 1; - else if (!bpi2->peer->su_remote) - compare = -1; - else - compare = sockunion_cmp(bpi1->peer->su_remote, - bpi2->peer->su_remote); - } - - return compare; -} - -/* - * bgp_mp_list_init - * - * Initialize the mp_list, which holds the list of multipaths - * selected by bgp_best_selection - */ -void bgp_mp_list_init(struct list *mp_list) -{ - assert(mp_list); - memset(mp_list, 0, sizeof(struct list)); - mp_list->cmp = bgp_path_info_mpath_cmp; -} - -/* - * bgp_mp_list_clear - * - * Clears all entries out of the mp_list - */ -void bgp_mp_list_clear(struct list *mp_list) -{ - assert(mp_list); - list_delete_all_node(mp_list); -} - -/* - * bgp_mp_list_add - * - * Adds a multipath entry to the mp_list - */ -void bgp_mp_list_add(struct list *mp_list, struct bgp_path_info *mpinfo) -{ - assert(mp_list && mpinfo); - listnode_add_sort(mp_list, mpinfo); -} - -/* * bgp_path_info_mpath_new * * Allocate and zero memory for a new bgp_path_info_mpath element @@ -274,6 +204,7 @@ static struct bgp_path_info_mpath *bgp_path_info_mpath_new(void) new_mpath = XCALLOC(MTYPE_BGP_MPATH_INFO, sizeof(struct bgp_path_info_mpath)); + new_mpath->mp_count = 1; return new_mpath; } @@ -287,6 +218,8 @@ void bgp_path_info_mpath_free(struct bgp_path_info_mpath **mpath) if (mpath && *mpath) { if ((*mpath)->mp_attr) bgp_attr_unintern(&(*mpath)->mp_attr); + (*mpath)->mp_attr = NULL; + XFREE(MTYPE_BGP_MPATH_INFO, *mpath); } } @@ -314,58 +247,22 @@ bgp_path_info_mpath_get(struct bgp_path_info *path) } /* - * bgp_path_info_mpath_enqueue - * - * Enqueue a path onto the multipath list given the previous multipath - * list entry - */ -static void bgp_path_info_mpath_enqueue(struct bgp_path_info *prev_info, - struct bgp_path_info *path) -{ - struct bgp_path_info_mpath *prev, *mpath; - - prev = bgp_path_info_mpath_get(prev_info); - mpath = bgp_path_info_mpath_get(path); - if (!prev || !mpath) - return; - - mpath->mp_next = prev->mp_next; - mpath->mp_prev = prev; - if (prev->mp_next) - prev->mp_next->mp_prev = mpath; - prev->mp_next = mpath; - - SET_FLAG(path->flags, BGP_PATH_MULTIPATH); -} - -/* - * bgp_path_info_mpath_dequeue - * - * Remove a path from the multipath list - */ -void bgp_path_info_mpath_dequeue(struct bgp_path_info *path) -{ - struct bgp_path_info_mpath *mpath = path->mpath; - if (!mpath) - return; - if (mpath->mp_prev) - mpath->mp_prev->mp_next = mpath->mp_next; - if (mpath->mp_next) - mpath->mp_next->mp_prev = mpath->mp_prev; - mpath->mp_next = mpath->mp_prev = NULL; - UNSET_FLAG(path->flags, BGP_PATH_MULTIPATH); -} - -/* * bgp_path_info_mpath_next * * Given a bgp_path_info, return the next multipath entry */ struct bgp_path_info *bgp_path_info_mpath_next(struct bgp_path_info *path) { - if (!path->mpath || !path->mpath->mp_next) - return NULL; - return path->mpath->mp_next->mp_info; + path = path->next; + + while (path) { + if (CHECK_FLAG(path->flags, BGP_PATH_MULTIPATH)) + return path; + + path = path->next; + } + + return NULL; } /* @@ -386,7 +283,8 @@ struct bgp_path_info *bgp_path_info_mpath_first(struct bgp_path_info *path) uint32_t bgp_path_info_mpath_count(struct bgp_path_info *path) { if (!path->mpath) - return 0; + return 1; + return path->mpath->mp_count; } @@ -411,6 +309,10 @@ static void bgp_path_info_mpath_count_set(struct bgp_path_info *path, * bgp_path_info_mpath_lb_update * * Update cumulative info related to link-bandwidth + * + * This is only set on the first mpath of the list + * as such we should UNSET the flags when removing + * to ensure nothing accidently happens */ static void bgp_path_info_mpath_lb_update(struct bgp_path_info *path, bool set, bool all_paths_lb, uint64_t cum_bw) @@ -472,10 +374,10 @@ bool bgp_path_info_mpath_chkwtd(struct bgp *bgp, struct bgp_path_info *path) */ if (bgp->lb_handling != BGP_LINK_BW_SKIP_MISSING && bgp->lb_handling != BGP_LINK_BW_DEFWT_4_MISSING) - return (path->mpath->mp_flags & BGP_MP_LB_ALL); + return CHECK_FLAG(path->mpath->mp_flags, BGP_MP_LB_ALL); /* At least one path should have bandwidth. */ - return (path->mpath->mp_flags & BGP_MP_LB_PRESENT); + return CHECK_FLAG(path->mpath->mp_flags, BGP_MP_LB_PRESENT); } /* @@ -511,58 +413,51 @@ static void bgp_path_info_mpath_attr_set(struct bgp_path_info *path, /* * bgp_path_info_mpath_update * - * Compare and sync up the multipath list with the mp_list generated by - * bgp_best_selection + * Compare and sync up the multipath flags with what was choosen + * in best selection */ void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, - struct bgp_path_info *new_best, - struct bgp_path_info *old_best, - struct list *mp_list, - struct bgp_maxpaths_cfg *mpath_cfg) + struct bgp_path_info *new_best, struct bgp_path_info *old_best, + uint32_t num_candidates, struct bgp_maxpaths_cfg *mpath_cfg) { uint16_t maxpaths, mpath_count, old_mpath_count; uint64_t bwval; uint64_t cum_bw, old_cum_bw; - struct listnode *mp_node, *mp_next_node; - struct bgp_path_info *cur_mpath, *new_mpath, *next_mpath, *prev_mpath; - int mpath_changed, debug; + struct bgp_path_info *cur_iterator = NULL; + bool mpath_changed, debug; bool all_paths_lb; char path_buf[PATH_ADDPATH_STR_BUFFER]; + bool old_mpath, new_mpath; - mpath_changed = 0; + mpath_changed = false; maxpaths = multipath_num; mpath_count = 0; - cur_mpath = NULL; old_mpath_count = 0; old_cum_bw = cum_bw = 0; - prev_mpath = new_best; - mp_node = listhead(mp_list); debug = bgp_debug_bestpath(dest); - if (new_best) { - mpath_count++; - if (new_best != old_best) - bgp_path_info_mpath_dequeue(new_best); - maxpaths = (new_best->peer->sort == BGP_PEER_IBGP) - ? mpath_cfg->maxpaths_ibgp - : mpath_cfg->maxpaths_ebgp; - } - if (old_best) { - cur_mpath = bgp_path_info_mpath_first(old_best); old_mpath_count = bgp_path_info_mpath_count(old_best); + if (old_mpath_count == 1) + SET_FLAG(old_best->flags, BGP_PATH_MULTIPATH); old_cum_bw = bgp_path_info_mpath_cumbw(old_best); bgp_path_info_mpath_count_set(old_best, 0); bgp_path_info_mpath_lb_update(old_best, false, false, 0); - bgp_path_info_mpath_dequeue(old_best); + bgp_path_info_mpath_free(&old_best->mpath); + old_best->mpath = NULL; + } + + if (new_best) { + maxpaths = (new_best->peer->sort == BGP_PEER_IBGP) ? mpath_cfg->maxpaths_ibgp + : mpath_cfg->maxpaths_ebgp; + cur_iterator = new_best; } if (debug) - zlog_debug("%pBD(%s): starting mpath update, newbest %s num candidates %d old-mpath-count %d old-cum-bw %" PRIu64, - dest, bgp->name_pretty, - new_best ? new_best->peer->host : "NONE", - mp_list ? listcount(mp_list) : 0, old_mpath_count, - old_cum_bw); + zlog_debug("%pBD(%s): starting mpath update, newbest %s num candidates %d old-mpath-count %d old-cum-bw %" PRIu64 + " maxpaths set %u", + dest, bgp->name_pretty, new_best ? new_best->peer->host : "NONE", + num_candidates, old_mpath_count, old_cum_bw, maxpaths); /* * We perform an ordered walk through both lists in parallel. @@ -576,213 +471,106 @@ void bgp_path_info_mpath_update(struct bgp *bgp, struct bgp_dest *dest, * to skip over it */ all_paths_lb = true; /* We'll reset if any path doesn't have LB. */ - while (mp_node || cur_mpath) { - struct bgp_path_info *tmp_info; + while (cur_iterator) { + old_mpath = CHECK_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + new_mpath = CHECK_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH_NEW); + + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH_NEW); /* - * We can bail out of this loop if all existing paths on the - * multipath list have been visited (for cleanup purposes) and - * the maxpath requirement is fulfulled + * If the current mpath count is equal to the number of + * maxpaths that can be used then we can bail, after + * we clean up the flags associated with the rest of the + * bestpaths */ - if (!cur_mpath && (mpath_count >= maxpaths)) - break; + if (mpath_count >= maxpaths) { + while (cur_iterator) { + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH_NEW); - mp_next_node = mp_node ? listnextnode(mp_node) : NULL; - next_mpath = - cur_mpath ? bgp_path_info_mpath_next(cur_mpath) : NULL; - tmp_info = mp_node ? listgetdata(mp_node) : NULL; + cur_iterator = cur_iterator->next; + } - if (debug) - zlog_debug("%pBD(%s): comparing candidate %s with existing mpath %s", - dest, bgp->name_pretty, - tmp_info ? tmp_info->peer->host : "NONE", - cur_mpath ? cur_mpath->peer->host : "NONE"); + if (debug) + zlog_debug("%pBD(%s): Mpath count %u is equal to maximum paths allowed, finished comparision for MPATHS", + dest, bgp->name_pretty, mpath_count); + + break; + } + if (debug) + zlog_debug("%pBD(%s): Candidate %s old_mpath: %u new_mpath: %u, Nexthop %pI4 current mpath count: %u", + dest, bgp->name_pretty, cur_iterator->peer->host, old_mpath, + new_mpath, &cur_iterator->attr->nexthop, mpath_count); /* - * If equal, the path was a multipath and is still a multipath. - * Insert onto new multipath list if maxpaths allows. + * There is nothing to do if the cur_iterator is neither a old path + * or a new path */ - if (mp_node && (listgetdata(mp_node) == cur_mpath)) { - list_delete_node(mp_list, mp_node); - bgp_path_info_mpath_dequeue(cur_mpath); - if ((mpath_count < maxpaths) - && prev_mpath - && bgp_path_info_nexthop_cmp(prev_mpath, - cur_mpath)) { - bgp_path_info_mpath_enqueue(prev_mpath, - cur_mpath); - prev_mpath = cur_mpath; - mpath_count++; - if (ecommunity_linkbw_present(bgp_attr_get_ecommunity( - cur_mpath->attr), - &bwval) || - ecommunity_linkbw_present( - bgp_attr_get_ipv6_ecommunity( - cur_mpath->attr), - &bwval)) - cum_bw += bwval; - else - all_paths_lb = false; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: %s is still multipath, cur count %d", - dest, path_buf, mpath_count); - } - } else { - mpath_changed = 1; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: remove mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &cur_mpath->attr->nexthop, - mpath_count); - } - } - mp_node = mp_next_node; - cur_mpath = next_mpath; + if (!old_mpath && !new_mpath) { + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + cur_iterator = cur_iterator->next; continue; } - if (cur_mpath - && (!mp_node - || (bgp_path_info_mpath_cmp(cur_mpath, - listgetdata(mp_node)) - < 0))) { - /* - * If here, we have an old multipath and either the - * mp_list - * is finished or the next mp_node points to a later - * multipath, so we need to purge this path from the - * multipath list - */ - bgp_path_info_mpath_dequeue(cur_mpath); - mpath_changed = 1; + if (new_mpath) { + mpath_count++; + + if (cur_iterator != new_best) + SET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); + + if (!old_mpath) + mpath_changed = true; + + if (ecommunity_linkbw_present(bgp_attr_get_ecommunity(cur_iterator->attr), + &bwval) || + ecommunity_linkbw_present(bgp_attr_get_ipv6_ecommunity( + cur_iterator->attr), + &bwval)) + cum_bw += bwval; + else + all_paths_lb = false; + if (debug) { - bgp_path_info_path_with_addpath_rx_str( - cur_mpath, path_buf, sizeof(path_buf)); - zlog_debug("%pBD: remove mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &cur_mpath->attr->nexthop, - mpath_count); + bgp_path_info_path_with_addpath_rx_str(cur_iterator, path_buf, + sizeof(path_buf)); + zlog_debug("%pBD: add mpath %s nexthop %pI4, cur count %d cum_bw: %" PRIu64 + " all_paths_lb: %u", + dest, path_buf, &cur_iterator->attr->nexthop, + mpath_count, cum_bw, all_paths_lb); } - cur_mpath = next_mpath; } else { /* - * If here, we have a path on the mp_list that was not - * previously - * a multipath (due to non-equivalance or maxpaths - * exceeded), - * or the matching multipath is sorted later in the - * multipath - * list. Before we enqueue the path on the new multipath - * list, - * make sure its not on the old_best multipath list or - * referenced - * via next_mpath: - * - If next_mpath points to this new path, update - * next_mpath to - * point to the multipath after this one - * - Dequeue the path from the multipath list just to - * make sure + * We know that old_mpath is true and new_mpath is false in this path */ - new_mpath = listgetdata(mp_node); - list_delete_node(mp_list, mp_node); - assert(new_mpath); - assert(prev_mpath); - if ((mpath_count < maxpaths) && (new_mpath != new_best) - && bgp_path_info_nexthop_cmp(prev_mpath, - new_mpath)) { - bgp_path_info_mpath_dequeue(new_mpath); - - bgp_path_info_mpath_enqueue(prev_mpath, - new_mpath); - prev_mpath = new_mpath; - mpath_changed = 1; - mpath_count++; - if (ecommunity_linkbw_present(bgp_attr_get_ecommunity( - new_mpath->attr), - &bwval) || - ecommunity_linkbw_present( - bgp_attr_get_ipv6_ecommunity( - new_mpath->attr), - &bwval)) - cum_bw += bwval; - else - all_paths_lb = false; - if (debug) { - bgp_path_info_path_with_addpath_rx_str( - new_mpath, path_buf, - sizeof(path_buf)); - zlog_debug("%pBD: add mpath %s nexthop %pI4, cur count %d", - dest, path_buf, - &new_mpath->attr->nexthop, - mpath_count); - } - } - mp_node = mp_next_node; + mpath_changed = true; + UNSET_FLAG(cur_iterator->flags, BGP_PATH_MULTIPATH); } + + cur_iterator = cur_iterator->next; } if (new_best) { - bgp_path_info_mpath_count_set(new_best, mpath_count - 1); - if (mpath_count <= 1 || - (!ecommunity_linkbw_present(bgp_attr_get_ecommunity( - new_best->attr), - &bwval) && - !ecommunity_linkbw_present(bgp_attr_get_ipv6_ecommunity( - new_best->attr), - &bwval))) - all_paths_lb = false; - else - cum_bw += bwval; - bgp_path_info_mpath_lb_update(new_best, true, - all_paths_lb, cum_bw); - + if (mpath_count > 1 || new_best->mpath) { + bgp_path_info_mpath_count_set(new_best, mpath_count); + bgp_path_info_mpath_lb_update(new_best, true, all_paths_lb, cum_bw); + } if (debug) zlog_debug("%pBD(%s): New mpath count (incl newbest) %d mpath-change %s all_paths_lb %d cum_bw %" PRIu64, dest, bgp->name_pretty, mpath_count, mpath_changed ? "YES" : "NO", all_paths_lb, cum_bw); + if (mpath_count == 1) + UNSET_FLAG(new_best->flags, BGP_PATH_MULTIPATH); if (mpath_changed || (bgp_path_info_mpath_count(new_best) != old_mpath_count)) SET_FLAG(new_best->flags, BGP_PATH_MULTIPATH_CHG); - if ((mpath_count - 1) != old_mpath_count || - old_cum_bw != cum_bw) + if ((mpath_count) != old_mpath_count || old_cum_bw != cum_bw) SET_FLAG(new_best->flags, BGP_PATH_LINK_BW_CHG); } } /* - * bgp_mp_dmed_deselect - * - * Clean up multipath information for BGP_PATH_DMED_SELECTED path that - * is not selected as best path - */ -void bgp_mp_dmed_deselect(struct bgp_path_info *dmed_best) -{ - struct bgp_path_info *mpinfo, *mpnext; - - if (!dmed_best) - return; - - for (mpinfo = bgp_path_info_mpath_first(dmed_best); mpinfo; - mpinfo = mpnext) { - mpnext = bgp_path_info_mpath_next(mpinfo); - bgp_path_info_mpath_dequeue(mpinfo); - } - - bgp_path_info_mpath_count_set(dmed_best, 0); - UNSET_FLAG(dmed_best->flags, BGP_PATH_MULTIPATH_CHG); - UNSET_FLAG(dmed_best->flags, BGP_PATH_LINK_BW_CHG); - assert(bgp_path_info_mpath_first(dmed_best) == NULL); -} - -/* * bgp_path_info_mpath_aggregate_update * * Set the multipath aggregate attribute. We need to see if the @@ -816,7 +604,7 @@ void bgp_path_info_mpath_aggregate_update(struct bgp_path_info *new_best, if (!new_best) return; - if (!bgp_path_info_mpath_count(new_best)) { + if (bgp_path_info_mpath_count(new_best) == 1) { if ((new_attr = bgp_path_info_mpath_attr(new_best))) { bgp_attr_unintern(&new_attr); bgp_path_info_mpath_attr_set(new_best, NULL); |