summaryrefslogtreecommitdiffstats
path: root/pimd/pim_bsr_rpdb.c
diff options
context:
space:
mode:
Diffstat (limited to 'pimd/pim_bsr_rpdb.c')
-rw-r--r--pimd/pim_bsr_rpdb.c634
1 files changed, 634 insertions, 0 deletions
diff --git a/pimd/pim_bsr_rpdb.c b/pimd/pim_bsr_rpdb.c
new file mode 100644
index 00000000..3ec9f99c
--- /dev/null
+++ b/pimd/pim_bsr_rpdb.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* PIM RP database for BSR operation
+ * Copyright (C) 2021 David Lamparter for NetDEF, Inc.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <lib/network.h>
+#include <lib/iana_afi.h>
+#include <lib/sockunion.h>
+
+#include "if.h"
+#include "pimd.h"
+#include "pim_iface.h"
+#include "pim_instance.h"
+#include "pim_rpf.h"
+#include "pim_hello.h"
+#include "pim_pim.h"
+#include "pim_nht.h"
+#include "pim_bsm.h"
+#include "pim_time.h"
+
+/* safety limits to prevent DoS/memory exhaustion attacks against the BSR
+ *
+ * The BSR is more susceptible than other PIM protocol operation because
+ * Candidate-RP messages are unicast to the BSR without any 2-way interaction
+ * and can thus be spoofed blindly(!) from anywhere in the internet.
+ *
+ * Everything else is on-link, multicast, or requires an adjacency - much
+ * harder to mess with.
+ */
+
+/* total number of RPs we keep information for */
+static size_t bsr_max_rps = 1024;
+
+DEFINE_MTYPE_STATIC(PIMD, PIM_BSR_CRP, "PIM BSR C-RP");
+DEFINE_MTYPE_STATIC(PIMD, PIM_BSR_GROUP, "PIM BSR range");
+DEFINE_MTYPE_STATIC(PIMD, PIM_BSR_ITEM, "PIM BSR C-RP range item");
+
+static int rp_cmp(const struct bsr_crp_rp *a, const struct bsr_crp_rp *b)
+{
+ return pim_addr_cmp(a->addr, b->addr);
+}
+
+DECLARE_RBTREE_UNIQ(bsr_crp_rps, struct bsr_crp_rp, item, rp_cmp);
+
+static int group_cmp(const struct bsr_crp_group *a,
+ const struct bsr_crp_group *b)
+{
+ return prefix_cmp(&a->range, &b->range);
+}
+
+DECLARE_RBTREE_UNIQ(bsr_crp_groups, struct bsr_crp_group, item, group_cmp);
+
+static int r_g_cmp(const struct bsr_crp_item *a, const struct bsr_crp_item *b)
+{
+ return prefix_cmp(&a->group->range, &b->group->range);
+}
+
+DECLARE_RBTREE_UNIQ(bsr_crp_rp_groups, struct bsr_crp_item, r_g_item, r_g_cmp);
+
+static int g_r_cmp(const struct bsr_crp_item *a, const struct bsr_crp_item *b)
+{
+ const struct bsr_crp_rp *rp_a = a->rp, *rp_b = b->rp;
+
+ /* NHT-failed RPs last */
+ if (rp_a->nht_ok > rp_b->nht_ok)
+ return -1;
+ if (rp_a->nht_ok < rp_b->nht_ok)
+ return 1;
+
+ /* This function determines BSR policy in what subset of the received
+ * RP candidates to advertise. The BSR is free to make its choices
+ * any way it deems useful
+ */
+
+ /* lower numeric values are better */
+ if (rp_a->prio < rp_b->prio)
+ return -1;
+ if (rp_a->prio > rp_b->prio)
+ return 1;
+
+ /* prefer older RP for less churn */
+ if (rp_a->seen_first < rp_b->seen_first)
+ return -1;
+ if (rp_a->seen_first > rp_b->seen_first)
+ return 1;
+
+ return pim_addr_cmp(rp_a->addr, rp_b->addr);
+}
+
+DECLARE_RBTREE_UNIQ(bsr_crp_group_rps, struct bsr_crp_item, g_r_item, g_r_cmp);
+
+void pim_bsm_generate(struct bsm_scope *scope)
+{
+ struct bsm_frag *frag;
+ struct bsm_hdr *hdr;
+ bool have_dead = false;
+
+ assertf(scope->state == BSR_ELECTED, "state=%d", scope->state);
+
+ pim_bsm_frags_free(scope);
+
+ struct bsr_crp_group *group;
+ struct bsr_crp_item *item;
+ struct bsr_crp_rp *rp;
+ size_t n_groups = 0, n_rps = 0;
+
+ frr_each (bsr_crp_groups, scope->ebsr_groups, group) {
+ if (group->n_selected == 0) {
+ if (group->dead_count >= PIM_BSR_DEAD_COUNT)
+ continue;
+
+ have_dead = true;
+ } else
+ group->dead_count = 0;
+
+ n_groups++;
+ n_rps += group->n_selected;
+ }
+
+ if (PIM_DEBUG_BSM)
+ zlog_debug("Generating BSM (%zu ranges, %zu RPs)", n_groups, n_rps);
+
+ size_t datalen = PIM_MSG_HEADER_LEN + sizeof(*hdr) +
+ n_groups * sizeof(struct bsmmsg_grpinfo) +
+ n_rps * sizeof(struct bsmmsg_rpinfo);
+
+ frag = XCALLOC(MTYPE_PIM_BSM_FRAG, sizeof(*frag) + datalen);
+
+ uint8_t *pos = frag->data + PIM_MSG_HEADER_LEN;
+ uint8_t *end = frag->data + datalen;
+
+ hdr = (struct bsm_hdr *)pos;
+ pos += sizeof(*hdr);
+ assert(pos <= end);
+
+ /* TODO: make BSR hashmasklen configurable */
+#if PIM_IPV == 6
+ hdr->hm_len = 126;
+#else
+ hdr->hm_len = 30;
+#endif
+ hdr->bsr_prio = scope->current_bsr_prio;
+ hdr->bsr_addr.family = PIM_IANA_AFI;
+ hdr->bsr_addr.reserved = 0;
+ hdr->bsr_addr.addr = scope->bsr_addrsel.run_addr;
+
+ frr_each (bsr_crp_groups, scope->ebsr_groups, group) {
+ if (group->n_selected == 0 &&
+ group->dead_count >= PIM_BSR_DEAD_COUNT)
+ continue;
+
+ struct bsmmsg_grpinfo *gi = (struct bsmmsg_grpinfo *)pos;
+
+ pos += sizeof(*gi);
+ assert(pos <= end);
+
+ gi->group.family = PIM_MSG_ADDRESS_FAMILY;
+ gi->group.mask = group->range.prefixlen;
+ gi->group.addr = group->range.prefix;
+
+ size_t n_added = 0;
+
+ frr_each (bsr_crp_group_rps, group->rps, item) {
+ if (!item->selected)
+ break;
+
+ struct bsmmsg_rpinfo *ri = (struct bsmmsg_rpinfo *)pos;
+
+ pos += sizeof(*ri);
+ assert(pos <= end);
+
+ rp = item->rp;
+ ri->rpaddr.family = PIM_MSG_ADDRESS_FAMILY;
+ ri->rpaddr.addr = rp->addr;
+ ri->rp_holdtime = htons(rp->holdtime);
+ ri->rp_pri = rp->prio;
+
+ n_added++;
+ }
+
+ gi->rp_count = group->n_selected;
+ gi->frag_rp_count = n_added;
+ assert(n_added == group->n_selected);
+ }
+
+ assertf(pos == end, "end-pos=%td", end - pos);
+ frag->size = datalen;
+
+ bsm_frags_add_head(scope->bsm_frags, frag);
+
+ scope->ebsr_have_dead_pending = have_dead;
+
+ /*
+ * The BSR itself doesn't receive (no loopback) the BSM msgs advertising
+ * the rps. Install the rps directly for the local BSR node.
+ */
+ pim_bsm_parse_install_g2rp(scope, ((uint8_t *) hdr) + PIM_BSM_HDR_LEN,
+ datalen - PIM_BSM_HDR_LEN - PIM_MSG_HEADER_LEN, scope->bsm_frag_tag);
+
+ pim_bsm_changed(scope);
+}
+
+static void pim_bsm_generate_timer(struct event *t)
+{
+ struct bsm_scope *scope = EVENT_ARG(t);
+
+ pim_bsm_generate(scope);
+}
+
+static void pim_bsm_generate_sched(struct bsm_scope *scope)
+{
+ assertf(scope->state == BSR_ELECTED, "state=%d", scope->state);
+
+ if (scope->t_ebsr_regen_bsm)
+ return;
+
+ event_add_timer(router->master, pim_bsm_generate_timer, scope, 1,
+ &scope->t_ebsr_regen_bsm);
+}
+
+void pim_bsm_sent(struct bsm_scope *scope)
+{
+ struct bsr_crp_group *group;
+ bool have_dead = false, changed = false;
+
+ if (!scope->ebsr_have_dead_pending)
+ return;
+
+ frr_each_safe (bsr_crp_groups, scope->ebsr_groups, group) {
+ if (group->n_selected != 0)
+ continue;
+
+ if (group->dead_count < PIM_BSR_DEAD_COUNT) {
+ group->dead_count++;
+ have_dead = true;
+ continue;
+ }
+
+ changed = true;
+
+ if (bsr_crp_group_rps_count(group->rps))
+ /* have RPs, but none selected */
+ continue;
+
+ /* no reason to keep this range anymore */
+ bsr_crp_groups_del(scope->ebsr_groups, group);
+ bsr_crp_group_rps_fini(group->rps);
+ XFREE(MTYPE_PIM_BSR_GROUP, group);
+ continue;
+ }
+
+ scope->ebsr_have_dead_pending = have_dead;
+ if (changed)
+ pim_bsm_generate_sched(scope);
+}
+
+static void bsr_crp_reselect(struct bsm_scope *scope,
+ struct bsr_crp_group *group)
+{
+ bool changed = false;
+ struct bsr_crp_item *item;
+ size_t n_selected = 0;
+
+ frr_each (bsr_crp_group_rps, group->rps, item) {
+ bool select = false;
+
+ /* hardcode best 2 RPs for now */
+ if (item->rp->nht_ok && n_selected < 2) {
+ select = true;
+ n_selected++;
+ }
+
+ if (item->selected != select) {
+ changed = true;
+ item->selected = select;
+ }
+ }
+
+ changed |= group->deleted_selected;
+ group->deleted_selected = false;
+ group->n_selected = n_selected;
+
+ if (changed)
+ pim_bsm_generate_sched(scope);
+
+ scope->elec_rp_data_changed |= changed;
+}
+
+/* changing rp->nht_ok or rp->prio affects the sort order in group->rp
+ * lists, so need a delete & re-add if either changes
+ */
+static void pim_crp_nht_prio_change(struct bsr_crp_rp *rp, bool nht_ok,
+ uint8_t prio)
+{
+ struct bsr_crp_item *item;
+
+ frr_each (bsr_crp_rp_groups, rp->groups, item)
+ bsr_crp_group_rps_del(item->group->rps, item);
+
+ rp->prio = prio;
+ rp->nht_ok = nht_ok;
+
+ frr_each (bsr_crp_rp_groups, rp->groups, item) {
+ bsr_crp_group_rps_add(item->group->rps, item);
+ bsr_crp_reselect(rp->scope, item->group);
+ }
+}
+
+static struct bsr_crp_group *group_get(struct bsm_scope *scope,
+ prefix_pim *range)
+{
+ struct bsr_crp_group *group, ref;
+
+ ref.range = *range;
+ group = bsr_crp_groups_find(scope->ebsr_groups, &ref);
+ if (!group) {
+ group = XCALLOC(MTYPE_PIM_BSR_GROUP, sizeof(*group));
+ group->range = *range;
+ bsr_crp_group_rps_init(group->rps);
+ bsr_crp_groups_add(scope->ebsr_groups, group);
+ }
+ return group;
+}
+
+static void pim_crp_update(struct bsr_crp_rp *rp, struct cand_rp_msg *msg,
+ size_t ngroups)
+{
+ struct bsr_crp_rp_groups_head oldgroups[1];
+ struct bsr_crp_item *item, itemref;
+ struct bsr_crp_group *group, groupref;
+
+ //struct bsm_scope *scope = rp->scope;
+
+ bsr_crp_rp_groups_init(oldgroups);
+ bsr_crp_rp_groups_swap_all(rp->groups, oldgroups);
+
+ itemref.rp = rp;
+ itemref.group = &groupref;
+
+ assert(msg || ngroups == 0);
+
+ for (size_t i = 0; i < ngroups; i++) {
+ if (msg->groups[i].family != PIM_MSG_ADDRESS_FAMILY)
+ continue;
+ if (msg->groups[i].bidir)
+ continue;
+
+ prefix_pim pfx;
+
+ pfx.family = PIM_AF;
+ pfx.prefixlen = msg->groups[i].mask;
+ pfx.prefix = msg->groups[i].addr;
+
+#if PIM_IPV == 4
+ if (pfx.prefixlen < 4)
+ continue;
+ if (!IPV4_CLASS_DE(ntohl(pfx.prefix.s_addr)))
+ continue;
+#endif
+
+ apply_mask(&pfx);
+
+ groupref.range = pfx;
+ item = bsr_crp_rp_groups_find(oldgroups, &itemref);
+
+ if (item) {
+ bsr_crp_rp_groups_del(oldgroups, item);
+ bsr_crp_rp_groups_add(rp->groups, item);
+ continue;
+ }
+
+ group = group_get(rp->scope, &pfx);
+
+ item = XCALLOC(MTYPE_PIM_BSR_ITEM, sizeof(*item));
+ item->rp = rp;
+ item->group = group;
+
+ bsr_crp_group_rps_add(group->rps, item);
+ bsr_crp_rp_groups_add(rp->groups, item);
+
+ bsr_crp_reselect(rp->scope, group);
+ }
+
+ while ((item = bsr_crp_rp_groups_pop(oldgroups))) {
+ group = item->group;
+ if (item->selected)
+ group->deleted_selected = true;
+
+ bsr_crp_group_rps_del(group->rps, item);
+ XFREE(MTYPE_PIM_BSR_ITEM, item);
+
+ bsr_crp_reselect(rp->scope, group);
+ }
+ bsr_crp_rp_groups_fini(oldgroups);
+
+ if (msg && msg->rp_prio != rp->prio)
+ pim_crp_nht_prio_change(rp, rp->nht_ok, msg->rp_prio);
+}
+
+void pim_crp_nht_update(struct pim_instance *pim, struct pim_nexthop_cache *pnc)
+{
+ struct bsm_scope *scope = &pim->global_scope;
+ struct bsr_crp_rp *rp, ref;
+ bool ok;
+
+ ref.addr = pnc->rpf.rpf_addr;
+ rp = bsr_crp_rps_find(scope->ebsr_rps, &ref);
+ assertf(rp, "addr=%pPA", &ref.addr);
+
+ ok = CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID);
+ if (ok == rp->nht_ok)
+ return;
+
+ if (PIM_DEBUG_BSM)
+ zlog_debug("Candidate-RP %pPA NHT %s", &rp->addr, ok ? "UP" : "DOWN");
+ pim_crp_nht_prio_change(rp, ok, rp->prio);
+}
+
+static void pim_crp_free(struct pim_instance *pim, struct bsr_crp_rp *rp)
+{
+ EVENT_OFF(rp->t_hold);
+ pim_nht_candrp_del(pim, rp->addr);
+ bsr_crp_rp_groups_fini(rp->groups);
+
+ XFREE(MTYPE_PIM_BSR_CRP, rp);
+}
+
+static void pim_crp_expire(struct event *t)
+{
+ struct bsr_crp_rp *rp = EVENT_ARG(t);
+ struct pim_instance *pim = rp->scope->pim;
+
+ if (PIM_DEBUG_BSM)
+ zlog_debug("Candidate-RP %pPA holdtime expired", &rp->addr);
+
+ pim_crp_update(rp, NULL, 0);
+
+ bsr_crp_rps_del(rp->scope->ebsr_rps, rp);
+ pim_crp_free(pim, rp);
+}
+
+int pim_crp_process(struct interface *ifp, pim_sgaddr *src_dst, uint8_t *buf,
+ uint32_t buf_size)
+{
+ struct pim_interface *pim_ifp = NULL;
+ struct pim_instance *pim;
+ struct bsm_scope *scope;
+
+ pim_ifp = ifp->info;
+ if (!pim_ifp) {
+ if (PIM_DEBUG_BSM)
+ zlog_debug("%s: multicast not enabled on interface %s",
+ __func__, ifp->name);
+ return -1;
+ }
+
+ //pim_ifp->pim_ifstat_bsm_rx++;
+ pim = pim_ifp->pim;
+ //pim->bsm_rcvd++;
+
+ if (!pim_ifp->bsm_enable) {
+ zlog_warn("%s: BSM not enabled on interface %s", __func__,
+ ifp->name);
+ //pim_ifp->pim_ifstat_bsm_cfg_miss++;
+ //pim->bsm_dropped++;
+ return -1;
+ }
+
+ if (buf_size < (PIM_MSG_HEADER_LEN + sizeof(struct cand_rp_msg))) {
+ if (PIM_DEBUG_BSM)
+ zlog_debug("%s: received buffer length of %d which is too small to properly decode",
+ __func__, buf_size);
+ return -1;
+ }
+
+ scope = &pim->global_scope;
+
+ if (scope->state < BSR_PENDING) {
+ if (PIM_DEBUG_BSM)
+ zlog_debug("received Candidate-RP message from %pPA while not BSR",
+ &src_dst->src);
+ return -1;
+ }
+
+ size_t remain = buf_size;
+ struct cand_rp_msg *crp_hdr;
+
+ buf += PIM_MSG_HEADER_LEN;
+ remain -= PIM_MSG_HEADER_LEN;
+
+ crp_hdr = (struct cand_rp_msg *)buf;
+ buf += sizeof(*crp_hdr);
+ remain -= sizeof(*crp_hdr);
+
+ size_t ngroups = crp_hdr->prefix_cnt;
+
+ if (remain < ngroups * sizeof(struct pim_encoded_group_ipv4)) {
+ if (PIM_DEBUG_BSM)
+ zlog_debug("truncated Candidate-RP advertisement for RP %pPA from %pPA (too short for %zu groups)",
+ (pim_addr *)&crp_hdr->rp_addr.addr,
+ &src_dst->src, ngroups);
+ return -1;
+ }
+
+ if (PIM_DEBUG_BSM)
+ zlog_debug("Candidate-RP: %pPA, prio=%u (from %pPA, %zu groups)",
+ (pim_addr *)&crp_hdr->rp_addr.addr, crp_hdr->rp_prio,
+ &src_dst->src, ngroups);
+
+
+ struct bsr_crp_rp *rp, ref;
+
+ ref.addr = crp_hdr->rp_addr.addr;
+ rp = bsr_crp_rps_find(scope->ebsr_rps, &ref);
+
+ if (!rp) {
+ if (bsr_crp_rps_count(scope->ebsr_rps) >= bsr_max_rps) {
+ zlog_err("BSR: number of tracked Candidate RPs (%zu) exceeds DoS-protection limit (%zu), dropping advertisement for RP %pPA (packet source %pPA)",
+ bsr_crp_rps_count(scope->ebsr_rps),
+ bsr_max_rps, (pim_addr *)&crp_hdr->rp_addr.addr,
+ &src_dst->src);
+ return -1;
+ }
+
+ if (PIM_DEBUG_BSM)
+ zlog_debug("new Candidate-RP: %pPA (from %pPA)",
+ (pim_addr *)&crp_hdr->rp_addr.addr,
+ &src_dst->src);
+
+ rp = XCALLOC(MTYPE_PIM_BSR_CRP, sizeof(*rp));
+ rp->scope = scope;
+ rp->addr = crp_hdr->rp_addr.addr;
+ rp->prio = 255;
+ bsr_crp_rp_groups_init(rp->groups);
+ rp->seen_first = monotime(NULL);
+
+ bsr_crp_rps_add(scope->ebsr_rps, rp);
+ rp->nht_ok = pim_nht_candrp_add(pim, rp->addr);
+ }
+
+ rp->seen_last = monotime(NULL);
+ rp->holdtime = ntohs(crp_hdr->rp_holdtime);
+
+ EVENT_OFF(rp->t_hold);
+ event_add_timer(router->master, pim_crp_expire, rp,
+ ntohs(crp_hdr->rp_holdtime), &rp->t_hold);
+
+ pim_crp_update(rp, crp_hdr, ngroups);
+ return 0;
+}
+
+void pim_crp_db_clear(struct bsm_scope *scope)
+{
+ struct bsr_crp_rp *rp;
+ struct bsr_crp_group *group;
+ struct bsr_crp_item *item;
+
+ while ((rp = bsr_crp_rps_pop(scope->ebsr_rps))) {
+ while ((item = bsr_crp_rp_groups_pop(rp->groups))) {
+ group = item->group;
+
+ if (item->selected)
+ group->deleted_selected = true;
+
+ bsr_crp_group_rps_del(group->rps, item);
+ XFREE(MTYPE_PIM_BSR_ITEM, item);
+ }
+ pim_crp_free(scope->pim, rp);
+ }
+
+ while ((group = bsr_crp_groups_pop(scope->ebsr_groups))) {
+ assertf(!bsr_crp_group_rps_count(group->rps),
+ "range=%pFX rp_count=%zu", &group->range,
+ bsr_crp_group_rps_count(group->rps));
+
+ bsr_crp_group_rps_fini(group->rps);
+ XFREE(MTYPE_PIM_BSR_GROUP, group);
+ }
+}
+
+int pim_crp_db_show(struct vty *vty, struct bsm_scope *scope, bool json)
+{
+ struct bsr_crp_rp *rp;
+ struct bsr_crp_item *item;
+
+ vty_out(vty, "RP/Group NHT Prio Uptime Hold\n");
+
+ frr_each (bsr_crp_rps, scope->ebsr_rps, rp) {
+ vty_out(vty, "%-15pPA %4s %4u %8ld %4lu\n", &rp->addr,
+ rp->nht_ok ? "UP" : "DOWN", rp->prio,
+ (long)(monotime(NULL) - rp->seen_first),
+ event_timer_remain_second(rp->t_hold));
+
+ frr_each (bsr_crp_rp_groups, rp->groups, item)
+ vty_out(vty, "%c %-18pFX\n", item->selected ? '>' : ' ',
+ &item->group->range);
+ }
+
+ return CMD_SUCCESS;
+}
+
+int pim_crp_groups_show(struct vty *vty, struct bsm_scope *scope, bool json)
+{
+ struct bsr_crp_group *group;
+ struct bsr_crp_item *item;
+
+ if (scope->ebsr_have_dead_pending)
+ vty_out(vty, "have_dead_pending\n");
+
+ frr_each (bsr_crp_groups, scope->ebsr_groups, group) {
+ vty_out(vty, "%c %pFX", group->n_selected ? '^' : '!',
+ &group->range);
+ if (group->n_selected == 0)
+ vty_out(vty, " (dead %u)", group->dead_count);
+
+ vty_out(vty, "\n");
+
+ frr_each (bsr_crp_group_rps, group->rps, item)
+ vty_out(vty, "%c %pPA\n", item->selected ? '>' : ' ',
+ &item->rp->addr);
+ }
+
+ return CMD_SUCCESS;
+}