summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorChristian Hopps <chopps@labn.net>2025-01-17 22:21:33 +0100
committerChristian Hopps <chopps@labn.net>2025-01-18 17:14:29 +0100
commit597d79a89e6628645e3648bdc03db0d7bdc7e0f4 (patch)
treee9b6e0195f29d416e9853c304bceb64c75560773 /lib
parentlib: mgmtd: only send notify selectors to backends that provide. (diff)
downloadfrr-597d79a89e6628645e3648bdc03db0d7bdc7e0f4.tar.xz
frr-597d79a89e6628645e3648bdc03db0d7bdc7e0f4.zip
lib: improve error handling for datastore notifications
Signed-off-by: Christian Hopps <chopps@labn.net>
Diffstat (limited to 'lib')
-rw-r--r--lib/northbound_notif.c148
1 files changed, 87 insertions, 61 deletions
diff --git a/lib/northbound_notif.c b/lib/northbound_notif.c
index b75c86561..10a81d05f 100644
--- a/lib/northbound_notif.c
+++ b/lib/northbound_notif.c
@@ -480,87 +480,96 @@ static struct op_changes_group *op_changes_group_next(void)
/* Query for changes and notify */
/* ---------------------------- */
+static void timer_walk_abort(struct nb_notif_walk_args *args);
static void timer_walk_continue(struct event *event);
+static void timer_walk_done(struct nb_notif_walk_args *args);
+
+static struct op_change *__next_change(struct op_changes_group *group)
+{
+ struct op_change *next = RB_NEXT(op_changes, group->cur_change);
+
+ /* Remove and free current so retry works */
+ RB_REMOVE(op_changes, group->cur_changes, group->cur_change);
+ op_change_free(group->cur_change);
+ return next;
+}
+
+static struct op_changes_group *__next_group(struct op_changes_group *group)
+{
+ __dbg("done with oper-path collection for group");
+ op_changes_group_free(group);
+ return op_changes_group_next();
+}
static enum nb_error oper_walk_done(const struct lyd_node *tree, void *arg, enum nb_error ret)
{
struct nb_notif_walk_args *args = arg;
struct op_changes_group *group = args->group;
const char *path = group->cur_change->path;
- const char *op = group->cur_changes == &group->adds ? "add" : "delete";
/* we don't send batches when yielding as we need completed edit in any patch */
assert(ret != NB_YIELD);
- nb_notif_walk = NULL;
-
if (ret == NB_ERR_NOT_FOUND) {
__dbg("Path not found while walking oper tree: %s", path);
- XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args);
- return ret;
- }
- /* Something else went wrong with the walk */
- if (ret != NB_OK) {
+ ret = NB_OK;
+ } else if (ret != NB_OK) {
error:
- __log_err("Error notifying for datastore change on path: %s: %s", path,
- nb_err_name(ret));
- XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args);
- /* XXX Need to inform mgmtd/front-ends things are out-of-sync */
- return ret;
- }
-
- __dbg("done with oper-path collection for %s path: %s", op, path);
-
- /* Do we need this? */
- while (tree->parent)
- tree = lyd_parent(tree);
-
- /* Send the add (replace) notification */
- if (mgmt_be_send_ds_replace_notification(path, tree)) {
- ret = NB_ERR;
- goto error;
+ __log_err("Error notifying for datastore path: %s: %s", path, nb_err_name(ret));
+
+ timer_walk_abort(args);
+ goto done;
+ } else {
+ __dbg("Done with oper-path collection for path: %s", path);
+
+ /* Do we need this? */
+ while (tree->parent)
+ tree = lyd_parent(tree);
+
+ /* Send the add (replace) notification */
+ if (mgmt_be_send_ds_replace_notification(path, tree)) {
+ __log_err("Error sending notification message for path: %s", path);
+ ret = NB_ERR;
+ goto error;
+ }
}
/*
- * Advance to next change (either dels or adds or both).
+ * Advance to next change.
*/
- group->cur_change = RB_NEXT(op_changes, group->cur_change);
+ group->cur_change = __next_change(group);
if (!group->cur_change) {
- __dbg("done with oper-path collection for group");
- op_changes_group_free(group);
-
- group = op_changes_group_next();
- args->group = group;
- if (!group) {
- __dbg("done with ALL oper-path collection for notification");
- XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args);
+ args->group = __next_group(group);
+ if (!args->group) {
+ timer_walk_done(args);
goto done;
}
}
+ /* Run next walk after giving other events a shot to run */
event_add_timer_msec(nb_notif_master, timer_walk_continue, args, 0, &nb_notif_timer);
done:
/* Done with current walk and scheduled next one if there is more */
nb_notif_walk = NULL;
- return NB_OK;
+ return ret;
}
-static LY_ERR nb_notify_delete_changes(struct nb_notif_walk_args *args)
+static int nb_notify_delete_changes(struct nb_notif_walk_args *args)
{
struct op_changes_group *group = args->group;
- LY_ERR err;
group->cur_change = RB_MIN(op_changes, group->cur_changes);
while (group->cur_change) {
- err = mgmt_be_send_ds_delete_notification(group->cur_change->path);
- assert(err == LY_SUCCESS); /* XXX */
-
- group->cur_change = RB_NEXT(op_changes, group->cur_change);
+ if (mgmt_be_send_ds_delete_notification(group->cur_change->path)) {
+ __log_err("Error sending delete notification message for path: %s",
+ group->cur_change->path);
+ return 1;
+ }
+ group->cur_change = __next_change(group);
}
-
- return LY_SUCCESS;
+ return 0;
}
static void timer_walk_continue(struct event *event)
@@ -568,15 +577,17 @@ static void timer_walk_continue(struct event *event)
struct nb_notif_walk_args *args = EVENT_ARG(event);
struct op_changes_group *group = args->group;
const char *path;
- LY_ERR err;
+ int ret;
/*
* Notify about deletes until we have add changes to collect.
*/
while (group->cur_changes == &group->dels) {
- err = nb_notify_delete_changes(args);
- assert(err == LY_SUCCESS); /* XXX */
- assert(!group->cur_change); /* we send all the deletes in one message */
+ ret = nb_notify_delete_changes(args);
+ if (ret) {
+ timer_walk_abort(args);
+ return;
+ }
/* after deletes advance to adds */
group->cur_changes = &group->adds;
@@ -584,14 +595,9 @@ static void timer_walk_continue(struct event *event)
if (group->cur_change)
break;
- __dbg("done with oper-path change group");
- op_changes_group_free(group);
-
- group = op_changes_group_next();
- args->group = group;
- if (!group) {
- __dbg("done with ALL oper-path changes");
- XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args);
+ args->group = __next_group(group);
+ if (!args->group) {
+ timer_walk_done(args);
return;
}
}
@@ -621,6 +627,22 @@ static void timer_walk_start(struct event *event)
timer_walk_continue(event);
}
+static void timer_walk_abort(struct nb_notif_walk_args *args)
+{
+ __dbg("Failed notifying datastore changes, will retry");
+
+ __dbg("oper-state notify setting retry timer to fire in: %d msec ", NB_NOTIF_TIMER_MSEC);
+ event_add_timer_msec(nb_notif_master, timer_walk_continue, args, NB_NOTIF_TIMER_MSEC,
+ &nb_notif_timer);
+}
+
+static void timer_walk_done(struct nb_notif_walk_args *args)
+{
+ __dbg("Finished notifying for all datastore changes");
+ assert(!args->group);
+ XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args);
+}
+
static void nb_notif_set_walk_timer(void)
{
if (nb_notif_walk) {
@@ -659,19 +681,23 @@ void nb_notif_init(struct event_loop *tm)
void nb_notif_terminate(void)
{
- struct nb_notif_walk_args *args;
+ struct nb_notif_walk_args *args = nb_notif_timer ? EVENT_ARG(nb_notif_timer) : NULL;
struct op_changes_group *group;
+ __dbg("terminating: timer: %p timer arg: %p walk %p", nb_notif_timer, args, nb_notif_walk);
+
EVENT_OFF(nb_notif_timer);
if (nb_notif_walk) {
- nb_oper_cancel_walk(nb_notif_walk);
- /* need to free the group that's in the walk */
+ /* Grab walk args from walk if active. */
args = nb_oper_walk_finish_arg(nb_notif_walk);
- if (args)
- op_changes_group_free(args->group);
+ nb_oper_cancel_walk(nb_notif_walk);
nb_notif_walk = NULL;
}
+ if (args) {
+ op_changes_group_free(args->group);
+ XFREE(MTYPE_NB_NOTIF_WALK_ARGS, args);
+ }
while ((group = op_changes_group_next()))
op_changes_group_free(group);