diff options
author | Donald Sharp <sharpd@nvidia.com> | 2021-02-03 15:13:59 +0100 |
---|---|---|
committer | Donald Sharp <sharpd@nvidia.com> | 2022-01-20 17:56:27 +0100 |
commit | ab01a00176db60080047731ab548136e773b6c51 (patch) | |
tree | dbfb8df8a6273dbbf259ec19f89e2484508f1921 /lib | |
parent | Merge pull request #10360 from opensourcerouting/clippy-rel-endian (diff) | |
download | frr-ab01a00176db60080047731ab548136e773b6c51.tar.xz frr-ab01a00176db60080047731ab548136e773b6c51.zip |
lib: Figure out if we are being starved for cpu
If a thread timer should have popped CPU_CONSUMED_CHECK
seconds in the past and we are only handling it now, consider
the thread starved and log a warning about it.
Signed-off-by: Donald Sharp <sharpd@nvidia.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lib_errors.c | 6 | ||||
-rw-r--r-- | lib/lib_errors.h | 1 | ||||
-rw-r--r-- | lib/thread.c | 19 |
3 files changed, 26 insertions, 0 deletions
diff --git a/lib/lib_errors.c b/lib/lib_errors.c index a139b9a14..acc9a05c3 100644 --- a/lib/lib_errors.c +++ b/lib/lib_errors.c @@ -57,6 +57,12 @@ static struct log_ref ferr_lib_warn[] = { .suggestion = "Gather log data and open an Issue", }, { + .code = EC_LIB_STARVE_THREAD, + .title = "The Event subsystem has detected a thread starvation issue", + .description = "The event subsystem has detected a thread starvation issue. This typically indicates that the system FRR is running on is heavily loaded and this load might be impacting FRR's ability to handle events in a timely fashion", + .suggestion = "Gather log data and open an Issue", + }, + { .code = EC_LIB_NO_THREAD, .title = "The Event subsystem has detected an internal FD problem", .description = "The Event subsystem has detected a file descriptor read/write event without an associated handling function. This is a bug, please collect log data and open an issue.", diff --git a/lib/lib_errors.h b/lib/lib_errors.h index 9f0f58d20..64ac6c1ce 100644 --- a/lib/lib_errors.h +++ b/lib/lib_errors.h @@ -46,6 +46,7 @@ enum lib_log_refs { EC_LIB_LINUX_NS, EC_LIB_SLOW_THREAD_CPU, EC_LIB_SLOW_THREAD_WALL, + EC_LIB_STARVE_THREAD, EC_LIB_NO_THREAD, EC_LIB_RMAP_RECURSION_LIMIT, EC_LIB_BACKUP_CONFIG, diff --git a/lib/thread.c b/lib/thread.c index 77e34f48f..73e0e4887 100644 --- a/lib/thread.c +++ b/lib/thread.c @@ -1651,12 +1651,31 @@ static void thread_process_io(struct thread_master *m, unsigned int num) static unsigned int thread_process_timers(struct thread_master *m, struct timeval *timenow) { + struct timeval prev = *timenow; + bool displayed = false; struct thread *thread; unsigned int ready = 0; while ((thread = thread_timer_list_first(&m->timer))) { if (timercmp(timenow, &thread->u.sands, <)) break; + prev = thread->u.sands; + prev.tv_sec += 4; + /* + * If the timer would have popped 4 seconds in the + * past then we are in a situation where we are + * really getting behind on handling of events. 
+ * Let's log it and do the right thing with it. + */ + if (timercmp(timenow, &prev, >)) { + if (!displayed) + flog_warn( + EC_LIB_STARVE_THREAD, + "Thread Starvation: %pTHD was scheduled to pop greater than 4s ago", + thread); + displayed = true; + } + thread_timer_list_pop(&m->timer); thread->type = THREAD_READY; thread_list_add_tail(&m->ready, thread); |