diff options
author | Lukáš Ondráček <lukas.ondracek@nic.cz> | 2024-12-04 17:04:59 +0100 |
---|---|---|
committer | Lukáš Ondráček <lukas.ondracek@nic.cz> | 2024-12-04 17:04:59 +0100 |
commit | 70132c11b74f7976d84a2a383affb92308d81741 (patch) | |
tree | e9e0317dcd954b99c068a664a144963cd9e43267 /daemon | |
parent | daemon/defer: change address mismatch assert to warning (diff) | |
download | knot-resolver-70132c11b74f7976d84a2a383affb92308d81741.tar.xz knot-resolver-70132c11b74f7976d84a2a383affb92308d81741.zip |
daemon/defer: configuration changes + logging
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/defer.c | 142 |
1 files changed, 113 insertions, 29 deletions
diff --git a/daemon/defer.c b/daemon/defer.c index 0f59ba62..c4f7e021 100644 --- a/daemon/defer.c +++ b/daemon/defer.c @@ -2,6 +2,7 @@ * SPDX-License-Identifier: GPL-3.0-or-later */ +#include <math.h> #include "daemon/defer.h" #include "daemon/session2.h" #include "daemon/udp_queue.h" @@ -19,38 +20,24 @@ #define V6_PREFIXES_CNT (sizeof(V6_PREFIXES) / sizeof(*V6_PREFIXES)) #define MAX_PREFIXES_CNT ((V4_PREFIXES_CNT > V6_PREFIXES_CNT) ? V4_PREFIXES_CNT : V6_PREFIXES_CNT) -#define LOADS_THRESHOLDS (uint16_t[]) {1<<4, 1<<8, 1<<11, -1} // the last one should be UINT16_MAX +#define LOADS_THRESHOLDS (uint16_t[]) {1<<4, 1<<8, 1<<12, -1} // the last one should be UINT16_MAX #define QUEUES_CNT (sizeof(LOADS_THRESHOLDS) / sizeof(*LOADS_THRESHOLDS) + 1) // +1 for unverified #define PRIORITY_SYNC (-1) // no queue #define PRIORITY_UDP (QUEUES_CNT - 1) // last queue -#define KRU_CAPACITY (1<<19) - // same as ratelimiting default -#define MAX_DECAY (KRU_LIMIT * 0.0006929) - // halving counters in 1s - // 5s from max to 2^11 (priority 3) // TODO change 2^11 to 2^12 to make the times equal? - // 3s from 2^11 to 2^8 (priority 2) - // 4s from 2^8 to 2^4 (priority 1) - // 4s from 2^4 to zero (priority 0) -#define BASE_PRICE(nsec, cpus) ((uint64_t)MAX_DECAY * 10 * nsec / 1000000ll / cpus) - // max value when the single host uses 1/10 of all cpus' time; - // needed cpu utilization (rate limit) for other thresholds and prefixes: - // single v6/48 v4/24 v6/32 v4/20 v4/18 - // max: 10.000 % 40.00 % - - - - - // 2^11: 0.312 % 1.25 % 10.00 % 20.00 % 80.00 % - (priority 3) - // 2^8: 0.039 % 0.16 % 1.25 % 2.50 % 10.00 % 30.00 % (priority 2) - // 2^4: 0.002 % 0.01 % 0.08 % 0.16 % 0.63 % 1.87 % (priority 1) - // instant limit for single host and 1 cpu: (greater for larger networks and for more cpus) - // 35 us for 2^4, 0.56 ms for 2^8, 4.5 ms for 2^11, 144 ms max value - // TODO adjust somehow - // simple DoT query may cost 1 ms, DoH 2.5 ms; it gets priority 2 during handshake (on laptop); - // the instant limits can be doubled by: - // doubling half-life (approx.), - // doubling percents in the previous table, or - // doubling number of cpus - // possible solution: - // half-life 5s, BASE_PRICE /= 2.5 -> for 4 cpus 1.75 ms fits below 2^4; - // still not enough for home routers -> TODO make something configurable, maybe the BASE_PRICE multiplier +#define KRU_CAPACITY (1<<19) // same as ratelimiting default +#define MAX_DECAY (KRU_LIMIT * 0.00013862) // half-life: 5s +#define BASE_PRICE(nsec, cpus) ((uint64_t)MAX_DECAY * 4 * nsec / 1000000ll / cpus) + // max value reached when the single host uses 1/4 of all cpus' time; + // instant limits in us are multiplied by cpus while rate limits in % of all cpus' time are not; + // see log written by defer_str_conf for details + // TODO check that configuration makes sense (public resolvers vs home routers) + // laptop measurements: + // simple cached queries: + // TCP 0.5 ms + // DoT: 1.0 ms + // DoH: 2.5 ms + // uncached resolving: ~10 ms or more #define REQ_TIMEOUT 20000000 // ns (THREAD_CPUTIME), older deferred queries are dropped #define IDLE_TIMEOUT 1000000 // ns (THREAD_CPUTIME); if exceeded, continue processing after next poll phase @@ -59,7 +46,7 @@ #define MAX_WAITING_REQS_SIZE (64 * 1024 * 1024) // bytes; if exceeded, some deferred requests are processed in poll phase // single TCP allocates more than 64KiB wire buffer // TODO check whether all important allocations are counted; - // different things are not counted: tasks and subsessions (not deferred after created), uv handles, queues overhead, ...; + // different things are not counted: tasks and subsessions (not deferred after creation), uv handles, queues overhead, ...; // payload is counted either as part of session wire buffer (for stream) or as part of iter ctx (for datagrams) #define VERBOSE_LOG(...) kr_log_debug(DEFER, " | " __VA_ARGS__) @@ -134,6 +121,96 @@ static bool using_avx2(void) return result; } +/// Print configuration into desc array. +void defer_str_conf(char *desc, int desc_len) { + int len = 0; +#define append(...) len += snprintf(desc + len, desc_len > len ? desc_len - len : 0, __VA_ARGS__) +#define append_time(prefix, ms, suffix) { \ + if (ms < 1) append(prefix "%7.1f us" suffix, ms * 1000); \ + else if (ms < 1000) append(prefix "%7.1f ms" suffix, ms); \ + else append(prefix "%7.1f s " suffix, ms / 1000); } + append( " Expected cpus/procs: %5d\n", defer->cpus); + + append( " Max waiting requests:%7.1f MiB\n", MAX_WAITING_REQS_SIZE / 1024.0 / 1024.0); + append_time(" Request timeout: ", REQ_TIMEOUT / 1000000.0, "\n"); + append_time(" Idle: ", IDLE_TIMEOUT / 1000000.0, "\n"); + append_time(" UDP phase: ", PHASE_UDP_TIMEOUT / 1000000.0, "\n"); + append_time(" Non-UDP phase: ", PHASE_NON_UDP_TIMEOUT / 1000000.0, "\n"); + append( " Priority levels: %5ld + UDP\n", QUEUES_CNT - 1); + + append( " KRU capacity: %7.1f k\n", KRU_CAPACITY / 1000.0); + + bool uniform_thresholds = true; + for (int i = 1; i < QUEUES_CNT - 2; i++) + uniform_thresholds &= (LOADS_THRESHOLDS[i] == LOADS_THRESHOLDS[i-1] * LOADS_THRESHOLDS[0]); + uniform_thresholds &= ((1<<16) == (int)LOADS_THRESHOLDS[QUEUES_CNT - 3] * LOADS_THRESHOLDS[0]); + + append( " Max decay: %7.3f %% per ms (32-bit: %d)\n", + 100.0 * MAX_DECAY / KRU_LIMIT, (kru_price_t)MAX_DECAY); + float half_life = -1.0 / log2f(1.0 - MAX_DECAY / KRU_LIMIT); + append_time(" Half-life: ", half_life, "\n"); + if (uniform_thresholds) + append_time(" Priority rise in: ", half_life * 16 / (QUEUES_CNT - 1), "\n"); + append_time(" Counter reset in: ", half_life * 16, "\n"); + + append(" Rate limits for crossing priority levels as CPU utilization out of %d cores:\n", + defer->cpus); + + uint8_t *const prefixes[] = {V4_PREFIXES, V6_PREFIXES}; + kru_price_t *const rate_mult[] = {V4_RATE_MULT, V6_RATE_MULT}; + const size_t prefixes_cnt[] = {V4_PREFIXES_CNT, V6_PREFIXES_CNT}; + const int version[] = {4, 6}; + + append("%15s", ""); + for (int j = 0; j < 3; j++) + append("%10d", j+1); + append("%10s\n", "max"); + + for (int v = 0; v < 2; v++) { + for (int i = prefixes_cnt[v] - 1; i >= 0; i--) { + append("%9sv%d/%-3d: ", "", version[v], prefixes[v][i]); + for (int j = 0; j < QUEUES_CNT - 1; j++) { + float needed_util = MAX_DECAY / (1<<16) * LOADS_THRESHOLDS[j] / BASE_PRICE(1000000, 1) * rate_mult[v][i]; + if (needed_util <= 1) { + append("%8.3f %%", needed_util * 100); + } else { + append("%8s ", "-"); + } + } + append("\n"); + } + } + + append(" Instant limits for crossing priority levels as CPU time (depends on cpu count):\n"); + + append("%15s", ""); + for (int j = 0; j < 3; j++) + append("%10d", j+1); + append("%10s\n", "max"); + + for (int v = 0; v < 2; v++) { + for (int i = prefixes_cnt[v] - 1; i >= 0; i--) { + append("%9sv%d/%-3d: ", "", version[v], prefixes[v][i]); + for (int j = 0; j < QUEUES_CNT - 1; j++) { + float needed_time = (float)KRU_LIMIT / (1<<16) * LOADS_THRESHOLDS[j] / BASE_PRICE(1000000, defer->cpus) * rate_mult[v][i]; + if (needed_time < 1) { + append("%7.1f us", needed_time * 1000); + } else if (needed_time < 1000) { + append("%7.1f ms", needed_time); + } else { + append("%7.1f s ", needed_time / 1000); + } + } + append("\n"); + } + } + append(" (values above max are indistinguishable)\n"); + +#undef append_time +#undef append +} + + /// Increment KRU counters by given time. void defer_charge(uint64_t nsec, union kr_sockaddr *addr, bool stream) { @@ -574,6 +651,13 @@ int defer_init(const char *mmap_file, int cpus) if (ret != 0) goto fail; kr_log_info(SYSTEM, "Prioritization initialized (%s).\n", (defer->using_avx2 ? "AVX2" : "generic")); + + // log current configuration + if (KR_LOG_LEVEL_IS(LOG_INFO) || kr_log_group_is_set(LOG_GRP_DEFER)) { + char desc[8000]; + defer_str_conf(desc, sizeof(desc)); + kr_log_info(DEFER, "Defer configuration:\n%s", desc); + } } else if (ret == 0) { defer = defer_mmapped.mem; kr_log_info(SYSTEM, "Using existing prioritization data (%s).\n", (defer->using_avx2 ? "AVX2" : "generic")); |