summaryrefslogtreecommitdiffstats
path: root/daemon
diff options
context:
space:
mode:
authorLukáš Ondráček <lukas.ondracek@nic.cz>2024-10-23 19:59:02 +0200
committerLukáš Ondráček <lukas.ondracek@nic.cz>2024-10-23 20:18:32 +0200
commiteb954b232093f43575a614e81d2a3a04683f4131 (patch)
tree25f8810187eb53d004f14ac5b59570547f6714f9 /daemon
parentRevert "daemon/defer: use total accounted time as KRU time" (diff)
downloadknot-resolver-eb954b232093f43575a614e81d2a3a04683f4131.tar.xz
knot-resolver-eb954b232093f43575a614e81d2a3a04683f4131.zip
daemon/defer: disable by default, use real time in KRU
Diffstat (limited to 'daemon')
-rw-r--r--daemon/defer.c91
-rw-r--r--daemon/defer.h9
-rw-r--r--daemon/lua/kres-gen-33.lua1
-rwxr-xr-xdaemon/lua/kres-gen.sh1
-rw-r--r--daemon/main.c2
5 files changed, 79 insertions, 25 deletions
diff --git a/daemon/defer.c b/daemon/defer.c
index 5c386eec..14d79f30 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -15,28 +15,52 @@
#define V6_PREFIXES_CNT (sizeof(V6_PREFIXES) / sizeof(*V6_PREFIXES))
#define MAX_PREFIXES_CNT ((V4_PREFIXES_CNT > V6_PREFIXES_CNT) ? V4_PREFIXES_CNT : V6_PREFIXES_CNT)
-#define LOADS_THRESHOLDS (uint16_t[]) {1<<4, 1<<8, 1<<11, -1} // the last one should be UINT16_MAX
-#define QUEUES_CNT (sizeof(LOADS_THRESHOLDS) / sizeof(*LOADS_THRESHOLDS) + 1) // +1 for unverified
-#define PRIORITY_SYNC (-1) // no queue
-#define PRIORITY_UDP (QUEUES_CNT - 1) // last queue
-
-#define KRU_CAPACITY (1<<10)
-#define MAX_DECAY (KRU_LIMIT * 0.0006929) // -> halving counters in 1s
-#define TIME_MULT 1/1 // NOLINT for now, TODO improve readability
- // max fraction of rate limit filled by one cpu (multiplies large int)
- // TODO divide by #cpus?
+#define LOADS_THRESHOLDS (uint16_t[]) {1<<4, 1<<8, 1<<11, -1} // the last one should be UINT16_MAX
+#define QUEUES_CNT (sizeof(LOADS_THRESHOLDS) / sizeof(*LOADS_THRESHOLDS) + 1) // +1 for unverified
+#define PRIORITY_SYNC (-1) // no queue
+#define PRIORITY_UDP (QUEUES_CNT - 1) // last queue
+
+#define KRU_CAPACITY (1<<19)
+ // same as ratelimiting default
+#define MAX_DECAY (KRU_LIMIT * 0.0006929)
+ // halving counters in 1s
+ // 5s from max to 2^11 (priority 3) // TODO change 2^11 to 2^12 to make the times equal?
+ // 3s from 2^11 to 2^8 (priority 2)
+ // 4s from 2^8 to 2^4 (priority 1)
+ // 4s from 2^4 to zero (priority 0)
+#define BASE_PRICE(nsec, cpus) ((uint64_t)MAX_DECAY * 10 * (nsec) / 1000000ll / (cpus)) // args parenthesized so expression arguments expand safely
+ // max value when the single host uses 1/10 of all cpus' time;
+ // needed cpu utilization (rate limit) for other thresholds and prefixes:
+ // single v6/48 v4/24 v6/32 v4/20 v4/18
+ // max: 10.000 % 40.00 % - - - -
+ // 2^11: 0.312 % 1.25 % 10.00 % 20.00 % 80.00 % - (priority 3)
+ // 2^8: 0.039 % 0.16 % 1.25 % 2.50 % 10.00 % 30.00 % (priority 2)
+ // 2^4: 0.002 % 0.01 % 0.08 % 0.16 % 0.63 % 1.87 % (priority 1)
+ // instant limit for single host and 1 cpu: (greater for larger networks and for more cpus)
+ // 35 us for 2^4, 0.56 ms for 2^8, 4.5 ms for 2^11, 144 ms max value
+ // TODO adjust somehow
+ // simple DoT query may cost 1 ms, DoH 2.5 ms; it gets priority 2 during handshake (on laptop);
+ // the instant limits can be doubled by:
+ // doubling half-life (approx.),
+ // doubling percents in the previous table, or
+ // doubling number of cpus
+ // possible solution:
+ // half-life 5s, BASE_PRICE /= 2.5 -> for 4 cpus 1.75 ms fits below 2^4;
+ // still not enough for home routers -> TODO make something configurable, maybe the BASE_PRICE multiplier
#define REQ_TIMEOUT 5000000 // ns (THREAD_CPUTIME), older deferred queries are dropped
#define IDLE_TIMEOUT 1000000 // ns (THREAD_CPUTIME); if exceeded, continue processing after next poll phase
#define PHASE_UDP_TIMEOUT 400000 // ns (THREAD_CPUTIME); switch between udp, non-udp phases
#define PHASE_NON_UDP_TIMEOUT 400000 // ns (THREAD_CPUTIME); after timeout or emptying queue
#define MAX_WAITING_REQS 10000 // if exceeded, process single deferred request immediately in poll phase
+ // TODO measure memory usage instead
#define VERBOSE_LOG(...) kr_log_debug(DEFER, " | " __VA_ARGS__)
struct defer { // header fields followed by the KRU data; the header is handed to mmapped_init() in defer_init()
size_t capacity; // set to KRU_CAPACITY in defer_init()
kru_price_t max_decay; // set to MAX_DECAY in defer_init()
+ int cpus; // value passed to defer_init() (must be >= 1); divisor in BASE_PRICE
bool using_avx2; // result of using_avx2() at initialization
_Alignas(64) uint8_t kru[]; // flexible array member, 64-byte aligned; presumably holds the KRU table -- confirm
};
@@ -62,13 +86,15 @@ enum phase {
uint64_t phase_elapsed = 0; // ns
bool phase_accounting = false; // add accounted time to phase_elapsed on next call of defer_account
-static inline void phase_set(enum phase p) {
+static inline void phase_set(enum phase p)
+{
if (phase != p) {
phase_elapsed = 0;
phase = p;
}
}
-static inline void phase_account(uint64_t nsec) {
+static inline void phase_account(uint64_t nsec)
+{
kr_assert(phase != PHASE_ANY);
phase_elapsed += nsec;
if ((phase == PHASE_UDP) && (phase_elapsed > PHASE_UDP_TIMEOUT)) {
@@ -99,7 +125,8 @@ static bool using_avx2(void)
}
/// Increment KRU counters by given time.
-void defer_account(uint64_t nsec, union kr_sockaddr *addr, bool stream) {
+void defer_account(uint64_t nsec, union kr_sockaddr *addr, bool stream)
+{
if (phase_accounting) {
phase_account(nsec);
phase_accounting = false;
@@ -110,7 +137,7 @@ void defer_account(uint64_t nsec, union kr_sockaddr *addr, bool stream) {
_Alignas(16) uint8_t key[16] = {0, };
uint16_t max_load = 0;
uint8_t prefix = 0;
- kru_price_t base_price = (uint64_t)MAX_DECAY * nsec * TIME_MULT / 1000000ll; // TODO adjust
+ kru_price_t base_price = BASE_PRICE(nsec, defer->cpus);
if (addr->ip.sa_family == AF_INET6) {
memcpy(key, &addr->ip6.sin6_addr, 16);
@@ -265,7 +292,8 @@ static inline void break_query(struct protolayer_iter_ctx *ctx, int err)
/// Process a single deferred query (or defer again) if there is any.
/// Time accounting should have been just started, the stamp is used, accounted address is set.
-static inline void process_single_deferred(void) {
+static inline void process_single_deferred(void)
+{
struct protolayer_iter_ctx *ctx = pop_query();
if (ctx == NULL) return;
@@ -314,7 +342,8 @@ static inline void process_single_deferred(void) {
}
/// Break expired requests at the beginning of queues, uses current stamp.
-static inline void cleanup_queues(void) {
+static inline void cleanup_queues(void)
+{
for (int i = 0; i < QUEUES_CNT; i++) {
int cnt = 0;
while (queue_len(queues[i]) > 0) {
@@ -338,6 +367,9 @@ static enum protolayer_iter_cb_result pl_defer_unwrap(
void *sess_data, void *iter_data,
struct protolayer_iter_ctx *ctx)
{
+ if (!defer)
+ return protolayer_continue(ctx);
+
if (ctx->session->outgoing)
return protolayer_continue(ctx);
@@ -378,7 +410,8 @@ static enum protolayer_iter_cb_result pl_defer_unwrap(
}
/// Idle: continue processing deferred requests.
-static void defer_queues_idle(uv_idle_t *handle) {
+static void defer_queues_idle(uv_idle_t *handle)
+{
kr_assert(waiting_requests > 0);
VERBOSE_LOG("IDLE\n");
VERBOSE_LOG(" %d waiting\n", waiting_requests);
@@ -401,12 +434,19 @@ static void defer_queues_idle(uv_idle_t *handle) {
}
-/// Initialize shared memory, queues, idle.
-int defer_init(uv_loop_t *loop)
+/// Initialize shared memory, queues. To be called from Lua.
+int defer_init(const char *mmap_file, int cpus)
{
+ int ret = 0;
+ if (cpus < 1) {
+ ret = EINVAL;
+ goto fail;
+ }
+
struct defer header = {
.capacity = KRU_CAPACITY,
.max_decay = MAX_DECAY,
+ .cpus = cpus,
.using_avx2 = using_avx2(),
};
@@ -418,9 +458,10 @@ int defer_init(uv_loop_t *loop)
kr_assert(header_size ==
sizeof(header.capacity) +
sizeof(header.max_decay) +
+ sizeof(header.cpus) +
sizeof(header.using_avx2)); // no undefined padding inside
- int ret = mmapped_init(&defer_mmapped, "defer", size, &header, header_size);
+ ret = mmapped_init(&defer_mmapped, mmap_file, size, &header, header_size);
if (ret == MMAPPED_WAS_FIRST) {
kr_log_info(SYSTEM, "Initializing prioritization...\n");
@@ -445,7 +486,6 @@ int defer_init(uv_loop_t *loop)
for (size_t i = 0; i < QUEUES_CNT; i++)
queue_init(queues[i]);
- uv_idle_init(loop, &idle_handle);
return 0;
fail:
@@ -454,8 +494,15 @@ fail:
return ret;
}
+/// Initialize the idle handle on the given loop (it is not started here); returns the uv_idle_init() result.
+int defer_init_idle(uv_loop_t *loop)
+{
+ return uv_idle_init(loop, &idle_handle);
+}
+
/// Initialize session queue
-int pl_defer_sess_init(struct session2 *session, void *data, void *param) {
+int pl_defer_sess_init(struct session2 *session, void *data, void *param)
+{
struct pl_defer_sess_data *sdata = data;
queue_init(sdata->queue);
return 0;
diff --git a/daemon/defer.h b/daemon/defer.h
index b110151d..ab7a6a8f 100644
--- a/daemon/defer.h
+++ b/daemon/defer.h
@@ -3,8 +3,13 @@
#include "lib/utils.h"
#include "lib/kru.h"
-/// Initialize defer, incl. shared memory with KRU.
-int defer_init(uv_loop_t *loop);
+/// Initialize defer, incl. shared memory with KRU, excl. idle.
+/// To be called from Lua; defer is disabled by default otherwise.
+KR_EXPORT
+int defer_init(const char *mmap_file, int cpus);
+
+/// Initialize idle.
+int defer_init_idle(uv_loop_t *loop);
/// Deinitialize shared memory.
void defer_deinit(void);
diff --git a/daemon/lua/kres-gen-33.lua b/daemon/lua/kres-gen-33.lua
index af41dcd0..26547de5 100644
--- a/daemon/lua/kres-gen-33.lua
+++ b/daemon/lua/kres-gen-33.lua
@@ -617,6 +617,7 @@ struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
int zi_zone_import(const zi_config_t);
_Bool ratelimiting_request_begin(struct kr_request *);
int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, int);
+int defer_init(const char *, int);
struct engine {
char _stub[];
};
diff --git a/daemon/lua/kres-gen.sh b/daemon/lua/kres-gen.sh
index 76e9b632..a395253b 100755
--- a/daemon/lua/kres-gen.sh
+++ b/daemon/lua/kres-gen.sh
@@ -347,6 +347,7 @@ ${CDEFS} ${KRESD} functions <<-EOF
zi_zone_import
ratelimiting_request_begin
ratelimiting_init
+ defer_init
EOF
echo "struct engine" | ${CDEFS} ${KRESD} types | sed '/module_array_t/,$ d'
diff --git a/daemon/main.c b/daemon/main.c
index 227beba4..e89e2b01 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -571,7 +571,7 @@ int main(int argc, char **argv)
lua_settop(the_engine->L, 0);
}
- if (defer_init(loop) != 0) {
+ if (defer_init_idle(loop) != 0) {
ret = EXIT_FAILURE;
goto cleanup;
}