/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include <math.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
// Definitions of kru_price_t, KRU_LIMIT, struct load_cl, LOADS_LEN and rand_bits()
// are provided by the surrounding KRU code.
/// Parametrization for speed of decay.
struct decay_config {
/// Bit shift per tick, fractional
double shift_bits;
/// Ticks to get zero loads
uint32_t max_ticks;
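/// Precomputed rounded multipliers 2^(32 - shift_bits * ticks) for small tick counts (index 0 unused)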
uint32_t mult_cache[32];
};
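/// Initialize the decay parameters; a load of KRU_LIMIT - 1 decays by max_decay in a single tick.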
static inline void decay_initialize(struct decay_config *decay, kru_price_t max_decay)
{
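// shift_bits == log2((KRU_LIMIT - 1) / (KRU_LIMIT - 1 - max_decay)): the fractional number of bits a load is shifted down by per tick.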
decay->shift_bits = log2(KRU_LIMIT - 1) - log2(KRU_LIMIT - 1 - max_decay);
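// Beyond max_ticks the multiplier drops below 2^-18, so even a full 16-bit load decays below 1 and can simply be zeroed.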
decay->max_ticks = 18 / decay->shift_bits;
decay->mult_cache[0] = 0; // not used
for (size_t ticks = 1; ticks < sizeof(decay->mult_cache) / sizeof(*decay->mult_cache); ticks++) {
decay->mult_cache[ticks] = exp2(32 - decay->shift_bits * ticks) + 0.5;
}
}
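/* Illustrative example (values not taken from any actual configuration): with shift_bits == 0.5,
   every load is multiplied by 2^-0.5 per tick, i.e. halved every two ticks;
   max_ticks == 18 / 0.5 == 36, and mult_cache[2] == 2^31 == 0x80000000,
   the Q0.32 fixed-point encoding of the factor 1/2. */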
/// Catch up the time drift with configurably slower decay.
static inline void update_time(struct load_cl *l, const uint32_t time_now,
const struct decay_config *decay)
{
uint32_t ticks;
uint32_t time_last = atomic_load_explicit(&l->time, memory_order_relaxed);
do {
ticks = time_now - time_last;
if (__builtin_expect(!ticks, true)) // we optimize for time not advancing
return;
// We accept some desynchronization of time_now (e.g. from different threads).
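// A step slightly backwards (time_now less than 1024 ticks behind time_last) wraps around to a huge ticks value and is ignored here.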
if (ticks > (uint32_t)-1024)
return;
} while (!atomic_compare_exchange_weak_explicit(&l->time, &time_last, time_now, memory_order_relaxed, memory_order_relaxed));
// If we got here, we have acquired a time difference that we are responsible for.
// Don't bother with the exact computation if many ticks have passed: all loads would decay to (almost surely) zero anyway. (The shortcut brings little to no speed-up.)
if (ticks > decay->max_ticks) {
memset(l->loads, 0, sizeof(l->loads));
return;
}
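// Q0.32 fixed-point multiplier 2^(32 - shift_bits * ticks): cached for small tick counts, computed otherwise.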
uint32_t mult;
if (__builtin_expect(ticks < sizeof(decay->mult_cache) / sizeof(*decay->mult_cache), 1)) {
mult = decay->mult_cache[ticks];
} else {
mult = exp2(32 - decay->shift_bits * ticks) + 0.5;
}
for (int i = 0; i < LOADS_LEN; ++i) {
// We perform decay for the acquired time difference; decays from different threads are commutative.
_Atomic uint16_t *load_at = (_Atomic uint16_t *)&l->loads[i];
uint16_t l1, load_orig = atomic_load_explicit(load_at, memory_order_relaxed);
const uint16_t rnd = rand_bits(16);
do {
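// (load_orig << 16) * mult is the decayed load in 16.48 fixed point.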
uint64_t m = (((uint64_t)load_orig << 16)) * mult;
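// Round to nearest at the 32-bit boundary, leaving 16.16 fixed point.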
m = (m >> 32) + ((m >> 31) & 1);
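// Keep the integer part; round up with probability given by the 16 fractional bits, so the expected value is preserved.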
l1 = (m >> 16) + (rnd < (uint16_t)m);
} while (!atomic_compare_exchange_weak_explicit(load_at, &load_orig, l1, memory_order_relaxed, memory_order_relaxed));
}
}