diff options
author | Daniel Salzman <daniel.salzman@nic.cz> | 2021-03-19 15:31:56 +0100 |
---|---|---|
committer | Daniel Salzman <daniel.salzman@nic.cz> | 2021-04-08 18:52:27 +0200 |
commit | 164fcff5bbe0a719d1346cbd21ad2dd9f8a4628d (patch) | |
tree | 53630619e1c34ff5b8843cae8159fbcad6671b44 | |
parent | server: add polling wrapper for unix poll and epoll (diff) | |
download | knot-164fcff5bbe0a719d1346cbd21ad2dd9f8a4628d.tar.xz knot-164fcff5bbe0a719d1346cbd21ad2dd9f8a4628d.zip |
server: merge epoll_ctx with fdset
-rw-r--r-- | Knot.files | 3 | ||||
-rw-r--r-- | configure.ac | 29 | ||||
-rw-r--r-- | src/knot/Makefile.inc | 3 | ||||
-rw-r--r-- | src/knot/common/apoll.h | 79 | ||||
-rw-r--r-- | src/knot/common/epoll_ctx.c | 261 | ||||
-rw-r--r-- | src/knot/common/epoll_ctx.h | 227 | ||||
-rw-r--r-- | src/knot/common/fdset.c | 260 | ||||
-rw-r--r-- | src/knot/common/fdset.h | 290 | ||||
-rw-r--r-- | src/knot/server/tcp-handler.c | 68 | ||||
-rw-r--r-- | src/knot/server/udp-handler.c | 51 | ||||
-rw-r--r-- | tests-fuzz/knotd_wrap/udp-handler.c | 4 | ||||
-rw-r--r-- | tests/knot/test_fdset.c | 214 | ||||
-rw-r--r-- | tests/knot/test_server.c | 3 |
13 files changed, 524 insertions, 968 deletions
diff --git a/Knot.files b/Knot.files index b0f235857..61b6366ec 100644 --- a/Knot.files +++ b/Knot.files @@ -108,9 +108,6 @@ src/knot/catalog/generate.c src/knot/catalog/generate.h src/knot/catalog/interpret.c src/knot/catalog/interpret.h -src/knot/common/apoll.h -src/knot/common/epoll_ctx.c -src/knot/common/epoll_ctx.h src/knot/common/evsched.c src/knot/common/evsched.h src/knot/common/fdset.c diff --git a/configure.ac b/configure.ac index 42161fcc0..773cea81b 100644 --- a/configure.ac +++ b/configure.ac @@ -283,6 +283,27 @@ AS_IF([test "$enable_systemd" = "yes"],[ ]) dnl enable_daemon +# Socket polling method +socket_polling= +AC_ARG_WITH([socket-polling], + AS_HELP_STRING([--with-socket-polling=auto|poll|epoll], + [Use specific socket polling method [default=auto]]), + [socket_polling=$withval], [socket_polling=auto] +) + +AS_CASE([$socket_polling], + [auto], [AC_CHECK_FUNCS([epoll_create], + [AC_DEFINE([HAVE_EPOLL], [1], [epoll available]) + socket_polling=epoll], + [socket_polling=poll])], + [poll], [socket_polling=poll], + [epoll], [AC_CHECK_FUNCS([epoll_create], + [AC_DEFINE([HAVE_EPOLL], [1], [epoll available]) + socket_polling=epoll], + [AC_MSG_ERROR([Epoll not available.])])], + [*], [AC_MSG_ERROR([Invalid value of --socket-polling.])] +) + # Alternative memory allocator malloc_LIBS= AC_ARG_WITH([memory-allocator], @@ -639,12 +660,6 @@ AC_CHECK_HEADERS_ONCE([pthread_np.h sys/uio.h bsd/string.h]) AC_CHECK_FUNCS([accept4 clock_gettime fgetln getline initgroups malloc_trim \ setgroups strlcat strlcpy sysctlbyname]) -# Check for a better polling method. -AC_CHECK_FUNCS([epoll_create], [ - AC_DEFINE([HAVE_EPOLL], [1], [epoll available]) - poll_method=epoll], [poll_method=poll] -) - # Check for robust memory cleanup implementations. AC_CHECK_FUNC([explicit_bzero], [ AC_DEFINE([HAVE_EXPLICIT_BZERO], [1], [explicit_bzero available]) @@ -744,7 +759,7 @@ result_msg_base=" Knot DNS $VERSION Use recvmmsg: ${enable_recvmmsg} Use SO_REUSEPORT(_LB): ${enable_reuseport} XDP support: ${enable_xdp} - Polling method: ${poll_method} + Socket polling: ${socket_polling} Memory allocator: ${with_memory_allocator} Fast zone parser: ${enable_fastparser} Utilities with IDN: ${with_libidn} diff --git a/src/knot/Makefile.inc b/src/knot/Makefile.inc index 88b1f13e2..5cc44fb33 100644 --- a/src/knot/Makefile.inc +++ b/src/knot/Makefile.inc @@ -119,9 +119,6 @@ libknotd_la_SOURCES = \ knot/query/query.h \ knot/query/requestor.c \ knot/query/requestor.h \ - knot/common/apoll.h \ - knot/common/epoll_ctx.c \ - knot/common/epoll_ctx.h \ knot/common/evsched.c \ knot/common/evsched.h \ knot/common/fdset.c \ diff --git a/src/knot/common/apoll.h b/src/knot/common/apoll.h deleted file mode 100644 index 8c666a191..000000000 --- a/src/knot/common/apoll.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -#pragma once - -#ifdef HAVE_EPOLL - #include "knot/common/epoll_ctx.h" - - #define apoll_t epoll_ctx_t - #define apoll_it_t epoll_it_t - #define apoll_sweep_state epoll_ctx_sweep_state_t - - #define APOLL_CTX_KEEP EPOLL_CTX_KEEP - #define APOLL_CTX_SWEEP EPOLL_CTX_SWEEP - - #define APOLL_POLLIN EPOLLIN - - #define APOLL_CTX_INIT_SIZE EPOLL_INIT_SIZE - - #define apoll_init(ctx, size) epoll_ctx_init(ctx, size) - #define apoll_close(ctx) epoll_ctx_close(ctx) - #define apoll_clear(ctx) epoll_ctx_clear(ctx) - #define apoll_add(ctx, fd, events, usrctx) epoll_ctx_add(ctx, fd, events, usrctx) - #define apoll_set_watchdog(ctx, idx, interval) epoll_ctx_set_watchdog(ctx, idx, interval) - #define apoll_get_length(ctx) epoll_ctx_get_length(ctx) - #define apoll_get_fd(ctx, idx) epoll_ctx_get_fd(ctx, idx) - #define apoll_poll(ctx, it, offset, timeout) epoll_ctx_wait(ctx, it, offset, timeout) - #define apoll_sweep(ctx, cb, data) epoll_ctx_sweep(ctx, cb, data) - #define apoll_it_next(it) epoll_it_next(it) - #define apoll_it_done(it) epoll_it_done(it) - #define apoll_it_remove(it) epoll_it_remove(it) - #define apoll_it_get_fd(it) epoll_it_get_fd(it) - #define apoll_it_get_idx(it) epoll_it_get_idx(it) - #define apoll_it_ev_is_pollin(it) epoll_it_ev_is_pollin(it) - #define apoll_it_ev_is_error(it) epoll_it_ev_is_err(it) -#else - #include "knot/common/fdset.h" - - #define apoll_t fdset_t - #define apoll_it_t fdset_it_t - #define apoll_sweep_state fdset_sweep_state_t - - #define APOLL_CTX_KEEP FDSET_KEEP - #define APOLL_CTX_SWEEP FDSET_SWEEP - - #define APOLL_POLLIN POLLIN - - #define APOLL_CTX_INIT_SIZE FDSET_INIT_SIZE - - #define apoll_init(ctx, size) fdset_init(ctx, size) - #define apoll_close(ctx) - #define apoll_clear(ctx) fdset_clear(ctx) - #define apoll_add(ctx, fd, events, usrctx) fdset_add(ctx, fd, events, usrctx) - #define apoll_set_watchdog(ctx, idx, interval) fdset_set_watchdog(ctx, idx, interval) - #define apoll_get_length(ctx) fdset_get_length(ctx) - #define apoll_get_fd(ctx, idx) fdset_get_fd(ctx, idx) - #define apoll_poll(ctx, it, offset, timeout) fdset_poll(ctx, it, offset, timeout) - #define apoll_sweep(ctx, cb, data) fdset_sweep(ctx, cb, data) - #define apoll_it_next(it) fdset_it_next(it) - #define apoll_it_done(it) fdset_it_done(it) - #define apoll_it_remove(it) fdset_it_remove(it) - #define apoll_it_get_fd(it) fdset_it_get_fd(it) - #define apoll_it_get_idx(it) fdset_it_get_idx(it) - #define apoll_it_ev_is_pollin(it) fdset_it_ev_is_pollin(it) - #define apoll_it_ev_is_error(it) fdset_it_ev_is_err(it) -#endif diff --git a/src/knot/common/epoll_ctx.c b/src/knot/common/epoll_ctx.c deleted file mode 100644 index 46ad8f1fa..000000000 --- a/src/knot/common/epoll_ctx.c +++ /dev/null @@ -1,261 +0,0 @@ -/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -#ifdef HAVE_EPOLL - -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <time.h> -#include <assert.h> -#include "knot/common/epoll_ctx.h" -#include "contrib/time.h" -#include "libknot/errcode.h" - -/* Realloc memory or return error (part of epoll_ctx_resize). */ -#define MEM_RESIZE(tmp, p, n) \ - if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) \ - return KNOT_ENOMEM; \ - (p) = tmp; - -static int epoll_ctx_resize(epoll_ctx_t *set, const unsigned size) -{ - void *tmp = NULL; - MEM_RESIZE(tmp, set->usrctx, size); - MEM_RESIZE(tmp, set->timeout, size); - MEM_RESIZE(tmp, set->ev, size); - set->size = size; - return KNOT_EOK; -} - -int epoll_ctx_init(epoll_ctx_t *ctx, const unsigned size) -{ - if (ctx == NULL) { - return KNOT_EINVAL; - } - - memset(ctx, 0, sizeof(epoll_ctx_t)); - - ctx->efd = epoll_create1(0); - if (ctx->efd < 0) { - return KNOT_EMFILE; - } - - return epoll_ctx_resize(ctx, size); -} - -int epoll_ctx_clear(epoll_ctx_t* ctx) -{ - if (ctx == NULL) { - return KNOT_EINVAL; - } - - int bck = ctx->efd; - free(ctx->ev); - free(ctx->usrctx); - free(ctx->timeout); - free(ctx->recv_ev); - memset(ctx, 0, sizeof(epoll_ctx_t)); - ctx->efd = bck; - return KNOT_EOK; -} - -void epoll_ctx_close(const epoll_ctx_t* ctx) -{ - close(ctx->efd); -} - -int epoll_ctx_add(epoll_ctx_t *ctx, const int fd, const unsigned events, void *usrctx) -{ - if (ctx == NULL || fd < 0) { - return KNOT_EINVAL; - } - - /* Realloc needed. */ - if (ctx->n == ctx->size && epoll_ctx_resize(ctx, ctx->size + EPOLL_INIT_SIZE)) - return KNOT_ENOMEM; - - /* Initialize. */ - const int i = ctx->n++; - ctx->ev[i].data.fd = fd; - ctx->ev[i].events = events; - ctx->usrctx[i] = usrctx; - ctx->timeout[i] = 0; - struct epoll_event ev = { - .data.u64 = i, - .events = events - }; - epoll_ctl(ctx->efd, EPOLL_CTL_ADD, fd, &ev); - - return i; -} - -static int epoll_ctx_remove(epoll_ctx_t *ctx, const unsigned idx) -{ - if (ctx == NULL || idx >= ctx->n) { - return KNOT_EINVAL; - } - - epoll_ctl(ctx->efd, EPOLL_CTL_DEL, ctx->ev[idx].data.fd, NULL); - const unsigned last = --ctx->n; - /* Nothing else if it is the last one. - * Move last -> i if some remain. */ - if (idx < last) { - ctx->ev[idx] = ctx->ev[last]; - ctx->timeout[idx] = ctx->timeout[last]; - ctx->usrctx[idx] = ctx->usrctx[last]; - struct epoll_event ev = { - .data.u64 = idx, - .events = ctx->ev[idx].events - }; - epoll_ctl(ctx->efd, EPOLL_CTL_MOD, ctx->ev[last].data.fd, &ev); - } - - return KNOT_EOK; -} - -int epoll_ctx_wait(epoll_ctx_t *ctx, epoll_it_t *it, const unsigned offset, const int timeout) -{ - if (ctx->recv_size != ctx->size) { - void *tmp = NULL; - MEM_RESIZE(tmp, ctx->recv_ev, ctx->size); - ctx->recv_size = ctx->size; - } - - it->ctx = ctx; - it->ptr = ctx->recv_ev; - it->offset = offset; - - /* - * NOTE: Can't skip offset without bunch of syscalls!! - * Because of that it waits for `ctx->n` (every socket). Offset is set when TCP - * trotlling is ON. Sometimes it can return with sockets where none of them are - * connection socket, but it should not be common. - * But it can cause problems when adopted in other use-case. - */ - return it->unprocessed = epoll_wait(ctx->efd, ctx->recv_ev, ctx->n, timeout * 1000); -} - -int epoll_ctx_set_watchdog(epoll_ctx_t *ctx, const unsigned idx, const int interval) -{ - if (ctx == NULL || idx >= ctx->n) { - return KNOT_EINVAL; - } - - /* Lift watchdog if interval is negative. */ - if (interval < 0) { - ctx->timeout[idx] = 0; - return KNOT_EOK; - } - - /* Update clock. */ - const struct timespec now = time_now(); - ctx->timeout[idx] = now.tv_sec + interval; /* Only seconds precision. */ - return KNOT_EOK; -} - -int epoll_ctx_get_fd(const epoll_ctx_t *ctx, const unsigned idx) -{ - if (ctx == NULL || idx >= ctx->n) { - return KNOT_EINVAL; - } - - return ctx->ev[idx].data.u64; -} - -unsigned epoll_ctx_get_length(const epoll_ctx_t *ctx) -{ - assert(ctx); - return ctx->n; -} - -int epoll_ctx_sweep(epoll_ctx_t* ctx, const epoll_ctx_sweep_cb_t cb, void *data) -{ - if (ctx == NULL || cb == NULL) { - return KNOT_EINVAL; - } - - /* Get time threshold. */ - const struct timespec now = time_now(); - unsigned idx = 0; - while (idx < ctx->n) { - /* Check sweep state, remove if requested. */ - if (ctx->timeout[idx] > 0 && ctx->timeout[idx] <= now.tv_sec) { - const int fd = epoll_ctx_get_fd(ctx, idx); - if (cb(ctx, fd, data) == EPOLL_CTX_SWEEP) { - if (epoll_ctx_remove(ctx, idx) == KNOT_EOK) { - continue; /* Stay on the index. */ - } - } - } - ++idx; - } - - return KNOT_EOK; -} - -void epoll_it_next(epoll_it_t *it) -{ - do { - it->ptr++; - it->unprocessed--; - } while (it->unprocessed > 0 && epoll_it_get_idx(it) < it->offset); -} - -int epoll_it_done(const epoll_it_t *it) -{ - return it->unprocessed <= 0; -} - -int epoll_it_remove(epoll_it_t *it) -{ - if (it == NULL || it->ctx == NULL) { - return KNOT_EINVAL; - } - epoll_ctx_t *ctx = it->ctx; - const int idx = epoll_it_get_idx(it); - epoll_ctx_remove(ctx, idx); - /* Iterator should return on last valid already processed element. */ - /* On `next` call (in for-loop) will point on first unprocessed. */ - it->ptr--; - return KNOT_EOK; -} - -int epoll_it_get_fd(const epoll_it_t *it) -{ - assert(it != NULL); - return it->ctx->ev[epoll_it_get_idx(it)].data.fd; -} - -unsigned epoll_it_get_idx(const epoll_it_t *it) -{ - assert(it != NULL); - return it->ptr->data.u64; -} - -int epoll_it_ev_is_pollin(const epoll_it_t *it) -{ - assert(it != NULL); - return it->ptr->events & EPOLLIN; -} - -int epoll_it_ev_is_err(const epoll_it_t *it) -{ - assert(it != NULL); - return it->ptr->events & (EPOLLERR|EPOLLHUP); -} - -#endif diff --git a/src/knot/common/epoll_ctx.h b/src/knot/common/epoll_ctx.h deleted file mode 100644 index a8fdc5f66..000000000 --- a/src/knot/common/epoll_ctx.h +++ /dev/null @@ -1,227 +0,0 @@ -/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -/*! - * \brief I/O multiplexing with context and timeouts for each fd. - */ - -#pragma once - -#ifdef HAVE_EPOLL - -#include <stddef.h> -#include <signal.h> -#include <sys/time.h> -#include <sys/epoll.h> - -#define EPOLL_INIT_SIZE 256 /* Resize step. */ - -/*! \brief Set of filedescriptors with associated context and timeouts. */ -typedef struct epoll_ctx { - int efd; /*!< File descriptor of epoll. */ - unsigned n; /*!< Active fds. */ - unsigned size; /*!< Array size (allocated). */ - unsigned recv_size; /*!< Size of array for received events. */ - struct epoll_event *ev; /*!< Epoll event storage for each fd */ - void* *usrctx; /*!< Context for each fd. */ - time_t *timeout; /*!< Timeout for each fd (seconds precision). */ - struct epoll_event *recv_ev; /*!< Array for received events. */ -} epoll_ctx_t; - -typedef struct epoll_it { - epoll_ctx_t *ctx; /*!< Iterator related context. */ - struct epoll_event *ptr; /*!< Pointer on processed event. */ - int offset; /*!< Event index offset. */ - int unprocessed; /*!< Unprocessed events left. */ -} epoll_it_t; - -/*! \brief Mark-and-sweep state. */ -typedef enum epoll_ctx_sweep_state { - EPOLL_CTX_KEEP, - EPOLL_CTX_SWEEP -} epoll_ctx_sweep_state_t; - -/*! \brief Sweep callback (set, index, data) */ -typedef enum epoll_ctx_sweep_state (*epoll_ctx_sweep_cb_t)(epoll_ctx_t*, int, void*); - -/*! - * \brief Initialize epoll_ctx to given size. - * - * \param ctx Target ctx. - * \param size Initial ctx size. - * - * \retval ret == 0 if successful. - * \retval ret < 0 on error. - */ -int epoll_ctx_init(epoll_ctx_t *ctx, const unsigned size); - -/*! - * \brief Clear whole context of epoll_ctx. - * - * \param ctx Target ctx. - * - * \retval ret == 0 if successful. - * \retval ret < 0 on error. - */ -int epoll_ctx_clear(epoll_ctx_t* ctx); - -/*! - * \brief Close epoll related file descriptor. - * - * \param ctx Target ctx. - */ -void epoll_ctx_close(const epoll_ctx_t* ctx); - -/*! - * \brief Add file descriptor to watched ctx. - * - * \param ctx Target ctx. - * \param fd Added file descriptor. - * \param events Mask of watched events. - * \param usrctx Context (optional). - * - * \retval ret >= 0 is index of the added fd. - * \retval ret < 0 on errors. - */ -int epoll_ctx_add(epoll_ctx_t *ctx, const int fd, const unsigned events, void *usrctx); - -/*! - * \brief Wait for receive events. - * - * \param ctx Target ctx. - * \param it Event iterator storage. - * \param offset Index of first event. - * \param timeout Timeout of operation (negative number for unlimited). - * - * \retval ret >= 0 represents number of events received. - * \retval ret < 0 on error. - */ -int epoll_ctx_wait(epoll_ctx_t *ctx, epoll_it_t *it, const unsigned offset, const int timeout); - -/*! - * \brief Set file descriptor watchdog interval. - * - * Set time (interval from now) after which the associated file descriptor - * should be sweeped (see epoll_ctx_sweep). Good example is setting a grace period - * of N seconds between socket activity. If socket is not active within - * <now, now + interval>, it is sweeped and potentially closed. - * - * \param ctx Target ctx. - * \param i Index for the file descriptor. - * \param interval Allowed interval without activity (seconds). - * -1 disables watchdog timer - * - * \retval ret == 0 on success. - * \retval ret < 0 on errors. - */ -int epoll_ctx_set_watchdog(epoll_ctx_t *ctx, const unsigned idx, const int interval); - -/*! - * \brief Returns file descriptor based on index. - * - * \param ctx Target ctx. - * \param idx Index of the file descriptor. - * - * \retval ret >= 0 for file descriptor. - * \retval ret < 0 on errors. - */ -int epoll_ctx_get_fd(const epoll_ctx_t *ctx, const unsigned idx); - -/*! - * \brief Returns number of file descriptors stored in ctx. - * - * \param ctx Target ctx. - * - * \retval Number of descriptors stored - */ -unsigned epoll_ctx_get_length(const epoll_ctx_t *ctx); - -/*! - * \brief Sweep file descriptors with exceeding inactivity period. - * - * \param ctx Target ctx. - * \param cb Callback for sweeped descriptors. - * \param data Pointer to extra data. - * - * \retval number of sweeped descriptors. - * \retval -1 on errors. - */ -int epoll_ctx_sweep(epoll_ctx_t* ctx, const epoll_ctx_sweep_cb_t cb, void *data); - -/*! - * \brief Move iterator on next received event. - * - * \param it Target iterator. - */ -void epoll_it_next(epoll_it_t *it); - -/*! - * \brief Decide if there is more received events. - * - * \param it Target iterator. - * - * \retval Logical flag represents 'done' state. - */ -int epoll_it_done(const epoll_it_t *it); - -/*! - * \brief Remove file descriptor from watched ctx. - * - * \param it Target iterator. - * - * \retval 0 if successful. - * \retval ret < 0 on error. - */ -int epoll_it_remove(epoll_it_t *it); - -/*! - * \brief Get file descriptor of event referenced by iterator. - * - * \param it Target iterator. - * - * \retval ret >= 0 for file descriptor. - * \retval ret < 0 on errors. - */ -int epoll_it_get_fd(const epoll_it_t *it); - -/*! - * \brief Get index of event in set referenced by iterator. - * - * \param it Target iterator. - * - * \retval Index of event. - */ -unsigned epoll_it_get_idx(const epoll_it_t *it); - -/*! - * \brief Decide if event referenced by iterator is POLLIN event. - * - * \param it Target iterator. - * - * \retval Logical flag represents 'POLLIN' event received. - */ -int epoll_it_ev_is_pollin(const epoll_it_t *it); - -/*! - * \brief Decide if event referenced by iterator is error event. - * - * \param it Target iterator. - * - * \retval Logical flag represents error event received. - */ -int epoll_it_ev_is_err(const epoll_it_t *it); - -#endif diff --git a/src/knot/common/fdset.c b/src/knot/common/fdset.c index aa98c845d..f4a533861 100644 --- a/src/knot/common/fdset.c +++ b/src/knot/common/fdset.c @@ -14,29 +14,32 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ -#ifdef ENABLE_POLL - #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <time.h> -#include <assert.h> + #include "knot/common/fdset.h" #include "contrib/time.h" -#include "libknot/errcode.h" -/* Realloc memory or return error (part of fdset_resize). */ -#define MEM_RESIZE(tmp, p, n) \ - if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) \ +#define MEM_RESIZE(p, n) { \ + void *tmp = NULL; \ + if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) { \ return KNOT_ENOMEM; \ - (p) = tmp; + } \ + (p) = tmp; \ +} static int fdset_resize(fdset_t *set, const unsigned size) { - void *tmp = NULL; - MEM_RESIZE(tmp, set->ctx, size); - MEM_RESIZE(tmp, set->pfd, size); - MEM_RESIZE(tmp, set->timeout, size); + assert(set); + + MEM_RESIZE(set->ctx, size); + MEM_RESIZE(set->timeout, size); +#ifdef HAVE_EPOLL + MEM_RESIZE(set->ev, size); +#else + MEM_RESIZE(set->pfd, size); +#endif set->size = size; return KNOT_EOK; } @@ -47,77 +50,169 @@ int fdset_init(fdset_t *set, const unsigned size) return KNOT_EINVAL; } - memset(set, 0, sizeof(fdset_t)); - return fdset_resize(set, size); + memset(set, 0, sizeof(*set)); + +#ifdef HAVE_EPOLL + set->efd = epoll_create1(0); + if (set->efd < 0) { + return knot_map_errno(); + } +#endif + int ret = fdset_resize(set, size); +#ifdef HAVE_EPOLL + if (ret != KNOT_EOK) { + close(set->efd); + } +#endif + return ret; } -int fdset_clear(fdset_t* set) +void fdset_clear(fdset_t *set) { if (set == NULL) { - return KNOT_EINVAL; + return; } free(set->ctx); - free(set->pfd); free(set->timeout); - memset(set, 0, sizeof(fdset_t)); - return KNOT_EOK; +#ifdef HAVE_EPOLL + free(set->ev); + free(set->recv_ev); + close(set->efd); +#else + free(set->pfd); +#endif + memset(set, 0, sizeof(*set)); } -int fdset_add(fdset_t *set, const int fd, const unsigned events, void *ctx) +int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx) { if (set == NULL || fd < 0) { return KNOT_EINVAL; } - /* Realloc needed. */ - if (set->n == set->size && fdset_resize(set, set->size + FDSET_INIT_SIZE)) + if (set->n == set->size && + fdset_resize(set, set->size + FDSET_RESIZE_STEP) != KNOT_EOK) { return KNOT_ENOMEM; + } - /* Initialize. */ - const int i = set->n++; - set->pfd[i].fd = fd; - set->pfd[i].events = events; - set->pfd[i].revents = 0; - set->ctx[i] = ctx; - set->timeout[i] = 0; + const int idx = set->n++; + set->ctx[idx] = ctx; + set->timeout[idx] = 0; +#ifdef HAVE_EPOLL + set->ev[idx].data.fd = fd; + set->ev[idx].events = events; + struct epoll_event ev = { + .data.u64 = idx, + .events = events + }; + if (epoll_ctl(set->efd, EPOLL_CTL_ADD, fd, &ev) != 0) { + return knot_map_errno(); + } +#else + set->pfd[idx].fd = fd; + set->pfd[idx].events = events; + set->pfd[idx].revents = 0; +#endif - /* Return index to this descriptor. */ - return i; + return idx; } -//TODO should be static, but it is a dependency in tests (? remove from -// tests or something ?) int fdset_remove(fdset_t *set, const unsigned idx) { if (set == NULL || idx >= set->n) { return KNOT_EINVAL; } + const int fd = fdset_get_fd(set, idx); +#ifdef HAVE_EPOLL + /* This is necessary as DDNS duplicates file descriptors! */ + (void)epoll_ctl(set->efd, EPOLL_CTL_DEL, fd, NULL); +#endif + close(fd); + const unsigned last = --set->n; - /* Nothing else if it is the last one. - * Move last -> i if some remain. */ + /* Nothing else if it is the last one. Move last -> i if some remain. */ if (idx < last) { - set->pfd[idx] = set->pfd[last]; - set->timeout[idx] = set->timeout[last]; set->ctx[idx] = set->ctx[last]; + set->timeout[idx] = set->timeout[last]; +#ifdef HAVE_EPOLL + set->ev[idx] = set->ev[last]; + struct epoll_event ev = { + .data.u64 = idx, + .events = set->ev[idx].events + }; + if (epoll_ctl(set->efd, EPOLL_CTL_MOD, set->ev[last].data.fd, &ev) != 0) { + return knot_map_errno(); + } +#else + set->pfd[idx] = set->pfd[last]; +#endif } return KNOT_EOK; } -int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout) +int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout_ms) { - it->ctx = set; + if (it == NULL) { + return KNOT_EINVAL; + } + it->unprocessed = 0; + + if (set == NULL) { + return KNOT_EINVAL; + } + + it->set = set; it->idx = offset; - it->unprocessed = poll(&set->pfd[offset], set->n - offset, 1000 * timeout); +#ifdef HAVE_EPOLL + if (set->recv_size != set->size) { + MEM_RESIZE(set->recv_ev, set->size); + set->recv_size = set->size; + } + it->ptr = set->recv_ev; + it->dirty = 0; + /* + * NOTE: Can't skip offset without bunch of syscalls!! + * Because of that it waits for `ctx->n` (every socket). Offset is set when TCP + * trotlling is ON. Sometimes it can return with sockets where none of them are + * connection socket, but it should not be common. + * But it can cause problems when adopted in other use-case. + */ + return it->unprocessed = epoll_wait(set->efd, set->recv_ev, set->n, timeout_ms); +#else + it->unprocessed = poll(&set->pfd[offset], set->n - offset, timeout_ms); while (it->unprocessed > 0 && set->pfd[it->idx].revents == 0) { it->idx++; } return it->unprocessed; +#endif } -int fdset_set_watchdog(fdset_t* set, const unsigned idx, const int interval) +void fdset_it_commit(fdset_it_t *it) +{ + if (it == NULL) { + return; + } +#ifdef HAVE_EPOLL + /* NOTE: reverse iteration to avoid as much "remove last" operations + * as possible. I'm not sure about performance improvement. It + * will skip some syscalls at begin of iteration, but what + * performance increase do we get is a question. + */ + fdset_t *set = it->set; + for (int i = set->n - 1; it->dirty > 0 && i >= 0; --i) { + if (set->ev[i].events == FDSET_REMOVE_FLAG) { + (void)fdset_remove(set, i); + it->dirty--; + } + } + assert(it->dirty == 0); +#endif +} + +int fdset_set_watchdog(fdset_t *set, const unsigned idx, const int interval) { if (set == NULL || idx >= set->n) { return KNOT_EINVAL; @@ -136,26 +231,10 @@ int fdset_set_watchdog(fdset_t* set, const unsigned idx, const int interval) return KNOT_EOK; } -int fdset_get_fd(const fdset_t *set, const unsigned idx) -{ - if (set == NULL || idx >= set->n) { - return KNOT_EINVAL; - } - - return set->pfd[idx].fd; -} - - -unsigned fdset_get_length(const fdset_t *set) -{ - assert(set); - return set->n; -} - -int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data) +void fdset_sweep(fdset_t *set, const fdset_sweep_cb_t cb, void *data) { if (set == NULL || cb == NULL) { - return KNOT_EINVAL; + return; } /* Get time threshold. */ @@ -166,68 +245,9 @@ int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data) if (set->timeout[idx] > 0 && set->timeout[idx] <= now.tv_sec) { const int fd = fdset_get_fd(set, idx); if (cb(set, fd, data) == FDSET_SWEEP) { - if (fdset_remove(set, idx) == KNOT_EOK) { - continue; /* Stay on the index. */ - } + (void)fdset_remove(set, idx); } } ++idx; } - return KNOT_EOK; -} - -void fdset_it_next(fdset_it_t *it) -{ - if (--it->unprocessed > 0) { - while (it->ctx->pfd[++it->idx].revents == 0); /* nop */ - } } - -int fdset_it_done(const fdset_it_t *it) -{ - return it->unprocessed <= 0; -} - -int fdset_it_remove(fdset_it_t *it) -{ - if (it == NULL || it->ctx == NULL) { - return KNOT_EINVAL; - } - - fdset_t *set = it->ctx; - const unsigned idx = fdset_it_get_idx(it); - fdset_remove(set, idx); - /* Iterator should return on last valid already processed element. */ - /* On `next` call (in for-loop) will point on first unprocessed. */ - --it->idx; - return KNOT_EOK; -} - -int fdset_it_get_fd(const fdset_it_t *it) -{ - if (it == NULL) { - return KNOT_EINVAL; - } - - return it->ctx->pfd[it->idx].fd; -} - -unsigned fdset_it_get_idx(const fdset_it_t *it) -{ - assert(it); - return it->idx; -} - -int fdset_it_ev_is_pollin(const fdset_it_t *it) -{ - assert(it); - return it->ctx->pfd[it->idx].revents & POLLIN; -} - -int fdset_it_ev_is_err(const fdset_it_t *it) -{ - assert(it); - return it->ctx->pfd[it->idx].revents & (POLLERR|POLLHUP|POLLNVAL); -} - -#endif diff --git a/src/knot/common/fdset.h b/src/knot/common/fdset.h index a75912c96..f6d5bb885 100644 --- a/src/knot/common/fdset.h +++ b/src/knot/common/fdset.h @@ -20,82 +20,107 @@ #pragma once -#if !defined(HAVE_EPOLL) - +#include <assert.h> +#include <stdbool.h> #include <stddef.h> +#include <time.h> + +#ifdef HAVE_EPOLL +#include <sys/epoll.h> +#else #include <poll.h> -#include <sys/time.h> -#include <signal.h> - -#define FDSET_INIT_SIZE 256 /* Resize step. */ - -/*! \brief Set of filedescriptors with associated context and timeouts. */ -typedef struct fdset { - unsigned n; /*!< Active fds. */ - unsigned size; /*!< Array size (allocated). */ - void* *ctx; /*!< Context for each fd. */ - struct pollfd *pfd; /*!< poll state for each fd */ - time_t *timeout; /*!< Timeout for each fd (seconds precision). */ +#endif + +#include "libknot/errcode.h" + +#define FDSET_RESIZE_STEP 256 +#ifdef HAVE_EPOLL +#define FDSET_REMOVE_FLAG ~0U +#endif + +/*! \brief Set of file descriptors with associated context and timeouts. */ +typedef struct { + unsigned n; /*!< Active fds. */ + unsigned size; /*!< Array size (allocated). */ + void **ctx; /*!< Context for each fd. */ + time_t *timeout; /*!< Timeout for each fd (seconds precision). */ +#ifdef HAVE_EPOLL + struct epoll_event *ev; /*!< Epoll event storage for each fd. */ + struct epoll_event *recv_ev; /*!< Array for polled events. */ + unsigned recv_size; /*!< Size of array for polled events. */ + int efd; /*!< File descriptor of epoll. */ +#else + struct pollfd *pfd; /*!< Poll state for each fd. */ +#endif } fdset_t; /*! \brief State of iterator over received events */ -typedef struct fdset_it { - fdset_t *ctx; /*!< Source fdset_t. */ - unsigned idx; /*!< Index of processed event. */ - int unprocessed; /*!< Unprocessed events left. */ +typedef struct { + fdset_t *set; /*!< Source fdset_t. */ + unsigned idx; /*!< Event index offset. */ + int unprocessed; /*!< Unprocessed events left. */ +#ifdef HAVE_EPOLL + struct epoll_event *ptr; /*!< Pointer on processed event. */ + unsigned dirty; /*!< Number of fd to be removed on commit. */ +#endif } fdset_it_t; +typedef enum { +#ifdef HAVE_EPOLL + FDSET_POLLIN = EPOLLIN, + FDSET_POLLOUT = EPOLLOUT, +#else + FDSET_POLLIN = POLLIN, + FDSET_POLLOUT = POLLOUT, +#endif +} fdset_event_t; + /*! \brief Mark-and-sweep state. */ -typedef enum fdset_sweep_state { +typedef enum { FDSET_KEEP, FDSET_SWEEP } fdset_sweep_state_t; /*! \brief Sweep callback (set, index, data) */ -typedef enum fdset_sweep_state (*fdset_sweep_cb_t)(fdset_t*, int, void*); +typedef fdset_sweep_state_t (*fdset_sweep_cb_t)(fdset_t *, int, void *); /*! * \brief Initialize fdset to given size. * - * \param set Target set. - * \param size Initial set size. + * \param set Target set. + * \param size Initial set size. * - * \retval ret == 0 if successful. - * \retval ret < 0 on error. + * \return Error code, KNOT_EOK if success. */ int fdset_init(fdset_t *set, const unsigned size); /*! - * \brief Clear whole context of FDSET. - * - * \param set Target set. + * \brief Clear whole context of the fdset. * - * \retval ret == 0 if successful. - * \retval ret < 0 on error. + * \param set Target set. */ -int fdset_clear(fdset_t* set); +void fdset_clear(fdset_t *set); /*! * \brief Add file descriptor to watched set. * - * \param set Target set. - * \param fd Added file descriptor. - * \param events Mask of watched events. - * \param ctx Context (optional). + * \param set Target set. + * \param fd Added file descriptor. + * \param events Mask of watched events. + * \param ctx Context (optional). * * \retval ret >= 0 is index of the added fd. - * \retval ret < 0 on errors. + * \retval ret < 0 on error. */ -int fdset_add(fdset_t *set, const int fd, const unsigned events, void *ctx); +int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx); /*! - * \brief Remove file descriptor from watched set. + * \brief Remove and close file descriptor from watched set. * - * \param set Target set. - * \param i Index of the removed fd. + * \param set Target set. + * \param idx Index of the removed fd. * - * \retval 0 if successful. - * \retval ret < 0 on errors. + * \return Error code, KNOT_EOK if success. */ int fdset_remove(fdset_t *set, const unsigned idx); @@ -104,15 +129,15 @@ int fdset_remove(fdset_t *set, const unsigned idx); * * Skip events based on offset and set iterator on first event. * - * \param set Target set. - * \param it Event iterator storage. - * \param offset Index of first event. - * \param timeout Timeout of operation (negative number for unlimited). + * \param set Target set. + * \param it Event iterator storage. + * \param offset Index of first event. + * \param timeout_ms Timeout of operation in milliseconds (use -1 for unlimited). * * \retval ret >= 0 represents number of events received. * \retval ret < 0 on error. */ -int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout); +int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout_ms); /*! * \brief Set file descriptor watchdog interval. @@ -120,111 +145,194 @@ int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int ti * Set time (interval from now) after which the associated file descriptor * should be sweeped (see fdset_sweep). Good example is setting a grace period * of N seconds between socket activity. If socket is not active within - * <now, now + interval>, it is sweeped and potentially closed. + * <now, now + interval>, it is sweeped and closed. * - * \param set Target set. - * \param idx Index of the file descriptor. - * \param interval Allowed interval without activity (seconds). - * -1 disables watchdog timer + * \param set Target set. + * \param idx Index of the file descriptor. + * \param interval Allowed interval without activity (seconds). + * -1 disables watchdog timer. * - * \retval ret == 0 on success. - * \retval ret < 0 on errors. + * \return Error code, KNOT_EOK if success. */ int fdset_set_watchdog(fdset_t *set, const unsigned idx, const int interval); /*! + * \brief Sweep file descriptors with exceeding inactivity period. + * + * \param set Target set. + * \param cb Callback for sweeped descriptors. + * \param data Pointer to extra data. + */ +void fdset_sweep(fdset_t *set, const fdset_sweep_cb_t cb, void *data); + +/*! * \brief Returns file descriptor based on index. * - * \param set Target set. - * \param idx Index of the file descriptor. + * \param set Target set. + * \param idx Index of the file descriptor. * * \retval ret >= 0 for file descriptor. * \retval ret < 0 on errors. */ -int fdset_get_fd(const fdset_t *set, const unsigned idx); +inline static int fdset_get_fd(const fdset_t *set, const unsigned idx) +{ + assert(set && idx < set->n); + +#ifdef HAVE_EPOLL + return set->ev[idx].data.fd; +#else + return set->pfd[idx].fd; +#endif +} /*! * \brief Returns number of file descriptors stored in set. * - * \param set Target set. + * \param set Target set. * * \retval Number of descriptors stored */ -unsigned fdset_get_length(const fdset_t *set); +inline static unsigned fdset_get_length(const fdset_t *set) +{ + assert(set); + + return set->n; +} /*! - * \brief Sweep file descriptors with exceeding inactivity period. + * \brief Get index of event in set referenced by iterator. * - * \param set Target set. - * \param cb Callback for sweeped descriptors. - * \param data Pointer to extra data. + * \param it Target iterator. * - * \retval number of sweeped descriptors. - * \retval -1 on errors. + * \retval Index of event. */ -int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data); +inline static unsigned fdset_it_get_idx(const fdset_it_t *it) +{ + assert(it); + +#ifdef HAVE_EPOLL + return it->ptr->data.u64; +#else + return it->idx; +#endif +} /*! - * \brief Move iterator on next received event. + * \brief Get file descriptor of event referenced by iterator. + * + * \param it Target iterator. * - * \param it Target iterator. + * \retval ret >= 0 for file descriptor. + * \retval ret < 0 on errors. */ -void fdset_it_next(fdset_it_t *it); +inline static int fdset_it_get_fd(const fdset_it_t *it) +{ + assert(it); + +#ifdef HAVE_EPOLL + return it->set->ev[fdset_it_get_idx(it)].data.fd; +#else + return it->set->pfd[it->idx].fd; +#endif +} /*! - * \brief Decide if there is more received events. - * - * \param it Target iterator. + * \brief Move iterator on next received event. * - * \retval Logical flag represents 'done' state. + * \param it Target iterator. */ -int fdset_it_done(const fdset_it_t *it); +inline static void fdset_it_next(fdset_it_t *it) +{ + assert(it); + +#ifdef HAVE_EPOLL + do { + it->ptr++; + it->unprocessed--; + } while (it->unprocessed > 0 && fdset_it_get_idx(it) < it->idx); +#else + if (--it->unprocessed > 0) { + while (it->set->pfd[++it->idx].revents == 0); /* nop */ + } +#endif +} /*! * \brief Remove file descriptor referenced by iterator from watched set. * - * \param it Target iterator. + * \param it Target iterator. * - * \retval 0 if successful. - * \retval ret < 0 on error. + * \return Error code, KNOT_EOK if success. */ -int fdset_it_remove(fdset_it_t *it); +inline static void fdset_it_remove(fdset_it_t *it) +{ + assert(it); + +#ifdef HAVE_EPOLL + const int idx = fdset_it_get_idx(it); + it->set->ev[idx].events = FDSET_REMOVE_FLAG; + it->dirty++; +#else + (void)fdset_remove(it->set, fdset_it_get_idx(it)); + /* Iterator should return on last valid already processed element. */ + /* On `next` call (in for-loop) will point on first unprocessed. */ + it->idx--; +#endif +} /*! - * \brief Get file descriptor of event referenced by iterator. - * - * \param it Target iterator. + * \brief Commit changes made in fdset using iterator. * - * \retval ret >= 0 for file descriptor. - * \retval ret < 0 on errors. + * \param it Target iterator. */ -int fdset_it_get_fd(const fdset_it_t *it); +void fdset_it_commit(fdset_it_t *it); /*! - * \brief Get index of event in set referenced by iterator. + * \brief Decide if there is more received events. * - * \param it Target iterator. + * \param it Target iterator. * - * \retval Index of event. + * \retval Logical flag representing 'done' state. */ -unsigned fdset_it_get_idx(const fdset_it_t *it); +inline static bool fdset_it_is_done(const fdset_it_t *it) +{ + assert(it); + + return it->unprocessed <= 0; +} /*! * \brief Decide if event referenced by iterator is POLLIN event. * - * \param it Target iterator. + * \param it Target iterator. * * \retval Logical flag represents 'POLLIN' event received. */ -int fdset_it_ev_is_pollin(const fdset_it_t *it); +inline static bool fdset_it_is_pollin(const fdset_it_t *it) +{ + assert(it); + +#ifdef HAVE_EPOLL + return it->ptr->events & EPOLLIN; +#else + return it->set->pfd[it->idx].revents & POLLIN; +#endif +} /*! * \brief Decide if event referenced by iterator is error event. * - * \param it Target iterator. + * \param it Target iterator. * * \retval Logical flag represents error event received. */ -int fdset_it_ev_is_err(const fdset_it_t *it); +inline static bool fdset_it_is_error(const fdset_it_t *it) +{ + assert(it); +#ifdef HAVE_EPOLL + return it->ptr->events & (EPOLLERR | EPOLLHUP); +#else + return it->set->pfd[it->idx].revents & (POLLERR | POLLHUP | POLLNVAL); #endif +} diff --git a/src/knot/server/tcp-handler.c b/src/knot/server/tcp-handler.c index 7d17e79ba..19abeaf89 100644 --- a/src/knot/server/tcp-handler.c +++ b/src/knot/server/tcp-handler.c @@ -32,7 +32,7 @@ #include "knot/server/server.h" #include "knot/server/tcp-handler.h" #include "knot/common/log.h" -#include "knot/common/apoll.h" +#include "knot/common/fdset.h" #include "knot/nameserver/process_query.h" #include "knot/query/layer.h" #include "contrib/macros.h" @@ -50,7 +50,7 @@ typedef struct tcp_context { unsigned client_threshold; /*!< Index of first TCP client. */ struct timespec last_poll_time; /*!< Time of the last socket poll. */ bool is_throttled; /*!< TCP connections throttling switch. */ - apoll_t set; /*!< Set of server/client sockets. */ + fdset_t set; /*!< Set of server/client sockets. */ unsigned thread_id; /*!< Thread identifier. */ unsigned max_worker_fds; /*!< Max TCP clients per worker configuration + no. of ifaces. */ int idle_timeout; /*!< [s] TCP idle timeout configuration. */ @@ -76,7 +76,7 @@ static void update_tcp_conf(tcp_context_t *tcp) } /*! \brief Sweep TCP connection. */ -static apoll_sweep_state tcp_sweep(apoll_t *set, int fd, void *data) +static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, void *data) { UNUSED(data); assert(set && fd >= 0); @@ -90,9 +90,7 @@ static apoll_sweep_state tcp_sweep(apoll_t *set, int fd, void *data) log_notice("TCP, terminated inactive client, address %s", addr_str); } - close(fd); - - return APOLL_CTX_SWEEP; + return FDSET_SWEEP; } static bool tcp_active_state(int state) @@ -117,7 +115,7 @@ static void tcp_log_error(struct sockaddr_storage *ss, const char *operation, in } static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, - apoll_t *fds, int thread_id) + fdset_t *fds, int thread_id) { if (n_ifaces == 0) { return 0; @@ -139,10 +137,13 @@ static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, tcp_id = thread_id - i->fd_udp_count; } #endif - apoll_add(fds, i->fd_tcp[tcp_id], APOLL_POLLIN, NULL); + int ret = fdset_add(fds, i->fd_tcp[tcp_id], FDSET_POLLIN, NULL); + if (ret < 0) { + return 0; + } } - return apoll_get_length(fds); + return fdset_get_length(fds); } static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec *tx) @@ -215,28 +216,28 @@ static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec static void tcp_event_accept(tcp_context_t *tcp, unsigned i) { /* Accept client. */ - int fd = apoll_get_fd((&tcp->set), i); + int fd = fdset_get_fd(&tcp->set, i); int client = net_accept(fd, NULL); if (client >= 0) { /* Assign to fdset. */ - int next_id = apoll_add(&tcp->set, client, APOLL_POLLIN, NULL); - if (next_id < 0) { + int idx = fdset_add(&tcp->set, client, FDSET_POLLIN, NULL); + if (idx < 0) { close(client); return; } /* Update watchdog timer. */ - apoll_set_watchdog(&tcp->set, next_id, tcp->idle_timeout); + (void)fdset_set_watchdog(&tcp->set, idx, tcp->idle_timeout); } } static int tcp_event_serve(tcp_context_t *tcp, unsigned i) { - int fd = apoll_get_fd((&tcp->set), i); - int ret = tcp_handle(tcp, fd, &tcp->iov[0], &tcp->iov[1]); + int ret = tcp_handle(tcp, fdset_get_fd(&tcp->set, i), + &tcp->iov[0], &tcp->iov[1]); if (ret == KNOT_EOK) { /* Update socket activity timer. */ - apoll_set_watchdog(&tcp->set, i, tcp->idle_timeout); + (void)fdset_set_watchdog(&tcp->set, i, tcp->idle_timeout); } return ret; @@ -244,33 +245,33 @@ static int tcp_event_serve(tcp_context_t *tcp, unsigned i) static void tcp_wait_for_events(tcp_context_t *tcp) { - apoll_t *set = &tcp->set; + fdset_t *set = &tcp->set; /* Check if throttled with many open TCP connections. */ - assert(apoll_get_length(set) <= tcp->max_worker_fds); - tcp->is_throttled = apoll_get_length(set) == tcp->max_worker_fds; + assert(fdset_get_length(set) <= tcp->max_worker_fds); + tcp->is_throttled = fdset_get_length(set) == tcp->max_worker_fds; /* If throttled, temporarily ignore new TCP connections. */ - unsigned i = tcp->is_throttled ? tcp->client_threshold : 0; + unsigned offset = tcp->is_throttled ? tcp->client_threshold : 0; /* Wait for events. */ - apoll_it_t it; - apoll_poll(set, &it, i, TCP_SWEEP_INTERVAL); + fdset_it_t it; + (void)fdset_poll(set, &it, offset, TCP_SWEEP_INTERVAL * 1000); /* Mark the time of last poll call. */ tcp->last_poll_time = time_now(); /* Process events. */ - for(; !apoll_it_done(&it); apoll_it_next(&it)) { + for (; !fdset_it_is_done(&it); fdset_it_next(&it)) { bool should_close = false; - unsigned int idx = apoll_it_get_idx(&it); - if (apoll_it_ev_is_error(&it)) { + unsigned int idx = fdset_it_get_idx(&it); + if (fdset_it_is_error(&it)) { should_close = (idx >= tcp->client_threshold); - } else if (apoll_it_ev_is_pollin(&it)) { + } else if (fdset_it_is_pollin(&it)) { /* Master sockets - new connection to accept. */ if (idx < tcp->client_threshold) { /* Don't accept more clients than configured. */ - if (apoll_get_length(set) < tcp->max_worker_fds) { + if (fdset_get_length(set) < tcp->max_worker_fds) { tcp_event_accept(tcp, idx); } /* Client sockets - already accepted connection or @@ -282,10 +283,10 @@ static void tcp_wait_for_events(tcp_context_t *tcp) /* Evaluate. */ if (should_close) { - close(apoll_get_fd(set, idx)); - apoll_it_remove(&it); + fdset_it_remove(&it); } } + fdset_it_commit(&it); } int tcp_master(dthread_t *thread) @@ -338,7 +339,9 @@ int tcp_master(dthread_t *thread) update_tcp_conf(&tcp); /* Prepare initial buffer for listening and bound sockets. */ - apoll_init(&tcp.set, APOLL_CTX_INIT_SIZE); + if (fdset_init(&tcp.set, FDSET_RESIZE_STEP) != KNOT_EOK) { + goto finish; + } /* Set descriptors for the configured interfaces. */ tcp.client_threshold = tcp_set_ifaces(handler->server->ifaces, @@ -359,7 +362,7 @@ int tcp_master(dthread_t *thread) /* Sweep inactive clients and refresh TCP configuration. */ if (tcp.last_poll_time.tv_sec >= next_sweep.tv_sec) { - apoll_sweep(&tcp.set, &tcp_sweep, NULL); + fdset_sweep(&tcp.set, &tcp_sweep, NULL); update_sweep_timer(&next_sweep); update_tcp_conf(&tcp); } @@ -369,8 +372,7 @@ finish: free(tcp.iov[0].iov_base); free(tcp.iov[1].iov_base); mp_delete(mm.ctx); - apoll_close(&tcp.set); - apoll_clear(&tcp.set); + fdset_clear(&tcp.set); return ret; } diff --git a/src/knot/server/udp-handler.c b/src/knot/server/udp-handler.c index 4d0cc7b38..190d46cfd 100644 --- a/src/knot/server/udp-handler.c +++ b/src/knot/server/udp-handler.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,25 +16,25 @@ #define __APPLE_USE_RFC_3542 +#include <assert.h> #include <dlfcn.h> -#include <unistd.h> #include <errno.h> +#include <string.h> #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include <arpa/inet.h> -#include <string.h> -#include <assert.h> #include <sys/param.h> #ifdef HAVE_SYS_UIO_H // struct iovec (OpenBSD) #include <sys/uio.h> #endif /* HAVE_SYS_UIO_H */ +#include <unistd.h> #include "contrib/macros.h" #include "contrib/mempattern.h" #include "contrib/sockaddr.h" #include "contrib/ucw/mempool.h" -#include "knot/common/apoll.h" +#include "knot/common/fdset.h" #include "knot/nameserver/process_query.h" #include "knot/query/layer.h" #include "knot/server/server.h" @@ -499,7 +499,7 @@ static int iface_udp_fd(const iface_t *iface, int thread_id, bool xdp_thread, } } -static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, apoll_t *fds, +static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, fdset_t *fds, int thread_id, void **xdp_socket) { if (n_ifaces == 0) { @@ -516,11 +516,14 @@ static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, apoll_t * if (fd < 0) { continue; } - apoll_add(fds, fd, APOLL_POLLIN, NULL); + int ret = fdset_add(fds, fd, FDSET_POLLIN, NULL); + if (ret < 0) { + return 0; + } count++; } - assert(!xdp_thread || count == 1); + assert((count == n_ifaces) || (xdp_thread && count == 1)); return count; } @@ -575,11 +578,13 @@ int udp_master(dthread_t *thread) /* Allocate descriptors for the configured interfaces. */ void *xdp_socket = NULL; size_t nifs = handler->server->n_ifaces; - apoll_t fds; - apoll_init(&fds, nifs); - unsigned fds_count = udp_set_ifaces(handler->server->ifaces, nifs, &fds, - thread_id, &xdp_socket); - if (fds_count == 0) { + fdset_t fds; + if (fdset_init(&fds, nifs) != KNOT_EOK) { + goto finish; + } + unsigned nfds = udp_set_ifaces(handler->server->ifaces, nifs, &fds, + thread_id, &xdp_socket); + if (nfds == 0) { goto finish; } @@ -591,18 +596,15 @@ int udp_master(dthread_t *thread) } /* Wait for events. */ - apoll_it_t it; - int ret = apoll_poll(&fds, &it, 0, -1); - if (ret <= 0) { - if (errno == EINTR || errno == EAGAIN) { - continue; - } - break; - } + fdset_it_t it; + (void)fdset_poll(&fds, &it, 0, -1); /* Process the events. */ - for(; !apoll_it_done(&it); apoll_it_next(&it)) { - if (api->udp_recv(apoll_it_get_fd(&it), rq, xdp_socket) > 0) { + for (; !fdset_it_is_done(&it); fdset_it_next(&it)) { + if (!fdset_it_is_pollin(&it)) { + continue; + } + if (api->udp_recv(fdset_it_get_fd(&it), rq, xdp_socket) > 0) { api->udp_handle(&udp, rq, xdp_socket); api->udp_send(rq, xdp_socket); } @@ -612,8 +614,7 @@ int udp_master(dthread_t *thread) finish: api->udp_deinit(rq); mp_delete(mm.ctx); - apoll_close(&fds); - apoll_clear(&fds); + fdset_clear(&fds); return KNOT_EOK; } diff --git a/tests-fuzz/knotd_wrap/udp-handler.c b/tests-fuzz/knotd_wrap/udp-handler.c index 51bddf3a1..c6e28be6e 100644 --- a/tests-fuzz/knotd_wrap/udp-handler.c +++ b/tests-fuzz/knotd_wrap/udp-handler.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ +#include <signal.h> + #include "knot/server/udp-handler.c" #include "knot/common/log.h" diff --git a/tests/knot/test_fdset.c b/tests/knot/test_fdset.c index 10d1b4259..b07bf54f4 100644 --- a/tests/knot/test_fdset.c +++ b/tests/knot/test_fdset.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,139 +14,119 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ -#include <stdlib.h> -#include <stdint.h> -#include <sys/time.h> #include <pthread.h> -#include <fcntl.h> -#include <unistd.h> #include <tap/basic.h> -#include <time.h> +#include <unistd.h> #include "knot/common/fdset.h" +#include "contrib/time.h" -#define WRITE_PATTERN ((char) 0xde) -#define WRITE_PATTERN_LEN sizeof(char) +#define PATTERN1 "0x45" +#define PATTERN2 "0xED" -/* Subtract the `struct timeval' values X and Y, - storing the result in RESULT. - Return 1 if the difference is negative, otherwise 0. - Copyright http://www.delorie.com/gnu/docs/glibc/libc_428.html -*/ -static int timeval_subtract (struct timeval *result, struct timeval *x, struct timeval* y) +void *thr_action1(void *arg) { - /* Perform the carry for the later subtraction by updating y. */ - if (x->tv_usec < y->tv_usec) { - int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1; - y->tv_usec -= 1000000 * nsec; - y->tv_sec += nsec; - } - if (x->tv_usec - y->tv_usec > 1000000) { - int nsec = (x->tv_usec - y->tv_usec) / 1000000; - y->tv_usec += 1000000 * nsec; - y->tv_sec -= nsec; - } - - /* Compute the time remaining to wait. - tv_usec is certainly positive. */ - result->tv_sec = x->tv_sec - y->tv_sec; - result->tv_usec = x->tv_usec - y->tv_usec; - - /* Return 1 if result is negative. */ - return x->tv_sec < y->tv_sec; + usleep(10000); + (void)write(*((int *)arg), &PATTERN1, 1); + return NULL; } -static size_t timeval_diff(struct timeval *from, struct timeval *to) { - struct timeval res; - timeval_subtract(&res, to, from); - return res.tv_sec*1000 + res.tv_usec/1000; +void *thr_action2(void *arg) +{ + usleep(20000); + (void)write(*((int *)arg), &PATTERN2, 1); + return NULL; } -void* thr_action(void *arg) +int main(int argc, char *argv[]) { - int *fd = (int *)arg; + plan_lazy(); + + fdset_t fdset; + int ret = fdset_init(&fdset, 32); + ok(ret == KNOT_EOK, "fdset_init"); + + int fds0[2], fds1[2], fds2[2]; + + ret = pipe(fds0); + ok(ret >= 0, "create pipe 0"); + ret = fdset_add(&fdset, fds0[0], FDSET_POLLIN, NULL); + ok(ret >= 0, "add pipe 0 to fdset"); + + ret = pipe(fds1); + ok(ret >= 0, "create pipe 1"); + ret = fdset_add(&fdset, fds1[0], FDSET_POLLIN, NULL); + ok(ret >= 0, "add pipe 1 to fdset"); + + ret = pipe(fds2); + ok(ret >= 0, "create pipe 2"); + ret = fdset_add(&fdset, fds2[0], FDSET_POLLIN, NULL); + ok(ret >= 0, "add pipe 2 to fdset"); - /* Sleep for 100ms. */ - struct timespec ts = { .tv_nsec = 1e8 }; - nanosleep(&ts, NULL); + ok(fdset_get_length(&fdset) == 3, "fdset size full"); - /* Write pattern. */ - char pattern = WRITE_PATTERN; - if (write(*fd, &pattern, WRITE_PATTERN_LEN) == -1) { - // Error. + struct timespec time0 = time_now(); + + pthread_t t1, t2; + ret = pthread_create(&t1, 0, thr_action1, &fds1[1]); + ok(ret == 0, "create thread 1"); + ret = pthread_create(&t2, 0, thr_action2, &fds2[1]); + ok(ret == 0, "create thread 2"); + + fdset_it_t it; + ret = fdset_poll(&fdset, &it, 0, 100); + struct timespec time1 = time_now(); + double diff1 = time_diff_ms(&time0, &time1); + ok(ret == 1, "fdset_poll return 1"); + ok(diff1 > 5 && diff1 < 100, "fdset_poll timeout 1"); + for(; !fdset_it_is_done(&it); fdset_it_next(&it)) { + ok(!fdset_it_is_error(&it), "fdset no error"); + ok(fdset_it_is_pollin(&it), "fdset can read"); + + int fd = fdset_it_get_fd(&it); + ok(fd == fds1[0], "fdset_it fd check"); + + char buf = 0x00; + ret = read(fd, &buf, sizeof(buf)); + ok(ret == 1 && buf == PATTERN1[0], "fdset_it value check"); + + fdset_it_remove(&it); + } + fdset_it_commit(&it); + ok(fdset_get_length(&fdset) == 2, "fdset size 2"); + close(fds1[1]); + + ret = fdset_poll(&fdset, &it, 0, 100); + struct timespec time2 = time_now(); + double diff2 = time_diff_ms(&time0, &time2); + ok(ret == 1, "fdset_poll return 2"); + ok(diff2 > 15 && diff2 < 100, "fdset_poll timeout 2"); + for(; !fdset_it_is_done(&it); fdset_it_next(&it)) { + ok(!fdset_it_is_error(&it), "fdset no error"); + ok(fdset_it_is_pollin(&it), "fdset can read"); + + int fd = fdset_it_get_fd(&it); + ok(fd == fds2[0], "fdset_it fd check"); + + char buf = 0x00; + ret = read(fd, &buf, sizeof(buf)); + ok(ret == 1 && buf == PATTERN2[0], "fdset_it value check"); + + fdset_it_remove(&it); } + fdset_it_commit(&it); + ok(fdset_get_length(&fdset) == 1, "fdset size 1"); + close(fds2[1]); - return NULL; -} + ret = fdset_remove(&fdset, 0); + ok(ret == KNOT_EOK, "fdset remove"); + close(fds0[1]); + ok(fdset_get_length(&fdset) == 0, "fdset size 0"); -int main(int argc, char *argv[]) -{ - plan(12); - - /* 1. Create fdset. */ - fdset_t set; - int ret = fdset_init(&set, 32); - is_int(0, ret, "fdset: init"); - - /* 2. Create pipe. */ - int fds[2], tmpfds[2]; - ret = pipe(fds); - ok(ret >= 0, "fdset: pipe() works"); - ret = pipe(tmpfds); - ok(ret >= 0, "fdset: 2nd pipe() works"); - - /* 3. Add fd to set. */ - ret = fdset_add(&set, fds[0], POLLIN, NULL); - is_int(0, ret, "fdset: add to set works"); - fdset_add(&set, tmpfds[0], POLLIN, NULL); - - /* Schedule write. */ - struct timeval ts, te; - gettimeofday(&ts, 0); - pthread_t t; - pthread_create(&t, 0, thr_action, &fds[1]); - - /* 4. Watch fdset. */ - int nfds = poll(set.pfd, set.n, 60 * 1000); - gettimeofday(&te, 0); - size_t diff = timeval_diff(&ts, &te); - - ok(nfds > 0, "fdset: poll returned %d events in %zu ms", nfds, diff); - - /* 5. Prepare event set. */ - ok(set.pfd[0].revents & POLLIN, "fdset: pipe is active"); - - /* 6. Receive data. */ - char buf = 0x00; - ret = read(set.pfd[0].fd, &buf, WRITE_PATTERN_LEN); - ok(ret >= 0 && buf == WRITE_PATTERN, "fdset: contains valid data"); - - /* 7-9. Remove from event set. */ - ret = fdset_remove(&set, 0); - is_int(0, ret, "fdset: remove from fdset works"); - close(fds[0]); - close(fds[1]); - ret = fdset_remove(&set, 0); - close(tmpfds[1]); - close(tmpfds[1]); - is_int(0, ret, "fdset: remove from fdset works (2)"); - ret = fdset_remove(&set, 0); - ok(ret != 0, "fdset: removing nonexistent item"); - - /* 10. Crash test. */ - fdset_init(0, 0); - fdset_add(0, 1, 1, 0); - fdset_add(0, 0, 1, 0); - fdset_remove(0, 1); - fdset_remove(0, 0); - ok(1, "fdset: crash test successful"); - - /* 11. Destroy fdset. */ - ret = fdset_clear(&set); - is_int(0, ret, "fdset: destroyed"); - - /* Cleanup. */ - pthread_join(t, 0); + pthread_join(t1, 0); + pthread_join(t2, 0); + + fdset_clear(&fdset); return 0; } diff --git a/tests/knot/test_server.c b/tests/knot/test_server.c index 972f4b9f3..bde3d1031 100644 --- a/tests/knot/test_server.c +++ b/tests/knot/test_server.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -14,6 +14,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. */ +#include <signal.h> #include <tap/basic.h> #include "knot/server/server.h" #include "test_conf.h" |