summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Salzman <daniel.salzman@nic.cz>2021-03-19 15:31:56 +0100
committerDaniel Salzman <daniel.salzman@nic.cz>2021-04-08 18:52:27 +0200
commit164fcff5bbe0a719d1346cbd21ad2dd9f8a4628d (patch)
tree53630619e1c34ff5b8843cae8159fbcad6671b44
parentserver: add polling wrapper for unix poll and epoll (diff)
downloadknot-164fcff5bbe0a719d1346cbd21ad2dd9f8a4628d.tar.xz
knot-164fcff5bbe0a719d1346cbd21ad2dd9f8a4628d.zip
server: merge epoll_ctx with fdset
-rw-r--r--Knot.files3
-rw-r--r--configure.ac29
-rw-r--r--src/knot/Makefile.inc3
-rw-r--r--src/knot/common/apoll.h79
-rw-r--r--src/knot/common/epoll_ctx.c261
-rw-r--r--src/knot/common/epoll_ctx.h227
-rw-r--r--src/knot/common/fdset.c260
-rw-r--r--src/knot/common/fdset.h290
-rw-r--r--src/knot/server/tcp-handler.c68
-rw-r--r--src/knot/server/udp-handler.c51
-rw-r--r--tests-fuzz/knotd_wrap/udp-handler.c4
-rw-r--r--tests/knot/test_fdset.c214
-rw-r--r--tests/knot/test_server.c3
13 files changed, 524 insertions, 968 deletions
diff --git a/Knot.files b/Knot.files
index b0f235857..61b6366ec 100644
--- a/Knot.files
+++ b/Knot.files
@@ -108,9 +108,6 @@ src/knot/catalog/generate.c
src/knot/catalog/generate.h
src/knot/catalog/interpret.c
src/knot/catalog/interpret.h
-src/knot/common/apoll.h
-src/knot/common/epoll_ctx.c
-src/knot/common/epoll_ctx.h
src/knot/common/evsched.c
src/knot/common/evsched.h
src/knot/common/fdset.c
diff --git a/configure.ac b/configure.ac
index 42161fcc0..773cea81b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -283,6 +283,27 @@ AS_IF([test "$enable_systemd" = "yes"],[
]) dnl enable_daemon
+# Socket polling method
+socket_polling=
+AC_ARG_WITH([socket-polling],
+ AS_HELP_STRING([--with-socket-polling=auto|poll|epoll],
+ [Use specific socket polling method [default=auto]]),
+ [socket_polling=$withval], [socket_polling=auto]
+)
+
+AS_CASE([$socket_polling],
+ [auto], [AC_CHECK_FUNCS([epoll_create],
+ [AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
+ socket_polling=epoll],
+ [socket_polling=poll])],
+ [poll], [socket_polling=poll],
+ [epoll], [AC_CHECK_FUNCS([epoll_create],
+ [AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
+ socket_polling=epoll],
+ [AC_MSG_ERROR([Epoll not available.])])],
+ [*], [AC_MSG_ERROR([Invalid value of --socket-polling.])]
+)
+
# Alternative memory allocator
malloc_LIBS=
AC_ARG_WITH([memory-allocator],
@@ -639,12 +660,6 @@ AC_CHECK_HEADERS_ONCE([pthread_np.h sys/uio.h bsd/string.h])
AC_CHECK_FUNCS([accept4 clock_gettime fgetln getline initgroups malloc_trim \
setgroups strlcat strlcpy sysctlbyname])
-# Check for a better polling method.
-AC_CHECK_FUNCS([epoll_create], [
- AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
- poll_method=epoll], [poll_method=poll]
-)
-
# Check for robust memory cleanup implementations.
AC_CHECK_FUNC([explicit_bzero], [
AC_DEFINE([HAVE_EXPLICIT_BZERO], [1], [explicit_bzero available])
@@ -744,7 +759,7 @@ result_msg_base=" Knot DNS $VERSION
Use recvmmsg: ${enable_recvmmsg}
Use SO_REUSEPORT(_LB): ${enable_reuseport}
XDP support: ${enable_xdp}
- Polling method: ${poll_method}
+ Socket polling: ${socket_polling}
Memory allocator: ${with_memory_allocator}
Fast zone parser: ${enable_fastparser}
Utilities with IDN: ${with_libidn}
diff --git a/src/knot/Makefile.inc b/src/knot/Makefile.inc
index 88b1f13e2..5cc44fb33 100644
--- a/src/knot/Makefile.inc
+++ b/src/knot/Makefile.inc
@@ -119,9 +119,6 @@ libknotd_la_SOURCES = \
knot/query/query.h \
knot/query/requestor.c \
knot/query/requestor.h \
- knot/common/apoll.h \
- knot/common/epoll_ctx.c \
- knot/common/epoll_ctx.h \
knot/common/evsched.c \
knot/common/evsched.h \
knot/common/fdset.c \
diff --git a/src/knot/common/apoll.h b/src/knot/common/apoll.h
deleted file mode 100644
index 8c666a191..000000000
--- a/src/knot/common/apoll.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#ifdef HAVE_EPOLL
- #include "knot/common/epoll_ctx.h"
-
- #define apoll_t epoll_ctx_t
- #define apoll_it_t epoll_it_t
- #define apoll_sweep_state epoll_ctx_sweep_state_t
-
- #define APOLL_CTX_KEEP EPOLL_CTX_KEEP
- #define APOLL_CTX_SWEEP EPOLL_CTX_SWEEP
-
- #define APOLL_POLLIN EPOLLIN
-
- #define APOLL_CTX_INIT_SIZE EPOLL_INIT_SIZE
-
- #define apoll_init(ctx, size) epoll_ctx_init(ctx, size)
- #define apoll_close(ctx) epoll_ctx_close(ctx)
- #define apoll_clear(ctx) epoll_ctx_clear(ctx)
- #define apoll_add(ctx, fd, events, usrctx) epoll_ctx_add(ctx, fd, events, usrctx)
- #define apoll_set_watchdog(ctx, idx, interval) epoll_ctx_set_watchdog(ctx, idx, interval)
- #define apoll_get_length(ctx) epoll_ctx_get_length(ctx)
- #define apoll_get_fd(ctx, idx) epoll_ctx_get_fd(ctx, idx)
- #define apoll_poll(ctx, it, offset, timeout) epoll_ctx_wait(ctx, it, offset, timeout)
- #define apoll_sweep(ctx, cb, data) epoll_ctx_sweep(ctx, cb, data)
- #define apoll_it_next(it) epoll_it_next(it)
- #define apoll_it_done(it) epoll_it_done(it)
- #define apoll_it_remove(it) epoll_it_remove(it)
- #define apoll_it_get_fd(it) epoll_it_get_fd(it)
- #define apoll_it_get_idx(it) epoll_it_get_idx(it)
- #define apoll_it_ev_is_pollin(it) epoll_it_ev_is_pollin(it)
- #define apoll_it_ev_is_error(it) epoll_it_ev_is_err(it)
-#else
- #include "knot/common/fdset.h"
-
- #define apoll_t fdset_t
- #define apoll_it_t fdset_it_t
- #define apoll_sweep_state fdset_sweep_state_t
-
- #define APOLL_CTX_KEEP FDSET_KEEP
- #define APOLL_CTX_SWEEP FDSET_SWEEP
-
- #define APOLL_POLLIN POLLIN
-
- #define APOLL_CTX_INIT_SIZE FDSET_INIT_SIZE
-
- #define apoll_init(ctx, size) fdset_init(ctx, size)
- #define apoll_close(ctx)
- #define apoll_clear(ctx) fdset_clear(ctx)
- #define apoll_add(ctx, fd, events, usrctx) fdset_add(ctx, fd, events, usrctx)
- #define apoll_set_watchdog(ctx, idx, interval) fdset_set_watchdog(ctx, idx, interval)
- #define apoll_get_length(ctx) fdset_get_length(ctx)
- #define apoll_get_fd(ctx, idx) fdset_get_fd(ctx, idx)
- #define apoll_poll(ctx, it, offset, timeout) fdset_poll(ctx, it, offset, timeout)
- #define apoll_sweep(ctx, cb, data) fdset_sweep(ctx, cb, data)
- #define apoll_it_next(it) fdset_it_next(it)
- #define apoll_it_done(it) fdset_it_done(it)
- #define apoll_it_remove(it) fdset_it_remove(it)
- #define apoll_it_get_fd(it) fdset_it_get_fd(it)
- #define apoll_it_get_idx(it) fdset_it_get_idx(it)
- #define apoll_it_ev_is_pollin(it) fdset_it_ev_is_pollin(it)
- #define apoll_it_ev_is_error(it) fdset_it_ev_is_err(it)
-#endif
diff --git a/src/knot/common/epoll_ctx.c b/src/knot/common/epoll_ctx.c
deleted file mode 100644
index 46ad8f1fa..000000000
--- a/src/knot/common/epoll_ctx.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-#ifdef HAVE_EPOLL
-
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <time.h>
-#include <assert.h>
-#include "knot/common/epoll_ctx.h"
-#include "contrib/time.h"
-#include "libknot/errcode.h"
-
-/* Realloc memory or return error (part of epoll_ctx_resize). */
-#define MEM_RESIZE(tmp, p, n) \
- if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) \
- return KNOT_ENOMEM; \
- (p) = tmp;
-
-static int epoll_ctx_resize(epoll_ctx_t *set, const unsigned size)
-{
- void *tmp = NULL;
- MEM_RESIZE(tmp, set->usrctx, size);
- MEM_RESIZE(tmp, set->timeout, size);
- MEM_RESIZE(tmp, set->ev, size);
- set->size = size;
- return KNOT_EOK;
-}
-
-int epoll_ctx_init(epoll_ctx_t *ctx, const unsigned size)
-{
- if (ctx == NULL) {
- return KNOT_EINVAL;
- }
-
- memset(ctx, 0, sizeof(epoll_ctx_t));
-
- ctx->efd = epoll_create1(0);
- if (ctx->efd < 0) {
- return KNOT_EMFILE;
- }
-
- return epoll_ctx_resize(ctx, size);
-}
-
-int epoll_ctx_clear(epoll_ctx_t* ctx)
-{
- if (ctx == NULL) {
- return KNOT_EINVAL;
- }
-
- int bck = ctx->efd;
- free(ctx->ev);
- free(ctx->usrctx);
- free(ctx->timeout);
- free(ctx->recv_ev);
- memset(ctx, 0, sizeof(epoll_ctx_t));
- ctx->efd = bck;
- return KNOT_EOK;
-}
-
-void epoll_ctx_close(const epoll_ctx_t* ctx)
-{
- close(ctx->efd);
-}
-
-int epoll_ctx_add(epoll_ctx_t *ctx, const int fd, const unsigned events, void *usrctx)
-{
- if (ctx == NULL || fd < 0) {
- return KNOT_EINVAL;
- }
-
- /* Realloc needed. */
- if (ctx->n == ctx->size && epoll_ctx_resize(ctx, ctx->size + EPOLL_INIT_SIZE))
- return KNOT_ENOMEM;
-
- /* Initialize. */
- const int i = ctx->n++;
- ctx->ev[i].data.fd = fd;
- ctx->ev[i].events = events;
- ctx->usrctx[i] = usrctx;
- ctx->timeout[i] = 0;
- struct epoll_event ev = {
- .data.u64 = i,
- .events = events
- };
- epoll_ctl(ctx->efd, EPOLL_CTL_ADD, fd, &ev);
-
- return i;
-}
-
-static int epoll_ctx_remove(epoll_ctx_t *ctx, const unsigned idx)
-{
- if (ctx == NULL || idx >= ctx->n) {
- return KNOT_EINVAL;
- }
-
- epoll_ctl(ctx->efd, EPOLL_CTL_DEL, ctx->ev[idx].data.fd, NULL);
- const unsigned last = --ctx->n;
- /* Nothing else if it is the last one.
- * Move last -> i if some remain. */
- if (idx < last) {
- ctx->ev[idx] = ctx->ev[last];
- ctx->timeout[idx] = ctx->timeout[last];
- ctx->usrctx[idx] = ctx->usrctx[last];
- struct epoll_event ev = {
- .data.u64 = idx,
- .events = ctx->ev[idx].events
- };
- epoll_ctl(ctx->efd, EPOLL_CTL_MOD, ctx->ev[last].data.fd, &ev);
- }
-
- return KNOT_EOK;
-}
-
-int epoll_ctx_wait(epoll_ctx_t *ctx, epoll_it_t *it, const unsigned offset, const int timeout)
-{
- if (ctx->recv_size != ctx->size) {
- void *tmp = NULL;
- MEM_RESIZE(tmp, ctx->recv_ev, ctx->size);
- ctx->recv_size = ctx->size;
- }
-
- it->ctx = ctx;
- it->ptr = ctx->recv_ev;
- it->offset = offset;
-
- /*
- * NOTE: Can't skip offset without bunch of syscalls!!
- * Because of that it waits for `ctx->n` (every socket). Offset is set when TCP
- * trotlling is ON. Sometimes it can return with sockets where none of them are
- * connection socket, but it should not be common.
- * But it can cause problems when adopted in other use-case.
- */
- return it->unprocessed = epoll_wait(ctx->efd, ctx->recv_ev, ctx->n, timeout * 1000);
-}
-
-int epoll_ctx_set_watchdog(epoll_ctx_t *ctx, const unsigned idx, const int interval)
-{
- if (ctx == NULL || idx >= ctx->n) {
- return KNOT_EINVAL;
- }
-
- /* Lift watchdog if interval is negative. */
- if (interval < 0) {
- ctx->timeout[idx] = 0;
- return KNOT_EOK;
- }
-
- /* Update clock. */
- const struct timespec now = time_now();
- ctx->timeout[idx] = now.tv_sec + interval; /* Only seconds precision. */
- return KNOT_EOK;
-}
-
-int epoll_ctx_get_fd(const epoll_ctx_t *ctx, const unsigned idx)
-{
- if (ctx == NULL || idx >= ctx->n) {
- return KNOT_EINVAL;
- }
-
- return ctx->ev[idx].data.u64;
-}
-
-unsigned epoll_ctx_get_length(const epoll_ctx_t *ctx)
-{
- assert(ctx);
- return ctx->n;
-}
-
-int epoll_ctx_sweep(epoll_ctx_t* ctx, const epoll_ctx_sweep_cb_t cb, void *data)
-{
- if (ctx == NULL || cb == NULL) {
- return KNOT_EINVAL;
- }
-
- /* Get time threshold. */
- const struct timespec now = time_now();
- unsigned idx = 0;
- while (idx < ctx->n) {
- /* Check sweep state, remove if requested. */
- if (ctx->timeout[idx] > 0 && ctx->timeout[idx] <= now.tv_sec) {
- const int fd = epoll_ctx_get_fd(ctx, idx);
- if (cb(ctx, fd, data) == EPOLL_CTX_SWEEP) {
- if (epoll_ctx_remove(ctx, idx) == KNOT_EOK) {
- continue; /* Stay on the index. */
- }
- }
- }
- ++idx;
- }
-
- return KNOT_EOK;
-}
-
-void epoll_it_next(epoll_it_t *it)
-{
- do {
- it->ptr++;
- it->unprocessed--;
- } while (it->unprocessed > 0 && epoll_it_get_idx(it) < it->offset);
-}
-
-int epoll_it_done(const epoll_it_t *it)
-{
- return it->unprocessed <= 0;
-}
-
-int epoll_it_remove(epoll_it_t *it)
-{
- if (it == NULL || it->ctx == NULL) {
- return KNOT_EINVAL;
- }
- epoll_ctx_t *ctx = it->ctx;
- const int idx = epoll_it_get_idx(it);
- epoll_ctx_remove(ctx, idx);
- /* Iterator should return on last valid already processed element. */
- /* On `next` call (in for-loop) will point on first unprocessed. */
- it->ptr--;
- return KNOT_EOK;
-}
-
-int epoll_it_get_fd(const epoll_it_t *it)
-{
- assert(it != NULL);
- return it->ctx->ev[epoll_it_get_idx(it)].data.fd;
-}
-
-unsigned epoll_it_get_idx(const epoll_it_t *it)
-{
- assert(it != NULL);
- return it->ptr->data.u64;
-}
-
-int epoll_it_ev_is_pollin(const epoll_it_t *it)
-{
- assert(it != NULL);
- return it->ptr->events & EPOLLIN;
-}
-
-int epoll_it_ev_is_err(const epoll_it_t *it)
-{
- assert(it != NULL);
- return it->ptr->events & (EPOLLERR|EPOLLHUP);
-}
-
-#endif
diff --git a/src/knot/common/epoll_ctx.h b/src/knot/common/epoll_ctx.h
deleted file mode 100644
index a8fdc5f66..000000000
--- a/src/knot/common/epoll_ctx.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
- */
-
-/*!
- * \brief I/O multiplexing with context and timeouts for each fd.
- */
-
-#pragma once
-
-#ifdef HAVE_EPOLL
-
-#include <stddef.h>
-#include <signal.h>
-#include <sys/time.h>
-#include <sys/epoll.h>
-
-#define EPOLL_INIT_SIZE 256 /* Resize step. */
-
-/*! \brief Set of filedescriptors with associated context and timeouts. */
-typedef struct epoll_ctx {
- int efd; /*!< File descriptor of epoll. */
- unsigned n; /*!< Active fds. */
- unsigned size; /*!< Array size (allocated). */
- unsigned recv_size; /*!< Size of array for received events. */
- struct epoll_event *ev; /*!< Epoll event storage for each fd */
- void* *usrctx; /*!< Context for each fd. */
- time_t *timeout; /*!< Timeout for each fd (seconds precision). */
- struct epoll_event *recv_ev; /*!< Array for received events. */
-} epoll_ctx_t;
-
-typedef struct epoll_it {
- epoll_ctx_t *ctx; /*!< Iterator related context. */
- struct epoll_event *ptr; /*!< Pointer on processed event. */
- int offset; /*!< Event index offset. */
- int unprocessed; /*!< Unprocessed events left. */
-} epoll_it_t;
-
-/*! \brief Mark-and-sweep state. */
-typedef enum epoll_ctx_sweep_state {
- EPOLL_CTX_KEEP,
- EPOLL_CTX_SWEEP
-} epoll_ctx_sweep_state_t;
-
-/*! \brief Sweep callback (set, index, data) */
-typedef enum epoll_ctx_sweep_state (*epoll_ctx_sweep_cb_t)(epoll_ctx_t*, int, void*);
-
-/*!
- * \brief Initialize epoll_ctx to given size.
- *
- * \param ctx Target ctx.
- * \param size Initial ctx size.
- *
- * \retval ret == 0 if successful.
- * \retval ret < 0 on error.
- */
-int epoll_ctx_init(epoll_ctx_t *ctx, const unsigned size);
-
-/*!
- * \brief Clear whole context of epoll_ctx.
- *
- * \param ctx Target ctx.
- *
- * \retval ret == 0 if successful.
- * \retval ret < 0 on error.
- */
-int epoll_ctx_clear(epoll_ctx_t* ctx);
-
-/*!
- * \brief Close epoll related file descriptor.
- *
- * \param ctx Target ctx.
- */
-void epoll_ctx_close(const epoll_ctx_t* ctx);
-
-/*!
- * \brief Add file descriptor to watched ctx.
- *
- * \param ctx Target ctx.
- * \param fd Added file descriptor.
- * \param events Mask of watched events.
- * \param usrctx Context (optional).
- *
- * \retval ret >= 0 is index of the added fd.
- * \retval ret < 0 on errors.
- */
-int epoll_ctx_add(epoll_ctx_t *ctx, const int fd, const unsigned events, void *usrctx);
-
-/*!
- * \brief Wait for receive events.
- *
- * \param ctx Target ctx.
- * \param it Event iterator storage.
- * \param offset Index of first event.
- * \param timeout Timeout of operation (negative number for unlimited).
- *
- * \retval ret >= 0 represents number of events received.
- * \retval ret < 0 on error.
- */
-int epoll_ctx_wait(epoll_ctx_t *ctx, epoll_it_t *it, const unsigned offset, const int timeout);
-
-/*!
- * \brief Set file descriptor watchdog interval.
- *
- * Set time (interval from now) after which the associated file descriptor
- * should be sweeped (see epoll_ctx_sweep). Good example is setting a grace period
- * of N seconds between socket activity. If socket is not active within
- * <now, now + interval>, it is sweeped and potentially closed.
- *
- * \param ctx Target ctx.
- * \param i Index for the file descriptor.
- * \param interval Allowed interval without activity (seconds).
- * -1 disables watchdog timer
- *
- * \retval ret == 0 on success.
- * \retval ret < 0 on errors.
- */
-int epoll_ctx_set_watchdog(epoll_ctx_t *ctx, const unsigned idx, const int interval);
-
-/*!
- * \brief Returns file descriptor based on index.
- *
- * \param ctx Target ctx.
- * \param idx Index of the file descriptor.
- *
- * \retval ret >= 0 for file descriptor.
- * \retval ret < 0 on errors.
- */
-int epoll_ctx_get_fd(const epoll_ctx_t *ctx, const unsigned idx);
-
-/*!
- * \brief Returns number of file descriptors stored in ctx.
- *
- * \param ctx Target ctx.
- *
- * \retval Number of descriptors stored
- */
-unsigned epoll_ctx_get_length(const epoll_ctx_t *ctx);
-
-/*!
- * \brief Sweep file descriptors with exceeding inactivity period.
- *
- * \param ctx Target ctx.
- * \param cb Callback for sweeped descriptors.
- * \param data Pointer to extra data.
- *
- * \retval number of sweeped descriptors.
- * \retval -1 on errors.
- */
-int epoll_ctx_sweep(epoll_ctx_t* ctx, const epoll_ctx_sweep_cb_t cb, void *data);
-
-/*!
- * \brief Move iterator on next received event.
- *
- * \param it Target iterator.
- */
-void epoll_it_next(epoll_it_t *it);
-
-/*!
- * \brief Decide if there is more received events.
- *
- * \param it Target iterator.
- *
- * \retval Logical flag represents 'done' state.
- */
-int epoll_it_done(const epoll_it_t *it);
-
-/*!
- * \brief Remove file descriptor from watched ctx.
- *
- * \param it Target iterator.
- *
- * \retval 0 if successful.
- * \retval ret < 0 on error.
- */
-int epoll_it_remove(epoll_it_t *it);
-
-/*!
- * \brief Get file descriptor of event referenced by iterator.
- *
- * \param it Target iterator.
- *
- * \retval ret >= 0 for file descriptor.
- * \retval ret < 0 on errors.
- */
-int epoll_it_get_fd(const epoll_it_t *it);
-
-/*!
- * \brief Get index of event in set referenced by iterator.
- *
- * \param it Target iterator.
- *
- * \retval Index of event.
- */
-unsigned epoll_it_get_idx(const epoll_it_t *it);
-
-/*!
- * \brief Decide if event referenced by iterator is POLLIN event.
- *
- * \param it Target iterator.
- *
- * \retval Logical flag represents 'POLLIN' event received.
- */
-int epoll_it_ev_is_pollin(const epoll_it_t *it);
-
-/*!
- * \brief Decide if event referenced by iterator is error event.
- *
- * \param it Target iterator.
- *
- * \retval Logical flag represents error event received.
- */
-int epoll_it_ev_is_err(const epoll_it_t *it);
-
-#endif
diff --git a/src/knot/common/fdset.c b/src/knot/common/fdset.c
index aa98c845d..f4a533861 100644
--- a/src/knot/common/fdset.c
+++ b/src/knot/common/fdset.c
@@ -14,29 +14,32 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-#ifdef ENABLE_POLL
-
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <time.h>
-#include <assert.h>
+
#include "knot/common/fdset.h"
#include "contrib/time.h"
-#include "libknot/errcode.h"
-/* Realloc memory or return error (part of fdset_resize). */
-#define MEM_RESIZE(tmp, p, n) \
- if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) \
+#define MEM_RESIZE(p, n) { \
+ void *tmp = NULL; \
+ if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) { \
return KNOT_ENOMEM; \
- (p) = tmp;
+ } \
+ (p) = tmp; \
+}
static int fdset_resize(fdset_t *set, const unsigned size)
{
- void *tmp = NULL;
- MEM_RESIZE(tmp, set->ctx, size);
- MEM_RESIZE(tmp, set->pfd, size);
- MEM_RESIZE(tmp, set->timeout, size);
+ assert(set);
+
+ MEM_RESIZE(set->ctx, size);
+ MEM_RESIZE(set->timeout, size);
+#ifdef HAVE_EPOLL
+ MEM_RESIZE(set->ev, size);
+#else
+ MEM_RESIZE(set->pfd, size);
+#endif
set->size = size;
return KNOT_EOK;
}
@@ -47,77 +50,169 @@ int fdset_init(fdset_t *set, const unsigned size)
return KNOT_EINVAL;
}
- memset(set, 0, sizeof(fdset_t));
- return fdset_resize(set, size);
+ memset(set, 0, sizeof(*set));
+
+#ifdef HAVE_EPOLL
+ set->efd = epoll_create1(0);
+ if (set->efd < 0) {
+ return knot_map_errno();
+ }
+#endif
+ int ret = fdset_resize(set, size);
+#ifdef HAVE_EPOLL
+ if (ret != KNOT_EOK) {
+ close(set->efd);
+ }
+#endif
+ return ret;
}
-int fdset_clear(fdset_t* set)
+void fdset_clear(fdset_t *set)
{
if (set == NULL) {
- return KNOT_EINVAL;
+ return;
}
free(set->ctx);
- free(set->pfd);
free(set->timeout);
- memset(set, 0, sizeof(fdset_t));
- return KNOT_EOK;
+#ifdef HAVE_EPOLL
+ free(set->ev);
+ free(set->recv_ev);
+ close(set->efd);
+#else
+ free(set->pfd);
+#endif
+ memset(set, 0, sizeof(*set));
}
-int fdset_add(fdset_t *set, const int fd, const unsigned events, void *ctx)
+int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx)
{
if (set == NULL || fd < 0) {
return KNOT_EINVAL;
}
- /* Realloc needed. */
- if (set->n == set->size && fdset_resize(set, set->size + FDSET_INIT_SIZE))
+ if (set->n == set->size &&
+ fdset_resize(set, set->size + FDSET_RESIZE_STEP) != KNOT_EOK) {
return KNOT_ENOMEM;
+ }
- /* Initialize. */
- const int i = set->n++;
- set->pfd[i].fd = fd;
- set->pfd[i].events = events;
- set->pfd[i].revents = 0;
- set->ctx[i] = ctx;
- set->timeout[i] = 0;
+ const int idx = set->n++;
+ set->ctx[idx] = ctx;
+ set->timeout[idx] = 0;
+#ifdef HAVE_EPOLL
+ set->ev[idx].data.fd = fd;
+ set->ev[idx].events = events;
+ struct epoll_event ev = {
+ .data.u64 = idx,
+ .events = events
+ };
+ if (epoll_ctl(set->efd, EPOLL_CTL_ADD, fd, &ev) != 0) {
+ return knot_map_errno();
+ }
+#else
+ set->pfd[idx].fd = fd;
+ set->pfd[idx].events = events;
+ set->pfd[idx].revents = 0;
+#endif
- /* Return index to this descriptor. */
- return i;
+ return idx;
}
-//TODO should be static, but it is a dependency in tests (? remove from
-// tests or something ?)
int fdset_remove(fdset_t *set, const unsigned idx)
{
if (set == NULL || idx >= set->n) {
return KNOT_EINVAL;
}
+ const int fd = fdset_get_fd(set, idx);
+#ifdef HAVE_EPOLL
+ /* This is necessary as DDNS duplicates file descriptors! */
+ (void)epoll_ctl(set->efd, EPOLL_CTL_DEL, fd, NULL);
+#endif
+ close(fd);
+
const unsigned last = --set->n;
- /* Nothing else if it is the last one.
- * Move last -> i if some remain. */
+ /* Nothing else if it is the last one. Move last -> i if some remain. */
if (idx < last) {
- set->pfd[idx] = set->pfd[last];
- set->timeout[idx] = set->timeout[last];
set->ctx[idx] = set->ctx[last];
+ set->timeout[idx] = set->timeout[last];
+#ifdef HAVE_EPOLL
+ set->ev[idx] = set->ev[last];
+ struct epoll_event ev = {
+ .data.u64 = idx,
+ .events = set->ev[idx].events
+ };
+ if (epoll_ctl(set->efd, EPOLL_CTL_MOD, set->ev[last].data.fd, &ev) != 0) {
+ return knot_map_errno();
+ }
+#else
+ set->pfd[idx] = set->pfd[last];
+#endif
}
return KNOT_EOK;
}
-int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout)
+int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout_ms)
{
- it->ctx = set;
+ if (it == NULL) {
+ return KNOT_EINVAL;
+ }
+ it->unprocessed = 0;
+
+ if (set == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ it->set = set;
it->idx = offset;
- it->unprocessed = poll(&set->pfd[offset], set->n - offset, 1000 * timeout);
+#ifdef HAVE_EPOLL
+ if (set->recv_size != set->size) {
+ MEM_RESIZE(set->recv_ev, set->size);
+ set->recv_size = set->size;
+ }
+ it->ptr = set->recv_ev;
+ it->dirty = 0;
+ /*
+ * NOTE: Can't skip offset without bunch of syscalls!!
+ * Because of that it waits for `ctx->n` (every socket). Offset is set when TCP
+ * trotlling is ON. Sometimes it can return with sockets where none of them are
+ * connection socket, but it should not be common.
+ * But it can cause problems when adopted in other use-case.
+ */
+ return it->unprocessed = epoll_wait(set->efd, set->recv_ev, set->n, timeout_ms);
+#else
+ it->unprocessed = poll(&set->pfd[offset], set->n - offset, timeout_ms);
while (it->unprocessed > 0 && set->pfd[it->idx].revents == 0) {
it->idx++;
}
return it->unprocessed;
+#endif
}
-int fdset_set_watchdog(fdset_t* set, const unsigned idx, const int interval)
+void fdset_it_commit(fdset_it_t *it)
+{
+ if (it == NULL) {
+ return;
+ }
+#ifdef HAVE_EPOLL
+ /* NOTE: reverse iteration to avoid as much "remove last" operations
+ * as possible. I'm not sure about performance improvement. It
+ * will skip some syscalls at begin of iteration, but what
+ * performance increase do we get is a question.
+ */
+ fdset_t *set = it->set;
+ for (int i = set->n - 1; it->dirty > 0 && i >= 0; --i) {
+ if (set->ev[i].events == FDSET_REMOVE_FLAG) {
+ (void)fdset_remove(set, i);
+ it->dirty--;
+ }
+ }
+ assert(it->dirty == 0);
+#endif
+}
+
+int fdset_set_watchdog(fdset_t *set, const unsigned idx, const int interval)
{
if (set == NULL || idx >= set->n) {
return KNOT_EINVAL;
@@ -136,26 +231,10 @@ int fdset_set_watchdog(fdset_t* set, const unsigned idx, const int interval)
return KNOT_EOK;
}
-int fdset_get_fd(const fdset_t *set, const unsigned idx)
-{
- if (set == NULL || idx >= set->n) {
- return KNOT_EINVAL;
- }
-
- return set->pfd[idx].fd;
-}
-
-
-unsigned fdset_get_length(const fdset_t *set)
-{
- assert(set);
- return set->n;
-}
-
-int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data)
+void fdset_sweep(fdset_t *set, const fdset_sweep_cb_t cb, void *data)
{
if (set == NULL || cb == NULL) {
- return KNOT_EINVAL;
+ return;
}
/* Get time threshold. */
@@ -166,68 +245,9 @@ int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data)
if (set->timeout[idx] > 0 && set->timeout[idx] <= now.tv_sec) {
const int fd = fdset_get_fd(set, idx);
if (cb(set, fd, data) == FDSET_SWEEP) {
- if (fdset_remove(set, idx) == KNOT_EOK) {
- continue; /* Stay on the index. */
- }
+ (void)fdset_remove(set, idx);
}
}
++idx;
}
- return KNOT_EOK;
-}
-
-void fdset_it_next(fdset_it_t *it)
-{
- if (--it->unprocessed > 0) {
- while (it->ctx->pfd[++it->idx].revents == 0); /* nop */
- }
}
-
-int fdset_it_done(const fdset_it_t *it)
-{
- return it->unprocessed <= 0;
-}
-
-int fdset_it_remove(fdset_it_t *it)
-{
- if (it == NULL || it->ctx == NULL) {
- return KNOT_EINVAL;
- }
-
- fdset_t *set = it->ctx;
- const unsigned idx = fdset_it_get_idx(it);
- fdset_remove(set, idx);
- /* Iterator should return on last valid already processed element. */
- /* On `next` call (in for-loop) will point on first unprocessed. */
- --it->idx;
- return KNOT_EOK;
-}
-
-int fdset_it_get_fd(const fdset_it_t *it)
-{
- if (it == NULL) {
- return KNOT_EINVAL;
- }
-
- return it->ctx->pfd[it->idx].fd;
-}
-
-unsigned fdset_it_get_idx(const fdset_it_t *it)
-{
- assert(it);
- return it->idx;
-}
-
-int fdset_it_ev_is_pollin(const fdset_it_t *it)
-{
- assert(it);
- return it->ctx->pfd[it->idx].revents & POLLIN;
-}
-
-int fdset_it_ev_is_err(const fdset_it_t *it)
-{
- assert(it);
- return it->ctx->pfd[it->idx].revents & (POLLERR|POLLHUP|POLLNVAL);
-}
-
-#endif
diff --git a/src/knot/common/fdset.h b/src/knot/common/fdset.h
index a75912c96..f6d5bb885 100644
--- a/src/knot/common/fdset.h
+++ b/src/knot/common/fdset.h
@@ -20,82 +20,107 @@
#pragma once
-#if !defined(HAVE_EPOLL)
-
+#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
+#include <time.h>
+
+#ifdef HAVE_EPOLL
+#include <sys/epoll.h>
+#else
#include <poll.h>
-#include <sys/time.h>
-#include <signal.h>
-
-#define FDSET_INIT_SIZE 256 /* Resize step. */
-
-/*! \brief Set of filedescriptors with associated context and timeouts. */
-typedef struct fdset {
- unsigned n; /*!< Active fds. */
- unsigned size; /*!< Array size (allocated). */
- void* *ctx; /*!< Context for each fd. */
- struct pollfd *pfd; /*!< poll state for each fd */
- time_t *timeout; /*!< Timeout for each fd (seconds precision). */
+#endif
+
+#include "libknot/errcode.h"
+
+#define FDSET_RESIZE_STEP 256
+#ifdef HAVE_EPOLL
+#define FDSET_REMOVE_FLAG ~0U
+#endif
+
+/*! \brief Set of file descriptors with associated context and timeouts. */
+typedef struct {
+ unsigned n; /*!< Active fds. */
+ unsigned size; /*!< Array size (allocated). */
+ void **ctx; /*!< Context for each fd. */
+ time_t *timeout; /*!< Timeout for each fd (seconds precision). */
+#ifdef HAVE_EPOLL
+ struct epoll_event *ev; /*!< Epoll event storage for each fd. */
+ struct epoll_event *recv_ev; /*!< Array for polled events. */
+ unsigned recv_size; /*!< Size of array for polled events. */
+ int efd; /*!< File descriptor of epoll. */
+#else
+ struct pollfd *pfd; /*!< Poll state for each fd. */
+#endif
} fdset_t;
/*! \brief State of iterator over received events */
-typedef struct fdset_it {
- fdset_t *ctx; /*!< Source fdset_t. */
- unsigned idx; /*!< Index of processed event. */
- int unprocessed; /*!< Unprocessed events left. */
+typedef struct {
+ fdset_t *set; /*!< Source fdset_t. */
+ unsigned idx; /*!< Event index offset. */
+ int unprocessed; /*!< Unprocessed events left. */
+#ifdef HAVE_EPOLL
+ struct epoll_event *ptr; /*!< Pointer on processed event. */
+ unsigned dirty; /*!< Number of fd to be removed on commit. */
+#endif
} fdset_it_t;
+typedef enum {
+#ifdef HAVE_EPOLL
+ FDSET_POLLIN = EPOLLIN,
+ FDSET_POLLOUT = EPOLLOUT,
+#else
+ FDSET_POLLIN = POLLIN,
+ FDSET_POLLOUT = POLLOUT,
+#endif
+} fdset_event_t;
+
/*! \brief Mark-and-sweep state. */
-typedef enum fdset_sweep_state {
+typedef enum {
FDSET_KEEP,
FDSET_SWEEP
} fdset_sweep_state_t;
/*! \brief Sweep callback (set, index, data) */
-typedef enum fdset_sweep_state (*fdset_sweep_cb_t)(fdset_t*, int, void*);
+typedef fdset_sweep_state_t (*fdset_sweep_cb_t)(fdset_t *, int, void *);
/*!
* \brief Initialize fdset to given size.
*
- * \param set Target set.
- * \param size Initial set size.
+ * \param set Target set.
+ * \param size Initial set size.
*
- * \retval ret == 0 if successful.
- * \retval ret < 0 on error.
+ * \return Error code, KNOT_EOK if success.
*/
int fdset_init(fdset_t *set, const unsigned size);
/*!
- * \brief Clear whole context of FDSET.
- *
- * \param set Target set.
+ * \brief Clear whole context of the fdset.
*
- * \retval ret == 0 if successful.
- * \retval ret < 0 on error.
+ * \param set Target set.
*/
-int fdset_clear(fdset_t* set);
+void fdset_clear(fdset_t *set);
/*!
* \brief Add file descriptor to watched set.
*
- * \param set Target set.
- * \param fd Added file descriptor.
- * \param events Mask of watched events.
- * \param ctx Context (optional).
+ * \param set Target set.
+ * \param fd Added file descriptor.
+ * \param events Mask of watched events.
+ * \param ctx Context (optional).
*
* \retval ret >= 0 is index of the added fd.
- * \retval ret < 0 on errors.
+ * \retval ret < 0 on error.
*/
-int fdset_add(fdset_t *set, const int fd, const unsigned events, void *ctx);
+int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx);
/*!
- * \brief Remove file descriptor from watched set.
+ * \brief Remove and close file descriptor from watched set.
*
- * \param set Target set.
- * \param i Index of the removed fd.
+ * \param set Target set.
+ * \param idx Index of the removed fd.
*
- * \retval 0 if successful.
- * \retval ret < 0 on errors.
+ * \return Error code, KNOT_EOK if success.
*/
int fdset_remove(fdset_t *set, const unsigned idx);
@@ -104,15 +129,15 @@ int fdset_remove(fdset_t *set, const unsigned idx);
*
* Skip events based on offset and set iterator on first event.
*
- * \param set Target set.
- * \param it Event iterator storage.
- * \param offset Index of first event.
- * \param timeout Timeout of operation (negative number for unlimited).
+ * \param set Target set.
+ * \param it Event iterator storage.
+ * \param offset Index of first event.
+ * \param timeout_ms Timeout of operation in milliseconds (use -1 for unlimited).
*
* \retval ret >= 0 represents number of events received.
* \retval ret < 0 on error.
*/
-int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout);
+int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout_ms);
/*!
* \brief Set file descriptor watchdog interval.
@@ -120,111 +145,194 @@ int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int ti
* Set time (interval from now) after which the associated file descriptor
* should be sweeped (see fdset_sweep). Good example is setting a grace period
* of N seconds between socket activity. If socket is not active within
- * <now, now + interval>, it is sweeped and potentially closed.
+ * <now, now + interval>, it is sweeped and closed.
*
- * \param set Target set.
- * \param idx Index of the file descriptor.
- * \param interval Allowed interval without activity (seconds).
- * -1 disables watchdog timer
+ * \param set Target set.
+ * \param idx Index of the file descriptor.
+ * \param interval Allowed interval without activity (seconds).
+ * -1 disables watchdog timer.
*
- * \retval ret == 0 on success.
- * \retval ret < 0 on errors.
+ * \return Error code, KNOT_EOK if success.
*/
int fdset_set_watchdog(fdset_t *set, const unsigned idx, const int interval);
/*!
+ * \brief Sweep file descriptors with exceeding inactivity period.
+ *
+ * \param set Target set.
+ * \param cb Callback for sweeped descriptors.
+ * \param data Pointer to extra data.
+ */
+void fdset_sweep(fdset_t *set, const fdset_sweep_cb_t cb, void *data);
+
+/*!
* \brief Returns file descriptor based on index.
*
- * \param set Target set.
- * \param idx Index of the file descriptor.
+ * \param set Target set.
+ * \param idx Index of the file descriptor.
*
* \retval ret >= 0 for file descriptor.
* \retval ret < 0 on errors.
*/
-int fdset_get_fd(const fdset_t *set, const unsigned idx);
+inline static int fdset_get_fd(const fdset_t *set, const unsigned idx)
+{
+ assert(set && idx < set->n);
+
+#ifdef HAVE_EPOLL
+ return set->ev[idx].data.fd;
+#else
+ return set->pfd[idx].fd;
+#endif
+}
/*!
* \brief Returns number of file descriptors stored in set.
*
- * \param set Target set.
+ * \param set Target set.
*
* \retval Number of descriptors stored
*/
-unsigned fdset_get_length(const fdset_t *set);
+inline static unsigned fdset_get_length(const fdset_t *set)
+{
+ assert(set);
+
+ return set->n;
+}
/*!
- * \brief Sweep file descriptors with exceeding inactivity period.
+ * \brief Get index of event in set referenced by iterator.
*
- * \param set Target set.
- * \param cb Callback for sweeped descriptors.
- * \param data Pointer to extra data.
+ * \param it Target iterator.
*
- * \retval number of sweeped descriptors.
- * \retval -1 on errors.
+ * \retval Index of event.
*/
-int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data);
+inline static unsigned fdset_it_get_idx(const fdset_it_t *it)
+{
+ assert(it);
+
+#ifdef HAVE_EPOLL
+ return it->ptr->data.u64;
+#else
+ return it->idx;
+#endif
+}
/*!
- * \brief Move iterator on next received event.
+ * \brief Get file descriptor of event referenced by iterator.
+ *
+ * \param it Target iterator.
*
- * \param it Target iterator.
+ * \retval ret >= 0 for file descriptor.
+ * \retval ret < 0 on errors.
*/
-void fdset_it_next(fdset_it_t *it);
+inline static int fdset_it_get_fd(const fdset_it_t *it)
+{
+ assert(it);
+
+#ifdef HAVE_EPOLL
+ return it->set->ev[fdset_it_get_idx(it)].data.fd;
+#else
+ return it->set->pfd[it->idx].fd;
+#endif
+}
/*!
- * \brief Decide if there is more received events.
- *
- * \param it Target iterator.
+ * \brief Move iterator on next received event.
*
- * \retval Logical flag represents 'done' state.
+ * \param it Target iterator.
*/
-int fdset_it_done(const fdset_it_t *it);
+inline static void fdset_it_next(fdset_it_t *it)
+{
+ assert(it);
+
+#ifdef HAVE_EPOLL
+ do {
+ it->ptr++;
+ it->unprocessed--;
+ } while (it->unprocessed > 0 && fdset_it_get_idx(it) < it->idx);
+#else
+ if (--it->unprocessed > 0) {
+ while (it->set->pfd[++it->idx].revents == 0); /* nop */
+ }
+#endif
+}
/*!
* \brief Remove file descriptor referenced by iterator from watched set.
*
- * \param it Target iterator.
+ * \param it Target iterator.
*
- * \retval 0 if successful.
- * \retval ret < 0 on error.
+ * \return Error code, KNOT_EOK if success.
*/
-int fdset_it_remove(fdset_it_t *it);
+inline static void fdset_it_remove(fdset_it_t *it)
+{
+ assert(it);
+
+#ifdef HAVE_EPOLL
+ const int idx = fdset_it_get_idx(it);
+ it->set->ev[idx].events = FDSET_REMOVE_FLAG;
+ it->dirty++;
+#else
+ (void)fdset_remove(it->set, fdset_it_get_idx(it));
+ /* Iterator should return on last valid already processed element. */
+ /* On `next` call (in for-loop) will point on first unprocessed. */
+ it->idx--;
+#endif
+}
/*!
- * \brief Get file descriptor of event referenced by iterator.
- *
- * \param it Target iterator.
+ * \brief Commit changes made in fdset using iterator.
*
- * \retval ret >= 0 for file descriptor.
- * \retval ret < 0 on errors.
+ * \param it Target iterator.
*/
-int fdset_it_get_fd(const fdset_it_t *it);
+void fdset_it_commit(fdset_it_t *it);
/*!
- * \brief Get index of event in set referenced by iterator.
+ * \brief Decide if there is more received events.
*
- * \param it Target iterator.
+ * \param it Target iterator.
*
- * \retval Index of event.
+ * \retval Logical flag representing 'done' state.
*/
-unsigned fdset_it_get_idx(const fdset_it_t *it);
+inline static bool fdset_it_is_done(const fdset_it_t *it)
+{
+ assert(it);
+
+ return it->unprocessed <= 0;
+}
/*!
* \brief Decide if event referenced by iterator is POLLIN event.
*
- * \param it Target iterator.
+ * \param it Target iterator.
*
* \retval Logical flag represents 'POLLIN' event received.
*/
-int fdset_it_ev_is_pollin(const fdset_it_t *it);
+inline static bool fdset_it_is_pollin(const fdset_it_t *it)
+{
+ assert(it);
+
+#ifdef HAVE_EPOLL
+ return it->ptr->events & EPOLLIN;
+#else
+ return it->set->pfd[it->idx].revents & POLLIN;
+#endif
+}
/*!
* \brief Decide if event referenced by iterator is error event.
*
- * \param it Target iterator.
+ * \param it Target iterator.
*
* \retval Logical flag represents error event received.
*/
-int fdset_it_ev_is_err(const fdset_it_t *it);
+inline static bool fdset_it_is_error(const fdset_it_t *it)
+{
+ assert(it);
+#ifdef HAVE_EPOLL
+ return it->ptr->events & (EPOLLERR | EPOLLHUP);
+#else
+ return it->set->pfd[it->idx].revents & (POLLERR | POLLHUP | POLLNVAL);
#endif
+}
diff --git a/src/knot/server/tcp-handler.c b/src/knot/server/tcp-handler.c
index 7d17e79ba..19abeaf89 100644
--- a/src/knot/server/tcp-handler.c
+++ b/src/knot/server/tcp-handler.c
@@ -32,7 +32,7 @@
#include "knot/server/server.h"
#include "knot/server/tcp-handler.h"
#include "knot/common/log.h"
-#include "knot/common/apoll.h"
+#include "knot/common/fdset.h"
#include "knot/nameserver/process_query.h"
#include "knot/query/layer.h"
#include "contrib/macros.h"
@@ -50,7 +50,7 @@ typedef struct tcp_context {
unsigned client_threshold; /*!< Index of first TCP client. */
struct timespec last_poll_time; /*!< Time of the last socket poll. */
bool is_throttled; /*!< TCP connections throttling switch. */
- apoll_t set; /*!< Set of server/client sockets. */
+ fdset_t set; /*!< Set of server/client sockets. */
unsigned thread_id; /*!< Thread identifier. */
unsigned max_worker_fds; /*!< Max TCP clients per worker configuration + no. of ifaces. */
int idle_timeout; /*!< [s] TCP idle timeout configuration. */
@@ -76,7 +76,7 @@ static void update_tcp_conf(tcp_context_t *tcp)
}
/*! \brief Sweep TCP connection. */
-static apoll_sweep_state tcp_sweep(apoll_t *set, int fd, void *data)
+static fdset_sweep_state_t tcp_sweep(fdset_t *set, int fd, void *data)
{
UNUSED(data);
assert(set && fd >= 0);
@@ -90,9 +90,7 @@ static apoll_sweep_state tcp_sweep(apoll_t *set, int fd, void *data)
log_notice("TCP, terminated inactive client, address %s", addr_str);
}
- close(fd);
-
- return APOLL_CTX_SWEEP;
+ return FDSET_SWEEP;
}
static bool tcp_active_state(int state)
@@ -117,7 +115,7 @@ static void tcp_log_error(struct sockaddr_storage *ss, const char *operation, in
}
static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces,
- apoll_t *fds, int thread_id)
+ fdset_t *fds, int thread_id)
{
if (n_ifaces == 0) {
return 0;
@@ -139,10 +137,13 @@ static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces,
tcp_id = thread_id - i->fd_udp_count;
}
#endif
- apoll_add(fds, i->fd_tcp[tcp_id], APOLL_POLLIN, NULL);
+ int ret = fdset_add(fds, i->fd_tcp[tcp_id], FDSET_POLLIN, NULL);
+ if (ret < 0) {
+ return 0;
+ }
}
- return apoll_get_length(fds);
+ return fdset_get_length(fds);
}
static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec *tx)
@@ -215,28 +216,28 @@ static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec
static void tcp_event_accept(tcp_context_t *tcp, unsigned i)
{
/* Accept client. */
- int fd = apoll_get_fd((&tcp->set), i);
+ int fd = fdset_get_fd(&tcp->set, i);
int client = net_accept(fd, NULL);
if (client >= 0) {
/* Assign to fdset. */
- int next_id = apoll_add(&tcp->set, client, APOLL_POLLIN, NULL);
- if (next_id < 0) {
+ int idx = fdset_add(&tcp->set, client, FDSET_POLLIN, NULL);
+ if (idx < 0) {
close(client);
return;
}
/* Update watchdog timer. */
- apoll_set_watchdog(&tcp->set, next_id, tcp->idle_timeout);
+ (void)fdset_set_watchdog(&tcp->set, idx, tcp->idle_timeout);
}
}
static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
{
- int fd = apoll_get_fd((&tcp->set), i);
- int ret = tcp_handle(tcp, fd, &tcp->iov[0], &tcp->iov[1]);
+ int ret = tcp_handle(tcp, fdset_get_fd(&tcp->set, i),
+ &tcp->iov[0], &tcp->iov[1]);
if (ret == KNOT_EOK) {
/* Update socket activity timer. */
- apoll_set_watchdog(&tcp->set, i, tcp->idle_timeout);
+ (void)fdset_set_watchdog(&tcp->set, i, tcp->idle_timeout);
}
return ret;
@@ -244,33 +245,33 @@ static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
static void tcp_wait_for_events(tcp_context_t *tcp)
{
- apoll_t *set = &tcp->set;
+ fdset_t *set = &tcp->set;
/* Check if throttled with many open TCP connections. */
- assert(apoll_get_length(set) <= tcp->max_worker_fds);
- tcp->is_throttled = apoll_get_length(set) == tcp->max_worker_fds;
+ assert(fdset_get_length(set) <= tcp->max_worker_fds);
+ tcp->is_throttled = fdset_get_length(set) == tcp->max_worker_fds;
/* If throttled, temporarily ignore new TCP connections. */
- unsigned i = tcp->is_throttled ? tcp->client_threshold : 0;
+ unsigned offset = tcp->is_throttled ? tcp->client_threshold : 0;
/* Wait for events. */
- apoll_it_t it;
- apoll_poll(set, &it, i, TCP_SWEEP_INTERVAL);
+ fdset_it_t it;
+ (void)fdset_poll(set, &it, offset, TCP_SWEEP_INTERVAL * 1000);
/* Mark the time of last poll call. */
tcp->last_poll_time = time_now();
/* Process events. */
- for(; !apoll_it_done(&it); apoll_it_next(&it)) {
+ for (; !fdset_it_is_done(&it); fdset_it_next(&it)) {
bool should_close = false;
- unsigned int idx = apoll_it_get_idx(&it);
- if (apoll_it_ev_is_error(&it)) {
+ unsigned int idx = fdset_it_get_idx(&it);
+ if (fdset_it_is_error(&it)) {
should_close = (idx >= tcp->client_threshold);
- } else if (apoll_it_ev_is_pollin(&it)) {
+ } else if (fdset_it_is_pollin(&it)) {
/* Master sockets - new connection to accept. */
if (idx < tcp->client_threshold) {
/* Don't accept more clients than configured. */
- if (apoll_get_length(set) < tcp->max_worker_fds) {
+ if (fdset_get_length(set) < tcp->max_worker_fds) {
tcp_event_accept(tcp, idx);
}
/* Client sockets - already accepted connection or
@@ -282,10 +283,10 @@ static void tcp_wait_for_events(tcp_context_t *tcp)
/* Evaluate. */
if (should_close) {
- close(apoll_get_fd(set, idx));
- apoll_it_remove(&it);
+ fdset_it_remove(&it);
}
}
+ fdset_it_commit(&it);
}
int tcp_master(dthread_t *thread)
@@ -338,7 +339,9 @@ int tcp_master(dthread_t *thread)
update_tcp_conf(&tcp);
/* Prepare initial buffer for listening and bound sockets. */
- apoll_init(&tcp.set, APOLL_CTX_INIT_SIZE);
+ if (fdset_init(&tcp.set, FDSET_RESIZE_STEP) != KNOT_EOK) {
+ goto finish;
+ }
/* Set descriptors for the configured interfaces. */
tcp.client_threshold = tcp_set_ifaces(handler->server->ifaces,
@@ -359,7 +362,7 @@ int tcp_master(dthread_t *thread)
/* Sweep inactive clients and refresh TCP configuration. */
if (tcp.last_poll_time.tv_sec >= next_sweep.tv_sec) {
- apoll_sweep(&tcp.set, &tcp_sweep, NULL);
+ fdset_sweep(&tcp.set, &tcp_sweep, NULL);
update_sweep_timer(&next_sweep);
update_tcp_conf(&tcp);
}
@@ -369,8 +372,7 @@ finish:
free(tcp.iov[0].iov_base);
free(tcp.iov[1].iov_base);
mp_delete(mm.ctx);
- apoll_close(&tcp.set);
- apoll_clear(&tcp.set);
+ fdset_clear(&tcp.set);
return ret;
}
diff --git a/src/knot/server/udp-handler.c b/src/knot/server/udp-handler.c
index 4d0cc7b38..190d46cfd 100644
--- a/src/knot/server/udp-handler.c
+++ b/src/knot/server/udp-handler.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,25 +16,25 @@
#define __APPLE_USE_RFC_3542
+#include <assert.h>
#include <dlfcn.h>
-#include <unistd.h>
#include <errno.h>
+#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
-#include <string.h>
-#include <assert.h>
#include <sys/param.h>
#ifdef HAVE_SYS_UIO_H // struct iovec (OpenBSD)
#include <sys/uio.h>
#endif /* HAVE_SYS_UIO_H */
+#include <unistd.h>
#include "contrib/macros.h"
#include "contrib/mempattern.h"
#include "contrib/sockaddr.h"
#include "contrib/ucw/mempool.h"
-#include "knot/common/apoll.h"
+#include "knot/common/fdset.h"
#include "knot/nameserver/process_query.h"
#include "knot/query/layer.h"
#include "knot/server/server.h"
@@ -499,7 +499,7 @@ static int iface_udp_fd(const iface_t *iface, int thread_id, bool xdp_thread,
}
}
-static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, apoll_t *fds,
+static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, fdset_t *fds,
int thread_id, void **xdp_socket)
{
if (n_ifaces == 0) {
@@ -516,11 +516,14 @@ static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, apoll_t *
if (fd < 0) {
continue;
}
- apoll_add(fds, fd, APOLL_POLLIN, NULL);
+ int ret = fdset_add(fds, fd, FDSET_POLLIN, NULL);
+ if (ret < 0) {
+ return 0;
+ }
count++;
}
- assert(!xdp_thread || count == 1);
+ assert((count == n_ifaces) || (xdp_thread && count == 1));
return count;
}
@@ -575,11 +578,13 @@ int udp_master(dthread_t *thread)
/* Allocate descriptors for the configured interfaces. */
void *xdp_socket = NULL;
size_t nifs = handler->server->n_ifaces;
- apoll_t fds;
- apoll_init(&fds, nifs);
- unsigned fds_count = udp_set_ifaces(handler->server->ifaces, nifs, &fds,
- thread_id, &xdp_socket);
- if (fds_count == 0) {
+ fdset_t fds;
+ if (fdset_init(&fds, nifs) != KNOT_EOK) {
+ goto finish;
+ }
+ unsigned nfds = udp_set_ifaces(handler->server->ifaces, nifs, &fds,
+ thread_id, &xdp_socket);
+ if (nfds == 0) {
goto finish;
}
@@ -591,18 +596,15 @@ int udp_master(dthread_t *thread)
}
/* Wait for events. */
- apoll_it_t it;
- int ret = apoll_poll(&fds, &it, 0, -1);
- if (ret <= 0) {
- if (errno == EINTR || errno == EAGAIN) {
- continue;
- }
- break;
- }
+ fdset_it_t it;
+ (void)fdset_poll(&fds, &it, 0, -1);
/* Process the events. */
- for(; !apoll_it_done(&it); apoll_it_next(&it)) {
- if (api->udp_recv(apoll_it_get_fd(&it), rq, xdp_socket) > 0) {
+ for (; !fdset_it_is_done(&it); fdset_it_next(&it)) {
+ if (!fdset_it_is_pollin(&it)) {
+ continue;
+ }
+ if (api->udp_recv(fdset_it_get_fd(&it), rq, xdp_socket) > 0) {
api->udp_handle(&udp, rq, xdp_socket);
api->udp_send(rq, xdp_socket);
}
@@ -612,8 +614,7 @@ int udp_master(dthread_t *thread)
finish:
api->udp_deinit(rq);
mp_delete(mm.ctx);
- apoll_close(&fds);
- apoll_clear(&fds);
+ fdset_clear(&fds);
return KNOT_EOK;
}
diff --git a/tests-fuzz/knotd_wrap/udp-handler.c b/tests-fuzz/knotd_wrap/udp-handler.c
index 51bddf3a1..c6e28be6e 100644
--- a/tests-fuzz/knotd_wrap/udp-handler.c
+++ b/tests-fuzz/knotd_wrap/udp-handler.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -14,6 +14,8 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+#include <signal.h>
+
#include "knot/server/udp-handler.c"
#include "knot/common/log.h"
diff --git a/tests/knot/test_fdset.c b/tests/knot/test_fdset.c
index 10d1b4259..b07bf54f4 100644
--- a/tests/knot/test_fdset.c
+++ b/tests/knot/test_fdset.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -14,139 +14,119 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
-#include <stdlib.h>
-#include <stdint.h>
-#include <sys/time.h>
#include <pthread.h>
-#include <fcntl.h>
-#include <unistd.h>
#include <tap/basic.h>
-#include <time.h>
+#include <unistd.h>
#include "knot/common/fdset.h"
+#include "contrib/time.h"
-#define WRITE_PATTERN ((char) 0xde)
-#define WRITE_PATTERN_LEN sizeof(char)
+#define PATTERN1 "0x45"
+#define PATTERN2 "0xED"
-/* Subtract the `struct timeval' values X and Y,
- storing the result in RESULT.
- Return 1 if the difference is negative, otherwise 0.
- Copyright http://www.delorie.com/gnu/docs/glibc/libc_428.html
-*/
-static int timeval_subtract (struct timeval *result, struct timeval *x, struct timeval* y)
+void *thr_action1(void *arg)
{
- /* Perform the carry for the later subtraction by updating y. */
- if (x->tv_usec < y->tv_usec) {
- int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
- y->tv_usec -= 1000000 * nsec;
- y->tv_sec += nsec;
- }
- if (x->tv_usec - y->tv_usec > 1000000) {
- int nsec = (x->tv_usec - y->tv_usec) / 1000000;
- y->tv_usec += 1000000 * nsec;
- y->tv_sec -= nsec;
- }
-
- /* Compute the time remaining to wait.
- tv_usec is certainly positive. */
- result->tv_sec = x->tv_sec - y->tv_sec;
- result->tv_usec = x->tv_usec - y->tv_usec;
-
- /* Return 1 if result is negative. */
- return x->tv_sec < y->tv_sec;
+ usleep(10000);
+ (void)write(*((int *)arg), &PATTERN1, 1);
+ return NULL;
}
-static size_t timeval_diff(struct timeval *from, struct timeval *to) {
- struct timeval res;
- timeval_subtract(&res, to, from);
- return res.tv_sec*1000 + res.tv_usec/1000;
+void *thr_action2(void *arg)
+{
+ usleep(20000);
+ (void)write(*((int *)arg), &PATTERN2, 1);
+ return NULL;
}
-void* thr_action(void *arg)
+int main(int argc, char *argv[])
{
- int *fd = (int *)arg;
+ plan_lazy();
+
+ fdset_t fdset;
+ int ret = fdset_init(&fdset, 32);
+ ok(ret == KNOT_EOK, "fdset_init");
+
+ int fds0[2], fds1[2], fds2[2];
+
+ ret = pipe(fds0);
+ ok(ret >= 0, "create pipe 0");
+ ret = fdset_add(&fdset, fds0[0], FDSET_POLLIN, NULL);
+ ok(ret >= 0, "add pipe 0 to fdset");
+
+ ret = pipe(fds1);
+ ok(ret >= 0, "create pipe 1");
+ ret = fdset_add(&fdset, fds1[0], FDSET_POLLIN, NULL);
+ ok(ret >= 0, "add pipe 1 to fdset");
+
+ ret = pipe(fds2);
+ ok(ret >= 0, "create pipe 2");
+ ret = fdset_add(&fdset, fds2[0], FDSET_POLLIN, NULL);
+ ok(ret >= 0, "add pipe 2 to fdset");
- /* Sleep for 100ms. */
- struct timespec ts = { .tv_nsec = 1e8 };
- nanosleep(&ts, NULL);
+ ok(fdset_get_length(&fdset) == 3, "fdset size full");
- /* Write pattern. */
- char pattern = WRITE_PATTERN;
- if (write(*fd, &pattern, WRITE_PATTERN_LEN) == -1) {
- // Error.
+ struct timespec time0 = time_now();
+
+ pthread_t t1, t2;
+ ret = pthread_create(&t1, 0, thr_action1, &fds1[1]);
+ ok(ret == 0, "create thread 1");
+ ret = pthread_create(&t2, 0, thr_action2, &fds2[1]);
+ ok(ret == 0, "create thread 2");
+
+ fdset_it_t it;
+ ret = fdset_poll(&fdset, &it, 0, 100);
+ struct timespec time1 = time_now();
+ double diff1 = time_diff_ms(&time0, &time1);
+ ok(ret == 1, "fdset_poll return 1");
+ ok(diff1 > 5 && diff1 < 100, "fdset_poll timeout 1");
+ for(; !fdset_it_is_done(&it); fdset_it_next(&it)) {
+ ok(!fdset_it_is_error(&it), "fdset no error");
+ ok(fdset_it_is_pollin(&it), "fdset can read");
+
+ int fd = fdset_it_get_fd(&it);
+ ok(fd == fds1[0], "fdset_it fd check");
+
+ char buf = 0x00;
+ ret = read(fd, &buf, sizeof(buf));
+ ok(ret == 1 && buf == PATTERN1[0], "fdset_it value check");
+
+ fdset_it_remove(&it);
+ }
+ fdset_it_commit(&it);
+ ok(fdset_get_length(&fdset) == 2, "fdset size 2");
+ close(fds1[1]);
+
+ ret = fdset_poll(&fdset, &it, 0, 100);
+ struct timespec time2 = time_now();
+ double diff2 = time_diff_ms(&time0, &time2);
+ ok(ret == 1, "fdset_poll return 2");
+ ok(diff2 > 15 && diff2 < 100, "fdset_poll timeout 2");
+ for(; !fdset_it_is_done(&it); fdset_it_next(&it)) {
+ ok(!fdset_it_is_error(&it), "fdset no error");
+ ok(fdset_it_is_pollin(&it), "fdset can read");
+
+ int fd = fdset_it_get_fd(&it);
+ ok(fd == fds2[0], "fdset_it fd check");
+
+ char buf = 0x00;
+ ret = read(fd, &buf, sizeof(buf));
+ ok(ret == 1 && buf == PATTERN2[0], "fdset_it value check");
+
+ fdset_it_remove(&it);
}
+ fdset_it_commit(&it);
+ ok(fdset_get_length(&fdset) == 1, "fdset size 1");
+ close(fds2[1]);
- return NULL;
-}
+ ret = fdset_remove(&fdset, 0);
+ ok(ret == KNOT_EOK, "fdset remove");
+ close(fds0[1]);
+ ok(fdset_get_length(&fdset) == 0, "fdset size 0");
-int main(int argc, char *argv[])
-{
- plan(12);
-
- /* 1. Create fdset. */
- fdset_t set;
- int ret = fdset_init(&set, 32);
- is_int(0, ret, "fdset: init");
-
- /* 2. Create pipe. */
- int fds[2], tmpfds[2];
- ret = pipe(fds);
- ok(ret >= 0, "fdset: pipe() works");
- ret = pipe(tmpfds);
- ok(ret >= 0, "fdset: 2nd pipe() works");
-
- /* 3. Add fd to set. */
- ret = fdset_add(&set, fds[0], POLLIN, NULL);
- is_int(0, ret, "fdset: add to set works");
- fdset_add(&set, tmpfds[0], POLLIN, NULL);
-
- /* Schedule write. */
- struct timeval ts, te;
- gettimeofday(&ts, 0);
- pthread_t t;
- pthread_create(&t, 0, thr_action, &fds[1]);
-
- /* 4. Watch fdset. */
- int nfds = poll(set.pfd, set.n, 60 * 1000);
- gettimeofday(&te, 0);
- size_t diff = timeval_diff(&ts, &te);
-
- ok(nfds > 0, "fdset: poll returned %d events in %zu ms", nfds, diff);
-
- /* 5. Prepare event set. */
- ok(set.pfd[0].revents & POLLIN, "fdset: pipe is active");
-
- /* 6. Receive data. */
- char buf = 0x00;
- ret = read(set.pfd[0].fd, &buf, WRITE_PATTERN_LEN);
- ok(ret >= 0 && buf == WRITE_PATTERN, "fdset: contains valid data");
-
- /* 7-9. Remove from event set. */
- ret = fdset_remove(&set, 0);
- is_int(0, ret, "fdset: remove from fdset works");
- close(fds[0]);
- close(fds[1]);
- ret = fdset_remove(&set, 0);
- close(tmpfds[1]);
- close(tmpfds[1]);
- is_int(0, ret, "fdset: remove from fdset works (2)");
- ret = fdset_remove(&set, 0);
- ok(ret != 0, "fdset: removing nonexistent item");
-
- /* 10. Crash test. */
- fdset_init(0, 0);
- fdset_add(0, 1, 1, 0);
- fdset_add(0, 0, 1, 0);
- fdset_remove(0, 1);
- fdset_remove(0, 0);
- ok(1, "fdset: crash test successful");
-
- /* 11. Destroy fdset. */
- ret = fdset_clear(&set);
- is_int(0, ret, "fdset: destroyed");
-
- /* Cleanup. */
- pthread_join(t, 0);
+ pthread_join(t1, 0);
+ pthread_join(t2, 0);
+
+ fdset_clear(&fdset);
return 0;
}
diff --git a/tests/knot/test_server.c b/tests/knot/test_server.c
index 972f4b9f3..bde3d1031 100644
--- a/tests/knot/test_server.c
+++ b/tests/knot/test_server.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -14,6 +14,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+#include <signal.h>
#include <tap/basic.h>
#include "knot/server/server.h"
#include "test_conf.h"