summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Hak <jan.hak@nic.cz>2021-01-25 16:20:23 +0100
committerDaniel Salzman <daniel.salzman@nic.cz>2021-04-08 18:52:27 +0200
commit59eb2408f8bd00ff223057f87bdbf0fbc9f799fa (patch)
tree5e2df5abc357c59d7b7f0a7e770b0faf11e623f7
parentdnssec: fix little double-free (diff)
downloadknot-59eb2408f8bd00ff223057f87bdbf0fbc9f799fa.tar.xz
knot-59eb2408f8bd00ff223057f87bdbf0fbc9f799fa.zip
server: add polling wrapper for unix poll and epoll
-rw-r--r--Knot.files3
-rw-r--r--configure.ac7
-rw-r--r--src/knot/Makefile.inc3
-rw-r--r--src/knot/common/apoll.h79
-rw-r--r--src/knot/common/epoll_ctx.c261
-rw-r--r--src/knot/common/epoll_ctx.h227
-rw-r--r--src/knot/common/fdset.c144
-rw-r--r--src/knot/common/fdset.h151
-rw-r--r--src/knot/server/tcp-handler.c72
-rw-r--r--src/knot/server/udp-handler.c31
-rw-r--r--src/utils/knotd/main.c1
11 files changed, 878 insertions, 101 deletions
diff --git a/Knot.files b/Knot.files
index 61b6366ec..b0f235857 100644
--- a/Knot.files
+++ b/Knot.files
@@ -108,6 +108,9 @@ src/knot/catalog/generate.c
src/knot/catalog/generate.h
src/knot/catalog/interpret.c
src/knot/catalog/interpret.h
+src/knot/common/apoll.h
+src/knot/common/epoll_ctx.c
+src/knot/common/epoll_ctx.h
src/knot/common/evsched.c
src/knot/common/evsched.h
src/knot/common/fdset.c
diff --git a/configure.ac b/configure.ac
index 885986442..42161fcc0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -639,6 +639,12 @@ AC_CHECK_HEADERS_ONCE([pthread_np.h sys/uio.h bsd/string.h])
AC_CHECK_FUNCS([accept4 clock_gettime fgetln getline initgroups malloc_trim \
setgroups strlcat strlcpy sysctlbyname])
+# Check for a better polling method.
+AC_CHECK_FUNCS([epoll_create], [
+ AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
+ poll_method=epoll], [poll_method=poll]
+)
+
# Check for robust memory cleanup implementations.
AC_CHECK_FUNC([explicit_bzero], [
AC_DEFINE([HAVE_EXPLICIT_BZERO], [1], [explicit_bzero available])
@@ -738,6 +744,7 @@ result_msg_base=" Knot DNS $VERSION
Use recvmmsg: ${enable_recvmmsg}
Use SO_REUSEPORT(_LB): ${enable_reuseport}
XDP support: ${enable_xdp}
+ Polling method: ${poll_method}
Memory allocator: ${with_memory_allocator}
Fast zone parser: ${enable_fastparser}
Utilities with IDN: ${with_libidn}
diff --git a/src/knot/Makefile.inc b/src/knot/Makefile.inc
index 5cc44fb33..88b1f13e2 100644
--- a/src/knot/Makefile.inc
+++ b/src/knot/Makefile.inc
@@ -119,6 +119,9 @@ libknotd_la_SOURCES = \
knot/query/query.h \
knot/query/requestor.c \
knot/query/requestor.h \
+ knot/common/apoll.h \
+ knot/common/epoll_ctx.c \
+ knot/common/epoll_ctx.h \
knot/common/evsched.c \
knot/common/evsched.h \
knot/common/fdset.c \
diff --git a/src/knot/common/apoll.h b/src/knot/common/apoll.h
new file mode 100644
index 000000000..8c666a191
--- /dev/null
+++ b/src/knot/common/apoll.h
@@ -0,0 +1,79 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#ifdef HAVE_EPOLL
+ #include "knot/common/epoll_ctx.h"
+
+ #define apoll_t epoll_ctx_t
+ #define apoll_it_t epoll_it_t
+ #define apoll_sweep_state epoll_ctx_sweep_state_t
+
+ #define APOLL_CTX_KEEP EPOLL_CTX_KEEP
+ #define APOLL_CTX_SWEEP EPOLL_CTX_SWEEP
+
+ #define APOLL_POLLIN EPOLLIN
+
+ #define APOLL_CTX_INIT_SIZE EPOLL_INIT_SIZE
+
+ #define apoll_init(ctx, size) epoll_ctx_init(ctx, size)
+ #define apoll_close(ctx) epoll_ctx_close(ctx)
+ #define apoll_clear(ctx) epoll_ctx_clear(ctx)
+ #define apoll_add(ctx, fd, events, usrctx) epoll_ctx_add(ctx, fd, events, usrctx)
+ #define apoll_set_watchdog(ctx, idx, interval) epoll_ctx_set_watchdog(ctx, idx, interval)
+ #define apoll_get_length(ctx) epoll_ctx_get_length(ctx)
+ #define apoll_get_fd(ctx, idx) epoll_ctx_get_fd(ctx, idx)
+ #define apoll_poll(ctx, it, offset, timeout) epoll_ctx_wait(ctx, it, offset, timeout)
+ #define apoll_sweep(ctx, cb, data) epoll_ctx_sweep(ctx, cb, data)
+ #define apoll_it_next(it) epoll_it_next(it)
+ #define apoll_it_done(it) epoll_it_done(it)
+ #define apoll_it_remove(it) epoll_it_remove(it)
+ #define apoll_it_get_fd(it) epoll_it_get_fd(it)
+ #define apoll_it_get_idx(it) epoll_it_get_idx(it)
+ #define apoll_it_ev_is_pollin(it) epoll_it_ev_is_pollin(it)
+ #define apoll_it_ev_is_error(it) epoll_it_ev_is_err(it)
+#else
+ #include "knot/common/fdset.h"
+
+ #define apoll_t fdset_t
+ #define apoll_it_t fdset_it_t
+ #define apoll_sweep_state fdset_sweep_state_t
+
+ #define APOLL_CTX_KEEP FDSET_KEEP
+ #define APOLL_CTX_SWEEP FDSET_SWEEP
+
+ #define APOLL_POLLIN POLLIN
+
+ #define APOLL_CTX_INIT_SIZE FDSET_INIT_SIZE
+
+ #define apoll_init(ctx, size) fdset_init(ctx, size)
+ #define apoll_close(ctx)
+ #define apoll_clear(ctx) fdset_clear(ctx)
+ #define apoll_add(ctx, fd, events, usrctx) fdset_add(ctx, fd, events, usrctx)
+ #define apoll_set_watchdog(ctx, idx, interval) fdset_set_watchdog(ctx, idx, interval)
+ #define apoll_get_length(ctx) fdset_get_length(ctx)
+ #define apoll_get_fd(ctx, idx) fdset_get_fd(ctx, idx)
+ #define apoll_poll(ctx, it, offset, timeout) fdset_poll(ctx, it, offset, timeout)
+ #define apoll_sweep(ctx, cb, data) fdset_sweep(ctx, cb, data)
+ #define apoll_it_next(it) fdset_it_next(it)
+ #define apoll_it_done(it) fdset_it_done(it)
+ #define apoll_it_remove(it) fdset_it_remove(it)
+ #define apoll_it_get_fd(it) fdset_it_get_fd(it)
+ #define apoll_it_get_idx(it) fdset_it_get_idx(it)
+ #define apoll_it_ev_is_pollin(it) fdset_it_ev_is_pollin(it)
+ #define apoll_it_ev_is_error(it) fdset_it_ev_is_err(it)
+#endif
diff --git a/src/knot/common/epoll_ctx.c b/src/knot/common/epoll_ctx.c
new file mode 100644
index 000000000..46ad8f1fa
--- /dev/null
+++ b/src/knot/common/epoll_ctx.c
@@ -0,0 +1,261 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifdef HAVE_EPOLL
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <time.h>
+#include <assert.h>
+#include "knot/common/epoll_ctx.h"
+#include "contrib/time.h"
+#include "libknot/errcode.h"
+
+/* Realloc memory or return error (part of epoll_ctx_resize). */
+#define MEM_RESIZE(tmp, p, n) \
+ if ((tmp = realloc((p), (n) * sizeof(*p))) == NULL) \
+ return KNOT_ENOMEM; \
+ (p) = tmp;
+
+static int epoll_ctx_resize(epoll_ctx_t *set, const unsigned size)
+{
+ void *tmp = NULL;
+ MEM_RESIZE(tmp, set->usrctx, size);
+ MEM_RESIZE(tmp, set->timeout, size);
+ MEM_RESIZE(tmp, set->ev, size);
+ set->size = size;
+ return KNOT_EOK;
+}
+
+int epoll_ctx_init(epoll_ctx_t *ctx, const unsigned size)
+{
+ if (ctx == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ memset(ctx, 0, sizeof(epoll_ctx_t));
+
+ ctx->efd = epoll_create1(0);
+ if (ctx->efd < 0) {
+ return KNOT_EMFILE;
+ }
+
+ return epoll_ctx_resize(ctx, size);
+}
+
+int epoll_ctx_clear(epoll_ctx_t* ctx)
+{
+ if (ctx == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ int bck = ctx->efd;
+ free(ctx->ev);
+ free(ctx->usrctx);
+ free(ctx->timeout);
+ free(ctx->recv_ev);
+ memset(ctx, 0, sizeof(epoll_ctx_t));
+ ctx->efd = bck;
+ return KNOT_EOK;
+}
+
+void epoll_ctx_close(const epoll_ctx_t* ctx)
+{
+ close(ctx->efd);
+}
+
+int epoll_ctx_add(epoll_ctx_t *ctx, const int fd, const unsigned events, void *usrctx)
+{
+ if (ctx == NULL || fd < 0) {
+ return KNOT_EINVAL;
+ }
+
+ /* Realloc needed. */
+ if (ctx->n == ctx->size && epoll_ctx_resize(ctx, ctx->size + EPOLL_INIT_SIZE))
+ return KNOT_ENOMEM;
+
+ /* Initialize. */
+ const int i = ctx->n++;
+ ctx->ev[i].data.fd = fd;
+ ctx->ev[i].events = events;
+ ctx->usrctx[i] = usrctx;
+ ctx->timeout[i] = 0;
+ struct epoll_event ev = {
+ .data.u64 = i,
+ .events = events
+ };
+ epoll_ctl(ctx->efd, EPOLL_CTL_ADD, fd, &ev);
+
+ return i;
+}
+
+static int epoll_ctx_remove(epoll_ctx_t *ctx, const unsigned idx)
+{
+ if (ctx == NULL || idx >= ctx->n) {
+ return KNOT_EINVAL;
+ }
+
+ epoll_ctl(ctx->efd, EPOLL_CTL_DEL, ctx->ev[idx].data.fd, NULL);
+ const unsigned last = --ctx->n;
+ /* Nothing else if it is the last one.
+ * Move last -> i if some remain. */
+ if (idx < last) {
+ ctx->ev[idx] = ctx->ev[last];
+ ctx->timeout[idx] = ctx->timeout[last];
+ ctx->usrctx[idx] = ctx->usrctx[last];
+ struct epoll_event ev = {
+ .data.u64 = idx,
+ .events = ctx->ev[idx].events
+ };
+ epoll_ctl(ctx->efd, EPOLL_CTL_MOD, ctx->ev[last].data.fd, &ev);
+ }
+
+ return KNOT_EOK;
+}
+
+int epoll_ctx_wait(epoll_ctx_t *ctx, epoll_it_t *it, const unsigned offset, const int timeout)
+{
+ if (ctx->recv_size != ctx->size) {
+ void *tmp = NULL;
+ MEM_RESIZE(tmp, ctx->recv_ev, ctx->size);
+ ctx->recv_size = ctx->size;
+ }
+
+ it->ctx = ctx;
+ it->ptr = ctx->recv_ev;
+ it->offset = offset;
+
+ /*
+ * NOTE: Can't skip offset without bunch of syscalls!!
+ * Because of that it waits for `ctx->n` (every socket). Offset is set when TCP
+ * trotlling is ON. Sometimes it can return with sockets where none of them are
+ * connection socket, but it should not be common.
+ * But it can cause problems when adopted in other use-case.
+ */
+ return it->unprocessed = epoll_wait(ctx->efd, ctx->recv_ev, ctx->n, timeout * 1000);
+}
+
+int epoll_ctx_set_watchdog(epoll_ctx_t *ctx, const unsigned idx, const int interval)
+{
+ if (ctx == NULL || idx >= ctx->n) {
+ return KNOT_EINVAL;
+ }
+
+ /* Lift watchdog if interval is negative. */
+ if (interval < 0) {
+ ctx->timeout[idx] = 0;
+ return KNOT_EOK;
+ }
+
+ /* Update clock. */
+ const struct timespec now = time_now();
+ ctx->timeout[idx] = now.tv_sec + interval; /* Only seconds precision. */
+ return KNOT_EOK;
+}
+
+int epoll_ctx_get_fd(const epoll_ctx_t *ctx, const unsigned idx)
+{
+ if (ctx == NULL || idx >= ctx->n) {
+ return KNOT_EINVAL;
+ }
+
+ return ctx->ev[idx].data.u64;
+}
+
+unsigned epoll_ctx_get_length(const epoll_ctx_t *ctx)
+{
+ assert(ctx);
+ return ctx->n;
+}
+
+int epoll_ctx_sweep(epoll_ctx_t* ctx, const epoll_ctx_sweep_cb_t cb, void *data)
+{
+ if (ctx == NULL || cb == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ /* Get time threshold. */
+ const struct timespec now = time_now();
+ unsigned idx = 0;
+ while (idx < ctx->n) {
+ /* Check sweep state, remove if requested. */
+ if (ctx->timeout[idx] > 0 && ctx->timeout[idx] <= now.tv_sec) {
+ const int fd = epoll_ctx_get_fd(ctx, idx);
+ if (cb(ctx, fd, data) == EPOLL_CTX_SWEEP) {
+ if (epoll_ctx_remove(ctx, idx) == KNOT_EOK) {
+ continue; /* Stay on the index. */
+ }
+ }
+ }
+ ++idx;
+ }
+
+ return KNOT_EOK;
+}
+
+void epoll_it_next(epoll_it_t *it)
+{
+ do {
+ it->ptr++;
+ it->unprocessed--;
+ } while (it->unprocessed > 0 && epoll_it_get_idx(it) < it->offset);
+}
+
+int epoll_it_done(const epoll_it_t *it)
+{
+ return it->unprocessed <= 0;
+}
+
+int epoll_it_remove(epoll_it_t *it)
+{
+ if (it == NULL || it->ctx == NULL) {
+ return KNOT_EINVAL;
+ }
+ epoll_ctx_t *ctx = it->ctx;
+ const int idx = epoll_it_get_idx(it);
+ epoll_ctx_remove(ctx, idx);
+ /* Iterator should return on last valid already processed element. */
+ /* On `next` call (in for-loop) will point on first unprocessed. */
+ it->ptr--;
+ return KNOT_EOK;
+}
+
+int epoll_it_get_fd(const epoll_it_t *it)
+{
+ assert(it != NULL);
+ return it->ctx->ev[epoll_it_get_idx(it)].data.fd;
+}
+
+unsigned epoll_it_get_idx(const epoll_it_t *it)
+{
+ assert(it != NULL);
+ return it->ptr->data.u64;
+}
+
+int epoll_it_ev_is_pollin(const epoll_it_t *it)
+{
+ assert(it != NULL);
+ return it->ptr->events & EPOLLIN;
+}
+
+int epoll_it_ev_is_err(const epoll_it_t *it)
+{
+ assert(it != NULL);
+ return it->ptr->events & (EPOLLERR|EPOLLHUP);
+}
+
+#endif
diff --git a/src/knot/common/epoll_ctx.h b/src/knot/common/epoll_ctx.h
new file mode 100644
index 000000000..a8fdc5f66
--- /dev/null
+++ b/src/knot/common/epoll_ctx.h
@@ -0,0 +1,227 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief I/O multiplexing with context and timeouts for each fd.
+ */
+
+#pragma once
+
+#ifdef HAVE_EPOLL
+
+#include <stddef.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <sys/epoll.h>
+
+#define EPOLL_INIT_SIZE 256 /* Resize step. */
+
+/*! \brief Set of filedescriptors with associated context and timeouts. */
+typedef struct epoll_ctx {
+ int efd; /*!< File descriptor of epoll. */
+ unsigned n; /*!< Active fds. */
+ unsigned size; /*!< Array size (allocated). */
+ unsigned recv_size; /*!< Size of array for received events. */
+ struct epoll_event *ev; /*!< Epoll event storage for each fd */
+ void* *usrctx; /*!< Context for each fd. */
+ time_t *timeout; /*!< Timeout for each fd (seconds precision). */
+ struct epoll_event *recv_ev; /*!< Array for received events. */
+} epoll_ctx_t;
+
+typedef struct epoll_it {
+ epoll_ctx_t *ctx; /*!< Iterator related context. */
+ struct epoll_event *ptr; /*!< Pointer on processed event. */
+ int offset; /*!< Event index offset. */
+ int unprocessed; /*!< Unprocessed events left. */
+} epoll_it_t;
+
+/*! \brief Mark-and-sweep state. */
+typedef enum epoll_ctx_sweep_state {
+ EPOLL_CTX_KEEP,
+ EPOLL_CTX_SWEEP
+} epoll_ctx_sweep_state_t;
+
+/*! \brief Sweep callback (set, index, data) */
+typedef enum epoll_ctx_sweep_state (*epoll_ctx_sweep_cb_t)(epoll_ctx_t*, int, void*);
+
+/*!
+ * \brief Initialize epoll_ctx to given size.
+ *
+ * \param ctx Target ctx.
+ * \param size Initial ctx size.
+ *
+ * \retval ret == 0 if successful.
+ * \retval ret < 0 on error.
+ */
+int epoll_ctx_init(epoll_ctx_t *ctx, const unsigned size);
+
+/*!
+ * \brief Clear whole context of epoll_ctx.
+ *
+ * \param ctx Target ctx.
+ *
+ * \retval ret == 0 if successful.
+ * \retval ret < 0 on error.
+ */
+int epoll_ctx_clear(epoll_ctx_t* ctx);
+
+/*!
+ * \brief Close epoll related file descriptor.
+ *
+ * \param ctx Target ctx.
+ */
+void epoll_ctx_close(const epoll_ctx_t* ctx);
+
+/*!
+ * \brief Add file descriptor to watched ctx.
+ *
+ * \param ctx Target ctx.
+ * \param fd Added file descriptor.
+ * \param events Mask of watched events.
+ * \param usrctx Context (optional).
+ *
+ * \retval ret >= 0 is index of the added fd.
+ * \retval ret < 0 on errors.
+ */
+int epoll_ctx_add(epoll_ctx_t *ctx, const int fd, const unsigned events, void *usrctx);
+
+/*!
+ * \brief Wait for receive events.
+ *
+ * \param ctx Target ctx.
+ * \param it Event iterator storage.
+ * \param offset Index of first event.
+ * \param timeout Timeout of operation (negative number for unlimited).
+ *
+ * \retval ret >= 0 represents number of events received.
+ * \retval ret < 0 on error.
+ */
+int epoll_ctx_wait(epoll_ctx_t *ctx, epoll_it_t *it, const unsigned offset, const int timeout);
+
+/*!
+ * \brief Set file descriptor watchdog interval.
+ *
+ * Set time (interval from now) after which the associated file descriptor
+ * should be sweeped (see epoll_ctx_sweep). Good example is setting a grace period
+ * of N seconds between socket activity. If socket is not active within
+ * <now, now + interval>, it is sweeped and potentially closed.
+ *
+ * \param ctx Target ctx.
+ * \param i Index for the file descriptor.
+ * \param interval Allowed interval without activity (seconds).
+ * -1 disables watchdog timer
+ *
+ * \retval ret == 0 on success.
+ * \retval ret < 0 on errors.
+ */
+int epoll_ctx_set_watchdog(epoll_ctx_t *ctx, const unsigned idx, const int interval);
+
+/*!
+ * \brief Returns file descriptor based on index.
+ *
+ * \param ctx Target ctx.
+ * \param idx Index of the file descriptor.
+ *
+ * \retval ret >= 0 for file descriptor.
+ * \retval ret < 0 on errors.
+ */
+int epoll_ctx_get_fd(const epoll_ctx_t *ctx, const unsigned idx);
+
+/*!
+ * \brief Returns number of file descriptors stored in ctx.
+ *
+ * \param ctx Target ctx.
+ *
+ * \retval Number of descriptors stored
+ */
+unsigned epoll_ctx_get_length(const epoll_ctx_t *ctx);
+
+/*!
+ * \brief Sweep file descriptors with exceeding inactivity period.
+ *
+ * \param ctx Target ctx.
+ * \param cb Callback for sweeped descriptors.
+ * \param data Pointer to extra data.
+ *
+ * \retval number of sweeped descriptors.
+ * \retval -1 on errors.
+ */
+int epoll_ctx_sweep(epoll_ctx_t* ctx, const epoll_ctx_sweep_cb_t cb, void *data);
+
+/*!
+ * \brief Move iterator on next received event.
+ *
+ * \param it Target iterator.
+ */
+void epoll_it_next(epoll_it_t *it);
+
+/*!
+ * \brief Decide if there is more received events.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Logical flag represents 'done' state.
+ */
+int epoll_it_done(const epoll_it_t *it);
+
+/*!
+ * \brief Remove file descriptor from watched ctx.
+ *
+ * \param it Target iterator.
+ *
+ * \retval 0 if successful.
+ * \retval ret < 0 on error.
+ */
+int epoll_it_remove(epoll_it_t *it);
+
+/*!
+ * \brief Get file descriptor of event referenced by iterator.
+ *
+ * \param it Target iterator.
+ *
+ * \retval ret >= 0 for file descriptor.
+ * \retval ret < 0 on errors.
+ */
+int epoll_it_get_fd(const epoll_it_t *it);
+
+/*!
+ * \brief Get index of event in set referenced by iterator.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Index of event.
+ */
+unsigned epoll_it_get_idx(const epoll_it_t *it);
+
+/*!
+ * \brief Decide if event referenced by iterator is POLLIN event.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Logical flag represents 'POLLIN' event received.
+ */
+int epoll_it_ev_is_pollin(const epoll_it_t *it);
+
+/*!
+ * \brief Decide if event referenced by iterator is error event.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Logical flag represents error event received.
+ */
+int epoll_it_ev_is_err(const epoll_it_t *it);
+
+#endif
diff --git a/src/knot/common/fdset.c b/src/knot/common/fdset.c
index bb836709a..aa98c845d 100644
--- a/src/knot/common/fdset.c
+++ b/src/knot/common/fdset.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -14,10 +14,13 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+#ifdef ENABLE_POLL
+
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
+#include <assert.h>
#include "knot/common/fdset.h"
#include "contrib/time.h"
#include "libknot/errcode.h"
@@ -28,7 +31,7 @@
return KNOT_ENOMEM; \
(p) = tmp;
-static int fdset_resize(fdset_t *set, unsigned size)
+static int fdset_resize(fdset_t *set, const unsigned size)
{
void *tmp = NULL;
MEM_RESIZE(tmp, set->ctx, size);
@@ -38,7 +41,7 @@ static int fdset_resize(fdset_t *set, unsigned size)
return KNOT_EOK;
}
-int fdset_init(fdset_t *set, unsigned size)
+int fdset_init(fdset_t *set, const unsigned size)
{
if (set == NULL) {
return KNOT_EINVAL;
@@ -61,7 +64,7 @@ int fdset_clear(fdset_t* set)
return KNOT_EOK;
}
-int fdset_add(fdset_t *set, int fd, unsigned events, void *ctx)
+int fdset_add(fdset_t *set, const int fd, const unsigned events, void *ctx)
{
if (set == NULL || fd < 0) {
return KNOT_EINVAL;
@@ -72,7 +75,7 @@ int fdset_add(fdset_t *set, int fd, unsigned events, void *ctx)
return KNOT_ENOMEM;
/* Initialize. */
- int i = set->n++;
+ const int i = set->n++;
set->pfd[i].fd = fd;
set->pfd[i].events = events;
set->pfd[i].revents = 0;
@@ -83,69 +86,148 @@ int fdset_add(fdset_t *set, int fd, unsigned events, void *ctx)
return i;
}
-int fdset_remove(fdset_t *set, unsigned i)
+//TODO should be static, but it is a dependency in tests (? remove from
+// tests or something ?)
+int fdset_remove(fdset_t *set, const unsigned idx)
{
- if (set == NULL || i >= set->n) {
+ if (set == NULL || idx >= set->n) {
return KNOT_EINVAL;
}
- /* Decrement number of elms. */
- --set->n;
-
+ const unsigned last = --set->n;
/* Nothing else if it is the last one.
* Move last -> i if some remain. */
- unsigned last = set->n; /* Already decremented */
- if (i < last) {
- set->pfd[i] = set->pfd[last];
- set->timeout[i] = set->timeout[last];
- set->ctx[i] = set->ctx[last];
+ if (idx < last) {
+ set->pfd[idx] = set->pfd[last];
+ set->timeout[idx] = set->timeout[last];
+ set->ctx[idx] = set->ctx[last];
}
return KNOT_EOK;
}
-int fdset_set_watchdog(fdset_t* set, int i, int interval)
+int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout)
+{
+ it->ctx = set;
+ it->idx = offset;
+ it->unprocessed = poll(&set->pfd[offset], set->n - offset, 1000 * timeout);
+ while (it->unprocessed > 0 && set->pfd[it->idx].revents == 0) {
+ it->idx++;
+ }
+ return it->unprocessed;
+}
+
+int fdset_set_watchdog(fdset_t* set, const unsigned idx, const int interval)
{
- if (set == NULL || i >= set->n) {
+ if (set == NULL || idx >= set->n) {
return KNOT_EINVAL;
}
/* Lift watchdog if interval is negative. */
if (interval < 0) {
- set->timeout[i] = 0;
+ set->timeout[idx] = 0;
return KNOT_EOK;
}
/* Update clock. */
- struct timespec now = time_now();
+ const struct timespec now = time_now();
+ set->timeout[idx] = now.tv_sec + interval; /* Only seconds precision. */
- set->timeout[i] = now.tv_sec + interval; /* Only seconds precision. */
return KNOT_EOK;
}
-int fdset_sweep(fdset_t* set, fdset_sweep_cb_t cb, void *data)
+int fdset_get_fd(const fdset_t *set, const unsigned idx)
{
- if (set == NULL || cb == NULL) {
+ if (set == NULL || idx >= set->n) {
return KNOT_EINVAL;
}
- /* Get time threshold. */
- struct timespec now = time_now();
+ return set->pfd[idx].fd;
+}
- unsigned i = 0;
- while (i < set->n) {
+unsigned fdset_get_length(const fdset_t *set)
+{
+ assert(set);
+ return set->n;
+}
+
+int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data)
+{
+ if (set == NULL || cb == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ /* Get time threshold. */
+ const struct timespec now = time_now();
+ unsigned idx = 0;
+ while (idx < set->n) {
/* Check sweep state, remove if requested. */
- if (set->timeout[i] > 0 && set->timeout[i] <= now.tv_sec) {
- if (cb(set, i, data) == FDSET_SWEEP) {
- if (fdset_remove(set, i) == KNOT_EOK)
+ if (set->timeout[idx] > 0 && set->timeout[idx] <= now.tv_sec) {
+ const int fd = fdset_get_fd(set, idx);
+ if (cb(set, fd, data) == FDSET_SWEEP) {
+ if (fdset_remove(set, idx) == KNOT_EOK) {
continue; /* Stay on the index. */
+ }
}
}
+ ++idx;
+ }
+ return KNOT_EOK;
+}
+
+void fdset_it_next(fdset_it_t *it)
+{
+ if (--it->unprocessed > 0) {
+ while (it->ctx->pfd[++it->idx].revents == 0); /* nop */
+ }
+}
+
+int fdset_it_done(const fdset_it_t *it)
+{
+ return it->unprocessed <= 0;
+}
- /* Next descriptor. */
- ++i;
+int fdset_it_remove(fdset_it_t *it)
+{
+ if (it == NULL || it->ctx == NULL) {
+ return KNOT_EINVAL;
}
+ fdset_t *set = it->ctx;
+ const unsigned idx = fdset_it_get_idx(it);
+ fdset_remove(set, idx);
+ /* Iterator should return on last valid already processed element. */
+ /* On `next` call (in for-loop) will point on first unprocessed. */
+ --it->idx;
return KNOT_EOK;
}
+
+int fdset_it_get_fd(const fdset_it_t *it)
+{
+ if (it == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ return it->ctx->pfd[it->idx].fd;
+}
+
+unsigned fdset_it_get_idx(const fdset_it_t *it)
+{
+ assert(it);
+ return it->idx;
+}
+
+int fdset_it_ev_is_pollin(const fdset_it_t *it)
+{
+ assert(it);
+ return it->ctx->pfd[it->idx].revents & POLLIN;
+}
+
+int fdset_it_ev_is_err(const fdset_it_t *it)
+{
+ assert(it);
+ return it->ctx->pfd[it->idx].revents & (POLLERR|POLLHUP|POLLNVAL);
+}
+
+#endif
diff --git a/src/knot/common/fdset.h b/src/knot/common/fdset.h
index a5623f758..a75912c96 100644
--- a/src/knot/common/fdset.h
+++ b/src/knot/common/fdset.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,6 +20,8 @@
#pragma once
+#if !defined(HAVE_EPOLL)
+
#include <stddef.h>
#include <poll.h>
#include <sys/time.h>
@@ -36,25 +38,40 @@ typedef struct fdset {
time_t *timeout; /*!< Timeout for each fd (seconds precision). */
} fdset_t;
+/*! \brief State of iterator over received events */
+typedef struct fdset_it {
+ fdset_t *ctx; /*!< Source fdset_t. */
+ unsigned idx; /*!< Index of processed event. */
+ int unprocessed; /*!< Unprocessed events left. */
+} fdset_it_t;
+
/*! \brief Mark-and-sweep state. */
-enum fdset_sweep_state {
+typedef enum fdset_sweep_state {
FDSET_KEEP,
FDSET_SWEEP
-};
+} fdset_sweep_state_t;
/*! \brief Sweep callback (set, index, data) */
typedef enum fdset_sweep_state (*fdset_sweep_cb_t)(fdset_t*, int, void*);
/*!
* \brief Initialize fdset to given size.
+ *
+ * \param set Target set.
+ * \param size Initial set size.
+ *
+ * \retval ret == 0 if successful.
+ * \retval ret < 0 on error.
*/
-int fdset_init(fdset_t *set, unsigned size);
+int fdset_init(fdset_t *set, const unsigned size);
/*!
- * \brief Destroy FDSET.
+ * \brief Clear whole context of FDSET.
*
- * \retval 0 if successful.
- * \retval -1 on error.
+ * \param set Target set.
+ *
+ * \retval ret == 0 if successful.
+ * \retval ret < 0 on error.
*/
int fdset_clear(fdset_t* set);
@@ -66,10 +83,10 @@ int fdset_clear(fdset_t* set);
* \param events Mask of watched events.
* \param ctx Context (optional).
*
- * \retval index of the added fd if successful.
- * \retval -1 on errors.
+ * \retval ret >= 0 is index of the added fd.
+ * \retval ret < 0 on errors.
*/
-int fdset_add(fdset_t *set, int fd, unsigned events, void *ctx);
+int fdset_add(fdset_t *set, const int fd, const unsigned events, void *ctx);
/*!
* \brief Remove file descriptor from watched set.
@@ -78,9 +95,24 @@ int fdset_add(fdset_t *set, int fd, unsigned events, void *ctx);
* \param i Index of the removed fd.
*
* \retval 0 if successful.
- * \retval -1 on errors.
+ * \retval ret < 0 on errors.
+ */
+int fdset_remove(fdset_t *set, const unsigned idx);
+
+/*!
+ * \brief Wait for receive events.
+ *
+ * Skip events based on offset and set iterator on first event.
+ *
+ * \param set Target set.
+ * \param it Event iterator storage.
+ * \param offset Index of first event.
+ * \param timeout Timeout of operation (negative number for unlimited).
+ *
+ * \retval ret >= 0 represents number of events received.
+ * \retval ret < 0 on error.
*/
-int fdset_remove(fdset_t *set, unsigned i);
+int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int timeout);
/*!
* \brief Set file descriptor watchdog interval.
@@ -91,14 +123,34 @@ int fdset_remove(fdset_t *set, unsigned i);
* <now, now + interval>, it is sweeped and potentially closed.
*
* \param set Target set.
- * \param i Index for the file descriptor.
+ * \param idx Index of the file descriptor.
* \param interval Allowed interval without activity (seconds).
* -1 disables watchdog timer
*
- * \retval 0 if successful.
- * \retval -1 on errors.
+ * \retval ret == 0 on success.
+ * \retval ret < 0 on errors.
*/
-int fdset_set_watchdog(fdset_t* set, int i, int interval);
+int fdset_set_watchdog(fdset_t *set, const unsigned idx, const int interval);
+
+/*!
+ * \brief Returns file descriptor based on index.
+ *
+ * \param set Target set.
+ * \param idx Index of the file descriptor.
+ *
+ * \retval ret >= 0 for file descriptor.
+ * \retval ret < 0 on errors.
+ */
+int fdset_get_fd(const fdset_t *set, const unsigned idx);
+
+/*!
+ * \brief Returns number of file descriptors stored in set.
+ *
+ * \param set Target set.
+ *
+ * \retval Number of descriptors stored
+ */
+unsigned fdset_get_length(const fdset_t *set);
/*!
* \brief Sweep file descriptors with exceeding inactivity period.
@@ -110,4 +162,69 @@ int fdset_set_watchdog(fdset_t* set, int i, int interval);
* \retval number of sweeped descriptors.
* \retval -1 on errors.
*/
-int fdset_sweep(fdset_t* set, fdset_sweep_cb_t cb, void *data);
+int fdset_sweep(fdset_t* set, const fdset_sweep_cb_t cb, void *data);
+
+/*!
+ * \brief Move iterator on next received event.
+ *
+ * \param it Target iterator.
+ */
+void fdset_it_next(fdset_it_t *it);
+
+/*!
+ * \brief Decide if there is more received events.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Logical flag represents 'done' state.
+ */
+int fdset_it_done(const fdset_it_t *it);
+
+/*!
+ * \brief Remove file descriptor referenced by iterator from watched set.
+ *
+ * \param it Target iterator.
+ *
+ * \retval 0 if successful.
+ * \retval ret < 0 on error.
+ */
+int fdset_it_remove(fdset_it_t *it);
+
+/*!
+ * \brief Get file descriptor of event referenced by iterator.
+ *
+ * \param it Target iterator.
+ *
+ * \retval ret >= 0 for file descriptor.
+ * \retval ret < 0 on errors.
+ */
+int fdset_it_get_fd(const fdset_it_t *it);
+
+/*!
+ * \brief Get index of event in set referenced by iterator.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Index of event.
+ */
+unsigned fdset_it_get_idx(const fdset_it_t *it);
+
+/*!
+ * \brief Decide if event referenced by iterator is POLLIN event.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Logical flag represents 'POLLIN' event received.
+ */
+int fdset_it_ev_is_pollin(const fdset_it_t *it);
+
+/*!
+ * \brief Decide if event referenced by iterator is error event.
+ *
+ * \param it Target iterator.
+ *
+ * \retval Logical flag represents error event received.
+ */
+int fdset_it_ev_is_err(const fdset_it_t *it);
+
+#endif
diff --git a/src/knot/server/tcp-handler.c b/src/knot/server/tcp-handler.c
index 69cf347b1..7d17e79ba 100644
--- a/src/knot/server/tcp-handler.c
+++ b/src/knot/server/tcp-handler.c
@@ -32,6 +32,7 @@
#include "knot/server/server.h"
#include "knot/server/tcp-handler.h"
#include "knot/common/log.h"
+#include "knot/common/apoll.h"
#include "knot/nameserver/process_query.h"
#include "knot/query/layer.h"
#include "contrib/macros.h"
@@ -49,7 +50,7 @@ typedef struct tcp_context {
unsigned client_threshold; /*!< Index of first TCP client. */
struct timespec last_poll_time; /*!< Time of the last socket poll. */
bool is_throttled; /*!< TCP connections throttling switch. */
- fdset_t set; /*!< Set of server/client sockets. */
+ apoll_t set; /*!< Set of server/client sockets. */
unsigned thread_id; /*!< Thread identifier. */
unsigned max_worker_fds; /*!< Max TCP clients per worker configuration + no. of ifaces. */
int idle_timeout; /*!< [s] TCP idle timeout configuration. */
@@ -75,11 +76,10 @@ static void update_tcp_conf(tcp_context_t *tcp)
}
/*! \brief Sweep TCP connection. */
-static enum fdset_sweep_state tcp_sweep(fdset_t *set, int i, void *data)
+static apoll_sweep_state tcp_sweep(apoll_t *set, int fd, void *data)
{
UNUSED(data);
- assert(set && i < set->n && i >= 0);
- int fd = set->pfd[i].fd;
+ assert(set && fd >= 0);
/* Best-effort, name and shame. */
struct sockaddr_storage ss;
@@ -92,7 +92,7 @@ static enum fdset_sweep_state tcp_sweep(fdset_t *set, int i, void *data)
close(fd);
- return FDSET_SWEEP;
+ return APOLL_CTX_SWEEP;
}
static bool tcp_active_state(int state)
@@ -117,13 +117,12 @@ static void tcp_log_error(struct sockaddr_storage *ss, const char *operation, in
}
static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces,
- fdset_t *fds, int thread_id)
+ apoll_t *fds, int thread_id)
{
if (n_ifaces == 0) {
return 0;
}
- fdset_clear(fds);
for (const iface_t *i = ifaces; i != ifaces + n_ifaces; i++) {
if (i->fd_tcp_count == 0) { // Ignore XDP interface.
assert(i->fd_xdp_count > 0);
@@ -140,10 +139,10 @@ static unsigned tcp_set_ifaces(const iface_t *ifaces, size_t n_ifaces,
tcp_id = thread_id - i->fd_udp_count;
}
#endif
- fdset_add(fds, i->fd_tcp[tcp_id], POLLIN, NULL);
+ apoll_add(fds, i->fd_tcp[tcp_id], APOLL_POLLIN, NULL);
}
- return fds->n;
+ return apoll_get_length(fds);
}
static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec *tx)
@@ -216,28 +215,28 @@ static int tcp_handle(tcp_context_t *tcp, int fd, struct iovec *rx, struct iovec
static void tcp_event_accept(tcp_context_t *tcp, unsigned i)
{
/* Accept client. */
- int fd = tcp->set.pfd[i].fd;
+ int fd = apoll_get_fd((&tcp->set), i);
int client = net_accept(fd, NULL);
if (client >= 0) {
/* Assign to fdset. */
- int next_id = fdset_add(&tcp->set, client, POLLIN, NULL);
+ int next_id = apoll_add(&tcp->set, client, APOLL_POLLIN, NULL);
if (next_id < 0) {
close(client);
return;
}
/* Update watchdog timer. */
- fdset_set_watchdog(&tcp->set, next_id, tcp->idle_timeout);
+ apoll_set_watchdog(&tcp->set, next_id, tcp->idle_timeout);
}
}
static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
{
- int fd = tcp->set.pfd[i].fd;
+ int fd = apoll_get_fd((&tcp->set), i);
int ret = tcp_handle(tcp, fd, &tcp->iov[0], &tcp->iov[1]);
if (ret == KNOT_EOK) {
/* Update socket activity timer. */
- fdset_set_watchdog(&tcp->set, i, tcp->idle_timeout);
+ apoll_set_watchdog(&tcp->set, i, tcp->idle_timeout);
}
return ret;
@@ -245,48 +244,46 @@ static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
static void tcp_wait_for_events(tcp_context_t *tcp)
{
- fdset_t *set = &tcp->set;
+ apoll_t *set = &tcp->set;
/* Check if throttled with many open TCP connections. */
- assert(set->n <= tcp->max_worker_fds);
- tcp->is_throttled = set->n == tcp->max_worker_fds;
+ assert(apoll_get_length(set) <= tcp->max_worker_fds);
+ tcp->is_throttled = apoll_get_length(set) == tcp->max_worker_fds;
/* If throttled, temporarily ignore new TCP connections. */
unsigned i = tcp->is_throttled ? tcp->client_threshold : 0;
/* Wait for events. */
- int nfds = poll(&(set->pfd[i]), set->n - i, TCP_SWEEP_INTERVAL * 1000);
+ apoll_it_t it;
+ apoll_poll(set, &it, i, TCP_SWEEP_INTERVAL);
/* Mark the time of last poll call. */
tcp->last_poll_time = time_now();
/* Process events. */
- while (nfds > 0 && i < set->n) {
+ for(; !apoll_it_done(&it); apoll_it_next(&it)) {
bool should_close = false;
- if (set->pfd[i].revents & (POLLERR|POLLHUP|POLLNVAL)) {
- should_close = (i >= tcp->client_threshold);
- --nfds;
- } else if (set->pfd[i].revents & (POLLIN)) {
+ unsigned int idx = apoll_it_get_idx(&it);
+ if (apoll_it_ev_is_error(&it)) {
+ should_close = (idx >= tcp->client_threshold);
+ } else if (apoll_it_ev_is_pollin(&it)) {
/* Master sockets - new connection to accept. */
- if (i < tcp->client_threshold) {
+ if (idx < tcp->client_threshold) {
/* Don't accept more clients than configured. */
- if (set->n < tcp->max_worker_fds) {
- tcp_event_accept(tcp, i);
+ if (apoll_get_length(set) < tcp->max_worker_fds) {
+ tcp_event_accept(tcp, idx);
}
/* Client sockets - already accepted connection or
closed connection :-( */
- } else if (tcp_event_serve(tcp, i) != KNOT_EOK) {
+ } else if (tcp_event_serve(tcp, idx) != KNOT_EOK) {
should_close = true;
}
- --nfds;
}
/* Evaluate. */
if (should_close) {
- close(set->pfd[i].fd);
- fdset_remove(set, i);
- } else {
- ++i;
+ close(apoll_get_fd(set, idx));
+ apoll_it_remove(&it);
}
}
}
@@ -325,9 +322,6 @@ int tcp_master(dthread_t *thread)
};
knot_layer_init(&tcp.layer, &mm, process_query_layer());
- /* Prepare initial buffer for listening and bound sockets. */
- fdset_init(&tcp.set, FDSET_INIT_SIZE);
-
/* Create iovec abstraction. */
for (unsigned i = 0; i < 2; ++i) {
tcp.iov[i].iov_len = KNOT_WIRE_MAX_PKTSIZE;
@@ -343,6 +337,9 @@ int tcp_master(dthread_t *thread)
update_sweep_timer(&next_sweep);
update_tcp_conf(&tcp);
+ /* Prepare initial buffer for listening and bound sockets. */
+ apoll_init(&tcp.set, APOLL_CTX_INIT_SIZE);
+
/* Set descriptors for the configured interfaces. */
tcp.client_threshold = tcp_set_ifaces(handler->server->ifaces,
handler->server->n_ifaces,
@@ -362,7 +359,7 @@ int tcp_master(dthread_t *thread)
/* Sweep inactive clients and refresh TCP configuration. */
if (tcp.last_poll_time.tv_sec >= next_sweep.tv_sec) {
- fdset_sweep(&tcp.set, &tcp_sweep, NULL);
+ apoll_sweep(&tcp.set, &tcp_sweep, NULL);
update_sweep_timer(&next_sweep);
update_tcp_conf(&tcp);
}
@@ -372,7 +369,8 @@ finish:
free(tcp.iov[0].iov_base);
free(tcp.iov[1].iov_base);
mp_delete(mm.ctx);
- fdset_clear(&tcp.set);
+ apoll_close(&tcp.set);
+ apoll_clear(&tcp.set);
return ret;
}
diff --git a/src/knot/server/udp-handler.c b/src/knot/server/udp-handler.c
index 58b6a97a9..4d0cc7b38 100644
--- a/src/knot/server/udp-handler.c
+++ b/src/knot/server/udp-handler.c
@@ -34,6 +34,7 @@
#include "contrib/mempattern.h"
#include "contrib/sockaddr.h"
#include "contrib/ucw/mempool.h"
+#include "knot/common/apoll.h"
#include "knot/nameserver/process_query.h"
#include "knot/query/layer.h"
#include "knot/server/server.h"
@@ -498,7 +499,7 @@ static int iface_udp_fd(const iface_t *iface, int thread_id, bool xdp_thread,
}
}
-static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, struct pollfd *fds,
+static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, apoll_t *fds,
int thread_id, void **xdp_socket)
{
if (n_ifaces == 0) {
@@ -515,9 +516,7 @@ static unsigned udp_set_ifaces(const iface_t *ifaces, size_t n_ifaces, struct po
if (fd < 0) {
continue;
}
- fds[count].fd = fd;
- fds[count].events = POLLIN;
- fds[count].revents = 0;
+ apoll_add(fds, fd, APOLL_POLLIN, NULL);
count++;
}
@@ -576,10 +575,11 @@ int udp_master(dthread_t *thread)
/* Allocate descriptors for the configured interfaces. */
void *xdp_socket = NULL;
size_t nifs = handler->server->n_ifaces;
- struct pollfd fds[nifs];
- unsigned nfds = udp_set_ifaces(handler->server->ifaces, nifs, fds,
- thread_id, &xdp_socket);
- if (nfds == 0) {
+ apoll_t fds;
+ apoll_init(&fds, nifs);
+ unsigned fds_count = udp_set_ifaces(handler->server->ifaces, nifs, &fds,
+ thread_id, &xdp_socket);
+ if (fds_count == 0) {
goto finish;
}
@@ -591,8 +591,9 @@ int udp_master(dthread_t *thread)
}
/* Wait for events. */
- int events = poll(fds, nfds, -1);
- if (events <= 0) {
+ apoll_it_t it;
+ int ret = apoll_poll(&fds, &it, 0, -1);
+ if (ret <= 0) {
if (errno == EINTR || errno == EAGAIN) {
continue;
}
@@ -600,12 +601,8 @@ int udp_master(dthread_t *thread)
}
/* Process the events. */
- for (unsigned i = 0; i < nfds && events > 0; i++) {
- if (fds[i].revents == 0) {
- continue;
- }
- events -= 1;
- if (api->udp_recv(fds[i].fd, rq, xdp_socket) > 0) {
+ for(; !apoll_it_done(&it); apoll_it_next(&it)) {
+ if (api->udp_recv(apoll_it_get_fd(&it), rq, xdp_socket) > 0) {
api->udp_handle(&udp, rq, xdp_socket);
api->udp_send(rq, xdp_socket);
}
@@ -615,6 +612,8 @@ int udp_master(dthread_t *thread)
finish:
api->udp_deinit(rq);
mp_delete(mm.ctx);
+ apoll_close(&fds);
+ apoll_clear(&fds);
return KNOT_EOK;
}
diff --git a/src/utils/knotd/main.c b/src/utils/knotd/main.c
index c89ed96d1..0cc5c072e 100644
--- a/src/utils/knotd/main.c
+++ b/src/utils/knotd/main.c
@@ -22,6 +22,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
+#include <signal.h>
#include <sys/stat.h>
#include <urcu.h>