summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Hak <jan.hak@nic.cz>2021-04-12 11:05:50 +0200
committerDaniel Salzman <daniel.salzman@nic.cz>2021-04-23 10:01:08 +0200
commit873e0d9726fc3d0ba9f66d187e7efa499e23dbc8 (patch)
tree4880b080638aa4c02a7587861d68c8c7702ea196
parentknotd: print error when unable to change owner of logging file (diff)
downloadknot-873e0d9726fc3d0ba9f66d187e7efa499e23dbc8.tar.xz
knot-873e0d9726fc3d0ba9f66d187e7efa499e23dbc8.zip
fdset: polling using kqueue on BSD systems
-rw-r--r--configure.ac31
-rw-r--r--src/knot/Makefile.inc7
-rw-r--r--src/knot/common/fdset.c109
-rw-r--r--src/knot/common/fdset.h48
-rw-r--r--src/utils/Makefile.inc2
-rw-r--r--tests-fuzz/Makefile.am1
-rw-r--r--tests/Makefile.am1
-rw-r--r--tests/knot/test_fdset.c15
8 files changed, 173 insertions, 41 deletions
diff --git a/configure.ac b/configure.ac
index 773cea81b..2a13e4139 100644
--- a/configure.ac
+++ b/configure.ac
@@ -286,22 +286,33 @@ AS_IF([test "$enable_systemd" = "yes"],[
# Socket polling method
socket_polling=
AC_ARG_WITH([socket-polling],
- AS_HELP_STRING([--with-socket-polling=auto|poll|epoll],
+ AS_HELP_STRING([--with-socket-polling=auto|poll|epoll|kqueue|libkqueue],
[Use specific socket polling method [default=auto]]),
[socket_polling=$withval], [socket_polling=auto]
)
AS_CASE([$socket_polling],
- [auto], [AC_CHECK_FUNCS([epoll_create],
- [AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
- socket_polling=epoll],
- [socket_polling=poll])],
- [poll], [socket_polling=poll],
+ [auto], [AC_CHECK_FUNCS([kqueue],
+ [AC_DEFINE([HAVE_KQUEUE], [1], [kqueue available])
+ socket_polling=kqueue],
+ [AC_CHECK_FUNCS([epoll_create],
+ [AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
+ socket_polling=epoll],
+ [socket_polling=poll])])],
+ [poll], [socket_polling=poll],
[epoll], [AC_CHECK_FUNCS([epoll_create],
- [AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
- socket_polling=epoll],
- [AC_MSG_ERROR([Epoll not available.])])],
- [*], [AC_MSG_ERROR([Invalid value of --socket-polling.])]
+ [AC_DEFINE([HAVE_EPOLL], [1], [epoll available])
+ socket_polling=epoll],
+ [AC_MSG_ERROR([epoll not available.])])],
+ [kqueue], [AC_CHECK_FUNCS([kqueue],
+ [AC_DEFINE([HAVE_KQUEUE], [1], [kqueue available])
+ socket_polling=kqueue],
+ [AC_MSG_ERROR([kqueue not available.])])],
+ [libkqueue], [PKG_CHECK_MODULES([libkqueue], [libkqueue],
+ [AC_DEFINE([HAVE_KQUEUE], [1], [libkqueue available])
+ socket_polling=libkqueue],
+ [AC_MSG_ERROR([libkqueue not available.])])],
+ [*], [AC_MSG_ERROR([Invalid value of --socket-polling.])]
)
# Alternative memory allocator
diff --git a/src/knot/Makefile.inc b/src/knot/Makefile.inc
index 5cc44fb33..4075803c6 100644
--- a/src/knot/Makefile.inc
+++ b/src/knot/Makefile.inc
@@ -1,7 +1,8 @@
-libknotd_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(liburcu_CFLAGS) \
- $(lmdb_CFLAGS) $(systemd_CFLAGS) -DKNOTD_MOD_STATIC
+libknotd_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(libkqueue_CFLAGS) \
+ $(liburcu_CFLAGS) $(lmdb_CFLAGS) $(systemd_CFLAGS) \
+ -DKNOTD_MOD_STATIC
libknotd_la_LDFLAGS = $(AM_LDFLAGS) -export-symbols-regex '^knotd_'
-libknotd_la_LIBADD = $(dlopen_LIBS) $(pthread_LIBS)
+libknotd_la_LIBADD = $(dlopen_LIBS) $(libkqueue_LIBS) $(pthread_LIBS)
libknotd_LIBS = libknotd.la libknot.la libdnssec.la libzscanner.la \
$(libcontrib_LIBS) $(liburcu_LIBS) $(lmdb_LIBS) \
$(systemd_LIBS)
diff --git a/src/knot/common/fdset.c b/src/knot/common/fdset.c
index 6c0308724..3514edc52 100644
--- a/src/knot/common/fdset.c
+++ b/src/knot/common/fdset.c
@@ -36,7 +36,7 @@ static int fdset_resize(fdset_t *set, const unsigned size)
MEM_RESIZE(set->ctx, size);
MEM_RESIZE(set->timeout, size);
-#ifdef HAVE_EPOLL
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
MEM_RESIZE(set->ev, size);
#else
MEM_RESIZE(set->pfd, size);
@@ -53,16 +53,20 @@ int fdset_init(fdset_t *set, const unsigned size)
memset(set, 0, sizeof(*set));
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
#ifdef HAVE_EPOLL
- set->efd = epoll_create1(0);
- if (set->efd < 0) {
+ set->pfd = epoll_create1(0);
+#elif HAVE_KQUEUE
+ set->pfd = kqueue();
+#endif
+ if (set->pfd < 0) {
return knot_map_errno();
}
#endif
int ret = fdset_resize(set, size);
-#ifdef HAVE_EPOLL
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
if (ret != KNOT_EOK) {
- close(set->efd);
+ close(set->pfd);
}
#endif
return ret;
@@ -76,10 +80,10 @@ void fdset_clear(fdset_t *set)
free(set->ctx);
free(set->timeout);
-#ifdef HAVE_EPOLL
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
free(set->ev);
free(set->recv_ev);
- close(set->efd);
+ close(set->pfd);
#else
free(set->pfd);
#endif
@@ -107,7 +111,12 @@ int fdset_add(fdset_t *set, const int fd, const fdset_event_t events, void *ctx)
.data.u64 = idx,
.events = events
};
- if (epoll_ctl(set->efd, EPOLL_CTL_ADD, fd, &ev) != 0) {
+ if (epoll_ctl(set->pfd, EPOLL_CTL_ADD, fd, &ev) != 0) {
+ return knot_map_errno();
+ }
+#elif HAVE_KQUEUE
+ EV_SET(&set->ev[idx], fd, events, EV_ADD, 0, 0, (void *)(intptr_t)idx);
+ if (kevent(set->pfd, &set->ev[idx], 1, NULL, 0, NULL) < 0) {
return knot_map_errno();
}
#else
@@ -128,7 +137,25 @@ int fdset_remove(fdset_t *set, const unsigned idx)
const int fd = fdset_get_fd(set, idx);
#ifdef HAVE_EPOLL
/* This is necessary as DDNS duplicates file descriptors! */
- (void)epoll_ctl(set->efd, EPOLL_CTL_DEL, fd, NULL);
+ if (epoll_ctl(set->pfd, EPOLL_CTL_DEL, fd, NULL) != 0) {
+ close(fd);
+ return knot_map_errno();
+ }
+#elif HAVE_KQUEUE
+ /* Return delete flag back to original filter number. */
+#if defined(__NetBSD__)
+ if ((signed short)set->ev[idx].filter < 0)
+#else
+ if (set->ev[idx].filter >= 0)
+#endif
+ {
+ set->ev[idx].filter = ~set->ev[idx].filter;
+ }
+ set->ev[idx].flags = EV_DELETE;
+ if (kevent(set->pfd, &set->ev[idx], 1, NULL, 0, NULL) < 0) {
+ close(fd);
+ return knot_map_errno();
+ }
#endif
close(fd);
@@ -137,15 +164,23 @@ int fdset_remove(fdset_t *set, const unsigned idx)
if (idx < last) {
set->ctx[idx] = set->ctx[last];
set->timeout[idx] = set->timeout[last];
-#ifdef HAVE_EPOLL
+#if defined(HAVE_EPOLL) || defined (HAVE_KQUEUE)
set->ev[idx] = set->ev[last];
+#ifdef HAVE_EPOLL
struct epoll_event ev = {
.data.u64 = idx,
.events = set->ev[idx].events
};
- if (epoll_ctl(set->efd, EPOLL_CTL_MOD, set->ev[last].data.fd, &ev) != 0) {
+ if (epoll_ctl(set->pfd, EPOLL_CTL_MOD, set->ev[last].data.fd, &ev) != 0) {
return knot_map_errno();
}
+#elif HAVE_KQUEUE
+ EV_SET(&set->ev[idx], set->ev[last].ident, set->ev[last].filter,
+ EV_ADD, 0, 0, (void *)(intptr_t)idx);
+ if (kevent(set->pfd, &set->ev[idx], 1, NULL, 0, NULL) < 0) {
+ return knot_map_errno();
+ }
+#endif
#else
set->pfd[idx] = set->pfd[last];
#endif
@@ -167,21 +202,21 @@ int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int ti
it->set = set;
it->idx = offset;
-#ifdef HAVE_EPOLL
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
if (set->recv_size != set->size) {
MEM_RESIZE(set->recv_ev, set->size);
set->recv_size = set->size;
}
it->ptr = set->recv_ev;
it->dirty = 0;
- /*
- * NOTE: Can't skip offset without bunch of syscalls!!
- * Because of that it waits for `ctx->n` (every socket). Offset is set when TCP
- * trotlling is ON. Sometimes it can return with sockets where none of them are
- * connection socket, but it should not be common.
- * But it can cause problems when adopted in other use-case.
- */
- it->unprocessed = epoll_wait(set->efd, set->recv_ev, set->n, timeout_ms);
+#ifdef HAVE_EPOLL
+ if (set->n == 0) {
+ return 0;
+ }
+ if ((it->unprocessed = epoll_wait(set->pfd, set->recv_ev, set->recv_size,
+ timeout_ms)) == -1) {
+ return knot_map_errno();
+ }
#ifndef NDEBUG
/* In specific circumstances with valgrind, it sometimes happens that
* `set->n < it->unprocessed`. */
@@ -190,6 +225,26 @@ int fdset_poll(fdset_t *set, fdset_it_t *it, const unsigned offset, const int ti
it->unprocessed = 0;
}
#endif
+#elif HAVE_KQUEUE
+ struct timespec timeout = {
+ .tv_sec = timeout_ms / 1000,
+ .tv_nsec = (timeout_ms % 1000) * 1000000
+ };
+ if ((it->unprocessed = kevent(set->pfd, NULL, 0, set->recv_ev, set->recv_size,
+ (timeout_ms >= 0) ? &timeout : NULL)) == -1) {
+ return knot_map_errno();
+ }
+#endif
+ /*
+ * NOTE: Can't skip offset without bunch of syscalls!
+ * Because of that it waits for `ctx->n` (every socket). Offset is set when TCP
+ * trotlling is ON. Sometimes it can return with sockets where none of them is
+ * connected socket, but it should not be common.
+ */
+ while (it->unprocessed > 0 && fdset_it_get_idx(it) < it->idx) {
+ it->ptr++;
+ it->unprocessed--;
+ }
return it->unprocessed;
#else
it->unprocessed = poll(&set->pfd[offset], set->n - offset, timeout_ms);
@@ -213,7 +268,7 @@ void fdset_it_commit(fdset_it_t *it)
if (it == NULL) {
return;
}
-#ifdef HAVE_EPOLL
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
/* NOTE: reverse iteration to avoid as much "remove last" operations
* as possible. I'm not sure about performance improvement. It
* will skip some syscalls at begin of iteration, but what
@@ -221,7 +276,16 @@ void fdset_it_commit(fdset_it_t *it)
*/
fdset_t *set = it->set;
for (int i = set->n - 1; it->dirty > 0 && i >= 0; --i) {
- if (set->ev[i].events == FDSET_REMOVE_FLAG) {
+#ifdef HAVE_EPOLL
+ if (set->ev[i].events == FDSET_REMOVE_FLAG)
+#else
+#if defined(__NetBSD__)
+ if ((signed short)set->ev[i].filter < 0)
+#else
+ if (set->ev[i].filter >= 0)
+#endif
+#endif
+ {
(void)fdset_remove(set, i);
it->dirty--;
}
@@ -264,6 +328,7 @@ void fdset_sweep(fdset_t *set, const fdset_sweep_cb_t cb, void *data)
const int fd = fdset_get_fd(set, idx);
if (cb(set, fd, data) == FDSET_SWEEP) {
(void)fdset_remove(set, idx);
+ continue;
}
}
++idx;
diff --git a/src/knot/common/fdset.h b/src/knot/common/fdset.h
index f6d5bb885..95a5c61e4 100644
--- a/src/knot/common/fdset.h
+++ b/src/knot/common/fdset.h
@@ -27,6 +27,8 @@
#ifdef HAVE_EPOLL
#include <sys/epoll.h>
+#elif HAVE_KQUEUE
+#include <sys/event.h>
#else
#include <poll.h>
#endif
@@ -44,11 +46,16 @@ typedef struct {
unsigned size; /*!< Array size (allocated). */
void **ctx; /*!< Context for each fd. */
time_t *timeout; /*!< Timeout for each fd (seconds precision). */
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
#ifdef HAVE_EPOLL
struct epoll_event *ev; /*!< Epoll event storage for each fd. */
struct epoll_event *recv_ev; /*!< Array for polled events. */
+#elif HAVE_KQUEUE
+ struct kevent *ev; /*!< Kqueue event storage for each fd. */
+ struct kevent *recv_ev; /*!< Array for polled events. */
+#endif
unsigned recv_size; /*!< Size of array for polled events. */
- int efd; /*!< File descriptor of epoll. */
+ int pfd; /*!< File descriptor of kernel polling structure (epoll or kqueue). */
#else
struct pollfd *pfd; /*!< Poll state for each fd. */
#endif
@@ -59,8 +66,12 @@ typedef struct {
fdset_t *set; /*!< Source fdset_t. */
unsigned idx; /*!< Event index offset. */
int unprocessed; /*!< Unprocessed events left. */
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
#ifdef HAVE_EPOLL
struct epoll_event *ptr; /*!< Pointer on processed event. */
+#elif HAVE_KQUEUE
+ struct kevent *ptr; /*!< Pointer on processed event. */
+#endif
unsigned dirty; /*!< Number of fd to be removed on commit. */
#endif
} fdset_it_t;
@@ -69,6 +80,9 @@ typedef enum {
#ifdef HAVE_EPOLL
FDSET_POLLIN = EPOLLIN,
FDSET_POLLOUT = EPOLLOUT,
+#elif HAVE_KQUEUE
+ FDSET_POLLIN = EVFILT_READ,
+ FDSET_POLLOUT = EVFILT_WRITE,
#else
FDSET_POLLIN = POLLIN,
FDSET_POLLOUT = POLLOUT,
@@ -180,6 +194,8 @@ inline static int fdset_get_fd(const fdset_t *set, const unsigned idx)
#ifdef HAVE_EPOLL
return set->ev[idx].data.fd;
+#elif HAVE_KQUEUE
+ return set->ev[idx].ident;
#else
return set->pfd[idx].fd;
#endif
@@ -212,6 +228,8 @@ inline static unsigned fdset_it_get_idx(const fdset_it_t *it)
#ifdef HAVE_EPOLL
return it->ptr->data.u64;
+#elif HAVE_KQUEUE
+ return (unsigned)(intptr_t)it->ptr->udata;
#else
return it->idx;
#endif
@@ -231,6 +249,8 @@ inline static int fdset_it_get_fd(const fdset_it_t *it)
#ifdef HAVE_EPOLL
return it->set->ev[fdset_it_get_idx(it)].data.fd;
+#elif HAVE_KQUEUE
+ return it->ptr->ident;
#else
return it->set->pfd[it->idx].fd;
#endif
@@ -245,7 +265,7 @@ inline static void fdset_it_next(fdset_it_t *it)
{
assert(it);
-#ifdef HAVE_EPOLL
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
do {
it->ptr++;
it->unprocessed--;
@@ -272,6 +292,26 @@ inline static void fdset_it_remove(fdset_it_t *it)
const int idx = fdset_it_get_idx(it);
it->set->ev[idx].events = FDSET_REMOVE_FLAG;
it->dirty++;
+#elif HAVE_KQUEUE
+ const int idx = fdset_it_get_idx(it);
+ /* Bitwise negated filter marks event for delete. */
+ /* Filters become: */
+ /* [FreeBSD] */
+ /* EVFILT_READ (-1) -> 0 */
+ /* EVFILT_WRITE (-2) -> 1 */
+ /* [NetBSD] */
+ /* EVFILT_READ (0) -> -1 */
+ /* EVFILT_WRITE (1) -> -2 */
+ /* If not marked for delete then mark for delete. */
+#if defined(__NetBSD__)
+ if ((signed short)it->set->ev[idx].filter >= 0)
+#else
+ if (it->set->ev[idx].filter < 0)
+#endif
+ {
+ it->set->ev[idx].filter = ~it->set->ev[idx].filter;
+ }
+ it->dirty++;
#else
(void)fdset_remove(it->set, fdset_it_get_idx(it));
/* Iterator should return on last valid already processed element. */
@@ -314,6 +354,8 @@ inline static bool fdset_it_is_pollin(const fdset_it_t *it)
#ifdef HAVE_EPOLL
return it->ptr->events & EPOLLIN;
+#elif HAVE_KQUEUE
+ return it->ptr->filter == EVFILT_READ;
#else
return it->set->pfd[it->idx].revents & POLLIN;
#endif
@@ -332,6 +374,8 @@ inline static bool fdset_it_is_error(const fdset_it_t *it)
#ifdef HAVE_EPOLL
return it->ptr->events & (EPOLLERR | EPOLLHUP);
+#elif HAVE_KQUEUE
+ return it->ptr->flags & EV_ERROR;
#else
return it->set->pfd[it->idx].revents & (POLLERR | POLLHUP | POLLNVAL);
#endif
diff --git a/src/utils/Makefile.inc b/src/utils/Makefile.inc
index 030af39f3..db45ce0bd 100644
--- a/src/utils/Makefile.inc
+++ b/src/utils/Makefile.inc
@@ -6,7 +6,7 @@ noinst_LTLIBRARIES += libknotus.la
libknotus_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(gnutls_CFLAGS) \
$(libedit_CFLAGS) $(libidn2_CFLAGS) $(libidn_CFLAGS) \
- $(libnghttp2_CFLAGS) $(lmdb_CFLAGS)
+ $(libkqueue_CFLAGS) $(libnghttp2_CFLAGS) $(lmdb_CFLAGS)
libknotus_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
libknotus_la_LIBADD = $(libidn2_LIBS) $(libidn_LIBS) $(libnghttp2_LIBS)
libknotus_LIBS = libknotus.la libknot.la libdnssec.la $(libcontrib_LIBS) \
diff --git a/tests-fuzz/Makefile.am b/tests-fuzz/Makefile.am
index be2192b38..dcf18a658 100644
--- a/tests-fuzz/Makefile.am
+++ b/tests-fuzz/Makefile.am
@@ -28,6 +28,7 @@ knotd_stdio_SOURCES = \
knotd_stdio_CPPFLAGS = \
$(AM_CPPFLAGS) \
+ $(libkqueue_CFLAGS) \
$(liburcu_CFLAGS) \
$(lmdb_CFLAGS) \
$(systemd_CFLAGS)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 6ba5673d3..9c32d6b93 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -4,6 +4,7 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/libdnssec \
-I$(top_srcdir)/src/libdnssec/shared \
$(gnutls_CFLAGS) \
+ $(libkqueue_CFLAGS) \
$(lmdb_CFLAGS)
LDADD = \
diff --git a/tests/knot/test_fdset.c b/tests/knot/test_fdset.c
index b07bf54f4..3f10cc2b4 100644
--- a/tests/knot/test_fdset.c
+++ b/tests/knot/test_fdset.c
@@ -96,6 +96,9 @@ int main(int argc, char *argv[])
ok(fdset_get_length(&fdset) == 2, "fdset size 2");
close(fds1[1]);
+ int fd2_dup = dup(fds2[0]);
+ ok(fd2_dup >= 0, "duplicate fd");
+
ret = fdset_poll(&fdset, &it, 0, 100);
struct timespec time2 = time_now();
double diff2 = time_diff_ms(&time0, &time2);
@@ -116,16 +119,22 @@ int main(int argc, char *argv[])
}
fdset_it_commit(&it);
ok(fdset_get_length(&fdset) == 1, "fdset size 1");
- close(fds2[1]);
+
+ pthread_join(t1, 0);
+ pthread_join(t2, 0);
ret = fdset_remove(&fdset, 0);
ok(ret == KNOT_EOK, "fdset remove");
close(fds0[1]);
ok(fdset_get_length(&fdset) == 0, "fdset size 0");
- pthread_join(t1, 0);
- pthread_join(t2, 0);
+ write(fds2[1], &PATTERN2, 1);
+ ret = fdset_poll(&fdset, &it, 0, 100);
+ ok(ret == 0, "fdset_poll return 3");
+
+ close(fds2[1]);
+ close(fd2_dup);
fdset_clear(&fdset);
return 0;