diff options
author | Lennart Poettering <lennart@poettering.net> | 2025-01-16 11:37:17 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-16 11:37:17 +0100 |
commit | 9ef559a0363f6baea17a7dfdc15ff52ba43bc4f6 (patch) | |
tree | da9dc7b981e30a63dacf6bb8314765b8c055b08d | |
parent | namespace-util: don't reset UID/GIDs in namespace_enter() unless we enter a u... (diff) | |
parent | process-util: depend on CLONE_PIDFD (diff) | |
download | systemd-9ef559a0363f6baea17a7dfdc15ff52ba43bc4f6.tar.xz systemd-9ef559a0363f6baea17a7dfdc15ff52ba43bc4f6.zip |
tree-wide: drop support for kernels without pidfd_open() and pidfd_send_signal() (#35971)
-rw-r--r-- | src/basic/missing_syscall.h | 45 | ||||
-rw-r--r-- | src/basic/pidfd-util.c | 6 | ||||
-rw-r--r-- | src/basic/pidref.c | 4 | ||||
-rw-r--r-- | src/basic/process-util.c | 77 | ||||
-rw-r--r-- | src/libsystemd/sd-event/sd-event.c | 85 | ||||
-rw-r--r-- | src/libsystemd/sd-event/test-event.c | 21 | ||||
-rw-r--r-- | src/login/pam_systemd.c | 2 |
7 files changed, 74 insertions, 166 deletions
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h index 59cd9b844a..37048e1bc0 100644 --- a/src/basic/missing_syscall.h +++ b/src/basic/missing_syscall.h @@ -137,12 +137,7 @@ static inline int missing_name_to_handle_at(int fd, const char *name, struct fil #if !HAVE_SETNS static inline int missing_setns(int fd, int nstype) { -# ifdef __NR_setns return syscall(__NR_setns, fd, nstype); -# else - errno = ENOSYS; - return -1; -# endif } # define setns missing_setns @@ -162,12 +157,7 @@ static inline pid_t raw_getpid(void) { #if !HAVE_RENAMEAT2 static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) { -# ifdef __NR_renameat2 return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags); -# else - errno = ENOSYS; - return -1; -# endif } # define renameat2 missing_renameat2 @@ -177,12 +167,7 @@ static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, c #if !HAVE_KCMP static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) { -# if defined __NR_kcmp && __NR_kcmp >= 0 return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2); -# else - errno = ENOSYS; - return -1; -# endif } # define kcmp missing_kcmp @@ -192,34 +177,19 @@ static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long i #if !HAVE_KEYCTL static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { -# if defined __NR_keyctl && __NR_keyctl >= 0 return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5); -# else - errno = ENOSYS; - return -1; -# endif # define keyctl missing_keyctl } static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) { -# if defined __NR_add_key && __NR_add_key >= 0 return syscall(__NR_add_key, type, description, payload, plen, ringid); -# else - errno = ENOSYS; - return -1; 
-# endif # define add_key missing_add_key } static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) { -# if defined __NR_request_key && __NR_request_key >= 0 return syscall(__NR_request_key, type, description, callout_info, destringid); -# else - errno = ENOSYS; - return -1; -# endif # define request_key missing_request_key } @@ -329,12 +299,7 @@ static inline long missing_get_mempolicy(int *mode, unsigned long *nodemask, #if !HAVE_PIDFD_SEND_SIGNAL static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, unsigned flags) { -# ifdef __NR_pidfd_send_signal return syscall(__NR_pidfd_send_signal, fd, sig, info, flags); -# else - errno = ENOSYS; - return -1; -# endif } # define pidfd_send_signal missing_pidfd_send_signal @@ -342,12 +307,7 @@ static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, un #if !HAVE_PIDFD_OPEN static inline int missing_pidfd_open(pid_t pid, unsigned flags) { -# ifdef __NR_pidfd_open return syscall(__NR_pidfd_open, pid, flags); -# else - errno = ENOSYS; - return -1; -# endif } # define pidfd_open missing_pidfd_open @@ -661,12 +621,7 @@ static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) { #if !HAVE_SCHED_SETATTR static inline ssize_t missing_sched_setattr(pid_t pid, struct sched_attr *attr, unsigned int flags) { -# if defined __NR_sched_setattr return syscall(__NR_sched_setattr, pid, attr, flags); -# else - errno = ENOSYS; - return -1; -# endif } # define sched_setattr missing_sched_setattr diff --git a/src/basic/pidfd-util.c b/src/basic/pidfd-util.c index 204439e444..c90699d066 100644 --- a/src/basic/pidfd-util.c +++ b/src/basic/pidfd-util.c @@ -24,12 +24,8 @@ static int pidfd_check_pidfs(void) { return have_pidfs; _cleanup_close_ int fd = pidfd_open(getpid_cached(), 0); - if (fd < 0) { - if (ERRNO_IS_NOT_SUPPORTED(errno)) - return (have_pidfs = false); - + if (fd < 0) return -errno; - } 
return (have_pidfs = fd_is_fs_type(fd, PID_FS_MAGIC)); } diff --git a/src/basic/pidref.c b/src/basic/pidref.c index bc3e96f426..a275f77b56 100644 --- a/src/basic/pidref.c +++ b/src/basic/pidref.c @@ -84,8 +84,8 @@ int pidref_set_pid(PidRef *pidref, pid_t pid) { fd = pidfd_open(pid, 0); if (fd < 0) { - /* Graceful fallback in case the kernel doesn't support pidfds or is out of fds */ - if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno) && !ERRNO_IS_RESOURCE(errno)) + /* Graceful fallback in case the kernel is out of fds */ + if (!ERRNO_IS_RESOURCE(errno)) return log_debug_errno(errno, "Failed to open pidfd for pid " PID_FMT ": %m", pid); fd = -EBADF; diff --git a/src/basic/process-util.c b/src/basic/process-util.c index a13e2d5f28..0367270724 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -2042,7 +2042,7 @@ int posix_spawn_wrapper( * issues. * * Also, move the newly-created process into 'cgroup' through POSIX_SPAWN_SETCGROUP (clone3()) - * if available. Note that CLONE_INTO_CGROUP is only supported on cgroup v2. + * if available. * returns 1: We're already in the right cgroup * 0: 'cgroup' not specified or POSIX_SPAWN_SETCGROUP is not supported. 
The caller * needs to call 'cg_attach' on their own */ @@ -2061,14 +2061,10 @@ int posix_spawn_wrapper( _unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr; #if HAVE_PIDFD_SPAWN - static enum { - CLONE_ONLY_PID, - CLONE_CAN_PIDFD, /* 5.2 */ - CLONE_CAN_CGROUP, /* 5.7 */ - } clone_support = CLONE_CAN_CGROUP; + static bool have_clone_into_cgroup = true; /* kernel 5.7+ */ _cleanup_close_ int cgroup_fd = -EBADF; - if (cgroup && clone_support >= CLONE_CAN_CGROUP) { + if (cgroup && have_clone_into_cgroup) { _cleanup_free_ char *resolved_cgroup = NULL; r = cg_get_path_and_check( @@ -2099,47 +2095,41 @@ int posix_spawn_wrapper( return -r; #if HAVE_PIDFD_SPAWN - if (clone_support >= CLONE_CAN_PIDFD) { - _cleanup_close_ int pidfd = -EBADF; - - r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp); - if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && - cg_is_threaded(cgroup) > 0) /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode. */ - return -EUCLEAN; - if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) && - FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) { - /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but - * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3(). - * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3() - * but not CLONE_INTO_CGROUP. */ - - /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't - * retry every time. 
*/ - assert(clone_support >= CLONE_CAN_CGROUP); - clone_support = CLONE_CAN_PIDFD; - - flags &= ~POSIX_SPAWN_SETCGROUP; - r = posix_spawnattr_setflags(&attr, flags); - if (r != 0) - return -r; - - r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp); - } - if (r == 0) { - r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd)); - if (r < 0) - return r; - - return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP); - } - if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r)) + _cleanup_close_ int pidfd = -EBADF; + + r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp); + if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0) + return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode, + turn that into something recognizable */ + if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) && + FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) { + /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but + * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3(). + * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3() + * but not CLONE_INTO_CGROUP. */ + + /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't + * retry every time. */ + have_clone_into_cgroup = false; + + flags &= ~POSIX_SPAWN_SETCGROUP; + r = posix_spawnattr_setflags(&attr, flags); + if (r != 0) return -r; - clone_support = CLONE_ONLY_PID; /* No CLONE_PIDFD either? 
*/ + r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp); } -#endif + if (r != 0) + return -r; + r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd)); + if (r < 0) + return r; + + return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP); +#else pid_t pid; + r = posix_spawn(&pid, path, NULL, &attr, argv, envp); if (r != 0) return -r; @@ -2149,6 +2139,7 @@ int posix_spawn_wrapper( return r; return 0; /* We did not use CLONE_INTO_CGROUP so return 0, the caller will have to move the child */ +#endif } int proc_dir_open(DIR **ret) { diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c index c1f1747fd0..3d1b6fb228 100644 --- a/src/libsystemd/sd-event/sd-event.c +++ b/src/libsystemd/sd-event/sd-event.c @@ -25,6 +25,7 @@ #include "missing_magic.h" #include "missing_syscall.h" #include "missing_threads.h" +#include "missing_wait.h" #include "origin-id.h" #include "path-util.h" #include "prioq.h" @@ -1074,6 +1075,8 @@ static void source_disconnect(sd_event_source *s) { } static sd_event_source* source_free(sd_event_source *s) { + int r; + assert(s); source_disconnect(s); @@ -1087,31 +1090,23 @@ static sd_event_source* source_free(sd_event_source *s) { if (s->child.process_owned) { if (!s->child.exited) { - bool sent = false; - - if (s->child.pidfd >= 0) { - if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) { - if (errno == ESRCH) /* Already dead */ - sent = true; - else if (!ERRNO_IS_NOT_SUPPORTED(errno)) - log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m", - s->child.pid); - } else - sent = true; - } - - if (!sent) - if (kill(s->child.pid, SIGKILL) < 0) - if (errno != ESRCH) /* Already dead */ - log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m", - s->child.pid); + if (s->child.pidfd >= 0) + r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0)); + else + r = RET_NERRNO(kill(s->child.pid, SIGKILL)); + if (r < 0 && r != -ESRCH) + 
log_debug_errno(r, "Failed to kill process " PID_FMT ", ignoring: %m", + s->child.pid); } if (!s->child.waited) { siginfo_t si = {}; /* Reap the child if we can */ - (void) waitid(P_PID, s->child.pid, &si, WEXITED); + if (s->child.pidfd >= 0) + (void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED); + else + (void) waitid(P_PID, s->child.pid, &si, WEXITED); } } @@ -1578,11 +1573,6 @@ static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *us return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata)); } -static bool shall_use_pidfd(void) { - /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */ - return secure_getenv_bool("SYSTEMD_PIDFD") != 0; -} - _public_ int sd_event_add_child( sd_event *e, sd_event_source **ret, @@ -1630,34 +1620,29 @@ _public_ int sd_event_add_child( if (!s) return -ENOMEM; + /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we + * pin the PID, and make regular waitid() handling race-free. */ + + s->child.pidfd = pidfd_open(pid, 0); + if (s->child.pidfd < 0) + return -errno; + + s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */ + s->wakeup = WAKEUP_EVENT_SOURCE; s->child.options = options; s->child.callback = callback; s->userdata = userdata; s->enabled = SD_EVENT_ONESHOT; - /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we - * pin the PID, and make regular waitid() handling race-free. 
*/ - - if (shall_use_pidfd()) { - s->child.pidfd = pidfd_open(pid, 0); - if (s->child.pidfd < 0) { - /* Propagate errors unless the syscall is not supported or blocked */ - if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) - return -errno; - } else - s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */ - } else - s->child.pidfd = -EBADF; - if (EVENT_SOURCE_WATCH_PIDFD(s)) { - /* We have a pidfd and we only want to watch for exit */ + /* We only want to watch for exit */ r = source_child_pidfd_register(s, s->enabled); if (r < 0) return r; } else { - /* We have no pidfd or we shall wait for some other event than WEXITED */ + /* We shall wait for some other event than WEXITED */ r = event_make_signal_data(e, SIGCHLD, NULL); if (r < 0) return r; @@ -1727,17 +1712,12 @@ _public_ int sd_event_add_child_pidfd( s->wakeup = WAKEUP_EVENT_SOURCE; s->child.pidfd = pidfd; - s->child.pid = pid; s->child.options = options; s->child.callback = callback; s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */ s->userdata = userdata; s->enabled = SD_EVENT_ONESHOT; - r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s); - if (r < 0) - return r; - if (EVENT_SOURCE_WATCH_PIDFD(s)) { /* We only want to watch for WEXITED */ r = source_child_pidfd_register(s, s->enabled); @@ -1752,6 +1732,11 @@ _public_ int sd_event_add_child_pidfd( e->need_process_child = true; } + r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s); + if (r < 0) + return r; + + s->child.pid = pid; e->n_online_child_sources++; if (ret) @@ -3239,12 +3224,10 @@ _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, cons if (si) copy = *si; - if (pidfd_send_signal(s->child.pidfd, sig, si ? 
&copy : NULL, 0) < 0) { - /* Let's propagate the error only if the system call is not implemented or prohibited */ - if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) - return -errno; - } else - return 0; + if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) + return -errno; + + return 0; } /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c index 57dee392d7..6394507994 100644 --- a/src/libsystemd/sd-event/test-event.c +++ b/src/libsystemd/sd-event/test-event.c @@ -198,7 +198,7 @@ static int post_handler(sd_event_source *s, void *userdata) { return 2; } -static void test_basic_one(bool with_pidfd) { +TEST(basic) { sd_event *e = NULL; sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL; static const char ch = 'x'; @@ -207,10 +207,6 @@ static void test_basic_one(bool with_pidfd) { uint64_t event_now; int64_t priority; - log_info("/* %s(pidfd=%s) */", __func__, yes_no(with_pidfd)); - - assert_se(setenv("SYSTEMD_PIDFD", yes_no(with_pidfd), 1) >= 0); - assert_se(pipe(a) >= 0); assert_se(pipe(b) >= 0); assert_se(pipe(d) >= 0); @@ -301,13 +297,6 @@ static void test_basic_one(bool with_pidfd) { safe_close_pair(b); safe_close_pair(d); safe_close_pair(k); - - assert_se(unsetenv("SYSTEMD_PIDFD") >= 0); -} - -TEST(basic) { - test_basic_one(true); /* test with pidfd */ - test_basic_one(false); /* test without pidfd */ } TEST(sd_event_now) { @@ -583,13 +572,7 @@ TEST(pidfd) { assert_se(pid > 1); - pidfd = pidfd_open(pid, 0); - if (pidfd < 0) { - /* No pidfd_open() supported or blocked? 
*/ - assert_se(ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno)); - (void) wait_for_terminate(pid, NULL); - return; - } + ASSERT_OK(pidfd = pidfd_open(pid, 0)); pid2 = fork(); if (pid2 == 0) diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c index 00fc2c360d..893d03cdb6 100644 --- a/src/login/pam_systemd.c +++ b/src/login/pam_systemd.c @@ -879,7 +879,7 @@ static int create_session_message( if (!avoid_pidfd) { pidfd = pidfd_open(getpid_cached(), 0); - if (pidfd < 0 && !ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + if (pidfd < 0) return -errno; } |