summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/basic/missing_syscall.h | 45
-rw-r--r-- src/basic/pidfd-util.c | 6
-rw-r--r-- src/basic/pidref.c | 4
-rw-r--r-- src/basic/process-util.c | 77
-rw-r--r-- src/libsystemd/sd-event/sd-event.c | 85
-rw-r--r-- src/libsystemd/sd-event/test-event.c | 21
-rw-r--r-- src/login/pam_systemd.c | 2
7 files changed, 74 insertions, 166 deletions
diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h
index 59cd9b844a..37048e1bc0 100644
--- a/src/basic/missing_syscall.h
+++ b/src/basic/missing_syscall.h
@@ -137,12 +137,7 @@ static inline int missing_name_to_handle_at(int fd, const char *name, struct fil
#if !HAVE_SETNS
static inline int missing_setns(int fd, int nstype) {
-# ifdef __NR_setns
return syscall(__NR_setns, fd, nstype);
-# else
- errno = ENOSYS;
- return -1;
-# endif
}
# define setns missing_setns
@@ -162,12 +157,7 @@ static inline pid_t raw_getpid(void) {
#if !HAVE_RENAMEAT2
static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, const char *newname, unsigned flags) {
-# ifdef __NR_renameat2
return syscall(__NR_renameat2, oldfd, oldname, newfd, newname, flags);
-# else
- errno = ENOSYS;
- return -1;
-# endif
}
# define renameat2 missing_renameat2
@@ -177,12 +167,7 @@ static inline int missing_renameat2(int oldfd, const char *oldname, int newfd, c
#if !HAVE_KCMP
static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) {
-# if defined __NR_kcmp && __NR_kcmp >= 0
return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2);
-# else
- errno = ENOSYS;
- return -1;
-# endif
}
# define kcmp missing_kcmp
@@ -192,34 +177,19 @@ static inline int missing_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long i
#if !HAVE_KEYCTL
static inline long missing_keyctl(int cmd, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) {
-# if defined __NR_keyctl && __NR_keyctl >= 0
return syscall(__NR_keyctl, cmd, arg2, arg3, arg4, arg5);
-# else
- errno = ENOSYS;
- return -1;
-# endif
# define keyctl missing_keyctl
}
static inline key_serial_t missing_add_key(const char *type, const char *description, const void *payload, size_t plen, key_serial_t ringid) {
-# if defined __NR_add_key && __NR_add_key >= 0
return syscall(__NR_add_key, type, description, payload, plen, ringid);
-# else
- errno = ENOSYS;
- return -1;
-# endif
# define add_key missing_add_key
}
static inline key_serial_t missing_request_key(const char *type, const char *description, const char * callout_info, key_serial_t destringid) {
-# if defined __NR_request_key && __NR_request_key >= 0
return syscall(__NR_request_key, type, description, callout_info, destringid);
-# else
- errno = ENOSYS;
- return -1;
-# endif
# define request_key missing_request_key
}
@@ -329,12 +299,7 @@ static inline long missing_get_mempolicy(int *mode, unsigned long *nodemask,
#if !HAVE_PIDFD_SEND_SIGNAL
static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, unsigned flags) {
-# ifdef __NR_pidfd_send_signal
return syscall(__NR_pidfd_send_signal, fd, sig, info, flags);
-# else
- errno = ENOSYS;
- return -1;
-# endif
}
# define pidfd_send_signal missing_pidfd_send_signal
@@ -342,12 +307,7 @@ static inline int missing_pidfd_send_signal(int fd, int sig, siginfo_t *info, un
#if !HAVE_PIDFD_OPEN
static inline int missing_pidfd_open(pid_t pid, unsigned flags) {
-# ifdef __NR_pidfd_open
return syscall(__NR_pidfd_open, pid, flags);
-# else
- errno = ENOSYS;
- return -1;
-# endif
}
# define pidfd_open missing_pidfd_open
@@ -661,12 +621,7 @@ static inline ssize_t missing_getdents64(int fd, void *buffer, size_t length) {
#if !HAVE_SCHED_SETATTR
static inline ssize_t missing_sched_setattr(pid_t pid, struct sched_attr *attr, unsigned int flags) {
-# if defined __NR_sched_setattr
return syscall(__NR_sched_setattr, pid, attr, flags);
-# else
- errno = ENOSYS;
- return -1;
-# endif
}
# define sched_setattr missing_sched_setattr
diff --git a/src/basic/pidfd-util.c b/src/basic/pidfd-util.c
index 204439e444..c90699d066 100644
--- a/src/basic/pidfd-util.c
+++ b/src/basic/pidfd-util.c
@@ -24,12 +24,8 @@ static int pidfd_check_pidfs(void) {
return have_pidfs;
_cleanup_close_ int fd = pidfd_open(getpid_cached(), 0);
- if (fd < 0) {
- if (ERRNO_IS_NOT_SUPPORTED(errno))
- return (have_pidfs = false);
-
+ if (fd < 0)
return -errno;
- }
return (have_pidfs = fd_is_fs_type(fd, PID_FS_MAGIC));
}
diff --git a/src/basic/pidref.c b/src/basic/pidref.c
index bc3e96f426..a275f77b56 100644
--- a/src/basic/pidref.c
+++ b/src/basic/pidref.c
@@ -84,8 +84,8 @@ int pidref_set_pid(PidRef *pidref, pid_t pid) {
fd = pidfd_open(pid, 0);
if (fd < 0) {
- /* Graceful fallback in case the kernel doesn't support pidfds or is out of fds */
- if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno) && !ERRNO_IS_RESOURCE(errno))
+ /* Graceful fallback in case the kernel is out of fds */
+ if (!ERRNO_IS_RESOURCE(errno))
return log_debug_errno(errno, "Failed to open pidfd for pid " PID_FMT ": %m", pid);
fd = -EBADF;
diff --git a/src/basic/process-util.c b/src/basic/process-util.c
index a13e2d5f28..0367270724 100644
--- a/src/basic/process-util.c
+++ b/src/basic/process-util.c
@@ -2042,7 +2042,7 @@ int posix_spawn_wrapper(
* issues.
*
* Also, move the newly-created process into 'cgroup' through POSIX_SPAWN_SETCGROUP (clone3())
- * if available. Note that CLONE_INTO_CGROUP is only supported on cgroup v2.
+ * if available.
* returns 1: We're already in the right cgroup
* 0: 'cgroup' not specified or POSIX_SPAWN_SETCGROUP is not supported. The caller
* needs to call 'cg_attach' on their own */
@@ -2061,14 +2061,10 @@ int posix_spawn_wrapper(
_unused_ _cleanup_(posix_spawnattr_destroyp) posix_spawnattr_t *attr_destructor = &attr;
#if HAVE_PIDFD_SPAWN
- static enum {
- CLONE_ONLY_PID,
- CLONE_CAN_PIDFD, /* 5.2 */
- CLONE_CAN_CGROUP, /* 5.7 */
- } clone_support = CLONE_CAN_CGROUP;
+ static bool have_clone_into_cgroup = true; /* kernel 5.7+ */
_cleanup_close_ int cgroup_fd = -EBADF;
- if (cgroup && clone_support >= CLONE_CAN_CGROUP) {
+ if (cgroup && have_clone_into_cgroup) {
_cleanup_free_ char *resolved_cgroup = NULL;
r = cg_get_path_and_check(
@@ -2099,47 +2095,41 @@ int posix_spawn_wrapper(
return -r;
#if HAVE_PIDFD_SPAWN
- if (clone_support >= CLONE_CAN_PIDFD) {
- _cleanup_close_ int pidfd = -EBADF;
-
- r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
- if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) &&
- cg_is_threaded(cgroup) > 0) /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode. */
- return -EUCLEAN;
- if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
- FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
- /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
- * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
- * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
- * but not CLONE_INTO_CGROUP. */
-
- /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
- * retry every time. */
- assert(clone_support >= CLONE_CAN_CGROUP);
- clone_support = CLONE_CAN_PIDFD;
-
- flags &= ~POSIX_SPAWN_SETCGROUP;
- r = posix_spawnattr_setflags(&attr, flags);
- if (r != 0)
- return -r;
-
- r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
- }
- if (r == 0) {
- r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
- if (r < 0)
- return r;
-
- return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
- }
- if (!ERRNO_IS_NOT_SUPPORTED(r) && !ERRNO_IS_PRIVILEGE(r))
+ _cleanup_close_ int pidfd = -EBADF;
+
+ r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
+ if (ERRNO_IS_NOT_SUPPORTED(r) && FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP) && cg_is_threaded(cgroup) > 0)
+ return -EUCLEAN; /* clone3() could also return EOPNOTSUPP if the target cgroup is in threaded mode,
+ turn that into something recognizable */
+ if ((ERRNO_IS_NOT_SUPPORTED(r) || ERRNO_IS_PRIVILEGE(r) || r == E2BIG) &&
+ FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP)) {
+ /* Compiled on a newer host, or seccomp&friends blocking clone3()? Fallback, but
+ * need to disable POSIX_SPAWN_SETCGROUP, which is what redirects to clone3().
+ * Note that we might get E2BIG here since some kernels (e.g. 5.4) support clone3()
+ * but not CLONE_INTO_CGROUP. */
+
+ /* CLONE_INTO_CGROUP definitely won't work, hence remember the fact so that we don't
+ * retry every time. */
+ have_clone_into_cgroup = false;
+
+ flags &= ~POSIX_SPAWN_SETCGROUP;
+ r = posix_spawnattr_setflags(&attr, flags);
+ if (r != 0)
return -r;
- clone_support = CLONE_ONLY_PID; /* No CLONE_PIDFD either? */
+ r = pidfd_spawn(&pidfd, path, NULL, &attr, argv, envp);
}
-#endif
+ if (r != 0)
+ return -r;
+ r = pidref_set_pidfd_consume(ret_pidref, TAKE_FD(pidfd));
+ if (r < 0)
+ return r;
+
+ return FLAGS_SET(flags, POSIX_SPAWN_SETCGROUP);
+#else
pid_t pid;
+
r = posix_spawn(&pid, path, NULL, &attr, argv, envp);
if (r != 0)
return -r;
@@ -2149,6 +2139,7 @@ int posix_spawn_wrapper(
return r;
return 0; /* We did not use CLONE_INTO_CGROUP so return 0, the caller will have to move the child */
+#endif
}
int proc_dir_open(DIR **ret) {
diff --git a/src/libsystemd/sd-event/sd-event.c b/src/libsystemd/sd-event/sd-event.c
index c1f1747fd0..3d1b6fb228 100644
--- a/src/libsystemd/sd-event/sd-event.c
+++ b/src/libsystemd/sd-event/sd-event.c
@@ -25,6 +25,7 @@
#include "missing_magic.h"
#include "missing_syscall.h"
#include "missing_threads.h"
+#include "missing_wait.h"
#include "origin-id.h"
#include "path-util.h"
#include "prioq.h"
@@ -1074,6 +1075,8 @@ static void source_disconnect(sd_event_source *s) {
}
static sd_event_source* source_free(sd_event_source *s) {
+ int r;
+
assert(s);
source_disconnect(s);
@@ -1087,31 +1090,23 @@ static sd_event_source* source_free(sd_event_source *s) {
if (s->child.process_owned) {
if (!s->child.exited) {
- bool sent = false;
-
- if (s->child.pidfd >= 0) {
- if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
- if (errno == ESRCH) /* Already dead */
- sent = true;
- else if (!ERRNO_IS_NOT_SUPPORTED(errno))
- log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
- s->child.pid);
- } else
- sent = true;
- }
-
- if (!sent)
- if (kill(s->child.pid, SIGKILL) < 0)
- if (errno != ESRCH) /* Already dead */
- log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
- s->child.pid);
+ if (s->child.pidfd >= 0)
+ r = RET_NERRNO(pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0));
+ else
+ r = RET_NERRNO(kill(s->child.pid, SIGKILL));
+ if (r < 0 && r != -ESRCH)
+ log_debug_errno(r, "Failed to kill process " PID_FMT ", ignoring: %m",
+ s->child.pid);
}
if (!s->child.waited) {
siginfo_t si = {};
/* Reap the child if we can */
- (void) waitid(P_PID, s->child.pid, &si, WEXITED);
+ if (s->child.pidfd >= 0)
+ (void) waitid(P_PIDFD, s->child.pidfd, &si, WEXITED);
+ else
+ (void) waitid(P_PID, s->child.pid, &si, WEXITED);
}
}
@@ -1578,11 +1573,6 @@ static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *us
return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
-static bool shall_use_pidfd(void) {
- /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
- return secure_getenv_bool("SYSTEMD_PIDFD") != 0;
-}
-
_public_ int sd_event_add_child(
sd_event *e,
sd_event_source **ret,
@@ -1630,34 +1620,29 @@ _public_ int sd_event_add_child(
if (!s)
return -ENOMEM;
+ /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
+ * pin the PID, and make regular waitid() handling race-free. */
+
+ s->child.pidfd = pidfd_open(pid, 0);
+ if (s->child.pidfd < 0)
+ return -errno;
+
+ s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
+
s->wakeup = WAKEUP_EVENT_SOURCE;
s->child.options = options;
s->child.callback = callback;
s->userdata = userdata;
s->enabled = SD_EVENT_ONESHOT;
- /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
- * pin the PID, and make regular waitid() handling race-free. */
-
- if (shall_use_pidfd()) {
- s->child.pidfd = pidfd_open(pid, 0);
- if (s->child.pidfd < 0) {
- /* Propagate errors unless the syscall is not supported or blocked */
- if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
- return -errno;
- } else
- s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
- } else
- s->child.pidfd = -EBADF;
-
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
- /* We have a pidfd and we only want to watch for exit */
+ /* We only want to watch for exit */
r = source_child_pidfd_register(s, s->enabled);
if (r < 0)
return r;
} else {
- /* We have no pidfd or we shall wait for some other event than WEXITED */
+ /* We shall wait for some other event than WEXITED */
r = event_make_signal_data(e, SIGCHLD, NULL);
if (r < 0)
return r;
@@ -1727,17 +1712,12 @@ _public_ int sd_event_add_child_pidfd(
s->wakeup = WAKEUP_EVENT_SOURCE;
s->child.pidfd = pidfd;
- s->child.pid = pid;
s->child.options = options;
s->child.callback = callback;
s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
s->userdata = userdata;
s->enabled = SD_EVENT_ONESHOT;
- r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
- if (r < 0)
- return r;
-
if (EVENT_SOURCE_WATCH_PIDFD(s)) {
/* We only want to watch for WEXITED */
r = source_child_pidfd_register(s, s->enabled);
@@ -1752,6 +1732,11 @@ _public_ int sd_event_add_child_pidfd(
e->need_process_child = true;
}
+ r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
+ if (r < 0)
+ return r;
+
+ s->child.pid = pid;
e->n_online_child_sources++;
if (ret)
@@ -3239,12 +3224,10 @@ _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, cons
if (si)
copy = *si;
- if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
- /* Let's propagate the error only if the system call is not implemented or prohibited */
- if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
- return -errno;
- } else
- return 0;
+ if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0)
+ return -errno;
+
+ return 0;
}
/* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
diff --git a/src/libsystemd/sd-event/test-event.c b/src/libsystemd/sd-event/test-event.c
index 57dee392d7..6394507994 100644
--- a/src/libsystemd/sd-event/test-event.c
+++ b/src/libsystemd/sd-event/test-event.c
@@ -198,7 +198,7 @@ static int post_handler(sd_event_source *s, void *userdata) {
return 2;
}
-static void test_basic_one(bool with_pidfd) {
+TEST(basic) {
sd_event *e = NULL;
sd_event_source *w = NULL, *x = NULL, *y = NULL, *z = NULL, *q = NULL, *t = NULL;
static const char ch = 'x';
@@ -207,10 +207,6 @@ static void test_basic_one(bool with_pidfd) {
uint64_t event_now;
int64_t priority;
- log_info("/* %s(pidfd=%s) */", __func__, yes_no(with_pidfd));
-
- assert_se(setenv("SYSTEMD_PIDFD", yes_no(with_pidfd), 1) >= 0);
-
assert_se(pipe(a) >= 0);
assert_se(pipe(b) >= 0);
assert_se(pipe(d) >= 0);
@@ -301,13 +297,6 @@ static void test_basic_one(bool with_pidfd) {
safe_close_pair(b);
safe_close_pair(d);
safe_close_pair(k);
-
- assert_se(unsetenv("SYSTEMD_PIDFD") >= 0);
-}
-
-TEST(basic) {
- test_basic_one(true); /* test with pidfd */
- test_basic_one(false); /* test without pidfd */
}
TEST(sd_event_now) {
@@ -583,13 +572,7 @@ TEST(pidfd) {
assert_se(pid > 1);
- pidfd = pidfd_open(pid, 0);
- if (pidfd < 0) {
- /* No pidfd_open() supported or blocked? */
- assert_se(ERRNO_IS_NOT_SUPPORTED(errno) || ERRNO_IS_PRIVILEGE(errno));
- (void) wait_for_terminate(pid, NULL);
- return;
- }
+ ASSERT_OK(pidfd = pidfd_open(pid, 0));
pid2 = fork();
if (pid2 == 0)
diff --git a/src/login/pam_systemd.c b/src/login/pam_systemd.c
index 00fc2c360d..893d03cdb6 100644
--- a/src/login/pam_systemd.c
+++ b/src/login/pam_systemd.c
@@ -879,7 +879,7 @@ static int create_session_message(
if (!avoid_pidfd) {
pidfd = pidfd_open(getpid_cached(), 0);
- if (pidfd < 0 && !ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
+ if (pidfd < 0)
return -errno;
}