diff options
author | Lennart Poettering <lennart@poettering.net> | 2025-01-16 13:48:48 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-16 13:48:48 +0100 |
commit | 8ce960a80dab1f3d4500874681d748b0973dafba (patch) | |
tree | be4d8b3522bb0f415c026314e138a8dcd3b7aec9 | |
parent | tree-wide: drop support for kernels without pidfd_open() and pidfd_send_signa... (diff) | |
parent | process-util: slightly update comment in freeze() (diff) | |
download | systemd-8ce960a80dab1f3d4500874681d748b0973dafba.tar.xz systemd-8ce960a80dab1f3d4500874681d748b0973dafba.zip |
process-util: port pid_from_same_root_fs() to pidref + more (#35975)
Let's continue our move to PidRef.
-rw-r--r-- | src/basic/namespace-util.c | 18 | ||||
-rw-r--r-- | src/basic/pidref.c | 10 | ||||
-rw-r--r-- | src/basic/process-util.c | 53 | ||||
-rw-r--r-- | src/basic/process-util.h | 3 | ||||
-rw-r--r-- | src/basic/virt.c | 19 | ||||
-rw-r--r-- | src/coredump/coredump.c | 17 | ||||
-rw-r--r-- | src/shared/killall.c | 40 | ||||
-rw-r--r-- | src/test/test-pidref.c | 24 | ||||
-rw-r--r-- | src/test/test-process-util.c | 50 |
9 files changed, 150 insertions, 84 deletions
diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c index 36ebda9ba4..6c559e4bf8 100644 --- a/src/basic/namespace-util.c +++ b/src/basic/namespace-util.c @@ -519,12 +519,10 @@ int userns_acquire_empty(void) { _cleanup_(pidref_done_sigkill_wait) PidRef pid = PIDREF_NULL; int r; - r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS, &pid); + r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid); if (r < 0) return r; - if (r == 0) - /* Child. We do nothing here, just freeze until somebody kills us. */ - freeze(); + assert(r > 0); return pidref_namespace_open_by_type(&pid, NAMESPACE_USER); } @@ -541,12 +539,10 @@ int userns_acquire(const char *uid_map, const char *gid_map) { * and then kills the process again. This way we have a userns fd that is not bound to any * process. We can use that for file system mounts and similar. */ - r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS, &pid); + r = pidref_safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_USERNS|FORK_FREEZE, &pid); if (r < 0) return r; - if (r == 0) - /* Child. We do nothing here, just freeze until somebody kills us. */ - freeze(); + assert(r > 0); xsprintf(path, "/proc/" PID_FMT "/uid_map", pid.pid); r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER); @@ -762,12 +758,10 @@ int netns_acquire(void) { /* Forks off a process in a new network namespace, acquires a network namespace fd, and then kills * the process again. This way we have a netns fd that is not bound to any process. */ - r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS, &pid); + r = pidref_safe_fork("(sd-mknetns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG_SIGKILL|FORK_NEW_NETNS|FORK_FREEZE, &pid); if (r < 0) return log_debug_errno(r, "Failed to fork process into new netns: %m"); - if (r == 0) - /* Child. We do nothing here, just freeze until somebody kills us. */ - freeze(); + assert(r > 0); return pidref_namespace_open_by_type(&pid, NAMESPACE_NET); } diff --git a/src/basic/pidref.c b/src/basic/pidref.c index a275f77b56..ccfa2903b6 100644 --- a/src/basic/pidref.c +++ b/src/basic/pidref.c @@ -40,6 +40,10 @@ int pidref_acquire_pidfd_id(PidRef *pidref) { bool pidref_equal(PidRef *a, PidRef *b) { + /* If this is the very same structure, it definitely refers to the same process */ + if (a == b) + return true; + if (!pidref_is_set(a)) return !pidref_is_set(b); @@ -58,9 +62,15 @@ bool pidref_equal(PidRef *a, PidRef *b) { if (a->fd_id == 0 || b->fd_id == 0) return true; } else { + /* If the other side is remote, then this is not the same */ if (pidref_is_remote(b)) return false; + /* PID1 cannot exit, hence it cannot change pidfs ids, hence no point in comparing them, we + * can shortcut things */ + if (a->pid == 1) + return true; + /* Try to compare pidfds using their inode numbers. This way we can ensure that we * don't spuriously consider two PidRefs equal if the pid has been reused once. Note * that we ignore all errors here, not only EOPNOTSUPP, as fstat() might fail due to diff --git a/src/basic/process-util.c b/src/basic/process-util.c index 0367270724..21e296864a 100644 --- a/src/basic/process-util.c +++ b/src/basic/process-util.c @@ -1228,18 +1228,53 @@ int pidref_is_alive(const PidRef *pidref) { return result; } -int pid_from_same_root_fs(pid_t pid) { - const char *root; +int pidref_from_same_root_fs(PidRef *a, PidRef *b) { + _cleanup_(pidref_done) PidRef self = PIDREF_NULL; + int r; - if (pid < 0) + /* Checks if the two specified processes have the same root fs. Either can be specified as NULL in + * which case we'll check against ourselves. */ + + if (!a || !b) { + r = pidref_set_self(&self); + if (r < 0) + return r; + if (!a) + a = &self; + if (!b) + b = &self; + } + + if (!pidref_is_set(a) || !pidref_is_set(b)) + return -ESRCH; + + /* If one of the two processes have the same root they cannot have the same root fs, but if both of + * them do we don't know */ + if (pidref_is_remote(a) && pidref_is_remote(b)) + return -EREMOTE; + if (pidref_is_remote(a) || pidref_is_remote(b)) return false; - if (pid == 0 || pid == getpid_cached()) + if (pidref_equal(a, b)) return true; - root = procfs_file_alloca(pid, "root"); + const char *roota = procfs_file_alloca(a->pid, "root"); + const char *rootb = procfs_file_alloca(b->pid, "root"); - return inode_same(root, "/proc/1/root", 0); + int result = inode_same(roota, rootb, 0); + if (result == -ENOENT) + return proc_mounted() == 0 ? -ENOSYS : -ESRCH; + if (result < 0) + return result; + + r = pidref_verify(a); + if (r < 0) + return r; + r = pidref_verify(b); + if (r < 0) + return r; + + return result; } bool is_main_thread(void) { @@ -1763,6 +1798,9 @@ int safe_fork_full( } } + if (FLAGS_SET(flags, FORK_FREEZE)) + freeze(); + if (ret_pid) *ret_pid = getpid_cached(); @@ -1959,7 +1997,8 @@ _noreturn_ void freeze(void) { break; } - /* waitid() failed with an unexpected error, things are really borked. Freeze now! */ + /* waitid() failed with an ECHLD error (because there are no left-over child processes) or any other + * (unexpected) error. Freeze for good now! */ for (;;) pause(); } diff --git a/src/basic/process-util.h b/src/basic/process-util.h index d4d7d87c06..cfb967c3bc 100644 --- a/src/basic/process-util.h +++ b/src/basic/process-util.h @@ -93,7 +93,7 @@ int pid_is_unwaited(pid_t pid); int pidref_is_unwaited(const PidRef *pidref); int pid_is_my_child(pid_t pid); int pidref_is_my_child(const PidRef *pidref); -int pid_from_same_root_fs(pid_t pid); +int pidref_from_same_root_fs(PidRef *a, PidRef *b); bool is_main_thread(void); @@ -192,6 +192,7 @@ typedef enum ForkFlags { FORK_NEW_USERNS = 1 << 19, /* Run child in its own user namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ FORK_NEW_NETNS = 1 << 20, /* Run child in its own network namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ FORK_NEW_PIDNS = 1 << 21, /* Run child in its own PID namespace 💣 DO NOT USE IN THREADED PROGRAMS! 💣 */ + FORK_FREEZE = 1 << 22, /* Don't return in child, just call freeze() instead */ } ForkFlags; int safe_fork_full( diff --git a/src/basic/virt.c b/src/basic/virt.c index 1ba8d36dba..0d6fcaee56 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -799,19 +799,16 @@ int running_in_chroot(void) { if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0) return 0; - r = inode_same("/proc/1/root", "/", 0); - if (r == -ENOENT) { - r = proc_mounted(); - if (r == 0) { - if (getpid_cached() == 1) - return false; /* We will mount /proc, assuming we're not in a chroot. */ + r = pidref_from_same_root_fs(&PIDREF_MAKE_FROM_PID(1), NULL); + if (r == -ENOSYS) { + if (getpid_cached() == 1) + return false; /* We will mount /proc, assuming we're not in a chroot. */ - log_debug("/proc is not mounted, assuming we're in a chroot."); - return true; - } - if (r > 0) /* If we have fake /proc/, we can't do the check properly. */ - return -ENOSYS; + log_debug("/proc/ is not mounted, assuming we're in a chroot."); + return true; } + if (r == -ESRCH) /* We must have a fake /proc/, we can't do the check properly. */ + return -ENOSYS; if (r < 0) return r; diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index 55d075fdf3..fa009bdacf 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -758,23 +758,16 @@ static int compose_open_fds(pid_t pid, char **ret) { * Returns a negative number on errors. */ static int get_process_container_parent_cmdline(PidRef *pid, char** ret_cmdline) { - const char *proc_root_path; - struct stat root_stat, proc_root_stat; int r; assert(pidref_is_set(pid)); assert(!pidref_is_remote(pid)); - /* To compare inodes of / and /proc/[pid]/root */ - if (stat("/", &root_stat) < 0) - return -errno; - - proc_root_path = procfs_file_alloca(pid->pid, "root"); - if (stat(proc_root_path, &proc_root_stat) < 0) - return -errno; - - /* The process uses system root. */ - if (stat_inode_same(&proc_root_stat, &root_stat)) { + r = pidref_from_same_root_fs(pid, &PIDREF_MAKE_FROM_PID(1)); + if (r < 0) + return r; + if (r > 0) { + /* The process uses system root. */ *ret_cmdline = NULL; return 0; } diff --git a/src/shared/killall.c b/src/shared/killall.c index 184aec018b..b3a9c7e16f 100644 --- a/src/shared/killall.c +++ b/src/shared/killall.c @@ -23,23 +23,27 @@ #include "string-util.h" #include "terminal-util.h" -static bool argv_has_at(pid_t pid) { - _cleanup_fclose_ FILE *f = NULL; - const char *p; - char c = 0; +static int argv_has_at(const PidRef *pid) { + int r; - p = procfs_file_alloca(pid, "cmdline"); - f = fopen(p, "re"); - if (!f) { - log_debug_errno(errno, "Failed to open %s, ignoring: %m", p); - return true; /* not really, but has the desired effect */ - } + assert(pidref_is_set(pid)); + assert(!pidref_is_remote(pid)); + + const char *p = procfs_file_alloca(pid->pid, "cmdline"); + _cleanup_fclose_ FILE *f = fopen(p, "re"); + if (!f) + return log_debug_errno(errno, "Failed to open %s, ignoring: %m", p); /* Try to read the first character of the command line. If the cmdline is empty (which might be the case for * kernel threads but potentially also other stuff), this line won't do anything, but we don't care much, as * actual kernel threads are already filtered out above. */ + char c = 0; (void) fread(&c, 1, 1, f); + r = pidref_verify(pid); + if (r < 0) + return log_debug_errno(r, "Failed to verify pid " PID_FMT ", ignoring: %m", pid->pid); + /* Processes with argv[0][0] = '@' we ignore from the killing spree. * * https://systemd.io/ROOT_STORAGE_DAEMONS */ @@ -74,9 +78,8 @@ static bool is_in_survivor_cgroup(const PidRef *pid) { return r > 0; } -static bool ignore_proc(const PidRef *pid, bool warn_rootfs) { +static bool ignore_proc(PidRef *pid, bool warn_rootfs) { uid_t uid; - int r; assert(pidref_is_set(pid)); @@ -85,28 +88,25 @@ static bool ignore_proc(const PidRef *pid, bool warn_rootfs) { return true; /* Ignore kernel threads */ - r = pidref_is_kernel_thread(pid); - if (r != 0) + if (pidref_is_kernel_thread(pid) != 0) return true; /* also ignore processes where we can't determine this */ /* Ignore processes that are part of a cgroup marked with the user.survive_final_kill_signal xattr */ if (is_in_survivor_cgroup(pid)) return true; - r = pidref_get_uid(pid, &uid); - if (r < 0) + if (pidref_get_uid(pid, &uid) < 0) return true; /* not really, but better safe than sorry */ /* Non-root processes otherwise are always subject to be killed */ if (uid != 0) return false; - if (!argv_has_at(pid->pid)) - return false; + if (argv_has_at(pid) == 0) + return false; /* if this fails, ignore the process */ if (warn_rootfs && - pid_from_same_root_fs(pid->pid) > 0) { - + pidref_from_same_root_fs(pid, NULL) > 0) { _cleanup_free_ char *comm = NULL; (void) pidref_get_comm(pid, &comm); diff --git a/src/test/test-pidref.c b/src/test/test-pidref.c index 5535e98ab0..10033b5826 100644 --- a/src/test/test-pidref.c +++ b/src/test/test-pidref.c @@ -159,12 +159,8 @@ TEST(pidref_new_from_pid) { TEST(pidref_kill) { _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; siginfo_t si; - int r; - r = pidref_safe_fork("(test-pidref-kill)", FORK_DEATHSIG_SIGKILL, &pidref); - assert_se(r >= 0); - if (r == 0) - freeze(); + ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-kill)", FORK_DEATHSIG_SIGKILL|FORK_FREEZE, &pidref)); assert_se(pidref_kill(&pidref, SIGKILL) >= 0); assert_se(pidref_wait_for_terminate(&pidref, &si) >= 0); @@ -174,12 +170,8 @@ TEST(pidref_kill) { TEST(pidref_kill_and_sigcont) { _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; siginfo_t si; - int r; - r = pidref_safe_fork("(test-pidref-kill-and-sigcont)", FORK_DEATHSIG_SIGTERM, &pidref); - assert_se(r >= 0); - if (r == 0) - freeze(); + ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-kill-and-sigcont)", FORK_DEATHSIG_SIGTERM|FORK_FREEZE, &pidref)); assert_se(pidref_kill_and_sigcont(&pidref, SIGTERM) >= 0); assert_se(pidref_wait_for_terminate(&pidref, &si) >= 0); @@ -189,12 +181,8 @@ TEST(pidref_kill_and_sigcont) { TEST(pidref_sigqueue) { _cleanup_(pidref_done) PidRef pidref = PIDREF_NULL; siginfo_t si; - int r; - r = pidref_safe_fork("(test-pidref-sigqueue)", FORK_DEATHSIG_SIGTERM, &pidref); - assert_se(r >= 0); - if (r == 0) - freeze(); + ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-sigqueue)", FORK_DEATHSIG_SIGTERM|FORK_FREEZE, &pidref)); assert_se(pidref_sigqueue(&pidref, SIGTERM, 42) >= 0); assert_se(pidref_wait_for_terminate(&pidref, &si) >= 0); @@ -203,12 +191,8 @@ TEST(pidref_sigqueue) { TEST(pidref_done_sigkill_wait) { _cleanup_(pidref_done_sigkill_wait) PidRef pidref = PIDREF_NULL; - int r; - r = pidref_safe_fork("(test-pidref-done-sigkill-wait)", FORK_DEATHSIG_SIGKILL, &pidref); - assert_se(r >= 0); - if (r == 0) - freeze(); + ASSERT_OK_POSITIVE(pidref_safe_fork("(test-pidref-done-sigkill-wait)", FORK_DEATHSIG_SIGKILL|FORK_FREEZE, &pidref)); } TEST(pidref_verify) { diff --git a/src/test/test-process-util.c b/src/test/test-process-util.c index 12bac656e2..166c0fa2c9 100644 --- a/src/test/test-process-util.c +++ b/src/test/test-process-util.c @@ -3,6 +3,7 @@ #include <fcntl.h> #include <linux/oom.h> #include <pthread.h> +#include <sys/eventfd.h> #include <sys/mount.h> #include <sys/personality.h> #include <sys/prctl.h> @@ -1007,7 +1008,7 @@ TEST(pid_get_start_time) { _cleanup_(pidref_done_sigkill_wait) PidRef child = PIDREF_NULL; - ASSERT_OK(pidref_safe_fork("(stub)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG, &child)); + ASSERT_OK_POSITIVE(pidref_safe_fork("(stub)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG|FORK_FREEZE, &child)); usec_t start_time2; ASSERT_OK(pidref_get_start_time(&child, &start_time2)); @@ -1017,6 +1018,53 @@ TEST(pid_get_start_time) { ASSERT_GE(start_time2, start_time); } +TEST(pidref_from_same_root_fs) { + int r; + + _cleanup_(pidref_done) PidRef pid1 = PIDREF_NULL, self = PIDREF_NULL; + + ASSERT_OK(pidref_set_self(&self)); + ASSERT_OK(pidref_set_pid(&pid1, 1)); + + ASSERT_OK_POSITIVE(pidref_from_same_root_fs(&self, &self)); + ASSERT_OK_POSITIVE(pidref_from_same_root_fs(&pid1, &pid1)); + + r = pidref_from_same_root_fs(&pid1, &self); + if (ERRNO_IS_NEG_PRIVILEGE(r)) + return (void) log_tests_skipped("skipping pidref_from_same_root_fs() test, lacking privileged."); + ASSERT_OK(r); + log_info("PID1 and us have the same rootfs: %s", yes_no(r)); + + int q = pidref_from_same_root_fs(&self, &pid1); + ASSERT_OK(q); + ASSERT_EQ(r, q); + + _cleanup_(pidref_done_sigkill_wait) PidRef child1 = PIDREF_NULL; + ASSERT_OK(pidref_safe_fork("(child1)", FORK_RESET_SIGNALS|FORK_CLOSE_ALL_FDS|FORK_REOPEN_LOG|FORK_FREEZE, &child1)); + ASSERT_OK_POSITIVE(pidref_from_same_root_fs(&self, &child1)); + + _cleanup_close_ int efd = eventfd(0, EFD_CLOEXEC); + ASSERT_OK_ERRNO(efd); + + _cleanup_(pidref_done_sigkill_wait) PidRef child2 = PIDREF_NULL; + r = pidref_safe_fork("(child2)", FORK_RESET_SIGNALS|FORK_REOPEN_LOG, &child2); + ASSERT_OK(r); + + if (r == 0) { + ASSERT_OK_ERRNO(chroot("/usr")); + uint64_t u = 1; + + ASSERT_OK_EQ_ERRNO(write(efd, &u, sizeof(u)), (ssize_t) sizeof(u)); + freeze(); + } + + uint64_t u; + ASSERT_OK_EQ_ERRNO(read(efd, &u, sizeof(u)), (ssize_t) sizeof(u)); + + ASSERT_OK_ZERO(pidref_from_same_root_fs(&self, &child2)); + ASSERT_OK_ZERO(pidref_from_same_root_fs(&child2, &self)); +} + static int intro(void) { log_show_color(true); return EXIT_SUCCESS; |