summaryrefslogtreecommitdiffstats
path: root/src/coredump
diff options
context:
space:
mode:
author Michal Sekletar <msekleta@redhat.com> 2024-07-09 15:21:34 +0200
committer Luca Boccassi <luca.boccassi@gmail.com> 2024-08-06 18:32:42 +0200
commit 68511cebe58977ea68ae4f57c6462e979efd1cff (patch)
tree 9fb9445b00cd0b8f810dbb24220fc1e52043dc90 /src/coredump
parent sysusers: check if requested group name matches user name in queue (diff)
download systemd-68511cebe58977ea68ae4f57c6462e979efd1cff.tar.xz
systemd-68511cebe58977ea68ae4f57c6462e979efd1cff.zip
coredump: generate properly symbolized stacktrace for containerized processes
Diffstat (limited to 'src/coredump')
-rw-r--r-- src/coredump/coredump.c 180
-rw-r--r-- src/coredump/coredump.conf 1
2 files changed, 158 insertions, 23 deletions
diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c
index 843a500d8c..e751b67417 100644
--- a/src/coredump/coredump.c
+++ b/src/coredump/coredump.c
@@ -39,6 +39,8 @@
#include "main-func.h"
#include "memory-util.h"
#include "memstream-util.h"
+#include "missing_mount.h"
+#include "missing_syscall.h"
#include "mkdir-label.h"
#include "namespace-util.h"
#include "parse-util.h"
@@ -165,16 +167,22 @@ static uint64_t arg_external_size_max = EXTERNAL_SIZE_MAX;
static uint64_t arg_journal_size_max = JOURNAL_SIZE_MAX;
static uint64_t arg_keep_free = UINT64_MAX;
static uint64_t arg_max_use = UINT64_MAX;
+static bool arg_access_container = false;
static int parse_config(void) {
static const ConfigTableItem items[] = {
- { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
- { "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
- { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
- { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max },
- { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
- { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
- { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
+ { "Coredump", "Storage", config_parse_coredump_storage, 0, &arg_storage },
+ { "Coredump", "Compress", config_parse_bool, 0, &arg_compress },
+ { "Coredump", "ProcessSizeMax", config_parse_iec_uint64, 0, &arg_process_size_max },
+ { "Coredump", "ExternalSizeMax", config_parse_iec_uint64_infinity, 0, &arg_external_size_max },
+ { "Coredump", "JournalSizeMax", config_parse_iec_size, 0, &arg_journal_size_max },
+ { "Coredump", "KeepFree", config_parse_iec_uint64, 0, &arg_keep_free },
+ { "Coredump", "MaxUse", config_parse_iec_uint64, 0, &arg_max_use },
+#if HAVE_DWFL_SET_SYSROOT
+ { "Coredump", "AccessContainer", config_parse_bool, 0, &arg_access_container },
+#else
+ { "Coredump", "AccessContainer", config_parse_warn_compat, DISABLED_CONFIGURATION, 0 },
+#endif
{}
};
@@ -774,15 +782,44 @@ static int change_uid_gid(const Context *context) {
return drop_privileges(uid, gid, 0);
}
+/* Attaches the mount tree referenced by mount_tree_fd below a freshly created temporary directory,
+ * after moving this process into its own mount namespace with mount propagation disabled.
+ *
+ * mount_tree_fd:  file descriptor of the mount tree to attach; must be >= 0.
+ * container_root: on success receives the newly allocated path of the temporary directory
+ *                 (ownership passes to the caller); left untouched on failure.
+ *
+ * Returns 0 on success. On failure a warning is logged and the negative value produced by
+ * log_warning_errno() is returned.
+ *
+ * NOTE(review): on success the temporary directory is not removed by this function; confirm
+ * whether some caller is expected to clean it up. */
+static int setup_container_mount_tree(int mount_tree_fd, char **container_root) {
+        _cleanup_free_ char *root = NULL;
+        int r;
+
+        assert(mount_tree_fd >= 0);
+        assert(container_root);
+
+        r = unshare(CLONE_NEWNS);
+        if (r < 0)
+                return log_warning_errno(errno, "Failed to unshare mount namespace: %m");
+
+        /* Make everything private so that the mount performed below stays inside our namespace. */
+        r = mount(NULL, "/", NULL, MS_REC|MS_PRIVATE, NULL);
+        if (r < 0)
+                return log_warning_errno(errno, "Failed to disable mount propagation: %m");
+
+        r = mkdtemp_malloc("/tmp/systemd-coredump-root-XXXXXX", &root);
+        if (r < 0)
+                return log_warning_errno(r, "Failed to create temporary directory: %m");
+
+        if (move_mount(mount_tree_fd, "", -EBADF, root, MOVE_MOUNT_F_EMPTY_PATH) < 0) {
+                /* Capture the error first: rmdir() below may overwrite errno. */
+                r = log_warning_errno(errno, "Failed to move mount tree: %m");
+
+                /* Nothing got mounted, so don't leave the empty directory behind in /tmp. */
+                (void) rmdir(root);
+                return r;
+        }
+
+        *container_root = TAKE_PTR(root);
+        return 0;
+}
+
static int submit_coredump(
const Context *context,
struct iovec_wrapper *iovw,
- int input_fd) {
+ int input_fd,
+ int mount_tree_fd) {
_cleanup_(sd_json_variant_unrefp) sd_json_variant *json_metadata = NULL;
_cleanup_close_ int coredump_fd = -EBADF, coredump_node_fd = -EBADF;
_cleanup_free_ char *filename = NULL, *coredump_data = NULL;
_cleanup_free_ char *stacktrace = NULL;
+ _cleanup_free_ char *root = NULL;
const char *module_name;
uint64_t coredump_size = UINT64_MAX, coredump_compressed_size = UINT64_MAX;
bool truncated = false, written = false;
@@ -819,6 +856,12 @@ static int submit_coredump(
(void) coredump_vacuum(coredump_node_fd >= 0 ? coredump_node_fd : coredump_fd, arg_keep_free, arg_max_use);
}
+ if (mount_tree_fd >= 0 && arg_access_container) {
+ r = setup_container_mount_tree(mount_tree_fd, &root);
+ if (r < 0)
+ log_warning_errno(r, "Failed to setup container mount tree, ignoring: %m");
+ }
+
/* Now, let's drop privileges to become the user who owns the segfaulted process and allocate the
* coredump memory under the user's uid. This also ensures that the credentials journald will see are
* the ones of the coredumping user, thus making sure the user gets access to the core dump. Let's
@@ -826,7 +869,6 @@ static int submit_coredump(
r = change_uid_gid(context);
if (r < 0)
return log_error_errno(r, "Failed to drop privileges: %m");
-
if (written) {
/* Try to get a stack trace if we can */
if (coredump_size > arg_process_size_max)
@@ -838,6 +880,7 @@ static int submit_coredump(
(void) parse_elf_object(coredump_fd,
context->meta[META_EXE],
+ root,
/* fork_disable_dump= */ skip, /* avoid loops */
&stacktrace,
&json_metadata);
@@ -1000,10 +1043,11 @@ static int save_context(Context *context, const struct iovec_wrapper *iovw) {
}
static int process_socket(int fd) {
- _cleanup_close_ int input_fd = -EBADF;
+ _cleanup_close_ int input_fd = -EBADF, mount_tree_fd = -EBADF;
Context context = {};
struct iovec_wrapper iovw = {};
struct iovec iovec;
+ bool first = true;
int r;
assert(fd >= 0);
@@ -1051,16 +1095,34 @@ static int process_socket(int fd) {
free(iovec.iov_base);
- found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
- if (!found) {
- cmsg_close_all(&mh);
- r = log_error_errno(SYNTHETIC_ERRNO(EBADMSG),
- "Coredump file descriptor missing.");
- goto finish;
+ found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int) * 2));
+ if (found) {
+ int fds[2] = EBADF_PAIR;
+
+ memcpy(fds, CMSG_TYPED_DATA(found, int), sizeof(int) * 2);
+
+ assert(mount_tree_fd < 0);
+
+ /* Maybe we already got the coredump FD in a previous iteration? */
+ safe_close(input_fd);
+
+ input_fd = fds[0];
+ mount_tree_fd = fds[1];
+
+ /* We have all the FDs we need, so let's take a shortcut here. */
+ break;
+ } else {
+ found = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, CMSG_LEN(sizeof(int)));
+ if (found)
+ input_fd = *CMSG_TYPED_DATA(found, int);
+ }
+
+ /* This is the first message that carries file descriptors; there may be one more that actually contains an array of descriptors. */
+ if (first) {
+ first = false;
+ continue;
}
- assert(input_fd < 0);
- input_fd = *CMSG_TYPED_DATA(found, int);
break;
} else
cmsg_close_all(&mh);
@@ -1090,14 +1152,14 @@ static int process_socket(int fd) {
goto finish;
}
- r = submit_coredump(&context, &iovw, input_fd);
+ r = submit_coredump(&context, &iovw, input_fd, mount_tree_fd);
finish:
iovw_free_contents(&iovw, true);
return r;
}
-static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) {
+static int send_iovec(const struct iovec_wrapper *iovw, int input_fd, int mounts_fd) {
_cleanup_close_ int fd = -EBADF;
int r;
@@ -1154,6 +1216,12 @@ static int send_iovec(const struct iovec_wrapper *iovw, int input_fd) {
if (r < 0)
return log_error_errno(r, "Failed to send coredump fd: %m");
+ if (mounts_fd >= 0) {
+ r = send_many_fds(fd, (int[]) { input_fd, mounts_fd }, 2, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to send coredump fds: %m");
+ }
+
return 0;
}
@@ -1532,7 +1600,7 @@ static int forward_coredump_to_container(Context *context) {
_exit(EXIT_FAILURE);
}
- r = send_iovec(iovw, STDIN_FILENO);
+ r = send_iovec(iovw, STDIN_FILENO, -EBADF);
if (r < 0) {
log_debug_errno(r, "Failed to send iovec to coredump socket: %m");
_exit(EXIT_FAILURE);
@@ -1560,8 +1628,68 @@ static int forward_coredump_to_container(Context *context) {
return 0;
}
+/* Produces a file descriptor referring to a clone of the mount tree rooted at "/" as opened by a
+ * helper process that is forked via namespace_fork() with the mount namespace and root directory
+ * file descriptors of context->pid (the crashing process). The helper hands the descriptor back
+ * over a socket pair.
+ *
+ * Returns the received file descriptor (>= 0) on success. Returns -EBADF without logging when
+ * HAVE_DWFL_SET_SYSROOT is not set or arg_access_container is false. Any other failure is logged
+ * and reported as a negative value. */
+static int gather_pid_mount_tree_fd(const Context *context) {
+        _cleanup_close_ int mntns_fd = -EBADF, root_fd = -EBADF;
+        _cleanup_close_pair_ int sockets[2] = EBADF_PAIR;
+        pid_t helper_pid;
+        int r;
+
+        assert(context);
+
+        /* Don't bother preparing environment if we can't pass it to libdwfl. */
+#if !HAVE_DWFL_SET_SYSROOT
+        return -EBADF;
+#endif
+
+        if (!arg_access_container)
+                return -EBADF;
+
+        r = socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, sockets);
+        if (r < 0)
+                return log_error_errno(errno, "Failed to create socket pair: %m");
+
+        r = namespace_open(context->pid, NULL, &mntns_fd, NULL, NULL, &root_fd);
+        if (r < 0)
+                return log_error_errno(r, "Failed to open mount namespace of crashing process: %m");
+
+        r = namespace_fork("(sd-mount-tree-ns)",
+                           "(sd-mount-tree)",
+                           NULL, 0,
+                           FORK_RESET_SIGNALS|FORK_DEATHSIG_SIGKILL,
+                           -1, mntns_fd, -1, -1, root_fd,
+                           &helper_pid);
+        if (r < 0)
+                return log_error_errno(r, "Failed to fork(): %m");
+        if (r == 0) {
+                int cloned_tree_fd;
+
+                /* Child: only the sending end of the socket pair is needed here. */
+                sockets[0] = safe_close(sockets[0]);
+
+                cloned_tree_fd = open_tree(-EBADF, "/", AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+                if (cloned_tree_fd < 0) {
+                        log_error_errno(errno, "Failed to clone mount tree: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                r = send_one_fd(sockets[1], cloned_tree_fd, 0);
+                if (r < 0) {
+                        log_error_errno(r, "Failed to send mount tree to parent: %m");
+                        _exit(EXIT_FAILURE);
+                }
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        /* Parent: drop the sending end, then wait for the helper before picking up the descriptor. */
+        sockets[1] = safe_close(sockets[1]);
+
+        r = wait_for_terminate_and_check("(sd-mount-tree-ns)", helper_pid, 0);
+        if (r < 0)
+                return log_error_errno(r, "Failed to wait for child: %m");
+        if (r != EXIT_SUCCESS)
+                return log_error_errno(SYNTHETIC_ERRNO(ECHILD), "Child died abnormally.");
+
+        /* The helper has already exited, so a non-blocking receive is sufficient. */
+        r = receive_one_fd(sockets[0], MSG_DONTWAIT);
+        if (r < 0)
+                return log_error_errno(r, "Failed to receive mount tree: %m");
+
+        return r;
+}
+
static int process_kernel(int argc, char* argv[]) {
_cleanup_(iovw_free_freep) struct iovec_wrapper *iovw = NULL;
+ _cleanup_close_ int mount_tree_fd = -EBADF;
Context context = {};
int r, signo;
@@ -1607,6 +1735,12 @@ static int process_kernel(int argc, char* argv[]) {
r = forward_coredump_to_container(&context);
if (r >= 0)
return 0;
+
+ r = gather_pid_mount_tree_fd(&context);
+ if (r < 0 && r != -EBADF)
+ log_warning_errno(r, "Failed to access the mount tree of a container, ignoring: %m");
+ else
+ mount_tree_fd = r;
}
/* If this is PID 1 disable coredump collection, we'll unlikely be able to process
@@ -1624,9 +1758,9 @@ static int process_kernel(int argc, char* argv[]) {
(void) iovw_put_string_field(iovw, "PRIORITY=", STRINGIFY(LOG_CRIT));
if (context.is_journald || context.is_pid1)
- return submit_coredump(&context, iovw, STDIN_FILENO);
+ return submit_coredump(&context, iovw, STDIN_FILENO, mount_tree_fd);
- return send_iovec(iovw, STDIN_FILENO);
+ return send_iovec(iovw, STDIN_FILENO, mount_tree_fd);
}
static int process_backtrace(int argc, char *argv[]) {
diff --git a/src/coredump/coredump.conf b/src/coredump/coredump.conf
index ae341e40d7..2790bf1be6 100644
--- a/src/coredump/coredump.conf
+++ b/src/coredump/coredump.conf
@@ -25,3 +25,4 @@
#JournalSizeMax=767M
#MaxUse=
#KeepFree=
+#AccessContainer=no