summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDaan De Meyer <daan.j.demeyer@gmail.com>2024-09-12 20:42:02 +0200
committerLuca Boccassi <luca.boccassi@gmail.com>2024-10-16 15:18:24 +0200
commitddbddebe0cc5edcf3d0fbc82ba5850e45925e8ae (patch)
treebf490c65668c80e8a8a38b70194008e175724d20 /src
parentsysext: Run unmerge in a subprocess (diff)
downloadsystemd-ddbddebe0cc5edcf3d0fbc82ba5850e45925e8ae.tar.xz
systemd-ddbddebe0cc5edcf3d0fbc82ba5850e45925e8ae.zip
sysext: Deal with nested mounts properly
Nested mounts should be carried over from host to overlayfs to overlayfs (and back to host if unmerged). Otherwise you run into hard-to-debug issues where merging extensions means you can't unmount those nested mounts anymore, as they are hidden by the overlayfs mount. To fix this, before unmerging any previous extensions, let's move the nested mounts from the hierarchy to the workspace, then set up the new hierarchy, and finally, just before moving the hierarchy into place, move the nested mounts back into place. Because there might be multiple nested mounts that each consist of one or more mounts stacked on top of each other, we make sure to move all stacked mounts properly to the overlayfs. The kernel doesn't really provide a nice way to do this, so we create a stack, pop each mount off the source onto the stack, and then pop from the stack again onto the destination to re-establish the stacked mounts in the same order in the destination.
Diffstat (limited to 'src')
-rw-r--r--src/sysext/sysext.c128
1 files changed, 117 insertions, 11 deletions
diff --git a/src/sysext/sysext.c b/src/sysext/sysext.c
index 94669dbd52..fcf29a99d3 100644
--- a/src/sysext/sysext.c
+++ b/src/sysext/sysext.c
@@ -273,6 +273,52 @@ static int need_reload(
return false;
}
+static int move_submounts(const char *src, const char *dst) { /* For each submount reported below src: recursively bind-mount it to the matching path below dst, then detach the original. Returns 0 on success, otherwise the error of the failing step. */
+ SubMount *submounts = NULL;
+ size_t n_submounts = 0;
+ int r;
+
+ assert(src);
+ assert(dst);
+
+ CLEANUP_ARRAY(submounts, n_submounts, sub_mount_array_free); /* NOTE(review): presumably releases the array through sub_mount_array_free on every return path — confirm against the macro */
+
+ r = get_sub_mounts(src, &submounts, &n_submounts);
+ if (r < 0)
+ return log_error_errno(r, "Failed to get submounts for %s: %m", src);
+
+ FOREACH_ARRAY(m, submounts, n_submounts) {
+ _cleanup_free_ char *t = NULL;
+ const char *suffix;
+ struct stat st;
+
+ assert_se(suffix = path_startswith(m->path, src)); /* every reported path is required to start with src; suffix is what path_startswith() returns for it (presumably the remainder after src) */
+
+ t = path_join(dst, suffix); /* t is the target location: dst joined with that suffix */
+ if (!t)
+ return log_oom();
+
+ if (fstat(m->mount_fd, &st) < 0) /* stat via the submount's fd; st is handed to make_mount_point_inode_from_stat() below */
+ return log_error_errno(errno, "Failed to stat %s: %m", m->path);
+
+ r = mkdir_parents(t, 0755);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create parent directories of %s: %m", t);
+
+ r = make_mount_point_inode_from_stat(&st, t, 0755);
+ if (r < 0 && r != -EEXIST) /* an already existing target inode is tolerated */
+ return log_error_errno(r, "Failed to create mountpoint %s: %m", t);
+
+ r = mount_follow_verbose(LOG_ERR, m->path, t, NULL, MS_BIND|MS_REC, NULL); /* recursive bind mount of the submount onto the target */
+ if (r < 0)
+ return r;
+
+ (void) umount_verbose(LOG_WARNING, m->path, MNT_DETACH); /* best effort: the result is deliberately discarded and failures are logged at warning level only */
+ }
+
+ return 0;
+}
+
static int daemon_reload(void) {
_cleanup_(sd_bus_flush_close_unrefp) sd_bus *bus = NULL;
int r;
@@ -284,11 +330,10 @@ static int daemon_reload(void) {
return bus_service_manager_reload(bus);
}
-static int unmerge_hierarchy(
- ImageClass image_class,
- const char *p) {
+static int unmerge_hierarchy(ImageClass image_class, const char *p, const char *submounts_path) {
_cleanup_free_ char *dot_dir = NULL, *work_dir_info_file = NULL;
+ int n_unmerged = 0;
int r;
assert(p);
@@ -338,6 +383,12 @@ static int unmerge_hierarchy(
return log_error_errno(r, "Failed to unmount '%s': %m", dot_dir);
}
+ /* After we've unmounted the metadata directory, save all other submounts so we can restore
+ * them after unmerging the hierarchy. */
+ r = move_submounts(p, submounts_path);
+ if (r < 0)
+ return r;
+
r = umount_verbose(LOG_ERR, p, MNT_DETACH|UMOUNT_NOFOLLOW);
if (r < 0)
return r;
@@ -349,19 +400,39 @@ static int unmerge_hierarchy(
}
log_info("Unmerged '%s'.", p);
+ n_unmerged++;
}
- return 0;
+ return n_unmerged;
}
static int unmerge_subprocess(
ImageClass image_class,
- char **hierarchies) {
+ char **hierarchies,
+ const char *workspace) {
int r, ret = 0;
+ assert(workspace);
+ assert(path_startswith(workspace, "/run/"));
+
+ /* Mark the whole of /run as MS_SLAVE, so that we can mount stuff below it that doesn't show up on
+ * the host otherwise. */
+ r = mount_nofollow_verbose(LOG_ERR, NULL, "/run", NULL, MS_SLAVE|MS_REC, NULL);
+ if (r < 0)
+ return r;
+
+ /* Let's create the workspace if it's missing */
+ r = mkdir_p(workspace, 0700);
+ if (r < 0)
+ return log_error_errno(r, "Failed to create '%s': %m", workspace);
+
STRV_FOREACH(h, hierarchies) {
- _cleanup_free_ char *resolved = NULL;
+ _cleanup_free_ char *submounts_path = NULL, *resolved = NULL;
+
+ submounts_path = path_join(workspace, "submounts", *h);
+ if (!submounts_path)
+ return log_oom();
r = chase(*h, arg_root, CHASE_PREFIX_ROOT, &resolved, NULL);
if (r == -ENOENT) {
@@ -373,11 +444,20 @@ static int unmerge_subprocess(
continue;
}
- r = unmerge_hierarchy(image_class, resolved);
+ r = unmerge_hierarchy(image_class, resolved, submounts_path);
if (r < 0) {
RET_GATHER(ret, r);
continue;
}
+ if (r == 0)
+ continue;
+
+ /* If we unmerged something, then we have to move the submounts from the hierarchy back into
+ * place in the host's original hierarchy. */
+
+ r = move_submounts(submounts_path, resolved);
+ if (r < 0)
+ return r;
}
return ret;
@@ -402,7 +482,7 @@ static int unmerge(
if (r == 0) {
/* Child with its own mount namespace */
- r = unmerge_subprocess(image_class, hierarchies);
+ r = unmerge_subprocess(image_class, hierarchies, "/run/systemd/sysext");
/* Our namespace ceases to exist here, also implicitly detaching all temporary mounts we
* created below /run. Nice! */
@@ -1505,6 +1585,8 @@ static int merge_subprocess(
Image *img;
int r;
+ assert(path_startswith(workspace, "/run/"));
+
/* Mark the whole of /run as MS_SLAVE, so that we can mount stuff below it that doesn't show up on
* the host otherwise. */
r = mount_nofollow_verbose(LOG_ERR, NULL, "/run", NULL, MS_SLAVE|MS_REC, NULL);
@@ -1733,20 +1815,33 @@ static int merge_subprocess(
/* Let's now unmerge the status quo ante, since to build the new overlayfs we need a reference to the
* underlying fs. */
STRV_FOREACH(h, hierarchies) {
- _cleanup_free_ char *resolved = NULL;
+ _cleanup_free_ char *submounts_path = NULL, *resolved = NULL;
+
+ submounts_path = path_join(workspace, "submounts", *h);
+ if (!submounts_path)
+ return log_oom();
r = chase(*h, arg_root, CHASE_PREFIX_ROOT|CHASE_NONEXISTENT, &resolved, NULL);
if (r < 0)
return log_error_errno(r, "Failed to resolve hierarchy '%s%s': %m", strempty(arg_root), *h);
- r = unmerge_hierarchy(image_class, resolved);
+ r = unmerge_hierarchy(image_class, resolved, submounts_path);
+ if (r < 0)
+ return r;
+ if (r > 0)
+ continue;
+
+ /* If we didn't unmerge anything, then we have to move the submounts from the host's
+ * original hierarchy. */
+
+ r = move_submounts(resolved, submounts_path);
if (r < 0)
return r;
}
/* Create overlayfs mounts for all hierarchies */
STRV_FOREACH(h, hierarchies) {
- _cleanup_free_ char *meta_path = NULL, *overlay_path = NULL, *merge_hierarchy_workspace = NULL;
+ _cleanup_free_ char *meta_path = NULL, *overlay_path = NULL, *merge_hierarchy_workspace = NULL, *submounts_path = NULL;
meta_path = path_join(workspace, "meta", *h); /* The place where to store metadata about this instance */
if (!meta_path)
@@ -1761,6 +1856,10 @@ static int merge_subprocess(
if (!merge_hierarchy_workspace)
return log_oom();
+ submounts_path = path_join(workspace, "submounts", *h);
+ if (!submounts_path)
+ return log_oom();
+
r = merge_hierarchy(
image_class,
*h,
@@ -1772,6 +1871,13 @@ static int merge_subprocess(
merge_hierarchy_workspace);
if (r < 0)
return r;
+
+ /* After the new hierarchy is set up, move the submounts from the original hierarchy into
+ * place. */
+
+ r = move_submounts(submounts_path, overlay_path);
+ if (r < 0)
+ return r;
}
/* And move them all into place. This is where things appear in the host namespace */