summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2024-07-05 13:08:54 +0200
committerChristian Brauner <brauner@kernel.org>2024-07-06 08:34:45 +0200
commit5e8a9cebc5580ca7c01d6c151017187785dc0dfe (patch)
tree6998bc937a7b41e0bd311bb1546f695ca5283dee /fs
parentfs: only copy to userspace on success in listmount() (diff)
downloadlinux-5e8a9cebc5580ca7c01d6c151017187785dc0dfe.tar.xz
linux-5e8a9cebc5580ca7c01d6c151017187785dc0dfe.zip
fs: find rootfs mount of the mount namespace
The method we used was predicated on the assumption that the mount immediately following the root mount of the mount namespace would be the rootfs mount of the namespace. That's not always the case though. For example: ID PARENT ID 408 412 0:60 /containers/overlay-containers/bc391117192b32071b22ef2083ebe7735d5c390f87a5779e02faf79ba0746ceb/userdata/hosts /etc/hosts rw,nosuid,nodev,relatime - tmpfs tmpfs rw,size=954664k,nr_inodes=238666,mode=700,uid=1000,gid=1000,inode64 409 414 0:61 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=64000k,uid=1000,gid=1000,inode64 410 412 0:60 /containers/overlay-containers/bc391117192b32071b22ef2083ebe7735d5c390f87a5779e02faf79ba0746ceb/userdata/.containerenv /run/.containerenv rw,nosuid,nodev,relatime - tmpfs tmpfs rw,size=954664k,nr_inodes=238666,mode=700,uid=1000,gid=1000,inode64 411 412 0:60 /containers/overlay-containers/bc391117192b32071b22ef2083ebe7735d5c390f87a5779e02faf79ba0746ceb/userdata/hostname /etc/hostname rw,nosuid,nodev,relatime - tmpfs tmpfs rw,size=954664k,nr_inodes=238666,mode=700,uid=1000,gid=1000,inode64 412 363 0:65 / / rw,relatime - overlay overlay rw,lowerdir=/home/user1/.local/share/containers/storage/overlay/l/JS65SUCGTPCP2EEBHLRP4UCFI5:/home/user1/.local/share/containers/storage/overlay/l/DLW22KVDWUNI4242D6SDJ5GKCL [...] 413 412 0:68 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw 414 412 0:69 / /dev rw,nosuid - tmpfs tmpfs rw,size=65536k,mode=755,uid=1000,gid=1000,inode64 415 412 0:70 / /sys ro,nosuid,nodev,noexec,relatime - sysfs sysfs rw 416 414 0:71 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=100004,mode=620,ptmxmode=666 417 414 0:67 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw 418 415 0:27 / /sys/fs/cgroup ro,nosuid,nodev,noexec,relatime - cgroup2 cgroup2 rw,nsdelegate,memory_recursiveprot 419 414 0:6 /null /dev/null rw,nosuid,noexec - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 420 414 0:6 /zero /dev/zero rw,nosuid,noexec - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 422 414 0:6 /full /dev/full rw,nosuid,noexec - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 423 414 0:6 /tty /dev/tty rw,nosuid,noexec - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 430 414 0:6 /random /dev/random rw,nosuid,noexec - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 431 414 0:6 /urandom /dev/urandom rw,nosuid,noexec - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 433 413 0:72 / /proc/acpi ro,relatime - tmpfs tmpfs rw,size=0k,uid=1000,gid=1000,inode64 440 413 0:6 /null /proc/kcore ro,nosuid - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 441 413 0:6 /null /proc/keys ro,nosuid - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 442 413 0:6 /null /proc/timer_list ro,nosuid - devtmpfs devtmpfs rw,size=4096k,nr_inodes=1179282,mode=755,inode64 443 413 0:73 / /proc/scsi ro,relatime - tmpfs tmpfs rw,size=0k,uid=1000,gid=1000,inode64 444 415 0:74 / /sys/firmware ro,relatime - tmpfs tmpfs rw,size=0k,uid=1000,gid=1000,inode64 445 415 0:75 / /sys/dev/block ro,relatime - tmpfs tmpfs rw,size=0k,uid=1000,gid=1000,inode64 446 413 0:68 /bus /proc/bus ro,nosuid,nodev,noexec,relatime - proc proc rw 447 413 0:68 /fs /proc/fs ro,nosuid,nodev,noexec,relatime - proc proc rw 448 413 0:68 /irq /proc/irq ro,nosuid,nodev,noexec,relatime - proc proc rw 449 413 0:68 /sys /proc/sys ro,nosuid,nodev,noexec,relatime - proc proc rw 450 413 0:68 /sysrq-trigger /proc/sysrq-trigger ro,nosuid,nodev,noexec,relatime - proc proc rw 364 414 0:71 /0 /dev/console rw,relatime - devpts devpts rw,gid=100004,mode=620,ptmxmode=666 In this mount table the root mount of the mount namespace is the mount with id 363 (It isn't visible because it's literally just what the rootfs mount is mounted upon and usually it's just a copy of the real rootfs). The rootfs mount that's mounted on the root mount of the mount namespace is the mount with id 412. But the mount namespace contains mounts that were created before the rootfs mount and thus have earlier mount ids. So the first call to listmnt_next() would return the mount with the mount id 408 and not the rootfs mount. So we need to find the actual rootfs mount mounted on the root mount of the mount namespace. This logic is also present in mntns_install() where vfs_path_lookup() is used. We can't use this though as we're holding the namespace semaphore. We could look at the children of the root mount of the mount namespace directly but that also seems a bit out of place while we have the rbtree. So let's just iterate through the rbtree starting from the root mount of the mount namespace and find the mount whose parent is the root mount of the mount namespace. That mount will usually appear very early in the rbtree and afaik there can only be one. IOW, it would be very strange if we ended up with a root mount of a mount namespace that has shadow mounts. Fixes: 0a3deb11858a ("fs: Allow listmount() in foreign mount namespace") # mainline only Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/namespace.c16
1 files changed, 11 insertions, 5 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index f44e5448c8a0..56c1dcffb4dc 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5086,7 +5086,7 @@ static struct mount *listmnt_next(struct mount *curr, bool reverse)
static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
{
- struct mount *first;
+ struct mount *first, *child;
rwsem_assert_held(&namespace_sem);
@@ -5103,10 +5103,16 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
if (RB_EMPTY_ROOT(&ns->mounts))
return -ENOENT;
- first = listmnt_next(ns->root, false);
- if (!first)
- return -ENOENT;
- root->mnt = mntget(&first->mnt);
+ first = child = ns->root;
+ for (;;) {
+ child = listmnt_next(child, false);
+ if (!child)
+ return -ENOENT;
+ if (child->mnt_parent == first)
+ break;
+ }
+
+ root->mnt = mntget(&child->mnt);
root->dentry = dget(root->mnt->mnt_root);
return 0;
}