diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 334 |
1 files changed, 291 insertions, 43 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 8b61d103a8a7..409a441ba2ae 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -508,56 +508,78 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
 {
 	struct fs_struct *fs = current->fs;
 	struct dentry *parent = nd->path.dentry;
-	int want_root = 0;
 
 	BUG_ON(!(nd->flags & LOOKUP_RCU));
-	if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
-		want_root = 1;
-		spin_lock(&fs->lock);
-		if (nd->root.mnt != fs->root.mnt ||
-				nd->root.dentry != fs->root.dentry)
-			goto err_root;
-	}
-	spin_lock(&parent->d_lock);
+
+	/*
+	 * Get a reference to the parent first: we're
+	 * going to make "path_put(nd->path)" valid in
+	 * non-RCU context for "terminate_walk()".
+	 *
+	 * If this doesn't work, return immediately with
+	 * RCU walking still active (and then we will do
+	 * the RCU walk cleanup in terminate_walk()).
+	 */
+	if (!lockref_get_not_dead(&parent->d_lockref))
+		return -ECHILD;
+
+	/*
+	 * After the mntget(), terminate_walk() will do
+	 * the right thing for non-RCU mode, and all our
+	 * subsequent exit cases should unlock_rcu_walk()
+	 * before returning.
+	 */
+	mntget(nd->path.mnt);
+	nd->flags &= ~LOOKUP_RCU;
+
+	/*
+	 * For a negative lookup, the lookup sequence point is the parent's
+	 * sequence point, and it only needs to revalidate the parent dentry.
+	 *
+	 * For a positive lookup, we need to move both the parent and the
+	 * dentry from the RCU domain to be properly refcounted. And the
+	 * sequence number in the dentry validates *both* dentry counters,
+	 * since we checked the sequence number of the parent after we got
+	 * the child sequence number. So we know the parent must still
+	 * be valid if the child sequence number is still valid.
+	 */
 	if (!dentry) {
-		if (!__d_rcu_to_refcount(parent, nd->seq))
-			goto err_parent;
+		if (read_seqcount_retry(&parent->d_seq, nd->seq))
+			goto out;
 		BUG_ON(nd->inode != parent->d_inode);
 	} else {
-		if (dentry->d_parent != parent)
-			goto err_parent;
-		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!__d_rcu_to_refcount(dentry, nd->seq))
-			goto err_child;
-		/*
-		 * If the sequence check on the child dentry passed, then
-		 * the child has not been removed from its parent. This
-		 * means the parent dentry must be valid and able to take
-		 * a reference at this point.
-		 */
-		BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
-		BUG_ON(!parent->d_count);
-		parent->d_count++;
-		spin_unlock(&dentry->d_lock);
+		if (!lockref_get_not_dead(&dentry->d_lockref))
+			goto out;
+		if (read_seqcount_retry(&dentry->d_seq, nd->seq))
+			goto drop_dentry;
 	}
-	spin_unlock(&parent->d_lock);
-	if (want_root) {
+
+	/*
+	 * Sequence counts matched. Now make sure that the root is
+	 * still valid and get it if required.
+	 */
+	if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+		spin_lock(&fs->lock);
+		if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
+			goto unlock_and_drop_dentry;
 		path_get(&nd->root);
 		spin_unlock(&fs->lock);
 	}
-	mntget(nd->path.mnt);
 
 	unlock_rcu_walk();
-	nd->flags &= ~LOOKUP_RCU;
 	return 0;
 
-err_child:
-	spin_unlock(&dentry->d_lock);
-err_parent:
-	spin_unlock(&parent->d_lock);
-err_root:
-	if (want_root)
-		spin_unlock(&fs->lock);
+unlock_and_drop_dentry:
+	spin_unlock(&fs->lock);
+drop_dentry:
+	unlock_rcu_walk();
+	dput(dentry);
+	goto drop_root_mnt;
+out:
+	unlock_rcu_walk();
+drop_root_mnt:
+	if (!(nd->flags & LOOKUP_ROOT))
+		nd->root.mnt = NULL;
 	return -ECHILD;
 }
 
@@ -585,14 +607,16 @@ static int complete_walk(struct nameidata *nd)
 		nd->flags &= ~LOOKUP_RCU;
 		if (!(nd->flags & LOOKUP_ROOT))
 			nd->root.mnt = NULL;
-		spin_lock(&dentry->d_lock);
-		if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
-			spin_unlock(&dentry->d_lock);
+
+		if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
 			unlock_rcu_walk();
 			return -ECHILD;
 		}
-		BUG_ON(nd->inode != dentry->d_inode);
-		spin_unlock(&dentry->d_lock);
+		if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
+			unlock_rcu_walk();
+			dput(dentry);
+			return -ECHILD;
+		}
 		mntget(nd->path.mnt);
 		unlock_rcu_walk();
 	}
@@ -2184,6 +2208,230 @@ user_path_parent(int dfd, const char __user *path, struct nameidata *nd,
 	return s;
 }
 
+/**
+ * mountpoint_last - look up last component for umount
+ * @nd:   pathwalk nameidata - currently pointing at parent directory of "last"
+ * @path: pointer to container for result
+ *
+ * This is a special lookup_last function just for umount. In this case, we
+ * need to resolve the path without doing any revalidation.
+ *
+ * The nameidata should be the result of doing a LOOKUP_PARENT pathwalk. Since
+ * mountpoints are always pinned in the dcache, their ancestors are too. Thus,
+ * in almost all cases, this lookup will be served out of the dcache. The only
+ * cases where it won't are if nd->last refers to a symlink or the path is
+ * bogus and it doesn't exist.
+ *
+ * Returns:
+ * -error: if there was an error during lookup. This includes -ENOENT if the
+ *         lookup found a negative dentry. The nd->path reference will also be
+ *         put in this case.
+ *
+ * 0:      if we successfully resolved nd->path and found it not to be a
+ *         symlink that needs to be followed. "path" will also be populated.
+ *         The nd->path reference will also be put.
+ *
+ * 1:      if we successfully resolved nd->last and found it to be a symlink
+ *         that needs to be followed. "path" will be populated with the path
+ *         to the link, and nd->path will *not* be put.
+ */
+static int
+mountpoint_last(struct nameidata *nd, struct path *path)
+{
+	int error = 0;
+	struct dentry *dentry;
+	struct dentry *dir = nd->path.dentry;
+
+	/* If we're in rcuwalk, drop out of it to handle last component */
+	if (nd->flags & LOOKUP_RCU) {
+		if (unlazy_walk(nd, NULL)) {
+			error = -ECHILD;
+			goto out;
+		}
+	}
+
+	nd->flags &= ~LOOKUP_PARENT;
+
+	if (unlikely(nd->last_type != LAST_NORM)) {
+		error = handle_dots(nd, nd->last_type);
+		if (error)
+			goto out;
+		dentry = dget(nd->path.dentry);
+		goto done;
+	}
+
+	mutex_lock(&dir->d_inode->i_mutex);
+	dentry = d_lookup(dir, &nd->last);
+	if (!dentry) {
+		/*
+		 * No cached dentry. Mounted dentries are pinned in the cache,
+		 * so that means that this dentry is probably a symlink or the
+		 * path doesn't actually point to a mounted dentry.
+		 *
+		 * Every failure exit below must drop dir's i_mutex, because
+		 * the "out" label only terminates the walk.
+		 */
+		dentry = d_alloc(dir, &nd->last);
+		if (!dentry) {
+			error = -ENOMEM;
+			mutex_unlock(&dir->d_inode->i_mutex);
+			goto out;
+		}
+		dentry = lookup_real(dir->d_inode, dentry, nd->flags);
+		error = PTR_ERR(dentry);
+		if (IS_ERR(dentry)) {
+			mutex_unlock(&dir->d_inode->i_mutex);
+			goto out;
+		}
+	}
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+done:
+	if (!dentry->d_inode) {
+		error = -ENOENT;
+		dput(dentry);
+		goto out;
+	}
+	path->dentry = dentry;
+	/*
+	 * NOTE(review): the mnt reference is taken before the symlink check,
+	 * so the "return 1" path hands it to the caller as well - confirm
+	 * that follow_link()/put_link() account for it.
+	 */
+	path->mnt = mntget(nd->path.mnt);
+	if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+		return 1;
+	follow_mount(path);
+	error = 0;
+out:
+	terminate_walk(nd);
+	return error;
+}
+
+/**
+ * path_mountpoint - look up a path to be umounted
+ * @dfd:	directory file descriptor to start walk from
+ * @name:	full pathname to walk
+ * @flags:	lookup flags
+ *
+ * Look up the given name, but don't attempt to revalidate the last component.
+ * Returns 0 and "path" will be valid on success; Returns error otherwise.
+ */
+static int
+path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
+{
+	struct file *base = NULL;
+	struct nameidata nd;
+	int err;
+
+	err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base);
+	if (unlikely(err))
+		return err;
+
+	current->total_link_count = 0;
+	err = link_path_walk(name, &nd);
+	if (err)
+		goto out;
+
+	err = mountpoint_last(&nd, path);
+	while (err > 0) {
+		void *cookie;
+		struct path link = *path;
+		err = may_follow_link(&link, &nd);
+		if (unlikely(err))
+			break;
+		nd.flags |= LOOKUP_PARENT;
+		err = follow_link(&link, &nd, &cookie);
+		if (err)
+			break;
+		err = mountpoint_last(&nd, path);
+		put_link(&nd, &link, cookie);
+	}
+out:
+	if (base)
+		fput(base);
+
+	if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT))
+		path_put(&nd.root);
+
+	return err;
+}
+
+/**
+ * filename_mountpoint - look up a mountpoint, retrying with other flags
+ * @dfd:	directory file descriptor
+ * @s:		filename to look up
+ * @path:	pointer to container to hold result
+ * @flags:	lookup flags
+ *
+ * Tries path_mountpoint() with LOOKUP_RCU first, repeats it without that
+ * flag on -ECHILD, and repeats it with LOOKUP_REVAL on -ESTALE. On success
+ * the result is passed to audit_inode().
+ *
+ * Returns 0 and populates "path" on success; returns an error otherwise.
+ */
+static int
+filename_mountpoint(int dfd, struct filename *s, struct path *path,
+			unsigned int flags)
+{
+	int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+	if (unlikely(error == -ECHILD))
+		error = path_mountpoint(dfd, s->name, path, flags);
+	if (unlikely(error == -ESTALE))
+		error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+	if (likely(!error))
+		audit_inode(s, path->dentry, 0);
+	return error;
+}
+
+/**
+ * user_path_mountpoint_at - lookup a path from userland in order to umount it
+ * @dfd:	directory file descriptor
+ * @name:	pathname from userland
+ * @flags:	lookup flags
+ * @path:	pointer to container to hold result
+ *
+ * A umount is a special case for path walking. We're not actually interested
+ * in the inode in this situation, and ESTALE errors can be a problem. We
+ * simply want to track down the dentry and vfsmount attached at the mountpoint
+ * and avoid revalidating the last component.
+ *
+ * Returns 0 and populates "path" on success.
+ */
+int
+user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags,
+			struct path *path)
+{
+	struct filename *s = getname(name);
+	int error;
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+	error = filename_mountpoint(dfd, s, path, flags);
+	putname(s);
+	return error;
+}
+
+/**
+ * kern_path_mountpoint - look up a kernel-supplied path in order to umount it
+ * @dfd:	directory file descriptor
+ * @name:	pathname (not a __user pointer)
+ * @path:	pointer to container to hold result
+ * @flags:	lookup flags
+ *
+ * Same as user_path_mountpoint_at(), except that @name is used directly
+ * instead of going through getname()/putname().
+ *
+ * Returns 0 and populates "path" on success.
+ */
+int
+kern_path_mountpoint(int dfd, const char *name, struct path *path,
+			unsigned int flags)
+{
+	struct filename s = {.name = name};
+	return filename_mountpoint(dfd, &s, path, flags);
+}
+EXPORT_SYMBOL(kern_path_mountpoint);
+
 /*
  * It's inline, so penalty for filesystems that don't use sticky bit is
  * minimal.
@@ -3327,7 +3575,7 @@ void dentry_unhash(struct dentry *dentry)
 {
 	shrink_dcache_parent(dentry);
 	spin_lock(&dentry->d_lock);
-	if (dentry->d_count == 1)
+	if (dentry->d_lockref.count == 1)
 		__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
 }