diff options
Diffstat (limited to 'fs')
77 files changed, 883 insertions, 366 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 9defa12208f9..1775fcc7f0e8 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -179,13 +179,14 @@ extern int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); -extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid); +extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, + bool new); extern const struct inode_operations v9fs_dir_inode_operations_dotl; extern const struct inode_operations v9fs_file_inode_operations_dotl; extern const struct inode_operations v9fs_symlink_inode_operations_dotl; extern const struct netfs_request_ops v9fs_req_ops; extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb, - struct p9_fid *fid); + struct p9_fid *fid, bool new); /* other default globals */ #define V9FS_PORT 564 @@ -224,12 +225,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) */ static inline struct inode * v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, - struct super_block *sb) + struct super_block *sb, bool new) { if (v9fs_proto_dotl(v9ses)) - return v9fs_fid_iget_dotl(sb, fid); + return v9fs_fid_iget_dotl(sb, fid, new); else - return v9fs_fid_iget(sb, fid); + return v9fs_fid_iget(sb, fid, new); } #endif diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 47bd77199e20..7a3308d77606 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -364,7 +364,8 @@ void v9fs_evict_inode(struct inode *inode) clear_inode(inode); } -struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) +struct inode * +v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new) { dev_t rdev; int retval; @@ -376,8 +377,18 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; + if (!(inode->i_state & I_NEW)) { + if (!new) { + goto done; + } else { + p9_debug(P9_DEBUG_VFS, "WARNING: Inode collision %ld\n", + inode->i_ino); + iput(inode); + remove_inode_hash(inode); + inode = iget_locked(sb, QID2INO(&fid->qid)); + WARN_ON(!(inode->i_state & I_NEW)); + } + } /* * initialize the inode with the stat info @@ -401,11 +412,11 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid) v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); +done: return inode; error: iget_failed(inode); return ERR_PTR(retval); - } /** @@ -437,8 +448,15 @@ static int v9fs_at_to_dotl_flags(int flags) */ static void v9fs_dec_count(struct inode *inode) { - if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) - drop_nlink(inode); + if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) { + if (inode->i_nlink) { + drop_nlink(inode); + } else { + p9_debug(P9_DEBUG_VFS, + "WARNING: unexpected i_nlink zero %d inode %ld\n", + inode->i_nlink, inode->i_ino); + } + } } /** @@ -489,6 +507,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) } else v9fs_dec_count(inode); + if (inode->i_nlink <= 0) /* no more refs unhash it */ + remove_inode_hash(inode); + v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); @@ -554,7 +575,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, /* * instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, @@ -683,7 +704,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, else if (IS_ERR(fid)) inode = ERR_CAST(fid); else - inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, false); /* * If we had a rename on the server and a parallel lookup * for the new name, then make sure we instantiate with diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 55dde186041a..c61b97bd13b9 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -52,7 +52,10 @@ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) return current_fsgid(); } -struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) + + +struct inode * +v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new) { int retval; struct inode *inode; @@ -62,8 +65,18 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; + if (!(inode->i_state & I_NEW)) { + if (!new) { + goto done; + } else { /* deal with race condition in inode number reuse */ + p9_debug(P9_DEBUG_ERROR, "WARNING: Inode collision %lx\n", + inode->i_ino); + iput(inode); + remove_inode_hash(inode); + inode = iget_locked(sb, QID2INO(&fid->qid)); + WARN_ON(!(inode->i_state & I_NEW)); + } + } /* * initialize the inode with the stat info @@ -90,12 +103,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid) goto error; unlock_new_inode(inode); - +done: return inode; error: iget_failed(inode); return ERR_PTR(retval); - } struct dotl_openflag_map { @@ -247,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto out; } - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); @@ -340,7 +352,7 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, } /* instantiate inode and assign the unopened fid to the dentry */ - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", @@ -776,7 +788,7 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir, err); goto error; } - inode = v9fs_fid_iget_dotl(dir->i_sb, fid); + inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 55e67e36ae68..f52fdf42945c 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -139,7 +139,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, else sb->s_d_op = &v9fs_dentry_operations; - inode = v9fs_get_inode_from_fid(v9ses, fid, sb); + inode = v9fs_get_inode_from_fid(v9ses, fid, sb, true); if (IS_ERR(inode)) { retval = PTR_ERR(inode); goto release_sb; diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index fadb1078903d..a20044201002 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -470,7 +470,7 @@ found: goto err; } - bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL); + bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); bio->bi_iter.bi_sector = p.ptr.offset; bch2_bio_map(bio, data_buf, bytes); ret = submit_bio_wait(bio); diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 085987435a5e..f7fbfccd2b1e 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1504,7 +1504,8 @@ enum btree_id_flags { BIT_ULL(KEY_TYPE_stripe)) \ x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \ BIT_ULL(KEY_TYPE_reflink_v)| \ - BIT_ULL(KEY_TYPE_indirect_inline_data)) \ + BIT_ULL(KEY_TYPE_indirect_inline_data)| \ + BIT_ULL(KEY_TYPE_error)) \ x(subvolumes, 8, 0, \ BIT_ULL(KEY_TYPE_subvolume)) \ x(snapshots, 9, 0, \ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index ecbd9598f69f..791470b0c654 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1587,7 +1587,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) - return ret; + goto out; if (!r->refcount) new->k.type = KEY_TYPE_deleted; @@ -1595,6 +1595,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); ret = bch2_trans_update(trans, iter, new, 0); } +out: fsck_err: printbuf_exit(&buf); return ret; diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 9678b2375bed..debb0edc3455 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -888,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, btree_node_bkey_bad_u64s, - "bad k->u64s %u (min %u max %lu)", k->u64s, + "bad k->u64s %u (min %u max %zu)", k->u64s, bkeyp_key_u64s(&b->format, k), U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k))) goto drop_this_key; diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 88a3582a3275..e8c1c530cd95 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, * Newest freed entries are at the end of the list - once we hit one * that's too new to be freed, we can bail out: */ - scanned += bc->nr_freed_nonpcpu; - list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_nonpcpu--; } - if (scanned >= nr) - goto out; - - scanned += bc->nr_freed_pcpu; - list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) { if (!poll_state_synchronize_srcu(&c->btree_trans_barrier, ck->btree_trans_barrier_seq)) @@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, bc->nr_freed_pcpu--; } - if (scanned >= nr) - goto out; - rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); if (bc->shrink_iter >= tbl->size) @@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); ck = container_of(pos, struct bkey_cached, hash); - if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { goto next; - - if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) + } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) { clear_bit(BKEY_CACHED_ACCESSED, &ck->flags); - else if (bkey_cached_lock_for_evict(ck)) { + goto next; + } else if (bkey_cached_lock_for_evict(ck)) { bkey_cached_evict(bc, ck); bkey_cached_free(bc, ck); } @@ -916,7 +906,6 @@ next: } while (scanned < nr && bc->shrink_iter != start); rcu_read_unlock(); -out: memalloc_nofs_restore(flags); srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); mutex_unlock(&bc->lock); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 866bd278439f..45cb8149d374 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -57,13 +57,14 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n bp->v.seq = cpu_to_le64(f->cookie); bp->v.sectors_written = 0; bp->v.flags = 0; + bp->v.sectors_written = cpu_to_le16(f->sectors_written); bp->v.min_key = f->min_key; SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated); memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs); } static bool found_btree_node_is_readable(struct btree_trans *trans, - const struct found_btree_node *f) + struct found_btree_node *f) { struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k; @@ -71,8 +72,10 @@ static bool found_btree_node_is_readable(struct btree_trans *trans, struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false); bool ret = !IS_ERR_OR_NULL(b); - if (ret) + if (ret) { + f->sectors_written = b->written; six_unlock_read(&b->c.lock); + } /* * We might update this node's range; if that happens, we need the node @@ -302,6 +305,8 @@ again: start->max_key = bpos_predecessor(n->min_key); start->range_updated = true; + } else if (n->level) { + n->overwritten = true; } else { struct printbuf buf = PRINTBUF; diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h index abb7b27d556a..5cfaeb5ac831 100644 --- a/fs/bcachefs/btree_node_scan_types.h +++ b/fs/bcachefs/btree_node_scan_types.h @@ -9,6 +9,7 @@ struct found_btree_node { bool overwritten:1; u8 btree_id; u8 level; + unsigned sectors_written; u32 seq; u64 cookie; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index e0c982a4195c..c69b233c41bb 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -321,9 +321,9 @@ struct bkey_cached { struct btree_bkey_cached_common c; unsigned long flags; + unsigned long btree_trans_barrier_seq; u16 u64s; bool valid; - u32 btree_trans_barrier_seq; struct bkey_cached_key key; struct rhash_head hash; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 6030c396754f..b4efd8cc4d1a 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1960,7 +1960,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates) return 0; - flags &= ~BCH_WATERMARK_MASK; + if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) { + flags &= ~BCH_WATERMARK_MASK; + flags |= BCH_WATERMARK_btree; + flags |= BCH_TRANS_COMMIT_journal_reclaim; + } b = trans->paths[path].l[level].b; diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 941401a210f5..82f179258867 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -525,7 +525,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca, "different types of data in same bucket: %s, %s", bch2_data_type_str(g->data_type), bch2_data_type_str(data_type))) { - BUG(); ret = -EIO; goto err; } @@ -629,7 +628,6 @@ int bch2_check_bucket_ref(struct btree_trans *trans, bch2_data_type_str(ptr_data_type), (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - BUG(); ret = -EIO; goto err; } diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 72781aad6ba7..4d14f19f5185 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -232,13 +232,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a /* We need request_key() to be called before we punt to kthread: */ opt_set(thr->opts, nostart, true); + bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops); + thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts); if (!IS_ERR(thr->c) && thr->c->opts.errors == BCH_ON_ERROR_panic) thr->c->opts.errors = BCH_ON_ERROR_ro; - ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops); + ret = __bch2_run_thread_with_stdio(&thr->thr); out: darray_for_each(devs, i) kfree(*i); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b5ea9fa1259d..fce690007edf 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -188,7 +188,8 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino BUG_ON(!old); if (unlikely(old != inode)) { - discard_new_inode(&inode->v); + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); inode = old; } else { mutex_lock(&c->vfs_inodes_lock); @@ -225,8 +226,10 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans) if (unlikely(!inode)) { int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM); - if (ret && inode) - discard_new_inode(&inode->v); + if (ret && inode) { + __destroy_inode(&inode->v); + kmem_cache_free(bch2_inode_cache, inode); + } if (ret) return ERR_PTR(ret); } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index ca4a066e9a54..0f95d7fb5ec0 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -606,7 +606,7 @@ int bch2_trigger_inode(struct btree_trans *trans, struct bkey_s new, unsigned flags) { - s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k); + s64 nr = (s64) bkey_is_inode(new.k) - (s64) bkey_is_inode(old.k); if (flags & BTREE_TRIGGER_TRANSACTIONAL) { if (nr) { diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 9aa28b52ab92..eb1f9d6f5a19 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1723,7 +1723,7 @@ static void journal_write_endio(struct bio *bio) percpu_ref_put(&ca->io_ref); } -static CLOSURE_CALLBACK(do_journal_write) +static CLOSURE_CALLBACK(journal_write_submit) { closure_type(w, struct journal_buf, io); struct journal *j = container_of(w, struct journal, buf[w->idx]); @@ -1768,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write) continue_at(cl, journal_write_done, j->wq); } +static CLOSURE_CALLBACK(journal_write_preflush) +{ + closure_type(w, struct journal_buf, io); + struct journal *j = container_of(w, struct journal, buf[w->idx]); + struct bch_fs *c = container_of(j, struct bch_fs, journal); + + if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) { + spin_lock(&j->lock); + closure_wait(&j->async_wait, cl); + spin_unlock(&j->lock); + + continue_at(cl, journal_write_preflush, j->wq); + return; + } + + if (w->separate_flush) { + for_each_rw_member(c, ca) { + percpu_ref_get(&ca->io_ref); + + struct journal_device *ja = &ca->journal; + struct bio *bio = &ja->bio[w->idx]->bio; + bio_reset(bio, ca->disk_sb.bdev, + REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); + bio->bi_end_io = journal_write_endio; + bio->bi_private = ca; + closure_bio_submit(bio, cl); + } + + continue_at(cl, journal_write_submit, j->wq); + } else { + /* + * no need to punt to another work item if we're not waiting on + * preflushes + */ + journal_write_submit(&cl->work); + } +} + static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) { struct bch_fs *c = container_of(j, struct bch_fs, journal); @@ -2033,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write) goto err; if (!JSET_NO_FLUSH(w->data)) - closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq)); - - if (!JSET_NO_FLUSH(w->data) && w->separate_flush) { - for_each_rw_member(c, ca) { - percpu_ref_get(&ca->io_ref); - - struct journal_device *ja = &ca->journal; - struct bio *bio = &ja->bio[w->idx]->bio; - bio_reset(bio, ca->disk_sb.bdev, - REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH); - bio->bi_end_io = journal_write_endio; - bio->bi_private = ca; - closure_bio_submit(bio, cl); - } - } - - continue_at(cl, do_journal_write, j->wq); + continue_at(cl, journal_write_preflush, j->wq); + else + continue_at(cl, journal_write_submit, j->wq); return; no_io: continue_at(cl, journal_write_done, j->wq); diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0f328aba9760..be5b47619327 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -249,7 +249,10 @@ int bch2_journal_replay(struct bch_fs *c) struct journal_key *k = *kp; - replay_now_at(j, k->journal_seq); + if (k->journal_seq) + replay_now_at(j, k->journal_seq); + else + replay_now_at(j, j->replay_journal_seq_end); ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc| diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 5980ba2563fe..35ca3f138de6 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -29,6 +29,14 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle for (entry = clean->start; entry < (struct jset_entry *) vstruct_end(&clean->field); entry = vstruct_next(entry)) { + if (vstruct_end(entry) > vstruct_end(&clean->field)) { + bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu", + le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s), + (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field)); + bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun); + return -BCH_ERR_fsck_repair_unimplemented; + } + ret = bch2_journal_entry_validate(c, NULL, entry, le16_to_cpu(c->disk_sb.sb->version), BCH_SB_BIG_ENDIAN(c->disk_sb.sb), diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h index 4ca6e7b0d8aa..06c7a644f4a4 100644 --- a/fs/bcachefs/sb-errors_types.h +++ b/fs/bcachefs/sb-errors_types.h @@ -271,7 +271,8 @@ x(btree_root_unreadable_and_scan_found_nothing, 263) \ x(snapshot_node_missing, 264) \ x(dup_backpointer_to_bad_csum_extent, 265) \ - x(btree_bitmap_not_marked, 266) + x(btree_bitmap_not_marked, 266) \ + x(sb_clean_entry_overrun, 267) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 522a969345e5..5b8e621ac5eb 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -463,8 +463,8 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns m->btree_bitmap_shift += resize; } - for (unsigned bit = sectors >> m->btree_bitmap_shift; - bit << m->btree_bitmap_shift < end; + for (unsigned bit = start >> m->btree_bitmap_shift; + (u64) bit << m->btree_bitmap_shift < end; bit++) bitmap |= BIT_ULL(bit); diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index b27c3e4467cf..5efa64eca5f8 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -235,11 +235,11 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 { u64 end = start + sectors; - if (end > 64 << ca->mi.btree_bitmap_shift) + if (end > 64ULL << ca->mi.btree_bitmap_shift) return false; - for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift; - bit << ca->mi.btree_bitmap_shift < end; + for (unsigned bit = start >> ca->mi.btree_bitmap_shift; + (u64) bit << ca->mi.btree_bitmap_shift < end; bit++) if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit))) return false; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8daf80a38d60..88e214c609bb 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -544,6 +544,7 @@ static void __bch2_fs_free(struct bch_fs *c) bch2_find_btree_nodes_exit(&c->found_btree_nodes); bch2_free_pending_node_rewrites(c); + bch2_fs_allocator_background_exit(c); bch2_fs_sb_errors_exit(c); bch2_fs_counters_exit(c); bch2_fs_snapshots_exit(c); diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index 940db15d6a93..b1af7ac430f6 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -294,16 +294,27 @@ static int thread_with_stdio_fn(void *arg) return 0; } -int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, - const struct thread_with_stdio_ops *ops) +void bch2_thread_with_stdio_init(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) { stdio_buf_init(&thr->stdio.input); stdio_buf_init(&thr->stdio.output); thr->ops = ops; +} +int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr) +{ return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn); } +int bch2_run_thread_with_stdio(struct thread_with_stdio *thr, + const struct thread_with_stdio_ops *ops) +{ + bch2_thread_with_stdio_init(thr, ops); + + return __bch2_run_thread_with_stdio(thr); +} + int bch2_run_thread_with_stdout(struct thread_with_stdio *thr, const struct thread_with_stdio_ops *ops) { diff --git a/fs/bcachefs/thread_with_file.h b/fs/bcachefs/thread_with_file.h index af54ea8f5b0f..1d63d14d7dca 100644 --- a/fs/bcachefs/thread_with_file.h +++ b/fs/bcachefs/thread_with_file.h @@ -63,6 +63,9 @@ struct thread_with_stdio { const struct thread_with_stdio_ops *ops; }; +void bch2_thread_with_stdio_init(struct thread_with_stdio *, + const struct thread_with_stdio_ops *); +int __bch2_run_thread_with_stdio(struct thread_with_stdio *); int bch2_run_thread_with_stdio(struct thread_with_stdio *, const struct thread_with_stdio_ops *); int bch2_run_thread_with_stdout(struct thread_with_stdio *, diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index c1e6a5bbeeaf..58110c968667 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2776,20 +2776,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes) size_t alloc_bytes; alloc_bytes = max_t(size_t, total_bytes, sizeof(*data)); - data = kvmalloc(alloc_bytes, GFP_KERNEL); + data = kvzalloc(alloc_bytes, GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); - if (total_bytes >= sizeof(*data)) { + if (total_bytes >= sizeof(*data)) data->bytes_left = total_bytes - sizeof(*data); - data->bytes_missing = 0; - } else { + else data->bytes_missing = sizeof(*data) - total_bytes; - data->bytes_left = 0; - } - - data->elem_cnt = 0; - data->elem_missed = 0; return data; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 445f7716f1e2..24a048210b15 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -817,7 +817,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, split->block_len = em->block_len; split->orig_start = em->orig_start; } else { - const u64 diff = start + len - em->start; + const u64 diff = end - em->start; split->block_len = split->len; split->block_start += diff; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c65fe5de4022..7fed887e700c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1145,13 +1145,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk, 0, *alloc_hint, &ins, 1, 1); if (ret) { /* - * Here we used to try again by going back to non-compressed - * path for ENOSPC. But we can't reserve space even for - * compressed size, how could it work for uncompressed size - * which requires larger size? So here we directly go error - * path. + * We can't reserve contiguous space for the compressed size. + * Unlikely, but it's possible that we could have enough + * non-contiguous space for the uncompressed size instead. So + * fall back to uncompressed. */ - goto out_free; + submit_uncompressed_range(inode, async_extent, locked_page); + goto done; } /* Here we're doing allocation and writeback of the compressed pages */ @@ -1203,7 +1203,6 @@ done: out_free_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); -out_free: mapping_set_error(inode->vfs_inode.i_mapping, -EIO); extent_clear_unlock_delalloc(inode, start, end, NULL, EXTENT_LOCKED | EXTENT_DELALLOC | diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 55f3ba6a831c..0493272a7668 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3758,15 +3758,43 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) goto drop_write; } - down_write(&fs_info->subvol_sem); - switch (sa->cmd) { case BTRFS_QUOTA_CTL_ENABLE: case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA: + down_write(&fs_info->subvol_sem); ret = btrfs_quota_enable(fs_info, sa); + up_write(&fs_info->subvol_sem); break; case BTRFS_QUOTA_CTL_DISABLE: + /* + * Lock the cleaner mutex to prevent races with concurrent + * relocation, because relocation may be building backrefs for + * blocks of the quota root while we are deleting the root. This + * is like dropping fs roots of deleted snapshots/subvolumes, we + * need the same protection. + * + * This also prevents races between concurrent tasks trying to + * disable quotas, because we will unlock and relock + * qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. + * + * We take this here because we have the dependency of + * + * inode_lock -> subvol_sem + * + * because of rename. With relocation we can prealloc extents, + * so that makes the dependency chain + * + * cleaner_mutex -> inode_lock -> subvol_sem + * + * so we must take the cleaner_mutex here before we take the + * subvol_sem. The deadlock can't actually happen, but this + * quiets lockdep. + */ + mutex_lock(&fs_info->cleaner_mutex); + down_write(&fs_info->subvol_sem); ret = btrfs_quota_disable(fs_info); + up_write(&fs_info->subvol_sem); + mutex_unlock(&fs_info->cleaner_mutex); break; default: ret = -EINVAL; @@ -3774,7 +3802,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) } kfree(sa); - up_write(&fs_info->subvol_sem); drop_write: mnt_drop_write_file(file); return ret; diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c index c96dd66fd0f7..210d9c82e2ae 100644 --- a/fs/btrfs/messages.c +++ b/fs/btrfs/messages.c @@ -7,7 +7,7 @@ #ifdef CONFIG_PRINTK -#define STATE_STRING_PREFACE ": state " +#define STATE_STRING_PREFACE " state " #define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1) /* diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b749ba45da2b..c2a42bcde98e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1188,6 +1188,7 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( ordered->disk_bytenr += len; ordered->num_bytes -= len; ordered->disk_num_bytes -= len; + ordered->ram_bytes -= len; if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { ASSERT(ordered->bytes_left == 0); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cf8820ce7aa2..364acc9bbe73 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1342,16 +1342,10 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) lockdep_assert_held_write(&fs_info->subvol_sem); /* - * Lock the cleaner mutex to prevent races with concurrent relocation, - * because relocation may be building backrefs for blocks of the quota - * root while we are deleting the root. This is like dropping fs roots - * of deleted snapshots/subvolumes, we need the same protection. - * - * This also prevents races between concurrent tasks trying to disable - * quotas, because we will unlock and relock qgroup_ioctl_lock across - * BTRFS_FS_QUOTA_ENABLED changes. + * Relocation will mess with backrefs, so make sure we have the + * cleaner_mutex held to protect us from relocate. */ - mutex_lock(&fs_info->cleaner_mutex); + lockdep_assert_held(&fs_info->cleaner_mutex); mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) @@ -1373,9 +1367,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); + /* + * We have nothing held here and no trans handle, just return the error + * if there is one. + */ ret = flush_reservations(fs_info); if (ret) - goto out_unlock_cleaner; + return ret; /* * 1 For the root item @@ -1439,9 +1437,6 @@ out: btrfs_end_transaction(trans); else if (trans) ret = btrfs_commit_transaction(trans); -out_unlock_cleaner: - mutex_unlock(&fs_info->cleaner_mutex); - return ret; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index fa25004ab04e..4b22cfe9a98c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1012,6 +1012,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work) struct btrfs_fs_info *fs_info = sctx->fs_info; int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, stripe->bg->length); + unsigned long repaired; int mirror; int i; @@ -1078,16 +1079,15 @@ out: * Submit the repaired sectors. For zoned case, we cannot do repair * in-place, but queue the bg to be relocated. */ - if (btrfs_is_zoned(fs_info)) { - if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) + bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap, + stripe->nr_sectors); + if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) { + if (btrfs_is_zoned(fs_info)) { btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start); - } else if (!sctx->readonly) { - unsigned long repaired; - - bitmap_andnot(&repaired, &stripe->init_error_bitmap, - &stripe->error_bitmap, stripe->nr_sectors); - scrub_write_sectors(sctx, stripe, repaired, false); - wait_scrub_stripe_io(stripe); + } else { + scrub_write_sectors(sctx, stripe, repaired, false); + wait_scrub_stripe_io(stripe); + } } scrub_stripe_report_errors(sctx, stripe); diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index 253cce7ffecf..47b5d301038e 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -847,6 +847,11 @@ static int test_case_7(struct btrfs_fs_info *fs_info) goto out; } + if (em->block_start != SZ_32K + SZ_4K) { + test_err("em->block_start is %llu, expected 36K", em->block_start); + goto out; + } + free_extent_map(em); read_lock(&em_tree->lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f15591f3e54f..ef6bd2f4251b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3455,6 +3455,7 @@ again: * alignment and size). */ ret = -EUCLEAN; + mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 8aff1a724805..62da538d91cb 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -151,7 +151,7 @@ static int erofs_fscache_read_io_async(struct fscache_cookie *cookie, if (WARN_ON(len == 0)) source = NETFS_INVALID_READ; if (source != NETFS_READ_FROM_CACHE) { - erofs_err(NULL, "prepare_read failed (source %d)", source); + erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source); return -EIO; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 39c67119f43b..d28ccfc0352b 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -84,13 +84,6 @@ struct erofs_dev_context { bool flatdev; }; -struct erofs_fs_context { - struct erofs_mount_opts opt; - struct erofs_dev_context *devs; - char *fsid; - char *domain_id; -}; - /* all filesystem-wide lz4 configurations */ struct erofs_sb_lz4_info { /* # of pages needed for EROFS lz4 rolling decompression */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index c0eb139adb07..30b49b2eee53 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -370,18 +370,18 @@ out: return ret; } -static void erofs_default_options(struct erofs_fs_context *ctx) +static void erofs_default_options(struct erofs_sb_info *sbi) { #ifdef CONFIG_EROFS_FS_ZIP - ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; - ctx->opt.max_sync_decompress_pages = 3; - ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; + sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; + sbi->opt.max_sync_decompress_pages = 3; + sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; #endif #ifdef CONFIG_EROFS_FS_XATTR - set_opt(&ctx->opt, XATTR_USER); + set_opt(&sbi->opt, XATTR_USER); #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL - set_opt(&ctx->opt, POSIX_ACL); + set_opt(&sbi->opt, POSIX_ACL); #endif } @@ -426,16 +426,16 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) { #ifdef CONFIG_FS_DAX - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; switch (mode) { case EROFS_MOUNT_DAX_ALWAYS: - set_opt(&ctx->opt, DAX_ALWAYS); - clear_opt(&ctx->opt, DAX_NEVER); + set_opt(&sbi->opt, DAX_ALWAYS); + clear_opt(&sbi->opt, DAX_NEVER); return true; case EROFS_MOUNT_DAX_NEVER: - set_opt(&ctx->opt, DAX_NEVER); - clear_opt(&ctx->opt, DAX_ALWAYS); + set_opt(&sbi->opt, DAX_NEVER); + clear_opt(&sbi->opt, DAX_ALWAYS); return true; default: DBG_BUGON(1); @@ -450,7 +450,7 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) static int erofs_fc_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; struct fs_parse_result result; struct erofs_device_info *dif; int opt, ret; @@ -463,9 +463,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_user_xattr: #ifdef CONFIG_EROFS_FS_XATTR if (result.boolean) - set_opt(&ctx->opt, XATTR_USER); + set_opt(&sbi->opt, XATTR_USER); else - clear_opt(&ctx->opt, XATTR_USER); + clear_opt(&sbi->opt, XATTR_USER); #else errorfc(fc, "{,no}user_xattr options not supported"); #endif @@ -473,16 +473,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_acl: #ifdef CONFIG_EROFS_FS_POSIX_ACL if (result.boolean) - set_opt(&ctx->opt, POSIX_ACL); + set_opt(&sbi->opt, POSIX_ACL); else - clear_opt(&ctx->opt, POSIX_ACL); + clear_opt(&sbi->opt, POSIX_ACL); #else errorfc(fc, "{,no}acl options not supported"); #endif break; case Opt_cache_strategy: #ifdef CONFIG_EROFS_FS_ZIP - ctx->opt.cache_strategy = result.uint_32; + sbi->opt.cache_strategy = result.uint_32; #else errorfc(fc, "compression not supported, cache_strategy ignored"); #endif @@ -504,27 +504,27 @@ static int erofs_fc_parse_param(struct fs_context *fc, kfree(dif); return -ENOMEM; } - down_write(&ctx->devs->rwsem); - ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL); - up_write(&ctx->devs->rwsem); + down_write(&sbi->devs->rwsem); + ret = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL); + up_write(&sbi->devs->rwsem); if (ret < 0) { kfree(dif->path); kfree(dif); return ret; } - ++ctx->devs->extra_devices; + ++sbi->devs->extra_devices; break; #ifdef CONFIG_EROFS_FS_ONDEMAND case Opt_fsid: - kfree(ctx->fsid); - ctx->fsid = kstrdup(param->string, GFP_KERNEL); - if (!ctx->fsid) + kfree(sbi->fsid); + sbi->fsid = kstrdup(param->string, GFP_KERNEL); + if (!sbi->fsid) return -ENOMEM; break; case Opt_domain_id: - kfree(ctx->domain_id); - ctx->domain_id = kstrdup(param->string, GFP_KERNEL); - if (!ctx->domain_id) + kfree(sbi->domain_id); + sbi->domain_id = kstrdup(param->string, GFP_KERNEL); + if (!sbi->domain_id) return -ENOMEM; break; #else @@ -581,8 +581,7 @@ static const struct export_operations erofs_export_ops = { static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; - struct erofs_sb_info *sbi; - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = EROFS_SB(sb); int err; sb->s_magic = EROFS_SUPER_MAGIC; @@ -590,19 +589,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &erofs_sops; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - - sb->s_fs_info = sbi; - sbi->opt = ctx->opt; - sbi->devs = ctx->devs; - ctx->devs = NULL; - sbi->fsid = ctx->fsid; - ctx->fsid = NULL; - sbi->domain_id = ctx->domain_id; - ctx->domain_id = NULL; - sbi->blkszbits = PAGE_SHIFT; if (erofs_is_fscache_mode(sb)) { sb->s_blocksize = PAGE_SIZE; @@ -706,9 +692,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) static int erofs_fc_get_tree(struct fs_context *fc) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; - if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) return get_tree_nodev(fc, erofs_fc_fill_super); return get_tree_bdev(fc, erofs_fc_fill_super); @@ -718,19 +704,19 @@ static int erofs_fc_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *new_sbi = fc->s_fs_info; DBG_BUGON(!sb_rdonly(sb)); - if (ctx->fsid || ctx->domain_id) + if (new_sbi->fsid || new_sbi->domain_id) erofs_info(sb, "ignoring reconfiguration for fsid|domain_id."); - if (test_opt(&ctx->opt, POSIX_ACL)) + if (test_opt(&new_sbi->opt, POSIX_ACL)) fc->sb_flags |= SB_POSIXACL; else fc->sb_flags &= ~SB_POSIXACL; - sbi->opt = ctx->opt; + sbi->opt = new_sbi->opt; fc->sb_flags |= SB_RDONLY; return 0; @@ -761,12 +747,15 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs) static void erofs_fc_free(struct fs_context *fc) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; - erofs_free_dev_context(ctx->devs); - kfree(ctx->fsid); - kfree(ctx->domain_id); - kfree(ctx); + if (!sbi) + return; + + erofs_free_dev_context(sbi->devs); + kfree(sbi->fsid); + kfree(sbi->domain_id); + kfree(sbi); } static const struct fs_context_operations erofs_context_ops = { @@ -778,38 +767,35 @@ static const struct fs_context_operations erofs_context_ops = { static int erofs_init_fs_context(struct fs_context *fc) { - struct erofs_fs_context *ctx; + struct erofs_sb_info *sbi; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) return -ENOMEM; - ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); - if (!ctx->devs) { - kfree(ctx); + + sbi->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); + if (!sbi->devs) { + kfree(sbi); return -ENOMEM; } - fc->fs_private = ctx; + fc->s_fs_info = sbi; - idr_init(&ctx->devs->tree); - init_rwsem(&ctx->devs->rwsem); - erofs_default_options(ctx); + idr_init(&sbi->devs->tree); + init_rwsem(&sbi->devs->rwsem); + erofs_default_options(sbi); fc->ops = &erofs_context_ops; return 0; } static void erofs_kill_sb(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); - if (erofs_is_fscache_mode(sb)) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) kill_anon_super(sb); else kill_block_super(sb); - sbi = EROFS_SB(sb); - if (!sbi) - return; - erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev, NULL); erofs_fscache_unregister_fs(sb); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 882b89edc52a..f53ca4f7fced 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -980,6 +980,34 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep } /* + * The ffd.file pointer may be in the process of being torn down due to + * being closed, but we may not have finished eventpoll_release() yet. + * + * Normally, even with the atomic_long_inc_not_zero, the file may have + * been free'd and then gotten re-allocated to something else (since + * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU). + * + * But for epoll, users hold the ep->mtx mutex, and as such any file in + * the process of being free'd will block in eventpoll_release_file() + * and thus the underlying file allocation will not be free'd, and the + * file re-use cannot happen. + * + * For the same reason we can avoid a rcu_read_lock() around the + * operation - 'ffd.file' cannot go away even if the refcount has + * reached zero (but we must still not call out to ->poll() functions + * etc). + */ +static struct file *epi_fget(const struct epitem *epi) +{ + struct file *file; + + file = epi->ffd.file; + if (!atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + return file; +} + +/* * Differs from ep_eventpoll_poll() in that internal callers already have * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() * is correctly annotated. @@ -987,14 +1015,22 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth) { - struct file *file = epi->ffd.file; + struct file *file = epi_fget(epi); __poll_t res; + /* + * We could return EPOLLERR | EPOLLHUP or something, but let's + * treat this more as "file doesn't exist, poll didn't happen". + */ + if (!file) + return 0; + pt->_key = epi->event.events; if (!is_file_epoll(file)) res = vfs_poll(file, pt); else res = __ep_eventpoll_poll(file, pt, depth); + fput(file); return res & epi->event.events; } diff --git a/fs/ioctl.c b/fs/ioctl.c index 1d5abfdf0f22..fb0628e680c4 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -769,7 +769,7 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp) struct fsuuid2 u = { .len = sb->s_uuid_len, }; if (!sb->s_uuid_len) - return -ENOIOCTLCMD; + return -ENOTTY; memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len); @@ -781,7 +781,7 @@ static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp) struct super_block *sb = file_inode(file)->i_sb; if (!strlen(sb->s_sysfs_name)) - return -ENOIOCTLCMD; + return -ENOTTY; struct fs_sysfs_path u = {}; diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 9a0d32e4b422..267b622d923b 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -164,7 +164,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, enum netfs_how_to_modify howto; enum netfs_folio_trace trace; unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC; - ssize_t written = 0, ret; + ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos, from, to; size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; bool maybe_trouble = false; @@ -172,15 +172,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) ) { - if (pos < i_size_read(inode)) { - ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); - if (ret < 0) { - goto out; - } - } - wbc_attach_fdatawrite_inode(&wbc, mapping->host); + ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count); + if (ret < 0) { + wbc_detach_inode(&wbc); + goto out; + } + wreq = netfs_begin_writethrough(iocb, iter->count); if (IS_ERR(wreq)) { wbc_detach_inode(&wbc); @@ -395,10 +394,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, out: if (unlikely(wreq)) { - ret = netfs_end_writethrough(wreq, iocb); + ret2 = netfs_end_writethrough(wreq, iocb); wbc_detach_inode(&wbc); - if (ret == -EIOCBQUEUED) - return ret; + if (ret2 == -EIOCBQUEUED) + return ret2; + if (ret == 0) + ret = ret2; } iocb->ki_pos += written; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c709c296ea9a..acef52ecb1bb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2429,7 +2429,12 @@ static int nfs_net_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); nfs_clients_init(net); - rpc_proc_register(net, &nn->rpcstats); + + if (!rpc_proc_register(net, &nn->rpcstats)) { + nfs_clients_exit(net); + return -ENOMEM; + } + return nfs_fs_proc_net_init(net); } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 87c9547989f6..e88aca0c6e8e 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -983,15 +983,7 @@ static struct workqueue_struct *callback_wq; static bool nfsd4_queue_cb(struct nfsd4_callback *cb) { trace_nfsd_cb_queue(cb->cb_clp, cb); - return queue_delayed_work(callback_wq, &cb->cb_work, 0); -} - -static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb, - unsigned long msecs) -{ - trace_nfsd_cb_queue(cb->cb_clp, cb); - queue_delayed_work(callback_wq, &cb->cb_work, - msecs_to_jiffies(msecs)); + return queue_work(callback_wq, &cb->cb_work); } static void nfsd41_cb_inflight_begin(struct nfs4_client *clp) @@ -1490,7 +1482,7 @@ static void nfsd4_run_cb_work(struct work_struct *work) { struct nfsd4_callback *cb = - container_of(work, struct nfsd4_callback, cb_work.work); + container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; int flags; @@ -1502,16 +1494,8 @@ nfsd4_run_cb_work(struct work_struct *work) clnt = clp->cl_cb_client; if (!clnt) { - if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) - nfsd41_destroy_cb(cb); - else { - /* - * XXX: Ideally, we could wait for the client to - * reconnect, but I haven't figured out how - * to do that yet. - */ - nfsd4_queue_cb_delayed(cb, 25); - } + /* Callback channel broken, or client killed; give up: */ + nfsd41_destroy_cb(cb); return; } @@ -1544,7 +1528,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_resp = cb; cb->cb_ops = ops; - INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work); + INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); cb->cb_status = 0; cb->cb_need_restart = false; cb->cb_holds_slot = false; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1955481832e0..a644460f3a5e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3515,6 +3515,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.exp = exp; args.dentry = dentry; args.ignore_crossmnt = (ignore_crossmnt != 0); + args.acl = NULL; /* * Make a local copy of the attribute bitmap that can be modified. @@ -3573,7 +3574,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, } else args.fhp = fhp; - args.acl = NULL; if (attrmask[0] & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); if (err == -EOPNOTSUPP) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 01c6f3445646..2ed0fcf879fd 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -68,7 +68,7 @@ struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; const struct nfsd4_callback_ops *cb_ops; - struct delayed_work cb_work; + struct work_struct cb_work; int cb_seq_status; int cb_status; bool cb_need_restart; diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig index cdfdf51e55d7..7bc31d69f680 100644 --- a/fs/ntfs3/Kconfig +++ b/fs/ntfs3/Kconfig @@ -46,3 +46,12 @@ config NTFS3_FS_POSIX_ACL NOTE: this is linux only feature. Windows will ignore these ACLs. If you don't know what Access Control Lists are, say N. + +config NTFS_FS + tristate "NTFS file system support" + select NTFS3_FS + select BUFFER_HEAD + select NLS + help + This config option is here only for backward compatibility. NTFS + filesystem is now handled by the NTFS3 driver. diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 5cf3d9decf64..263635199b60 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -616,4 +616,11 @@ const struct file_operations ntfs_dir_operations = { .compat_ioctl = ntfs_compat_ioctl, #endif }; + +const struct file_operations ntfs_legacy_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate_shared = ntfs_readdir, + .open = ntfs_file_open, +}; // clang-format on diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 5418662c80d8..b73969e05052 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1236,4 +1236,12 @@ const struct file_operations ntfs_file_operations = { .fallocate = ntfs_fallocate, .release = ntfs_file_release, }; + +const struct file_operations ntfs_legacy_file_operations = { + .llseek = generic_file_llseek, + .read_iter = ntfs_file_read_iter, + .splice_read = ntfs_file_splice_read, + .open = ntfs_file_open, + .release = ntfs_file_release, +}; // clang-format on diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index eb7a8c9fba01..d273eda1cf45 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -440,7 +440,10 @@ end_enum: * Usually a hard links to directories are disabled. */ inode->i_op = &ntfs_dir_inode_operations; - inode->i_fop = &ntfs_dir_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_dir_operations; + else + inode->i_fop = &ntfs_dir_operations; ni->i_valid = 0; } else if (S_ISLNK(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; @@ -450,7 +453,10 @@ end_enum: } else if (S_ISREG(mode)) { ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; inode->i_op = &ntfs_file_inode_operations; - inode->i_fop = &ntfs_file_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_file_operations; + else + inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; if (ino != MFT_REC_MFT) @@ -1614,7 +1620,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, if (S_ISDIR(mode)) { inode->i_op = &ntfs_dir_inode_operations; - inode->i_fop = &ntfs_dir_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_dir_operations; + else + inode->i_fop = &ntfs_dir_operations; } else if (S_ISLNK(mode)) { inode->i_op = &ntfs_link_inode_operations; inode->i_fop = NULL; @@ -1623,7 +1632,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, inode_nohighmem(inode); } else if (S_ISREG(mode)) { inode->i_op = &ntfs_file_inode_operations; - inode->i_fop = &ntfs_file_operations; + if (is_legacy_ntfs(inode->i_sb)) + inode->i_fop = &ntfs_legacy_file_operations; + else + inode->i_fop = &ntfs_file_operations; inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; init_rwsem(&ni->file.run_lock); diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 79356fd29a14..5f4d288c6adf 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -493,6 +493,7 @@ struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni, struct ntfs_fnd *fnd); bool dir_is_empty(struct inode *dir); extern const struct file_operations ntfs_dir_operations; +extern const struct file_operations ntfs_legacy_dir_operations; /* Globals from file.c */ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, @@ -507,6 +508,7 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg); extern const struct inode_operations ntfs_special_inode_operations; extern const struct inode_operations ntfs_file_inode_operations; extern const struct file_operations ntfs_file_operations; +extern const struct file_operations ntfs_legacy_file_operations; /* Globals from frecord.c */ void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi); @@ -1154,4 +1156,6 @@ static inline void le64_sub_cpu(__le64 *var, u64 val) *var = cpu_to_le64(le64_to_cpu(*var) - val); } +bool is_legacy_ntfs(struct super_block *sb); + #endif /* _LINUX_NTFS3_NTFS_FS_H */ diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 9df7c20d066f..b26d95a8d327 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -408,6 +408,12 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) struct ntfs_mount_options *new_opts = fc->fs_private; int ro_rw; + /* If ntfs3 is used as legacy ntfs enforce read-only mode. */ + if (is_legacy_ntfs(sb)) { + fc->sb_flags |= SB_RDONLY; + goto out; + } + ro_rw = sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY); if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) { errorf(fc, @@ -427,8 +433,6 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) fc, "ntfs3: Cannot use different iocharset when remounting!"); - sync_filesystem(sb); - if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) && !new_opts->force) { errorf(fc, @@ -436,6 +440,8 @@ static int ntfs_fs_reconfigure(struct fs_context *fc) return -EINVAL; } +out: + sync_filesystem(sb); swap(sbi->options, fc->fs_private); return 0; @@ -1613,6 +1619,8 @@ load_root: } #endif + if (is_legacy_ntfs(sb)) + sb->s_flags |= SB_RDONLY; return 0; put_inode_out: @@ -1730,7 +1738,7 @@ static const struct fs_context_operations ntfs_context_ops = { * This will called when mount/remount. We will first initialize * options so that if remount we can use just that. */ -static int ntfs_init_fs_context(struct fs_context *fc) +static int __ntfs_init_fs_context(struct fs_context *fc) { struct ntfs_mount_options *opts; struct ntfs_sb_info *sbi; @@ -1778,6 +1786,11 @@ free_opts: return -ENOMEM; } +static int ntfs_init_fs_context(struct fs_context *fc) +{ + return __ntfs_init_fs_context(fc); +} + static void ntfs3_kill_sb(struct super_block *sb) { struct ntfs_sb_info *sbi = sb->s_fs_info; @@ -1798,6 +1811,50 @@ static struct file_system_type ntfs_fs_type = { .kill_sb = ntfs3_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, }; + +#if IS_ENABLED(CONFIG_NTFS_FS) +static int ntfs_legacy_init_fs_context(struct fs_context *fc) +{ + int ret; + + ret = __ntfs_init_fs_context(fc); + /* If ntfs3 is used as legacy ntfs enforce read-only mode. */ + fc->sb_flags |= SB_RDONLY; + return ret; +} + +static struct file_system_type ntfs_legacy_fs_type = { + .owner = THIS_MODULE, + .name = "ntfs", + .init_fs_context = ntfs_legacy_init_fs_context, + .parameters = ntfs_fs_parameters, + .kill_sb = ntfs3_kill_sb, + .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, +}; +MODULE_ALIAS_FS("ntfs"); + +static inline void register_as_ntfs_legacy(void) +{ + int err = register_filesystem(&ntfs_legacy_fs_type); + if (err) + pr_warn("ntfs3: Failed to register legacy ntfs filesystem driver: %d\n", err); +} + +static inline void unregister_as_ntfs_legacy(void) +{ + unregister_filesystem(&ntfs_legacy_fs_type); +} +bool is_legacy_ntfs(struct super_block *sb) +{ + return sb->s_type == &ntfs_legacy_fs_type; +} +#else +static inline void register_as_ntfs_legacy(void) {} +static inline void unregister_as_ntfs_legacy(void) {} +bool is_legacy_ntfs(struct super_block *sb) { return false; } +#endif + + // clang-format on static int __init init_ntfs_fs(void) @@ -1832,6 +1889,7 @@ static int __init init_ntfs_fs(void) goto out1; } + register_as_ntfs_legacy(); err = register_filesystem(&ntfs_fs_type); if (err) goto out; @@ -1849,6 +1907,7 @@ static void __exit exit_ntfs_fs(void) rcu_barrier(); kmem_cache_destroy(ntfs_inode_cachep); unregister_filesystem(&ntfs_fs_type); + unregister_as_ntfs_legacy(); ntfs3_exit_bitmap(); #ifdef CONFIG_PROC_FS diff --git a/fs/proc/page.c b/fs/proc/page.c index 195b077c0fac..9223856c934b 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, */ ppage = pfn_to_online_page(pfn); - if (!ppage || PageSlab(ppage) || page_has_type(ppage)) + if (!ppage) pcount = 0; else pcount = page_mapcount(ppage); @@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page) /* * pseudo flags for the well known (anonymous) memory mapped pages - * - * Note that page->_mapcount is overloaded in SLAB, so the - * simple test in page_mapped() is not enough. */ - if (!PageSlab(page) && page_mapped(page)) + if (page_mapped(page)) u |= 1 << KPF_MMAP; if (PageAnon(page)) u |= 1 << KPF_ANON; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index d41eedbff674..39277c37185c 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -389,6 +389,7 @@ cifs_alloc_inode(struct super_block *sb) * server, can not assume caching of file data or metadata. */ cifs_set_oplock_level(cifs_inode, 0); + cifs_inode->lease_granted = false; cifs_inode->flags = 0; spin_lock_init(&cifs_inode->writers_lock); cifs_inode->writers = 0; @@ -739,6 +740,8 @@ static void cifs_umount_begin(struct super_block *sb) spin_lock(&cifs_tcp_ses_lock); spin_lock(&tcon->tc_lock); + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_umount); if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { /* we have other mounts to same share or we have already tried to umount this and woken up diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index d6669ce4ae87..6ff35570db81 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1190,6 +1190,7 @@ struct cifs_fattr { */ struct cifs_tcon { struct list_head tcon_list; + int debug_id; /* Debugging for tracing */ int tc_count; struct list_head rlist; /* reconnect list */ spinlock_t tc_lock; /* protect anything here that is not protected */ @@ -1276,7 +1277,9 @@ struct cifs_tcon { __u32 max_cached_dirs; #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ + bool fscache_acquired; /* T if we've tried acquiring a cookie */ struct fscache_volume *fscache; /* cookie for share */ + struct mutex fscache_lock; /* Prevent regetting a cookie */ #endif struct list_head pending_opens; /* list of incomplete opens */ struct cached_fids *cfids; diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index c0513fbb8a59..c46d418c1c0c 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -882,7 +882,7 @@ typedef struct smb_com_open_rsp { __u8 OplockLevel; __u16 Fid; __le32 CreateAction; - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; @@ -2266,7 +2266,7 @@ typedef struct { /* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */ /******************************************************************************/ typedef struct { /* data block encoding of response to level 263 QPathInfo */ - struct_group(common_attributes, + struct_group_attr(common_attributes, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 8e0a348f1f66..fbc358c09da3 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -303,7 +303,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, struct TCP_Server_Info *primary_server); extern void cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect); -extern void cifs_put_tcon(struct cifs_tcon *tcon); +extern void cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace); extern void cifs_release_automount_timer(void); @@ -530,8 +530,9 @@ extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses); extern struct cifs_ses *sesInfoAlloc(void); extern void sesInfoFree(struct cifs_ses *); -extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled); -extern void tconInfoFree(struct cifs_tcon *); +extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled, + enum smb3_tcon_ref_trace trace); +extern void tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace); extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number); @@ -721,8 +722,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) return options; } -struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon); -void cifs_put_tcon_super(struct super_block *sb); int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry); /* Put references of @ses and its children */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 4e35970681bf..7a16e12f5da8 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -1943,7 +1943,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) } /* no need to setup directory caching on IPC share, so pass in false */ - tcon = tcon_info_alloc(false); + tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc); if (tcon == NULL) return -ENOMEM; @@ -1960,7 +1960,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) if (rc) { cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc); - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail); goto out; } @@ -2043,7 +2043,7 @@ void __cifs_put_smb_ses(struct cifs_ses *ses) * files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an * SMB2 LOGOFF Request. */ - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc); if (do_logoff) { xid = get_xid(); rc = server->ops->logoff(xid, ses); @@ -2432,6 +2432,8 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) continue; } ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_find); spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); return tcon; @@ -2441,7 +2443,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) } void -cifs_put_tcon(struct cifs_tcon *tcon) +cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) { unsigned int xid; struct cifs_ses *ses; @@ -2457,6 +2459,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count); spin_lock(&cifs_tcp_ses_lock); spin_lock(&tcon->tc_lock); + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count - 1, trace); if (--tcon->tc_count > 0) { spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); @@ -2493,7 +2496,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) _free_xid(xid); cifs_fscache_release_super_cookie(tcon); - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free); cifs_put_smb_ses(ses); } @@ -2547,7 +2550,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) nohandlecache = ctx->nohandlecache; else nohandlecache = true; - tcon = tcon_info_alloc(!nohandlecache); + tcon = tcon_info_alloc(!nohandlecache, netfs_trace_tcon_ref_new); if (tcon == NULL) { rc = -ENOMEM; goto out_fail; @@ -2737,7 +2740,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) return tcon; out_fail: - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_fail); return ERR_PTR(rc); } @@ -2754,7 +2757,7 @@ cifs_put_tlink(struct tcon_link *tlink) } if (!IS_ERR(tlink_tcon(tlink))) - cifs_put_tcon(tlink_tcon(tlink)); + cifs_put_tcon(tlink_tcon(tlink), netfs_trace_tcon_ref_put_tlink); kfree(tlink); } @@ -3319,7 +3322,7 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) int rc = 0; if (mnt_ctx->tcon) - cifs_put_tcon(mnt_ctx->tcon); + cifs_put_tcon(mnt_ctx->tcon, netfs_trace_tcon_ref_put_mnt_ctx); else if (mnt_ctx->ses) cifs_put_smb_ses(mnt_ctx->ses); else if (mnt_ctx->server) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 6c727d8c31e8..3bbac925d076 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -748,6 +748,16 @@ static int smb3_fs_context_validate(struct fs_context *fc) /* set the port that we got earlier */ cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port); + if (ctx->uid_specified && !ctx->forceuid_specified) { + ctx->override_uid = 1; + pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n"); + } + + if (ctx->gid_specified && !ctx->forcegid_specified) { + ctx->override_gid = 1; + pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n"); + } + if (ctx->override_uid && !ctx->uid_specified) { ctx->override_uid = 0; pr_notice("ignoring forceuid mount option specified with no uid= option\n"); @@ -1019,12 +1029,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->override_uid = 0; else ctx->override_uid = 1; + ctx->forceuid_specified = true; break; case Opt_forcegid: if (result.negated) ctx->override_gid = 0; else ctx->override_gid = 1; + ctx->forcegid_specified = true; break; case Opt_perm: if (result.negated) diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index a947bddeba27..cf577ec0dd0a 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -165,6 +165,8 @@ enum cifs_param { }; struct smb3_fs_context { + bool forceuid_specified; + bool forcegid_specified; bool uid_specified; bool cruid_specified; bool gid_specified; diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c index 340efce8f052..1a895e6243ee 100644 --- a/fs/smb/client/fscache.c +++ b/fs/smb/client/fscache.c @@ -43,12 +43,23 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) char *key; int ret = -ENOMEM; + if (tcon->fscache_acquired) + return 0; + + mutex_lock(&tcon->fscache_lock); + if (tcon->fscache_acquired) { + mutex_unlock(&tcon->fscache_lock); + return 0; + } + tcon->fscache_acquired = true; + tcon->fscache = NULL; switch (sa->sa_family) { case AF_INET: case AF_INET6: break; default: + mutex_unlock(&tcon->fscache_lock); cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family); return -EINVAL; } @@ -57,6 +68,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) sharename = extract_sharename(tcon->tree_name); if (IS_ERR(sharename)) { + mutex_unlock(&tcon->fscache_lock); cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__); return PTR_ERR(sharename); } @@ -82,6 +94,11 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) } pr_err("Cache volume key already in use (%s)\n", key); vcookie = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_collision); + } else { + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_okay); } tcon->fscache = vcookie; @@ -90,6 +107,7 @@ out_2: kfree(key); out: kfree(sharename); + mutex_unlock(&tcon->fscache_lock); return ret; } @@ -102,6 +120,8 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) cifs_fscache_fill_volume_coherency(tcon, &cd); fscache_relinquish_volume(tcon->fscache, &cd, false); tcon->fscache = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_fscache_relinq); } void cifs_fscache_get_inode_cookie(struct inode *inode) diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 7d15a1969b81..07c468ddb88a 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -111,9 +111,10 @@ sesInfoFree(struct cifs_ses *buf_to_free) } struct cifs_tcon * -tcon_info_alloc(bool dir_leases_enabled) +tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) { struct cifs_tcon *ret_buf; + static atomic_t tcon_debug_id; ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL); if (!ret_buf) @@ -130,7 +131,8 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_inc(&tconInfoAllocCount); ret_buf->status = TID_NEW; - ++ret_buf->tc_count; + ret_buf->debug_id = atomic_inc_return(&tcon_debug_id); + ret_buf->tc_count = 1; spin_lock_init(&ret_buf->tc_lock); INIT_LIST_HEAD(&ret_buf->openFileList); INIT_LIST_HEAD(&ret_buf->tcon_list); @@ -139,17 +141,22 @@ tcon_info_alloc(bool dir_leases_enabled) atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); ret_buf->stats_from_time = ktime_get_real_seconds(); +#ifdef CONFIG_CIFS_FSCACHE + mutex_init(&ret_buf->fscache_lock); +#endif + trace_smb3_tcon_ref(ret_buf->debug_id, ret_buf->tc_count, trace); return ret_buf; } void -tconInfoFree(struct cifs_tcon *tcon) +tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace) { if (tcon == NULL) { cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n"); return; } + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, trace); free_cached_dirs(tcon->cfids); atomic_dec(&tconInfoAllocCount); kfree(tcon->nativeFileSystem); diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c index cc72be5a93a9..677ef6f99a5b 100644 --- a/fs/smb/client/smb2misc.c +++ b/fs/smb/client/smb2misc.c @@ -767,7 +767,7 @@ smb2_cancelled_close_fid(struct work_struct *work) if (rc) cifs_tcon_dbg(VFS, "Close cancelled mid failed rc:%d\n", rc); - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close_fid); kfree(cancelled); } @@ -811,6 +811,8 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, if (tcon->tc_count <= 0) { struct TCP_Server_Info *server = NULL; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_see_cancelled_close); WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative"); spin_unlock(&cifs_tcp_ses_lock); @@ -823,12 +825,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, return 0; } tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_cancelled_close); spin_unlock(&cifs_tcp_ses_lock); rc = __smb2_handle_cancelled_cmd(tcon, SMB2_CLOSE_HE, 0, persistent_fid, volatile_fid); if (rc) - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close); return rc; } @@ -856,7 +860,7 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve rsp->PersistentFileId, rsp->VolatileFileId); if (rc) - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_mid); return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 78c94d0350fe..28f0b7d19d53 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2915,8 +2915,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, tcon = list_first_entry_or_null(&ses->tcon_list, struct cifs_tcon, tcon_list); - if (tcon) + if (tcon) { tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_dfs_refer); + } spin_unlock(&cifs_tcp_ses_lock); } @@ -2980,6 +2983,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, /* ipc tcons are not refcounted */ spin_lock(&cifs_tcp_ses_lock); tcon->tc_count--; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_dec_dfs_refer); /* tc_count can never go negative */ WARN_ON(tcon->tc_count < 0); spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 86c647a947cc..a5efce03cb58 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4138,6 +4138,8 @@ void smb2_reconnect_server(struct work_struct *work) list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->need_reconnect || tcon->need_reopen_files) { tcon->tc_count++; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_reconnect_server); list_add_tail(&tcon->rlist, &tmp_list); tcon_selected = true; } @@ -4176,14 +4178,14 @@ void smb2_reconnect_server(struct work_struct *work) if (tcon->ipc) cifs_put_smb_ses(tcon->ses); else - cifs_put_tcon(tcon); + cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_reconnect_server); } if (!ses_exist) goto done; /* allocate a dummy tcon struct used for reconnect */ - tcon = tcon_info_alloc(false); + tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_reconnect_server); if (!tcon) { resched = true; list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { @@ -4206,7 +4208,7 @@ void smb2_reconnect_server(struct work_struct *work) list_del_init(&ses->rlist); cifs_put_smb_ses(ses); } - tconInfoFree(tcon); + tconInfoFree(tcon, netfs_trace_tcon_ref_free_reconnect_server); done: cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index c72a3b2886b7..2fccf0d4f53d 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -320,7 +320,7 @@ struct smb2_file_reparse_point_info { } __packed; struct smb2_file_network_open_info { - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 1d6e54f7879e..02135a605305 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -189,6 +189,8 @@ smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) if (tcon->tid != tid) continue; ++tcon->tc_count; + trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, + netfs_trace_tcon_ref_get_find_sess_tcon); return tcon; } diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 5e83cb9da902..604e52876cd2 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -3,6 +3,9 @@ * Copyright (C) 2018, Microsoft Corporation. * * Author(s): Steve French <stfrench@microsoft.com> + * + * Please use this 3-part article as a reference for writing new tracepoints: + * https://lwn.net/Articles/379903/ */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cifs @@ -15,9 +18,70 @@ #include <linux/inet.h> /* - * Please use this 3-part article as a reference for writing new tracepoints: - * https://lwn.net/Articles/379903/ + * Specify enums for tracing information. + */ +#define smb3_tcon_ref_traces \ + EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \ + EM(netfs_trace_tcon_ref_free, "FRE ") \ + EM(netfs_trace_tcon_ref_free_fail, "FRE Fail ") \ + EM(netfs_trace_tcon_ref_free_ipc, "FRE Ipc ") \ + EM(netfs_trace_tcon_ref_free_ipc_fail, "FRE Ipc-F ") \ + EM(netfs_trace_tcon_ref_free_reconnect_server, "FRE Reconn") \ + EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \ + EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \ + EM(netfs_trace_tcon_ref_get_find, "GET Find ") \ + EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \ + EM(netfs_trace_tcon_ref_get_reconnect_server, "GET Reconn") \ + EM(netfs_trace_tcon_ref_new, "NEW ") \ + EM(netfs_trace_tcon_ref_new_ipc, "NEW Ipc ") \ + EM(netfs_trace_tcon_ref_new_reconnect_server, "NEW Reconn") \ + EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \ + EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \ + EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \ + EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \ + EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \ + EM(netfs_trace_tcon_ref_put_tlink, "PUT Tlink ") \ + EM(netfs_trace_tcon_ref_see_cancelled_close, "SEE Cn-Cls") \ + EM(netfs_trace_tcon_ref_see_fscache_collision, "SEE FV-CO!") \ + EM(netfs_trace_tcon_ref_see_fscache_okay, "SEE FV-Ok ") \ + EM(netfs_trace_tcon_ref_see_fscache_relinq, "SEE FV-Rlq") \ + E_(netfs_trace_tcon_ref_see_umount, "SEE Umount") + +#undef EM +#undef E_ + +/* + * Define those tracing enums. + */ +#ifndef __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY +#define __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY + +#define EM(a, b) a, +#define E_(a, b) a + +enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte); + +#undef EM +#undef E_ +#endif + +/* + * Export enum symbols via userspace. + */ +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define E_(a, b) TRACE_DEFINE_ENUM(a); + +smb3_tcon_ref_traces; + +#undef EM +#undef E_ + +/* + * Now redefine the EM() and E_() macros to map the enums to the strings that + * will be printed in the output. */ +#define EM(a, b) { a, b }, +#define E_(a, b) { a, b } /* For logging errors in read or write */ DECLARE_EVENT_CLASS(smb3_rw_err_class, @@ -1125,6 +1189,30 @@ DEFINE_SMB3_CREDIT_EVENT(waitff_credits); DEFINE_SMB3_CREDIT_EVENT(overflow_credits); DEFINE_SMB3_CREDIT_EVENT(set_credits); + +TRACE_EVENT(smb3_tcon_ref, + TP_PROTO(unsigned int tcon_debug_id, int ref, + enum smb3_tcon_ref_trace trace), + TP_ARGS(tcon_debug_id, ref, trace), + TP_STRUCT__entry( + __field(unsigned int, tcon) + __field(int, ref) + __field(enum smb3_tcon_ref_trace, trace) + ), + TP_fast_assign( + __entry->tcon = tcon_debug_id; + __entry->ref = ref; + __entry->trace = trace; + ), + TP_printk("TC=%08x %s r=%u", + __entry->tcon, + __print_symbolic(__entry->trace, smb3_tcon_ref_traces), + __entry->ref) + ); + + +#undef EM +#undef E_ #endif /* _CIFS_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 994d70193432..ddf1a3aafee5 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -909,12 +909,15 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server) list_del_init(&mid->qhead); mid->mid_flags |= MID_DELETED; } + spin_unlock(&server->mid_lock); cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n", __func__, mid->mid, mid->mid_state); rc = -EIO; + goto sync_mid_done; } spin_unlock(&server->mid_lock); +sync_mid_done: release_mid(mid); return rc; } @@ -1057,9 +1060,11 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) index = (uint)atomic_inc_return(&ses->chan_seq); index %= ses->chan_count; } + + server = ses->chans[index].server; spin_unlock(&ses->chan_lock); - return ses->chans[index].server; + return server; } int diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 1b594307c9d5..202ff9128156 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -711,7 +711,7 @@ struct smb2_close_rsp { __le16 StructureSize; /* 60 */ __le16 Flags; __le32 Reserved; - struct_group(network_open_info, + struct_group_attr(network_open_info, __packed, __le64 CreationTime; __le64 LastAccessTime; __le64 LastWriteTime; diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h index 686b321c5a8b..f4e55199938d 100644 --- a/fs/smb/server/ksmbd_netlink.h +++ b/fs/smb/server/ksmbd_netlink.h @@ -340,23 +340,24 @@ enum KSMBD_TREE_CONN_STATUS { /* * Share config flags. */ -#define KSMBD_SHARE_FLAG_INVALID (0) -#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) -#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) -#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) -#define KSMBD_SHARE_FLAG_READONLY BIT(3) -#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) -#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) -#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) -#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) -#define KSMBD_SHARE_FLAG_PIPE BIT(8) -#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) -#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) -#define KSMBD_SHARE_FLAG_STREAMS BIT(11) -#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) -#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) -#define KSMBD_SHARE_FLAG_UPDATE BIT(14) -#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_INVALID (0) +#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0) +#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1) +#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2) +#define KSMBD_SHARE_FLAG_READONLY BIT(3) +#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4) +#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5) +#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6) +#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7) +#define KSMBD_SHARE_FLAG_PIPE BIT(8) +#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9) +#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10) +#define KSMBD_SHARE_FLAG_STREAMS BIT(11) +#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12) +#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13) +#define KSMBD_SHARE_FLAG_UPDATE BIT(14) +#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15) +#define KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY BIT(16) /* * Tree connect request flags. diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c index c0788188aa82..c67fbc8d6683 100644 --- a/fs/smb/server/server.c +++ b/fs/smb/server/server.c @@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work, int rc; bool is_chained = false; - if (conn->ops->allocate_rsp_buf(work)) - return; - if (conn->ops->is_transform_hdr && conn->ops->is_transform_hdr(work->request_buf)) { rc = conn->ops->decrypt_req(work); - if (rc < 0) { - conn->ops->set_rsp_status(work, STATUS_DATA_ERROR); - goto send; - } - + if (rc < 0) + return; work->encrypted = true; } + if (conn->ops->allocate_rsp_buf(work)) + return; + rc = conn->ops->init_rsp_hdr(work); if (rc) { /* either uid or tid is not correct */ diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 5723bbf372d7..355824151c2d 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -535,6 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) if (cmd == SMB2_QUERY_INFO_HE) { struct smb2_query_info_req *req; + if (get_rfc1002_len(work->request_buf) < + offsetof(struct smb2_query_info_req, OutputBufferLength)) + return -EINVAL; + req = smb2_get_msg(work->request_buf); if ((req->InfoType == SMB2_O_INFO_FILE && (req->FileInfoClass == FILE_FULL_EA_INFORMATION || @@ -1984,7 +1988,12 @@ int smb2_tree_connect(struct ksmbd_work *work) write_unlock(&sess->tree_conns_lock); rsp->StructureSize = cpu_to_le16(16); out_err1: - rsp->Capabilities = 0; + if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE && + test_share_config_flag(share, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) + rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY; + else + rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; @@ -3498,7 +3507,9 @@ int smb2_open(struct ksmbd_work *work) memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE); if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) { - if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent) + if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent && + test_share_config_flag(work->tcon->share_conf, + KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY)) fp->is_persistent = true; else fp->is_durable = true; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 22f0f3db3ac9..51b1b0bed616 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -754,10 +754,15 @@ retry: goto out4; } + /* + * explicitly handle file overwrite case, for compatibility with + * filesystems that may not support rename flags (e.g: fuse) + */ if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) { err = -EEXIST; goto out4; } + flags &= ~(RENAME_NOREPLACE); if (old_child == trap) { err = -EINVAL; diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 894c6ca1e500..a878cea70f4c 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -37,6 +37,7 @@ static DEFINE_MUTEX(eventfs_mutex); struct eventfs_root_inode { struct eventfs_inode ei; + struct inode *parent_inode; struct dentry *events_dir; }; @@ -68,11 +69,25 @@ enum { EVENTFS_SAVE_MODE = BIT(16), EVENTFS_SAVE_UID = BIT(17), EVENTFS_SAVE_GID = BIT(18), - EVENTFS_TOPLEVEL = BIT(19), }; #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) +static void free_ei_rcu(struct rcu_head *rcu) +{ + struct eventfs_inode *ei = container_of(rcu, struct eventfs_inode, rcu); + struct eventfs_root_inode *rei; + + kfree(ei->entry_attrs); + kfree_const(ei->name); + if (ei->is_events) { + rei = get_root_inode(ei); + kfree(rei); + } else { + kfree(ei); + } +} + /* * eventfs_inode reference count management. * @@ -84,18 +99,17 @@ enum { static void release_ei(struct kref *ref) { struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref); - struct eventfs_root_inode *rei; + const struct eventfs_entry *entry; WARN_ON_ONCE(!ei->is_freed); - kfree(ei->entry_attrs); - kfree_const(ei->name); - if (ei->is_events) { - rei = get_root_inode(ei); - kfree_rcu(rei, ei.rcu); - } else { - kfree_rcu(ei, rcu); + for (int i = 0; i < ei->nr_entries; i++) { + entry = &ei->entries[i]; + if (entry->release) + entry->release(entry->name, ei->data); } + + call_rcu(&ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) @@ -112,6 +126,18 @@ static inline void free_ei(struct eventfs_inode *ei) } } +/* + * Called when creation of an ei fails, do not call release() functions. + */ +static inline void cleanup_ei(struct eventfs_inode *ei) +{ + if (ei) { + /* Set nr_entries to 0 to prevent release() function being called */ + ei->nr_entries = 0; + free_ei(ei); + } +} + static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei) { if (ei) @@ -181,21 +207,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, * determined by the parent directory. */ if (dentry->d_inode->i_mode & S_IFDIR) { - /* - * The events directory dentry is never freed, unless its - * part of an instance that is deleted. It's attr is the - * default for its child files and directories. - * Do not update it. It's not used for its own mode or ownership. - */ - if (ei->is_events) { - /* But it still needs to know if it was modified */ - if (iattr->ia_valid & ATTR_UID) - ei->attr.mode |= EVENTFS_SAVE_UID; - if (iattr->ia_valid & ATTR_GID) - ei->attr.mode |= EVENTFS_SAVE_GID; - } else { - update_attr(&ei->attr, iattr); - } + update_attr(&ei->attr, iattr); } else { name = dentry->d_name.name; @@ -213,18 +225,25 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, return ret; } -static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb) +static void update_events_attr(struct eventfs_inode *ei, struct super_block *sb) { - struct inode *root; + struct eventfs_root_inode *rei; + struct inode *parent; - /* Only update if the "events" was on the top level */ - if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) - return; + rei = get_root_inode(ei); - /* Get the tracefs root inode. */ - root = d_inode(sb->s_root); - ei->attr.uid = root->i_uid; - ei->attr.gid = root->i_gid; + /* Use the parent inode permissions unless root set its permissions */ + parent = rei->parent_inode; + + if (rei->ei.attr.mode & EVENTFS_SAVE_UID) + ei->attr.uid = rei->ei.attr.uid; + else + ei->attr.uid = parent->i_uid; + + if (rei->ei.attr.mode & EVENTFS_SAVE_GID) + ei->attr.gid = rei->ei.attr.gid; + else + ei->attr.gid = parent->i_gid; } static void set_top_events_ownership(struct inode *inode) @@ -233,10 +252,10 @@ static void set_top_events_ownership(struct inode *inode) struct eventfs_inode *ei = ti->private; /* The top events directory doesn't get automatically updated */ - if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) + if (!ei || !ei->is_events) return; - update_top_events_attr(ei, inode->i_sb); + update_events_attr(ei, inode->i_sb); if (!(ei->attr.mode & EVENTFS_SAVE_UID)) inode->i_uid = ei->attr.uid; @@ -265,7 +284,7 @@ static int eventfs_permission(struct mnt_idmap *idmap, return generic_permission(idmap, inode, mask); } -static const struct inode_operations eventfs_root_dir_inode_operations = { +static const struct inode_operations eventfs_dir_inode_operations = { .lookup = eventfs_root_lookup, .setattr = eventfs_set_attr, .getattr = eventfs_get_attr, @@ -282,6 +301,35 @@ static const struct file_operations eventfs_file_operations = { .llseek = generic_file_llseek, }; +/* + * On a remount of tracefs, if UID or GID options are set, then + * the mount point inode permissions should be used. + * Reset the saved permission flags appropriately. + */ +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +{ + struct eventfs_inode *ei = ti->private; + + if (!ei) + return; + + if (update_uid) + ei->attr.mode &= ~EVENTFS_SAVE_UID; + + if (update_gid) + ei->attr.mode &= ~EVENTFS_SAVE_GID; + + if (!ei->entry_attrs) + return; + + for (int i = 0; i < ei->nr_entries; i++) { + if (update_uid) + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; + if (update_gid) + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; + } +} + /* Return the evenfs_inode of the "events" directory */ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) { @@ -304,7 +352,7 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) // Walk upwards until you find the events inode } while (!ei->is_events); - update_top_events_attr(ei, dentry->d_sb); + update_events_attr(ei, dentry->d_sb); return ei; } @@ -410,7 +458,7 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry, update_inode_attr(dentry, inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); - inode->i_op = &eventfs_root_dir_inode_operations; + inode->i_op = &eventfs_dir_inode_operations; inode->i_fop = &eventfs_file_operations; /* All directories will have the same inode number */ @@ -734,7 +782,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { - free_ei(ei); + cleanup_ei(ei); ei = NULL; } return ei; @@ -781,6 +829,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry // Note: we have a ref to the dentry from tracefs_start_creating() rei = get_root_inode(ei); rei->events_dir = dentry; + rei->parent_inode = d_inode(dentry->d_sb->s_root); ei->entries = entries; ei->nr_entries = size; @@ -790,29 +839,26 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry uid = d_inode(dentry->d_parent)->i_uid; gid = d_inode(dentry->d_parent)->i_gid; - /* - * If the events directory is of the top instance, then parent - * is NULL. Set the attr.mode to reflect this and its permissions will - * default to the tracefs root dentry. - */ - if (!parent) - ei->attr.mode = EVENTFS_TOPLEVEL; - - /* This is used as the default ownership of the files and directories */ ei->attr.uid = uid; ei->attr.gid = gid; + /* + * When the "events" directory is created, it takes on the + * permissions of its parent. But can be reset on remount. + */ + ei->attr.mode |= EVENTFS_SAVE_UID | EVENTFS_SAVE_GID; + INIT_LIST_HEAD(&ei->children); INIT_LIST_HEAD(&ei->list); ti = get_tracefs(inode); - ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; + ti->flags |= TRACEFS_EVENT_INODE; ti->private = ei; inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_uid = uid; inode->i_gid = gid; - inode->i_op = &eventfs_root_dir_inode_operations; + inode->i_op = &eventfs_dir_inode_operations; inode->i_fop = &eventfs_file_operations; dentry->d_fsdata = get_ei(ei); @@ -835,7 +881,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry return ei; fail: - free_ei(ei); + cleanup_ei(ei); tracefs_failed_creating(dentry); return ERR_PTR(-ENOMEM); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 5545e6bf7d26..417c840e6403 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -30,20 +30,47 @@ static struct vfsmount *tracefs_mount; static int tracefs_mount_count; static bool tracefs_registered; +/* + * Keep track of all tracefs_inodes in order to update their + * flags if necessary on a remount. + */ +static DEFINE_SPINLOCK(tracefs_inode_lock); +static LIST_HEAD(tracefs_inodes); + static struct inode *tracefs_alloc_inode(struct super_block *sb) { struct tracefs_inode *ti; + unsigned long flags; ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_add_rcu(&ti->list, &tracefs_inodes); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); + return &ti->vfs_inode; } +static void tracefs_free_inode_rcu(struct rcu_head *rcu) +{ + struct tracefs_inode *ti; + + ti = container_of(rcu, struct tracefs_inode, rcu); + kmem_cache_free(tracefs_inode_cachep, ti); +} + static void tracefs_free_inode(struct inode *inode) { - kmem_cache_free(tracefs_inode_cachep, get_tracefs(inode)); + struct tracefs_inode *ti = get_tracefs(inode); + unsigned long flags; + + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_del_rcu(&ti->list); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); + + call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -153,16 +180,39 @@ static void set_tracefs_inode_owner(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); struct inode *root_inode = ti->private; + kuid_t uid; + kgid_t gid; + + uid = root_inode->i_uid; + gid = root_inode->i_gid; + + /* + * If the root is not the mount point, then check the root's + * permissions. If it was never set, then default to the + * mount point. + */ + if (root_inode != d_inode(root_inode->i_sb->s_root)) { + struct tracefs_inode *rti; + + rti = get_tracefs(root_inode); + root_inode = d_inode(root_inode->i_sb->s_root); + + if (!(rti->flags & TRACEFS_UID_PERM_SET)) + uid = root_inode->i_uid; + + if (!(rti->flags & TRACEFS_GID_PERM_SET)) + gid = root_inode->i_gid; + } /* * If this inode has never been referenced, then update * the permissions to the superblock. */ if (!(ti->flags & TRACEFS_UID_PERM_SET)) - inode->i_uid = root_inode->i_uid; + inode->i_uid = uid; if (!(ti->flags & TRACEFS_GID_PERM_SET)) - inode->i_gid = root_inode->i_gid; + inode->i_gid = gid; } static int tracefs_permission(struct mnt_idmap *idmap, @@ -313,6 +363,8 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) struct tracefs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); struct tracefs_mount_opts *opts = &fsi->mount_opts; + struct tracefs_inode *ti; + bool update_uid, update_gid; umode_t tmp_mode; /* @@ -332,6 +384,25 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) if (!remount || opts->opts & BIT(Opt_gid)) inode->i_gid = opts->gid; + if (remount && (opts->opts & BIT(Opt_uid) || opts->opts & BIT(Opt_gid))) { + + update_uid = opts->opts & BIT(Opt_uid); + update_gid = opts->opts & BIT(Opt_gid); + + rcu_read_lock(); + list_for_each_entry_rcu(ti, &tracefs_inodes, list) { + if (update_uid) + ti->flags &= ~TRACEFS_UID_PERM_SET; + + if (update_gid) + ti->flags &= ~TRACEFS_GID_PERM_SET; + + if (ti->flags & TRACEFS_EVENT_INODE) + eventfs_remount(ti, update_uid, update_gid); + } + rcu_read_unlock(); + } + return 0; } @@ -398,7 +469,22 @@ static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) return !(ei && ei->is_freed); } +static void tracefs_d_iput(struct dentry *dentry, struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + + /* + * This inode is being freed and cannot be used for + * eventfs. Clear the flag so that it doesn't call into + * eventfs during the remount flag updates. The eventfs_inode + * gets freed after an RCU cycle, so the content will still + * be safe if the iteration is going on now. + */ + ti->flags &= ~TRACEFS_EVENT_INODE; +} + static const struct dentry_operations tracefs_dentry_operations = { + .d_iput = tracefs_d_iput, .d_revalidate = tracefs_d_revalidate, .d_release = tracefs_d_release, }; diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 15c26f9aaad4..f704d8348357 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -4,15 +4,18 @@ enum { TRACEFS_EVENT_INODE = BIT(1), - TRACEFS_EVENT_TOP_INODE = BIT(2), - TRACEFS_GID_PERM_SET = BIT(3), - TRACEFS_UID_PERM_SET = BIT(4), - TRACEFS_INSTANCE_INODE = BIT(5), + TRACEFS_GID_PERM_SET = BIT(2), + TRACEFS_UID_PERM_SET = BIT(3), + TRACEFS_INSTANCE_INODE = BIT(4), }; struct tracefs_inode { - struct inode vfs_inode; + union { + struct inode vfs_inode; + struct rcu_head rcu; + }; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ + struct list_head list; unsigned long flags; void *private; }; @@ -73,6 +76,7 @@ struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid); void eventfs_d_release(struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ |