summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.h11
-rw-r--r--fs/9p/vfs_inode.c37
-rw-r--r--fs/9p/vfs_inode_dotl.c28
-rw-r--r--fs/9p/vfs_super.c2
-rw-r--r--fs/bcachefs/backpointers.c2
-rw-r--r--fs/bcachefs/bcachefs_format.h3
-rw-r--r--fs/bcachefs/btree_gc.c3
-rw-r--r--fs/bcachefs/btree_io.c2
-rw-r--r--fs/bcachefs/btree_key_cache.c19
-rw-r--r--fs/bcachefs/btree_node_scan.c9
-rw-r--r--fs/bcachefs/btree_node_scan_types.h1
-rw-r--r--fs/bcachefs/btree_types.h2
-rw-r--r--fs/bcachefs/btree_update_interior.c6
-rw-r--r--fs/bcachefs/buckets.c2
-rw-r--r--fs/bcachefs/chardev.c4
-rw-r--r--fs/bcachefs/fs.c9
-rw-r--r--fs/bcachefs/inode.c2
-rw-r--r--fs/bcachefs/journal_io.c60
-rw-r--r--fs/bcachefs/recovery.c5
-rw-r--r--fs/bcachefs/sb-clean.c8
-rw-r--r--fs/bcachefs/sb-errors_types.h3
-rw-r--r--fs/bcachefs/sb-members.c4
-rw-r--r--fs/bcachefs/sb-members.h6
-rw-r--r--fs/bcachefs/super.c1
-rw-r--r--fs/bcachefs/thread_with_file.c15
-rw-r--r--fs/bcachefs/thread_with_file.h3
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/inode.c13
-rw-r--r--fs/btrfs/ioctl.c33
-rw-r--r--fs/btrfs/messages.c2
-rw-r--r--fs/btrfs/ordered-data.c1
-rw-r--r--fs/btrfs/qgroup.c21
-rw-r--r--fs/btrfs/scrub.c18
-rw-r--r--fs/btrfs/tests/extent-map-tests.c5
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/erofs/fscache.c2
-rw-r--r--fs/erofs/internal.h7
-rw-r--r--fs/erofs/super.c124
-rw-r--r--fs/eventpoll.c38
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/netfs/buffered_write.c23
-rw-r--r--fs/nfs/inode.c7
-rw-r--r--fs/nfsd/nfs4callback.c26
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/nfsd/state.h2
-rw-r--r--fs/ntfs3/Kconfig9
-rw-r--r--fs/ntfs3/dir.c7
-rw-r--r--fs/ntfs3/file.c8
-rw-r--r--fs/ntfs3/inode.c20
-rw-r--r--fs/ntfs3/ntfs_fs.h4
-rw-r--r--fs/ntfs3/super.c65
-rw-r--r--fs/proc/page.c7
-rw-r--r--fs/smb/client/cifsfs.c3
-rw-r--r--fs/smb/client/cifsglob.h3
-rw-r--r--fs/smb/client/cifspdu.h4
-rw-r--r--fs/smb/client/cifsproto.h9
-rw-r--r--fs/smb/client/connect.c21
-rw-r--r--fs/smb/client/fs_context.c12
-rw-r--r--fs/smb/client/fs_context.h2
-rw-r--r--fs/smb/client/fscache.c20
-rw-r--r--fs/smb/client/misc.c13
-rw-r--r--fs/smb/client/smb2misc.c10
-rw-r--r--fs/smb/client/smb2ops.c7
-rw-r--r--fs/smb/client/smb2pdu.c8
-rw-r--r--fs/smb/client/smb2pdu.h2
-rw-r--r--fs/smb/client/smb2transport.c2
-rw-r--r--fs/smb/client/trace.h92
-rw-r--r--fs/smb/client/transport.c7
-rw-r--r--fs/smb/common/smb2pdu.h2
-rw-r--r--fs/smb/server/ksmbd_netlink.h35
-rw-r--r--fs/smb/server/server.c13
-rw-r--r--fs/smb/server/smb2pdu.c15
-rw-r--r--fs/smb/server/vfs.c5
-rw-r--r--fs/tracefs/event_inode.c148
-rw-r--r--fs/tracefs/inode.c92
-rw-r--r--fs/tracefs/internal.h14
77 files changed, 883 insertions, 366 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 9defa12208f9..1775fcc7f0e8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -179,13 +179,14 @@ extern int v9fs_vfs_rename(struct mnt_idmap *idmap,
struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags);
-extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid);
+extern struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid,
+ bool new);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
extern const struct netfs_request_ops v9fs_req_ops;
extern struct inode *v9fs_fid_iget_dotl(struct super_block *sb,
- struct p9_fid *fid);
+ struct p9_fid *fid, bool new);
/* other default globals */
#define V9FS_PORT 564
@@ -224,12 +225,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
*/
static inline struct inode *
v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+ struct super_block *sb, bool new)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_fid_iget_dotl(sb, fid);
+ return v9fs_fid_iget_dotl(sb, fid, new);
else
- return v9fs_fid_iget(sb, fid);
+ return v9fs_fid_iget(sb, fid, new);
}
#endif
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 47bd77199e20..7a3308d77606 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -364,7 +364,8 @@ void v9fs_evict_inode(struct inode *inode)
clear_inode(inode);
}
-struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
+struct inode *
+v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new)
{
dev_t rdev;
int retval;
@@ -376,8 +377,18 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
inode = iget_locked(sb, QID2INO(&fid->qid));
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
+ if (!(inode->i_state & I_NEW)) {
+ if (!new) {
+ goto done;
+ } else {
+ p9_debug(P9_DEBUG_VFS, "WARNING: Inode collision %ld\n",
+ inode->i_ino);
+ iput(inode);
+ remove_inode_hash(inode);
+ inode = iget_locked(sb, QID2INO(&fid->qid));
+ WARN_ON(!(inode->i_state & I_NEW));
+ }
+ }
/*
* initialize the inode with the stat info
@@ -401,11 +412,11 @@ struct inode *v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid)
v9fs_set_netfs_context(inode);
v9fs_cache_inode_get_cookie(inode);
unlock_new_inode(inode);
+done:
return inode;
error:
iget_failed(inode);
return ERR_PTR(retval);
-
}
/**
@@ -437,8 +448,15 @@ static int v9fs_at_to_dotl_flags(int flags)
*/
static void v9fs_dec_count(struct inode *inode)
{
- if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
- drop_nlink(inode);
+ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) {
+ if (inode->i_nlink) {
+ drop_nlink(inode);
+ } else {
+ p9_debug(P9_DEBUG_VFS,
+ "WARNING: unexpected i_nlink zero %d inode %ld\n",
+ inode->i_nlink, inode->i_ino);
+ }
+ }
}
/**
@@ -489,6 +507,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
} else
v9fs_dec_count(inode);
+ if (inode->i_nlink <= 0) /* no more refs unhash it */
+ remove_inode_hash(inode);
+
v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
@@ -554,7 +575,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
/*
* instantiate inode and assign the unopened fid to the dentry
*/
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS,
@@ -683,7 +704,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
else if (IS_ERR(fid))
inode = ERR_CAST(fid);
else
- inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, false);
/*
* If we had a rename on the server and a parallel lookup
* for the new name, then make sure we instantiate with
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 55dde186041a..c61b97bd13b9 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -52,7 +52,10 @@ static kgid_t v9fs_get_fsgid_for_create(struct inode *dir_inode)
return current_fsgid();
}
-struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
+
+
+struct inode *
+v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid, bool new)
{
int retval;
struct inode *inode;
@@ -62,8 +65,18 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
inode = iget_locked(sb, QID2INO(&fid->qid));
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
+ if (!(inode->i_state & I_NEW)) {
+ if (!new) {
+ goto done;
+ } else { /* deal with race condition in inode number reuse */
+ p9_debug(P9_DEBUG_ERROR, "WARNING: Inode collision %lx\n",
+ inode->i_ino);
+ iput(inode);
+ remove_inode_hash(inode);
+ inode = iget_locked(sb, QID2INO(&fid->qid));
+ WARN_ON(!(inode->i_state & I_NEW));
+ }
+ }
/*
* initialize the inode with the stat info
@@ -90,12 +103,11 @@ struct inode *v9fs_fid_iget_dotl(struct super_block *sb, struct p9_fid *fid)
goto error;
unlock_new_inode(inode);
-
+done:
return inode;
error:
iget_failed(inode);
return ERR_PTR(retval);
-
}
struct dotl_openflag_map {
@@ -247,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
goto out;
}
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -340,7 +352,7 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -776,7 +788,7 @@ v9fs_vfs_mknod_dotl(struct mnt_idmap *idmap, struct inode *dir,
err);
goto error;
}
- inode = v9fs_fid_iget_dotl(dir->i_sb, fid);
+ inode = v9fs_fid_iget_dotl(dir->i_sb, fid, true);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n",
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 55e67e36ae68..f52fdf42945c 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -139,7 +139,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
else
sb->s_d_op = &v9fs_dentry_operations;
- inode = v9fs_get_inode_from_fid(v9ses, fid, sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, sb, true);
if (IS_ERR(inode)) {
retval = PTR_ERR(inode);
goto release_sb;
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index fadb1078903d..a20044201002 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -470,7 +470,7 @@ found:
goto err;
}
- bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
+ bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
bio->bi_iter.bi_sector = p.ptr.offset;
bch2_bio_map(bio, data_buf, bytes);
ret = submit_bio_wait(bio);
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 085987435a5e..f7fbfccd2b1e 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1504,7 +1504,8 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_stripe)) \
x(reflink, 7, BTREE_ID_EXTENTS|BTREE_ID_DATA, \
BIT_ULL(KEY_TYPE_reflink_v)| \
- BIT_ULL(KEY_TYPE_indirect_inline_data)) \
+ BIT_ULL(KEY_TYPE_indirect_inline_data)| \
+ BIT_ULL(KEY_TYPE_error)) \
x(subvolumes, 8, 0, \
BIT_ULL(KEY_TYPE_subvolume)) \
x(snapshots, 9, 0, \
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index ecbd9598f69f..791470b0c654 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -1587,7 +1587,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
ret = PTR_ERR_OR_ZERO(new);
if (ret)
- return ret;
+ goto out;
if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
@@ -1595,6 +1595,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
*bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
ret = bch2_trans_update(trans, iter, new, 0);
}
+out:
fsck_err:
printbuf_exit(&buf);
return ret;
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 9678b2375bed..debb0edc3455 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -888,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i,
btree_node_bkey_bad_u64s,
- "bad k->u64s %u (min %u max %lu)", k->u64s,
+ "bad k->u64s %u (min %u max %zu)", k->u64s,
bkeyp_key_u64s(&b->format, k),
U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
goto drop_this_key;
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 88a3582a3275..e8c1c530cd95 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -842,8 +842,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
* Newest freed entries are at the end of the list - once we hit one
* that's too new to be freed, we can bail out:
*/
- scanned += bc->nr_freed_nonpcpu;
-
list_for_each_entry_safe(ck, t, &bc->freed_nonpcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -857,11 +855,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
bc->nr_freed_nonpcpu--;
}
- if (scanned >= nr)
- goto out;
-
- scanned += bc->nr_freed_pcpu;
-
list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
if (!poll_state_synchronize_srcu(&c->btree_trans_barrier,
ck->btree_trans_barrier_seq))
@@ -875,9 +868,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
bc->nr_freed_pcpu--;
}
- if (scanned >= nr)
- goto out;
-
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
if (bc->shrink_iter >= tbl->size)
@@ -893,12 +883,12 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
ck = container_of(pos, struct bkey_cached, hash);
- if (test_bit(BKEY_CACHED_DIRTY, &ck->flags))
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
goto next;
-
- if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
+ } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
- else if (bkey_cached_lock_for_evict(ck)) {
+ goto next;
+ } else if (bkey_cached_lock_for_evict(ck)) {
bkey_cached_evict(bc, ck);
bkey_cached_free(bc, ck);
}
@@ -916,7 +906,6 @@ next:
} while (scanned < nr && bc->shrink_iter != start);
rcu_read_unlock();
-out:
memalloc_nofs_restore(flags);
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
mutex_unlock(&bc->lock);
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 866bd278439f..45cb8149d374 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -57,13 +57,14 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n
bp->v.seq = cpu_to_le64(f->cookie);
bp->v.sectors_written = 0;
bp->v.flags = 0;
+ bp->v.sectors_written = cpu_to_le16(f->sectors_written);
bp->v.min_key = f->min_key;
SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
}
static bool found_btree_node_is_readable(struct btree_trans *trans,
- const struct found_btree_node *f)
+ struct found_btree_node *f)
{
struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;
@@ -71,8 +72,10 @@ static bool found_btree_node_is_readable(struct btree_trans *trans,
struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
bool ret = !IS_ERR_OR_NULL(b);
- if (ret)
+ if (ret) {
+ f->sectors_written = b->written;
six_unlock_read(&b->c.lock);
+ }
/*
* We might update this node's range; if that happens, we need the node
@@ -302,6 +305,8 @@ again:
start->max_key = bpos_predecessor(n->min_key);
start->range_updated = true;
+ } else if (n->level) {
+ n->overwritten = true;
} else {
struct printbuf buf = PRINTBUF;
diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h
index abb7b27d556a..5cfaeb5ac831 100644
--- a/fs/bcachefs/btree_node_scan_types.h
+++ b/fs/bcachefs/btree_node_scan_types.h
@@ -9,6 +9,7 @@ struct found_btree_node {
bool overwritten:1;
u8 btree_id;
u8 level;
+ unsigned sectors_written;
u32 seq;
u64 cookie;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index e0c982a4195c..c69b233c41bb 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -321,9 +321,9 @@ struct bkey_cached {
struct btree_bkey_cached_common c;
unsigned long flags;
+ unsigned long btree_trans_barrier_seq;
u16 u64s;
bool valid;
- u32 btree_trans_barrier_seq;
struct bkey_cached_key key;
struct rhash_head hash;
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 6030c396754f..b4efd8cc4d1a 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1960,7 +1960,11 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
return 0;
- flags &= ~BCH_WATERMARK_MASK;
+ if ((flags & BCH_WATERMARK_MASK) <= BCH_WATERMARK_reclaim) {
+ flags &= ~BCH_WATERMARK_MASK;
+ flags |= BCH_WATERMARK_btree;
+ flags |= BCH_TRANS_COMMIT_journal_reclaim;
+ }
b = trans->paths[path].l[level].b;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 941401a210f5..82f179258867 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -525,7 +525,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
"different types of data in same bucket: %s, %s",
bch2_data_type_str(g->data_type),
bch2_data_type_str(data_type))) {
- BUG();
ret = -EIO;
goto err;
}
@@ -629,7 +628,6 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
bch2_data_type_str(ptr_data_type),
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- BUG();
ret = -EIO;
goto err;
}
diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c
index 72781aad6ba7..4d14f19f5185 100644
--- a/fs/bcachefs/chardev.c
+++ b/fs/bcachefs/chardev.c
@@ -232,13 +232,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
/* We need request_key() to be called before we punt to kthread: */
opt_set(thr->opts, nostart, true);
+ bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
+
thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
if (!IS_ERR(thr->c) &&
thr->c->opts.errors == BCH_ON_ERROR_panic)
thr->c->opts.errors = BCH_ON_ERROR_ro;
- ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
+ ret = __bch2_run_thread_with_stdio(&thr->thr);
out:
darray_for_each(devs, i)
kfree(*i);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index b5ea9fa1259d..fce690007edf 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -188,7 +188,8 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
BUG_ON(!old);
if (unlikely(old != inode)) {
- discard_new_inode(&inode->v);
+ __destroy_inode(&inode->v);
+ kmem_cache_free(bch2_inode_cache, inode);
inode = old;
} else {
mutex_lock(&c->vfs_inodes_lock);
@@ -225,8 +226,10 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans)
if (unlikely(!inode)) {
int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
- if (ret && inode)
- discard_new_inode(&inode->v);
+ if (ret && inode) {
+ __destroy_inode(&inode->v);
+ kmem_cache_free(bch2_inode_cache, inode);
+ }
if (ret)
return ERR_PTR(ret);
}
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index ca4a066e9a54..0f95d7fb5ec0 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -606,7 +606,7 @@ int bch2_trigger_inode(struct btree_trans *trans,
struct bkey_s new,
unsigned flags)
{
- s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
+ s64 nr = (s64) bkey_is_inode(new.k) - (s64) bkey_is_inode(old.k);
if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
if (nr) {
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 9aa28b52ab92..eb1f9d6f5a19 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1723,7 +1723,7 @@ static void journal_write_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref);
}
-static CLOSURE_CALLBACK(do_journal_write)
+static CLOSURE_CALLBACK(journal_write_submit)
{
closure_type(w, struct journal_buf, io);
struct journal *j = container_of(w, struct journal, buf[w->idx]);
@@ -1768,6 +1768,44 @@ static CLOSURE_CALLBACK(do_journal_write)
continue_at(cl, journal_write_done, j->wq);
}
+static CLOSURE_CALLBACK(journal_write_preflush)
+{
+ closure_type(w, struct journal_buf, io);
+ struct journal *j = container_of(w, struct journal, buf[w->idx]);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+
+ if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
+ spin_lock(&j->lock);
+ closure_wait(&j->async_wait, cl);
+ spin_unlock(&j->lock);
+
+ continue_at(cl, journal_write_preflush, j->wq);
+ return;
+ }
+
+ if (w->separate_flush) {
+ for_each_rw_member(c, ca) {
+ percpu_ref_get(&ca->io_ref);
+
+ struct journal_device *ja = &ca->journal;
+ struct bio *bio = &ja->bio[w->idx]->bio;
+ bio_reset(bio, ca->disk_sb.bdev,
+ REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
+ bio->bi_end_io = journal_write_endio;
+ bio->bi_private = ca;
+ closure_bio_submit(bio, cl);
+ }
+
+ continue_at(cl, journal_write_submit, j->wq);
+ } else {
+ /*
+ * no need to punt to another work item if we're not waiting on
+ * preflushes
+ */
+ journal_write_submit(&cl->work);
+ }
+}
+
static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
@@ -2033,23 +2071,9 @@ CLOSURE_CALLBACK(bch2_journal_write)
goto err;
if (!JSET_NO_FLUSH(w->data))
- closure_wait_event(&j->async_wait, j->seq_ondisk + 1 == le64_to_cpu(w->data->seq));
-
- if (!JSET_NO_FLUSH(w->data) && w->separate_flush) {
- for_each_rw_member(c, ca) {
- percpu_ref_get(&ca->io_ref);
-
- struct journal_device *ja = &ca->journal;
- struct bio *bio = &ja->bio[w->idx]->bio;
- bio_reset(bio, ca->disk_sb.bdev,
- REQ_OP_WRITE|REQ_SYNC|REQ_META|REQ_PREFLUSH);
- bio->bi_end_io = journal_write_endio;
- bio->bi_private = ca;
- closure_bio_submit(bio, cl);
- }
- }
-
- continue_at(cl, do_journal_write, j->wq);
+ continue_at(cl, journal_write_preflush, j->wq);
+ else
+ continue_at(cl, journal_write_submit, j->wq);
return;
no_io:
continue_at(cl, journal_write_done, j->wq);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 0f328aba9760..be5b47619327 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -249,7 +249,10 @@ int bch2_journal_replay(struct bch_fs *c)
struct journal_key *k = *kp;
- replay_now_at(j, k->journal_seq);
+ if (k->journal_seq)
+ replay_now_at(j, k->journal_seq);
+ else
+ replay_now_at(j, j->replay_journal_seq_end);
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc|
diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c
index 5980ba2563fe..35ca3f138de6 100644
--- a/fs/bcachefs/sb-clean.c
+++ b/fs/bcachefs/sb-clean.c
@@ -29,6 +29,14 @@ int bch2_sb_clean_validate_late(struct bch_fs *c, struct bch_sb_field_clean *cle
for (entry = clean->start;
entry < (struct jset_entry *) vstruct_end(&clean->field);
entry = vstruct_next(entry)) {
+ if (vstruct_end(entry) > vstruct_end(&clean->field)) {
+ bch_err(c, "journal entry (u64s %u) overran end of superblock clean section (u64s %u) by %zu",
+ le16_to_cpu(entry->u64s), le32_to_cpu(clean->field.u64s),
+ (u64 *) vstruct_end(entry) - (u64 *) vstruct_end(&clean->field));
+ bch2_sb_error_count(c, BCH_FSCK_ERR_sb_clean_entry_overrun);
+ return -BCH_ERR_fsck_repair_unimplemented;
+ }
+
ret = bch2_journal_entry_validate(c, NULL, entry,
le16_to_cpu(c->disk_sb.sb->version),
BCH_SB_BIG_ENDIAN(c->disk_sb.sb),
diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
index 4ca6e7b0d8aa..06c7a644f4a4 100644
--- a/fs/bcachefs/sb-errors_types.h
+++ b/fs/bcachefs/sb-errors_types.h
@@ -271,7 +271,8 @@
x(btree_root_unreadable_and_scan_found_nothing, 263) \
x(snapshot_node_missing, 264) \
x(dup_backpointer_to_bad_csum_extent, 265) \
- x(btree_bitmap_not_marked, 266)
+ x(btree_bitmap_not_marked, 266) \
+ x(sb_clean_entry_overrun, 267)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 522a969345e5..5b8e621ac5eb 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -463,8 +463,8 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns
m->btree_bitmap_shift += resize;
}
- for (unsigned bit = sectors >> m->btree_bitmap_shift;
- bit << m->btree_bitmap_shift < end;
+ for (unsigned bit = start >> m->btree_bitmap_shift;
+ (u64) bit << m->btree_bitmap_shift < end;
bit++)
bitmap |= BIT_ULL(bit);
diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h
index b27c3e4467cf..5efa64eca5f8 100644
--- a/fs/bcachefs/sb-members.h
+++ b/fs/bcachefs/sb-members.h
@@ -235,11 +235,11 @@ static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64
{
u64 end = start + sectors;
- if (end > 64 << ca->mi.btree_bitmap_shift)
+ if (end > 64ULL << ca->mi.btree_bitmap_shift)
return false;
- for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift;
- bit << ca->mi.btree_bitmap_shift < end;
+ for (unsigned bit = start >> ca->mi.btree_bitmap_shift;
+ (u64) bit << ca->mi.btree_bitmap_shift < end;
bit++)
if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
return false;
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 8daf80a38d60..88e214c609bb 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -544,6 +544,7 @@ static void __bch2_fs_free(struct bch_fs *c)
bch2_find_btree_nodes_exit(&c->found_btree_nodes);
bch2_free_pending_node_rewrites(c);
+ bch2_fs_allocator_background_exit(c);
bch2_fs_sb_errors_exit(c);
bch2_fs_counters_exit(c);
bch2_fs_snapshots_exit(c);
diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c
index 940db15d6a93..b1af7ac430f6 100644
--- a/fs/bcachefs/thread_with_file.c
+++ b/fs/bcachefs/thread_with_file.c
@@ -294,16 +294,27 @@ static int thread_with_stdio_fn(void *arg)
return 0;
}
-int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
- const struct thread_with_stdio_ops *ops)
+void bch2_thread_with_stdio_init(struct thread_with_stdio *thr,
+ const struct thread_with_stdio_ops *ops)
{
stdio_buf_init(&thr->stdio.input);
stdio_buf_init(&thr->stdio.output);
thr->ops = ops;
+}
+int __bch2_run_thread_with_stdio(struct thread_with_stdio *thr)
+{
return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, thread_with_stdio_fn);
}
+int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
+ const struct thread_with_stdio_ops *ops)
+{
+ bch2_thread_with_stdio_init(thr, ops);
+
+ return __bch2_run_thread_with_stdio(thr);
+}
+
int bch2_run_thread_with_stdout(struct thread_with_stdio *thr,
const struct thread_with_stdio_ops *ops)
{
diff --git a/fs/bcachefs/thread_with_file.h b/fs/bcachefs/thread_with_file.h
index af54ea8f5b0f..1d63d14d7dca 100644
--- a/fs/bcachefs/thread_with_file.h
+++ b/fs/bcachefs/thread_with_file.h
@@ -63,6 +63,9 @@ struct thread_with_stdio {
const struct thread_with_stdio_ops *ops;
};
+void bch2_thread_with_stdio_init(struct thread_with_stdio *,
+ const struct thread_with_stdio_ops *);
+int __bch2_run_thread_with_stdio(struct thread_with_stdio *);
int bch2_run_thread_with_stdio(struct thread_with_stdio *,
const struct thread_with_stdio_ops *);
int bch2_run_thread_with_stdout(struct thread_with_stdio *,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index c1e6a5bbeeaf..58110c968667 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2776,20 +2776,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes)
size_t alloc_bytes;
alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
- data = kvmalloc(alloc_bytes, GFP_KERNEL);
+ data = kvzalloc(alloc_bytes, GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
- if (total_bytes >= sizeof(*data)) {
+ if (total_bytes >= sizeof(*data))
data->bytes_left = total_bytes - sizeof(*data);
- data->bytes_missing = 0;
- } else {
+ else
data->bytes_missing = sizeof(*data) - total_bytes;
- data->bytes_left = 0;
- }
-
- data->elem_cnt = 0;
- data->elem_missed = 0;
return data;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 445f7716f1e2..24a048210b15 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -817,7 +817,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
split->block_len = em->block_len;
split->orig_start = em->orig_start;
} else {
- const u64 diff = start + len - em->start;
+ const u64 diff = end - em->start;
split->block_len = split->len;
split->block_start += diff;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c65fe5de4022..7fed887e700c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1145,13 +1145,13 @@ static void submit_one_async_extent(struct async_chunk *async_chunk,
0, *alloc_hint, &ins, 1, 1);
if (ret) {
/*
- * Here we used to try again by going back to non-compressed
- * path for ENOSPC. But we can't reserve space even for
- * compressed size, how could it work for uncompressed size
- * which requires larger size? So here we directly go error
- * path.
+ * We can't reserve contiguous space for the compressed size.
+ * Unlikely, but it's possible that we could have enough
+ * non-contiguous space for the uncompressed size instead. So
+ * fall back to uncompressed.
*/
- goto out_free;
+ submit_uncompressed_range(inode, async_extent, locked_page);
+ goto done;
}
/* Here we're doing allocation and writeback of the compressed pages */
@@ -1203,7 +1203,6 @@ done:
out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
-out_free:
mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 55f3ba6a831c..0493272a7668 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3758,15 +3758,43 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
goto drop_write;
}
- down_write(&fs_info->subvol_sem);
-
switch (sa->cmd) {
case BTRFS_QUOTA_CTL_ENABLE:
case BTRFS_QUOTA_CTL_ENABLE_SIMPLE_QUOTA:
+ down_write(&fs_info->subvol_sem);
ret = btrfs_quota_enable(fs_info, sa);
+ up_write(&fs_info->subvol_sem);
break;
case BTRFS_QUOTA_CTL_DISABLE:
+ /*
+ * Lock the cleaner mutex to prevent races with concurrent
+ * relocation, because relocation may be building backrefs for
+ * blocks of the quota root while we are deleting the root. This
+ * is like dropping fs roots of deleted snapshots/subvolumes, we
+ * need the same protection.
+ *
+ * This also prevents races between concurrent tasks trying to
+ * disable quotas, because we will unlock and relock
+ * qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+ *
+ * We take this here because we have the dependency of
+ *
+ * inode_lock -> subvol_sem
+ *
+ * because of rename. With relocation we can prealloc extents,
+ * so that makes the dependency chain
+ *
+ * cleaner_mutex -> inode_lock -> subvol_sem
+ *
+ * so we must take the cleaner_mutex here before we take the
+ * subvol_sem. The deadlock can't actually happen, but this
+ * quiets lockdep.
+ */
+ mutex_lock(&fs_info->cleaner_mutex);
+ down_write(&fs_info->subvol_sem);
ret = btrfs_quota_disable(fs_info);
+ up_write(&fs_info->subvol_sem);
+ mutex_unlock(&fs_info->cleaner_mutex);
break;
default:
ret = -EINVAL;
@@ -3774,7 +3802,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
}
kfree(sa);
- up_write(&fs_info->subvol_sem);
drop_write:
mnt_drop_write_file(file);
return ret;
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index c96dd66fd0f7..210d9c82e2ae 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -7,7 +7,7 @@
#ifdef CONFIG_PRINTK
-#define STATE_STRING_PREFACE ": state "
+#define STATE_STRING_PREFACE " state "
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1)
/*
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index b749ba45da2b..c2a42bcde98e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -1188,6 +1188,7 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
ordered->disk_bytenr += len;
ordered->num_bytes -= len;
ordered->disk_num_bytes -= len;
+ ordered->ram_bytes -= len;
if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
ASSERT(ordered->bytes_left == 0);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index cf8820ce7aa2..364acc9bbe73 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1342,16 +1342,10 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
lockdep_assert_held_write(&fs_info->subvol_sem);
/*
- * Lock the cleaner mutex to prevent races with concurrent relocation,
- * because relocation may be building backrefs for blocks of the quota
- * root while we are deleting the root. This is like dropping fs roots
- * of deleted snapshots/subvolumes, we need the same protection.
- *
- * This also prevents races between concurrent tasks trying to disable
- * quotas, because we will unlock and relock qgroup_ioctl_lock across
- * BTRFS_FS_QUOTA_ENABLED changes.
+ * Relocation will mess with backrefs, so make sure we have the
+ * cleaner_mutex held to protect us from relocate.
*/
- mutex_lock(&fs_info->cleaner_mutex);
+ lockdep_assert_held(&fs_info->cleaner_mutex);
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
@@ -1373,9 +1367,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
btrfs_qgroup_wait_for_completion(fs_info, false);
+ /*
+ * We have nothing held here and no trans handle, just return the error
+ * if there is one.
+ */
ret = flush_reservations(fs_info);
if (ret)
- goto out_unlock_cleaner;
+ return ret;
/*
* 1 For the root item
@@ -1439,9 +1437,6 @@ out:
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_commit_transaction(trans);
-out_unlock_cleaner:
- mutex_unlock(&fs_info->cleaner_mutex);
-
return ret;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index fa25004ab04e..4b22cfe9a98c 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1012,6 +1012,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
struct btrfs_fs_info *fs_info = sctx->fs_info;
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
stripe->bg->length);
+ unsigned long repaired;
int mirror;
int i;
@@ -1078,16 +1079,15 @@ out:
* Submit the repaired sectors. For zoned case, we cannot do repair
* in-place, but queue the bg to be relocated.
*/
- if (btrfs_is_zoned(fs_info)) {
- if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ bitmap_andnot(&repaired, &stripe->init_error_bitmap, &stripe->error_bitmap,
+ stripe->nr_sectors);
+ if (!sctx->readonly && !bitmap_empty(&repaired, stripe->nr_sectors)) {
+ if (btrfs_is_zoned(fs_info)) {
btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start);
- } else if (!sctx->readonly) {
- unsigned long repaired;
-
- bitmap_andnot(&repaired, &stripe->init_error_bitmap,
- &stripe->error_bitmap, stripe->nr_sectors);
- scrub_write_sectors(sctx, stripe, repaired, false);
- wait_scrub_stripe_io(stripe);
+ } else {
+ scrub_write_sectors(sctx, stripe, repaired, false);
+ wait_scrub_stripe_io(stripe);
+ }
}
scrub_stripe_report_errors(sctx, stripe);
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 253cce7ffecf..47b5d301038e 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -847,6 +847,11 @@ static int test_case_7(struct btrfs_fs_info *fs_info)
goto out;
}
+ if (em->block_start != SZ_32K + SZ_4K) {
+ test_err("em->block_start is %llu, expected 36K", em->block_start);
+ goto out;
+ }
+
free_extent_map(em);
read_lock(&em_tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f15591f3e54f..ef6bd2f4251b 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3455,6 +3455,7 @@ again:
* alignment and size).
*/
ret = -EUCLEAN;
+ mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 8aff1a724805..62da538d91cb 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -151,7 +151,7 @@ static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
if (WARN_ON(len == 0))
source = NETFS_INVALID_READ;
if (source != NETFS_READ_FROM_CACHE) {
- erofs_err(NULL, "prepare_read failed (source %d)", source);
+ erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
return -EIO;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 39c67119f43b..d28ccfc0352b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -84,13 +84,6 @@ struct erofs_dev_context {
bool flatdev;
};
-struct erofs_fs_context {
- struct erofs_mount_opts opt;
- struct erofs_dev_context *devs;
- char *fsid;
- char *domain_id;
-};
-
/* all filesystem-wide lz4 configurations */
struct erofs_sb_lz4_info {
/* # of pages needed for EROFS lz4 rolling decompression */
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index c0eb139adb07..30b49b2eee53 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -370,18 +370,18 @@ out:
return ret;
}
-static void erofs_default_options(struct erofs_fs_context *ctx)
+static void erofs_default_options(struct erofs_sb_info *sbi)
{
#ifdef CONFIG_EROFS_FS_ZIP
- ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
- ctx->opt.max_sync_decompress_pages = 3;
- ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
+ sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
+ sbi->opt.max_sync_decompress_pages = 3;
+ sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
- set_opt(&ctx->opt, XATTR_USER);
+ set_opt(&sbi->opt, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
- set_opt(&ctx->opt, POSIX_ACL);
+ set_opt(&sbi->opt, POSIX_ACL);
#endif
}
@@ -426,16 +426,16 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
{
#ifdef CONFIG_FS_DAX
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
switch (mode) {
case EROFS_MOUNT_DAX_ALWAYS:
- set_opt(&ctx->opt, DAX_ALWAYS);
- clear_opt(&ctx->opt, DAX_NEVER);
+ set_opt(&sbi->opt, DAX_ALWAYS);
+ clear_opt(&sbi->opt, DAX_NEVER);
return true;
case EROFS_MOUNT_DAX_NEVER:
- set_opt(&ctx->opt, DAX_NEVER);
- clear_opt(&ctx->opt, DAX_ALWAYS);
+ set_opt(&sbi->opt, DAX_NEVER);
+ clear_opt(&sbi->opt, DAX_ALWAYS);
return true;
default:
DBG_BUGON(1);
@@ -450,7 +450,7 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
static int erofs_fc_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
struct fs_parse_result result;
struct erofs_device_info *dif;
int opt, ret;
@@ -463,9 +463,9 @@ static int erofs_fc_parse_param(struct fs_context *fc,
case Opt_user_xattr:
#ifdef CONFIG_EROFS_FS_XATTR
if (result.boolean)
- set_opt(&ctx->opt, XATTR_USER);
+ set_opt(&sbi->opt, XATTR_USER);
else
- clear_opt(&ctx->opt, XATTR_USER);
+ clear_opt(&sbi->opt, XATTR_USER);
#else
errorfc(fc, "{,no}user_xattr options not supported");
#endif
@@ -473,16 +473,16 @@ static int erofs_fc_parse_param(struct fs_context *fc,
case Opt_acl:
#ifdef CONFIG_EROFS_FS_POSIX_ACL
if (result.boolean)
- set_opt(&ctx->opt, POSIX_ACL);
+ set_opt(&sbi->opt, POSIX_ACL);
else
- clear_opt(&ctx->opt, POSIX_ACL);
+ clear_opt(&sbi->opt, POSIX_ACL);
#else
errorfc(fc, "{,no}acl options not supported");
#endif
break;
case Opt_cache_strategy:
#ifdef CONFIG_EROFS_FS_ZIP
- ctx->opt.cache_strategy = result.uint_32;
+ sbi->opt.cache_strategy = result.uint_32;
#else
errorfc(fc, "compression not supported, cache_strategy ignored");
#endif
@@ -504,27 +504,27 @@ static int erofs_fc_parse_param(struct fs_context *fc,
kfree(dif);
return -ENOMEM;
}
- down_write(&ctx->devs->rwsem);
- ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
- up_write(&ctx->devs->rwsem);
+ down_write(&sbi->devs->rwsem);
+ ret = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
+ up_write(&sbi->devs->rwsem);
if (ret < 0) {
kfree(dif->path);
kfree(dif);
return ret;
}
- ++ctx->devs->extra_devices;
+ ++sbi->devs->extra_devices;
break;
#ifdef CONFIG_EROFS_FS_ONDEMAND
case Opt_fsid:
- kfree(ctx->fsid);
- ctx->fsid = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->fsid)
+ kfree(sbi->fsid);
+ sbi->fsid = kstrdup(param->string, GFP_KERNEL);
+ if (!sbi->fsid)
return -ENOMEM;
break;
case Opt_domain_id:
- kfree(ctx->domain_id);
- ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
- if (!ctx->domain_id)
+ kfree(sbi->domain_id);
+ sbi->domain_id = kstrdup(param->string, GFP_KERNEL);
+ if (!sbi->domain_id)
return -ENOMEM;
break;
#else
@@ -581,8 +581,7 @@ static const struct export_operations erofs_export_ops = {
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
- struct erofs_sb_info *sbi;
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
int err;
sb->s_magic = EROFS_SUPER_MAGIC;
@@ -590,19 +589,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_op = &erofs_sops;
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
-
- sb->s_fs_info = sbi;
- sbi->opt = ctx->opt;
- sbi->devs = ctx->devs;
- ctx->devs = NULL;
- sbi->fsid = ctx->fsid;
- ctx->fsid = NULL;
- sbi->domain_id = ctx->domain_id;
- ctx->domain_id = NULL;
-
sbi->blkszbits = PAGE_SHIFT;
if (erofs_is_fscache_mode(sb)) {
sb->s_blocksize = PAGE_SIZE;
@@ -706,9 +692,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
static int erofs_fc_get_tree(struct fs_context *fc)
{
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
- if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid)
+ if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
return get_tree_nodev(fc, erofs_fc_fill_super);
return get_tree_bdev(fc, erofs_fc_fill_super);
@@ -718,19 +704,19 @@ static int erofs_fc_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *new_sbi = fc->s_fs_info;
DBG_BUGON(!sb_rdonly(sb));
- if (ctx->fsid || ctx->domain_id)
+ if (new_sbi->fsid || new_sbi->domain_id)
erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");
- if (test_opt(&ctx->opt, POSIX_ACL))
+ if (test_opt(&new_sbi->opt, POSIX_ACL))
fc->sb_flags |= SB_POSIXACL;
else
fc->sb_flags &= ~SB_POSIXACL;
- sbi->opt = ctx->opt;
+ sbi->opt = new_sbi->opt;
fc->sb_flags |= SB_RDONLY;
return 0;
@@ -761,12 +747,15 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs)
static void erofs_fc_free(struct fs_context *fc)
{
- struct erofs_fs_context *ctx = fc->fs_private;
+ struct erofs_sb_info *sbi = fc->s_fs_info;
- erofs_free_dev_context(ctx->devs);
- kfree(ctx->fsid);
- kfree(ctx->domain_id);
- kfree(ctx);
+ if (!sbi)
+ return;
+
+ erofs_free_dev_context(sbi->devs);
+ kfree(sbi->fsid);
+ kfree(sbi->domain_id);
+ kfree(sbi);
}
static const struct fs_context_operations erofs_context_ops = {
@@ -778,38 +767,35 @@ static const struct fs_context_operations erofs_context_ops = {
static int erofs_init_fs_context(struct fs_context *fc)
{
- struct erofs_fs_context *ctx;
+ struct erofs_sb_info *sbi;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
+ sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+ if (!sbi)
return -ENOMEM;
- ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
- if (!ctx->devs) {
- kfree(ctx);
+
+ sbi->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
+ if (!sbi->devs) {
+ kfree(sbi);
return -ENOMEM;
}
- fc->fs_private = ctx;
+ fc->s_fs_info = sbi;
- idr_init(&ctx->devs->tree);
- init_rwsem(&ctx->devs->rwsem);
- erofs_default_options(ctx);
+ idr_init(&sbi->devs->tree);
+ init_rwsem(&sbi->devs->rwsem);
+ erofs_default_options(sbi);
fc->ops = &erofs_context_ops;
return 0;
}
static void erofs_kill_sb(struct super_block *sb)
{
- struct erofs_sb_info *sbi;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
- if (erofs_is_fscache_mode(sb))
+ if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
kill_anon_super(sb);
else
kill_block_super(sb);
- sbi = EROFS_SB(sb);
- if (!sbi)
- return;
-
erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev, NULL);
erofs_fscache_unregister_fs(sb);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 882b89edc52a..f53ca4f7fced 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -980,6 +980,34 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
}
/*
+ * The ffd.file pointer may be in the process of being torn down due to
+ * being closed, but we may not have finished eventpoll_release() yet.
+ *
+ * Normally, even with the atomic_long_inc_not_zero, the file may have
+ * been free'd and then gotten re-allocated to something else (since
+ * files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
+ *
+ * But for epoll, users hold the ep->mtx mutex, and as such any file in
+ * the process of being free'd will block in eventpoll_release_file()
+ * and thus the underlying file allocation will not be free'd, and the
+ * file re-use cannot happen.
+ *
+ * For the same reason we can avoid a rcu_read_lock() around the
+ * operation - 'ffd.file' cannot go away even if the refcount has
+ * reached zero (but we must still not call out to ->poll() functions
+ * etc).
+ */
+static struct file *epi_fget(const struct epitem *epi)
+{
+ struct file *file;
+
+ file = epi->ffd.file;
+ if (!atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ return file;
+}
+
+/*
* Differs from ep_eventpoll_poll() in that internal callers already have
* the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
* is correctly annotated.
@@ -987,14 +1015,22 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
int depth)
{
- struct file *file = epi->ffd.file;
+ struct file *file = epi_fget(epi);
__poll_t res;
+ /*
+ * We could return EPOLLERR | EPOLLHUP or something, but let's
+ * treat this more as "file doesn't exist, poll didn't happen".
+ */
+ if (!file)
+ return 0;
+
pt->_key = epi->event.events;
if (!is_file_epoll(file))
res = vfs_poll(file, pt);
else
res = __ep_eventpoll_poll(file, pt, depth);
+ fput(file);
return res & epi->event.events;
}
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1d5abfdf0f22..fb0628e680c4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -769,7 +769,7 @@ static int ioctl_getfsuuid(struct file *file, void __user *argp)
struct fsuuid2 u = { .len = sb->s_uuid_len, };
if (!sb->s_uuid_len)
- return -ENOIOCTLCMD;
+ return -ENOTTY;
memcpy(&u.uuid[0], &sb->s_uuid, sb->s_uuid_len);
@@ -781,7 +781,7 @@ static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp)
struct super_block *sb = file_inode(file)->i_sb;
if (!strlen(sb->s_sysfs_name))
- return -ENOIOCTLCMD;
+ return -ENOTTY;
struct fs_sysfs_path u = {};
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 9a0d32e4b422..267b622d923b 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -164,7 +164,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
enum netfs_how_to_modify howto;
enum netfs_folio_trace trace;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
- ssize_t written = 0, ret;
+ ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos, from, to;
size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
bool maybe_trouble = false;
@@ -172,15 +172,14 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) ||
iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC))
) {
- if (pos < i_size_read(inode)) {
- ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
- if (ret < 0) {
- goto out;
- }
- }
-
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
+ ret = filemap_write_and_wait_range(mapping, pos, pos + iter->count);
+ if (ret < 0) {
+ wbc_detach_inode(&wbc);
+ goto out;
+ }
+
wreq = netfs_begin_writethrough(iocb, iter->count);
if (IS_ERR(wreq)) {
wbc_detach_inode(&wbc);
@@ -395,10 +394,12 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
out:
if (unlikely(wreq)) {
- ret = netfs_end_writethrough(wreq, iocb);
+ ret2 = netfs_end_writethrough(wreq, iocb);
wbc_detach_inode(&wbc);
- if (ret == -EIOCBQUEUED)
- return ret;
+ if (ret2 == -EIOCBQUEUED)
+ return ret2;
+ if (ret == 0)
+ ret = ret2;
}
iocb->ki_pos += written;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c709c296ea9a..acef52ecb1bb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2429,7 +2429,12 @@ static int nfs_net_init(struct net *net)
struct nfs_net *nn = net_generic(net, nfs_net_id);
nfs_clients_init(net);
- rpc_proc_register(net, &nn->rpcstats);
+
+ if (!rpc_proc_register(net, &nn->rpcstats)) {
+ nfs_clients_exit(net);
+ return -ENOMEM;
+ }
+
return nfs_fs_proc_net_init(net);
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 87c9547989f6..e88aca0c6e8e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -983,15 +983,7 @@ static struct workqueue_struct *callback_wq;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
trace_nfsd_cb_queue(cb->cb_clp, cb);
- return queue_delayed_work(callback_wq, &cb->cb_work, 0);
-}
-
-static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
- unsigned long msecs)
-{
- trace_nfsd_cb_queue(cb->cb_clp, cb);
- queue_delayed_work(callback_wq, &cb->cb_work,
- msecs_to_jiffies(msecs));
+ return queue_work(callback_wq, &cb->cb_work);
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@@ -1490,7 +1482,7 @@ static void
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
- container_of(work, struct nfsd4_callback, cb_work.work);
+ container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
@@ -1502,16 +1494,8 @@ nfsd4_run_cb_work(struct work_struct *work)
clnt = clp->cl_cb_client;
if (!clnt) {
- if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
- nfsd41_destroy_cb(cb);
- else {
- /*
- * XXX: Ideally, we could wait for the client to
- * reconnect, but I haven't figured out how
- * to do that yet.
- */
- nfsd4_queue_cb_delayed(cb, 25);
- }
+ /* Callback channel broken, or client killed; give up: */
+ nfsd41_destroy_cb(cb);
return;
}
@@ -1544,7 +1528,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
- INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
+ INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1955481832e0..a644460f3a5e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3515,6 +3515,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.exp = exp;
args.dentry = dentry;
args.ignore_crossmnt = (ignore_crossmnt != 0);
+ args.acl = NULL;
/*
* Make a local copy of the attribute bitmap that can be modified.
@@ -3573,7 +3574,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
} else
args.fhp = fhp;
- args.acl = NULL;
if (attrmask[0] & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
if (err == -EOPNOTSUPP)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 01c6f3445646..2ed0fcf879fd 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,7 +68,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
const struct nfsd4_callback_ops *cb_ops;
- struct delayed_work cb_work;
+ struct work_struct cb_work;
int cb_seq_status;
int cb_status;
bool cb_need_restart;
diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig
index cdfdf51e55d7..7bc31d69f680 100644
--- a/fs/ntfs3/Kconfig
+++ b/fs/ntfs3/Kconfig
@@ -46,3 +46,12 @@ config NTFS3_FS_POSIX_ACL
NOTE: this is linux only feature. Windows will ignore these ACLs.
If you don't know what Access Control Lists are, say N.
+
+config NTFS_FS
+ tristate "NTFS file system support"
+ select NTFS3_FS
+ select BUFFER_HEAD
+ select NLS
+ help
+ This config option is here only for backward compatibility. NTFS
+ filesystem is now handled by the NTFS3 driver.
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index 5cf3d9decf64..263635199b60 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -616,4 +616,11 @@ const struct file_operations ntfs_dir_operations = {
.compat_ioctl = ntfs_compat_ioctl,
#endif
};
+
+const struct file_operations ntfs_legacy_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate_shared = ntfs_readdir,
+ .open = ntfs_file_open,
+};
// clang-format on
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 5418662c80d8..b73969e05052 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -1236,4 +1236,12 @@ const struct file_operations ntfs_file_operations = {
.fallocate = ntfs_fallocate,
.release = ntfs_file_release,
};
+
+const struct file_operations ntfs_legacy_file_operations = {
+ .llseek = generic_file_llseek,
+ .read_iter = ntfs_file_read_iter,
+ .splice_read = ntfs_file_splice_read,
+ .open = ntfs_file_open,
+ .release = ntfs_file_release,
+};
// clang-format on
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index eb7a8c9fba01..d273eda1cf45 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -440,7 +440,10 @@ end_enum:
* Usually a hard links to directories are disabled.
*/
inode->i_op = &ntfs_dir_inode_operations;
- inode->i_fop = &ntfs_dir_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_dir_operations;
+ else
+ inode->i_fop = &ntfs_dir_operations;
ni->i_valid = 0;
} else if (S_ISLNK(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
@@ -450,7 +453,10 @@ end_enum:
} else if (S_ISREG(mode)) {
ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY;
inode->i_op = &ntfs_file_inode_operations;
- inode->i_fop = &ntfs_file_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_file_operations;
+ else
+ inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
if (ino != MFT_REC_MFT)
@@ -1614,7 +1620,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
if (S_ISDIR(mode)) {
inode->i_op = &ntfs_dir_inode_operations;
- inode->i_fop = &ntfs_dir_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_dir_operations;
+ else
+ inode->i_fop = &ntfs_dir_operations;
} else if (S_ISLNK(mode)) {
inode->i_op = &ntfs_link_inode_operations;
inode->i_fop = NULL;
@@ -1623,7 +1632,10 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
inode_nohighmem(inode);
} else if (S_ISREG(mode)) {
inode->i_op = &ntfs_file_inode_operations;
- inode->i_fop = &ntfs_file_operations;
+ if (is_legacy_ntfs(inode->i_sb))
+ inode->i_fop = &ntfs_legacy_file_operations;
+ else
+ inode->i_fop = &ntfs_file_operations;
inode->i_mapping->a_ops = is_compressed(ni) ? &ntfs_aops_cmpr :
&ntfs_aops;
init_rwsem(&ni->file.run_lock);
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index 79356fd29a14..5f4d288c6adf 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -493,6 +493,7 @@ struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni,
struct ntfs_fnd *fnd);
bool dir_is_empty(struct inode *dir);
extern const struct file_operations ntfs_dir_operations;
+extern const struct file_operations ntfs_legacy_dir_operations;
/* Globals from file.c */
int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
@@ -507,6 +508,7 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg);
extern const struct inode_operations ntfs_special_inode_operations;
extern const struct inode_operations ntfs_file_inode_operations;
extern const struct file_operations ntfs_file_operations;
+extern const struct file_operations ntfs_legacy_file_operations;
/* Globals from frecord.c */
void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi);
@@ -1154,4 +1156,6 @@ static inline void le64_sub_cpu(__le64 *var, u64 val)
*var = cpu_to_le64(le64_to_cpu(*var) - val);
}
+bool is_legacy_ntfs(struct super_block *sb);
+
#endif /* _LINUX_NTFS3_NTFS_FS_H */
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 9df7c20d066f..b26d95a8d327 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -408,6 +408,12 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
struct ntfs_mount_options *new_opts = fc->fs_private;
int ro_rw;
+ /* If ntfs3 is used as legacy ntfs enforce read-only mode. */
+ if (is_legacy_ntfs(sb)) {
+ fc->sb_flags |= SB_RDONLY;
+ goto out;
+ }
+
ro_rw = sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY);
if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) {
errorf(fc,
@@ -427,8 +433,6 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
fc,
"ntfs3: Cannot use different iocharset when remounting!");
- sync_filesystem(sb);
-
if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) &&
!new_opts->force) {
errorf(fc,
@@ -436,6 +440,8 @@ static int ntfs_fs_reconfigure(struct fs_context *fc)
return -EINVAL;
}
+out:
+ sync_filesystem(sb);
swap(sbi->options, fc->fs_private);
return 0;
@@ -1613,6 +1619,8 @@ load_root:
}
#endif
+ if (is_legacy_ntfs(sb))
+ sb->s_flags |= SB_RDONLY;
return 0;
put_inode_out:
@@ -1730,7 +1738,7 @@ static const struct fs_context_operations ntfs_context_ops = {
* This will called when mount/remount. We will first initialize
* options so that if remount we can use just that.
*/
-static int ntfs_init_fs_context(struct fs_context *fc)
+static int __ntfs_init_fs_context(struct fs_context *fc)
{
struct ntfs_mount_options *opts;
struct ntfs_sb_info *sbi;
@@ -1778,6 +1786,11 @@ free_opts:
return -ENOMEM;
}
+static int ntfs_init_fs_context(struct fs_context *fc)
+{
+ return __ntfs_init_fs_context(fc);
+}
+
static void ntfs3_kill_sb(struct super_block *sb)
{
struct ntfs_sb_info *sbi = sb->s_fs_info;
@@ -1798,6 +1811,50 @@ static struct file_system_type ntfs_fs_type = {
.kill_sb = ntfs3_kill_sb,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
+
+#if IS_ENABLED(CONFIG_NTFS_FS)
+static int ntfs_legacy_init_fs_context(struct fs_context *fc)
+{
+ int ret;
+
+ ret = __ntfs_init_fs_context(fc);
+ /* If ntfs3 is used as legacy ntfs enforce read-only mode. */
+ fc->sb_flags |= SB_RDONLY;
+ return ret;
+}
+
+static struct file_system_type ntfs_legacy_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "ntfs",
+ .init_fs_context = ntfs_legacy_init_fs_context,
+ .parameters = ntfs_fs_parameters,
+ .kill_sb = ntfs3_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+};
+MODULE_ALIAS_FS("ntfs");
+
+static inline void register_as_ntfs_legacy(void)
+{
+ int err = register_filesystem(&ntfs_legacy_fs_type);
+ if (err)
+ pr_warn("ntfs3: Failed to register legacy ntfs filesystem driver: %d\n", err);
+}
+
+static inline void unregister_as_ntfs_legacy(void)
+{
+ unregister_filesystem(&ntfs_legacy_fs_type);
+}
+bool is_legacy_ntfs(struct super_block *sb)
+{
+ return sb->s_type == &ntfs_legacy_fs_type;
+}
+#else
+static inline void register_as_ntfs_legacy(void) {}
+static inline void unregister_as_ntfs_legacy(void) {}
+bool is_legacy_ntfs(struct super_block *sb) { return false; }
+#endif
+
+
// clang-format on
static int __init init_ntfs_fs(void)
@@ -1832,6 +1889,7 @@ static int __init init_ntfs_fs(void)
goto out1;
}
+ register_as_ntfs_legacy();
err = register_filesystem(&ntfs_fs_type);
if (err)
goto out;
@@ -1849,6 +1907,7 @@ static void __exit exit_ntfs_fs(void)
rcu_barrier();
kmem_cache_destroy(ntfs_inode_cachep);
unregister_filesystem(&ntfs_fs_type);
+ unregister_as_ntfs_legacy();
ntfs3_exit_bitmap();
#ifdef CONFIG_PROC_FS
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 195b077c0fac..9223856c934b 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
*/
ppage = pfn_to_online_page(pfn);
- if (!ppage || PageSlab(ppage) || page_has_type(ppage))
+ if (!ppage)
pcount = 0;
else
pcount = page_mapcount(ppage);
@@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page)
/*
* pseudo flags for the well known (anonymous) memory mapped pages
- *
- * Note that page->_mapcount is overloaded in SLAB, so the
- * simple test in page_mapped() is not enough.
*/
- if (!PageSlab(page) && page_mapped(page))
+ if (page_mapped(page))
u |= 1 << KPF_MMAP;
if (PageAnon(page))
u |= 1 << KPF_ANON;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index d41eedbff674..39277c37185c 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -389,6 +389,7 @@ cifs_alloc_inode(struct super_block *sb)
* server, can not assume caching of file data or metadata.
*/
cifs_set_oplock_level(cifs_inode, 0);
+ cifs_inode->lease_granted = false;
cifs_inode->flags = 0;
spin_lock_init(&cifs_inode->writers_lock);
cifs_inode->writers = 0;
@@ -739,6 +740,8 @@ static void cifs_umount_begin(struct super_block *sb)
spin_lock(&cifs_tcp_ses_lock);
spin_lock(&tcon->tc_lock);
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_umount);
if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) {
/* we have other mounts to same share or we have
already tried to umount this and woken up
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index d6669ce4ae87..6ff35570db81 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1190,6 +1190,7 @@ struct cifs_fattr {
*/
struct cifs_tcon {
struct list_head tcon_list;
+ int debug_id; /* Debugging for tracing */
int tc_count;
struct list_head rlist; /* reconnect list */
spinlock_t tc_lock; /* protect anything here that is not protected */
@@ -1276,7 +1277,9 @@ struct cifs_tcon {
__u32 max_cached_dirs;
#ifdef CONFIG_CIFS_FSCACHE
u64 resource_id; /* server resource id */
+ bool fscache_acquired; /* T if we've tried acquiring a cookie */
struct fscache_volume *fscache; /* cookie for share */
+ struct mutex fscache_lock; /* Prevent regetting a cookie */
#endif
struct list_head pending_opens; /* list of incomplete opens */
struct cached_fids *cfids;
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index c0513fbb8a59..c46d418c1c0c 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -882,7 +882,7 @@ typedef struct smb_com_open_rsp {
__u8 OplockLevel;
__u16 Fid;
__le32 CreateAction;
- struct_group(common_attributes,
+ struct_group_attr(common_attributes, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
@@ -2266,7 +2266,7 @@ typedef struct {
/* QueryFileInfo/QueryPathinfo (also for SetPath/SetFile) data buffer formats */
/******************************************************************************/
typedef struct { /* data block encoding of response to level 263 QPathInfo */
- struct_group(common_attributes,
+ struct_group_attr(common_attributes, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index 8e0a348f1f66..fbc358c09da3 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -303,7 +303,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
struct TCP_Server_Info *primary_server);
extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
int from_reconnect);
-extern void cifs_put_tcon(struct cifs_tcon *tcon);
+extern void cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace);
extern void cifs_release_automount_timer(void);
@@ -530,8 +530,9 @@ extern int CIFSSMBLogoff(const unsigned int xid, struct cifs_ses *ses);
extern struct cifs_ses *sesInfoAlloc(void);
extern void sesInfoFree(struct cifs_ses *);
-extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled);
-extern void tconInfoFree(struct cifs_tcon *);
+extern struct cifs_tcon *tcon_info_alloc(bool dir_leases_enabled,
+ enum smb3_tcon_ref_trace trace);
+extern void tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace);
extern int cifs_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server,
__u32 *pexpected_response_sequence_number);
@@ -721,8 +722,6 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
return options;
}
-struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
-void cifs_put_tcon_super(struct super_block *sb);
int cifs_wait_for_server_reconnect(struct TCP_Server_Info *server, bool retry);
/* Put references of @ses and its children */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 4e35970681bf..7a16e12f5da8 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -1943,7 +1943,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
}
/* no need to setup directory caching on IPC share, so pass in false */
- tcon = tcon_info_alloc(false);
+ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc);
if (tcon == NULL)
return -ENOMEM;
@@ -1960,7 +1960,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
if (rc) {
cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc);
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail);
goto out;
}
@@ -2043,7 +2043,7 @@ void __cifs_put_smb_ses(struct cifs_ses *ses)
* files on session close, as specified in MS-SMB2 3.3.5.6 Receiving an
* SMB2 LOGOFF Request.
*/
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc);
if (do_logoff) {
xid = get_xid();
rc = server->ops->logoff(xid, ses);
@@ -2432,6 +2432,8 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
continue;
}
++tcon->tc_count;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_find);
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
return tcon;
@@ -2441,7 +2443,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
}
void
-cifs_put_tcon(struct cifs_tcon *tcon)
+cifs_put_tcon(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace)
{
unsigned int xid;
struct cifs_ses *ses;
@@ -2457,6 +2459,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
spin_lock(&tcon->tc_lock);
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count - 1, trace);
if (--tcon->tc_count > 0) {
spin_unlock(&tcon->tc_lock);
spin_unlock(&cifs_tcp_ses_lock);
@@ -2493,7 +2496,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
_free_xid(xid);
cifs_fscache_release_super_cookie(tcon);
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free);
cifs_put_smb_ses(ses);
}
@@ -2547,7 +2550,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
nohandlecache = ctx->nohandlecache;
else
nohandlecache = true;
- tcon = tcon_info_alloc(!nohandlecache);
+ tcon = tcon_info_alloc(!nohandlecache, netfs_trace_tcon_ref_new);
if (tcon == NULL) {
rc = -ENOMEM;
goto out_fail;
@@ -2737,7 +2740,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
return tcon;
out_fail:
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_fail);
return ERR_PTR(rc);
}
@@ -2754,7 +2757,7 @@ cifs_put_tlink(struct tcon_link *tlink)
}
if (!IS_ERR(tlink_tcon(tlink)))
- cifs_put_tcon(tlink_tcon(tlink));
+ cifs_put_tcon(tlink_tcon(tlink), netfs_trace_tcon_ref_put_tlink);
kfree(tlink);
}
@@ -3319,7 +3322,7 @@ void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx)
int rc = 0;
if (mnt_ctx->tcon)
- cifs_put_tcon(mnt_ctx->tcon);
+ cifs_put_tcon(mnt_ctx->tcon, netfs_trace_tcon_ref_put_mnt_ctx);
else if (mnt_ctx->ses)
cifs_put_smb_ses(mnt_ctx->ses);
else if (mnt_ctx->server)
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 6c727d8c31e8..3bbac925d076 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -748,6 +748,16 @@ static int smb3_fs_context_validate(struct fs_context *fc)
/* set the port that we got earlier */
cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port);
+ if (ctx->uid_specified && !ctx->forceuid_specified) {
+ ctx->override_uid = 1;
+ pr_notice("enabling forceuid mount option implicitly because uid= option is specified\n");
+ }
+
+ if (ctx->gid_specified && !ctx->forcegid_specified) {
+ ctx->override_gid = 1;
+ pr_notice("enabling forcegid mount option implicitly because gid= option is specified\n");
+ }
+
if (ctx->override_uid && !ctx->uid_specified) {
ctx->override_uid = 0;
pr_notice("ignoring forceuid mount option specified with no uid= option\n");
@@ -1019,12 +1029,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->override_uid = 0;
else
ctx->override_uid = 1;
+ ctx->forceuid_specified = true;
break;
case Opt_forcegid:
if (result.negated)
ctx->override_gid = 0;
else
ctx->override_gid = 1;
+ ctx->forcegid_specified = true;
break;
case Opt_perm:
if (result.negated)
diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h
index a947bddeba27..cf577ec0dd0a 100644
--- a/fs/smb/client/fs_context.h
+++ b/fs/smb/client/fs_context.h
@@ -165,6 +165,8 @@ enum cifs_param {
};
struct smb3_fs_context {
+ bool forceuid_specified;
+ bool forcegid_specified;
bool uid_specified;
bool cruid_specified;
bool gid_specified;
diff --git a/fs/smb/client/fscache.c b/fs/smb/client/fscache.c
index 340efce8f052..1a895e6243ee 100644
--- a/fs/smb/client/fscache.c
+++ b/fs/smb/client/fscache.c
@@ -43,12 +43,23 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
char *key;
int ret = -ENOMEM;
+ if (tcon->fscache_acquired)
+ return 0;
+
+ mutex_lock(&tcon->fscache_lock);
+ if (tcon->fscache_acquired) {
+ mutex_unlock(&tcon->fscache_lock);
+ return 0;
+ }
+ tcon->fscache_acquired = true;
+
tcon->fscache = NULL;
switch (sa->sa_family) {
case AF_INET:
case AF_INET6:
break;
default:
+ mutex_unlock(&tcon->fscache_lock);
cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
return -EINVAL;
}
@@ -57,6 +68,7 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
sharename = extract_sharename(tcon->tree_name);
if (IS_ERR(sharename)) {
+ mutex_unlock(&tcon->fscache_lock);
cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
return PTR_ERR(sharename);
}
@@ -82,6 +94,11 @@ int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
}
pr_err("Cache volume key already in use (%s)\n", key);
vcookie = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_collision);
+ } else {
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_okay);
}
tcon->fscache = vcookie;
@@ -90,6 +107,7 @@ out_2:
kfree(key);
out:
kfree(sharename);
+ mutex_unlock(&tcon->fscache_lock);
return ret;
}
@@ -102,6 +120,8 @@ void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
cifs_fscache_fill_volume_coherency(tcon, &cd);
fscache_relinquish_volume(tcon->fscache, &cd, false);
tcon->fscache = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_fscache_relinq);
}
void cifs_fscache_get_inode_cookie(struct inode *inode)
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 7d15a1969b81..07c468ddb88a 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -111,9 +111,10 @@ sesInfoFree(struct cifs_ses *buf_to_free)
}
struct cifs_tcon *
-tcon_info_alloc(bool dir_leases_enabled)
+tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace)
{
struct cifs_tcon *ret_buf;
+ static atomic_t tcon_debug_id;
ret_buf = kzalloc(sizeof(*ret_buf), GFP_KERNEL);
if (!ret_buf)
@@ -130,7 +131,8 @@ tcon_info_alloc(bool dir_leases_enabled)
atomic_inc(&tconInfoAllocCount);
ret_buf->status = TID_NEW;
- ++ret_buf->tc_count;
+ ret_buf->debug_id = atomic_inc_return(&tcon_debug_id);
+ ret_buf->tc_count = 1;
spin_lock_init(&ret_buf->tc_lock);
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
@@ -139,17 +141,22 @@ tcon_info_alloc(bool dir_leases_enabled)
atomic_set(&ret_buf->num_local_opens, 0);
atomic_set(&ret_buf->num_remote_opens, 0);
ret_buf->stats_from_time = ktime_get_real_seconds();
+#ifdef CONFIG_CIFS_FSCACHE
+ mutex_init(&ret_buf->fscache_lock);
+#endif
+ trace_smb3_tcon_ref(ret_buf->debug_id, ret_buf->tc_count, trace);
return ret_buf;
}
void
-tconInfoFree(struct cifs_tcon *tcon)
+tconInfoFree(struct cifs_tcon *tcon, enum smb3_tcon_ref_trace trace)
{
if (tcon == NULL) {
cifs_dbg(FYI, "Null buffer passed to tconInfoFree\n");
return;
}
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count, trace);
free_cached_dirs(tcon->cfids);
atomic_dec(&tconInfoAllocCount);
kfree(tcon->nativeFileSystem);
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index cc72be5a93a9..677ef6f99a5b 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -767,7 +767,7 @@ smb2_cancelled_close_fid(struct work_struct *work)
if (rc)
cifs_tcon_dbg(VFS, "Close cancelled mid failed rc:%d\n", rc);
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close_fid);
kfree(cancelled);
}
@@ -811,6 +811,8 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
if (tcon->tc_count <= 0) {
struct TCP_Server_Info *server = NULL;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_see_cancelled_close);
WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative");
spin_unlock(&cifs_tcp_ses_lock);
@@ -823,12 +825,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
return 0;
}
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_cancelled_close);
spin_unlock(&cifs_tcp_ses_lock);
rc = __smb2_handle_cancelled_cmd(tcon, SMB2_CLOSE_HE, 0,
persistent_fid, volatile_fid);
if (rc)
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_close);
return rc;
}
@@ -856,7 +860,7 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve
rsp->PersistentFileId,
rsp->VolatileFileId);
if (rc)
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cancelled_mid);
return rc;
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 78c94d0350fe..28f0b7d19d53 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -2915,8 +2915,11 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
tcon = list_first_entry_or_null(&ses->tcon_list,
struct cifs_tcon,
tcon_list);
- if (tcon)
+ if (tcon) {
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_dfs_refer);
+ }
spin_unlock(&cifs_tcp_ses_lock);
}
@@ -2980,6 +2983,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
/* ipc tcons are not refcounted */
spin_lock(&cifs_tcp_ses_lock);
tcon->tc_count--;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_dec_dfs_refer);
/* tc_count can never go negative */
WARN_ON(tcon->tc_count < 0);
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 86c647a947cc..a5efce03cb58 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4138,6 +4138,8 @@ void smb2_reconnect_server(struct work_struct *work)
list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
if (tcon->need_reconnect || tcon->need_reopen_files) {
tcon->tc_count++;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_reconnect_server);
list_add_tail(&tcon->rlist, &tmp_list);
tcon_selected = true;
}
@@ -4176,14 +4178,14 @@ void smb2_reconnect_server(struct work_struct *work)
if (tcon->ipc)
cifs_put_smb_ses(tcon->ses);
else
- cifs_put_tcon(tcon);
+ cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_reconnect_server);
}
if (!ses_exist)
goto done;
/* allocate a dummy tcon struct used for reconnect */
- tcon = tcon_info_alloc(false);
+ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_reconnect_server);
if (!tcon) {
resched = true;
list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) {
@@ -4206,7 +4208,7 @@ void smb2_reconnect_server(struct work_struct *work)
list_del_init(&ses->rlist);
cifs_put_smb_ses(ses);
}
- tconInfoFree(tcon);
+ tconInfoFree(tcon, netfs_trace_tcon_ref_free_reconnect_server);
done:
cifs_dbg(FYI, "Reconnecting tcons and channels finished\n");
diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h
index c72a3b2886b7..2fccf0d4f53d 100644
--- a/fs/smb/client/smb2pdu.h
+++ b/fs/smb/client/smb2pdu.h
@@ -320,7 +320,7 @@ struct smb2_file_reparse_point_info {
} __packed;
struct smb2_file_network_open_info {
- struct_group(network_open_info,
+ struct_group_attr(network_open_info, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 1d6e54f7879e..02135a605305 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -189,6 +189,8 @@ smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid)
if (tcon->tid != tid)
continue;
++tcon->tc_count;
+ trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
+ netfs_trace_tcon_ref_get_find_sess_tcon);
return tcon;
}
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index 5e83cb9da902..604e52876cd2 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -3,6 +3,9 @@
* Copyright (C) 2018, Microsoft Corporation.
*
* Author(s): Steve French <stfrench@microsoft.com>
+ *
+ * Please use this 3-part article as a reference for writing new tracepoints:
+ * https://lwn.net/Articles/379903/
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM cifs
@@ -15,9 +18,70 @@
#include <linux/inet.h>
/*
- * Please use this 3-part article as a reference for writing new tracepoints:
- * https://lwn.net/Articles/379903/
+ * Specify enums for tracing information.
+ */
+#define smb3_tcon_ref_traces \
+ EM(netfs_trace_tcon_ref_dec_dfs_refer, "DEC DfsRef") \
+ EM(netfs_trace_tcon_ref_free, "FRE ") \
+ EM(netfs_trace_tcon_ref_free_fail, "FRE Fail ") \
+ EM(netfs_trace_tcon_ref_free_ipc, "FRE Ipc ") \
+ EM(netfs_trace_tcon_ref_free_ipc_fail, "FRE Ipc-F ") \
+ EM(netfs_trace_tcon_ref_free_reconnect_server, "FRE Reconn") \
+ EM(netfs_trace_tcon_ref_get_cancelled_close, "GET Cn-Cls") \
+ EM(netfs_trace_tcon_ref_get_dfs_refer, "GET DfsRef") \
+ EM(netfs_trace_tcon_ref_get_find, "GET Find ") \
+ EM(netfs_trace_tcon_ref_get_find_sess_tcon, "GET FndSes") \
+ EM(netfs_trace_tcon_ref_get_reconnect_server, "GET Reconn") \
+ EM(netfs_trace_tcon_ref_new, "NEW ") \
+ EM(netfs_trace_tcon_ref_new_ipc, "NEW Ipc ") \
+ EM(netfs_trace_tcon_ref_new_reconnect_server, "NEW Reconn") \
+ EM(netfs_trace_tcon_ref_put_cancelled_close, "PUT Cn-Cls") \
+ EM(netfs_trace_tcon_ref_put_cancelled_close_fid, "PUT Cn-Fid") \
+ EM(netfs_trace_tcon_ref_put_cancelled_mid, "PUT Cn-Mid") \
+ EM(netfs_trace_tcon_ref_put_mnt_ctx, "PUT MntCtx") \
+ EM(netfs_trace_tcon_ref_put_reconnect_server, "PUT Reconn") \
+ EM(netfs_trace_tcon_ref_put_tlink, "PUT Tlink ") \
+ EM(netfs_trace_tcon_ref_see_cancelled_close, "SEE Cn-Cls") \
+ EM(netfs_trace_tcon_ref_see_fscache_collision, "SEE FV-CO!") \
+ EM(netfs_trace_tcon_ref_see_fscache_okay, "SEE FV-Ok ") \
+ EM(netfs_trace_tcon_ref_see_fscache_relinq, "SEE FV-Rlq") \
+ E_(netfs_trace_tcon_ref_see_umount, "SEE Umount")
+
+#undef EM
+#undef E_
+
+/*
+ * Define those tracing enums.
+ */
+#ifndef __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __SMB3_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+#define EM(a, b) a,
+#define E_(a, b) a
+
+enum smb3_tcon_ref_trace { smb3_tcon_ref_traces } __mode(byte);
+
+#undef EM
+#undef E_
+#endif
+
+/*
+ * Export enum symbols via userspace.
+ */
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+smb3_tcon_ref_traces;
+
+#undef EM
+#undef E_
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
*/
+#define EM(a, b) { a, b },
+#define E_(a, b) { a, b }
/* For logging errors in read or write */
DECLARE_EVENT_CLASS(smb3_rw_err_class,
@@ -1125,6 +1189,30 @@ DEFINE_SMB3_CREDIT_EVENT(waitff_credits);
DEFINE_SMB3_CREDIT_EVENT(overflow_credits);
DEFINE_SMB3_CREDIT_EVENT(set_credits);
+
+TRACE_EVENT(smb3_tcon_ref,
+ TP_PROTO(unsigned int tcon_debug_id, int ref,
+ enum smb3_tcon_ref_trace trace),
+ TP_ARGS(tcon_debug_id, ref, trace),
+ TP_STRUCT__entry(
+ __field(unsigned int, tcon)
+ __field(int, ref)
+ __field(enum smb3_tcon_ref_trace, trace)
+ ),
+ TP_fast_assign(
+ __entry->tcon = tcon_debug_id;
+ __entry->ref = ref;
+ __entry->trace = trace;
+ ),
+ TP_printk("TC=%08x %s r=%u",
+ __entry->tcon,
+ __print_symbolic(__entry->trace, smb3_tcon_ref_traces),
+ __entry->ref)
+ );
+
+
+#undef EM
+#undef E_
#endif /* _CIFS_TRACE_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index 994d70193432..ddf1a3aafee5 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -909,12 +909,15 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
list_del_init(&mid->qhead);
mid->mid_flags |= MID_DELETED;
}
+ spin_unlock(&server->mid_lock);
cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
__func__, mid->mid, mid->mid_state);
rc = -EIO;
+ goto sync_mid_done;
}
spin_unlock(&server->mid_lock);
+sync_mid_done:
release_mid(mid);
return rc;
}
@@ -1057,9 +1060,11 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
index = (uint)atomic_inc_return(&ses->chan_seq);
index %= ses->chan_count;
}
+
+ server = ses->chans[index].server;
spin_unlock(&ses->chan_lock);
- return ses->chans[index].server;
+ return server;
}
int
diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h
index 1b594307c9d5..202ff9128156 100644
--- a/fs/smb/common/smb2pdu.h
+++ b/fs/smb/common/smb2pdu.h
@@ -711,7 +711,7 @@ struct smb2_close_rsp {
__le16 StructureSize; /* 60 */
__le16 Flags;
__le32 Reserved;
- struct_group(network_open_info,
+ struct_group_attr(network_open_info, __packed,
__le64 CreationTime;
__le64 LastAccessTime;
__le64 LastWriteTime;
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index 686b321c5a8b..f4e55199938d 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -340,23 +340,24 @@ enum KSMBD_TREE_CONN_STATUS {
/*
* Share config flags.
*/
-#define KSMBD_SHARE_FLAG_INVALID (0)
-#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
-#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
-#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
-#define KSMBD_SHARE_FLAG_READONLY BIT(3)
-#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
-#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
-#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
-#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
-#define KSMBD_SHARE_FLAG_PIPE BIT(8)
-#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
-#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
-#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
-#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
-#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
-#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
-#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
+#define KSMBD_SHARE_FLAG_INVALID (0)
+#define KSMBD_SHARE_FLAG_AVAILABLE BIT(0)
+#define KSMBD_SHARE_FLAG_BROWSEABLE BIT(1)
+#define KSMBD_SHARE_FLAG_WRITEABLE BIT(2)
+#define KSMBD_SHARE_FLAG_READONLY BIT(3)
+#define KSMBD_SHARE_FLAG_GUEST_OK BIT(4)
+#define KSMBD_SHARE_FLAG_GUEST_ONLY BIT(5)
+#define KSMBD_SHARE_FLAG_STORE_DOS_ATTRS BIT(6)
+#define KSMBD_SHARE_FLAG_OPLOCKS BIT(7)
+#define KSMBD_SHARE_FLAG_PIPE BIT(8)
+#define KSMBD_SHARE_FLAG_HIDE_DOT_FILES BIT(9)
+#define KSMBD_SHARE_FLAG_INHERIT_OWNER BIT(10)
+#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
+#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
+#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
+#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
+#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
+#define KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY BIT(16)
/*
* Tree connect request flags.
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index c0788188aa82..c67fbc8d6683 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -167,20 +167,17 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
int rc;
bool is_chained = false;
- if (conn->ops->allocate_rsp_buf(work))
- return;
-
if (conn->ops->is_transform_hdr &&
conn->ops->is_transform_hdr(work->request_buf)) {
rc = conn->ops->decrypt_req(work);
- if (rc < 0) {
- conn->ops->set_rsp_status(work, STATUS_DATA_ERROR);
- goto send;
- }
-
+ if (rc < 0)
+ return;
work->encrypted = true;
}
+ if (conn->ops->allocate_rsp_buf(work))
+ return;
+
rc = conn->ops->init_rsp_hdr(work);
if (rc) {
/* either uid or tid is not correct */
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 5723bbf372d7..355824151c2d 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -535,6 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
if (cmd == SMB2_QUERY_INFO_HE) {
struct smb2_query_info_req *req;
+ if (get_rfc1002_len(work->request_buf) <
+ offsetof(struct smb2_query_info_req, OutputBufferLength))
+ return -EINVAL;
+
req = smb2_get_msg(work->request_buf);
if ((req->InfoType == SMB2_O_INFO_FILE &&
(req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
@@ -1984,7 +1988,12 @@ int smb2_tree_connect(struct ksmbd_work *work)
write_unlock(&sess->tree_conns_lock);
rsp->StructureSize = cpu_to_le16(16);
out_err1:
- rsp->Capabilities = 0;
+ if (server_conf.flags & KSMBD_GLOBAL_FLAG_DURABLE_HANDLE &&
+ test_share_config_flag(share,
+ KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY))
+ rsp->Capabilities = SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY;
+ else
+ rsp->Capabilities = 0;
rsp->Reserved = 0;
/* default manual caching */
rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
@@ -3498,7 +3507,9 @@ int smb2_open(struct ksmbd_work *work)
memcpy(fp->client_guid, conn->ClientGUID, SMB2_CLIENT_GUID_SIZE);
if (dh_info.type == DURABLE_REQ_V2 || dh_info.type == DURABLE_REQ) {
- if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent)
+ if (dh_info.type == DURABLE_REQ_V2 && dh_info.persistent &&
+ test_share_config_flag(work->tcon->share_conf,
+ KSMBD_SHARE_FLAG_CONTINUOUS_AVAILABILITY))
fp->is_persistent = true;
else
fp->is_durable = true;
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index 22f0f3db3ac9..51b1b0bed616 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -754,10 +754,15 @@ retry:
goto out4;
}
+ /*
+ * explicitly handle file overwrite case, for compatibility with
+ * filesystems that may not support rename flags (e.g: fuse)
+ */
if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) {
err = -EEXIST;
goto out4;
}
+ flags &= ~(RENAME_NOREPLACE);
if (old_child == trap) {
err = -EINVAL;
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 894c6ca1e500..a878cea70f4c 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -37,6 +37,7 @@ static DEFINE_MUTEX(eventfs_mutex);
struct eventfs_root_inode {
struct eventfs_inode ei;
+ struct inode *parent_inode;
struct dentry *events_dir;
};
@@ -68,11 +69,25 @@ enum {
EVENTFS_SAVE_MODE = BIT(16),
EVENTFS_SAVE_UID = BIT(17),
EVENTFS_SAVE_GID = BIT(18),
- EVENTFS_TOPLEVEL = BIT(19),
};
#define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1)
+static void free_ei_rcu(struct rcu_head *rcu)
+{
+ struct eventfs_inode *ei = container_of(rcu, struct eventfs_inode, rcu);
+ struct eventfs_root_inode *rei;
+
+ kfree(ei->entry_attrs);
+ kfree_const(ei->name);
+ if (ei->is_events) {
+ rei = get_root_inode(ei);
+ kfree(rei);
+ } else {
+ kfree(ei);
+ }
+}
+
/*
* eventfs_inode reference count management.
*
@@ -84,18 +99,17 @@ enum {
static void release_ei(struct kref *ref)
{
struct eventfs_inode *ei = container_of(ref, struct eventfs_inode, kref);
- struct eventfs_root_inode *rei;
+ const struct eventfs_entry *entry;
WARN_ON_ONCE(!ei->is_freed);
- kfree(ei->entry_attrs);
- kfree_const(ei->name);
- if (ei->is_events) {
- rei = get_root_inode(ei);
- kfree_rcu(rei, ei.rcu);
- } else {
- kfree_rcu(ei, rcu);
+ for (int i = 0; i < ei->nr_entries; i++) {
+ entry = &ei->entries[i];
+ if (entry->release)
+ entry->release(entry->name, ei->data);
}
+
+ call_rcu(&ei->rcu, free_ei_rcu);
}
static inline void put_ei(struct eventfs_inode *ei)
@@ -112,6 +126,18 @@ static inline void free_ei(struct eventfs_inode *ei)
}
}
+/*
+ * Called when creation of an ei fails, do not call release() functions.
+ */
+static inline void cleanup_ei(struct eventfs_inode *ei)
+{
+ if (ei) {
+ /* Set nr_entries to 0 to prevent release() function being called */
+ ei->nr_entries = 0;
+ free_ei(ei);
+ }
+}
+
static inline struct eventfs_inode *get_ei(struct eventfs_inode *ei)
{
if (ei)
@@ -181,21 +207,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
* determined by the parent directory.
*/
if (dentry->d_inode->i_mode & S_IFDIR) {
- /*
- * The events directory dentry is never freed, unless its
- * part of an instance that is deleted. It's attr is the
- * default for its child files and directories.
- * Do not update it. It's not used for its own mode or ownership.
- */
- if (ei->is_events) {
- /* But it still needs to know if it was modified */
- if (iattr->ia_valid & ATTR_UID)
- ei->attr.mode |= EVENTFS_SAVE_UID;
- if (iattr->ia_valid & ATTR_GID)
- ei->attr.mode |= EVENTFS_SAVE_GID;
- } else {
- update_attr(&ei->attr, iattr);
- }
+ update_attr(&ei->attr, iattr);
} else {
name = dentry->d_name.name;
@@ -213,18 +225,25 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
return ret;
}
-static void update_top_events_attr(struct eventfs_inode *ei, struct super_block *sb)
+static void update_events_attr(struct eventfs_inode *ei, struct super_block *sb)
{
- struct inode *root;
+ struct eventfs_root_inode *rei;
+ struct inode *parent;
- /* Only update if the "events" was on the top level */
- if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
- return;
+ rei = get_root_inode(ei);
- /* Get the tracefs root inode. */
- root = d_inode(sb->s_root);
- ei->attr.uid = root->i_uid;
- ei->attr.gid = root->i_gid;
+ /* Use the parent inode permissions unless root set its permissions */
+ parent = rei->parent_inode;
+
+ if (rei->ei.attr.mode & EVENTFS_SAVE_UID)
+ ei->attr.uid = rei->ei.attr.uid;
+ else
+ ei->attr.uid = parent->i_uid;
+
+ if (rei->ei.attr.mode & EVENTFS_SAVE_GID)
+ ei->attr.gid = rei->ei.attr.gid;
+ else
+ ei->attr.gid = parent->i_gid;
}
static void set_top_events_ownership(struct inode *inode)
@@ -233,10 +252,10 @@ static void set_top_events_ownership(struct inode *inode)
struct eventfs_inode *ei = ti->private;
/* The top events directory doesn't get automatically updated */
- if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
+ if (!ei || !ei->is_events)
return;
- update_top_events_attr(ei, inode->i_sb);
+ update_events_attr(ei, inode->i_sb);
if (!(ei->attr.mode & EVENTFS_SAVE_UID))
inode->i_uid = ei->attr.uid;
@@ -265,7 +284,7 @@ static int eventfs_permission(struct mnt_idmap *idmap,
return generic_permission(idmap, inode, mask);
}
-static const struct inode_operations eventfs_root_dir_inode_operations = {
+static const struct inode_operations eventfs_dir_inode_operations = {
.lookup = eventfs_root_lookup,
.setattr = eventfs_set_attr,
.getattr = eventfs_get_attr,
@@ -282,6 +301,35 @@ static const struct file_operations eventfs_file_operations = {
.llseek = generic_file_llseek,
};
+/*
+ * On a remount of tracefs, if UID or GID options are set, then
+ * the mount point inode permissions should be used.
+ * Reset the saved permission flags appropriately.
+ */
+void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid)
+{
+ struct eventfs_inode *ei = ti->private;
+
+ if (!ei)
+ return;
+
+ if (update_uid)
+ ei->attr.mode &= ~EVENTFS_SAVE_UID;
+
+ if (update_gid)
+ ei->attr.mode &= ~EVENTFS_SAVE_GID;
+
+ if (!ei->entry_attrs)
+ return;
+
+ for (int i = 0; i < ei->nr_entries; i++) {
+ if (update_uid)
+ ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID;
+ if (update_gid)
+ ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID;
+ }
+}
+
/* Return the evenfs_inode of the "events" directory */
static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
{
@@ -304,7 +352,7 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
// Walk upwards until you find the events inode
} while (!ei->is_events);
- update_top_events_attr(ei, dentry->d_sb);
+ update_events_attr(ei, dentry->d_sb);
return ei;
}
@@ -410,7 +458,7 @@ static struct dentry *lookup_dir_entry(struct dentry *dentry,
update_inode_attr(dentry, inode, &ei->attr,
S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
- inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_op = &eventfs_dir_inode_operations;
inode->i_fop = &eventfs_file_operations;
/* All directories will have the same inode number */
@@ -734,7 +782,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode
/* Was the parent freed? */
if (list_empty(&ei->list)) {
- free_ei(ei);
+ cleanup_ei(ei);
ei = NULL;
}
return ei;
@@ -781,6 +829,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
// Note: we have a ref to the dentry from tracefs_start_creating()
rei = get_root_inode(ei);
rei->events_dir = dentry;
+ rei->parent_inode = d_inode(dentry->d_sb->s_root);
ei->entries = entries;
ei->nr_entries = size;
@@ -790,29 +839,26 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
uid = d_inode(dentry->d_parent)->i_uid;
gid = d_inode(dentry->d_parent)->i_gid;
- /*
- * If the events directory is of the top instance, then parent
- * is NULL. Set the attr.mode to reflect this and its permissions will
- * default to the tracefs root dentry.
- */
- if (!parent)
- ei->attr.mode = EVENTFS_TOPLEVEL;
-
- /* This is used as the default ownership of the files and directories */
ei->attr.uid = uid;
ei->attr.gid = gid;
+ /*
+ * When the "events" directory is created, it takes on the
+ * permissions of its parent. But can be reset on remount.
+ */
+ ei->attr.mode |= EVENTFS_SAVE_UID | EVENTFS_SAVE_GID;
+
INIT_LIST_HEAD(&ei->children);
INIT_LIST_HEAD(&ei->list);
ti = get_tracefs(inode);
- ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
+ ti->flags |= TRACEFS_EVENT_INODE;
ti->private = ei;
inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
inode->i_uid = uid;
inode->i_gid = gid;
- inode->i_op = &eventfs_root_dir_inode_operations;
+ inode->i_op = &eventfs_dir_inode_operations;
inode->i_fop = &eventfs_file_operations;
dentry->d_fsdata = get_ei(ei);
@@ -835,7 +881,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry
return ei;
fail:
- free_ei(ei);
+ cleanup_ei(ei);
tracefs_failed_creating(dentry);
return ERR_PTR(-ENOMEM);
}
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 5545e6bf7d26..417c840e6403 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -30,20 +30,47 @@ static struct vfsmount *tracefs_mount;
static int tracefs_mount_count;
static bool tracefs_registered;
+/*
+ * Keep track of all tracefs_inodes in order to update their
+ * flags if necessary on a remount.
+ */
+static DEFINE_SPINLOCK(tracefs_inode_lock);
+static LIST_HEAD(tracefs_inodes);
+
static struct inode *tracefs_alloc_inode(struct super_block *sb)
{
struct tracefs_inode *ti;
+ unsigned long flags;
ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL);
if (!ti)
return NULL;
+ spin_lock_irqsave(&tracefs_inode_lock, flags);
+ list_add_rcu(&ti->list, &tracefs_inodes);
+ spin_unlock_irqrestore(&tracefs_inode_lock, flags);
+
return &ti->vfs_inode;
}
+static void tracefs_free_inode_rcu(struct rcu_head *rcu)
+{
+ struct tracefs_inode *ti;
+
+ ti = container_of(rcu, struct tracefs_inode, rcu);
+ kmem_cache_free(tracefs_inode_cachep, ti);
+}
+
static void tracefs_free_inode(struct inode *inode)
{
- kmem_cache_free(tracefs_inode_cachep, get_tracefs(inode));
+ struct tracefs_inode *ti = get_tracefs(inode);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tracefs_inode_lock, flags);
+ list_del_rcu(&ti->list);
+ spin_unlock_irqrestore(&tracefs_inode_lock, flags);
+
+ call_rcu(&ti->rcu, tracefs_free_inode_rcu);
}
static ssize_t default_read_file(struct file *file, char __user *buf,
@@ -153,16 +180,39 @@ static void set_tracefs_inode_owner(struct inode *inode)
{
struct tracefs_inode *ti = get_tracefs(inode);
struct inode *root_inode = ti->private;
+ kuid_t uid;
+ kgid_t gid;
+
+ uid = root_inode->i_uid;
+ gid = root_inode->i_gid;
+
+ /*
+ * If the root is not the mount point, then check the root's
+ * permissions. If it was never set, then default to the
+ * mount point.
+ */
+ if (root_inode != d_inode(root_inode->i_sb->s_root)) {
+ struct tracefs_inode *rti;
+
+ rti = get_tracefs(root_inode);
+ root_inode = d_inode(root_inode->i_sb->s_root);
+
+ if (!(rti->flags & TRACEFS_UID_PERM_SET))
+ uid = root_inode->i_uid;
+
+ if (!(rti->flags & TRACEFS_GID_PERM_SET))
+ gid = root_inode->i_gid;
+ }
/*
* If this inode has never been referenced, then update
* the permissions to the superblock.
*/
if (!(ti->flags & TRACEFS_UID_PERM_SET))
- inode->i_uid = root_inode->i_uid;
+ inode->i_uid = uid;
if (!(ti->flags & TRACEFS_GID_PERM_SET))
- inode->i_gid = root_inode->i_gid;
+ inode->i_gid = gid;
}
static int tracefs_permission(struct mnt_idmap *idmap,
@@ -313,6 +363,8 @@ static int tracefs_apply_options(struct super_block *sb, bool remount)
struct tracefs_fs_info *fsi = sb->s_fs_info;
struct inode *inode = d_inode(sb->s_root);
struct tracefs_mount_opts *opts = &fsi->mount_opts;
+ struct tracefs_inode *ti;
+ bool update_uid, update_gid;
umode_t tmp_mode;
/*
@@ -332,6 +384,25 @@ static int tracefs_apply_options(struct super_block *sb, bool remount)
if (!remount || opts->opts & BIT(Opt_gid))
inode->i_gid = opts->gid;
+ if (remount && (opts->opts & BIT(Opt_uid) || opts->opts & BIT(Opt_gid))) {
+
+ update_uid = opts->opts & BIT(Opt_uid);
+ update_gid = opts->opts & BIT(Opt_gid);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ti, &tracefs_inodes, list) {
+ if (update_uid)
+ ti->flags &= ~TRACEFS_UID_PERM_SET;
+
+ if (update_gid)
+ ti->flags &= ~TRACEFS_GID_PERM_SET;
+
+ if (ti->flags & TRACEFS_EVENT_INODE)
+ eventfs_remount(ti, update_uid, update_gid);
+ }
+ rcu_read_unlock();
+ }
+
return 0;
}
@@ -398,7 +469,22 @@ static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags)
return !(ei && ei->is_freed);
}
+static void tracefs_d_iput(struct dentry *dentry, struct inode *inode)
+{
+ struct tracefs_inode *ti = get_tracefs(inode);
+
+ /*
+ * This inode is being freed and cannot be used for
+ * eventfs. Clear the flag so that it doesn't call into
+ * eventfs during the remount flag updates. The eventfs_inode
+ * gets freed after an RCU cycle, so the content will still
+ * be safe if the iteration is going on now.
+ */
+ ti->flags &= ~TRACEFS_EVENT_INODE;
+}
+
static const struct dentry_operations tracefs_dentry_operations = {
+ .d_iput = tracefs_d_iput,
.d_revalidate = tracefs_d_revalidate,
.d_release = tracefs_d_release,
};
diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h
index 15c26f9aaad4..f704d8348357 100644
--- a/fs/tracefs/internal.h
+++ b/fs/tracefs/internal.h
@@ -4,15 +4,18 @@
enum {
TRACEFS_EVENT_INODE = BIT(1),
- TRACEFS_EVENT_TOP_INODE = BIT(2),
- TRACEFS_GID_PERM_SET = BIT(3),
- TRACEFS_UID_PERM_SET = BIT(4),
- TRACEFS_INSTANCE_INODE = BIT(5),
+ TRACEFS_GID_PERM_SET = BIT(2),
+ TRACEFS_UID_PERM_SET = BIT(3),
+ TRACEFS_INSTANCE_INODE = BIT(4),
};
struct tracefs_inode {
- struct inode vfs_inode;
+ union {
+ struct inode vfs_inode;
+ struct rcu_head rcu;
+ };
/* The below gets initialized with memset_after(ti, 0, vfs_inode) */
+ struct list_head list;
unsigned long flags;
void *private;
};
@@ -73,6 +76,7 @@ struct dentry *tracefs_end_creating(struct dentry *dentry);
struct dentry *tracefs_failed_creating(struct dentry *dentry);
struct inode *tracefs_get_inode(struct super_block *sb);
+void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid);
void eventfs_d_release(struct dentry *dentry);
#endif /* _TRACEFS_INTERNAL_H */