summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/inode.c5
-rw-r--r--fs/affs/file.c22
-rw-r--r--fs/afs/inode.c11
-rw-r--r--fs/aio.c2
-rw-r--r--fs/attr.c14
-rw-r--r--fs/autofs/autofs_i.h4
-rw-r--r--fs/autofs/dev-ioctl.c97
-rw-r--r--fs/autofs/expire.c7
-rw-r--r--fs/autofs/inode.c5
-rw-r--r--fs/backing-file.c5
-rw-r--r--fs/bcachefs/alloc_background.c90
-rw-r--r--fs/bcachefs/alloc_background_format.h1
-rw-r--r--fs/bcachefs/bcachefs_format.h3
-rw-r--r--fs/bcachefs/btree_cache.c25
-rw-r--r--fs/bcachefs/btree_cache.h2
-rw-r--r--fs/bcachefs/btree_iter.h9
-rw-r--r--fs/bcachefs/btree_journal_iter.c2
-rw-r--r--fs/bcachefs/btree_key_cache.c31
-rw-r--r--fs/bcachefs/btree_update_interior.c46
-rw-r--r--fs/bcachefs/buckets.c89
-rw-r--r--fs/bcachefs/buckets_waiting_for_journal.c4
-rw-r--r--fs/bcachefs/data_update.c210
-rw-r--r--fs/bcachefs/ec.h4
-rw-r--r--fs/bcachefs/errcode.h1
-rw-r--r--fs/bcachefs/extents.c90
-rw-r--r--fs/bcachefs/extents.h24
-rw-r--r--fs/bcachefs/fs-io-buffered.c159
-rw-r--r--fs/bcachefs/fs-io-buffered.h6
-rw-r--r--fs/bcachefs/fs-ioctl.c3
-rw-r--r--fs/bcachefs/fs.c18
-rw-r--r--fs/bcachefs/fs.h7
-rw-r--r--fs/bcachefs/fsck.c24
-rw-r--r--fs/bcachefs/journal.c2
-rw-r--r--fs/bcachefs/journal_sb.c15
-rw-r--r--fs/bcachefs/movinggc.c2
-rw-r--r--fs/bcachefs/recovery.c9
-rw-r--r--fs/bcachefs/replicas.c8
-rw-r--r--fs/bcachefs/sb-downgrade.c8
-rw-r--r--fs/bcachefs/sb-errors_format.h10
-rw-r--r--fs/bcachefs/sb-members.c3
-rw-r--r--fs/bcachefs/sb-members_format.h5
-rw-r--r--fs/bcachefs/sysfs.c2
-rw-r--r--fs/bcachefs/util.c1
-rw-r--r--fs/bcachefs/xattr.c12
-rw-r--r--fs/bfs/file.c4
-rw-r--r--fs/binfmt_elf_fdpic.c3
-rw-r--r--fs/btrfs/bio.c26
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/direct-io.c16
-rw-r--r--fs/btrfs/fiemap.c2
-rw-r--r--fs/btrfs/file.c9
-rw-r--r--fs/btrfs/qgroup.c5
-rw-r--r--fs/btrfs/space-info.c17
-rw-r--r--fs/btrfs/space-info.h2
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/zoned.c30
-rw-r--r--fs/buffer.c71
-rw-r--r--fs/ceph/addr.c13
-rw-r--r--fs/ceph/inode.c1
-rw-r--r--fs/coda/inode.c43
-rw-r--r--fs/dcache.c19
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/direct-io.c6
-rw-r--r--fs/ecryptfs/mmap.c86
-rw-r--r--fs/erofs/dir.c35
-rw-r--r--fs/erofs/inode.c18
-rw-r--r--fs/erofs/internal.h2
-rw-r--r--fs/erofs/super.c26
-rw-r--r--fs/erofs/zutil.c3
-rw-r--r--fs/eventpoll.c7
-rw-r--r--fs/exec.c31
-rw-r--r--fs/exfat/file.c8
-rw-r--r--fs/exfat/inode.c9
-rw-r--r--fs/ext2/dir.c4
-rw-r--r--fs/ext2/inode.c8
-rw-r--r--fs/ext4/ext4.h4
-rw-r--r--fs/ext4/inline.c14
-rw-r--r--fs/ext4/inode.c37
-rw-r--r--fs/ext4/verity.c8
-rw-r--r--fs/f2fs/data.c87
-rw-r--r--fs/f2fs/super.c8
-rw-r--r--fs/f2fs/verity.c8
-rw-r--r--fs/fat/inode.c9
-rw-r--r--fs/fcntl.c10
-rw-r--r--fs/fhandle.c29
-rw-r--r--fs/file.c2
-rw-r--r--fs/file_table.c5
-rw-r--r--fs/fs-writeback.c67
-rw-r--r--fs/fuse/dev.c14
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c55
-rw-r--r--fs/fuse/inode.c7
-rw-r--r--fs/fuse/xattr.c4
-rw-r--r--fs/hfs/extent.c6
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c5
-rw-r--r--fs/hfsplus/extents.c6
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hfsplus/inode.c5
-rw-r--r--fs/hostfs/hostfs_kern.c23
-rw-r--r--fs/hpfs/file.c9
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c116
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/jffs2/file.c88
-rw-r--r--fs/jffs2/gc.c25
-rw-r--r--fs/jfs/inode.c8
-rw-r--r--fs/libfs.c47
-rw-r--r--fs/minix/dir.c134
-rw-r--r--fs/minix/inode.c8
-rw-r--r--fs/minix/minix.h40
-rw-r--r--fs/minix/namei.c32
-rw-r--r--fs/mnt_idmapping.c12
-rw-r--r--fs/mount.h1
-rw-r--r--fs/namei.c85
-rw-r--r--fs/namespace.c18
-rw-r--r--fs/netfs/fscache_main.c1
-rw-r--r--fs/netfs/io.c22
-rw-r--r--fs/netfs/locking.c22
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/netfs/misc.c60
-rw-r--r--fs/netfs/write_collect.c7
-rw-r--r--fs/nfs/callback_xdr.c6
-rw-r--r--fs/nfs/delegation.c15
-rw-r--r--fs/nfs/file.c7
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/nfs/pnfs.c5
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/nfs4state.c62
-rw-r--r--fs/nfsd/nfs4xdr.c6
-rw-r--r--fs/nfsd/state.h2
-rw-r--r--fs/nilfs2/dir.c4
-rw-r--r--fs/nilfs2/inode.c10
-rw-r--r--fs/nilfs2/recovery.c51
-rw-r--r--fs/nilfs2/segment.c10
-rw-r--r--fs/nilfs2/sysfs.c43
-rw-r--r--fs/ntfs3/file.c9
-rw-r--r--fs/ntfs3/inode.c51
-rw-r--r--fs/ntfs3/ntfs_fs.h5
-rw-r--r--fs/ocfs2/aops.c12
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/file.c17
-rw-r--r--fs/ocfs2/mmap.c6
-rw-r--r--fs/omfs/file.c4
-rw-r--r--fs/orangefs/inode.c39
-rw-r--r--fs/overlayfs/params.c51
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/posix_acl.c4
-rw-r--r--fs/proc/base.c10
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/qnx6/dir.c88
-rw-r--r--fs/qnx6/inode.c25
-rw-r--r--fs/qnx6/namei.c4
-rw-r--r--fs/qnx6/qnx6.h9
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/reiserfs/inode.c57
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/select.c2
-rw-r--r--fs/smb/client/cifsencrypt.c2
-rw-r--r--fs/smb/client/cifsfs.c27
-rw-r--r--fs/smb/client/cifsglob.h8
-rw-r--r--fs/smb/client/cifssmb.c55
-rw-r--r--fs/smb/client/connect.c17
-rw-r--r--fs/smb/client/file.c41
-rw-r--r--fs/smb/client/inode.c2
-rw-r--r--fs/smb/client/ioctl.c2
-rw-r--r--fs/smb/client/link.c1
-rw-r--r--fs/smb/client/reparse.c11
-rw-r--r--fs/smb/client/smb2inode.c3
-rw-r--r--fs/smb/client/smb2ops.c50
-rw-r--r--fs/smb/client/smb2pdu.c85
-rw-r--r--fs/smb/client/trace.h1
-rw-r--r--fs/smb/server/connection.c34
-rw-r--r--fs/smb/server/connection.h3
-rw-r--r--fs/smb/server/mgmt/user_session.c9
-rw-r--r--fs/smb/server/oplock.c2
-rw-r--r--fs/smb/server/smb2pdu.c35
-rw-r--r--fs/smb/server/smb_common.h4
-rw-r--r--fs/smb/server/transport_tcp.c4
-rw-r--r--fs/smb/server/xattr.h2
-rw-r--r--fs/squashfs/file.c86
-rw-r--r--fs/squashfs/file_direct.c19
-rw-r--r--fs/squashfs/page_actor.c11
-rw-r--r--fs/squashfs/page_actor.h6
-rw-r--r--fs/super.c8
-rw-r--r--fs/sysv/dir.c158
-rw-r--r--fs/sysv/itree.c8
-rw-r--r--fs/sysv/namei.c32
-rw-r--r--fs/sysv/sysv.h20
-rw-r--r--fs/tracefs/event_inode.c2
-rw-r--r--fs/ubifs/file.c13
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/inode.c12
-rw-r--r--fs/ufs/dir.c231
-rw-r--r--fs/ufs/inode.c12
-rw-r--r--fs/ufs/namei.c39
-rw-r--r--fs/ufs/ufs.h20
-rw-r--r--fs/ufs/util.h6
-rw-r--r--fs/vboxsf/file.c24
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c14
-rw-r--r--fs/xfs/scrub/xfile.c2
-rw-r--r--fs/xfs/xfs_discard.c36
-rw-r--r--fs/xfs/xfs_fsmap.c30
-rw-r--r--fs/xfs/xfs_rtalloc.c78
207 files changed, 2709 insertions, 1982 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index a183e213a4a5..21527189e430 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -55,12 +55,11 @@ static void adfs_write_failed(struct address_space *mapping, loff_t to)
static int adfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- *pagep = NULL;
- ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata,
+ ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
adfs_get_block,
&ADFS_I(mapping->host)->mmu_private);
if (unlikely(ret))
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 04c018e19602..a5a861dd5223 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -417,12 +417,11 @@ affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static int affs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- *pagep = NULL;
- ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata,
+ ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
affs_get_block,
&AFFS_I(mapping->host)->mmu_private);
if (unlikely(ret))
@@ -433,12 +432,12 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
static int affs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned int len, unsigned int copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
/* Clear Archived bit on file writes, as AmigaOS would do */
if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
@@ -648,7 +647,7 @@ static int affs_read_folio_ofs(struct file *file, struct folio *folio)
static int affs_write_begin_ofs(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
struct folio *folio;
@@ -671,7 +670,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
- *pagep = &folio->page;
+ *foliop = folio;
if (folio_test_uptodate(folio))
return 0;
@@ -687,9 +686,8 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
struct super_block *sb = inode->i_sb;
struct buffer_head *bh, *prev_bh;
@@ -882,14 +880,14 @@ affs_truncate(struct inode *inode)
if (inode->i_size > AFFS_I(inode)->mmu_private) {
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
loff_t isize = inode->i_size;
int res;
- res = mapping->a_ops->write_begin(NULL, mapping, isize, 0, &page, &fsdata);
+ res = mapping->a_ops->write_begin(NULL, mapping, isize, 0, &folio, &fsdata);
if (!res)
- res = mapping->a_ops->write_end(NULL, mapping, isize, 0, 0, page, fsdata);
+ res = mapping->a_ops->write_end(NULL, mapping, isize, 0, 0, folio, fsdata);
else
inode->i_size = AFFS_I(inode)->mmu_private;
mark_inode_dirty(inode);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 3acf5e050072..a95e77670b49 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -695,13 +695,18 @@ static void afs_setattr_edit_file(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
struct afs_vnode *vnode = vp->vnode;
+ struct inode *inode = &vnode->netfs.inode;
if (op->setattr.attr->ia_valid & ATTR_SIZE) {
loff_t size = op->setattr.attr->ia_size;
- loff_t i_size = op->setattr.old_i_size;
+ loff_t old = op->setattr.old_i_size;
+
+ /* Note: inode->i_size was updated by afs_apply_status() inside
+ * the I/O and callback locks.
+ */
- if (size != i_size) {
- truncate_setsize(&vnode->netfs.inode, size);
+ if (size != old) {
+ truncate_pagecache(inode, size);
netfs_resize_file(&vnode->netfs, size, true);
fscache_resize_cookie(afs_vnode_cache(vnode), size);
}
diff --git a/fs/aio.c b/fs/aio.c
index 6066f64967b3..e8920178b50f 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -100,7 +100,7 @@ struct kioctx {
unsigned long user_id;
- struct __percpu kioctx_cpu *cpu;
+ struct kioctx_cpu __percpu *cpu;
/*
* For percpu reqs_available, number of slots we move to/from global
diff --git a/fs/attr.c b/fs/attr.c
index 825007d5cda4..c04d19b58f12 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -487,9 +487,17 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
error = security_inode_setattr(idmap, dentry, attr);
if (error)
return error;
- error = try_break_deleg(inode, delegated_inode);
- if (error)
- return error;
+
+ /*
+ * If ATTR_DELEG is set, then these attributes are being set on
+ * behalf of the holder of a write delegation. We want to avoid
+ * breaking the delegation in this case.
+ */
+ if (!(ia_valid & ATTR_DELEG)) {
+ error = try_break_deleg(inode, delegated_inode);
+ if (error)
+ return error;
+ }
if (inode->i_op->setattr)
error = inode->i_op->setattr(idmap, dentry, attr);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 8c1d587b3eef..77c7991d89aa 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -62,6 +62,7 @@ struct autofs_info {
struct list_head expiring;
struct autofs_sb_info *sbi;
+ unsigned long exp_timeout;
unsigned long last_used;
int count;
@@ -81,6 +82,9 @@ struct autofs_info {
*/
#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
+#define AUTOFS_INF_EXPIRE_SET (1<<3) /* per-dentry expire timeout set for
+ this mount point.
+ */
struct autofs_wait_queue {
wait_queue_head_t queue;
struct autofs_wait_queue *next;
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 5bf781ea6d67..f011e026358e 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -128,7 +128,13 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
goto out;
}
+ /* Setting the per-dentry expire timeout requires a trailing
+ * path component, ie. no '/', so invert the logic of the
+ * check_name() return for AUTOFS_DEV_IOCTL_TIMEOUT_CMD.
+ */
err = check_name(param->path);
+ if (cmd == AUTOFS_DEV_IOCTL_TIMEOUT_CMD)
+ err = err ? 0 : -EINVAL;
if (err) {
pr_warn("invalid path supplied for cmd(0x%08x)\n",
cmd);
@@ -396,16 +402,97 @@ static int autofs_dev_ioctl_catatonic(struct file *fp,
return 0;
}
-/* Set the autofs mount timeout */
+/*
+ * Set the autofs mount expire timeout.
+ *
+ * There are two places an expire timeout can be set, in the autofs
+ * super block info. (this is all that's needed for direct and offset
+ * mounts because there's a distinct mount corresponding to each of
+ * these) and per-dentry within within the dentry info. If a per-dentry
+ * timeout is set it will override the expire timeout set in the parent
+ * autofs super block info.
+ *
+ * If setting the autofs super block expire timeout the autofs_dev_ioctl
+ * size field will be equal to the autofs_dev_ioctl structure size. If
+ * setting the per-dentry expire timeout the mount point name is passed
+ * in the autofs_dev_ioctl path field and the size field updated to
+ * reflect this.
+ *
+ * Setting the autofs mount expire timeout sets the timeout in the super
+ * block info. struct. Setting the per-dentry timeout does a little more.
+ * If the timeout is equal to -1 the per-dentry timeout (and flag) is
+ * cleared which reverts to using the super block timeout, otherwise if
+ * timeout is 0 the timeout is set to this value and the flag is left
+ * set which disables expiration for the mount point, lastly the flag
+ * and the timeout are set enabling the dentry to use this timeout.
+ */
static int autofs_dev_ioctl_timeout(struct file *fp,
struct autofs_sb_info *sbi,
struct autofs_dev_ioctl *param)
{
- unsigned long timeout;
+ unsigned long timeout = param->timeout.timeout;
+
+ /* If setting the expire timeout for an individual indirect
+ * mount point dentry the mount trailing component path is
+ * placed in param->path and param->size adjusted to account
+ * for it otherwise param->size it is set to the structure
+ * size.
+ */
+ if (param->size == AUTOFS_DEV_IOCTL_SIZE) {
+ param->timeout.timeout = sbi->exp_timeout / HZ;
+ sbi->exp_timeout = timeout * HZ;
+ } else {
+ struct dentry *base = fp->f_path.dentry;
+ struct inode *inode = base->d_inode;
+ int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1;
+ struct dentry *dentry;
+ struct autofs_info *ino;
+
+ if (!autofs_type_indirect(sbi->type))
+ return -EINVAL;
+
+ /* An expire timeout greater than the superblock timeout
+ * could be a problem at shutdown but the super block
+ * timeout itself can change so all we can really do is
+ * warn the user.
+ */
+ if (timeout >= sbi->exp_timeout)
+ pr_warn("per-mount expire timeout is greater than "
+ "the parent autofs mount timeout which could "
+ "prevent shutdown\n");
+
+ inode_lock_shared(inode);
+ dentry = try_lookup_one_len(param->path, base, path_len);
+ inode_unlock_shared(inode);
+ if (IS_ERR_OR_NULL(dentry))
+ return dentry ? PTR_ERR(dentry) : -ENOENT;
+ ino = autofs_dentry_ino(dentry);
+ if (!ino) {
+ dput(dentry);
+ return -ENOENT;
+ }
+
+ if (ino->exp_timeout && ino->flags & AUTOFS_INF_EXPIRE_SET)
+ param->timeout.timeout = ino->exp_timeout / HZ;
+ else
+ param->timeout.timeout = sbi->exp_timeout / HZ;
+
+ if (timeout == -1) {
+ /* Revert to using the super block timeout */
+ ino->flags &= ~AUTOFS_INF_EXPIRE_SET;
+ ino->exp_timeout = 0;
+ } else {
+ /* Set the dentry expire flag and timeout.
+ *
+ * If timeout is 0 it will prevent the expire
+ * of this particular automount.
+ */
+ ino->flags |= AUTOFS_INF_EXPIRE_SET;
+ ino->exp_timeout = timeout * HZ;
+ }
+ dput(dentry);
+ }
- timeout = param->timeout.timeout;
- param->timeout.timeout = sbi->exp_timeout / HZ;
- sbi->exp_timeout = timeout * HZ;
return 0;
}
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index 39d8c84c16f4..5c2d459e1e48 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -429,8 +429,6 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
if (!root)
return NULL;
- timeout = sbi->exp_timeout;
-
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
spin_lock(&sbi->fs_lock);
@@ -441,6 +439,11 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
}
spin_unlock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_EXPIRE_SET)
+ timeout = ino->exp_timeout;
+ else
+ timeout = sbi->exp_timeout;
+
expired = should_expire(dentry, mnt, timeout, how);
if (!expired)
continue;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index cf792d4de4f1..ee2edccaef70 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -19,6 +19,7 @@ struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
INIT_LIST_HEAD(&ino->expiring);
ino->last_used = jiffies;
ino->sbi = sbi;
+ ino->exp_timeout = -1;
ino->count = 1;
}
return ino;
@@ -28,6 +29,7 @@ void autofs_clean_ino(struct autofs_info *ino)
{
ino->uid = GLOBAL_ROOT_UID;
ino->gid = GLOBAL_ROOT_GID;
+ ino->exp_timeout = -1;
ino->last_used = jiffies;
}
@@ -172,8 +174,7 @@ static int autofs_parse_fd(struct fs_context *fc, struct autofs_sb_info *sbi,
ret = autofs_check_pipe(pipe);
if (ret < 0) {
errorf(fc, "Invalid/unusable pipe");
- if (param->type != fs_value_is_file)
- fput(pipe);
+ fput(pipe);
return -EBADF;
}
diff --git a/fs/backing-file.c b/fs/backing-file.c
index afb557446c27..8860dac58c37 100644
--- a/fs/backing-file.c
+++ b/fs/backing-file.c
@@ -303,13 +303,16 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
return -EIO;
+ if (!out->f_op->splice_write)
+ return -EINVAL;
+
ret = file_remove_privs(ctx->user_file);
if (ret)
return ret;
old_cred = override_creds(ctx->cred);
file_start_write(out);
- ret = iter_file_splice_write(pipe, out, ppos, len, flags);
+ ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
file_end_write(out);
revert_creds(old_cred);
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index fd3a2522bc3e..dc3a4024aab6 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -240,71 +240,73 @@ fsck_err:
int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
{
- struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
+ struct bch_alloc_v4 a;
int ret = 0;
- bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k),
+ bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k));
+
+ bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k),
c, alloc_v4_val_size_bad,
"bad val size (%u > %zu)",
- alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k));
+ alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k));
- bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) &&
- BCH_ALLOC_V4_NR_BACKPOINTERS(a.v),
+ bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) &&
+ BCH_ALLOC_V4_NR_BACKPOINTERS(&a),
c, alloc_v4_backpointers_start_bad,
"invalid backpointers_start");
- bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type,
+ bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type,
c, alloc_key_data_type_bad,
"invalid data type (got %u should be %u)",
- a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
+ a.data_type, alloc_data_type(a, a.data_type));
for (unsigned i = 0; i < 2; i++)
- bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
+ bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX,
c, alloc_key_io_time_bad,
"invalid io_time[%s]: %llu, max %llu",
i == READ ? "read" : "write",
- a.v->io_time[i], LRU_TIME_MAX);
+ a.io_time[i], LRU_TIME_MAX);
- unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) >
+ unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) >
offsetof(struct bch_alloc_v4, stripe_sectors)
- ? a.v->stripe_sectors
+ ? a.stripe_sectors
: 0;
- switch (a.v->data_type) {
+ switch (a.data_type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
case BCH_DATA_need_discard:
bkey_fsck_err_on(stripe_sectors ||
- a.v->dirty_sectors ||
- a.v->cached_sectors ||
- a.v->stripe,
+ a.dirty_sectors ||
+ a.cached_sectors ||
+ a.stripe,
c, alloc_key_empty_but_have_data,
"empty data type free but have data %u.%u.%u %u",
stripe_sectors,
- a.v->dirty_sectors,
- a.v->cached_sectors,
- a.v->stripe);
+ a.dirty_sectors,
+ a.cached_sectors,
+ a.stripe);
break;
case BCH_DATA_sb:
case BCH_DATA_journal:
case BCH_DATA_btree:
case BCH_DATA_user:
case BCH_DATA_parity:
- bkey_fsck_err_on(!a.v->dirty_sectors &&
+ bkey_fsck_err_on(!a.dirty_sectors &&
!stripe_sectors,
c, alloc_key_dirty_sectors_0,
"data_type %s but dirty_sectors==0",
- bch2_data_type_str(a.v->data_type));
+ bch2_data_type_str(a.data_type));
break;
case BCH_DATA_cached:
- bkey_fsck_err_on(!a.v->cached_sectors ||
- a.v->dirty_sectors ||
+ bkey_fsck_err_on(!a.cached_sectors ||
+ a.dirty_sectors ||
stripe_sectors ||
- a.v->stripe,
+ a.stripe,
c, alloc_key_cached_inconsistency,
"data type inconsistency");
- bkey_fsck_err_on(!a.v->io_time[READ] &&
+ bkey_fsck_err_on(!a.io_time[READ] &&
c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
c, alloc_key_cached_but_read_time_zero,
"cached bucket with read_time == 0");
@@ -556,7 +558,7 @@ int bch2_bucket_gens_init(struct bch_fs *c)
struct bpos pos = alloc_gens_pos(iter.pos, &offset);
int ret2 = 0;
- if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
+ if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) {
ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret2)
@@ -829,7 +831,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (likely(new.k->type == KEY_TYPE_alloc_v4)) {
new_a = bkey_s_to_alloc_v4(new).v;
} else {
- BUG_ON(!(flags & BTREE_TRIGGER_gc));
+ BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair)));
struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
ret = PTR_ERR_OR_ZERO(new_ka);
@@ -1872,26 +1874,26 @@ static void bch2_do_discards_work(struct work_struct *work)
trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
bch2_err_str(ret));
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
percpu_ref_put(&ca->io_ref);
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}
void bch2_dev_do_discards(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
return;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
- goto put_ioref;
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ goto put_write_ref;
if (queue_work(c->write_ref_wq, &ca->discard_work))
return;
- bch2_write_ref_put(c, BCH_WRITE_REF_discard);
-put_ioref:
percpu_ref_put(&ca->io_ref);
+put_write_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard);
}
void bch2_do_discards(struct bch_fs *c)
@@ -1966,8 +1968,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
break;
}
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
percpu_ref_put(&ca->io_ref);
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
@@ -1977,18 +1979,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
if (discard_in_flight_add(ca, bucket, false))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
return;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
- goto put_ioref;
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ goto put_ref;
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
-put_ioref:
percpu_ref_put(&ca->io_ref);
+put_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
static int invalidate_one_bucket(struct btree_trans *trans,
@@ -2130,26 +2132,26 @@ static void bch2_do_invalidates_work(struct work_struct *work)
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
percpu_ref_put(&ca->io_ref);
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
void bch2_dev_do_invalidates(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
return;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
- goto put_ioref;
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ goto put_ref;
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
-put_ioref:
percpu_ref_put(&ca->io_ref);
+put_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
void bch2_do_invalidates(struct bch_fs *c)
diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h
index 47d9d006502c..f754a2951d8a 100644
--- a/fs/bcachefs/alloc_background_format.h
+++ b/fs/bcachefs/alloc_background_format.h
@@ -69,6 +69,7 @@ struct bch_alloc_v4 {
__u64 io_time[2];
__u32 stripe;
__u32 nr_external_backpointers;
+ /* end of fields in original version of alloc_v4 */
__u64 fragmentation_lru;
__u32 stripe_sectors;
__u32 pad;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index c75f2e0f32bb..14ce726bf5a3 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -677,7 +677,8 @@ struct bch_sb_field_ext {
x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \
x(disk_accounting_v2, BCH_VERSION(1, 9)) \
x(disk_accounting_v3, BCH_VERSION(1, 10)) \
- x(disk_accounting_inum, BCH_VERSION(1, 11))
+ x(disk_accounting_inum, BCH_VERSION(1, 11)) \
+ x(rebalance_work_acct_fix, BCH_VERSION(1, 12))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index f5d85b50b6f2..e52a06d3418c 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -159,6 +159,16 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
return b;
}
+void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
+{
+ mutex_lock(&c->btree_cache.lock);
+ list_move(&b->list, &c->btree_cache.freeable);
+ mutex_unlock(&c->btree_cache.lock);
+
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+}
+
/* Btree in memory cache - hash table */
void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
@@ -736,6 +746,13 @@ out:
start_time);
memalloc_nofs_restore(flags);
+
+ int ret = bch2_trans_relock(trans);
+ if (unlikely(ret)) {
+ bch2_btree_node_to_freelist(c, b);
+ return ERR_PTR(ret);
+ }
+
return b;
err:
mutex_lock(&bc->lock);
@@ -856,6 +873,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
bch2_btree_node_read(trans, b, sync);
+ int ret = bch2_trans_relock(trans);
+ if (ret)
+ return ERR_PTR(ret);
+
if (!sync)
return NULL;
@@ -974,6 +995,10 @@ retry:
bch2_btree_node_wait_on_read(b);
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ return ERR_PTR(ret);
+
/*
* should_be_locked is not set on this path yet, so we need to
* relock it specifically:
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index c0eb87a057cc..f82064007127 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -12,6 +12,8 @@ struct btree_iter;
void bch2_recalc_btree_reserve(struct bch_fs *);
+void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);
+
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index dca62375d7d3..222b7ce8a901 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -569,6 +569,15 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans,
bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \
_btree_id, _pos, _flags, KEY_TYPE_##_type))
+#define bkey_val_copy(_dst_v, _src_k) \
+do { \
+ unsigned b = min_t(unsigned, sizeof(*_dst_v), \
+ bkey_val_bytes(_src_k.k)); \
+ memcpy(_dst_v, _src_k.v, b); \
+ if (b < sizeof(*_dst_v)) \
+ memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \
+} while (0)
+
static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans,
unsigned btree_id, struct bpos pos,
unsigned flags, unsigned type,
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 74933490aaba..c1657182c275 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -530,6 +530,8 @@ static void __journal_keys_sort(struct journal_keys *keys)
{
sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);
+ cond_resched();
+
struct journal_key *dst = keys->data;
darray_for_each(*keys, src) {
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 79954490627c..fda7998734cb 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -726,6 +726,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+ path->should_be_locked = false;
}
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
@@ -777,6 +778,20 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
+
+ /*
+ * Scanning is expensive while a rehash is in progress - most elements
+ * will be on the new hashtable, if it's in progress
+ *
+ * A rehash could still start while we're scanning - that's ok, we'll
+ * still see most elements.
+ */
+ if (unlikely(tbl->nest)) {
+ rcu_read_unlock();
+ srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+ return SHRINK_STOP;
+ }
+
if (bc->shrink_iter >= tbl->size)
bc->shrink_iter = 0;
start = bc->shrink_iter;
@@ -784,7 +799,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
do {
struct rhash_head *pos, *next;
- pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter));
+ pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]);
while (!rht_is_a_nulls(pos)) {
next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter);
@@ -865,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
while (atomic_long_read(&bc->nr_keys)) {
rcu_read_lock();
tbl = rht_dereference_rcu(bc->table.tbl, &bc->table);
- if (tbl)
+ if (tbl) {
+ if (tbl->nest) {
+ /* wait for in progress rehash */
+ rcu_read_unlock();
+ mutex_lock(&bc->table.mutex);
+ mutex_unlock(&bc->table.mutex);
+ rcu_read_lock();
+ continue;
+ }
for (i = 0; i < tbl->size; i++)
- rht_for_each_entry_rcu(ck, pos, tbl, i, hash) {
+ while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) {
+ ck = container_of(pos, struct bkey_cached, hash);
bkey_cached_evict(bc, ck);
list_add(&ck->list, &items);
}
+ }
rcu_read_unlock();
}
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index b3454d4619e8..8fd112026e7a 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -317,6 +317,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
: 0;
int ret;
+ b = bch2_btree_node_mem_alloc(trans, interior_node);
+ if (IS_ERR(b))
+ return b;
+
+ BUG_ON(b->ob.nr);
+
mutex_lock(&c->btree_reserve_cache_lock);
if (c->btree_reserve_cache_nr > nr_reserve) {
struct btree_alloc *a =
@@ -325,10 +331,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
obs = a->ob;
bkey_copy(&tmp.k, &a->k);
mutex_unlock(&c->btree_reserve_cache_lock);
- goto mem_alloc;
+ goto out;
}
mutex_unlock(&c->btree_reserve_cache_lock);
-
retry:
ret = bch2_alloc_sectors_start_trans(trans,
c->opts.metadata_target ?:
@@ -341,7 +346,7 @@ retry:
c->opts.metadata_replicas_required),
watermark, 0, cl, &wp);
if (unlikely(ret))
- return ERR_PTR(ret);
+ goto err;
if (wp->sectors_free < btree_sectors(c)) {
struct open_bucket *ob;
@@ -360,19 +365,16 @@ retry:
bch2_open_bucket_get(c, wp, &obs);
bch2_alloc_sectors_done(c, wp);
-mem_alloc:
- b = bch2_btree_node_mem_alloc(trans, interior_node);
- six_unlock_write(&b->c.lock);
- six_unlock_intent(&b->c.lock);
-
- /* we hold cannibalize_lock: */
- BUG_ON(IS_ERR(b));
- BUG_ON(b->ob.nr);
-
+out:
bkey_copy(&b->key, &tmp.k);
b->ob = obs;
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
return b;
+err:
+ bch2_btree_node_to_freelist(c, b);
+ return ERR_PTR(ret);
}
static struct btree *bch2_btree_node_alloc(struct btree_update *as,
@@ -2439,6 +2441,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
}
new_hash = bch2_btree_node_mem_alloc(trans, false);
+ ret = PTR_ERR_OR_ZERO(new_hash);
+ if (ret)
+ goto err;
}
path->intent_ref++;
@@ -2446,14 +2451,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
commit_flags, skip_triggers);
--path->intent_ref;
- if (new_hash) {
- mutex_lock(&c->btree_cache.lock);
- list_move(&new_hash->list, &c->btree_cache.freeable);
- mutex_unlock(&c->btree_cache.lock);
-
- six_unlock_write(&new_hash->c.lock);
- six_unlock_intent(&new_hash->c.lock);
- }
+ if (new_hash)
+ bch2_btree_node_to_freelist(c, new_hash);
+err:
closure_sync(&cl);
bch2_btree_cache_cannibalize_unlock(trans);
return ret;
@@ -2522,6 +2522,10 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id
b = bch2_btree_node_mem_alloc(trans, false);
bch2_btree_cache_cannibalize_unlock(trans);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ return ret;
+
set_btree_node_fake(b);
set_btree_node_need_rewrite(b);
b->c.level = level;
@@ -2553,7 +2557,7 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id
void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
{
- bch2_trans_run(c, bch2_btree_root_alloc_fake_trans(trans, id, level));
+ bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level)));
}
static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index be2bbd248631..721bbe1dffc1 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -100,12 +100,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
if (!ca) {
- if (fsck_err(trans, ptr_to_invalid_device,
- "pointer to missing device %u\n"
- "while marking %s",
- p.ptr.dev,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID,
+ trans, ptr_to_invalid_device,
+ "pointer to missing device %u\n"
+ "while marking %s",
+ p.ptr.dev,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
*do_update = true;
return 0;
}
@@ -562,7 +563,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
if (unlikely(!ca)) {
- if (insert)
+ if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
ret = -EIO;
goto err;
}
@@ -699,7 +700,8 @@ err:
static int __trigger_extent(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k,
- enum btree_iter_update_trigger_flags flags)
+ enum btree_iter_update_trigger_flags flags,
+ s64 *replicas_sectors)
{
bool gc = flags & BTREE_TRIGGER_gc;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -708,7 +710,6 @@ static int __trigger_extent(struct btree_trans *trans,
enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
? BCH_DATA_btree
: BCH_DATA_user;
- s64 replicas_sectors = 0;
int ret = 0;
struct disk_accounting_pos acc_replicas_key = {
@@ -739,7 +740,7 @@ static int __trigger_extent(struct btree_trans *trans,
if (ret)
return ret;
} else if (!p.has_ec) {
- replicas_sectors += disk_sectors;
+ *replicas_sectors += disk_sectors;
acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev;
} else {
ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
@@ -777,7 +778,7 @@ static int __trigger_extent(struct btree_trans *trans,
}
if (acc_replicas_key.replicas.nr_devs) {
- ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc);
+ ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc);
if (ret)
return ret;
}
@@ -787,7 +788,7 @@ static int __trigger_extent(struct btree_trans *trans,
.type = BCH_DISK_ACCOUNTING_snapshot,
.snapshot.id = k.k->p.snapshot,
};
- ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, &replicas_sectors, 1, gc);
+ ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc);
if (ret)
return ret;
}
@@ -807,7 +808,7 @@ static int __trigger_extent(struct btree_trans *trans,
.type = BCH_DISK_ACCOUNTING_btree,
.btree.id = btree_id,
};
- ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc);
+ ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc);
if (ret)
return ret;
} else {
@@ -819,22 +820,13 @@ static int __trigger_extent(struct btree_trans *trans,
s64 v[3] = {
insert ? 1 : -1,
insert ? k.k->size : -((s64) k.k->size),
- replicas_sectors,
+ *replicas_sectors,
};
ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc);
if (ret)
return ret;
}
- if (bch2_bkey_rebalance_opts(k)) {
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_rebalance_work,
- };
- ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc);
- if (ret)
- return ret;
- }
-
return 0;
}
@@ -843,6 +835,7 @@ int bch2_trigger_extent(struct btree_trans *trans,
struct bkey_s_c old, struct bkey_s new,
enum btree_iter_update_trigger_flags flags)
{
+ struct bch_fs *c = trans->c;
struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c);
struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old);
unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start;
@@ -858,21 +851,53 @@ int bch2_trigger_extent(struct btree_trans *trans,
new_ptrs_bytes))
return 0;
- if (flags & BTREE_TRIGGER_transactional) {
- struct bch_fs *c = trans->c;
- int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) -
- (int) bch2_bkey_needs_rebalance(c, old);
+ if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
+ s64 old_replicas_sectors = 0, new_replicas_sectors = 0;
+
+ if (old.k->type) {
+ int ret = __trigger_extent(trans, btree, level, old,
+ flags & ~BTREE_TRIGGER_insert,
+ &old_replicas_sectors);
+ if (ret)
+ return ret;
+ }
+
+ if (new.k->type) {
+ int ret = __trigger_extent(trans, btree, level, new.s_c,
+ flags & ~BTREE_TRIGGER_overwrite,
+ &new_replicas_sectors);
+ if (ret)
+ return ret;
+ }
- if (mod) {
+ int need_rebalance_delta = 0;
+ s64 need_rebalance_sectors_delta = 0;
+
+ s64 s = bch2_bkey_sectors_need_rebalance(c, old);
+ need_rebalance_delta -= s != 0;
+ need_rebalance_sectors_delta -= s;
+
+ s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
+ need_rebalance_delta += s != 0;
+ need_rebalance_sectors_delta += s;
+
+ if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) {
int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work,
- new.k->p, mod > 0);
+ new.k->p, need_rebalance_delta > 0);
if (ret)
return ret;
}
- }
- if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc))
- return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree, level, old, new, flags);
+ if (need_rebalance_sectors_delta) {
+ struct disk_accounting_pos acc = {
+ .type = BCH_DISK_ACCOUNTING_rebalance_work,
+ };
+ int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1,
+ flags & BTREE_TRIGGER_gc);
+ if (ret)
+ return ret;
+ }
+ }
return 0;
}
diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c
index f70eb2127d32..f9fb150eda70 100644
--- a/fs/bcachefs/buckets_waiting_for_journal.c
+++ b/fs/bcachefs/buckets_waiting_for_journal.c
@@ -107,7 +107,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
nr_elements += t->d[i].journal_seq > flushed_seq;
new_bits = ilog2(roundup_pow_of_two(nr_elements * 3));
-
+realloc:
n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL);
if (!n) {
ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set;
@@ -118,6 +118,8 @@ retry_rehash:
if (nr_rehashes_this_size == 3) {
new_bits++;
nr_rehashes_this_size = 0;
+ kvfree(n);
+ goto realloc;
}
nr_rehashes++;
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 6a854c918496..004894ad4147 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -20,6 +20,76 @@
#include "subvolume.h"
#include "trace.h"
+static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr)
+ bch2_dev_put(bch2_dev_have_ref(c, ptr->dev));
+}
+
+static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (!bch2_dev_tryget(c, ptr->dev)) {
+ bkey_for_each_ptr(ptrs, ptr2) {
+ if (ptr2 == ptr)
+ break;
+ bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev));
+ }
+ return false;
+ }
+ }
+ return true;
+}
+
+static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+ struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
+
+ bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0);
+ }
+}
+
+static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+ struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
+
+ if (ctxt) {
+ bool locked;
+
+ move_ctxt_wait_event(ctxt,
+ (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) ||
+ list_empty(&ctxt->ios));
+
+ if (!locked)
+ bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0);
+ } else {
+ if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) {
+ bkey_for_each_ptr(ptrs, ptr2) {
+ if (ptr2 == ptr)
+ break;
+
+ bucket = PTR_BUCKET_POS(ca, ptr2);
+ bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0);
+ }
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
{
if (trace_move_extent_finish_enabled()) {
@@ -267,6 +337,7 @@ restart_drop_extra_replicas:
printbuf_exit(&buf);
bch2_fatal_error(c);
+ ret = -EIO;
goto out;
}
@@ -355,17 +426,11 @@ void bch2_data_update_read_done(struct data_update *m,
void bch2_data_update_exit(struct data_update *update)
{
struct bch_fs *c = update->op.c;
- struct bkey_ptrs_c ptrs =
- bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k));
-
- bkey_for_each_ptr(ptrs, ptr) {
- struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
- if (c->opts.nocow_enabled)
- bch2_bucket_nocow_unlock(&c->nocow_locks,
- PTR_BUCKET_POS(ca, ptr), 0);
- bch2_dev_put(ca);
- }
+ struct bkey_s_c k = bkey_i_to_s_c(update->k.k);
+ if (c->opts.nocow_enabled)
+ bkey_nocow_unlock(c, k);
+ bkey_put_dev_refs(c, k);
bch2_bkey_buf_exit(&update->k, c);
bch2_disk_reservation_put(c, &update->op.res);
bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
@@ -475,6 +540,9 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
bch2_compression_opt_to_text(out, background_compression(*io_opts));
prt_newline(out);
+ prt_str(out, "opts.replicas:\t");
+ prt_u64(out, io_opts->data_replicas);
+
prt_str(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
}
@@ -543,7 +611,6 @@ int bch2_data_update_init(struct btree_trans *trans,
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
- unsigned ptrs_locked = 0;
int ret = 0;
/*
@@ -554,6 +621,15 @@ int bch2_data_update_init(struct btree_trans *trans,
if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
return -BCH_ERR_data_update_done;
+ if (!bkey_get_dev_refs(c, k))
+ return -BCH_ERR_data_update_done;
+
+ if (c->opts.nocow_enabled &&
+ !bkey_nocow_lock(c, ctxt, k)) {
+ bkey_put_dev_refs(c, k);
+ return -BCH_ERR_nocow_lock_blocked;
+ }
+
bch2_bkey_buf_init(&m->k);
bch2_bkey_buf_reassemble(&m->k, c, k);
m->btree_id = btree_id;
@@ -575,40 +651,24 @@ int bch2_data_update_init(struct btree_trans *trans,
m->op.compression_opt = background_compression(io_opts);
m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK;
- bkey_for_each_ptr(ptrs, ptr) {
- if (!bch2_dev_tryget(c, ptr->dev)) {
- bkey_for_each_ptr(ptrs, ptr2) {
- if (ptr2 == ptr)
- break;
- bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev));
- }
- return -BCH_ERR_data_update_done;
- }
- }
-
unsigned durability_have = 0, durability_removing = 0;
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev);
- struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
- bool locked;
-
- rcu_read_lock();
- if (((1U << i) & m->data_opts.rewrite_ptrs)) {
- BUG_ON(p.ptr.cached);
-
- if (crc_is_compressed(p.crc))
- reserve_sectors += k.k->size;
-
- m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
- durability_removing += bch2_extent_ptr_desired_durability(c, &p);
- } else if (!p.ptr.cached &&
- !((1U << i) & m->data_opts.kill_ptrs)) {
- bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
- durability_have += bch2_extent_ptr_durability(c, &p);
+ if (!p.ptr.cached) {
+ rcu_read_lock();
+ if (BIT(i) & m->data_opts.rewrite_ptrs) {
+ if (crc_is_compressed(p.crc))
+ reserve_sectors += k.k->size;
+
+ m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
+ durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+ } else if (!(BIT(i) & m->data_opts.kill_ptrs)) {
+ bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+ durability_have += bch2_extent_ptr_durability(c, &p);
+ }
+ rcu_read_unlock();
}
- rcu_read_unlock();
/*
* op->csum_type is normally initialized from the fs/file's
@@ -623,24 +683,6 @@ int bch2_data_update_init(struct btree_trans *trans,
if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible)
m->op.incompressible = true;
- if (c->opts.nocow_enabled) {
- if (ctxt) {
- move_ctxt_wait_event(ctxt,
- (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
- bucket, 0)) ||
- list_empty(&ctxt->ios));
-
- if (!locked)
- bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0);
- } else {
- if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) {
- ret = -BCH_ERR_nocow_lock_blocked;
- goto err;
- }
- }
- ptrs_locked |= (1U << i);
- }
-
i++;
}
@@ -654,16 +696,6 @@ int bch2_data_update_init(struct btree_trans *trans,
* Increasing replication is an explicit operation triggered by
* rereplicate, currently, so that users don't get an unexpected -ENOSPC
*/
- if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
- !durability_required) {
- m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
- m->data_opts.rewrite_ptrs = 0;
- /* if iter == NULL, it's just a promote */
- if (iter)
- ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
- goto done;
- }
-
m->op.nr_replicas = min(durability_removing, durability_required) +
m->data_opts.extra_replicas;
@@ -675,48 +707,38 @@ int bch2_data_update_init(struct btree_trans *trans,
if (!(durability_have + durability_removing))
m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
- if (!m->op.nr_replicas) {
- struct printbuf buf = PRINTBUF;
+ m->op.nr_replicas_required = m->op.nr_replicas;
- bch2_data_update_to_text(&buf, m);
- WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
- printbuf_exit(&buf);
- ret = -BCH_ERR_data_update_done;
- goto done;
+ /*
+ * It might turn out that we don't need any new replicas, if the
+ * replicas or durability settings have been changed since the extent
+ * was written:
+ */
+ if (!m->op.nr_replicas) {
+ m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+ m->data_opts.rewrite_ptrs = 0;
+ /* if iter == NULL, it's just a promote */
+ if (iter)
+ ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
+ goto out;
}
- m->op.nr_replicas_required = m->op.nr_replicas;
-
if (reserve_sectors) {
ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
m->data_opts.extra_replicas
? 0
: BCH_DISK_RESERVATION_NOFAIL);
if (ret)
- goto err;
+ goto out;
}
if (bkey_extent_is_unwritten(k)) {
bch2_update_unwritten_extent(trans, m);
- goto done;
+ goto out;
}
return 0;
-err:
- i = 0;
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev);
- struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr);
- if ((1U << i) & ptrs_locked)
- bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0);
- bch2_dev_put(ca);
- i++;
- }
-
- bch2_bkey_buf_exit(&m->k, c);
- bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
- return ret;
-done:
+out:
bch2_data_update_exit(m);
return ret ?: -BCH_ERR_data_update_done;
}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 90962b3c0130..9baf3411a8f9 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -97,7 +97,9 @@ static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe
const struct bch_extent_ptr *data_ptr,
unsigned sectors)
{
- return data_ptr->dev == stripe_ptr->dev &&
+ return (data_ptr->dev == stripe_ptr->dev ||
+ data_ptr->dev == BCH_SB_MEMBER_INVALID ||
+ stripe_ptr->dev == BCH_SB_MEMBER_INVALID) &&
data_ptr->gen == stripe_ptr->gen &&
data_ptr->offset >= stripe_ptr->offset &&
data_ptr->offset < stripe_ptr->offset + sectors;
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index ab5a7adece10..742dcdd3e5d7 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -257,7 +257,6 @@
x(BCH_ERR_nopromote, nopromote_in_flight) \
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
- x(0, need_inode_lock) \
x(0, invalid_snapshot_node) \
x(0, option_needs_open_fs)
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 4419ad3e454e..324303bf4353 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -781,14 +781,17 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
/*
* Returns pointer to the next entry after the one being dropped:
*/
-union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+void bch2_bkey_drop_ptr_noerror(struct bkey_s k, struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry = to_entry(ptr), *next;
- union bch_extent_entry *ret = entry;
bool drop_crc = true;
+ if (k.k->type == KEY_TYPE_stripe) {
+ ptr->dev = BCH_SB_MEMBER_INVALID;
+ return;
+ }
+
EBUG_ON(ptr < &ptrs.start->ptr ||
ptr >= &ptrs.end->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
@@ -811,21 +814,16 @@ union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
break;
if ((extent_entry_is_crc(entry) && drop_crc) ||
- extent_entry_is_stripe_ptr(entry)) {
- ret = (void *) ret - extent_entry_bytes(entry);
+ extent_entry_is_stripe_ptr(entry))
extent_entry_drop(k, entry);
- }
}
-
- return ret;
}
-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+void bch2_bkey_drop_ptr(struct bkey_s k, struct bch_extent_ptr *ptr)
{
bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
- union bch_extent_entry *ret =
- bch2_bkey_drop_ptr_noerror(k, ptr);
+
+ bch2_bkey_drop_ptr_noerror(k, ptr);
/*
* If we deleted all the dirty pointers and there's still cached
@@ -837,14 +835,10 @@ union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
!bch2_bkey_dirty_devs(k.s_c).nr) {
k.k->type = KEY_TYPE_error;
set_bkey_val_u64s(k.k, 0);
- ret = NULL;
} else if (!bch2_bkey_nr_ptrs(k.s_c)) {
k.k->type = KEY_TYPE_deleted;
set_bkey_val_u64s(k.k, 0);
- ret = NULL;
}
-
- return ret;
}
void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
@@ -929,8 +923,29 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
if (p1.ptr.dev == p2.ptr.dev &&
p1.ptr.gen == p2.ptr.gen &&
+
+ /*
+ * This checks that the two pointers point
+ * to the same region on disk - adjusting
+ * for the difference in where the extents
+ * start, since one may have been trimmed:
+ */
(s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
- (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k) &&
+
+ /*
+ * This additionally checks that the
+ * extents overlap on disk, since the
+ * previous check may trigger spuriously
+ * when one extent is immediately partially
+ * overwritten with another extent (so that
+ * on disk they are adjacent) and
+ * compression is in use:
+ */
+ ((p1.ptr.offset >= p2.ptr.offset &&
+ p1.ptr.offset < p2.ptr.offset + p2.crc.compressed_size) ||
+ (p2.ptr.offset >= p1.ptr.offset &&
+ p2.ptr.offset < p1.ptr.offset + p1.crc.compressed_size)))
return true;
return false;
@@ -1017,6 +1032,8 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
prt_printf(out, "ptr: %u:%llu:%u gen %u",
ptr->dev, b, offset, ptr->gen);
+ if (ca->mi.durability != 1)
+ prt_printf(out, " d=%u", ca->mi.durability);
if (ptr->cached)
prt_str(out, " cached");
if (ptr->unwritten)
@@ -1377,6 +1394,45 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
return r != NULL;
}
+static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
+ unsigned target, unsigned compression)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ u64 sectors = 0;
+
+ if (compression) {
+ unsigned compression_type = bch2_compression_opt_to_type(compression);
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
+ p.ptr.unwritten) {
+ sectors = 0;
+ goto incompressible;
+ }
+
+ if (!p.ptr.cached && p.crc.compression_type != compression_type)
+ sectors += p.crc.compressed_size;
+ }
+ }
+incompressible:
+ if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target))
+ sectors += p.crc.compressed_size;
+ }
+
+ return sectors;
+}
+
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
+
+ return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0;
+}
+
int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
struct bch_io_opts *opts)
{
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 1a6ddee48041..42a7c6d820a0 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -649,26 +649,21 @@ static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr
void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);
-union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s,
- struct bch_extent_ptr *);
-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
- struct bch_extent_ptr *);
+void bch2_bkey_drop_ptr_noerror(struct bkey_s, struct bch_extent_ptr *);
+void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \
do { \
- struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \
+ __label__ _again; \
+ struct bkey_ptrs _ptrs; \
+_again: \
+ _ptrs = bch2_bkey_ptrs(_k); \
\
- struct bch_extent_ptr *_ptr = &_ptrs.start->ptr; \
- \
- while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \
+ bkey_for_each_ptr(_ptrs, _ptr) \
if (_cond) { \
- _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \
- _ptrs = bch2_bkey_ptrs(_k); \
- continue; \
+ bch2_bkey_drop_ptr(_k, _ptr); \
+ goto _again; \
} \
- \
- (_ptr)++; \
- } \
} while (0)
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
@@ -692,6 +687,7 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c);
unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c,
unsigned, unsigned);
bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c);
+u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c);
int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *,
struct bch_io_opts *);
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index cc33d763f722..ff60c041abe5 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -534,7 +534,7 @@ do_io:
if (f_sectors > w->tmp_sectors) {
kfree(w->tmp);
- w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL);
+ w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), GFP_NOFS|__GFP_NOFAIL);
w->tmp_sectors = f_sectors;
}
@@ -659,7 +659,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
int bch2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
@@ -728,12 +728,11 @@ out:
goto err;
}
- *pagep = &folio->page;
+ *foliop = folio;
return 0;
err:
folio_unlock(folio);
folio_put(folio);
- *pagep = NULL;
err_unlock:
bch2_pagecache_add_put(inode);
kfree(res);
@@ -743,12 +742,11 @@ err_unlock:
int bch2_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation *res = fsdata;
- struct folio *folio = page_folio(page);
unsigned offset = pos - folio_pos(folio);
lockdep_assert_held(&inode->v.i_rwsem);
@@ -802,8 +800,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
static int __bch2_buffered_write(struct bch_inode_info *inode,
struct address_space *mapping,
struct iov_iter *iter,
- loff_t pos, unsigned len,
- bool inode_locked)
+ loff_t pos, unsigned len)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
@@ -827,15 +824,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
BUG_ON(!fs.nr);
- /*
- * If we're not using the inode lock, we need to lock all the folios for
- * atomiticity of writes vs. other writes:
- */
- if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
- ret = -BCH_ERR_need_inode_lock;
- goto out;
- }
-
f = darray_first(fs);
if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
@@ -932,10 +920,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
end = pos + copied;
spin_lock(&inode->v.i_lock);
- if (end > inode->v.i_size) {
- BUG_ON(!inode_locked);
+ if (end > inode->v.i_size)
i_size_write(&inode->v, end);
- }
spin_unlock(&inode->v.i_lock);
f_pos = pos;
@@ -979,68 +965,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct bch_inode_info *inode = file_bch_inode(file);
- loff_t pos;
- bool inode_locked = false;
- ssize_t written = 0, written2 = 0, ret = 0;
-
- /*
- * We don't take the inode lock unless i_size will be changing. Folio
- * locks provide exclusion with other writes, and the pagecache add lock
- * provides exclusion with truncate and hole punching.
- *
- * There is one nasty corner case where atomicity would be broken
- * without great care: when copying data from userspace to the page
- * cache, we do that with faults disable - a page fault would recurse
- * back into the filesystem, taking filesystem locks again, and
- * deadlock; so it's done with faults disabled, and we fault in the user
- * buffer when we aren't holding locks.
- *
- * If we do part of the write, but we then race and in the userspace
- * buffer have been evicted and are no longer resident, then we have to
- * drop our folio locks to re-fault them in, breaking write atomicity.
- *
- * To fix this, we restart the write from the start, if we weren't
- * holding the inode lock.
- *
- * There is another wrinkle after that; if we restart the write from the
- * start, and then get an unrecoverable error, we _cannot_ claim to
- * userspace that we did not write data we actually did - so we must
- * track (written2) the most we ever wrote.
- */
-
- if ((iocb->ki_flags & IOCB_APPEND) ||
- (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
- inode_lock(&inode->v);
- inode_locked = true;
- }
-
- ret = generic_write_checks(iocb, iter);
- if (ret <= 0)
- goto unlock;
-
- ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
- if (ret) {
- if (!inode_locked) {
- inode_lock(&inode->v);
- inode_locked = true;
- ret = file_remove_privs_flags(file, 0);
- }
- if (ret)
- goto unlock;
- }
-
- ret = file_update_time(file);
- if (ret)
- goto unlock;
-
- pos = iocb->ki_pos;
+ loff_t pos = iocb->ki_pos;
+ ssize_t written = 0;
+ int ret = 0;
bch2_pagecache_add_get(inode);
- if (!inode_locked &&
- (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
- goto get_inode_lock;
-
do {
unsigned offset = pos & (PAGE_SIZE - 1);
unsigned bytes = iov_iter_count(iter);
@@ -1065,17 +995,12 @@ again:
}
}
- if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
- goto get_inode_lock;
-
if (unlikely(fatal_signal_pending(current))) {
ret = -EINTR;
break;
}
- ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
- if (ret == -BCH_ERR_need_inode_lock)
- goto get_inode_lock;
+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
if (unlikely(ret < 0))
break;
@@ -1096,46 +1021,50 @@ again:
}
pos += ret;
written += ret;
- written2 = max(written, written2);
-
- if (ret != bytes && !inode_locked)
- goto get_inode_lock;
ret = 0;
balance_dirty_pages_ratelimited(mapping);
-
- if (0) {
-get_inode_lock:
- bch2_pagecache_add_put(inode);
- inode_lock(&inode->v);
- inode_locked = true;
- bch2_pagecache_add_get(inode);
-
- iov_iter_revert(iter, written);
- pos -= written;
- written = 0;
- ret = 0;
- }
} while (iov_iter_count(iter));
- bch2_pagecache_add_put(inode);
-unlock:
- if (inode_locked)
- inode_unlock(&inode->v);
- iocb->ki_pos += written;
+ bch2_pagecache_add_put(inode);
- ret = max(written, written2) ?: ret;
- if (ret > 0)
- ret = generic_write_sync(iocb, ret);
- return ret;
+ return written ? written : ret;
}
-ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- ssize_t ret = iocb->ki_flags & IOCB_DIRECT
- ? bch2_direct_write(iocb, iter)
- : bch2_buffered_write(iocb, iter);
+ struct file *file = iocb->ki_filp;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ ssize_t ret;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = bch2_direct_write(iocb, from);
+ goto out;
+ }
+
+ inode_lock(&inode->v);
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto unlock;
+
+ ret = file_remove_privs(file);
+ if (ret)
+ goto unlock;
+
+ ret = file_update_time(file);
+ if (ret)
+ goto unlock;
+
+ ret = bch2_buffered_write(iocb, from);
+ if (likely(ret > 0))
+ iocb->ki_pos += ret;
+unlock:
+ inode_unlock(&inode->v);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+out:
return bch2_err_class(ret);
}
diff --git a/fs/bcachefs/fs-io-buffered.h b/fs/bcachefs/fs-io-buffered.h
index a6126ff790e6..3207ebbb4ab4 100644
--- a/fs/bcachefs/fs-io-buffered.h
+++ b/fs/bcachefs/fs-io-buffered.h
@@ -10,10 +10,10 @@ int bch2_read_folio(struct file *, struct folio *);
int bch2_writepages(struct address_space *, struct writeback_control *);
void bch2_readahead(struct readahead_control *);
-int bch2_write_begin(struct file *, struct address_space *, loff_t,
- unsigned, struct page **, void **);
+int bch2_write_begin(struct file *, struct address_space *, loff_t pos,
+ unsigned len, struct folio **, void **);
int bch2_write_end(struct file *, struct address_space *, loff_t,
- unsigned, unsigned, struct page *, void *);
+ unsigned len, unsigned copied, struct folio *, void *);
ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index aea8132d2c40..99c7fe987c74 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -328,9 +328,8 @@ static int bch2_ioc_setlabel(struct bch_fs *c,
mutex_lock(&c->sb_lock);
strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE);
- mutex_unlock(&c->sb_lock);
-
ret = bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
mnt_drop_write_file(file);
return ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 94c392abef65..011817afc3ad 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -177,6 +177,14 @@ static unsigned bch2_inode_hash(subvol_inum inum)
return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
}
+struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+ return to_bch_ei(ilookup5_nowait(c->vfs_sb,
+ bch2_inode_hash(inum),
+ bch2_iget5_test,
+ &inum));
+}
+
static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)
{
subvol_inum inum = inode_inum(inode);
@@ -1644,14 +1652,16 @@ again:
break;
}
} else if (clean_pass && this_pass_clean) {
- wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
- DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry,
+ TASK_UNINTERRUPTIBLE);
mutex_unlock(&c->vfs_inodes_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
goto again;
}
}
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index c3af7225ff69..990ec43e0365 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -56,6 +56,8 @@ static inline subvol_inum inode_inum(struct bch_inode_info *inode)
};
}
+struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum);
+
/*
* Set if we've gotten a btree error for this inode, and thus the vfs inode and
* btree inode may be inconsistent:
@@ -194,6 +196,11 @@ int bch2_vfs_init(void);
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
+static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+ return NULL;
+}
+
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
snapshot_id_list *s) {}
static inline void bch2_vfs_exit(void) {}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 9138944c5ae6..9b3470a97546 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -8,6 +8,7 @@
#include "darray.h"
#include "dirent.h"
#include "error.h"
+#include "fs.h"
#include "fs-common.h"
#include "fsck.h"
#include "inode.h"
@@ -962,6 +963,22 @@ fsck_err:
return ret;
}
+static bool bch2_inode_open(struct bch_fs *c, struct bpos p)
+{
+ subvol_inum inum = {
+ .subvol = snapshot_t(c, p.snapshot)->subvol,
+ .inum = p.offset,
+ };
+
+ /* snapshot tree corruption, can't safely delete */
+ if (!inum.subvol) {
+ bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot);
+ return true;
+ }
+
+ return __bch2_inode_hash_find(c, inum) != NULL;
+}
+
static int check_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -1040,6 +1057,7 @@ static int check_inode(struct btree_trans *trans,
}
if (u.bi_flags & BCH_INODE_unlinked &&
+ !bch2_inode_open(c, k.k->p) &&
(!c->sb.clean ||
fsck_err(trans, inode_unlinked_but_clean,
"filesystem marked clean, but inode %llu unlinked",
@@ -2006,7 +2024,6 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
if (ret) {
bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
ret = -BCH_ERR_fsck_repair_unimplemented;
- ret = 0;
goto err;
}
@@ -2216,6 +2233,8 @@ int bch2_check_xattrs(struct bch_fs *c)
NULL, NULL,
BCH_TRANS_COMMIT_no_enospc,
check_xattr(trans, &iter, k, &hash_info, &inode)));
+
+ inode_walker_exit(&inode);
bch_err_fn(c, ret);
return ret;
}
@@ -2469,8 +2488,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino
: bch2_inode_unpack(inode_k, &inode);
if (ret) {
/* Should have been caught in dirents pass */
- if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
- bch_err(c, "error looking up parent directory: %i", ret);
+ bch_err_msg(c, ret, "error looking up parent directory");
break;
}
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 649e3a01608a..f5f7db50ca31 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1260,7 +1260,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq)
}
if (!had_entries)
- j->last_empty_seq = cur_seq;
+ j->last_empty_seq = cur_seq - 1; /* to match j->seq */
spin_lock(&j->lock);
diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c
index db80e506e3ab..62b910f2fb27 100644
--- a/fs/bcachefs/journal_sb.c
+++ b/fs/bcachefs/journal_sb.c
@@ -104,6 +104,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2);
struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx);
int ret = -BCH_ERR_invalid_sb_journal;
+ u64 sum = 0;
unsigned nr;
unsigned i;
struct u64_range *b;
@@ -119,6 +120,15 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
for (i = 0; i < nr; i++) {
b[i].start = le64_to_cpu(journal->d[i].start);
b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr);
+
+ if (b[i].end <= b[i].start) {
+ prt_printf(err, "journal buckets entry with bad nr: %llu+%llu",
+ le64_to_cpu(journal->d[i].start),
+ le64_to_cpu(journal->d[i].nr));
+ goto err;
+ }
+
+ sum += le64_to_cpu(journal->d[i].nr);
}
sort(b, nr, sizeof(*b), u64_range_cmp, NULL);
@@ -148,6 +158,11 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f
}
}
+ if (sum > UINT_MAX) {
+ prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX);
+ goto err;
+ }
+
ret = 0;
err:
kfree(b);
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index deef4f024d20..d86565bf07c8 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -383,7 +383,7 @@ static int bch2_copygc_thread(void *arg)
if (min_member_capacity == U64_MAX)
min_member_capacity = 128 * 2048;
- bch2_trans_unlock_long(ctxt.trans);
+ move_buckets_wait(&ctxt, buckets, true);
bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6),
MAX_SCHEDULE_TIMEOUT);
}
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index d89eb43c5ce9..36de1c6fe8c3 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -241,7 +241,13 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
const struct journal_key *l = *((const struct journal_key **)_l);
const struct journal_key *r = *((const struct journal_key **)_r);
- return cmp_int(l->journal_seq, r->journal_seq);
+ /*
+ * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last
+ *
+ * journal_seq == 0 means that the key comes from early repair, and
+ * should be inserted last so as to avoid overflowing the journal
+ */
+ return cmp_int(l->journal_seq - 1, r->journal_seq - 1);
}
int bch2_journal_replay(struct bch_fs *c)
@@ -322,6 +328,7 @@ int bch2_journal_replay(struct bch_fs *c)
}
}
+ bch2_trans_unlock_long(trans);
/*
* Now, replay any remaining keys in the order in which they appear in
* the journal, unpinning those journal entries as we go:
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 1223b710755d..1f34c92a6d11 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -82,7 +82,8 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
}
for (unsigned i = 0; i < r->nr_devs; i++)
- if (!bch2_member_exists(sb, r->devs[i])) {
+ if (r->devs[i] != BCH_SB_MEMBER_INVALID &&
+ !bch2_member_exists(sb, r->devs[i])) {
prt_printf(err, "invalid device %u in entry ", r->devs[i]);
goto bad;
}
@@ -451,7 +452,8 @@ retry:
.type = BCH_DISK_ACCOUNTING_replicas,
};
- memcpy(&k.replicas, e, replicas_entry_bytes(e));
+ unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
+ "embedded variable length struct");
struct bpos p = disk_accounting_pos_to_bpos(&k);
@@ -794,7 +796,7 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
nr_online += test_bit(e->devs[i], devs.d);
struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]);
- nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed;
+ nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
}
rcu_read_unlock();
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 650a1f77ca40..c7e4cdd3f6a5 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -75,6 +75,9 @@
BCH_FSCK_ERR_accounting_key_junk_at_end) \
x(disk_accounting_inum, \
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_accounting_mismatch) \
+ x(rebalance_work_acct_fix, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
BCH_FSCK_ERR_accounting_mismatch)
#define DOWNGRADE_TABLE() \
@@ -108,7 +111,10 @@
BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \
BCH_FSCK_ERR_fs_usage_replicas_wrong, \
BCH_FSCK_ERR_accounting_replicas_not_marked, \
- BCH_FSCK_ERR_bkey_version_in_future)
+ BCH_FSCK_ERR_bkey_version_in_future) \
+ x(rebalance_work_acct_fix, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
+ BCH_FSCK_ERR_accounting_mismatch)
struct upgrade_downgrade_entry {
u64 recovery_passes;
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d3a498617303..f0c14702f9e6 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -23,7 +23,7 @@ enum bch_fsck_flags {
x(jset_past_bucket_end, 9, 0) \
x(jset_seq_blacklisted, 10, 0) \
x(journal_entries_missing, 11, 0) \
- x(journal_entry_replicas_not_marked, 12, 0) \
+ x(journal_entry_replicas_not_marked, 12, FSCK_AUTOFIX) \
x(journal_entry_past_jset_end, 13, 0) \
x(journal_entry_replicas_data_mismatch, 14, 0) \
x(journal_entry_bkey_u64s_0, 15, 0) \
@@ -288,10 +288,10 @@ enum bch_fsck_flags {
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
- x(accounting_key_junk_at_end, 277, 0) \
- x(accounting_key_replicas_nr_devs_0, 278, 0) \
- x(accounting_key_replicas_nr_required_bad, 279, 0) \
- x(accounting_key_replicas_devs_unsorted, 280, 0) \
+ x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \
+ x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \
+ x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
+ x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 39196f2a4197..4b765422dd77 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -11,7 +11,8 @@
void bch2_dev_missing(struct bch_fs *c, unsigned dev)
{
- bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
+ if (dev != BCH_SB_MEMBER_INVALID)
+ bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
}
void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
index e2630548c0f6..d727d2dfda08 100644
--- a/fs/bcachefs/sb-members_format.h
+++ b/fs/bcachefs/sb-members_format.h
@@ -8,6 +8,11 @@
*/
#define BCH_SB_MEMBERS_MAX 64
+/*
+ * Sentinal value - indicates a device that does not exist
+ */
+#define BCH_SB_MEMBER_INVALID 255
+
#define BCH_MIN_NR_NBUCKETS (1 << 6)
#define BCH_IOPS_MEASUREMENTS() \
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index f393023a3ae2..33f2a64c14c9 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -461,7 +461,7 @@ STORE(bch2_fs)
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
- c->btree_key_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
+ c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc);
}
if (attr == &sysfs_trigger_gc)
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 138320eaa2ad..1b8554460af4 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -416,7 +416,6 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats
printbuf_tabstop_push(out, TABSTOP_SIZE + 2);
prt_printf(out, "\tsince mount\r\trecent\r\n");
- prt_printf(out, "recent");
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, out->indent + 20);
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index f2b4c17a0307..331f944d73dc 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -612,10 +612,20 @@ static int bch2_xattr_bcachefs_get_effective(
name, buffer, size, true);
}
+/* Noop - xattrs in the bcachefs_effective namespace are inherited */
+static int bch2_xattr_bcachefs_set_effective(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *vinode,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ return 0;
+}
+
static const struct xattr_handler bch_xattr_bcachefs_effective_handler = {
.prefix = "bcachefs_effective.",
.get = bch2_xattr_bcachefs_get_effective,
- .set = bch2_xattr_bcachefs_set,
+ .set = bch2_xattr_bcachefs_set_effective,
};
#endif /* NO_BCACHEFS_FS */
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index a778411574a9..fa66a09e496a 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -172,11 +172,11 @@ static void bfs_write_failed(struct address_space *mapping, loff_t to)
static int bfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, bfs_get_block);
+ ret = block_write_begin(mapping, pos, len, foliop, bfs_get_block);
if (unlikely(ret))
bfs_write_failed(mapping, pos + len);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 28a3439f163a..4fe5bb9f1b1f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -589,6 +589,9 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
if (bprm->have_execfd)
nitems++;
+#ifdef ELF_HWCAP2
+ nitems++;
+#endif
csp = sp;
sp -= nitems * 2 * sizeof(unsigned long);
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index f04d93109960..b4e31ae17cd9 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -668,7 +668,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = bbio->fs_info;
- struct btrfs_bio *orig_bbio = bbio;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
@@ -706,7 +705,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
if (ret)
- goto fail_put_bio;
+ goto fail;
}
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
@@ -740,13 +739,13 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
- goto fail_put_bio;
+ goto fail;
} else if (use_append ||
(btrfs_is_zoned(fs_info) && inode &&
inode->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_alloc_dummy_sum(bbio);
if (ret)
- goto fail_put_bio;
+ goto fail;
}
}
@@ -754,12 +753,23 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
done:
return map_length == length;
-fail_put_bio:
- if (map_length < length)
- btrfs_cleanup_bio(bbio);
fail:
btrfs_bio_counter_dec(fs_info);
- btrfs_bio_end_io(orig_bbio, ret);
+ /*
+ * We have split the original bbio, now we have to end both the current
+ * @bbio and remaining one, as the remaining one will never be submitted.
+ */
+ if (map_length < length) {
+ struct btrfs_bio *remaining = bbio->private;
+
+ ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset);
+ ASSERT(remaining);
+
+ remaining->bio.bi_status = ret;
+ btrfs_orig_bbio_end_io(remaining);
+ }
+ bbio->bio.bi_status = ret;
+ btrfs_orig_bbio_end_io(bbio);
/* Do not submit another chunk */
return true;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 75fa563e4cac..c8568b1a61c4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -459,7 +459,6 @@ struct btrfs_file_private {
void *filldir_buf;
u64 last_index;
struct extent_state *llseek_cached_state;
- bool fsync_skip_inode_lock;
};
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index 67adbe9d294a..364bce34f034 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -864,13 +864,6 @@ again:
if (IS_ERR_OR_NULL(dio)) {
ret = PTR_ERR_OR_ZERO(dio);
} else {
- struct btrfs_file_private stack_private = { 0 };
- struct btrfs_file_private *private;
- const bool have_private = (file->private_data != NULL);
-
- if (!have_private)
- file->private_data = &stack_private;
-
/*
* If we have a synchronous write, we must make sure the fsync
* triggered by the iomap_dio_complete() call below doesn't
@@ -879,13 +872,10 @@ again:
* partial writes due to the input buffer (or parts of it) not
* being already faulted in.
*/
- private = file->private_data;
- private->fsync_skip_inode_lock = true;
+ ASSERT(current->journal_info == NULL);
+ current->journal_info = BTRFS_TRANS_DIO_WRITE_STUB;
ret = iomap_dio_complete(dio);
- private->fsync_skip_inode_lock = false;
-
- if (!have_private)
- file->private_data = NULL;
+ current->journal_info = NULL;
}
/* No increment (+=) because iomap returns a cumulative value. */
diff --git a/fs/btrfs/fiemap.c b/fs/btrfs/fiemap.c
index 8f95f3e44e99..df7f09f3b02e 100644
--- a/fs/btrfs/fiemap.c
+++ b/fs/btrfs/fiemap.c
@@ -637,7 +637,7 @@ static int extent_fiemap(struct btrfs_inode *inode,
struct btrfs_path *path;
struct fiemap_cache cache = { 0 };
struct btrfs_backref_share_check_ctx *backref_ctx;
- u64 last_extent_end;
+ u64 last_extent_end = 0;
u64 prev_extent_end;
u64 range_start;
u64 range_end;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9914419f3b7d..2aeb8116549c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1603,7 +1603,6 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
*/
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct btrfs_file_private *private = file->private_data;
struct dentry *dentry = file_dentry(file);
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_root *root = inode->root;
@@ -1613,7 +1612,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret = 0, err;
u64 len;
bool full_sync;
- const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false);
+ bool skip_ilock = false;
+
+ if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) {
+ skip_ilock = true;
+ current->journal_info = NULL;
+ lockdep_assert_held(&inode->vfs_inode.i_rwsem);
+ }
trace_btrfs_sync_file(file, datasync);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5d57a285d59b..feb8f9f2f358 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -4185,6 +4185,8 @@ static int try_flush_qgroup(struct btrfs_root *root)
return 0;
}
+ btrfs_run_delayed_iputs(root->fs_info);
+ btrfs_wait_on_delayed_iputs(root->fs_info);
ret = btrfs_start_delalloc_snapshot(root, true);
if (ret < 0)
goto out;
@@ -4344,10 +4346,9 @@ static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
int ret;
if (btrfs_qgroup_mode(inode->root->fs_info) == BTRFS_QGROUP_MODE_DISABLED) {
- extent_changeset_init(&changeset);
return clear_record_extent_bits(&inode->io_tree, start,
start + len - 1,
- EXTENT_QGROUP_RESERVED, &changeset);
+ EXTENT_QGROUP_RESERVED, NULL);
}
/* In release case, we shouldn't have @reserved */
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 68e14fd48638..c691784b4660 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1985,8 +1985,8 @@ static bool is_reclaim_urgent(struct btrfs_space_info *space_info)
return unalloc < data_chunk_size;
}
-static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, int raid)
+static void do_reclaim_sweep(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info, int raid)
{
struct btrfs_block_group *bg;
int thresh_pct;
@@ -2031,7 +2031,6 @@ again:
}
up_read(&space_info->groups_sem);
- return 0;
}
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
@@ -2074,21 +2073,15 @@ bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
return ret;
}
-int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
+void btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
{
- int ret;
int raid;
struct btrfs_space_info *space_info;
list_for_each_entry(space_info, &fs_info->space_info, list) {
if (!btrfs_should_periodic_reclaim(space_info))
continue;
- for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
- ret = do_reclaim_sweep(fs_info, space_info, raid);
- if (ret)
- return ret;
- }
+ for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++)
+ do_reclaim_sweep(fs_info, space_info, raid);
}
-
- return ret;
}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 88b44221ce97..5602026c5e14 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -294,6 +294,6 @@ void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s6
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info);
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info);
-int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
+void btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
#endif /* BTRFS_SPACE_INFO_H */
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 98c03ddc760b..dd9ce9b9f69e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,6 +27,12 @@ struct btrfs_root_item;
struct btrfs_root;
struct btrfs_path;
+/*
+ * Signal that a direct IO write is in progress, to avoid deadlock for sync
+ * direct IO writes when fsync is called during the direct IO write path.
+ */
+#define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1)
+
/* Radix-tree tag for roots that are part of the trasaction. */
#define BTRFS_ROOT_TRANS_TAG 0
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 66f63e82af79..047e3337852e 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1406,6 +1406,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
return -EINVAL;
}
+ bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
btrfs_err(bg->fs_info,
"zoned: cannot recover write pointer for zone %llu",
@@ -1432,7 +1434,6 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
}
bg->alloc_offset = zone_info[0].alloc_offset;
- bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
return 0;
}
@@ -1450,6 +1451,9 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
return -EINVAL;
}
+ /* In case a device is missing we have a cap of 0, so don't use it. */
+ bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
+
for (i = 0; i < map->num_stripes; i++) {
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
zone_info[i].alloc_offset == WP_CONVENTIONAL)
@@ -1471,9 +1475,6 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
if (test_bit(0, active))
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}
- /* In case a device is missing we have a cap of 0, so don't use it. */
- bg->zone_capacity = min_not_zero(zone_info[0].capacity,
- zone_info[1].capacity);
}
if (zone_info[0].alloc_offset != WP_MISSING_DEV)
@@ -1563,6 +1564,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
unsigned long *active = NULL;
u64 last_alloc = 0;
u32 num_sequential = 0, num_conventional = 0;
+ u64 profile;
if (!btrfs_is_zoned(fs_info))
return 0;
@@ -1623,7 +1625,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
}
}
- switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
+ switch (profile) {
case 0: /* single */
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
break;
@@ -1650,6 +1653,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
+ if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 &&
+ profile != BTRFS_BLOCK_GROUP_RAID10) {
+ /*
+ * Detected broken write pointer. Make this block group
+ * unallocatable by setting the allocation pointer at the end of
+ * allocatable region. Relocating this block group will fix the
+ * mismatch.
+ *
+ * Currently, we cannot handle RAID0 or RAID10 case like this
+ * because we don't have a proper zone_capacity value. But,
+ * reading from this block group won't work anyway by a missing
+ * stripe.
+ */
+ cache->alloc_offset = cache->zone_capacity;
+ ret = 0;
+ }
+
out:
/* Reject non SINGLE data profiles without RST */
if ((map->type & BTRFS_BLOCK_GROUP_DATA) &&
diff --git a/fs/buffer.c b/fs/buffer.c
index e55ad471c530..1fc9a50def0b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -774,12 +774,11 @@ EXPORT_SYMBOL(block_dirty_folio);
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
struct buffer_head *bh;
- struct list_head tmp;
struct address_space *mapping;
int err = 0, err2;
struct blk_plug plug;
+ LIST_HEAD(tmp);
- INIT_LIST_HEAD(&tmp);
blk_start_plug(&plug);
spin_lock(lock);
@@ -958,12 +957,9 @@ no_grow:
}
EXPORT_SYMBOL_GPL(folio_alloc_buffers);
-struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
- bool retry)
+struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size)
{
gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
- if (retry)
- gfp |= __GFP_NOFAIL;
return folio_alloc_buffers(page_folio(page), size, gfp);
}
@@ -2168,11 +2164,10 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
return err;
}
-int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+int __block_write_begin(struct folio *folio, loff_t pos, unsigned len,
get_block_t *get_block)
{
- return __block_write_begin_int(page_folio(page), pos, len, get_block,
- NULL);
+ return __block_write_begin_int(folio, pos, len, get_block, NULL);
}
EXPORT_SYMBOL(__block_write_begin);
@@ -2222,33 +2217,33 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to)
* The filesystem needs to handle block truncation upon failure.
*/
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
- struct page **pagep, get_block_t *get_block)
+ struct folio **foliop, get_block_t *get_block)
{
pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio;
int status;
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- status = __block_write_begin(page, pos, len, get_block);
+ status = __block_write_begin_int(folio, pos, len, get_block, NULL);
if (unlikely(status)) {
- unlock_page(page);
- put_page(page);
- page = NULL;
+ folio_unlock(folio);
+ folio_put(folio);
+ folio = NULL;
}
- *pagep = page;
+ *foliop = folio;
return status;
}
EXPORT_SYMBOL(block_write_begin);
int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
size_t start = pos - folio_pos(folio);
if (unlikely(copied < len)) {
@@ -2280,19 +2275,19 @@ EXPORT_SYMBOL(block_write_end);
int generic_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
bool i_size_changed = false;
- copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ copied = block_write_end(file, mapping, pos, len, copied, folio, fsdata);
/*
* No need to use i_size_read() here, the i_size cannot change under us
* because we hold i_rwsem.
*
- * But it's important to update i_size while still holding page lock:
+ * But it's important to update i_size while still holding folio lock:
* page writeout could otherwise come in and zero beyond i_size.
*/
if (pos + copied > inode->i_size) {
@@ -2300,8 +2295,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
i_size_changed = true;
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
@@ -2467,7 +2462,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
{
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int err;
@@ -2475,11 +2470,11 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
if (err)
goto out;
- err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata);
+ err = aops->write_begin(NULL, mapping, size, 0, &folio, &fsdata);
if (err)
goto out;
- err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
+ err = aops->write_end(NULL, mapping, size, 0, 0, folio, fsdata);
BUG_ON(err > 0);
out:
@@ -2493,7 +2488,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
const struct address_space_operations *aops = mapping->a_ops;
unsigned int blocksize = i_blocksize(inode);
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
pgoff_t index, curidx;
loff_t curpos;
@@ -2512,12 +2507,12 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
len = PAGE_SIZE - zerofrom;
err = aops->write_begin(file, mapping, curpos, len,
- &page, &fsdata);
+ &folio, &fsdata);
if (err)
goto out;
- zero_user(page, zerofrom, len);
+ folio_zero_range(folio, offset_in_folio(folio, curpos), len);
err = aops->write_end(file, mapping, curpos, len, len,
- page, fsdata);
+ folio, fsdata);
if (err < 0)
goto out;
BUG_ON(err != len);
@@ -2545,12 +2540,12 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
len = offset - zerofrom;
err = aops->write_begin(file, mapping, curpos, len,
- &page, &fsdata);
+ &folio, &fsdata);
if (err)
goto out;
- zero_user(page, zerofrom, len);
+ folio_zero_range(folio, offset_in_folio(folio, curpos), len);
err = aops->write_end(file, mapping, curpos, len, len,
- page, fsdata);
+ folio, fsdata);
if (err < 0)
goto out;
BUG_ON(err != len);
@@ -2566,7 +2561,7 @@ out:
*/
int cont_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata,
+ struct folio **foliop, void **fsdata,
get_block_t *get_block, loff_t *bytes)
{
struct inode *inode = mapping->host;
@@ -2584,7 +2579,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
(*bytes)++;
}
- return block_write_begin(mapping, pos, len, pagep, get_block);
+ return block_write_begin(mapping, pos, len, foliop, get_block);
}
EXPORT_SYMBOL(cont_write_begin);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c4744a02db75..a5f848c167fa 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1508,20 +1508,18 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned
*/
static int ceph_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct folio *folio = NULL;
int r;
- r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, &folio, NULL);
+ r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, foliop, NULL);
if (r < 0)
return r;
- folio_wait_private_2(folio); /* [DEPRECATED] */
- WARN_ON_ONCE(!folio_test_locked(folio));
- *pagep = &folio->page;
+ folio_wait_private_2(*foliop); /* [DEPRECATED] */
+ WARN_ON_ONCE(!folio_test_locked(*foliop));
return 0;
}
@@ -1531,9 +1529,8 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
*/
static int ceph_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *subpage, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(subpage);
struct inode *inode = file_inode(file);
struct ceph_client *cl = ceph_inode_to_client(inode);
bool check_cap = false;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 71cd70514efa..4a8eec46254b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -695,6 +695,7 @@ void ceph_evict_inode(struct inode *inode)
percpu_counter_dec(&mdsc->metric.total_inodes);
+ netfs_wait_for_outstanding_io(inode);
truncate_inode_pages_final(&inode->i_data);
if (inode->i_state & I_PINNING_NETFS_WB)
ceph_fscache_unuse_cookie(inode, true);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 6898dc621011..6896fce122e1 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -119,31 +119,43 @@ static const struct fs_parameter_spec coda_param_specs[] = {
{}
};
-static int coda_parse_fd(struct fs_context *fc, int fd)
+static int coda_set_idx(struct fs_context *fc, struct file *file)
{
struct coda_fs_context *ctx = fc->fs_private;
- struct fd f;
struct inode *inode;
int idx;
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
- inode = file_inode(f.file);
+ inode = file_inode(file);
if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
- fdput(f);
- return invalf(fc, "code: Not coda psdev");
+ return invalf(fc, "coda: Not coda psdev");
}
-
idx = iminor(inode);
- fdput(f);
-
if (idx < 0 || idx >= MAX_CODADEVS)
return invalf(fc, "coda: Bad minor number");
ctx->idx = idx;
return 0;
}
+static int coda_parse_fd(struct fs_context *fc, struct fs_parameter *param,
+ struct fs_parse_result *result)
+{
+ struct file *file;
+ int err;
+
+ if (param->type == fs_value_is_file) {
+ file = param->file;
+ param->file = NULL;
+ } else {
+ file = fget(result->uint_32);
+ }
+ if (!file)
+ return -EBADF;
+
+ err = coda_set_idx(fc, file);
+ fput(file);
+ return err;
+}
+
static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct fs_parse_result result;
@@ -155,7 +167,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
switch (opt) {
case Opt_fd:
- return coda_parse_fd(fc, result.uint_32);
+ return coda_parse_fd(fc, param, &result);
}
return 0;
@@ -167,6 +179,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
*/
static int coda_parse_monolithic(struct fs_context *fc, void *_data)
{
+ struct file *file;
struct coda_mount_data *data = _data;
if (!data)
@@ -175,7 +188,11 @@ static int coda_parse_monolithic(struct fs_context *fc, void *_data)
if (data->version != CODA_MOUNT_VERSION)
return invalf(fc, "coda: Bad mount version");
- coda_parse_fd(fc, data->fd);
+ file = fget(data->fd);
+ if (file) {
+ coda_set_idx(fc, file);
+ fput(file);
+ }
return 0;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 3d8daaecb6d1..0f6b16ba30d0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -96,11 +96,16 @@ EXPORT_SYMBOL(dotdot_name);
*
* This hash-function tries to avoid losing too many bits of hash
* information, yet avoid using a prime hash-size or similar.
+ *
+ * Marking the variables "used" ensures that the compiler doesn't
+ * optimize them away completely on architectures with runtime
+ * constant infrastructure, this allows debuggers to see their
+ * values. But updating these values has no effect on those arches.
*/
-static unsigned int d_hash_shift __ro_after_init;
+static unsigned int d_hash_shift __ro_after_init __used;
-static struct hlist_bl_head *dentry_hashtable __ro_after_init;
+static struct hlist_bl_head *dentry_hashtable __ro_after_init __used;
static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
{
@@ -1908,8 +1913,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
__d_instantiate(entry, inode);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_instantiate_new);
@@ -2163,9 +2173,6 @@ seqretry:
* without taking d_lock and checking d_seq sequence count against @seq
* returned here.
*
- * A refcount may be taken on the found dentry with the d_rcu_to_refcount
- * function.
- *
* Alternatively, __d_lookup_rcu may be called again to look up the child of
* the returned dentry, so long as its parent's seqlock is checked after the
* child is looked up. Thus, an interlocking stepping of sequence lock checks
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 91521576f500..66d9b3b4c588 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -89,12 +89,14 @@ enum {
Opt_uid,
Opt_gid,
Opt_mode,
+ Opt_source,
};
static const struct fs_parameter_spec debugfs_param_specs[] = {
fsparam_gid ("gid", Opt_gid),
fsparam_u32oct ("mode", Opt_mode),
fsparam_uid ("uid", Opt_uid),
+ fsparam_string ("source", Opt_source),
{}
};
@@ -126,6 +128,12 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
case Opt_mode:
opts->mode = result.uint_32 & S_IALLUGO;
break;
+ case Opt_source:
+ if (fc->source)
+ return invalfc(fc, "Multiple sources specified");
+ fc->source = param->string;
+ param->string = NULL;
+ break;
/*
* We might like to report bad mount options here;
* but traditionally debugfs has ignored all mount options
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b0aafe640fa4..bbd05f1a2145 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
#include <linux/rwsem.h>
#include <linux/uio.h>
#include <linux/atomic.h>
-#include <linux/prefetch.h>
#include "internal.h"
@@ -1121,11 +1120,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct blk_plug plug;
unsigned long align = offset | iov_iter_alignment(iter);
- /*
- * Avoid references to bdev if not absolutely needed to give
- * the early prefetch in the caller enough time.
- */
-
/* watch out for a 0 len io from a tricksy fs */
if (iov_iter_rw(iter) == READ && !count)
return 0;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index e2483acc4366..287e5d407f08 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -234,17 +234,17 @@ out:
/*
* Called with lower inode mutex held.
*/
-static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
+static int fill_zeros_to_end_of_page(struct folio *folio, unsigned int to)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
int end_byte_in_page;
- if ((i_size_read(inode) / PAGE_SIZE) != page->index)
+ if ((i_size_read(inode) / PAGE_SIZE) != folio->index)
goto out;
end_byte_in_page = i_size_read(inode) % PAGE_SIZE;
if (to > end_byte_in_page)
end_byte_in_page = to;
- zero_user_segment(page, end_byte_in_page, PAGE_SIZE);
+ folio_zero_segment(folio, end_byte_in_page, PAGE_SIZE);
out:
return 0;
}
@@ -255,7 +255,7 @@ out:
* @mapping: The eCryptfs object
* @pos: The file offset at which to start writing
* @len: Length of the write
- * @pagep: Pointer to return the page
+ * @foliop: Pointer to return the folio
* @fsdata: Pointer to return fs data (unused)
*
* This function must zero any hole we create
@@ -265,38 +265,39 @@ out:
static int ecryptfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
pgoff_t index = pos >> PAGE_SHIFT;
- struct page *page;
+ struct folio *folio;
loff_t prev_page_end_size;
int rc = 0;
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
- *pagep = page;
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ *foliop = folio;
prev_page_end_size = ((loff_t)index << PAGE_SHIFT);
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(mapping->host)->crypt_stat;
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
rc = ecryptfs_read_lower_page_segment(
- page, index, 0, PAGE_SIZE, mapping->host);
+ &folio->page, index, 0, PAGE_SIZE, mapping->host);
if (rc) {
printk(KERN_ERR "%s: Error attempting to read "
"lower page segment; rc = [%d]\n",
__func__, rc);
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
goto out;
} else
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
} else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
rc = ecryptfs_copy_up_encrypted_with_header(
- page, crypt_stat);
+ &folio->page, crypt_stat);
if (rc) {
printk(KERN_ERR "%s: Error attempting "
"to copy the encrypted content "
@@ -304,46 +305,46 @@ static int ecryptfs_write_begin(struct file *file,
"inserting the metadata from "
"the xattr into the header; rc "
"= [%d]\n", __func__, rc);
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
goto out;
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
} else {
rc = ecryptfs_read_lower_page_segment(
- page, index, 0, PAGE_SIZE,
+ &folio->page, index, 0, PAGE_SIZE,
mapping->host);
if (rc) {
printk(KERN_ERR "%s: Error reading "
"page; rc = [%d]\n",
__func__, rc);
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
goto out;
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
} else {
if (prev_page_end_size
- >= i_size_read(page->mapping->host)) {
- zero_user(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
+ >= i_size_read(mapping->host)) {
+ folio_zero_range(folio, 0, PAGE_SIZE);
+ folio_mark_uptodate(folio);
} else if (len < PAGE_SIZE) {
- rc = ecryptfs_decrypt_page(page);
+ rc = ecryptfs_decrypt_page(&folio->page);
if (rc) {
printk(KERN_ERR "%s: Error decrypting "
"page at index [%ld]; "
"rc = [%d]\n",
- __func__, page->index, rc);
- ClearPageUptodate(page);
+ __func__, folio->index, rc);
+ folio_clear_uptodate(folio);
goto out;
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
}
}
/* If creating a page or more of holes, zero them out via truncate.
* Note, this will increase i_size. */
if (index != 0) {
- if (prev_page_end_size > i_size_read(page->mapping->host)) {
+ if (prev_page_end_size > i_size_read(mapping->host)) {
rc = ecryptfs_truncate(file->f_path.dentry,
prev_page_end_size);
if (rc) {
@@ -359,12 +360,11 @@ static int ecryptfs_write_begin(struct file *file,
* of page? Zero it out. */
if ((i_size_read(mapping->host) == prev_page_end_size)
&& (pos != 0))
- zero_user(page, 0, PAGE_SIZE);
+ folio_zero_range(folio, 0, PAGE_SIZE);
out:
if (unlikely(rc)) {
- unlock_page(page);
- put_page(page);
- *pagep = NULL;
+ folio_unlock(folio);
+ folio_put(folio);
}
return rc;
}
@@ -457,13 +457,13 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
* @pos: The file position
* @len: The length of the data (unused)
* @copied: The amount of data copied
- * @page: The eCryptfs page
+ * @folio: The eCryptfs folio
* @fsdata: The fsdata (unused)
*/
static int ecryptfs_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
pgoff_t index = pos >> PAGE_SHIFT;
unsigned from = pos & (PAGE_SIZE - 1);
@@ -476,8 +476,8 @@ static int ecryptfs_write_end(struct file *file,
ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
"(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
- rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
- to);
+ rc = ecryptfs_write_lower_page_segment(ecryptfs_inode,
+ &folio->page, 0, to);
if (!rc) {
rc = copied;
fsstack_copy_inode_size(ecryptfs_inode,
@@ -485,21 +485,21 @@ static int ecryptfs_write_end(struct file *file,
}
goto out;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
if (copied < PAGE_SIZE) {
rc = 0;
goto out;
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
/* Fills in zeros if 'to' goes beyond inode size */
- rc = fill_zeros_to_end_of_page(page, to);
+ rc = fill_zeros_to_end_of_page(folio, to);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
"zeros in page with index = [0x%.16lx]\n", index);
goto out;
}
- rc = ecryptfs_encrypt_page(page);
+ rc = ecryptfs_encrypt_page(&folio->page);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
"index [0x%.16lx])\n", index);
@@ -518,8 +518,8 @@ static int ecryptfs_write_end(struct file *file,
else
rc = copied;
out:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return rc;
}
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 2193a6710c8f..c3b90abdee37 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -8,19 +8,15 @@
static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
void *dentry_blk, struct erofs_dirent *de,
- unsigned int nameoff, unsigned int maxsize)
+ unsigned int nameoff0, unsigned int maxsize)
{
- const struct erofs_dirent *end = dentry_blk + nameoff;
+ const struct erofs_dirent *end = dentry_blk + nameoff0;
while (de < end) {
- const char *de_name;
+ unsigned char d_type = fs_ftype_to_dtype(de->file_type);
+ unsigned int nameoff = le16_to_cpu(de->nameoff);
+ const char *de_name = (char *)dentry_blk + nameoff;
unsigned int de_namelen;
- unsigned char d_type;
-
- d_type = fs_ftype_to_dtype(de->file_type);
-
- nameoff = le16_to_cpu(de->nameoff);
- de_name = (char *)dentry_blk + nameoff;
/* the last dirent in the block? */
if (de + 1 >= end)
@@ -52,21 +48,20 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct super_block *sb = dir->i_sb;
unsigned long bsz = sb->s_blocksize;
- const size_t dirsize = i_size_read(dir);
- unsigned int i = erofs_blknr(sb, ctx->pos);
unsigned int ofs = erofs_blkoff(sb, ctx->pos);
int err = 0;
bool initial = true;
buf.mapping = dir->i_mapping;
- while (ctx->pos < dirsize) {
+ while (ctx->pos < dir->i_size) {
+ erofs_off_t dbstart = ctx->pos - ofs;
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
- de = erofs_bread(&buf, erofs_pos(sb, i), EROFS_KMAP);
+ de = erofs_bread(&buf, dbstart, EROFS_KMAP);
if (IS_ERR(de)) {
erofs_err(sb, "fail to readdir of logical block %u of nid %llu",
- i, EROFS_I(dir)->nid);
+ erofs_blknr(sb, dbstart), EROFS_I(dir)->nid);
err = PTR_ERR(de);
break;
}
@@ -79,25 +74,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
break;
}
- maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz);
-
+ maxsize = min_t(unsigned int, dir->i_size - dbstart, bsz);
/* search dirents at the arbitrary position */
if (initial) {
initial = false;
-
ofs = roundup(ofs, sizeof(struct erofs_dirent));
- ctx->pos = erofs_pos(sb, i) + ofs;
- if (ofs >= nameoff)
- goto skip_this;
+ ctx->pos = dbstart + ofs;
}
err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs,
nameoff, maxsize);
if (err)
break;
-skip_this:
- ctx->pos = erofs_pos(sb, i) + maxsize;
- ++i;
+ ctx->pos = dbstart + maxsize;
ofs = 0;
}
erofs_put_metabuf(&buf);
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 43c09aae2afc..419432be3223 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -257,25 +257,23 @@ static int erofs_fill_inode(struct inode *inode)
goto out_unlock;
}
+ mapping_set_large_folios(inode->i_mapping);
if (erofs_inode_is_data_compressed(vi->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT,
erofs_info, inode->i_sb,
"EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
inode->i_mapping->a_ops = &z_erofs_aops;
- err = 0;
- goto out_unlock;
-#endif
+#else
err = -EOPNOTSUPP;
- goto out_unlock;
- }
- inode->i_mapping->a_ops = &erofs_raw_access_aops;
- mapping_set_large_folios(inode->i_mapping);
+#endif
+ } else {
+ inode->i_mapping->a_ops = &erofs_raw_access_aops;
#ifdef CONFIG_EROFS_FS_ONDEMAND
- if (erofs_is_fscache_mode(inode->i_sb))
- inode->i_mapping->a_ops = &erofs_fscache_access_aops;
+ if (erofs_is_fscache_mode(inode->i_sb))
+ inode->i_mapping->a_ops = &erofs_fscache_access_aops;
#endif
-
+ }
out_unlock:
erofs_put_metabuf(&buf);
return err;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 736607675396..45dc15ebd870 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -220,7 +220,7 @@ struct erofs_buf {
};
#define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL })
-#define erofs_blknr(sb, addr) ((addr) >> (sb)->s_blocksize_bits)
+#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits))
#define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1))
#define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits)
#define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits)
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 32ce5b35e1df..6cb5c8916174 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -108,22 +108,6 @@ static void erofs_free_inode(struct inode *inode)
kmem_cache_free(erofs_inode_cachep, vi);
}
-static bool check_layout_compatibility(struct super_block *sb,
- struct erofs_super_block *dsb)
-{
- const unsigned int feature = le32_to_cpu(dsb->feature_incompat);
-
- EROFS_SB(sb)->feature_incompat = feature;
-
- /* check if current kernel meets all mandatory requirements */
- if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
- erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
- feature & ~EROFS_ALL_FEATURE_INCOMPAT);
- return false;
- }
- return true;
-}
-
/* read variable-sized metadata, offset will be aligned by 4-byte */
void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
erofs_off_t *offset, int *lengthp)
@@ -279,7 +263,7 @@ static int erofs_scan_devices(struct super_block *sb,
static int erofs_read_superblock(struct super_block *sb)
{
- struct erofs_sb_info *sbi;
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_super_block *dsb;
void *data;
@@ -291,9 +275,7 @@ static int erofs_read_superblock(struct super_block *sb)
return PTR_ERR(data);
}
- sbi = EROFS_SB(sb);
dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
-
ret = -EINVAL;
if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
erofs_err(sb, "cannot find valid erofs superblock");
@@ -318,8 +300,12 @@ static int erofs_read_superblock(struct super_block *sb)
}
ret = -EINVAL;
- if (!check_layout_compatibility(sb, dsb))
+ sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat);
+ if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) {
+ erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
+ sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT);
goto out;
+ }
sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) {
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index 9b53883e5caf..37afe2024840 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -111,7 +111,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages)
out:
if (i < z_erofs_gbuf_count && tmp_pages) {
for (j = 0; j < nrpages; ++j)
- if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j])
+ if (tmp_pages[j] && (j >= gbuf->nrpages ||
+ tmp_pages[j] != gbuf->pages[j]))
__free_page(tmp_pages[j]);
kfree(tmp_pages);
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f53ca4f7fced..145f5349c612 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -420,7 +420,7 @@ static bool busy_loop_ep_timeout(unsigned long start_time,
static bool ep_busy_loop_on(struct eventpoll *ep)
{
- return !!ep->busy_poll_usecs || net_busy_loop_on();
+ return !!READ_ONCE(ep->busy_poll_usecs) || net_busy_loop_on();
}
static bool ep_busy_loop_end(void *p, unsigned long start_time)
@@ -2200,11 +2200,6 @@ static int do_epoll_create(int flags)
error = PTR_ERR(file);
goto out_free_fd;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
- ep->busy_poll_usecs = 0;
- ep->busy_poll_budget = 0;
- ep->prefer_busy_poll = false;
-#endif
ep->file = file;
fd_install(fd, file);
return fd;
diff --git a/fs/exec.c b/fs/exec.c
index 50e76cc633c4..caae051c5a95 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -145,13 +145,11 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
goto out;
/*
- * may_open() has already checked for this, so it should be
- * impossible to trip now. But we need to be extra cautious
- * and check again at the very end too.
+ * Check do_open_execat() for an explanation.
*/
error = -EACCES;
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
- path_noexec(&file->f_path)))
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+ path_noexec(&file->f_path))
goto exit;
error = -ENOEXEC;
@@ -954,7 +952,6 @@ EXPORT_SYMBOL(transfer_args_to_stack);
static struct file *do_open_execat(int fd, struct filename *name, int flags)
{
struct file *file;
- int err;
struct open_flags open_exec_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
.acc_mode = MAY_EXEC,
@@ -971,24 +968,20 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
file = do_filp_open(fd, name, &open_exec_flags);
if (IS_ERR(file))
- goto out;
+ return file;
/*
- * may_open() has already checked for this, so it should be
- * impossible to trip now. But we need to be extra cautious
- * and check again at the very end too.
+ * In the past the regular type check was here. It moved to may_open() in
+ * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
+ * an invariant that all non-regular files error out before we get here.
*/
- err = -EACCES;
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
- path_noexec(&file->f_path)))
- goto exit;
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+ path_noexec(&file->f_path)) {
+ fput(file);
+ return ERR_PTR(-EACCES);
+ }
-out:
return file;
-
-exit:
- fput(file);
- return ERR_PTR(err);
}
/**
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 64c31867bc76..e19469e88000 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -535,20 +535,20 @@ static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
while (start < end) {
u32 zerofrom, len;
- struct page *page = NULL;
+ struct folio *folio;
zerofrom = start & (PAGE_SIZE - 1);
len = PAGE_SIZE - zerofrom;
if (start + len > end)
len = end - start;
- err = ops->write_begin(file, mapping, start, len, &page, NULL);
+ err = ops->write_begin(file, mapping, start, len, &folio, NULL);
if (err)
goto out;
- zero_user_segment(page, zerofrom, zerofrom + len);
+ folio_zero_range(folio, offset_in_folio(folio, start), len);
- err = ops->write_end(file, mapping, start, len, len, page, NULL);
+ err = ops->write_end(file, mapping, start, len, len, folio, NULL);
if (err < 0)
goto out;
start += len;
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index dd894e558c91..05f0e07b01d0 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -448,12 +448,11 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to)
static int exfat_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned int len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- *pagep = NULL;
- ret = block_write_begin(mapping, pos, len, pagep, exfat_get_block);
+ ret = block_write_begin(mapping, pos, len, foliop, exfat_get_block);
if (ret < 0)
exfat_write_failed(mapping, pos+len);
@@ -463,13 +462,13 @@ static int exfat_write_begin(struct file *file, struct address_space *mapping,
static int exfat_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned int len, unsigned int copied,
- struct page *pagep, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
struct exfat_inode_info *ei = EXFAT_I(inode);
int err;
- err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+ err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
if (ei->i_size_aligned < i_size_read(inode)) {
exfat_fs_error(inode->i_sb,
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 6622c582f550..402fecf90a44 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -87,7 +87,7 @@ static void ext2_commit_chunk(struct folio *folio, loff_t pos, unsigned len)
struct inode *dir = mapping->host;
inode_inc_iversion(dir);
- block_write_end(NULL, mapping, pos, len, len, &folio->page, NULL);
+ block_write_end(NULL, mapping, pos, len, len, folio, NULL);
if (pos+len > dir->i_size) {
i_size_write(dir, pos+len);
@@ -434,7 +434,7 @@ int ext2_inode_by_name(struct inode *dir, const struct qstr *child, ino_t *ino)
static int ext2_prepare_chunk(struct folio *folio, loff_t pos, unsigned len)
{
- return __block_write_begin(&folio->page, pos, len, ext2_get_block);
+ return __block_write_begin(folio, pos, len, ext2_get_block);
}
static int ext2_handle_dirsync(struct inode *dir)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0caa1650cee8..30f8201c155f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -916,11 +916,11 @@ static void ext2_readahead(struct readahead_control *rac)
static int
ext2_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, ext2_get_block);
+ ret = block_write_begin(mapping, pos, len, foliop, ext2_get_block);
if (ret < 0)
ext2_write_failed(mapping, pos + len);
return ret;
@@ -928,11 +928,11 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
static int ext2_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
if (ret < len)
ext2_write_failed(mapping, pos + len);
return ret;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d62a4b9b26ce..ecc15e5f1eba 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3565,13 +3565,13 @@ int ext4_readpage_inline(struct inode *inode, struct folio *folio);
extern int ext4_try_to_write_inline_data(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
- struct page **pagep);
+ struct folio **foliop);
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct folio *folio);
extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
- struct page **pagep,
+ struct folio **foliop,
void **fsdata);
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 4282e12dc405..edf4aa99a974 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -601,10 +601,10 @@ retry:
goto out;
if (ext4_should_dioread_nolock(inode)) {
- ret = __block_write_begin(&folio->page, from, to,
+ ret = __block_write_begin(folio, from, to,
ext4_get_block_unwritten);
} else
- ret = __block_write_begin(&folio->page, from, to, ext4_get_block);
+ ret = __block_write_begin(folio, from, to, ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode,
@@ -660,7 +660,7 @@ out_nofolio:
int ext4_try_to_write_inline_data(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
- struct page **pagep)
+ struct folio **foliop)
{
int ret;
handle_t *handle;
@@ -708,7 +708,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
goto out;
}
- *pagep = &folio->page;
+ *foliop = folio;
down_read(&EXT4_I(inode)->xattr_sem);
if (!ext4_has_inline_data(inode)) {
ret = 0;
@@ -856,7 +856,7 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
goto out;
}
- ret = __block_write_begin(&folio->page, 0, inline_size,
+ ret = __block_write_begin(folio, 0, inline_size,
ext4_da_get_block_prep);
if (ret) {
up_read(&EXT4_I(inode)->xattr_sem);
@@ -891,7 +891,7 @@ out:
int ext4_da_write_inline_data_begin(struct address_space *mapping,
struct inode *inode,
loff_t pos, unsigned len,
- struct page **pagep,
+ struct folio **foliop,
void **fsdata)
{
int ret;
@@ -954,7 +954,7 @@ retry_journal:
goto out_release_page;
up_read(&EXT4_I(inode)->xattr_sem);
- *pagep = &folio->page;
+ *foliop = folio;
brelse(iloc.bh);
return 1;
out_release_page:
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 941c1c0d5c6e..03374dc215d1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1145,7 +1145,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
*/
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
int ret, needed_blocks;
@@ -1170,7 +1170,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
- pagep);
+ foliop);
if (ret < 0)
return ret;
if (ret == 1)
@@ -1224,10 +1224,10 @@ retry_journal:
ret = ext4_block_write_begin(folio, pos, len, ext4_get_block);
#else
if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(&folio->page, pos, len,
+ ret = __block_write_begin(folio, pos, len,
ext4_get_block_unwritten);
else
- ret = __block_write_begin(&folio->page, pos, len, ext4_get_block);
+ ret = __block_write_begin(folio, pos, len, ext4_get_block);
#endif
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode,
@@ -1270,7 +1270,7 @@ retry_journal:
folio_put(folio);
return ret;
}
- *pagep = &folio->page;
+ *foliop = folio;
return ret;
}
@@ -1298,9 +1298,8 @@ static int write_end_fn(handle_t *handle, struct inode *inode,
static int ext4_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
@@ -1315,7 +1314,7 @@ static int ext4_write_end(struct file *file,
return ext4_write_inline_data_end(inode, pos, len, copied,
folio);
- copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ copied = block_write_end(file, mapping, pos, len, copied, folio, fsdata);
/*
* it's important to update i_size while still holding folio lock:
* page writeout could otherwise come in and zero beyond i_size.
@@ -1402,9 +1401,8 @@ static void ext4_journalled_zero_new_buffers(handle_t *handle,
static int ext4_journalled_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
@@ -2926,7 +2924,7 @@ static int ext4_nonda_switch(struct super_block *sb)
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret, retries = 0;
struct folio *folio;
@@ -2941,14 +2939,14 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
- len, pagep, fsdata);
+ len, foliop, fsdata);
}
*fsdata = (void *)0;
trace_ext4_da_write_begin(inode, pos, len);
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
ret = ext4_da_write_inline_data_begin(mapping, inode, pos, len,
- pagep, fsdata);
+ foliop, fsdata);
if (ret < 0)
return ret;
if (ret == 1)
@@ -2964,7 +2962,7 @@ retry:
#ifdef CONFIG_FS_ENCRYPTION
ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#else
- ret = __block_write_begin(&folio->page, pos, len, ext4_da_get_block_prep);
+ ret = __block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#endif
if (ret < 0) {
folio_unlock(folio);
@@ -2983,7 +2981,7 @@ retry:
return ret;
}
- *pagep = &folio->page;
+ *foliop = folio;
return ret;
}
@@ -3029,7 +3027,7 @@ static int ext4_da_do_write_end(struct address_space *mapping,
* flag, which all that's needed to trigger page writeback.
*/
copied = block_write_end(NULL, mapping, pos, len, copied,
- &folio->page, NULL);
+ folio, NULL);
new_i_size = pos + copied;
/*
@@ -3080,15 +3078,14 @@ static int ext4_da_do_write_end(struct address_space *mapping,
static int ext4_da_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
int write_mode = (int)(unsigned long)fsdata;
- struct folio *folio = page_folio(page);
if (write_mode == FALL_BACK_TO_NONDELALLOC)
return ext4_write_end(file, mapping, pos,
- len, copied, &folio->page, fsdata);
+ len, copied, folio, fsdata);
trace_ext4_da_write_end(inode, pos, len, copied);
@@ -6219,7 +6216,7 @@ retry_alloc:
if (folio_pos(folio) + len > size)
len = size - folio_pos(folio);
- err = __block_write_begin(&folio->page, 0, len, ext4_get_block);
+ err = __block_write_begin(folio, 0, len, ext4_get_block);
if (!err) {
ret = VM_FAULT_SIGBUS;
if (ext4_journal_folio_buffers(handle, folio, len))
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index 2f37e1ea3955..d9203228ce97 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -76,17 +76,17 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
while (count) {
size_t n = min_t(size_t, count,
PAGE_SIZE - offset_in_page(pos));
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int res;
- res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata);
+ res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata);
if (res)
return res;
- memcpy_to_page(page, offset_in_page(pos), buf, n);
+ memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n);
- res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata);
+ res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata);
if (res < 0)
return res;
if (res != n)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 6457e5bca9c9..5dfa0207ad8f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3552,12 +3552,12 @@ reserve_block:
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct page *page = NULL;
- pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
+ struct folio *folio;
+ pgoff_t index = pos >> PAGE_SHIFT;
bool need_balance = false;
bool use_cow = false;
block_t blkaddr = NULL_ADDR;
@@ -3573,7 +3573,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
- * lock_page(page #0) -> lock_page(inode_page)
+ * folio_lock(folio #0) -> folio_lock(inode_page)
*/
if (index != 0) {
err = f2fs_convert_inline_inode(inode);
@@ -3584,18 +3584,20 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_compressed_file(inode)) {
int ret;
+ struct page *page;
*fsdata = NULL;
if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
goto repeat;
- ret = f2fs_prepare_compress_overwrite(inode, pagep,
+ ret = f2fs_prepare_compress_overwrite(inode, &page,
index, fsdata);
if (ret < 0) {
err = ret;
goto fail;
} else if (ret) {
+ *foliop = page_folio(page);
return 0;
}
}
@@ -3603,81 +3605,85 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
repeat:
/*
- * Do not use grab_cache_page_write_begin() to avoid deadlock due to
- * wait_for_stable_page. Will wait that below with our IO control.
+ * Do not use FGP_STABLE to avoid deadlock.
+ * Will wait that below with our IO control.
*/
- page = f2fs_pagecache_get_page(mapping, index,
+ folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
- if (!page) {
- err = -ENOMEM;
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
goto fail;
}
/* TODO: cluster can be compressed due to race with .writepage */
- *pagep = page;
+ *foliop = folio;
if (f2fs_is_atomic_file(inode))
- err = prepare_atomic_write_begin(sbi, page, pos, len,
+ err = prepare_atomic_write_begin(sbi, &folio->page, pos, len,
&blkaddr, &need_balance, &use_cow);
else
- err = prepare_write_begin(sbi, page, pos, len,
+ err = prepare_write_begin(sbi, &folio->page, pos, len,
&blkaddr, &need_balance);
if (err)
- goto fail;
+ goto put_folio;
if (need_balance && !IS_NOQUOTA(inode) &&
has_not_enough_free_secs(sbi, 0, 0)) {
- unlock_page(page);
+ folio_unlock(folio);
f2fs_balance_fs(sbi, true);
- lock_page(page);
- if (page->mapping != mapping) {
- /* The page got truncated from under us */
- f2fs_put_page(page, 1);
+ folio_lock(folio);
+ if (folio->mapping != mapping) {
+ /* The folio got truncated from under us */
+ folio_unlock(folio);
+ folio_put(folio);
goto repeat;
}
}
- f2fs_wait_on_page_writeback(page, DATA, false, true);
+ f2fs_wait_on_page_writeback(&folio->page, DATA, false, true);
- if (len == PAGE_SIZE || PageUptodate(page))
+ if (len == folio_size(folio) || folio_test_uptodate(folio))
return 0;
if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
!f2fs_verity_in_progress(inode)) {
- zero_user_segment(page, len, PAGE_SIZE);
+ folio_zero_segment(folio, len, PAGE_SIZE);
return 0;
}
if (blkaddr == NEW_ADDR) {
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
+ folio_zero_segment(folio, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
} else {
if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
err = -EFSCORRUPTED;
- goto fail;
+ goto put_folio;
}
err = f2fs_submit_page_read(use_cow ?
- F2FS_I(inode)->cow_inode : inode, page,
+ F2FS_I(inode)->cow_inode : inode, &folio->page,
blkaddr, 0, true);
if (err)
- goto fail;
+ goto put_folio;
- lock_page(page);
- if (unlikely(page->mapping != mapping)) {
- f2fs_put_page(page, 1);
+ folio_lock(folio);
+ if (unlikely(folio->mapping != mapping)) {
+ folio_unlock(folio);
+ folio_put(folio);
goto repeat;
}
- if (unlikely(!PageUptodate(page))) {
+ if (unlikely(!folio_test_uptodate(folio))) {
err = -EIO;
- goto fail;
+ goto put_folio;
}
}
return 0;
+put_folio:
+ folio_unlock(folio);
+ folio_put(folio);
fail:
- f2fs_put_page(page, 1);
f2fs_write_failed(inode, pos + len);
return err;
}
@@ -3685,9 +3691,9 @@ fail:
static int f2fs_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
trace_f2fs_write_end(inode, pos, len, copied);
@@ -3696,17 +3702,17 @@ static int f2fs_write_end(struct file *file,
* should be PAGE_SIZE. Otherwise, we treat it with zero copied and
* let generic_perform_write() try to copy data again through copied=0.
*/
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
if (unlikely(copied != len))
copied = 0;
else
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
/* overwrite compressed file */
if (f2fs_compressed_file(inode) && fsdata) {
- f2fs_compress_write_end(inode, fsdata, page->index, copied);
+ f2fs_compress_write_end(inode, fsdata, folio->index, copied);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
if (pos + copied > i_size_read(inode) &&
@@ -3719,7 +3725,7 @@ static int f2fs_write_end(struct file *file,
if (!copied)
goto unlock_out;
- set_page_dirty(page);
+ folio_mark_dirty(folio);
if (pos + copied > i_size_read(inode) &&
!f2fs_verity_in_progress(inode)) {
@@ -3729,7 +3735,8 @@ static int f2fs_write_end(struct file *file,
pos + copied);
}
unlock_out:
- f2fs_put_page(page, 1);
+ folio_unlock(folio);
+ folio_put(folio);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return copied;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3959fd137cc9..176b5177c89d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2677,7 +2677,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
const struct address_space_operations *a_ops = mapping->a_ops;
int offset = off & (sb->s_blocksize - 1);
size_t towrite = len;
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int err = 0;
int tocopy;
@@ -2687,7 +2687,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
towrite);
retry:
err = a_ops->write_begin(NULL, mapping, off, tocopy,
- &page, &fsdata);
+ &folio, &fsdata);
if (unlikely(err)) {
if (err == -ENOMEM) {
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
@@ -2697,10 +2697,10 @@ retry:
break;
}
- memcpy_to_page(page, offset, data, tocopy);
+ memcpy_to_folio(folio, offset_in_folio(folio, off), data, tocopy);
a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
- page, fsdata);
+ folio, fsdata);
offset = 0;
towrite -= tocopy;
off += tocopy;
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index f7bb0c54502c..84a33fe49bed 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -80,17 +80,17 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
while (count) {
size_t n = min_t(size_t, count,
PAGE_SIZE - offset_in_page(pos));
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int res;
- res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata);
+ res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata);
if (res)
return res;
- memcpy_to_page(page, offset_in_page(pos), buf, n);
+ memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n);
- res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata);
+ res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata);
if (res < 0)
return res;
if (res != n)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 19115fd2d2a4..75722bbd6b5f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -221,13 +221,12 @@ static void fat_write_failed(struct address_space *mapping, loff_t to)
static int fat_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int err;
- *pagep = NULL;
err = cont_write_begin(file, mapping, pos, len,
- pagep, fsdata, fat_get_block,
+ foliop, fsdata, fat_get_block,
&MSDOS_I(mapping->host)->mmu_private);
if (err < 0)
fat_write_failed(mapping, pos + len);
@@ -236,11 +235,11 @@ static int fat_write_begin(struct file *file, struct address_space *mapping,
static int fat_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *pagep, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
int err;
- err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+ err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
if (err < len)
fat_write_failed(mapping, pos + len);
if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 0587a0e221a6..f6fde75a3bd5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -414,6 +414,12 @@ static long f_dupfd_query(int fd, struct file *filp)
return f.file == filp;
}
+/* Let the caller figure out whether a given file was just created. */
+static long f_created_query(const struct file *filp)
+{
+ return !!(filp->f_mode & FMODE_CREATED);
+}
+
static int f_owner_sig(struct file *filp, int signum, bool setsig)
{
int ret = 0;
@@ -447,6 +453,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
long err = -EINVAL;
switch (cmd) {
+ case F_CREATED_QUERY:
+ err = f_created_query(filp);
+ break;
case F_DUPFD:
err = f_dupfd(argi, filp, 0);
break;
@@ -553,6 +562,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
static int check_fcntl_cmd(unsigned cmd)
{
switch (cmd) {
+ case F_CREATED_QUERY:
case F_DUPFD:
case F_DUPFD_CLOEXEC:
case F_DUPFD_QUERY:
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6e8cea16790e..8cb665629f4a 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -16,7 +16,8 @@
static long do_sys_name_to_handle(const struct path *path,
struct file_handle __user *ufh,
- int __user *mnt_id, int fh_flags)
+ void __user *mnt_id, bool unique_mntid,
+ int fh_flags)
{
long retval;
struct file_handle f_handle;
@@ -69,9 +70,19 @@ static long do_sys_name_to_handle(const struct path *path,
} else
retval = 0;
/* copy the mount id */
- if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
- copy_to_user(ufh, handle,
- struct_size(handle, f_handle, handle_bytes)))
+ if (unique_mntid) {
+ if (put_user(real_mount(path->mnt)->mnt_id_unique,
+ (u64 __user *) mnt_id))
+ retval = -EFAULT;
+ } else {
+ if (put_user(real_mount(path->mnt)->mnt_id,
+ (int __user *) mnt_id))
+ retval = -EFAULT;
+ }
+ /* copy the handle */
+ if (retval != -EFAULT &&
+ copy_to_user(ufh, handle,
+ struct_size(handle, f_handle, handle_bytes)))
retval = -EFAULT;
kfree(handle);
return retval;
@@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path,
* @name: name that should be converted to handle.
* @handle: resulting file handle
* @mnt_id: mount id of the file system containing the file
+ * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int)
* @flag: flag value to indicate whether to follow symlink or not
* and whether a decodable file handle is required.
*
@@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path,
* value required.
*/
SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
- struct file_handle __user *, handle, int __user *, mnt_id,
+ struct file_handle __user *, handle, void __user *, mnt_id,
int, flag)
{
struct path path;
@@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
int fh_flags;
int err;
- if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
+ if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
+ AT_HANDLE_MNT_ID_UNIQUE))
return -EINVAL;
lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
@@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
lookup_flags |= LOOKUP_EMPTY;
err = user_path_at(dfd, name, lookup_flags, &path);
if (!err) {
- err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
+ err = do_sys_name_to_handle(&path, handle, mnt_id,
+ flag & AT_HANDLE_MNT_ID_UNIQUE,
+ fh_flags);
path_put(&path);
}
return err;
diff --git a/fs/file.c b/fs/file.c
index 655338effe9c..976ecd4ce2c6 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -672,7 +672,7 @@ int close_fd(unsigned fd)
return filp_close(file, files);
}
-EXPORT_SYMBOL(close_fd); /* for ksys_close() */
+EXPORT_SYMBOL(close_fd);
/**
* last_fd - return last valid index into fd table
diff --git a/fs/file_table.c b/fs/file_table.c
index 7ce4d5dac080..3cac9c34264f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -136,6 +136,7 @@ static int __init init_fs_stat_sysctls(void)
register_sysctl_init("fs", fs_stat_sysctls);
if (IS_ENABLED(CONFIG_BINFMT_MISC)) {
struct ctl_table_header *hdr;
+
hdr = register_sysctl_mount_point("fs/binfmt_misc");
kmemleak_not_leak(hdr);
}
@@ -389,7 +390,9 @@ EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount);
struct file *alloc_file_clone(struct file *base, int flags,
const struct file_operations *fops)
{
- struct file *f = alloc_file(&base->f_path, flags, fops);
+ struct file *f;
+
+ f = alloc_file(&base->f_path, flags, fops);
if (!IS_ERR(f)) {
path_get(&f->f_path);
f->f_mapping = base->f_mapping;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b865a3fa52f3..d8bec3c1bb1f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1132,6 +1132,7 @@ out_bdi_put:
/**
* cgroup_writeback_umount - flush inode wb switches for umount
+ * @sb: target super_block
*
* This function is called when a super_block is about to be destroyed and
* flushes in-flight inode wb switches. An inode wb switch goes through
@@ -1140,8 +1141,12 @@ out_bdi_put:
* rare occurrences and synchronize_rcu() can take a while, perform
* flushing iff wb switches are in flight.
*/
-void cgroup_writeback_umount(void)
+void cgroup_writeback_umount(struct super_block *sb)
{
+
+ if (!(sb->s_bdi->capabilities & BDI_CAP_WRITEBACK))
+ return;
+
/*
* SB_ACTIVE should be reliably cleared before checking
* isw_nr_in_flight, see generic_shutdown_super().
@@ -1381,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
static void inode_sync_complete(struct inode *inode)
{
+ assert_spin_locked(&inode->i_lock);
+
inode->i_state &= ~I_SYNC;
/* If inode is clean an unused, put it into LRU now... */
inode_add_lru(inode);
- /* Waiters must see I_SYNC cleared before being woken up */
- smp_mb();
- wake_up_bit(&inode->i_state, __I_SYNC);
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_SYNC);
}
static bool inode_dirtied_after(struct inode *inode, unsigned long t)
@@ -1505,30 +1511,27 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
* Wait for writeback on an inode to complete. Called with i_lock held.
* Caller must make sure inode cannot go away when we drop i_lock.
*/
-static void __inode_wait_for_writeback(struct inode *inode)
- __releases(inode->i_lock)
- __acquires(inode->i_lock)
+void inode_wait_for_writeback(struct inode *inode)
{
- DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
- wait_queue_head_t *wqh;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ assert_spin_locked(&inode->i_lock);
- wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- while (inode->i_state & I_SYNC) {
+ if (!(inode->i_state & I_SYNC))
+ return;
+
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+ if (!(inode->i_state & I_SYNC))
+ break;
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, bit_wait,
- TASK_UNINTERRUPTIBLE);
+ schedule();
spin_lock(&inode->i_lock);
}
-}
-
-/*
- * Wait for writeback on an inode to complete. Caller must have inode pinned.
- */
-void inode_wait_for_writeback(struct inode *inode)
-{
- spin_lock(&inode->i_lock);
- __inode_wait_for_writeback(inode);
- spin_unlock(&inode->i_lock);
+ finish_wait(wq_head, &wqe.wq_entry);
}
/*
@@ -1539,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
static void inode_sleep_on_writeback(struct inode *inode)
__releases(inode->i_lock)
{
- DEFINE_WAIT(wait);
- wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- int sleep;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+ bool sleep;
+
+ assert_spin_locked(&inode->i_lock);
- prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
- sleep = inode->i_state & I_SYNC;
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+ sleep = !!(inode->i_state & I_SYNC);
spin_unlock(&inode->i_lock);
if (sleep)
schedule();
- finish_wait(wqh, &wait);
+ finish_wait(wq_head, &wqe.wq_entry);
}
/*
@@ -1752,7 +1759,7 @@ static int writeback_single_inode(struct inode *inode,
*/
if (wbc->sync_mode != WB_SYNC_ALL)
goto out;
- __inode_wait_for_writeback(inode);
+ inode_wait_for_writeback(inode);
}
WARN_ON(inode->i_state & I_SYNC);
/*
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 7146038b2fe7..f0c9cd1a0b39 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -31,6 +31,8 @@ MODULE_ALIAS("devname:fuse");
static struct kmem_cache *fuse_req_cachep;
+static void end_requests(struct list_head *head);
+
static struct fuse_dev *fuse_get_dev(struct file *file)
{
/*
@@ -773,7 +775,6 @@ static int fuse_check_folio(struct folio *folio)
(folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
- 1 << PG_uptodate |
1 << PG_lru |
1 << PG_active |
1 << PG_workingset |
@@ -818,9 +819,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
newfolio = page_folio(buf->page);
- if (!folio_test_uptodate(newfolio))
- folio_mark_uptodate(newfolio);
-
+ folio_clear_uptodate(newfolio);
folio_clear_mappedtodisk(newfolio);
if (fuse_check_folio(newfolio) != 0)
@@ -1822,6 +1821,13 @@ static void fuse_resend(struct fuse_conn *fc)
}
spin_lock(&fiq->lock);
+ if (!fiq->connected) {
+ spin_unlock(&fiq->lock);
+ list_for_each_entry(req, &to_queue, list)
+ clear_bit(FR_PENDING, &req->flags);
+ end_requests(&to_queue);
+ return;
+ }
/* iq and pq requests are both oldest to newest */
list_splice(&to_queue, &fiq->pending);
fiq->ops->wake_pending_and_unlock(fiq);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2b0d4781f394..8e96df9fd76c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -670,7 +670,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
err = get_create_ext(&args, dir, entry, mode);
if (err)
- goto out_put_forget_req;
+ goto out_free_ff;
err = fuse_simple_request(fm, &args);
free_ext_value(&args);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f39456c65ed7..ba6df52a823e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1832,10 +1832,16 @@ __acquires(fi->lock)
fuse_writepage_finish(fm, wpa);
spin_unlock(&fi->lock);
- /* After fuse_writepage_finish() aux request list is private */
+ /* After rb_erase() aux request list is private */
for (aux = wpa->next; aux; aux = next) {
+ struct backing_dev_info *bdi = inode_to_bdi(aux->inode);
+
next = aux->next;
aux->next = NULL;
+
+ dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+ dec_node_page_state(aux->ia.ap.pages[0], NR_WRITEBACK_TEMP);
+ wb_writeout_inc(&bdi->wb);
fuse_writepage_free(aux);
}
@@ -2387,76 +2393,77 @@ out:
* but how to implement it without killing performance need more thinking.
*/
static int fuse_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
pgoff_t index = pos >> PAGE_SHIFT;
struct fuse_conn *fc = get_fuse_conn(file_inode(file));
- struct page *page;
+ struct folio *folio;
loff_t fsize;
int err = -ENOMEM;
WARN_ON(!fc->writeback_cache);
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
goto error;
- fuse_wait_on_page_writeback(mapping->host, page->index);
+ fuse_wait_on_page_writeback(mapping->host, folio->index);
- if (PageUptodate(page) || len == PAGE_SIZE)
+ if (folio_test_uptodate(folio) || len >= folio_size(folio))
goto success;
/*
- * Check if the start this page comes after the end of file, in which
- * case the readpage can be optimized away.
+ * Check if the start of this folio comes after the end of file,
+ * in which case the readpage can be optimized away.
*/
fsize = i_size_read(mapping->host);
- if (fsize <= (pos & PAGE_MASK)) {
- size_t off = pos & ~PAGE_MASK;
+ if (fsize <= folio_pos(folio)) {
+ size_t off = offset_in_folio(folio, pos);
if (off)
- zero_user_segment(page, 0, off);
+ folio_zero_segment(folio, 0, off);
goto success;
}
- err = fuse_do_readpage(file, page);
+ err = fuse_do_readpage(file, &folio->page);
if (err)
goto cleanup;
success:
- *pagep = page;
+ *foliop = folio;
return 0;
cleanup:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
error:
return err;
}
static int fuse_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
/* Haven't copied anything? Skip zeroing, size extending, dirtying. */
if (!copied)
goto unlock;
pos += copied;
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
/* Zero any unwritten bytes at the end of the page */
size_t endoff = pos & ~PAGE_MASK;
if (endoff)
- zero_user_segment(page, endoff, PAGE_SIZE);
- SetPageUptodate(page);
+ folio_zero_segment(folio, endoff, PAGE_SIZE);
+ folio_mark_uptodate(folio);
}
if (pos > inode->i_size)
i_size_write(inode, pos);
- set_page_dirty(page);
+ folio_mark_dirty(folio);
unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return copied;
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d8ab4e93916f..bebd89002328 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1332,11 +1332,16 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
* on a stacked fs (e.g. overlayfs) themselves and with
* max_stack_depth == 1, FUSE fs can be stacked as the
* underlying fs of a stacked fs (e.g. overlayfs).
+ *
+ * Also don't allow the combination of FUSE_PASSTHROUGH
+ * and FUSE_WRITEBACK_CACHE, current design doesn't handle
+ * them together.
*/
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH) &&
(flags & FUSE_PASSTHROUGH) &&
arg->max_stack_depth > 0 &&
- arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH) {
+ arg->max_stack_depth <= FILESYSTEM_MAX_STACK_DEPTH &&
+ !(flags & FUSE_WRITEBACK_CACHE)) {
fc->passthrough = 1;
fc->max_stack_depth = arg->max_stack_depth;
fm->sb->s_stack_depth = arg->max_stack_depth;
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 5b423fdbb13f..9f568d345c51 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -81,7 +81,7 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
}
ret = fuse_simple_request(fm, &args);
if (!ret && !size)
- ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
+ ret = min_t(size_t, outarg.size, XATTR_SIZE_MAX);
if (ret == -ENOSYS) {
fm->fc->no_getxattr = 1;
ret = -EOPNOTSUPP;
@@ -143,7 +143,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
}
ret = fuse_simple_request(fm, &args);
if (!ret && !size)
- ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
+ ret = min_t(size_t, outarg.size, XATTR_LIST_MAX);
if (ret > 0 && size)
ret = fuse_verify_xattr_list(list, ret);
if (ret == -ENOSYS) {
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index 6d1878b99b30..4a0ce131e233 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -487,15 +487,15 @@ void hfs_file_truncate(struct inode *inode)
if (inode->i_size > HFS_I(inode)->phys_size) {
struct address_space *mapping = inode->i_mapping;
void *fsdata = NULL;
- struct page *page;
+ struct folio *folio;
/* XXX: Can use generic_cont_expand? */
size = inode->i_size - 1;
- res = hfs_write_begin(NULL, mapping, size + 1, 0, &page,
+ res = hfs_write_begin(NULL, mapping, size + 1, 0, &folio,
&fsdata);
if (!res) {
res = generic_write_end(NULL, mapping, size + 1, 0, 0,
- page, fsdata);
+ folio, fsdata);
}
if (res)
inode->i_size = HFS_I(inode)->phys_size;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index b5a6ad5df357..a0c7cb0f79fc 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -202,7 +202,7 @@ extern const struct address_space_operations hfs_aops;
extern const struct address_space_operations hfs_btree_aops;
int hfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata);
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata);
extern struct inode *hfs_new_inode(struct inode *, const struct qstr *, umode_t);
extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
extern int hfs_write_inode(struct inode *, struct writeback_control *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 744e10b46904..a81ce7a740b9 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -45,12 +45,11 @@ static void hfs_write_failed(struct address_space *mapping, loff_t to)
}
int hfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
int ret;
- *pagep = NULL;
- ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata,
+ ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
hfs_get_block,
&HFS_I(mapping->host)->phys_size);
if (unlikely(ret))
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 9c51867dddc5..a6d61685ae79 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -554,16 +554,16 @@ void hfsplus_file_truncate(struct inode *inode)
if (inode->i_size > hip->phys_size) {
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
loff_t size = inode->i_size;
res = hfsplus_write_begin(NULL, mapping, size, 0,
- &page, &fsdata);
+ &folio, &fsdata);
if (res)
return;
res = generic_write_end(NULL, mapping, size, 0, 0,
- page, fsdata);
+ folio, fsdata);
if (res < 0)
return;
mark_inode_dirty(inode);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e78f181c24f..59ce81dca73f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -472,7 +472,7 @@ extern const struct address_space_operations hfsplus_btree_aops;
extern const struct dentry_operations hfsplus_dentry_operations;
int hfsplus_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata);
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata);
struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
umode_t mode);
void hfsplus_delete_inode(struct inode *inode);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 3d326926c195..f331e9574217 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -39,12 +39,11 @@ static void hfsplus_write_failed(struct address_space *mapping, loff_t to)
}
int hfsplus_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
int ret;
- *pagep = NULL;
- ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata,
+ ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
hfsplus_get_block,
&HFSPLUS_I(mapping->host)->phys_size);
if (unlikely(ret))
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 22df574ca99e..6d1cf2436ead 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -465,31 +465,32 @@ static int hostfs_read_folio(struct file *file, struct folio *folio)
static int hostfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
pgoff_t index = pos >> PAGE_SHIFT;
- *pagep = grab_cache_page_write_begin(mapping, index);
- if (!*pagep)
+ *foliop = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (!*foliop)
return -ENOMEM;
return 0;
}
static int hostfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
void *buffer;
- unsigned from = pos & (PAGE_SIZE - 1);
+ size_t from = offset_in_folio(folio, pos);
int err;
- buffer = kmap_local_page(page);
- err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied);
+ buffer = kmap_local_folio(folio, from);
+ err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer, copied);
kunmap_local(buffer);
- if (!PageUptodate(page) && err == PAGE_SIZE)
- SetPageUptodate(page);
+ if (!folio_test_uptodate(folio) && err == folio_size(folio))
+ folio_mark_uptodate(folio);
/*
* If err > 0, write_file has added err to pos, so we are comparing
@@ -497,8 +498,8 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping,
*/
if (err > 0 && (pos > inode->i_size))
inode->i_size = pos;
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return err;
}
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 1bb8d97cd9ae..449a3fc1b8d9 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -190,12 +190,11 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
static int hpfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- *pagep = NULL;
- ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata,
+ ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata,
hpfs_get_block,
&hpfs_i(mapping->host)->mmu_private);
if (unlikely(ret))
@@ -206,11 +205,11 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
static int hpfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *pagep, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
int err;
- err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+ err = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
if (err < len)
hpfs_write_failed(mapping, pos + len);
if (!(err < 0)) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9f6cff356796..5cf327337e22 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -388,14 +388,14 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
static int hugetlbfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
return -EINVAL;
}
static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
BUG();
return -EINVAL;
diff --git a/fs/inode.c b/fs/inode.c
index 10c4619faeef..af78f515403f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
inode->i_state |= I_REFERENCED;
}
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+ struct inode *inode, u32 bit)
+{
+ void *bit_address;
+
+ bit_address = inode_state_wait_address(inode, bit);
+ init_wait_var_entry(wqe, bit_address, 0);
+ return __var_waitqueue(bit_address);
+}
+EXPORT_SYMBOL(inode_bit_waitqueue);
+
/*
* Add inode to LRU if needed (inode is unused and clean).
*
@@ -500,25 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
inode->i_state &= ~I_LRU_ISOLATING;
- smp_mb();
- wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_LRU_ISOLATING);
spin_unlock(&inode->i_lock);
}
static void inode_wait_for_lru_isolating(struct inode *inode)
{
- spin_lock(&inode->i_lock);
- if (inode->i_state & I_LRU_ISOLATING) {
- DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
- wait_queue_head_t *wqh;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ lockdep_assert_held(&inode->i_lock);
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ return;
- wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /*
+ * Checking I_LRU_ISOLATING with inode->i_lock guarantees
+ * memory ordering.
+ */
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ break;
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+ schedule();
spin_lock(&inode->i_lock);
- WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
- spin_unlock(&inode->i_lock);
+ finish_wait(wq_head, &wqe.wq_entry);
+ WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
/**
@@ -595,6 +616,7 @@ void dump_mapping(const struct address_space *mapping)
struct hlist_node *dentry_first;
struct dentry *dentry_ptr;
struct dentry dentry;
+ char fname[64] = {};
unsigned long ino;
/*
@@ -631,11 +653,14 @@ void dump_mapping(const struct address_space *mapping)
return;
}
+ if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0)
+ strscpy(fname, "<invalid>");
/*
- * if dentry is corrupted, the %pd handler may still crash,
- * but it's unlikely that we reach here with a corrupt mapping
+ * Even if strncpy_from_kernel_nofault() succeeded,
+ * the fname could be unreliable
*/
- pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
+ pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
+ a_ops, ino, fname);
}
void clear_inode(struct inode *inode)
@@ -690,6 +715,7 @@ static void evict(struct inode *inode)
inode_sb_list_del(inode);
+ spin_lock(&inode->i_lock);
inode_wait_for_lru_isolating(inode);
/*
@@ -699,6 +725,7 @@ static void evict(struct inode *inode)
* the inode. We just have to wait for running writeback to finish.
*/
inode_wait_for_writeback(inode);
+ spin_unlock(&inode->i_lock);
if (op->evict_inode) {
op->evict_inode(inode);
@@ -722,7 +749,13 @@ static void evict(struct inode *inode)
* used as an indicator whether blocking on it is safe.
*/
spin_lock(&inode->i_lock);
- wake_up_bit(&inode->i_state, __I_NEW);
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
+ smp_mb();
+ inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock);
@@ -770,6 +803,10 @@ again:
continue;
spin_lock(&inode->i_lock);
+ if (atomic_read(&inode->i_count)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
spin_unlock(&inode->i_lock);
continue;
@@ -1130,8 +1167,13 @@ void unlock_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
@@ -1142,8 +1184,13 @@ void discard_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
iput(inode);
}
@@ -1570,9 +1617,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
again:
- spin_lock(&inode_hash_lock);
- inode = find_inode_fast(sb, head, ino, true);
- spin_unlock(&inode_hash_lock);
+ inode = find_inode_fast(sb, head, ino, false);
if (inode) {
if (IS_ERR(inode))
@@ -2334,8 +2379,8 @@ EXPORT_SYMBOL(inode_needs_sync);
*/
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
- wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
/*
* Handle racing against evict(), see that routine for more details.
@@ -2346,14 +2391,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
return;
}
- wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
if (is_inode_hash_locked)
spin_lock(&inode_hash_lock);
rcu_read_lock();
@@ -2502,18 +2547,11 @@ EXPORT_SYMBOL(inode_owner_or_capable);
/*
* Direct i/o helper functions
*/
-static void __inode_dio_wait(struct inode *inode)
+bool inode_dio_finished(const struct inode *inode)
{
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
- DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
- do {
- prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
- if (atomic_read(&inode->i_dio_count))
- schedule();
- } while (atomic_read(&inode->i_dio_count));
- finish_wait(wq, &q.wq_entry);
+ return atomic_read(&inode->i_dio_count) == 0;
}
+EXPORT_SYMBOL(inode_dio_finished);
/**
* inode_dio_wait - wait for outstanding DIO requests to finish
@@ -2527,11 +2565,17 @@ static void __inode_dio_wait(struct inode *inode)
*/
void inode_dio_wait(struct inode *inode)
{
- if (atomic_read(&inode->i_dio_count))
- __inode_dio_wait(inode);
+ wait_var_event(&inode->i_dio_count, inode_dio_finished(inode));
}
EXPORT_SYMBOL(inode_dio_wait);
+void inode_dio_wait_interruptible(struct inode *inode)
+{
+ wait_var_event_interruptible(&inode->i_dio_count,
+ inode_dio_finished(inode));
+}
+EXPORT_SYMBOL(inode_dio_wait_interruptible);
+
/*
* inode_set_flags - atomically set some inode flags
*
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f420c53d86ac..9b4ca3811a24 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -900,7 +900,7 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
size_t bh_written;
bh_written = block_write_end(NULL, iter->inode->i_mapping, pos,
- len, copied, &folio->page, NULL);
+ len, copied, folio, NULL);
WARN_ON_ONCE(bh_written != copied && bh_written != 0);
return bh_written == copied;
}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index e12cb145147e..13c18ccc13b0 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -23,10 +23,10 @@
static int jffs2_write_end(struct file *filp, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *pg, void *fsdata);
+ struct folio *folio, void *fsdata);
static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata);
+ struct folio **foliop, void **fsdata);
static int jffs2_read_folio(struct file *filp, struct folio *folio);
int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
@@ -77,29 +77,27 @@ const struct address_space_operations jffs2_file_address_operations =
.write_end = jffs2_write_end,
};
-static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
+static int jffs2_do_readpage_nolock(struct inode *inode, struct folio *folio)
{
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
- unsigned char *pg_buf;
+ unsigned char *kaddr;
int ret;
jffs2_dbg(2, "%s(): ino #%lu, page at offset 0x%lx\n",
- __func__, inode->i_ino, pg->index << PAGE_SHIFT);
+ __func__, inode->i_ino, folio->index << PAGE_SHIFT);
- BUG_ON(!PageLocked(pg));
+ BUG_ON(!folio_test_locked(folio));
- pg_buf = kmap(pg);
- /* FIXME: Can kmap fail? */
-
- ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT,
+ kaddr = kmap_local_folio(folio, 0);
+ ret = jffs2_read_inode_range(c, f, kaddr, folio->index << PAGE_SHIFT,
PAGE_SIZE);
+ kunmap_local(kaddr);
if (!ret)
- SetPageUptodate(pg);
+ folio_mark_uptodate(folio);
- flush_dcache_page(pg);
- kunmap(pg);
+ flush_dcache_folio(folio);
jffs2_dbg(2, "readpage finished\n");
return ret;
@@ -107,7 +105,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
int __jffs2_read_folio(struct file *file, struct folio *folio)
{
- int ret = jffs2_do_readpage_nolock(folio->mapping->host, &folio->page);
+ int ret = jffs2_do_readpage_nolock(folio->mapping->host, folio);
folio_unlock(folio);
return ret;
}
@@ -125,9 +123,9 @@ static int jffs2_read_folio(struct file *file, struct folio *folio)
static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
- struct page *pg;
+ struct folio *folio;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
@@ -206,29 +204,30 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
* page in read_cache_page(), which causes a deadlock.
*/
mutex_lock(&c->alloc_sem);
- pg = grab_cache_page_write_begin(mapping, index);
- if (!pg) {
- ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
goto release_sem;
}
- *pagep = pg;
+ *foliop = folio;
/*
- * Read in the page if it wasn't already present. Cannot optimize away
- * the whole page write case until jffs2_write_end can handle the
+ * Read in the folio if it wasn't already present. Cannot optimize away
+ * the whole folio write case until jffs2_write_end can handle the
* case of a short-copy.
*/
- if (!PageUptodate(pg)) {
+ if (!folio_test_uptodate(folio)) {
mutex_lock(&f->sem);
- ret = jffs2_do_readpage_nolock(inode, pg);
+ ret = jffs2_do_readpage_nolock(inode, folio);
mutex_unlock(&f->sem);
if (ret) {
- unlock_page(pg);
- put_page(pg);
+ folio_unlock(folio);
+ folio_put(folio);
goto release_sem;
}
}
- jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
+ jffs2_dbg(1, "end write_begin(). folio->flags %lx\n", folio->flags);
release_sem:
mutex_unlock(&c->alloc_sem);
@@ -238,7 +237,7 @@ out_err:
static int jffs2_write_end(struct file *filp, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *pg, void *fsdata)
+ struct folio *folio, void *fsdata)
{
/* Actually commit the write from the page cache page we're looking at.
* For now, we write the full page out each time. It sucks, but it's simple
@@ -252,16 +251,17 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
unsigned aligned_start = start & ~3;
int ret = 0;
uint32_t writtenlen = 0;
+ void *buf;
- jffs2_dbg(1, "%s(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
- __func__, inode->i_ino, pg->index << PAGE_SHIFT,
- start, end, pg->flags);
+ jffs2_dbg(1, "%s(): ino #%lu, page at 0x%llx, range %d-%d, flags %lx\n",
+ __func__, inode->i_ino, folio_pos(folio),
+ start, end, folio->flags);
/* We need to avoid deadlock with page_cache_read() in
- jffs2_garbage_collect_pass(). So the page must be
+ jffs2_garbage_collect_pass(). So the folio must be
up to date to prevent page_cache_read() from trying
to re-lock it. */
- BUG_ON(!PageUptodate(pg));
+ BUG_ON(!folio_test_uptodate(folio));
if (end == PAGE_SIZE) {
/* When writing out the end of a page, write out the
@@ -276,8 +276,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
if (!ri) {
jffs2_dbg(1, "%s(): Allocation of raw inode failed\n",
__func__);
- unlock_page(pg);
- put_page(pg);
+ folio_unlock(folio);
+ folio_put(folio);
return -ENOMEM;
}
@@ -289,15 +289,11 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
ri->isize = cpu_to_je32((uint32_t)inode->i_size);
ri->atime = ri->ctime = ri->mtime = cpu_to_je32(JFFS2_NOW());
- /* In 2.4, it was already kmapped by generic_file_write(). Doesn't
- hurt to do it again. The alternative is ifdefs, which are ugly. */
- kmap(pg);
-
- ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start,
- (pg->index << PAGE_SHIFT) + aligned_start,
+ buf = kmap_local_folio(folio, aligned_start);
+ ret = jffs2_write_inode_range(c, f, ri, buf,
+ folio_pos(folio) + aligned_start,
end - aligned_start, &writtenlen);
-
- kunmap(pg);
+ kunmap_local(buf);
if (ret)
mapping_set_error(mapping, ret);
@@ -323,12 +319,12 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
it gets reread */
jffs2_dbg(1, "%s(): Not all bytes written. Marking page !uptodate\n",
__func__);
- ClearPageUptodate(pg);
+ folio_clear_uptodate(folio);
}
jffs2_dbg(1, "%s() returning %d\n",
__func__, writtenlen > 0 ? writtenlen : ret);
- unlock_page(pg);
- put_page(pg);
+ folio_unlock(folio);
+ folio_put(folio);
return writtenlen > 0 ? writtenlen : ret;
}
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 5c6602f3c189..822949d0eb00 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -1171,7 +1171,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
uint32_t alloclen, offset, orig_end, orig_start;
int ret = 0;
unsigned char *comprbuf = NULL, *writebuf;
- struct page *page;
+ struct folio *folio;
unsigned char *pg_ptr;
memset(&ri, 0, sizeof(ri));
@@ -1317,25 +1317,25 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
BUG_ON(start > orig_start);
}
- /* The rules state that we must obtain the page lock *before* f->sem, so
+ /* The rules state that we must obtain the folio lock *before* f->sem, so
* drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
* actually going to *change* so we're safe; we only allow reading.
*
* It is important to note that jffs2_write_begin() will ensure that its
- * page is marked Uptodate before allocating space. That means that if we
- * end up here trying to GC the *same* page that jffs2_write_begin() is
- * trying to write out, read_cache_page() will not deadlock. */
+ * folio is marked uptodate before allocating space. That means that if we
+ * end up here trying to GC the *same* folio that jffs2_write_begin() is
+ * trying to write out, read_cache_folio() will not deadlock. */
mutex_unlock(&f->sem);
- page = read_cache_page(inode->i_mapping, start >> PAGE_SHIFT,
+ folio = read_cache_folio(inode->i_mapping, start >> PAGE_SHIFT,
__jffs2_read_folio, NULL);
- if (IS_ERR(page)) {
- pr_warn("read_cache_page() returned error: %ld\n",
- PTR_ERR(page));
+ if (IS_ERR(folio)) {
+ pr_warn("read_cache_folio() returned error: %ld\n",
+ PTR_ERR(folio));
mutex_lock(&f->sem);
- return PTR_ERR(page);
+ return PTR_ERR(folio);
}
- pg_ptr = kmap(page);
+ pg_ptr = kmap_local_folio(folio, 0);
mutex_lock(&f->sem);
offset = start;
@@ -1400,7 +1400,6 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
}
}
- kunmap(page);
- put_page(page);
+ folio_release_kmap(folio, pg_ptr);
return ret;
}
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 1a6b5921d17a..07cfdc440596 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -292,11 +292,11 @@ static void jfs_write_failed(struct address_space *mapping, loff_t to)
static int jfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, jfs_get_block);
+ ret = block_write_begin(mapping, pos, len, foliop, jfs_get_block);
if (unlikely(ret))
jfs_write_failed(mapping, pos + len);
@@ -304,12 +304,12 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
}
static int jfs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, struct page *page,
+ loff_t pos, unsigned len, unsigned copied, struct folio *folio,
void *fsdata)
{
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
if (ret < len)
jfs_write_failed(mapping, pos + len);
return ret;
diff --git a/fs/libfs.c b/fs/libfs.c
index 02602d00939e..46966fd8bcf9 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -914,7 +914,7 @@ static int simple_read_folio(struct file *file, struct folio *folio)
int simple_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct folio *folio;
@@ -923,7 +923,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
if (IS_ERR(folio))
return PTR_ERR(folio);
- *pagep = &folio->page;
+ *foliop = folio;
if (!folio_test_uptodate(folio) && (len != folio_size(folio))) {
size_t from = offset_in_folio(folio, pos);
@@ -942,11 +942,11 @@ EXPORT_SYMBOL(simple_write_begin);
* @pos: "
* @len: "
* @copied: "
- * @page: "
+ * @folio: "
* @fsdata: "
*
- * simple_write_end does the minimum needed for updating a page after writing is
- * done. It has the same API signature as the .write_end of
+ * simple_write_end does the minimum needed for updating a folio after
+ * writing is done. It has the same API signature as the .write_end of
* address_space_operations vector. So it can just be set onto .write_end for
* FSes that don't need any other processing. i_mutex is assumed to be held.
* Block based filesystems should use generic_write_end().
@@ -959,9 +959,8 @@ EXPORT_SYMBOL(simple_write_begin);
*/
static int simple_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
struct inode *inode = folio->mapping->host;
loff_t last_pos = pos + copied;
@@ -2003,13 +2002,19 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool force)
* information, but the legacy inode_inc_iversion code used a spinlock
* to serialize increments.
*
- * Here, we add full memory barriers to ensure that any de-facto
- * ordering with other info is preserved.
+ * We add a full memory barrier to ensure that any de facto ordering
+ * with other state is preserved (either implicitly coming from cmpxchg
+ * or explicitly from smp_mb if we don't know upfront if we will execute
+ * the former).
*
- * This barrier pairs with the barrier in inode_query_iversion()
+ * These barriers pair with inode_query_iversion().
*/
- smp_mb();
cur = inode_peek_iversion_raw(inode);
+ if (!force && !(cur & I_VERSION_QUERIED)) {
+ smp_mb();
+ cur = inode_peek_iversion_raw(inode);
+ }
+
do {
/* If flag is clear then we needn't do anything */
if (!force && !(cur & I_VERSION_QUERIED))
@@ -2038,20 +2043,22 @@ EXPORT_SYMBOL(inode_maybe_inc_iversion);
u64 inode_query_iversion(struct inode *inode)
{
u64 cur, new;
+ bool fenced = false;
+ /*
+ * Memory barriers (implicit in cmpxchg, explicit in smp_mb) pair with
+ * inode_maybe_inc_iversion(), see that routine for more details.
+ */
cur = inode_peek_iversion_raw(inode);
do {
/* If flag is already set, then no need to swap */
if (cur & I_VERSION_QUERIED) {
- /*
- * This barrier (and the implicit barrier in the
- * cmpxchg below) pairs with the barrier in
- * inode_maybe_inc_iversion().
- */
- smp_mb();
+ if (!fenced)
+ smp_mb();
break;
}
+ fenced = true;
new = cur | I_VERSION_QUERIED;
} while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
return cur >> I_VERSION_QUERIED_SHIFT;
@@ -2117,12 +2124,12 @@ struct timespec64 simple_inode_init_ts(struct inode *inode)
}
EXPORT_SYMBOL(simple_inode_init_ts);
-static inline struct dentry *get_stashed_dentry(struct dentry *stashed)
+static inline struct dentry *get_stashed_dentry(struct dentry **stashed)
{
struct dentry *dentry;
guard(rcu)();
- dentry = READ_ONCE(stashed);
+ dentry = rcu_dereference(*stashed);
if (!dentry)
return NULL;
if (!lockref_get_not_dead(&dentry->d_lockref))
@@ -2219,7 +2226,7 @@ int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data,
const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info;
/* See if dentry can be reused. */
- path->dentry = get_stashed_dentry(*stashed);
+ path->dentry = get_stashed_dentry(stashed);
if (path->dentry) {
sops->put_data(data);
goto out_path;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index a224cf222570..dd2a425b41f0 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -40,18 +40,18 @@ minix_last_byte(struct inode *inode, unsigned long page_nr)
return last_byte;
}
-static void dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
+static void dir_commit_chunk(struct folio *folio, loff_t pos, unsigned len)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *dir = mapping->host;
- block_write_end(NULL, mapping, pos, len, len, page, NULL);
+ block_write_end(NULL, mapping, pos, len, len, folio, NULL);
if (pos+len > dir->i_size) {
i_size_write(dir, pos+len);
mark_inode_dirty(dir);
}
- unlock_page(page);
+ folio_unlock(folio);
}
static int minix_handle_dirsync(struct inode *dir)
@@ -64,14 +64,15 @@ static int minix_handle_dirsync(struct inode *dir)
return err;
}
-static void *dir_get_page(struct inode *dir, unsigned long n, struct page **p)
+static void *dir_get_folio(struct inode *dir, unsigned long n,
+ struct folio **foliop)
{
- struct address_space *mapping = dir->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
- *p = page;
- return kmap_local_page(page);
+ struct folio *folio = read_mapping_folio(dir->i_mapping, n, NULL);
+
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
+ *foliop = folio;
+ return kmap_local_folio(folio, 0);
}
static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
@@ -99,9 +100,9 @@ static int minix_readdir(struct file *file, struct dir_context *ctx)
for ( ; n < npages; n++, offset = 0) {
char *p, *kaddr, *limit;
- struct page *page;
+ struct folio *folio;
- kaddr = dir_get_page(inode, n, &page);
+ kaddr = dir_get_folio(inode, n, &folio);
if (IS_ERR(kaddr))
continue;
p = kaddr+offset;
@@ -122,13 +123,13 @@ static int minix_readdir(struct file *file, struct dir_context *ctx)
unsigned l = strnlen(name, sbi->s_namelen);
if (!dir_emit(ctx, name, l,
inumber, DT_UNKNOWN)) {
- unmap_and_put_page(page, p);
+ folio_release_kmap(folio, p);
return 0;
}
}
ctx->pos += chunk_size;
}
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
}
return 0;
}
@@ -144,12 +145,13 @@ static inline int namecompare(int len, int maxlen,
/*
* minix_find_entry()
*
- * finds an entry in the specified directory with the wanted name. It
- * returns the cache buffer in which the entry was found, and the entry
- * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * finds an entry in the specified directory with the wanted name.
+ * It does NOT read the inode of the
* entry - you'll have to do that yourself if you want to.
+ *
+ * On Success folio_release_kmap() should be called on *foliop.
*/
-minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
+minix_dirent *minix_find_entry(struct dentry *dentry, struct folio **foliop)
{
const char * name = dentry->d_name.name;
int namelen = dentry->d_name.len;
@@ -158,17 +160,15 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
struct minix_sb_info * sbi = minix_sb(sb);
unsigned long n;
unsigned long npages = dir_pages(dir);
- struct page *page = NULL;
char *p;
char *namx;
__u32 inumber;
- *res_page = NULL;
for (n = 0; n < npages; n++) {
char *kaddr, *limit;
- kaddr = dir_get_page(dir, n, &page);
+ kaddr = dir_get_folio(dir, n, foliop);
if (IS_ERR(kaddr))
continue;
@@ -188,12 +188,11 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
if (namecompare(namelen, sbi->s_namelen, name, namx))
goto found;
}
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(*foliop, kaddr);
}
return NULL;
found:
- *res_page = page;
return (minix_dirent *)p;
}
@@ -204,7 +203,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
int namelen = dentry->d_name.len;
struct super_block * sb = dir->i_sb;
struct minix_sb_info * sbi = minix_sb(sb);
- struct page *page = NULL;
+ struct folio *folio = NULL;
unsigned long npages = dir_pages(dir);
unsigned long n;
char *kaddr, *p;
@@ -223,10 +222,10 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
for (n = 0; n <= npages; n++) {
char *limit, *dir_end;
- kaddr = dir_get_page(dir, n, &page);
+ kaddr = dir_get_folio(dir, n, &folio);
if (IS_ERR(kaddr))
return PTR_ERR(kaddr);
- lock_page(page);
+ folio_lock(folio);
dir_end = kaddr + minix_last_byte(dir, n);
limit = kaddr + PAGE_SIZE - sbi->s_dirsize;
for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
@@ -253,15 +252,15 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
if (namecompare(namelen, sbi->s_namelen, name, namx))
goto out_unlock;
}
- unlock_page(page);
- unmap_and_put_page(page, kaddr);
+ folio_unlock(folio);
+ folio_release_kmap(folio, kaddr);
}
BUG();
return -EINVAL;
got_it:
- pos = page_offset(page) + offset_in_page(p);
- err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
+ pos = folio_pos(folio) + offset_in_folio(folio, p);
+ err = minix_prepare_chunk(folio, pos, sbi->s_dirsize);
if (err)
goto out_unlock;
memcpy (namx, name, namelen);
@@ -272,37 +271,37 @@ got_it:
memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2);
de->inode = inode->i_ino;
}
- dir_commit_chunk(page, pos, sbi->s_dirsize);
+ dir_commit_chunk(folio, pos, sbi->s_dirsize);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
mark_inode_dirty(dir);
err = minix_handle_dirsync(dir);
out_put:
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
return err;
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
goto out_put;
}
-int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
+int minix_delete_entry(struct minix_dir_entry *de, struct folio *folio)
{
- struct inode *inode = page->mapping->host;
- loff_t pos = page_offset(page) + offset_in_page(de);
+ struct inode *inode = folio->mapping->host;
+ loff_t pos = folio_pos(folio) + offset_in_folio(folio, de);
struct minix_sb_info *sbi = minix_sb(inode->i_sb);
unsigned len = sbi->s_dirsize;
int err;
- lock_page(page);
- err = minix_prepare_chunk(page, pos, len);
+ folio_lock(folio);
+ err = minix_prepare_chunk(folio, pos, len);
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
if (sbi->s_version == MINIX_V3)
((minix3_dirent *)de)->inode = 0;
else
de->inode = 0;
- dir_commit_chunk(page, pos, len);
+ dir_commit_chunk(folio, pos, len);
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
mark_inode_dirty(inode);
return minix_handle_dirsync(inode);
@@ -310,21 +309,21 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
int minix_make_empty(struct inode *inode, struct inode *dir)
{
- struct page *page = grab_cache_page(inode->i_mapping, 0);
+ struct folio *folio = filemap_grab_folio(inode->i_mapping, 0);
struct minix_sb_info *sbi = minix_sb(inode->i_sb);
char *kaddr;
int err;
- if (!page)
- return -ENOMEM;
- err = minix_prepare_chunk(page, 0, 2 * sbi->s_dirsize);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = minix_prepare_chunk(folio, 0, 2 * sbi->s_dirsize);
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
goto fail;
}
- kaddr = kmap_local_page(page);
- memset(kaddr, 0, PAGE_SIZE);
+ kaddr = kmap_local_folio(folio, 0);
+ memset(kaddr, 0, folio_size(folio));
if (sbi->s_version == MINIX_V3) {
minix3_dirent *de3 = (minix3_dirent *)kaddr;
@@ -345,10 +344,10 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
}
kunmap_local(kaddr);
- dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
+ dir_commit_chunk(folio, 0, 2 * sbi->s_dirsize);
err = minix_handle_dirsync(inode);
fail:
- put_page(page);
+ folio_put(folio);
return err;
}
@@ -357,7 +356,7 @@ fail:
*/
int minix_empty_dir(struct inode * inode)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
unsigned long i, npages = dir_pages(inode);
struct minix_sb_info *sbi = minix_sb(inode->i_sb);
char *name, *kaddr;
@@ -366,7 +365,7 @@ int minix_empty_dir(struct inode * inode)
for (i = 0; i < npages; i++) {
char *p, *limit;
- kaddr = dir_get_page(inode, i, &page);
+ kaddr = dir_get_folio(inode, i, &folio);
if (IS_ERR(kaddr))
continue;
@@ -395,44 +394,44 @@ int minix_empty_dir(struct inode * inode)
goto not_empty;
}
}
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
}
return 1;
not_empty:
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
return 0;
}
/* Releases the page */
-int minix_set_link(struct minix_dir_entry *de, struct page *page,
+int minix_set_link(struct minix_dir_entry *de, struct folio *folio,
struct inode *inode)
{
- struct inode *dir = page->mapping->host;
+ struct inode *dir = folio->mapping->host;
struct minix_sb_info *sbi = minix_sb(dir->i_sb);
- loff_t pos = page_offset(page) + offset_in_page(de);
+ loff_t pos = folio_pos(folio) + offset_in_folio(folio, de);
int err;
- lock_page(page);
- err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
+ folio_lock(folio);
+ err = minix_prepare_chunk(folio, pos, sbi->s_dirsize);
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
if (sbi->s_version == MINIX_V3)
((minix3_dirent *)de)->inode = inode->i_ino;
else
de->inode = inode->i_ino;
- dir_commit_chunk(page, pos, sbi->s_dirsize);
+ dir_commit_chunk(folio, pos, sbi->s_dirsize);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
mark_inode_dirty(dir);
return minix_handle_dirsync(dir);
}
-struct minix_dir_entry * minix_dotdot (struct inode *dir, struct page **p)
+struct minix_dir_entry *minix_dotdot(struct inode *dir, struct folio **foliop)
{
struct minix_sb_info *sbi = minix_sb(dir->i_sb);
- struct minix_dir_entry *de = dir_get_page(dir, 0, p);
+ struct minix_dir_entry *de = dir_get_folio(dir, 0, foliop);
if (!IS_ERR(de))
return minix_next_entry(de, sbi);
@@ -441,20 +440,19 @@ struct minix_dir_entry * minix_dotdot (struct inode *dir, struct page **p)
ino_t minix_inode_by_name(struct dentry *dentry)
{
- struct page *page;
- struct minix_dir_entry *de = minix_find_entry(dentry, &page);
+ struct folio *folio;
+ struct minix_dir_entry *de = minix_find_entry(dentry, &folio);
ino_t res = 0;
if (de) {
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = folio->mapping->host;
struct minix_sb_info *sbi = minix_sb(inode->i_sb);
if (sbi->s_version == MINIX_V3)
res = ((minix3_dirent *) de)->inode;
else
res = de->inode;
- unmap_and_put_page(page, de);
+ folio_release_kmap(folio, de);
}
return res;
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 1c3df63162ef..f007e389d5d2 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -427,9 +427,9 @@ static int minix_read_folio(struct file *file, struct folio *folio)
return block_read_full_folio(folio, minix_get_block);
}
-int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len)
+int minix_prepare_chunk(struct folio *folio, loff_t pos, unsigned len)
{
- return __block_write_begin(page, pos, len, minix_get_block);
+ return __block_write_begin(folio, pos, len, minix_get_block);
}
static void minix_write_failed(struct address_space *mapping, loff_t to)
@@ -444,11 +444,11 @@ static void minix_write_failed(struct address_space *mapping, loff_t to)
static int minix_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, minix_get_block);
+ ret = block_write_begin(mapping, pos, len, foliop, minix_get_block);
if (unlikely(ret))
minix_write_failed(mapping, pos + len);
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index d493507c064f..d54273c3c9ff 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -42,18 +42,18 @@ struct minix_sb_info {
unsigned short s_version;
};
-extern struct inode *minix_iget(struct super_block *, unsigned long);
-extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **);
-extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
-extern struct inode * minix_new_inode(const struct inode *, umode_t);
-extern void minix_free_inode(struct inode * inode);
-extern unsigned long minix_count_free_inodes(struct super_block *sb);
-extern int minix_new_block(struct inode * inode);
-extern void minix_free_block(struct inode *inode, unsigned long block);
-extern unsigned long minix_count_free_blocks(struct super_block *sb);
-extern int minix_getattr(struct mnt_idmap *, const struct path *,
- struct kstat *, u32, unsigned int);
-extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
+struct inode *minix_iget(struct super_block *, unsigned long);
+struct minix_inode *minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **);
+struct minix2_inode *minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
+struct inode *minix_new_inode(const struct inode *, umode_t);
+void minix_free_inode(struct inode *inode);
+unsigned long minix_count_free_inodes(struct super_block *sb);
+int minix_new_block(struct inode *inode);
+void minix_free_block(struct inode *inode, unsigned long block);
+unsigned long minix_count_free_blocks(struct super_block *sb);
+int minix_getattr(struct mnt_idmap *, const struct path *,
+ struct kstat *, u32, unsigned int);
+int minix_prepare_chunk(struct folio *folio, loff_t pos, unsigned len);
extern void V1_minix_truncate(struct inode *);
extern void V2_minix_truncate(struct inode *);
@@ -64,15 +64,15 @@ extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int);
extern unsigned V1_minix_blocks(loff_t, struct super_block *);
extern unsigned V2_minix_blocks(loff_t, struct super_block *);
-extern struct minix_dir_entry *minix_find_entry(struct dentry*, struct page**);
-extern int minix_add_link(struct dentry*, struct inode*);
-extern int minix_delete_entry(struct minix_dir_entry*, struct page*);
-extern int minix_make_empty(struct inode*, struct inode*);
-extern int minix_empty_dir(struct inode*);
-int minix_set_link(struct minix_dir_entry *de, struct page *page,
+struct minix_dir_entry *minix_find_entry(struct dentry *, struct folio **);
+int minix_add_link(struct dentry*, struct inode*);
+int minix_delete_entry(struct minix_dir_entry *, struct folio *);
+int minix_make_empty(struct inode*, struct inode*);
+int minix_empty_dir(struct inode*);
+int minix_set_link(struct minix_dir_entry *de, struct folio *folio,
struct inode *inode);
-extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
-extern ino_t minix_inode_by_name(struct dentry*);
+struct minix_dir_entry *minix_dotdot(struct inode*, struct folio **);
+ino_t minix_inode_by_name(struct dentry*);
extern const struct inode_operations minix_file_inode_operations;
extern const struct inode_operations minix_dir_inode_operations;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index a944a0f17b53..5d9c1406fe27 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -141,15 +141,15 @@ out_fail:
static int minix_unlink(struct inode * dir, struct dentry *dentry)
{
struct inode * inode = d_inode(dentry);
- struct page * page;
+ struct folio *folio;
struct minix_dir_entry * de;
int err;
- de = minix_find_entry(dentry, &page);
+ de = minix_find_entry(dentry, &folio);
if (!de)
return -ENOENT;
- err = minix_delete_entry(de, page);
- unmap_and_put_page(page, de);
+ err = minix_delete_entry(de, folio);
+ folio_release_kmap(folio, de);
if (err)
return err;
@@ -180,28 +180,28 @@ static int minix_rename(struct mnt_idmap *idmap,
{
struct inode * old_inode = d_inode(old_dentry);
struct inode * new_inode = d_inode(new_dentry);
- struct page * dir_page = NULL;
+ struct folio * dir_folio = NULL;
struct minix_dir_entry * dir_de = NULL;
- struct page * old_page;
+ struct folio *old_folio;
struct minix_dir_entry * old_de;
int err = -ENOENT;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
- old_de = minix_find_entry(old_dentry, &old_page);
+ old_de = minix_find_entry(old_dentry, &old_folio);
if (!old_de)
goto out;
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
- dir_de = minix_dotdot(old_inode, &dir_page);
+ dir_de = minix_dotdot(old_inode, &dir_folio);
if (!dir_de)
goto out_old;
}
if (new_inode) {
- struct page * new_page;
+ struct folio *new_folio;
struct minix_dir_entry * new_de;
err = -ENOTEMPTY;
@@ -209,11 +209,11 @@ static int minix_rename(struct mnt_idmap *idmap,
goto out_dir;
err = -ENOENT;
- new_de = minix_find_entry(new_dentry, &new_page);
+ new_de = minix_find_entry(new_dentry, &new_folio);
if (!new_de)
goto out_dir;
- err = minix_set_link(new_de, new_page, old_inode);
- unmap_and_put_page(new_page, new_de);
+ err = minix_set_link(new_de, new_folio, old_inode);
+ folio_release_kmap(new_folio, new_de);
if (err)
goto out_dir;
inode_set_ctime_current(new_inode);
@@ -228,22 +228,22 @@ static int minix_rename(struct mnt_idmap *idmap,
inode_inc_link_count(new_dir);
}
- err = minix_delete_entry(old_de, old_page);
+ err = minix_delete_entry(old_de, old_folio);
if (err)
goto out_dir;
mark_inode_dirty(old_inode);
if (dir_de) {
- err = minix_set_link(dir_de, dir_page, new_dir);
+ err = minix_set_link(dir_de, dir_folio, new_dir);
if (!err)
inode_dec_link_count(old_dir);
}
out_dir:
if (dir_de)
- unmap_and_put_page(dir_page, dir_de);
+ folio_release_kmap(dir_folio, dir_de);
out_old:
- unmap_and_put_page(old_page, old_de);
+ folio_release_kmap(old_folio, old_de);
out:
return err;
}
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 3c60f1eaca61..79491663dbc0 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -228,15 +228,15 @@ static int copy_mnt_idmap(struct uid_gid_map *map_from,
return 0;
}
- forward = kmemdup(map_from->forward,
- nr_extents * sizeof(struct uid_gid_extent),
- GFP_KERNEL_ACCOUNT);
+ forward = kmemdup_array(map_from->forward, nr_extents,
+ sizeof(struct uid_gid_extent),
+ GFP_KERNEL_ACCOUNT);
if (!forward)
return -ENOMEM;
- reverse = kmemdup(map_from->reverse,
- nr_extents * sizeof(struct uid_gid_extent),
- GFP_KERNEL_ACCOUNT);
+ reverse = kmemdup_array(map_from->reverse, nr_extents,
+ sizeof(struct uid_gid_extent),
+ GFP_KERNEL_ACCOUNT);
if (!reverse) {
kfree(forward);
return -ENOMEM;
diff --git a/fs/mount.h b/fs/mount.h
index ad4b1ddebb54..0a78f85cf737 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -153,5 +153,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
list_add_tail(&mnt->mnt_list, dt_list);
}
-extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
bool has_locked_children(struct mount *mnt, struct dentry *dentry);
diff --git a/fs/namei.c b/fs/namei.c
index 5512cb10fa89..891b169e38c9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1639,6 +1639,20 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
+/**
+ * lookup_fast - do fast lockless (but racy) lookup of a dentry
+ * @nd: current nameidata
+ *
+ * Do a fast, but racy lookup in the dcache for the given dentry, and
+ * revalidate it. Returns a valid dentry pointer or NULL if one wasn't
+ * found. On error, an ERR_PTR will be returned.
+ *
+ * If this function returns a valid dentry and the walk is no longer
+ * lazy, the dentry will carry a reference that must later be put. If
+ * RCU mode is still in force, then this is not the case and the dentry
+ * must be legitimized before use. If this returns NULL, then the walk
+ * will no longer be in RCU mode.
+ */
static struct dentry *lookup_fast(struct nameidata *nd)
{
struct dentry *dentry, *parent = nd->path.dentry;
@@ -3521,6 +3535,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
return dentry;
}
+ if (open_flag & O_CREAT)
+ audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
+
/*
* Checking write permission is tricky, bacuse we don't know if we are
* going to actually need it: O_CREAT opens should work as long as the
@@ -3591,6 +3608,42 @@ out_dput:
return ERR_PTR(error);
}
+static inline bool trailing_slashes(struct nameidata *nd)
+{
+ return (bool)nd->last.name[nd->last.len];
+}
+
+static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
+{
+ struct dentry *dentry;
+
+ if (open_flag & O_CREAT) {
+ if (trailing_slashes(nd))
+ return ERR_PTR(-EISDIR);
+
+ /* Don't bother on an O_EXCL create */
+ if (open_flag & O_EXCL)
+ return NULL;
+ }
+
+ if (trailing_slashes(nd))
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+ dentry = lookup_fast(nd);
+ if (IS_ERR_OR_NULL(dentry))
+ return dentry;
+
+ if (open_flag & O_CREAT) {
+ /* Discard negative dentries. Need inode_lock to do the create */
+ if (!dentry->d_inode) {
+ if (!(nd->flags & LOOKUP_RCU))
+ dput(dentry);
+ dentry = NULL;
+ }
+ }
+ return dentry;
+}
+
static const char *open_last_lookups(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
@@ -3608,28 +3661,22 @@ static const char *open_last_lookups(struct nameidata *nd,
return handle_dots(nd, nd->last_type);
}
- if (!(open_flag & O_CREAT)) {
- if (nd->last.name[nd->last.len])
- nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- /* we _can_ be in RCU mode here */
- dentry = lookup_fast(nd);
- if (IS_ERR(dentry))
- return ERR_CAST(dentry);
- if (likely(dentry))
- goto finish_lookup;
+ /* We _can_ be in RCU mode here */
+ dentry = lookup_fast_for_open(nd, open_flag);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+ if (likely(dentry))
+ goto finish_lookup;
+
+ if (!(open_flag & O_CREAT)) {
if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU))
return ERR_PTR(-ECHILD);
} else {
- /* create side of things */
if (nd->flags & LOOKUP_RCU) {
if (!try_to_unlazy(nd))
return ERR_PTR(-ECHILD);
}
- audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
- /* trailing slashes? */
- if (unlikely(nd->last.name[nd->last.len]))
- return ERR_PTR(-EISDIR);
}
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
@@ -5304,7 +5351,7 @@ int page_symlink(struct inode *inode, const char *symname, int len)
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS);
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int err;
unsigned int flags;
@@ -5312,16 +5359,16 @@ int page_symlink(struct inode *inode, const char *symname, int len)
retry:
if (nofs)
flags = memalloc_nofs_save();
- err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata);
+ err = aops->write_begin(NULL, mapping, 0, len-1, &folio, &fsdata);
if (nofs)
memalloc_nofs_restore(flags);
if (err)
goto fail;
- memcpy(page_address(page), symname, len-1);
+ memcpy(folio_address(folio), symname, len - 1);
- err = aops->write_end(NULL, mapping, 0, len-1, len-1,
- page, fsdata);
+ err = aops->write_end(NULL, mapping, 0, len - 1, len - 1,
+ folio, fsdata);
if (err < 0)
goto fail;
if (err < len-1)
diff --git a/fs/namespace.c b/fs/namespace.c
index 328087a4df8a..5f2dddee0074 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1774,7 +1774,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
list_del_init(&p->mnt_child);
}
- /* Add propogated mounts to the tmp_list */
+ /* Add propagated mounts to the tmp_list */
if (how & UMOUNT_PROPAGATE)
propagate_umount(&tmp_list);
@@ -2921,8 +2921,15 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
if (!__mnt_is_readonly(mnt) &&
(!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
(ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
- char *buf = (char *)__get_free_page(GFP_KERNEL);
- char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
+ char *buf, *mntpath;
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (buf)
+ mntpath = d_path(mountpoint, buf, PAGE_SIZE);
+ else
+ mntpath = ERR_PTR(-ENOMEM);
+ if (IS_ERR(mntpath))
+ mntpath = "(unknown)";
pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n",
sb->s_type->name,
@@ -2930,8 +2937,9 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
mntpath, &sb->s_time_max,
(unsigned long long)sb->s_time_max);
- free_page((unsigned long)buf);
sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
+ if (buf)
+ free_page((unsigned long)buf);
}
}
@@ -5605,7 +5613,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
/* Only worry about locked mounts */
if (!(child->mnt.mnt_flags & MNT_LOCKED))
continue;
- /* Is the directory permanetly empty? */
+ /* Is the directory permanently empty? */
if (!is_empty_dir_inode(inode))
goto next;
}
diff --git a/fs/netfs/fscache_main.c b/fs/netfs/fscache_main.c
index 42e98bb523e3..49849005eb7c 100644
--- a/fs/netfs/fscache_main.c
+++ b/fs/netfs/fscache_main.c
@@ -103,6 +103,7 @@ void __exit fscache_exit(void)
kmem_cache_destroy(fscache_cookie_jar);
fscache_proc_cleanup();
+ timer_shutdown_sync(&fscache_cookie_lru_timer);
destroy_workqueue(fscache_wq);
pr_notice("FS-Cache unloaded\n");
}
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index 5367caf3fa28..d6ada4eba744 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -270,7 +270,7 @@ static void netfs_reset_subreq_iter(struct netfs_io_request *rreq,
if (count == remaining)
return;
- _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n",
+ _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x",
rreq->debug_id, subreq->debug_index,
iov_iter_count(&subreq->io_iter), subreq->transferred,
subreq->len, rreq->i_size,
@@ -306,6 +306,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
break;
subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
subreq->error = 0;
+ __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
netfs_stat(&netfs_n_rh_download_instead);
trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
@@ -313,6 +314,8 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq)
netfs_reset_subreq_iter(rreq, subreq);
netfs_read_from_server(rreq, subreq);
} else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) {
+ __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
+ netfs_reset_subreq_iter(rreq, subreq);
netfs_rreq_short_read(rreq, subreq);
}
}
@@ -365,7 +368,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
if (subreq->error || subreq->transferred == 0)
break;
transferred += subreq->transferred;
- if (subreq->transferred < subreq->len)
+ if (subreq->transferred < subreq->len ||
+ test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags))
break;
}
@@ -500,7 +504,8 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq,
subreq->error = 0;
subreq->transferred += transferred_or_error;
- if (subreq->transferred < subreq->len)
+ if (subreq->transferred < subreq->len &&
+ !test_bit(NETFS_SREQ_HIT_EOF, &subreq->flags))
goto incomplete;
complete:
@@ -779,10 +784,13 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
TASK_UNINTERRUPTIBLE);
ret = rreq->error;
- if (ret == 0 && rreq->submitted < rreq->len &&
- rreq->origin != NETFS_DIO_READ) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
+ if (ret == 0) {
+ if (rreq->origin == NETFS_DIO_READ) {
+ ret = rreq->transferred;
+ } else if (rreq->submitted < rreq->len) {
+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
+ ret = -EIO;
+ }
}
} else {
/* If we decrement nr_outstanding to 0, the ref belongs to us. */
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
index 75dc52a49b3a..21eab56ee2f9 100644
--- a/fs/netfs/locking.c
+++ b/fs/netfs/locking.c
@@ -19,25 +19,13 @@
* Must be called under a lock that serializes taking new references
* to i_dio_count, usually by inode->i_mutex.
*/
-static int inode_dio_wait_interruptible(struct inode *inode)
+static int netfs_inode_dio_wait_interruptible(struct inode *inode)
{
- if (!atomic_read(&inode->i_dio_count))
+ if (inode_dio_finished(inode))
return 0;
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
- DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
- for (;;) {
- prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
- if (!atomic_read(&inode->i_dio_count))
- break;
- if (signal_pending(current))
- break;
- schedule();
- }
- finish_wait(wq, &q.wq_entry);
-
- return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
+ inode_dio_wait_interruptible(inode);
+ return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
}
/* Call with exclusively locked inode->i_rwsem */
@@ -46,7 +34,7 @@ static int netfs_block_o_direct(struct netfs_inode *ictx)
if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags))
return 0;
clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
- return inode_dio_wait_interruptible(&ictx->inode);
+ return netfs_inode_dio_wait_interruptible(&ictx->inode);
}
/**
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5f0f438e5d21..9d6b49dc6694 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -142,7 +142,7 @@ static int __init netfs_init(void)
error_fscache:
error_procfile:
- remove_proc_entry("fs/netfs", NULL);
+ remove_proc_subtree("fs/netfs", NULL);
error_proc:
mempool_exit(&netfs_subrequest_pool);
error_subreqpool:
@@ -159,7 +159,7 @@ fs_initcall(netfs_init);
static void __exit netfs_exit(void)
{
fscache_exit();
- remove_proc_entry("fs/netfs", NULL);
+ remove_proc_subtree("fs/netfs", NULL);
mempool_exit(&netfs_subrequest_pool);
kmem_cache_destroy(netfs_subrequest_slab);
mempool_exit(&netfs_request_pool);
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 83e644bd518f..c1f321cf5999 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -97,10 +97,22 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback);
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
struct netfs_folio *finfo;
+ struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
size_t flen = folio_size(folio);
_enter("{%lx},%zx,%zx", folio->index, offset, length);
+ if (offset == 0 && length == flen) {
+ unsigned long long i_size = i_size_read(&ctx->inode);
+ unsigned long long fpos = folio_pos(folio), end;
+
+ end = umin(fpos + flen, i_size);
+ if (fpos < i_size && end > ctx->zero_point)
+ ctx->zero_point = end;
+ }
+
+ folio_wait_private_2(folio); /* [DEPRECATED] */
+
if (!folio_test_private(folio))
return;
@@ -113,18 +125,34 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
/* We have a partially uptodate page from a streaming write. */
unsigned int fstart = finfo->dirty_offset;
unsigned int fend = fstart + finfo->dirty_len;
- unsigned int end = offset + length;
+ unsigned int iend = offset + length;
if (offset >= fend)
return;
- if (end <= fstart)
+ if (iend <= fstart)
+ return;
+
+ /* The invalidation region overlaps the data. If the region
+ * covers the start of the data, we either move along the start
+ * or just erase the data entirely.
+ */
+ if (offset <= fstart) {
+ if (iend >= fend)
+ goto erase_completely;
+ /* Move the start of the data. */
+ finfo->dirty_len = fend - iend;
+ finfo->dirty_offset = offset;
+ return;
+ }
+
+ /* Reduce the length of the data if the invalidation region
+ * covers the tail part.
+ */
+ if (iend >= fend) {
+ finfo->dirty_len = offset - fstart;
return;
- if (offset <= fstart && end >= fend)
- goto erase_completely;
- if (offset <= fstart && end > fstart)
- goto reduce_len;
- if (offset > fstart && end >= fend)
- goto move_start;
+ }
+
/* A partial write was split. The caller has already zeroed
* it, so just absorb the hole.
*/
@@ -137,12 +165,6 @@ erase_completely:
folio_clear_uptodate(folio);
kfree(finfo);
return;
-reduce_len:
- finfo->dirty_len = offset + length - finfo->dirty_offset;
- return;
-move_start:
- finfo->dirty_len -= offset - finfo->dirty_offset;
- finfo->dirty_offset = offset;
}
EXPORT_SYMBOL(netfs_invalidate_folio);
@@ -159,12 +181,20 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp)
struct netfs_inode *ctx = netfs_inode(folio_inode(folio));
unsigned long long end;
- end = folio_pos(folio) + folio_size(folio);
+ if (folio_test_dirty(folio))
+ return false;
+
+ end = umin(folio_pos(folio) + folio_size(folio), i_size_read(&ctx->inode));
if (end > ctx->zero_point)
ctx->zero_point = end;
if (folio_test_private(folio))
return false;
+ if (unlikely(folio_test_private_2(folio))) { /* [DEPRECATED] */
+ if (current_is_kswapd() || !(gfp & __GFP_FS))
+ return false;
+ folio_wait_private_2(folio);
+ }
fscache_note_page_release(netfs_i_cookie(ctx));
return true;
}
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 426cf87aaf2e..ae7a2043f670 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -33,6 +33,7 @@
int netfs_folio_written_back(struct folio *folio)
{
enum netfs_folio_trace why = netfs_folio_trace_clear;
+ struct netfs_inode *ictx = netfs_inode(folio->mapping->host);
struct netfs_folio *finfo;
struct netfs_group *group = NULL;
int gcount = 0;
@@ -41,6 +42,12 @@ int netfs_folio_written_back(struct folio *folio)
/* Streaming writes cannot be redirtied whilst under writeback,
* so discard the streaming record.
*/
+ unsigned long long fend;
+
+ fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len;
+ if (fend > ictx->zero_point)
+ ictx->zero_point = fend;
+
folio_detach_private(folio);
group = finfo->netfs_group;
gcount++;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 29c49a7e5fe1..6df77f008d3f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -118,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
if (likely(attrlen > 0))
bitmap[0] = ntohl(*p++);
if (attrlen > 1)
- bitmap[1] = ntohl(*p);
+ bitmap[1] = ntohl(*p++);
+ if (attrlen > 2)
+ bitmap[2] = ntohl(*p);
return 0;
}
@@ -446,7 +448,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
void *argp)
{
struct cb_recallanyargs *args = argp;
- uint32_t bitmap[2];
+ uint32_t bitmap[3];
__be32 *p, status;
p = xdr_inline_decode(xdr, 4);
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index d5edb3b3eeef..20cb2008f9e4 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -647,6 +647,9 @@ restart:
prev = delegation;
continue;
}
+ inode = nfs_delegation_grab_inode(delegation);
+ if (inode == NULL)
+ continue;
if (prev) {
struct inode *tmp = nfs_delegation_grab_inode(prev);
@@ -657,12 +660,6 @@ restart:
}
}
- inode = nfs_delegation_grab_inode(delegation);
- if (inode == NULL) {
- rcu_read_unlock();
- iput(to_put);
- goto restart;
- }
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
@@ -1184,7 +1181,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server,
struct inode *inode;
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
@@ -1195,7 +1191,7 @@ restart_locked:
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
@@ -1318,7 +1314,6 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server,
restart:
rcu_read_lock();
-restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
@@ -1330,7 +1325,7 @@ restart_locked:
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
- goto restart_locked;
+ continue;
spin_lock(&delegation->lock);
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 61a8cdb9f1e1..6800ee92d742 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -336,7 +336,7 @@ static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
* increment the page use counts until he is done with the page.
*/
static int nfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep,
+ loff_t pos, unsigned len, struct folio **foliop,
void **fsdata)
{
fgf_t fgp = FGP_WRITEBEGIN;
@@ -353,7 +353,7 @@ start:
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
- *pagep = &folio->page;
+ *foliop = folio;
ret = nfs_flush_incompatible(file, folio);
if (ret) {
@@ -372,10 +372,9 @@ start:
static int nfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct folio *folio = page_folio(page);
unsigned offset = offset_in_folio(folio, pos);
int status;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8883016c551c..b8ffbe52ba15 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3931,7 +3931,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_CASE_INSENSITIVE |
FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
- bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
+ bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT |
+ FATTR4_WORD2_OPEN_ARGUMENTS;
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (status == 0) {
@@ -9997,6 +9998,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
fallthrough;
default:
task->tk_status = 0;
+ lrp->res.lrs_present = 0;
fallthrough;
case 0:
break;
@@ -10010,9 +10012,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
task->tk_status = 0;
break;
case -NFS4ERR_DELAY:
- if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
- break;
- goto out_restart;
+ if (nfs4_async_handle_error(task, server, NULL, NULL) ==
+ -EAGAIN)
+ goto out_restart;
+ lrp->res.lrs_present = 0;
+ break;
}
return;
out_restart:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index aa698481bec8..0d16b383a452 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1284,10 +1284,9 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
LIST_HEAD(freeme);
spin_lock(&inode->i_lock);
- if (!pnfs_layout_is_valid(lo) ||
- !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid))
goto out_unlock;
- if (stateid) {
+ if (stateid && pnfs_layout_is_valid(lo)) {
u32 seq = be32_to_cpu(arg_stateid->seqid);
pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index cbbd4866b0b7..97b386032b71 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -47,6 +47,7 @@
#include <linux/vfs.h>
#include <linux/inet.h>
#include <linux/in6.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/netdevice.h>
@@ -228,6 +229,7 @@ static int __nfs_list_for_each_server(struct list_head *head,
ret = fn(server, data);
if (ret)
goto out;
+ cond_resched();
rcu_read_lock();
}
rcu_read_unlock();
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a20c2c9d7d45..a366fb1c1b9b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2789,15 +2789,18 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (file) {
- nfs4_show_superblock(s, file);
- seq_puts(s, ", ");
- nfs4_show_fname(s, file);
- seq_puts(s, ", ");
- }
- spin_unlock(&nf->fi_lock);
+ if (nf) {
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
+ spin_unlock(&nf->fi_lock);
+ } else
+ seq_puts(s, "closed, ");
nfs4_show_owner(s, oo);
if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
seq_puts(s, ", admin-revoked");
@@ -3075,9 +3078,9 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
struct nfs4_delegation *dp =
container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
- nfs4_put_stid(&dp->dl_stid);
clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+ nfs4_put_stid(&dp->dl_stid);
}
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
@@ -8812,7 +8815,7 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
/**
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
- * @inode: file to be checked for a conflict
+ * @dentry: dentry of inode to be checked for a conflict
* @modified: return true if file was modified
* @size: new size of file if modified is true
*
@@ -8827,16 +8830,16 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* code is returned.
*/
__be32
-nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
bool *modified, u64 *size)
{
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file_lock_context *ctx;
struct file_lease *fl;
- struct nfs4_delegation *dp;
struct iattr attrs;
struct nfs4_cb_fattr *ncf;
+ struct inode *inode = d_inode(dentry);
*modified = false;
ctx = locks_inode_context(inode);
@@ -8856,17 +8859,26 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
*/
if (type == F_RDLCK)
break;
- goto break_lease;
+
+ nfsd_stats_wdeleg_getattr_inc(nn);
+ spin_unlock(&ctx->flc_lock);
+
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ return 0;
}
if (type == F_WRLCK) {
- dp = fl->c.flc_owner;
+ struct nfs4_delegation *dp = fl->c.flc_owner;
+
if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
spin_unlock(&ctx->flc_lock);
return 0;
}
-break_lease:
nfsd_stats_wdeleg_getattr_inc(nn);
dp = fl->c.flc_owner;
+ refcount_inc(&dp->dl_stid.sc_count);
ncf = &dp->dl_cb_fattr;
nfs4_cb_getattr(&dp->dl_cb_fattr);
spin_unlock(&ctx->flc_lock);
@@ -8876,27 +8888,37 @@ break_lease:
/* Recall delegation only if client didn't respond */
status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
if (status != nfserr_jukebox ||
- !nfsd_wait_for_delegreturn(rqstp, inode))
+ !nfsd_wait_for_delegreturn(rqstp, inode)) {
+ nfs4_put_stid(&dp->dl_stid);
return status;
+ }
}
if (!ncf->ncf_file_modified &&
(ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
ncf->ncf_file_modified = true;
if (ncf->ncf_file_modified) {
+ int err;
+
/*
* Per section 10.4.3 of RFC 8881, the server would
* not update the file's metadata with the client's
* modified size
*/
attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
- attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
- setattr_copy(&nop_mnt_idmap, inode, &attrs);
- mark_inode_dirty(inode);
+ attrs.ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_DELEG;
+ inode_lock(inode);
+ err = notify_change(&nop_mnt_idmap, dentry, &attrs, NULL);
+ inode_unlock(inode);
+ if (err) {
+ nfs4_put_stid(&dp->dl_stid);
+ return nfserrno(err);
+ }
ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
*size = ncf->ncf_cur_fsize;
*modified = true;
}
+ nfs4_put_stid(&dp->dl_stid);
return 0;
}
break;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 42b41d55d4ed..97f583777972 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3545,6 +3545,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.dentry = dentry;
args.ignore_crossmnt = (ignore_crossmnt != 0);
args.acl = NULL;
+#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
+ args.context = NULL;
+#endif
/*
* Make a local copy of the attribute bitmap that can be modified.
@@ -3562,7 +3565,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
}
args.size = 0;
if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
- status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
+ status = nfsd4_deleg_getattr_conflict(rqstp, dentry,
&file_modified, &size);
if (status)
goto out;
@@ -3617,7 +3620,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.contextsupport = false;
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
- args.context = NULL;
if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ffc217099d19..ec4559ecd193 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -781,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
- struct inode *inode, bool *file_modified, u64 *size);
+ struct dentry *dentry, bool *file_modified, u64 *size);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 4a29b0138d75..4b3e19d74925 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -83,7 +83,7 @@ static int nilfs_prepare_chunk(struct folio *folio, unsigned int from,
{
loff_t pos = folio_pos(folio) + from;
- return __block_write_begin(&folio->page, pos, to - from, nilfs_get_block);
+ return __block_write_begin(folio, pos, to - from, nilfs_get_block);
}
static void nilfs_commit_chunk(struct folio *folio,
@@ -96,7 +96,7 @@ static void nilfs_commit_chunk(struct folio *folio,
int err;
nr_dirty = nilfs_page_count_clean_buffers(&folio->page, from, to);
- copied = block_write_end(NULL, mapping, pos, len, len, &folio->page, NULL);
+ copied = block_write_end(NULL, mapping, pos, len, len, folio, NULL);
if (pos + copied > dir->i_size)
i_size_write(dir, pos + copied);
if (IS_DIRSYNC(dir))
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 7340a01d80e1..8661f452dba6 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -250,7 +250,7 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to)
static int nilfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
@@ -259,7 +259,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
if (unlikely(err))
return err;
- err = block_write_begin(mapping, pos, len, pagep, nilfs_get_block);
+ err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block);
if (unlikely(err)) {
nilfs_write_failed(mapping, pos + len);
nilfs_transaction_abort(inode->i_sb);
@@ -269,16 +269,16 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
static int nilfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
unsigned int start = pos & (PAGE_SIZE - 1);
unsigned int nr_dirty;
int err;
- nr_dirty = nilfs_page_count_clean_buffers(page, start,
+ nr_dirty = nilfs_page_count_clean_buffers(&folio->page, start,
start + copied);
- copied = generic_write_end(file, mapping, pos, len, copied, page,
+ copied = generic_write_end(file, mapping, pos, len, copied, folio,
fsdata);
nilfs_set_file_dirty(inode, nr_dirty);
err = nilfs_transaction_commit(inode->i_sb);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index b638dc06df2f..ec61ce9f29a2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -498,7 +498,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
struct inode *inode;
struct nilfs_recovery_block *rb, *n;
unsigned int blocksize = nilfs->ns_blocksize;
- struct page *page;
+ struct folio *folio;
loff_t pos;
int err = 0, err2 = 0;
@@ -512,7 +512,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
pos = rb->blkoff << inode->i_blkbits;
err = block_write_begin(inode->i_mapping, pos, blocksize,
- &page, nilfs_get_block);
+ &folio, nilfs_get_block);
if (unlikely(err)) {
loff_t isize = inode->i_size;
@@ -522,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, &folio->page);
if (unlikely(err))
goto failed_page;
@@ -531,17 +531,17 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_page;
block_write_end(NULL, inode->i_mapping, pos, blocksize,
- blocksize, page, NULL);
+ blocksize, folio, NULL);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
(*nr_salvaged_blocks)++;
goto next;
failed_page:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
failed_inode:
nilfs_warn(sb,
@@ -716,6 +716,33 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
}
/**
+ * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery
+ * @nilfs: nilfs object
+ */
+static void nilfs_abort_roll_forward(struct the_nilfs *nilfs)
+{
+ struct nilfs_inode_info *ii, *n;
+ LIST_HEAD(head);
+
+ /* Abandon inodes that have read recovery data */
+ spin_lock(&nilfs->ns_inode_lock);
+ list_splice_init(&nilfs->ns_dirty_files, &head);
+ spin_unlock(&nilfs->ns_inode_lock);
+ if (list_empty(&head))
+ return;
+
+ set_nilfs_purging(nilfs);
+ list_for_each_entry_safe(ii, n, &head, i_dirty) {
+ spin_lock(&nilfs->ns_inode_lock);
+ list_del_init(&ii->i_dirty);
+ spin_unlock(&nilfs->ns_inode_lock);
+
+ iput(&ii->vfs_inode);
+ }
+ clear_nilfs_purging(nilfs);
+}
+
+/**
* nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
* @nilfs: nilfs object
* @sb: super block instance
@@ -773,15 +800,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
if (unlikely(err)) {
nilfs_err(sb, "error %d writing segment for recovery",
err);
- goto failed;
+ goto put_root;
}
nilfs_finish_roll_forward(nilfs, ri);
}
- failed:
+put_root:
nilfs_put_root(root);
return err;
+
+failed:
+ nilfs_abort_roll_forward(nilfs);
+ goto put_root;
}
/**
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 0ca3110d6386..871ec35ea8e8 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1812,6 +1812,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
nilfs_abort_logs(&logs, ret ? : err);
list_splice_tail_init(&sci->sc_segbufs, &logs);
+ if (list_empty(&logs))
+ return; /* if the first segment buffer preparation failed */
+
nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
nilfs_free_incomplete_logs(&logs, nilfs);
@@ -2056,7 +2059,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
err = nilfs_segctor_begin_construction(sci, nilfs);
if (unlikely(err))
- goto out;
+ goto failed;
/* Update time stamp */
sci->sc_seg_ctime = ktime_get_real_seconds();
@@ -2120,10 +2123,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
return err;
failed_to_write:
- if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
- nilfs_redirty_inodes(&sci->sc_dirty_files);
-
failed:
+ if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE)
+ nilfs_redirty_inodes(&sci->sc_dirty_files);
if (nilfs_doing_gc())
nilfs_redirty_inodes(&sci->sc_gc_inodes);
nilfs_segctor_abort_construction(sci, nilfs, err);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index a5569b7f47a3..14868a3dd592 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
- u32 major = le32_to_cpu(sbp[0]->s_rev_level);
- u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level);
+ struct nilfs_super_block *raw_sb;
+ u32 major;
+ u16 minor;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ major = le32_to_cpu(raw_sb->s_rev_level);
+ minor = le16_to_cpu(raw_sb->s_minor_rev_level);
+ up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%d.%d\n", major, minor);
}
@@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
- u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size);
+ struct nilfs_super_block *raw_sb;
+ u64 dev_size;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ dev_size = le64_to_cpu(raw_sb->s_dev_size);
+ up_read(&nilfs->ns_sem);
return sysfs_emit(buf, "%llu\n", dev_size);
}
@@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ struct nilfs_super_block *raw_sb;
+ ssize_t len;
- return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid);
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid);
+ up_read(&nilfs->ns_sem);
+
+ return len;
}
static
@@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr,
struct the_nilfs *nilfs,
char *buf)
{
- struct nilfs_super_block **sbp = nilfs->ns_sbp;
+ struct nilfs_super_block *raw_sb;
+ ssize_t len;
+
+ down_read(&nilfs->ns_sem);
+ raw_sb = nilfs->ns_sbp[0];
+ len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n",
+ raw_sb->s_volume_name);
+ up_read(&nilfs->ns_sem);
- return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n",
- sbp[0]->s_volume_name);
+ return len;
}
static const char dev_readme_str[] =
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index ca1ddc46bd86..6202895a4542 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -182,7 +182,7 @@ static int ntfs_extend_initialized_size(struct file *file,
for (;;) {
u32 zerofrom, len;
- struct page *page;
+ struct folio *folio;
u8 bits;
CLST vcn, lcn, clen;
@@ -208,14 +208,13 @@ static int ntfs_extend_initialized_size(struct file *file,
if (pos + len > new_valid)
len = new_valid - pos;
- err = ntfs_write_begin(file, mapping, pos, len, &page, NULL);
+ err = ntfs_write_begin(file, mapping, pos, len, &folio, NULL);
if (err)
goto out;
- zero_user_segment(page, zerofrom, PAGE_SIZE);
+ folio_zero_range(folio, zerofrom, folio_size(folio));
- /* This function in any case puts page. */
- err = ntfs_write_end(file, mapping, pos, len, len, page, NULL);
+ err = ntfs_write_end(file, mapping, pos, len, len, folio, NULL);
if (err < 0)
goto out;
pos += len;
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 6b0bdc474e76..f672072e6bd4 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -901,7 +901,7 @@ static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn,
}
int ntfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, u32 len, struct page **pagep, void **fsdata)
+ loff_t pos, u32 len, struct folio **foliop, void **fsdata)
{
int err;
struct inode *inode = mapping->host;
@@ -910,7 +910,6 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
if (unlikely(ntfs3_forced_shutdown(inode->i_sb)))
return -EIO;
- *pagep = NULL;
if (is_resident(ni)) {
struct folio *folio = __filemap_get_folio(
mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN,
@@ -926,7 +925,7 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
ni_unlock(ni);
if (!err) {
- *pagep = &folio->page;
+ *foliop = folio;
goto out;
}
folio_unlock(folio);
@@ -936,7 +935,7 @@ int ntfs_write_begin(struct file *file, struct address_space *mapping,
goto out;
}
- err = block_write_begin(mapping, pos, len, pagep,
+ err = block_write_begin(mapping, pos, len, foliop,
ntfs_get_block_write_begin);
out:
@@ -947,9 +946,8 @@ out:
* ntfs_write_end - Address_space_operations::write_end.
*/
int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
- u32 len, u32 copied, struct page *page, void *fsdata)
+ u32 len, u32 copied, struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
struct ntfs_inode *ni = ntfs_i(inode);
u64 valid = ni->i_valid;
@@ -979,7 +977,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
folio_unlock(folio);
folio_put(folio);
} else {
- err = generic_write_end(file, mapping, pos, len, copied, page,
+ err = generic_write_end(file, mapping, pos, len, copied, folio,
fsdata);
}
@@ -1008,45 +1006,6 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
return err;
}
-int reset_log_file(struct inode *inode)
-{
- int err;
- loff_t pos = 0;
- u32 log_size = inode->i_size;
- struct address_space *mapping = inode->i_mapping;
-
- for (;;) {
- u32 len;
- void *kaddr;
- struct page *page;
-
- len = pos + PAGE_SIZE > log_size ? (log_size - pos) : PAGE_SIZE;
-
- err = block_write_begin(mapping, pos, len, &page,
- ntfs_get_block_write_begin);
- if (err)
- goto out;
-
- kaddr = kmap_atomic(page);
- memset(kaddr, -1, len);
- kunmap_atomic(kaddr);
- flush_dcache_page(page);
-
- err = block_write_end(NULL, mapping, pos, len, len, page, NULL);
- if (err < 0)
- goto out;
- pos += len;
-
- if (pos >= log_size)
- break;
- balance_dirty_pages_ratelimited(mapping);
- }
-out:
- mark_inode_dirty_sync(inode);
-
- return err;
-}
-
int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc)
{
return _ni_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index e5255a251929..584f814715f4 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -708,13 +708,12 @@ int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi,
struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
const struct cpu_str *name);
int ntfs_set_size(struct inode *inode, u64 new_size);
-int reset_log_file(struct inode *inode);
int ntfs_get_block(struct inode *inode, sector_t vbn,
struct buffer_head *bh_result, int create);
int ntfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, u32 len, struct page **pagep, void **fsdata);
+ loff_t pos, u32 len, struct folio **foliop, void **fsdata);
int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
- u32 len, u32 copied, struct page *page, void *fsdata);
+ u32 len, u32 copied, struct folio *folio, void *fsdata);
int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc);
int ntfs_sync_inode(struct inode *inode);
int ntfs_flush_inodes(struct super_block *sb, struct inode *i1,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 6be175a1ab3c..d6c985cc6353 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1643,7 +1643,7 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, ocfs2_write_type_t type,
- struct page **pagep, void **fsdata,
+ struct folio **foliop, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page)
{
int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
@@ -1826,8 +1826,8 @@ try_again:
ocfs2_free_alloc_context(meta_ac);
success:
- if (pagep)
- *pagep = wc->w_target_page;
+ if (foliop)
+ *foliop = page_folio(wc->w_target_page);
*fsdata = wc;
return 0;
out_quota:
@@ -1879,7 +1879,7 @@ out:
static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
struct buffer_head *di_bh = NULL;
@@ -1901,7 +1901,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER,
- pagep, fsdata, di_bh, NULL);
+ foliop, fsdata, di_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out_fail;
@@ -2076,7 +2076,7 @@ out:
static int ocfs2_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
int ret;
struct inode *inode = mapping->host;
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 3a520117fa59..45db1781ea73 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -38,7 +38,7 @@ typedef enum {
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, ocfs2_write_type_t type,
- struct page **pagep, void **fsdata,
+ struct folio **foliop, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page);
int ocfs2_read_inline_data(struct inode *inode, struct page *page,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 115ab2172820..ad131a2fc58e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -755,7 +755,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
u64 abs_to, struct buffer_head *di_bh)
{
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
unsigned long index = abs_from >> PAGE_SHIFT;
handle_t *handle;
int ret = 0;
@@ -774,9 +774,10 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
goto out;
}
- page = find_or_create_page(mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
mlog_errno(ret);
goto out_commit_trans;
}
@@ -803,7 +804,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
* __block_write_begin and block_commit_write to zero the
* whole block.
*/
- ret = __block_write_begin(page, block_start + 1, 0,
+ ret = __block_write_begin(folio, block_start + 1, 0,
ocfs2_get_block);
if (ret < 0) {
mlog_errno(ret);
@@ -812,7 +813,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/* must not update i_size! */
- block_commit_write(page, block_start + 1, block_start + 1);
+ block_commit_write(&folio->page, block_start + 1, block_start + 1);
}
/*
@@ -833,8 +834,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
}
out_unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out_commit_trans:
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 1834f26522ed..6ef4cb045ccd 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -53,7 +53,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
loff_t pos = page_offset(page);
unsigned int len = PAGE_SIZE;
pgoff_t last_index;
- struct page *locked_page = NULL;
+ struct folio *locked_folio = NULL;
void *fsdata;
loff_t size = i_size_read(inode);
@@ -91,7 +91,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
len = ((size - 1) & ~PAGE_MASK) + 1;
err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
- &locked_page, &fsdata, di_bh, page);
+ &locked_folio, &fsdata, di_bh, page);
if (err) {
if (err != -ENOSPC)
mlog_errno(err);
@@ -99,7 +99,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
goto out;
}
- if (!locked_page) {
+ if (!locked_folio) {
ret = VM_FAULT_NOPAGE;
goto out;
}
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 6b580b9da8e3..98358d405b6a 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -312,11 +312,11 @@ static void omfs_write_failed(struct address_space *mapping, loff_t to)
static int omfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, omfs_get_block);
+ ret = block_write_begin(mapping, pos, len, foliop, omfs_get_block);
if (unlikely(ret))
omfs_write_failed(mapping, pos + len);
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index fdb9b65db1de..aae6d2b8767d 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -309,22 +309,18 @@ static int orangefs_read_folio(struct file *file, struct folio *folio)
static int orangefs_write_begin(struct file *file,
struct address_space *mapping, loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct orangefs_write_range *wr;
struct folio *folio;
- struct page *page;
- pgoff_t index;
int ret;
- index = pos >> PAGE_SHIFT;
+ folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
-
- *pagep = page;
- folio = page_folio(page);
+ *foliop = folio;
if (folio_test_dirty(folio) && !folio_test_private(folio)) {
/*
@@ -365,9 +361,10 @@ okay:
}
static int orangefs_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata)
+ loff_t pos, unsigned len, unsigned copied, struct folio *folio,
+ void *fsdata)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
loff_t last_pos = pos + copied;
/*
@@ -377,23 +374,23 @@ static int orangefs_write_end(struct file *file, struct address_space *mapping,
if (last_pos > inode->i_size)
i_size_write(inode, last_pos);
- /* zero the stale part of the page if we did a short copy */
- if (!PageUptodate(page)) {
+ /* zero the stale part of the folio if we did a short copy */
+ if (!folio_test_uptodate(folio)) {
unsigned from = pos & (PAGE_SIZE - 1);
if (copied < len) {
- zero_user(page, from + copied, len - copied);
+ folio_zero_range(folio, from + copied, len - copied);
}
/* Set fully written pages uptodate. */
- if (pos == page_offset(page) &&
+ if (pos == folio_pos(folio) &&
(len == PAGE_SIZE || pos + len == inode->i_size)) {
- zero_user_segment(page, from + copied, PAGE_SIZE);
- SetPageUptodate(page);
+ folio_zero_segment(folio, from + copied, PAGE_SIZE);
+ folio_mark_uptodate(folio);
}
}
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
mark_inode_dirty_sync(file_inode(file));
return copied;
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index 4860fcc4611b..d0568c091341 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -353,6 +353,8 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
case Opt_datadir_add:
ctx->nr_data++;
fallthrough;
+ case Opt_lowerdir:
+ fallthrough;
case Opt_lowerdir_add:
WARN_ON(ctx->nr >= ctx->capacity);
l = &ctx->lower[ctx->nr++];
@@ -365,10 +367,9 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
}
}
-static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
- enum ovl_opt layer)
+static int ovl_parse_layer(struct fs_context *fc, const char *layer_name, enum ovl_opt layer)
{
- char *name = kstrdup(param->string, GFP_KERNEL);
+ char *name = kstrdup(layer_name, GFP_KERNEL);
bool upper = (layer == Opt_upperdir || layer == Opt_workdir);
struct path path;
int err;
@@ -376,7 +377,7 @@ static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
if (!name)
return -ENOMEM;
- if (upper)
+ if (upper || layer == Opt_lowerdir)
err = ovl_mount_dir(name, &path);
else
err = ovl_mount_dir_noesc(name, &path);
@@ -432,7 +433,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
{
int err;
struct ovl_fs_context *ctx = fc->fs_private;
- struct ovl_fs_context_layer *l;
char *dup = NULL, *iter;
ssize_t nr_lower, nr;
bool data_layer = false;
@@ -449,7 +449,7 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
return 0;
if (*name == ':') {
- pr_err("cannot append lower layer");
+ pr_err("cannot append lower layer\n");
return -EINVAL;
}
@@ -472,35 +472,11 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
goto out_err;
}
- if (nr_lower > ctx->capacity) {
- err = -ENOMEM;
- l = krealloc_array(ctx->lower, nr_lower, sizeof(*ctx->lower),
- GFP_KERNEL_ACCOUNT);
- if (!l)
- goto out_err;
-
- ctx->lower = l;
- ctx->capacity = nr_lower;
- }
-
iter = dup;
- l = ctx->lower;
- for (nr = 0; nr < nr_lower; nr++, l++) {
- ctx->nr++;
- memset(l, 0, sizeof(*l));
-
- err = ovl_mount_dir(iter, &l->path);
+ for (nr = 0; nr < nr_lower; nr++) {
+ err = ovl_parse_layer(fc, iter, Opt_lowerdir);
if (err)
- goto out_put;
-
- err = ovl_mount_dir_check(fc, &l->path, Opt_lowerdir, iter, false);
- if (err)
- goto out_put;
-
- err = -ENOMEM;
- l->name = kstrdup(iter, GFP_KERNEL_ACCOUNT);
- if (!l->name)
- goto out_put;
+ goto out_err;
if (data_layer)
ctx->nr_data++;
@@ -517,8 +493,8 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
* there are no data layers.
*/
if (ctx->nr_data > 0) {
- pr_err("regular lower layers cannot follow data lower layers");
- goto out_put;
+ pr_err("regular lower layers cannot follow data lower layers\n");
+ goto out_err;
}
data_layer = false;
@@ -532,9 +508,6 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
kfree(dup);
return 0;
-out_put:
- ovl_reset_lowerdirs(ctx);
-
out_err:
kfree(dup);
@@ -582,7 +555,7 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_datadir_add:
case Opt_upperdir:
case Opt_workdir:
- err = ovl_parse_layer(fc, param, opt);
+ err = ovl_parse_layer(fc, param->string, opt);
break;
case Opt_default_permissions:
config->default_permissions = true;
diff --git a/fs/pipe.c b/fs/pipe.c
index d3735f1d6f00..4083ba492cb6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1429,7 +1429,7 @@ static const struct super_operations pipefs_ops = {
/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
- * no real gain from having the whole whorehouse mounted. So we don't need
+ * no real gain from having the whole file system mounted. So we don't need
* any operations on the root directory. However, we need a non-trivial
* d_name - pipe: will go nicely and kill the special-casing in procfs.
*/
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 3f87297dbfdb..6c66a37522d0 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -715,8 +715,8 @@ int posix_acl_update_mode(struct mnt_idmap *idmap,
return error;
if (error == 0)
*acl = NULL;
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode,
+ i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 988f6bbfc4bd..7f3abc3de49f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -827,12 +827,9 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
static int mem_open(struct inode *inode, struct file *file)
{
- int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
-
- /* OK to pass negative loff_t, we can catch out-of-range */
- file->f_mode |= FMODE_UNSIGNED_OFFSET;
-
- return ret;
+ if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET)))
+ return -EINVAL;
+ return __mem_open(inode, file, PTRACE_MODE_ATTACH);
}
static ssize_t mem_rw(struct file *file, char __user *buf,
@@ -932,6 +929,7 @@ static const struct file_operations proc_mem_operations = {
.write = mem_write,
.open = mem_open,
.release = mem_release,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
};
static int environ_open(struct inode *inode, struct file *file)
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 586bbc84ca04..7baafb1eba13 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -59,7 +59,7 @@ static int seq_show(struct seq_file *m, void *v)
real_mount(file->f_path.mnt)->mnt_id,
file_inode(file)->i_ino);
- /* show_fd_locks() never deferences files so a stale value is safe */
+ /* show_fd_locks() never dereferences files, so a stale value is safe */
show_fd_locks(m, file, files);
if (seq_has_overflowed(m))
goto out;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 8e08a9a1b7ed..7d0acdad74e2 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -235,7 +235,7 @@ static int kcore_ram_list(struct list_head *list)
int nid, ret;
unsigned long end_pfn;
- /* Not inialized....update now */
+ /* Not initialized....update now */
/* find out "max pfn" */
end_pfn = 0;
for_each_node_state(nid, N_MEMORY) {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5f171ad7b436..2c5f4814aef9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -976,7 +976,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_PKEY_BIT0)] = "",
[ilog2(VM_PKEY_BIT1)] = "",
[ilog2(VM_PKEY_BIT2)] = "",
+#if VM_PKEY_BIT3
[ilog2(VM_PKEY_BIT3)] = "",
+#endif
#if VM_PKEY_BIT4
[ilog2(VM_PKEY_BIT4)] = "",
#endif
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index c1cfb8a19e9d..b4d10e45f2e4 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -24,13 +24,15 @@ static unsigned qnx6_lfile_checksum(char *name, unsigned size)
return crc;
}
-static struct page *qnx6_get_page(struct inode *dir, unsigned long n)
+static void *qnx6_get_folio(struct inode *dir, unsigned long n,
+ struct folio **foliop)
{
- struct address_space *mapping = dir->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
- if (!IS_ERR(page))
- kmap(page);
- return page;
+ struct folio *folio = read_mapping_folio(dir->i_mapping, n, NULL);
+
+ if (IS_ERR(folio))
+ return folio;
+ *foliop = folio;
+ return kmap_local_folio(folio, 0);
}
static unsigned last_entry(struct inode *inode, unsigned long page_nr)
@@ -44,19 +46,20 @@ static unsigned last_entry(struct inode *inode, unsigned long page_nr)
static struct qnx6_long_filename *qnx6_longname(struct super_block *sb,
struct qnx6_long_dir_entry *de,
- struct page **p)
+ struct folio **foliop)
{
struct qnx6_sb_info *sbi = QNX6_SB(sb);
u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */
u32 n = s >> (PAGE_SHIFT - sb->s_blocksize_bits); /* in pages */
- /* within page */
- u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_MASK;
+ u32 offs;
struct address_space *mapping = sbi->longfile->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
- kmap(*p = page);
- return (struct qnx6_long_filename *)(page_address(page) + offs);
+ struct folio *folio = read_mapping_folio(mapping, n, NULL);
+
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
+ offs = offset_in_folio(folio, s << sb->s_blocksize_bits);
+ *foliop = folio;
+ return kmap_local_folio(folio, offs);
}
static int qnx6_dir_longfilename(struct inode *inode,
@@ -67,7 +70,7 @@ static int qnx6_dir_longfilename(struct inode *inode,
struct qnx6_long_filename *lf;
struct super_block *s = inode->i_sb;
struct qnx6_sb_info *sbi = QNX6_SB(s);
- struct page *page;
+ struct folio *folio;
int lf_size;
if (de->de_size != 0xff) {
@@ -76,7 +79,7 @@ static int qnx6_dir_longfilename(struct inode *inode,
pr_err("invalid direntry size (%i).\n", de->de_size);
return 0;
}
- lf = qnx6_longname(s, de, &page);
+ lf = qnx6_longname(s, de, &folio);
if (IS_ERR(lf)) {
pr_err("Error reading longname\n");
return 0;
@@ -87,7 +90,7 @@ static int qnx6_dir_longfilename(struct inode *inode,
if (lf_size > QNX6_LONG_NAME_MAX) {
pr_debug("file %s\n", lf->lf_fname);
pr_err("Filename too long (%i)\n", lf_size);
- qnx6_put_page(page);
+ folio_release_kmap(folio, lf);
return 0;
}
@@ -100,11 +103,11 @@ static int qnx6_dir_longfilename(struct inode *inode,
pr_debug("qnx6_readdir:%.*s inode:%u\n",
lf_size, lf->lf_fname, de_inode);
if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) {
- qnx6_put_page(page);
+ folio_release_kmap(folio, lf);
return 0;
}
- qnx6_put_page(page);
+ folio_release_kmap(folio, lf);
/* success */
return 1;
}
@@ -117,26 +120,27 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
loff_t pos = ctx->pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
unsigned long npages = dir_pages(inode);
unsigned long n = pos >> PAGE_SHIFT;
- unsigned start = (pos & ~PAGE_MASK) / QNX6_DIR_ENTRY_SIZE;
+ unsigned offset = (pos & ~PAGE_MASK) / QNX6_DIR_ENTRY_SIZE;
bool done = false;
ctx->pos = pos;
if (ctx->pos >= inode->i_size)
return 0;
- for ( ; !done && n < npages; n++, start = 0) {
- struct page *page = qnx6_get_page(inode, n);
- int limit = last_entry(inode, n);
+ for ( ; !done && n < npages; n++, offset = 0) {
struct qnx6_dir_entry *de;
- int i = start;
+ struct folio *folio;
+ char *kaddr = qnx6_get_folio(inode, n, &folio);
+ char *limit;
- if (IS_ERR(page)) {
+ if (IS_ERR(kaddr)) {
pr_err("%s(): read failed\n", __func__);
ctx->pos = (n + 1) << PAGE_SHIFT;
- return PTR_ERR(page);
+ return PTR_ERR(kaddr);
}
- de = ((struct qnx6_dir_entry *)page_address(page)) + start;
- for (; i < limit; i++, de++, ctx->pos += QNX6_DIR_ENTRY_SIZE) {
+ de = (struct qnx6_dir_entry *)(kaddr + offset);
+ limit = kaddr + last_entry(inode, n);
+ for (; (char *)de < limit; de++, ctx->pos += QNX6_DIR_ENTRY_SIZE) {
int size = de->de_size;
u32 no_inode = fs32_to_cpu(sbi, de->de_inode);
@@ -164,7 +168,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
}
}
}
- qnx6_put_page(page);
+ folio_release_kmap(folio, kaddr);
}
return 0;
}
@@ -177,23 +181,23 @@ static unsigned qnx6_long_match(int len, const char *name,
{
struct super_block *s = dir->i_sb;
struct qnx6_sb_info *sbi = QNX6_SB(s);
- struct page *page;
+ struct folio *folio;
int thislen;
- struct qnx6_long_filename *lf = qnx6_longname(s, de, &page);
+ struct qnx6_long_filename *lf = qnx6_longname(s, de, &folio);
if (IS_ERR(lf))
return 0;
thislen = fs16_to_cpu(sbi, lf->lf_size);
if (len != thislen) {
- qnx6_put_page(page);
+ folio_release_kmap(folio, lf);
return 0;
}
if (memcmp(name, lf->lf_fname, len) == 0) {
- qnx6_put_page(page);
+ folio_release_kmap(folio, lf);
return fs32_to_cpu(sbi, de->de_inode);
}
- qnx6_put_page(page);
+ folio_release_kmap(folio, lf);
return 0;
}
@@ -210,20 +214,17 @@ static unsigned qnx6_match(struct super_block *s, int len, const char *name,
}
-unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
- struct page **res_page)
+unsigned qnx6_find_ino(int len, struct inode *dir, const char *name)
{
struct super_block *s = dir->i_sb;
struct qnx6_inode_info *ei = QNX6_I(dir);
- struct page *page = NULL;
+ struct folio *folio;
unsigned long start, n;
unsigned long npages = dir_pages(dir);
unsigned ino;
struct qnx6_dir_entry *de;
struct qnx6_long_dir_entry *lde;
- *res_page = NULL;
-
if (npages == 0)
return 0;
start = ei->i_dir_start_lookup;
@@ -232,12 +233,11 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
n = start;
do {
- page = qnx6_get_page(dir, n);
- if (!IS_ERR(page)) {
+ de = qnx6_get_folio(dir, n, &folio);
+ if (!IS_ERR(de)) {
int limit = last_entry(dir, n);
int i;
- de = (struct qnx6_dir_entry *)page_address(page);
for (i = 0; i < limit; i++, de++) {
if (len <= QNX6_SHORT_NAME_MAX) {
/* short filename */
@@ -256,7 +256,7 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
} else
pr_err("undefined filename size in inode.\n");
}
- qnx6_put_page(page);
+ folio_release_kmap(folio, de - i);
}
if (++n >= npages)
@@ -265,8 +265,8 @@ unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
return 0;
found:
- *res_page = page;
ei->i_dir_start_lookup = n;
+ folio_release_kmap(folio, de);
return ino;
}
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 4f1735b882b1..85925ec0051a 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -184,17 +184,17 @@ static const char *qnx6_checkroot(struct super_block *s)
struct qnx6_dir_entry *dir_entry;
struct inode *root = d_inode(s->s_root);
struct address_space *mapping = root->i_mapping;
- struct page *page = read_mapping_page(mapping, 0, NULL);
- if (IS_ERR(page))
+ struct folio *folio = read_mapping_folio(mapping, 0, NULL);
+
+ if (IS_ERR(folio))
return "error reading root directory";
- kmap(page);
- dir_entry = page_address(page);
+ dir_entry = kmap_local_folio(folio, 0);
for (i = 0; i < 2; i++) {
/* maximum 3 bytes - due to match_root limitation */
if (strncmp(dir_entry[i].de_fname, match_root[i], 3))
error = 1;
}
- qnx6_put_page(page);
+ folio_release_kmap(folio, dir_entry);
if (error)
return "error reading root directory.";
return NULL;
@@ -518,7 +518,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
struct inode *inode;
struct qnx6_inode_info *ei;
struct address_space *mapping;
- struct page *page;
+ struct folio *folio;
u32 n, offs;
inode = iget_locked(sb, ino);
@@ -538,17 +538,16 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
return ERR_PTR(-EIO);
}
n = (ino - 1) >> (PAGE_SHIFT - QNX6_INODE_SIZE_BITS);
- offs = (ino - 1) & (~PAGE_MASK >> QNX6_INODE_SIZE_BITS);
mapping = sbi->inodes->i_mapping;
- page = read_mapping_page(mapping, n, NULL);
- if (IS_ERR(page)) {
+ folio = read_mapping_folio(mapping, n, NULL);
+ if (IS_ERR(folio)) {
pr_err("major problem: unable to read inode from dev %s\n",
sb->s_id);
iget_failed(inode);
- return ERR_CAST(page);
+ return ERR_CAST(folio);
}
- kmap(page);
- raw_inode = ((struct qnx6_inode_entry *)page_address(page)) + offs;
+ offs = offset_in_folio(folio, (ino - 1) << QNX6_INODE_SIZE_BITS);
+ raw_inode = kmap_local_folio(folio, offs);
inode->i_mode = fs16_to_cpu(sbi, raw_inode->di_mode);
i_uid_write(inode, (uid_t)fs32_to_cpu(sbi, raw_inode->di_uid));
@@ -578,7 +577,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mapping->a_ops = &qnx6_aops;
} else
init_special_inode(inode, inode->i_mode, 0);
- qnx6_put_page(page);
+ folio_release_kmap(folio, raw_inode);
unlock_new_inode(inode);
return inode;
}
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index e2e98e653b8d..0f0755a9ecb5 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -17,7 +17,6 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
unsigned ino;
- struct page *page;
struct inode *foundinode = NULL;
const char *name = dentry->d_name.name;
int len = dentry->d_name.len;
@@ -25,10 +24,9 @@ struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
if (len > QNX6_LONG_NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
- ino = qnx6_find_entry(len, dir, name, &page);
+ ino = qnx6_find_ino(len, dir, name);
if (ino) {
foundinode = qnx6_iget(dir->i_sb, ino);
- qnx6_put_page(page);
if (IS_ERR(foundinode))
pr_debug("lookup->iget -> error %ld\n",
PTR_ERR(foundinode));
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index 34a6b126a3a9..56ed1367499e 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -126,11 +126,4 @@ static inline __fs16 cpu_to_fs16(struct qnx6_sb_info *sbi, __u16 n)
extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s,
int silent);
-static inline void qnx6_put_page(struct page *page)
-{
- kunmap(page);
- put_page(page);
-}
-
-extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
- struct page **res_page);
+unsigned qnx6_find_ino(int len, struct inode *dir, const char *name);
diff --git a/fs/read_write.c b/fs/read_write.c
index b19cce8e55b9..070a7c33b9dd 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(generic_ro_fops);
static inline bool unsigned_offsets(struct file *file)
{
- return file->f_mode & FMODE_UNSIGNED_OFFSET;
+ return file->f_op->fop_flags & FOP_UNSIGNED_OFFSET;
}
/**
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9b43a81a6488..72c53129c952 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2178,7 +2178,7 @@ static int grab_tail_page(struct inode *inode,
unsigned long offset = (inode->i_size) & (PAGE_SIZE - 1);
struct buffer_head *bh;
struct buffer_head *head;
- struct page *page;
+ struct folio *folio;
int error;
/*
@@ -2190,20 +2190,20 @@ static int grab_tail_page(struct inode *inode,
if ((offset & (blocksize - 1)) == 0) {
return -ENOENT;
}
- page = grab_cache_page(inode->i_mapping, index);
- error = -ENOMEM;
- if (!page) {
- goto out;
- }
+ folio = __filemap_get_folio(inode->i_mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ mapping_gfp_mask(inode->i_mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
/* start within the page of the last block in the file */
start = (offset / blocksize) * blocksize;
- error = __block_write_begin(page, start, offset - start,
+ error = __block_write_begin(folio, start, offset - start,
reiserfs_get_block_create_0);
if (error)
goto unlock;
- head = page_buffers(page);
+ head = folio_buffers(folio);
bh = head;
do {
if (pos >= start) {
@@ -2226,14 +2226,13 @@ static int grab_tail_page(struct inode *inode,
goto unlock;
}
*bh_result = bh;
- *page_result = page;
+ *page_result = &folio->page;
-out:
return error;
unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return error;
}
@@ -2736,23 +2735,24 @@ static void reiserfs_truncate_failed_write(struct inode *inode)
static int reiserfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode;
- struct page *page;
+ struct folio *folio;
pgoff_t index;
int ret;
int old_ref = 0;
inode = mapping->host;
index = pos >> PAGE_SHIFT;
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
- *pagep = page;
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ *foliop = folio;
reiserfs_wait_on_write_block(inode->i_sb);
- fix_tail_page_for_writing(page);
+ fix_tail_page_for_writing(&folio->page);
if (reiserfs_transaction_running(inode->i_sb)) {
struct reiserfs_transaction_handle *th;
th = (struct reiserfs_transaction_handle *)current->
@@ -2762,7 +2762,7 @@ static int reiserfs_write_begin(struct file *file,
old_ref = th->t_refcount;
th->t_refcount++;
}
- ret = __block_write_begin(page, pos, len, reiserfs_get_block);
+ ret = __block_write_begin(folio, pos, len, reiserfs_get_block);
if (ret && reiserfs_transaction_running(inode->i_sb)) {
struct reiserfs_transaction_handle *th = current->journal_info;
/*
@@ -2792,8 +2792,8 @@ static int reiserfs_write_begin(struct file *file,
}
}
if (ret) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
/* Truncate allocated blocks */
reiserfs_truncate_failed_write(inode);
}
@@ -2822,7 +2822,7 @@ int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
th->t_refcount++;
}
- ret = __block_write_begin(page, from, len, reiserfs_get_block);
+ ret = __block_write_begin(page_folio(page), from, len, reiserfs_get_block);
if (ret && reiserfs_transaction_running(inode->i_sb)) {
struct reiserfs_transaction_handle *th = current->journal_info;
/*
@@ -2862,10 +2862,9 @@ static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
static int reiserfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
int ret = 0;
int update_sd = 0;
struct reiserfs_transaction_handle *th;
@@ -2887,7 +2886,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
}
flush_dcache_folio(folio);
- reiserfs_commit_page(inode, page, start, start + copied);
+ reiserfs_commit_page(inode, &folio->page, start, start + copied);
/*
* generic_commit_write does this for us, but does not update the
@@ -2942,8 +2941,8 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
out:
if (locked)
reiserfs_write_unlock(inode->i_sb);
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
if (pos + len > inode->i_size)
reiserfs_truncate_failed_write(inode);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 68758b6fed94..0addcc849ff2 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -126,7 +126,7 @@ static int romfs_read_folio(struct file *file, struct folio *folio)
}
}
- buf = folio_zero_tail(folio, fillsize, buf);
+ buf = folio_zero_tail(folio, fillsize, buf + fillsize);
kunmap_local(buf);
folio_end_read(folio, ret == 0);
return ret;
diff --git a/fs/select.c b/fs/select.c
index 9515c3fa1a03..1a4849e2afb9 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -840,7 +840,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
struct poll_list {
struct poll_list *next;
unsigned int len;
- struct pollfd entries[];
+ struct pollfd entries[] __counted_by(len);
};
#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index 6322f0f68a17..b0473c2567fe 100644
--- a/fs/smb/client/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -129,7 +129,7 @@ static ssize_t cifs_shash_xarray(const struct iov_iter *iter, ssize_t maxsize,
for (j = foffset / PAGE_SIZE; j < npages; j++) {
len = min_t(size_t, maxsize, PAGE_SIZE - offset);
p = kmap_local_page(folio_page(folio, j));
- ret = crypto_shash_update(shash, p, len);
+ ret = crypto_shash_update(shash, p + offset, len);
kunmap_local(p);
if (ret < 0)
return ret;
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 2c4b357d85e2..2a2523c93944 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -75,9 +75,9 @@ unsigned int sign_CIFS_PDUs = 1;
/*
* Global transaction id (XID) information
*/
-unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */
-unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
-unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */
+unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */
+unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */
+unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */
spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */
/*
@@ -1341,7 +1341,6 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
struct cifsFileInfo *smb_file_target;
struct cifs_tcon *src_tcon;
struct cifs_tcon *target_tcon;
- unsigned long long destend, fstart, fend;
ssize_t rc;
cifs_dbg(FYI, "copychunk range\n");
@@ -1391,25 +1390,13 @@ ssize_t cifs_file_copychunk_range(unsigned int xid,
goto unlock;
}
- destend = destoff + len - 1;
-
- /* Flush the folios at either end of the destination range to prevent
- * accidental loss of dirty data outside of the range.
+ /* Flush and invalidate all the folios in the destination region. If
+ * the copy was successful, then some of the flush is extra overhead,
+ * but we need to allow for the copy failing in some way (eg. ENOSPC).
*/
- fstart = destoff;
- fend = destend;
-
- rc = cifs_flush_folio(target_inode, destoff, &fstart, &fend, true);
+ rc = filemap_invalidate_inode(target_inode, true, destoff, destoff + len - 1);
if (rc)
goto unlock;
- rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false);
- if (rc)
- goto unlock;
- if (fend > target_cifsi->netfs.zero_point)
- target_cifsi->netfs.zero_point = fend + 1;
-
- /* Discard all the folios that overlap the destination region. */
- truncate_inode_pages_range(&target_inode->i_data, fstart, fend);
fscache_invalidate(cifs_inode_cookie(target_inode), NULL,
i_size_read(target_inode), 0);
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 5c9b3e6cd95f..9eae8649f90c 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -254,7 +254,6 @@ struct cifs_open_info_data {
struct smb_rqst {
struct kvec *rq_iov; /* array of kvecs */
unsigned int rq_nvec; /* number of kvecs in array */
- size_t rq_iter_size; /* Amount of data in ->rq_iter */
struct iov_iter rq_iter; /* Data iterator */
struct xarray rq_buffer; /* Page buffer for encryption */
};
@@ -1486,6 +1485,7 @@ struct cifs_io_subrequest {
struct cifs_io_request *req;
};
ssize_t got_bytes;
+ size_t actual_len;
unsigned int xid;
int result;
bool have_xid;
@@ -2017,9 +2017,9 @@ extern spinlock_t cifs_tcp_ses_lock;
/*
* Global transaction id (XID) information
*/
-extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */
-extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
-extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */
+extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */
+extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */
+extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */
extern spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */
/*
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 595c4b673707..cfae2e918209 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1261,16 +1261,32 @@ openRetry:
return rc;
}
+static void cifs_readv_worker(struct work_struct *work)
+{
+ struct cifs_io_subrequest *rdata =
+ container_of(work, struct cifs_io_subrequest, subreq.work);
+
+ netfs_subreq_terminated(&rdata->subreq,
+ (rdata->result == 0 || rdata->result == -EAGAIN) ?
+ rdata->got_bytes : rdata->result, true);
+}
+
static void
cifs_readv_callback(struct mid_q_entry *mid)
{
struct cifs_io_subrequest *rdata = mid->callback_data;
+ struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode);
struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 2,
.rq_iter = rdata->subreq.io_iter };
- struct cifs_credits credits = { .value = 1, .instance = 0 };
+ struct cifs_credits credits = {
+ .value = 1,
+ .instance = 0,
+ .rreq_debug_id = rdata->rreq->debug_id,
+ .rreq_debug_index = rdata->subreq.debug_index,
+ };
cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n",
__func__, mid->mid, mid->mid_state, rdata->result,
@@ -1282,6 +1298,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
if (server->sign) {
int rc = 0;
+ iov_iter_truncate(&rqst.rq_iter, rdata->got_bytes);
rc = cifs_verify_signature(&rqst, server,
mid->sequence_number);
if (rc)
@@ -1306,13 +1323,21 @@ cifs_readv_callback(struct mid_q_entry *mid)
rdata->result = -EIO;
}
- if (rdata->result == 0 || rdata->result == -EAGAIN)
- iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes);
+ if (rdata->result == -ENODATA) {
+ __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
+ rdata->result = 0;
+ } else {
+ if (rdata->got_bytes < rdata->actual_len &&
+ rdata->subreq.start + rdata->subreq.transferred + rdata->got_bytes ==
+ ictx->remote_i_size) {
+ __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
+ rdata->result = 0;
+ }
+ }
+
rdata->credits.value = 0;
- netfs_subreq_terminated(&rdata->subreq,
- (rdata->result == 0 || rdata->result == -EAGAIN) ?
- rdata->got_bytes : rdata->result,
- false);
+ INIT_WORK(&rdata->subreq.work, cifs_readv_worker);
+ queue_work(cifsiod_wq, &rdata->subreq.work);
release_mid(mid);
add_credits(server, &credits, 0);
}
@@ -1619,9 +1644,15 @@ static void
cifs_writev_callback(struct mid_q_entry *mid)
{
struct cifs_io_subrequest *wdata = mid->callback_data;
+ struct TCP_Server_Info *server = wdata->server;
struct cifs_tcon *tcon = tlink_tcon(wdata->req->cfile->tlink);
WRITE_RSP *smb = (WRITE_RSP *)mid->resp_buf;
- struct cifs_credits credits = { .value = 1, .instance = 0 };
+ struct cifs_credits credits = {
+ .value = 1,
+ .instance = 0,
+ .rreq_debug_id = wdata->rreq->debug_id,
+ .rreq_debug_index = wdata->subreq.debug_index,
+ };
ssize_t result;
size_t written;
@@ -1657,9 +1688,16 @@ cifs_writev_callback(struct mid_q_entry *mid)
break;
}
+ trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index,
+ wdata->credits.value,
+ server->credits, server->in_flight,
+ 0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
cifs_write_subrequest_terminated(wdata, result, true);
release_mid(mid);
+ trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0,
+ server->credits, server->in_flight,
+ credits.value, cifs_trace_rw_credits_write_response_add);
add_credits(tcon->ses->server, &credits, 0);
}
@@ -1713,7 +1751,6 @@ cifs_async_writev(struct cifs_io_subrequest *wdata)
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
rqst.rq_iter = wdata->subreq.io_iter;
- rqst.rq_iter_size = iov_iter_count(&wdata->subreq.io_iter);
cifs_dbg(FYI, "async write at %llu %zu bytes\n",
wdata->subreq.start, wdata->subreq.len);
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index d2307162a2de..5375b0c1dfb9 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -657,6 +657,19 @@ static bool
server_unresponsive(struct TCP_Server_Info *server)
{
/*
+ * If we're in the process of mounting a share or reconnecting a session
+ * and the server abruptly shut down (e.g. socket wasn't closed, packet
+ * had been ACK'ed but no SMB response), don't wait longer than 20s to
+ * negotiate protocol.
+ */
+ spin_lock(&server->srv_lock);
+ if (server->tcpStatus == CifsInNegotiate &&
+ time_after(jiffies, server->lstrp + 20 * HZ)) {
+ spin_unlock(&server->srv_lock);
+ cifs_reconnect(server, false);
+ return true;
+ }
+ /*
* We need to wait 3 echo intervals to make sure we handle such
* situations right:
* 1s client sends a normal SMB request
@@ -667,7 +680,6 @@ server_unresponsive(struct TCP_Server_Info *server)
* 65s kernel_recvmsg times out, and we see that we haven't gotten
* a response in >60s.
*/
- spin_lock(&server->srv_lock);
if ((server->tcpStatus == CifsGood ||
server->tcpStatus == CifsNeedNegotiate) &&
(!server->ops->can_echo || server->ops->can_echo(server)) &&
@@ -4194,6 +4206,9 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
*
* If one doesn't exist then insert a new tcon_link struct into the tree and
* try to construct a new one.
+ *
+ * REMEMBER to call cifs_put_tlink() after successful calls to cifs_sb_tlink,
+ * to avoid refcount issues
*/
struct tcon_link *
cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 1fc66bcf49eb..2d387485f05b 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -111,6 +111,7 @@ static void cifs_issue_write(struct netfs_io_subrequest *subreq)
goto fail;
}
+ wdata->actual_len = wdata->subreq.len;
rc = adjust_credits(wdata->server, wdata, cifs_trace_rw_credits_issue_write_adjust);
if (rc)
goto fail;
@@ -153,7 +154,7 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
struct TCP_Server_Info *server = req->server;
struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
- size_t rsize = 0;
+ size_t rsize;
int rc;
rdata->xid = get_xid();
@@ -166,8 +167,8 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
cifs_sb->ctx);
- rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize,
- &rdata->credits);
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
+ &rsize, &rdata->credits);
if (rc) {
subreq->error = rc;
return false;
@@ -183,7 +184,8 @@ static bool cifs_clamp_length(struct netfs_io_subrequest *subreq)
server->credits, server->in_flight, 0,
cifs_trace_rw_credits_read_submit);
- subreq->len = min_t(size_t, subreq->len, rsize);
+ subreq->len = umin(subreq->len, rsize);
+ rdata->actual_len = subreq->len;
#ifdef CONFIG_CIFS_SMB_DIRECT
if (server->smbd_conn)
@@ -203,12 +205,39 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq)
struct netfs_io_request *rreq = subreq->rreq;
struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq);
struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq);
+ struct TCP_Server_Info *server = req->server;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
int rc = 0;
cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n",
__func__, rreq->debug_id, subreq->debug_index, rreq->mapping,
subreq->transferred, subreq->len);
+ if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) {
+ /*
+ * As we're issuing a retry, we need to negotiate some new
+ * credits otherwise the server may reject the op with
+ * INVALID_PARAMETER. Note, however, we may get back less
+ * credit than we need to complete the op, in which case, we
+ * shorten the op and rely on additional rounds of retry.
+ */
+ size_t rsize = umin(subreq->len - subreq->transferred,
+ cifs_sb->ctx->rsize);
+
+ rc = server->ops->wait_mtu_credits(server, rsize, &rdata->actual_len,
+ &rdata->credits);
+ if (rc)
+ goto out;
+
+ rdata->credits.in_flight_check = 1;
+
+ trace_smb3_rw_credits(rdata->rreq->debug_id,
+ rdata->subreq.debug_index,
+ rdata->credits.value,
+ server->credits, server->in_flight, 0,
+ cifs_trace_rw_credits_read_resubmit);
+ }
+
if (req->cfile->invalidHandle) {
do {
rc = cifs_reopen_file(req->cfile, true);
@@ -2912,9 +2941,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
if (!CIFS_CACHE_READ(cinode))
return netfs_unbuffered_read_iter(iocb, to);
- if (cap_unix(tcon->ses) &&
- (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
- ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) {
if (iocb->ki_flags & IOCB_DIRECT)
return netfs_unbuffered_read_iter(iocb, to);
return netfs_buffered_read_iter(iocb, to);
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index dd0afa23734c..73e2e6c230b7 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -172,6 +172,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr,
CIFS_I(inode)->time = 0; /* force reval */
return -ESTALE;
}
+ if (inode->i_state & I_NEW)
+ CIFS_I(inode)->netfs.zero_point = fattr->cf_eof;
cifs_revalidate_cache(inode, fattr);
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index 44dbaf9929a4..9bb5c869f4db 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -229,9 +229,11 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg)
shutdown_good:
trace_smb3_shutdown_done(flags, tcon->tid);
+ cifs_put_tlink(tlink);
return 0;
shutdown_out_err:
trace_smb3_shutdown_err(rc, flags, tcon->tid);
+ cifs_put_tlink(tlink);
return rc;
}
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c
index d86da949a919..80099bbb333b 100644
--- a/fs/smb/client/link.c
+++ b/fs/smb/client/link.c
@@ -588,6 +588,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode,
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
rc = PTR_ERR(tlink);
+ /* BB could be clearer if skipped put_tlink on error here, but harmless */
goto symlink_exit;
}
pTcon = tlink_tcon(tlink);
diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c
index 689d8a506d45..48c27581ec51 100644
--- a/fs/smb/client/reparse.c
+++ b/fs/smb/client/reparse.c
@@ -378,6 +378,8 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
u32 plen, struct cifs_sb_info *cifs_sb,
bool unicode, struct cifs_open_info_data *data)
{
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+
data->reparse.buf = buf;
/* See MS-FSCC 2.1.2 */
@@ -394,12 +396,13 @@ int parse_reparse_point(struct reparse_data_buffer *buf,
case IO_REPARSE_TAG_LX_FIFO:
case IO_REPARSE_TAG_LX_CHR:
case IO_REPARSE_TAG_LX_BLK:
- return 0;
+ break;
default:
- cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n",
- __func__, le32_to_cpu(buf->ReparseTag));
- return -EOPNOTSUPP;
+ cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n",
+ le32_to_cpu(buf->ReparseTag));
+ break;
}
+ return 0;
}
int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb,
diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c
index 9f5bc41433c1..11a1c53c64e0 100644
--- a/fs/smb/client/smb2inode.c
+++ b/fs/smb/client/smb2inode.c
@@ -1106,6 +1106,8 @@ int smb2_rename_path(const unsigned int xid,
co, DELETE, SMB2_OP_RENAME, cfile, source_dentry);
if (rc == -EINVAL) {
cifs_dbg(FYI, "invalid lease key, resending request without lease");
+ cifs_get_writable_path(tcon, from_name,
+ FIND_WR_WITH_DELETE, &cfile);
rc = smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb,
co, DELETE, SMB2_OP_RENAME, cfile, NULL);
}
@@ -1149,6 +1151,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
cfile, NULL, NULL, dentry);
if (rc == -EINVAL) {
cifs_dbg(FYI, "invalid lease key, resending request without lease");
+ cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
rc = smb2_compound_op(xid, tcon, cifs_sb,
full_path, &oparms, &in_iov,
&(int){SMB2_OP_SET_EOF}, 1,
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 322cabc69c6f..e6540072ffb0 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -301,7 +301,7 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
unsigned int /*enum smb3_rw_credits_trace*/ trace)
{
struct cifs_credits *credits = &subreq->credits;
- int new_val = DIV_ROUND_UP(subreq->subreq.len, SMB2_MAX_BUFFER_SIZE);
+ int new_val = DIV_ROUND_UP(subreq->actual_len, SMB2_MAX_BUFFER_SIZE);
int scredits, in_flight;
if (!credits->value || credits->value == new_val)
@@ -316,7 +316,8 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
cifs_trace_rw_credits_no_adjust_up);
trace_smb3_too_many_credits(server->CurrentMid,
server->conn_id, server->hostname, 0, credits->value - new_val, 0);
- cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
+ cifs_server_dbg(VFS, "R=%x[%x] request has less credits (%d) than required (%d)",
+ subreq->rreq->debug_id, subreq->subreq.debug_index,
credits->value, new_val);
return -EOPNOTSUPP;
@@ -338,8 +339,9 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
trace_smb3_reconnect_detected(server->CurrentMid,
server->conn_id, server->hostname, scredits,
credits->value - new_val, in_flight);
- cifs_server_dbg(VFS, "trying to return %d credits to old session\n",
- credits->value - new_val);
+ cifs_server_dbg(VFS, "R=%x[%x] trying to return %d credits to old session\n",
+ subreq->rreq->debug_id, subreq->subreq.debug_index,
+ credits->value - new_val);
return -EAGAIN;
}
@@ -3237,13 +3239,15 @@ static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon,
}
static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
- loff_t offset, loff_t len, bool keep_size)
+ unsigned long long offset, unsigned long long len,
+ bool keep_size)
{
struct cifs_ses *ses = tcon->ses;
struct inode *inode = file_inode(file);
struct cifsInodeInfo *cifsi = CIFS_I(inode);
struct cifsFileInfo *cfile = file->private_data;
- unsigned long long new_size;
+ struct netfs_inode *ictx = netfs_inode(inode);
+ unsigned long long i_size, new_size, remote_size;
long rc;
unsigned int xid;
@@ -3255,6 +3259,16 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
inode_lock(inode);
filemap_invalidate_lock(inode->i_mapping);
+ i_size = i_size_read(inode);
+ remote_size = ictx->remote_i_size;
+ if (offset + len >= remote_size && offset < i_size) {
+ unsigned long long top = umin(offset + len, i_size);
+
+ rc = filemap_write_and_wait_range(inode->i_mapping, offset, top - 1);
+ if (rc < 0)
+ goto zero_range_exit;
+ }
+
/*
* We zero the range through ioctl, so we need remove the page caches
* first, otherwise the data may be inconsistent with the server.
@@ -3305,6 +3319,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
struct file_zero_data_information fsctl_buf;
+ unsigned long long end = offset + len, i_size, remote_i_size;
long rc;
unsigned int xid;
__u8 set_sparse = 1;
@@ -3336,6 +3351,27 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
(char *)&fsctl_buf,
sizeof(struct file_zero_data_information),
CIFSMaxBufSize, NULL, NULL);
+
+ if (rc)
+ goto unlock;
+
+ /* If there's dirty data in the buffer that would extend the EOF if it
+ * were written, then we need to move the EOF marker over to the lower
+ * of the high end of the hole and the proposed EOF. The problem is
+ * that we locally hole-punch the tail of the dirty data, the proposed
+ * EOF update will end up in the wrong place.
+ */
+ i_size = i_size_read(inode);
+ remote_i_size = netfs_inode(inode)->remote_i_size;
+ if (end > remote_i_size && i_size > remote_i_size) {
+ unsigned long long extend_to = umin(end, i_size);
+ rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid, cfile->pid, extend_to);
+ if (rc >= 0)
+ netfs_inode(inode)->remote_i_size = extend_to;
+ }
+
+unlock:
filemap_invalidate_unlock(inode->i_mapping);
out:
inode_unlock(inode);
@@ -4446,7 +4482,6 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
}
iov_iter_xarray(&new->rq_iter, ITER_SOURCE,
buffer, 0, size);
- new->rq_iter_size = size;
}
}
@@ -4492,7 +4527,6 @@ decrypt_raw_data(struct TCP_Server_Info *server, char *buf,
rqst.rq_nvec = 2;
if (iter) {
rqst.rq_iter = *iter;
- rqst.rq_iter_size = iov_iter_count(iter);
iter_size = iov_iter_count(iter);
}
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 83facb54276a..88dc49d67037 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4441,7 +4441,7 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
* If we want to do a RDMA write, fill in and append
* smbd_buffer_descriptor_v1 to the end of read request
*/
- if (smb3_use_rdma_offload(io_parms)) {
+ if (rdata && smb3_use_rdma_offload(io_parms)) {
struct smbd_buffer_descriptor_v1 *v1;
bool need_invalidate = server->dialect == SMB30_PROT_ID;
@@ -4507,6 +4507,7 @@ static void
smb2_readv_callback(struct mid_q_entry *mid)
{
struct cifs_io_subrequest *rdata = mid->callback_data;
+ struct netfs_inode *ictx = netfs_inode(rdata->rreq->inode);
struct cifs_tcon *tcon = tlink_tcon(rdata->req->cfile->tlink);
struct TCP_Server_Info *server = rdata->server;
struct smb2_hdr *shdr =
@@ -4523,16 +4524,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
if (rdata->got_bytes) {
rqst.rq_iter = rdata->subreq.io_iter;
- rqst.rq_iter_size = iov_iter_count(&rdata->subreq.io_iter);
}
WARN_ONCE(rdata->server != mid->server,
"rdata server %p != mid server %p",
rdata->server, mid->server);
- cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n",
+ cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu/%zu\n",
__func__, mid->mid, mid->mid_state, rdata->result,
- rdata->subreq.len);
+ rdata->actual_len, rdata->subreq.len - rdata->subreq.transferred);
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
@@ -4586,22 +4586,29 @@ smb2_readv_callback(struct mid_q_entry *mid)
rdata->subreq.debug_index,
rdata->xid,
rdata->req->cfile->fid.persistent_fid,
- tcon->tid, tcon->ses->Suid, rdata->subreq.start,
- rdata->subreq.len, rdata->result);
+ tcon->tid, tcon->ses->Suid,
+ rdata->subreq.start + rdata->subreq.transferred,
+ rdata->actual_len,
+ rdata->result);
} else
trace_smb3_read_done(rdata->rreq->debug_id,
rdata->subreq.debug_index,
rdata->xid,
rdata->req->cfile->fid.persistent_fid,
tcon->tid, tcon->ses->Suid,
- rdata->subreq.start, rdata->got_bytes);
+ rdata->subreq.start + rdata->subreq.transferred,
+ rdata->got_bytes);
if (rdata->result == -ENODATA) {
- /* We may have got an EOF error because fallocate
- * failed to enlarge the file.
- */
- if (rdata->subreq.start < rdata->subreq.rreq->i_size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
+ rdata->result = 0;
+ } else {
+ if (rdata->got_bytes < rdata->actual_len &&
+ rdata->subreq.start + rdata->subreq.transferred + rdata->got_bytes ==
+ ictx->remote_i_size) {
+ __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0;
+ }
}
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value,
server->credits, server->in_flight,
@@ -4622,6 +4629,7 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
{
int rc, flags = 0;
char *buf;
+ struct netfs_io_subrequest *subreq = &rdata->subreq;
struct smb2_hdr *shdr;
struct cifs_io_parms io_parms;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
@@ -4632,15 +4640,15 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
int credit_request;
cifs_dbg(FYI, "%s: offset=%llu bytes=%zu\n",
- __func__, rdata->subreq.start, rdata->subreq.len);
+ __func__, subreq->start, subreq->len);
if (!rdata->server)
rdata->server = cifs_pick_channel(tcon->ses);
io_parms.tcon = tlink_tcon(rdata->req->cfile->tlink);
io_parms.server = server = rdata->server;
- io_parms.offset = rdata->subreq.start;
- io_parms.length = rdata->subreq.len;
+ io_parms.offset = subreq->start + subreq->transferred;
+ io_parms.length = rdata->actual_len;
io_parms.persistent_fid = rdata->req->cfile->fid.persistent_fid;
io_parms.volatile_fid = rdata->req->cfile->fid.volatile_fid;
io_parms.pid = rdata->req->pid;
@@ -4655,11 +4663,13 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
rdata->iov[0].iov_base = buf;
rdata->iov[0].iov_len = total_len;
+ rdata->got_bytes = 0;
+ rdata->result = 0;
shdr = (struct smb2_hdr *)buf;
if (rdata->credits.value > 0) {
- shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->subreq.len,
+ shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->actual_len,
SMB2_MAX_BUFFER_SIZE));
credit_request = le16_to_cpu(shdr->CreditCharge) + 8;
if (server->credits >= server->max_credits)
@@ -4683,11 +4693,11 @@ smb2_async_readv(struct cifs_io_subrequest *rdata)
if (rc) {
cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
trace_smb3_read_err(rdata->rreq->debug_id,
- rdata->subreq.debug_index,
+ subreq->debug_index,
rdata->xid, io_parms.persistent_fid,
io_parms.tcon->tid,
io_parms.tcon->ses->Suid,
- io_parms.offset, io_parms.length, rc);
+ io_parms.offset, rdata->actual_len, rc);
}
async_readv_out:
@@ -4914,6 +4924,13 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
if (rc)
goto out;
+ rqst.rq_iov = iov;
+ rqst.rq_iter = wdata->subreq.io_iter;
+
+ rqst.rq_iov[0].iov_len = total_len - 1;
+ rqst.rq_iov[0].iov_base = (char *)req;
+ rqst.rq_nvec += 1;
+
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
@@ -4925,6 +4942,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
req->WriteChannelInfoOffset = 0;
req->WriteChannelInfoLength = 0;
req->Channel = SMB2_CHANNEL_NONE;
+ req->Length = cpu_to_le32(io_parms->length);
req->Offset = cpu_to_le64(io_parms->offset);
req->DataOffset = cpu_to_le16(
offsetof(struct smb2_write_req, Buffer));
@@ -4944,7 +4962,6 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
*/
if (smb3_use_rdma_offload(io_parms)) {
struct smbd_buffer_descriptor_v1 *v1;
- size_t data_size = iov_iter_count(&wdata->subreq.io_iter);
bool need_invalidate = server->dialect == SMB30_PROT_ID;
wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->subreq.io_iter,
@@ -4953,9 +4970,10 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
rc = -EAGAIN;
goto async_writev_out;
}
+ /* For RDMA read, I/O size is in RemainingBytes not in Length */
+ req->RemainingBytes = req->Length;
req->Length = 0;
req->DataOffset = 0;
- req->RemainingBytes = cpu_to_le32(data_size);
req->Channel = SMB2_CHANNEL_RDMA_V1_INVALIDATE;
if (need_invalidate)
req->Channel = SMB2_CHANNEL_RDMA_V1;
@@ -4967,31 +4985,22 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
v1->offset = cpu_to_le64(wdata->mr->mr->iova);
v1->token = cpu_to_le32(wdata->mr->mr->rkey);
v1->length = cpu_to_le32(wdata->mr->mr->length);
+
+ rqst.rq_iov[0].iov_len += sizeof(*v1);
+
+ /*
+ * We keep wdata->subreq.io_iter,
+ * but we have to truncate rqst.rq_iter
+ */
+ iov_iter_truncate(&rqst.rq_iter, 0);
}
#endif
- iov[0].iov_len = total_len - 1;
- iov[0].iov_base = (char *)req;
- rqst.rq_iov = iov;
- rqst.rq_nvec = 1;
- rqst.rq_iter = wdata->subreq.io_iter;
- rqst.rq_iter_size = iov_iter_count(&rqst.rq_iter);
if (test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags))
smb2_set_replay(server, &rqst);
-#ifdef CONFIG_CIFS_SMB_DIRECT
- if (wdata->mr)
- iov[0].iov_len += sizeof(struct smbd_buffer_descriptor_v1);
-#endif
- cifs_dbg(FYI, "async write at %llu %u bytes iter=%zx\n",
- io_parms->offset, io_parms->length, iov_iter_count(&rqst.rq_iter));
-#ifdef CONFIG_CIFS_SMB_DIRECT
- /* For RDMA read, I/O size is in RemainingBytes not in Length */
- if (!wdata->mr)
- req->Length = cpu_to_le32(io_parms->length);
-#else
- req->Length = cpu_to_le32(io_parms->length);
-#endif
+ cifs_dbg(FYI, "async write at %llu %u bytes iter=%zx\n",
+ io_parms->offset, io_parms->length, iov_iter_count(&wdata->subreq.io_iter));
if (wdata->credits.value > 0) {
shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->subreq.len,
diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h
index 0f0c10c7ada7..8e9964001e2a 100644
--- a/fs/smb/client/trace.h
+++ b/fs/smb/client/trace.h
@@ -30,6 +30,7 @@
EM(cifs_trace_rw_credits_old_session, "old-session") \
EM(cifs_trace_rw_credits_read_response_add, "rd-resp-add") \
EM(cifs_trace_rw_credits_read_response_clear, "rd-resp-clr") \
+ EM(cifs_trace_rw_credits_read_resubmit, "rd-resubmit") \
EM(cifs_trace_rw_credits_read_submit, "rd-submit ") \
EM(cifs_trace_rw_credits_write_prepare, "wr-prepare ") \
EM(cifs_trace_rw_credits_write_response_add, "wr-resp-add") \
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index 09e1e7771592..7889df8112b4 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -165,11 +165,43 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status)
up_read(&conn_list_lock);
}
-void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id)
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn)
{
wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2);
}
+int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id)
+{
+ struct ksmbd_conn *conn;
+ int rc, retry_count = 0, max_timeout = 120;
+ int rcount = 1;
+
+retry_idle:
+ if (retry_count >= max_timeout)
+ return -EIO;
+
+ down_read(&conn_list_lock);
+ list_for_each_entry(conn, &conn_list, conns_list) {
+ if (conn->binding || xa_load(&conn->sessions, sess_id)) {
+ if (conn == curr_conn)
+ rcount = 2;
+ if (atomic_read(&conn->req_running) >= rcount) {
+ rc = wait_event_timeout(conn->req_running_q,
+ atomic_read(&conn->req_running) < rcount,
+ HZ);
+ if (!rc) {
+ up_read(&conn_list_lock);
+ retry_count++;
+ goto retry_idle;
+ }
+ }
+ }
+ }
+ up_read(&conn_list_lock);
+
+ return 0;
+}
+
int ksmbd_conn_write(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index 5c2845e47cf2..5b947175c048 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -145,7 +145,8 @@ extern struct list_head conn_list;
extern struct rw_semaphore conn_list_lock;
bool ksmbd_conn_alive(struct ksmbd_conn *conn);
-void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id);
+void ksmbd_conn_wait_idle(struct ksmbd_conn *conn);
+int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id);
struct ksmbd_conn *ksmbd_conn_alloc(void);
void ksmbd_conn_free(struct ksmbd_conn *conn);
bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c);
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 162a12685d2c..99416ce9f501 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -311,6 +311,7 @@ void destroy_previous_session(struct ksmbd_conn *conn,
{
struct ksmbd_session *prev_sess;
struct ksmbd_user *prev_user;
+ int err;
down_write(&sessions_table_lock);
down_write(&conn->session_lock);
@@ -325,8 +326,16 @@ void destroy_previous_session(struct ksmbd_conn *conn,
memcmp(user->passkey, prev_user->passkey, user->passkey_sz))
goto out;
+ ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT);
+ err = ksmbd_conn_wait_idle_sess_id(conn, id);
+ if (err) {
+ ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE);
+ goto out;
+ }
+
ksmbd_destroy_file_table(&prev_sess->file_table);
prev_sess->state = SMB2_SESSION_EXPIRED;
+ ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE);
ksmbd_launch_ksmbd_durable_scavenger();
out:
up_write(&conn->session_lock);
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index a8f52c4ebbda..e546ffa57b55 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -1510,7 +1510,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
* parse_lease_state() - parse lease context containted in file open request
* @open_req: buffer containing smb2 file open(create) request
*
- * Return: oplock state, -ENOENT if create lease context not found
+ * Return: allocated lease context object on success, otherwise NULL
*/
struct lease_ctx_info *parse_lease_state(void *open_req)
{
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 2df1354288e6..8bdc59251418 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -519,7 +519,7 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work)
* smb2_allocate_rsp_buf() - allocate smb2 response buffer
* @work: smb work containing smb request buffer
*
- * Return: 0 on success, otherwise -ENOMEM
+ * Return: 0 on success, otherwise error
*/
int smb2_allocate_rsp_buf(struct ksmbd_work *work)
{
@@ -1370,7 +1370,8 @@ static int ntlm_negotiate(struct ksmbd_work *work,
}
sz = le16_to_cpu(rsp->SecurityBufferOffset);
- memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
+ unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len,
+ /* alloc is larger than blob, see smb2_allocate_rsp_buf() */);
rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
out:
@@ -1453,7 +1454,9 @@ static int ntlm_authenticate(struct ksmbd_work *work,
return -ENOMEM;
sz = le16_to_cpu(rsp->SecurityBufferOffset);
- memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
+ unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob,
+ spnego_blob_len,
+ /* alloc is larger than blob, see smb2_allocate_rsp_buf() */);
rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
kfree(spnego_blob);
}
@@ -1687,6 +1690,8 @@ int smb2_sess_setup(struct ksmbd_work *work)
rc = ksmbd_session_register(conn, sess);
if (rc)
goto out_err;
+
+ conn->binding = false;
} else if (conn->dialect >= SMB30_PROT_ID &&
(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) &&
req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) {
@@ -1765,6 +1770,8 @@ int smb2_sess_setup(struct ksmbd_work *work)
sess = NULL;
goto out_err;
}
+
+ conn->binding = false;
}
work->sess = sess;
@@ -2210,7 +2217,7 @@ int smb2_session_logoff(struct ksmbd_work *work)
ksmbd_conn_unlock(conn);
ksmbd_close_session_fds(work);
- ksmbd_conn_wait_idle(conn, sess_id);
+ ksmbd_conn_wait_idle(conn);
/*
* Re-lookup session to validate if session is deleted
@@ -2767,8 +2774,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work,
}
}
- if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) ||
- req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) {
+ if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) ||
+ req_op_level == SMB2_OPLOCK_LEVEL_BATCH) {
dh_info->CreateGuid =
durable_v2_blob->CreateGuid;
dh_info->persistent =
@@ -2788,8 +2795,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work,
goto out;
}
- if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) ||
- req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) {
+ if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) ||
+ req_op_level == SMB2_OPLOCK_LEVEL_BATCH) {
ksmbd_debug(SMB, "Request for durable open\n");
dh_info->type = dh_idx;
}
@@ -3093,7 +3100,6 @@ int smb2_open(struct ksmbd_work *work)
goto err_out;
}
- file_present = true;
idmap = mnt_idmap(path.mnt);
} else {
if (rc != -ENOENT)
@@ -3411,7 +3417,7 @@ int smb2_open(struct ksmbd_work *work)
goto err_out1;
}
} else {
- if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) {
+ if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && lc) {
if (S_ISDIR(file_inode(filp)->i_mode)) {
lc->req_state &= ~SMB2_LEASE_WRITE_CACHING_LE;
lc->is_dir = true;
@@ -3710,7 +3716,7 @@ err_out2:
kfree(name);
kfree(lc);
- return 0;
+ return rc;
}
static int readdir_info_level_struct_sz(int info_level)
@@ -4406,7 +4412,8 @@ int smb2_query_dir(struct ksmbd_work *work)
rsp->OutputBufferLength = cpu_to_le32(0);
rsp->Buffer[0] = 0;
rc = ksmbd_iov_pin_rsp(work, (void *)rsp,
- sizeof(struct smb2_query_directory_rsp));
+ offsetof(struct smb2_query_directory_rsp, Buffer)
+ + 1);
if (rc)
goto err_out;
} else {
@@ -5357,7 +5364,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
"NTFS", PATH_MAX, conn->local_nls, 0);
len = len * 2;
info->FileSystemNameLen = cpu_to_le32(len);
- sz = sizeof(struct filesystem_attribute_info) - 2 + len;
+ sz = sizeof(struct filesystem_attribute_info) + len;
rsp->OutputBufferLength = cpu_to_le32(sz);
break;
}
@@ -5383,7 +5390,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
len = len * 2;
info->VolumeLabelSize = cpu_to_le32(len);
info->Reserved = 0;
- sz = sizeof(struct filesystem_vol_info) - 2 + len;
+ sz = sizeof(struct filesystem_vol_info) + len;
rsp->OutputBufferLength = cpu_to_le32(sz);
break;
}
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index 4a3148b0167f..cc1d6dfe29d5 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -213,7 +213,7 @@ struct filesystem_attribute_info {
__le32 Attributes;
__le32 MaxPathNameComponentLength;
__le32 FileSystemNameLen;
- __le16 FileSystemName[1]; /* do not have to save this - get subset? */
+ __le16 FileSystemName[]; /* do not have to save this - get subset? */
} __packed;
struct filesystem_device_info {
@@ -226,7 +226,7 @@ struct filesystem_vol_info {
__le32 SerialNumber;
__le32 VolumeLabelSize;
__le16 Reserved;
- __le16 VolumeLabel[1];
+ __le16 VolumeLabel[];
} __packed;
struct filesystem_info {
diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c
index a84788396daa..aaed9e293b2e 100644
--- a/fs/smb/server/transport_tcp.c
+++ b/fs/smb/server/transport_tcp.c
@@ -624,8 +624,10 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz)
for_each_netdev(&init_net, netdev) {
if (netif_is_bridge_port(netdev))
continue;
- if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL)))
+ if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) {
+ rtnl_unlock();
return -ENOMEM;
+ }
}
rtnl_unlock();
bind_additional_ifaces = 1;
diff --git a/fs/smb/server/xattr.h b/fs/smb/server/xattr.h
index 16499ca5c82d..fa3e27d6971b 100644
--- a/fs/smb/server/xattr.h
+++ b/fs/smb/server/xattr.h
@@ -76,7 +76,7 @@ struct xattr_acl_entry {
struct xattr_smb_acl {
int count;
int next;
- struct xattr_acl_entry entries[];
+ struct xattr_acl_entry entries[] __counted_by(count);
};
/* 64bytes hash in xattr_ntacl is computed with sha256 */
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index a8c1e7f9a609..21aaa96856c1 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -494,39 +494,73 @@ out:
}
static int squashfs_readahead_fragment(struct page **page,
- unsigned int pages, unsigned int expected)
+ unsigned int pages, unsigned int expected, loff_t start)
{
struct inode *inode = page[0]->mapping->host;
struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
squashfs_i(inode)->fragment_block,
squashfs_i(inode)->fragment_size);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
- int error = buffer->error;
+ int i, bytes, copied;
+ struct squashfs_page_actor *actor;
+ unsigned int offset;
+ void *addr;
+ struct page *last_page;
+
+ if (buffer->error)
+ goto out;
- if (error)
+ actor = squashfs_page_actor_init_special(msblk, page, pages,
+ expected, start);
+ if (!actor)
goto out;
- expected += squashfs_i(inode)->fragment_offset;
+ squashfs_actor_nobuff(actor);
+ addr = squashfs_first_page(actor);
+
+ for (copied = offset = 0; offset < expected; offset += PAGE_SIZE) {
+ int avail = min_t(int, expected - offset, PAGE_SIZE);
+
+ if (!IS_ERR(addr)) {
+ bytes = squashfs_copy_data(addr, buffer, offset +
+ squashfs_i(inode)->fragment_offset, avail);
+
+ if (bytes != avail)
+ goto failed;
+ }
+
+ copied += avail;
+ addr = squashfs_next_page(actor);
+ }
- for (n = 0; n < pages; n++) {
- unsigned int base = (page[n]->index & mask) << PAGE_SHIFT;
- unsigned int offset = base + squashfs_i(inode)->fragment_offset;
+ last_page = squashfs_page_actor_free(actor);
- if (expected > offset) {
- unsigned int avail = min_t(unsigned int, expected -
- offset, PAGE_SIZE);
+ if (copied == expected && !IS_ERR(last_page)) {
+ /* Last page (if present) may have trailing bytes not filled */
+ bytes = copied % PAGE_SIZE;
+ if (bytes && last_page)
+ memzero_page(last_page, bytes, PAGE_SIZE - bytes);
- squashfs_fill_page(page[n], buffer, offset, avail);
+ for (i = 0; i < pages; i++) {
+ flush_dcache_page(page[i]);
+ SetPageUptodate(page[i]);
}
+ }
- unlock_page(page[n]);
- put_page(page[n]);
+ for (i = 0; i < pages; i++) {
+ unlock_page(page[i]);
+ put_page(page[i]);
}
+ squashfs_cache_put(buffer);
+ return 0;
+
+failed:
+ squashfs_page_actor_free(actor);
+
out:
squashfs_cache_put(buffer);
- return error;
+ return 1;
}
static void squashfs_readahead(struct readahead_control *ractl)
@@ -551,7 +585,6 @@ static void squashfs_readahead(struct readahead_control *ractl)
return;
for (;;) {
- pgoff_t index;
int res, bsize;
u64 block = 0;
unsigned int expected;
@@ -570,26 +603,21 @@ static void squashfs_readahead(struct readahead_control *ractl)
if (readahead_pos(ractl) >= i_size_read(inode))
goto skip_pages;
- index = pages[0]->index >> shift;
-
- if ((pages[nr_pages - 1]->index >> shift) != index)
- goto skip_pages;
-
- if (index == file_end && squashfs_i(inode)->fragment_block !=
- SQUASHFS_INVALID_BLK) {
+ if (start >> msblk->block_log == file_end &&
+ squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) {
res = squashfs_readahead_fragment(pages, nr_pages,
- expected);
+ expected, start);
if (res)
goto skip_pages;
continue;
}
- bsize = read_blocklist(inode, index, &block);
+ bsize = read_blocklist(inode, start >> msblk->block_log, &block);
if (bsize == 0)
goto skip_pages;
actor = squashfs_page_actor_init_special(msblk, pages, nr_pages,
- expected);
+ expected, start);
if (!actor)
goto skip_pages;
@@ -597,12 +625,12 @@ static void squashfs_readahead(struct readahead_control *ractl)
last_page = squashfs_page_actor_free(actor);
- if (res == expected) {
+ if (res == expected && !IS_ERR(last_page)) {
int bytes;
/* Last page (if present) may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
- if (index == file_end && bytes && last_page)
+ if (start >> msblk->block_log == file_end && bytes && last_page)
memzero_page(last_page, bytes,
PAGE_SIZE - bytes);
@@ -616,6 +644,8 @@ static void squashfs_readahead(struct readahead_control *ractl)
unlock_page(pages[i]);
put_page(pages[i]);
}
+
+ start += readahead_batch_length(ractl);
}
kfree(pages);
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 2a689ce71de9..22251743fadf 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -23,15 +23,15 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
int expected)
{
+ struct folio *folio = page_folio(target_page);
struct inode *inode = target_page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-
loff_t file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
- loff_t start_index = target_page->index & ~mask;
+ loff_t start_index = folio->index & ~mask;
loff_t end_index = start_index | mask;
int i, n, pages, bytes, res = -ENOMEM;
- struct page **page;
+ struct page **page, *last_page;
struct squashfs_page_actor *actor;
void *pageaddr;
@@ -46,7 +46,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
/* Try to grab all the pages covered by the Squashfs block */
for (i = 0, n = start_index; n <= end_index; n++) {
- page[i] = (n == target_page->index) ? target_page :
+ page[i] = (n == folio->index) ? target_page :
grab_cache_page_nowait(target_page->mapping, n);
if (page[i] == NULL)
@@ -67,27 +67,28 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
* Create a "page actor" which will kmap and kunmap the
* page cache pages appropriately within the decompressor
*/
- actor = squashfs_page_actor_init_special(msblk, page, pages, expected);
+ actor = squashfs_page_actor_init_special(msblk, page, pages, expected,
+ start_index << PAGE_SHIFT);
if (actor == NULL)
goto out;
/* Decompress directly into the page cache buffers */
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- squashfs_page_actor_free(actor);
+ last_page = squashfs_page_actor_free(actor);
if (res < 0)
goto mark_errored;
- if (res != expected) {
+ if (res != expected || IS_ERR(last_page)) {
res = -EIO;
goto mark_errored;
}
/* Last page (if present) may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
- if (page[pages - 1]->index == end_index && bytes) {
- pageaddr = kmap_local_page(page[pages - 1]);
+ if (end_index == file_end && last_page && bytes) {
+ pageaddr = kmap_local_page(last_page);
memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
kunmap_local(pageaddr);
}
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 81af6c4ca115..2b3e807d4dea 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -60,6 +60,11 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
}
/* Implementation of page_actor for decompressing directly into page cache. */
+static loff_t page_next_index(struct squashfs_page_actor *actor)
+{
+ return page_folio(actor->page[actor->next_page])->index;
+}
+
static void *handle_next_page(struct squashfs_page_actor *actor)
{
int max_pages = (actor->length + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -68,7 +73,7 @@ static void *handle_next_page(struct squashfs_page_actor *actor)
return NULL;
if ((actor->next_page == actor->pages) ||
- (actor->next_index != actor->page[actor->next_page]->index)) {
+ (actor->next_index != page_next_index(actor))) {
actor->next_index++;
actor->returned_pages++;
actor->last_page = NULL;
@@ -103,7 +108,7 @@ static void direct_finish_page(struct squashfs_page_actor *actor)
}
struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_info *msblk,
- struct page **page, int pages, int length)
+ struct page **page, int pages, int length, loff_t start_index)
{
struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
@@ -125,7 +130,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
actor->pages = pages;
actor->next_page = 0;
actor->returned_pages = 0;
- actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1);
+ actor->next_index = start_index >> PAGE_SHIFT;
actor->pageaddr = NULL;
actor->last_page = NULL;
actor->alloc_buffer = msblk->decompressor->alloc_buffer;
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 97d4983559b1..ffe25eb77c32 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -29,13 +29,15 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
int pages, int length);
extern struct squashfs_page_actor *squashfs_page_actor_init_special(
struct squashfs_sb_info *msblk,
- struct page **page, int pages, int length);
+ struct page **page, int pages, int length,
+ loff_t start_index);
static inline struct page *squashfs_page_actor_free(struct squashfs_page_actor *actor)
{
- struct page *last_page = actor->last_page;
+ struct page *last_page = actor->next_page == actor->pages ? actor->last_page : ERR_PTR(-EIO);
kfree(actor->tmp_buffer);
kfree(actor);
+
return last_page;
}
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
diff --git a/fs/super.c b/fs/super.c
index 38d72a3cf6fc..1db230432960 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -621,7 +621,7 @@ void generic_shutdown_super(struct super_block *sb)
sync_filesystem(sb);
sb->s_flags &= ~SB_ACTIVE;
- cgroup_writeback_umount();
+ cgroup_writeback_umount(sb);
/* Evict all inodes with zero refcount. */
evict_inodes(sb);
@@ -1802,8 +1802,8 @@ int vfs_get_tree(struct fs_context *fc)
return error;
if (!fc->root) {
- pr_err("Filesystem %s get_tree() didn't set fc->root\n",
- fc->fs_type->name);
+ pr_err("Filesystem %s get_tree() didn't set fc->root, returned %i\n",
+ fc->fs_type->name, error);
/* We don't know what the locking state of the superblock is -
* if there is a superblock.
*/
@@ -1905,7 +1905,7 @@ static void lockdep_sb_freeze_release(struct super_block *sb)
int level;
for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
- percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+ percpu_rwsem_release(sb->s_writers.rw_sem + level, _THIS_IP_);
}
/*
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 2e126d72d619..639307e2ff8c 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -28,17 +28,17 @@ const struct file_operations sysv_dir_operations = {
.fsync = generic_file_fsync,
};
-static void dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
+static void dir_commit_chunk(struct folio *folio, loff_t pos, unsigned len)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *dir = mapping->host;
- block_write_end(NULL, mapping, pos, len, len, page, NULL);
+ block_write_end(NULL, mapping, pos, len, len, folio, NULL);
if (pos+len > dir->i_size) {
i_size_write(dir, pos+len);
mark_inode_dirty(dir);
}
- unlock_page(page);
+ folio_unlock(folio);
}
static int sysv_handle_dirsync(struct inode *dir)
@@ -52,20 +52,21 @@ static int sysv_handle_dirsync(struct inode *dir)
}
/*
- * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
+ * Calls to dir_get_folio()/folio_release_kmap() must be nested according to the
* rules documented in mm/highmem.rst.
*
- * NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_page()
+ * NOTE: sysv_find_entry() and sysv_dotdot() act as calls to dir_get_folio()
* and must be treated accordingly for nesting purposes.
*/
-static void *dir_get_page(struct inode *dir, unsigned long n, struct page **p)
+static void *dir_get_folio(struct inode *dir, unsigned long n,
+ struct folio **foliop)
{
- struct address_space *mapping = dir->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
- if (IS_ERR(page))
- return ERR_CAST(page);
- *p = page;
- return kmap_local_page(page);
+ struct folio *folio = read_mapping_folio(dir->i_mapping, n, NULL);
+
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
+ *foliop = folio;
+ return kmap_local_folio(folio, 0);
}
static int sysv_readdir(struct file *file, struct dir_context *ctx)
@@ -87,9 +88,9 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
for ( ; n < npages; n++, offset = 0) {
char *kaddr, *limit;
struct sysv_dir_entry *de;
- struct page *page;
+ struct folio *folio;
- kaddr = dir_get_page(inode, n, &page);
+ kaddr = dir_get_folio(inode, n, &folio);
if (IS_ERR(kaddr))
continue;
de = (struct sysv_dir_entry *)(kaddr+offset);
@@ -103,11 +104,11 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN),
fs16_to_cpu(SYSV_SB(sb), de->inode),
DT_UNKNOWN)) {
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
return 0;
}
}
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
}
return 0;
}
@@ -126,39 +127,35 @@ static inline int namecompare(int len, int maxlen,
/*
* sysv_find_entry()
*
- * finds an entry in the specified directory with the wanted name. It
- * returns the cache buffer in which the entry was found, and the entry
- * itself (as a parameter - res_dir). It does NOT read the inode of the
+ * finds an entry in the specified directory with the wanted name.
+ * It does NOT read the inode of the
* entry - you'll have to do that yourself if you want to.
*
- * On Success unmap_and_put_page() should be called on *res_page.
+ * On Success folio_release_kmap() should be called on *foliop.
*
- * sysv_find_entry() acts as a call to dir_get_page() and must be treated
+ * sysv_find_entry() acts as a call to dir_get_folio() and must be treated
* accordingly for nesting purposes.
*/
-struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_page)
+struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct folio **foliop)
{
const char * name = dentry->d_name.name;
int namelen = dentry->d_name.len;
struct inode * dir = d_inode(dentry->d_parent);
unsigned long start, n;
unsigned long npages = dir_pages(dir);
- struct page *page = NULL;
struct sysv_dir_entry *de;
- *res_page = NULL;
-
start = SYSV_I(dir)->i_dir_start_lookup;
if (start >= npages)
start = 0;
n = start;
do {
- char *kaddr = dir_get_page(dir, n, &page);
+ char *kaddr = dir_get_folio(dir, n, foliop);
if (!IS_ERR(kaddr)) {
de = (struct sysv_dir_entry *)kaddr;
- kaddr += PAGE_SIZE - SYSV_DIRSIZE;
+ kaddr += folio_size(*foliop) - SYSV_DIRSIZE;
for ( ; (char *) de <= kaddr ; de++) {
if (!de->inode)
continue;
@@ -166,7 +163,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
name, de->name))
goto found;
}
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(*foliop, kaddr);
}
if (++n >= npages)
@@ -177,7 +174,6 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
found:
SYSV_I(dir)->i_dir_start_lookup = n;
- *res_page = page;
return de;
}
@@ -186,7 +182,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
struct inode *dir = d_inode(dentry->d_parent);
const char * name = dentry->d_name.name;
int namelen = dentry->d_name.len;
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct sysv_dir_entry * de;
unsigned long npages = dir_pages(dir);
unsigned long n;
@@ -196,7 +192,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
/* We take care of directory expansion in the same loop */
for (n = 0; n <= npages; n++) {
- kaddr = dir_get_page(dir, n, &page);
+ kaddr = dir_get_folio(dir, n, &folio);
if (IS_ERR(kaddr))
return PTR_ERR(kaddr);
de = (struct sysv_dir_entry *)kaddr;
@@ -206,49 +202,49 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
goto got_it;
err = -EEXIST;
if (namecompare(namelen, SYSV_NAMELEN, name, de->name))
- goto out_page;
+ goto out_folio;
de++;
}
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
}
BUG();
return -EINVAL;
got_it:
- pos = page_offset(page) + offset_in_page(de);
- lock_page(page);
- err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
+ pos = folio_pos(folio) + offset_in_folio(folio, de);
+ folio_lock(folio);
+ err = sysv_prepare_chunk(folio, pos, SYSV_DIRSIZE);
if (err)
goto out_unlock;
memcpy (de->name, name, namelen);
memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2);
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
- dir_commit_chunk(page, pos, SYSV_DIRSIZE);
+ dir_commit_chunk(folio, pos, SYSV_DIRSIZE);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
mark_inode_dirty(dir);
err = sysv_handle_dirsync(dir);
-out_page:
- unmap_and_put_page(page, kaddr);
+out_folio:
+ folio_release_kmap(folio, kaddr);
return err;
out_unlock:
- unlock_page(page);
- goto out_page;
+ folio_unlock(folio);
+ goto out_folio;
}
-int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
+int sysv_delete_entry(struct sysv_dir_entry *de, struct folio *folio)
{
- struct inode *inode = page->mapping->host;
- loff_t pos = page_offset(page) + offset_in_page(de);
+ struct inode *inode = folio->mapping->host;
+ loff_t pos = folio_pos(folio) + offset_in_folio(folio, de);
int err;
- lock_page(page);
- err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
+ folio_lock(folio);
+ err = sysv_prepare_chunk(folio, pos, SYSV_DIRSIZE);
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
de->inode = 0;
- dir_commit_chunk(page, pos, SYSV_DIRSIZE);
+ dir_commit_chunk(folio, pos, SYSV_DIRSIZE);
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
mark_inode_dirty(inode);
return sysv_handle_dirsync(inode);
@@ -256,33 +252,33 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
int sysv_make_empty(struct inode *inode, struct inode *dir)
{
- struct page *page = grab_cache_page(inode->i_mapping, 0);
+ struct folio *folio = filemap_grab_folio(inode->i_mapping, 0);
struct sysv_dir_entry * de;
- char *base;
+ char *kaddr;
int err;
- if (!page)
- return -ENOMEM;
- err = sysv_prepare_chunk(page, 0, 2 * SYSV_DIRSIZE);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = sysv_prepare_chunk(folio, 0, 2 * SYSV_DIRSIZE);
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
goto fail;
}
- base = kmap_local_page(page);
- memset(base, 0, PAGE_SIZE);
+ kaddr = kmap_local_folio(folio, 0);
+ memset(kaddr, 0, folio_size(folio));
- de = (struct sysv_dir_entry *) base;
+ de = (struct sysv_dir_entry *)kaddr;
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
strcpy(de->name,".");
de++;
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino);
strcpy(de->name,"..");
- kunmap_local(base);
- dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE);
+ kunmap_local(kaddr);
+ dir_commit_chunk(folio, 0, 2 * SYSV_DIRSIZE);
err = sysv_handle_dirsync(inode);
fail:
- put_page(page);
+ folio_put(folio);
return err;
}
@@ -292,19 +288,19 @@ fail:
int sysv_empty_dir(struct inode * inode)
{
struct super_block *sb = inode->i_sb;
- struct page *page = NULL;
+ struct folio *folio = NULL;
unsigned long i, npages = dir_pages(inode);
char *kaddr;
for (i = 0; i < npages; i++) {
struct sysv_dir_entry *de;
- kaddr = dir_get_page(inode, i, &page);
+ kaddr = dir_get_folio(inode, i, &folio);
if (IS_ERR(kaddr))
continue;
de = (struct sysv_dir_entry *)kaddr;
- kaddr += PAGE_SIZE-SYSV_DIRSIZE;
+ kaddr += folio_size(folio) - SYSV_DIRSIZE;
for ( ;(char *)de <= kaddr; de++) {
if (!de->inode)
@@ -321,46 +317,46 @@ int sysv_empty_dir(struct inode * inode)
if (de->name[1] != '.' || de->name[2])
goto not_empty;
}
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
}
return 1;
not_empty:
- unmap_and_put_page(page, kaddr);
+ folio_release_kmap(folio, kaddr);
return 0;
}
/* Releases the page */
-int sysv_set_link(struct sysv_dir_entry *de, struct page *page,
- struct inode *inode)
+int sysv_set_link(struct sysv_dir_entry *de, struct folio *folio,
+ struct inode *inode)
{
- struct inode *dir = page->mapping->host;
- loff_t pos = page_offset(page) + offset_in_page(de);
+ struct inode *dir = folio->mapping->host;
+ loff_t pos = folio_pos(folio) + offset_in_folio(folio, de);
int err;
- lock_page(page);
- err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
+ folio_lock(folio);
+ err = sysv_prepare_chunk(folio, pos, SYSV_DIRSIZE);
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
- dir_commit_chunk(page, pos, SYSV_DIRSIZE);
+ dir_commit_chunk(folio, pos, SYSV_DIRSIZE);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
mark_inode_dirty(dir);
return sysv_handle_dirsync(inode);
}
/*
- * Calls to dir_get_page()/unmap_and_put_page() must be nested according to the
+ * Calls to dir_get_folio()/folio_release_kmap() must be nested according to the
* rules documented in mm/highmem.rst.
*
- * sysv_dotdot() acts as a call to dir_get_page() and must be treated
+ * sysv_dotdot() acts as a call to dir_get_folio() and must be treated
* accordingly for nesting purposes.
*/
-struct sysv_dir_entry *sysv_dotdot(struct inode *dir, struct page **p)
+struct sysv_dir_entry *sysv_dotdot(struct inode *dir, struct folio **foliop)
{
- struct sysv_dir_entry *de = dir_get_page(dir, 0, p);
+ struct sysv_dir_entry *de = dir_get_folio(dir, 0, foliop);
if (IS_ERR(de))
return NULL;
@@ -370,13 +366,13 @@ struct sysv_dir_entry *sysv_dotdot(struct inode *dir, struct page **p)
ino_t sysv_inode_by_name(struct dentry *dentry)
{
- struct page *page;
- struct sysv_dir_entry *de = sysv_find_entry (dentry, &page);
+ struct folio *folio;
+ struct sysv_dir_entry *de = sysv_find_entry (dentry, &folio);
ino_t res = 0;
if (de) {
res = fs16_to_cpu(SYSV_SB(dentry->d_sb), de->inode);
- unmap_and_put_page(page, de);
+ folio_release_kmap(folio, de);
}
return res;
}
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 19bcb51a2203..451e95f474fa 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -466,9 +466,9 @@ static int sysv_read_folio(struct file *file, struct folio *folio)
return block_read_full_folio(folio, get_block);
}
-int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len)
+int sysv_prepare_chunk(struct folio *folio, loff_t pos, unsigned len)
{
- return __block_write_begin(page, pos, len, get_block);
+ return __block_write_begin(folio, pos, len, get_block);
}
static void sysv_write_failed(struct address_space *mapping, loff_t to)
@@ -483,11 +483,11 @@ static void sysv_write_failed(struct address_space *mapping, loff_t to)
static int sysv_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, get_block);
+ ret = block_write_begin(mapping, pos, len, foliop, get_block);
if (unlikely(ret))
sysv_write_failed(mapping, pos + len);
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index d6b73798071b..fb8bd8437872 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -151,20 +151,20 @@ out_dir:
static int sysv_unlink(struct inode * dir, struct dentry * dentry)
{
struct inode * inode = d_inode(dentry);
- struct page * page;
+ struct folio *folio;
struct sysv_dir_entry * de;
int err;
- de = sysv_find_entry(dentry, &page);
+ de = sysv_find_entry(dentry, &folio);
if (!de)
return -ENOENT;
- err = sysv_delete_entry(de, page);
+ err = sysv_delete_entry(de, folio);
if (!err) {
inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
inode_dec_link_count(inode);
}
- unmap_and_put_page(page, de);
+ folio_release_kmap(folio, de);
return err;
}
@@ -194,28 +194,28 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
{
struct inode * old_inode = d_inode(old_dentry);
struct inode * new_inode = d_inode(new_dentry);
- struct page * dir_page = NULL;
+ struct folio *dir_folio;
struct sysv_dir_entry * dir_de = NULL;
- struct page * old_page;
+ struct folio *old_folio;
struct sysv_dir_entry * old_de;
int err = -ENOENT;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
- old_de = sysv_find_entry(old_dentry, &old_page);
+ old_de = sysv_find_entry(old_dentry, &old_folio);
if (!old_de)
goto out;
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
- dir_de = sysv_dotdot(old_inode, &dir_page);
+ dir_de = sysv_dotdot(old_inode, &dir_folio);
if (!dir_de)
goto out_old;
}
if (new_inode) {
- struct page * new_page;
+ struct folio *new_folio;
struct sysv_dir_entry * new_de;
err = -ENOTEMPTY;
@@ -223,11 +223,11 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
goto out_dir;
err = -ENOENT;
- new_de = sysv_find_entry(new_dentry, &new_page);
+ new_de = sysv_find_entry(new_dentry, &new_folio);
if (!new_de)
goto out_dir;
- err = sysv_set_link(new_de, new_page, old_inode);
- unmap_and_put_page(new_page, new_de);
+ err = sysv_set_link(new_de, new_folio, old_inode);
+ folio_release_kmap(new_folio, new_de);
if (err)
goto out_dir;
inode_set_ctime_current(new_inode);
@@ -242,23 +242,23 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
inode_inc_link_count(new_dir);
}
- err = sysv_delete_entry(old_de, old_page);
+ err = sysv_delete_entry(old_de, old_folio);
if (err)
goto out_dir;
mark_inode_dirty(old_inode);
if (dir_de) {
- err = sysv_set_link(dir_de, dir_page, new_dir);
+ err = sysv_set_link(dir_de, dir_folio, new_dir);
if (!err)
inode_dec_link_count(old_dir);
}
out_dir:
if (dir_de)
- unmap_and_put_page(dir_page, dir_de);
+ folio_release_kmap(dir_folio, dir_de);
out_old:
- unmap_and_put_page(old_page, old_de);
+ folio_release_kmap(old_folio, old_de);
out:
return err;
}
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index e3f988b469ee..0a48b2e7edb1 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -133,8 +133,8 @@ extern void sysv_free_block(struct super_block *, sysv_zone_t);
extern unsigned long sysv_count_free_blocks(struct super_block *);
/* itree.c */
-extern void sysv_truncate(struct inode *);
-extern int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len);
+void sysv_truncate(struct inode *);
+int sysv_prepare_chunk(struct folio *folio, loff_t pos, unsigned len);
/* inode.c */
extern struct inode *sysv_iget(struct super_block *, unsigned int);
@@ -148,15 +148,15 @@ extern void sysv_destroy_icache(void);
/* dir.c */
-extern struct sysv_dir_entry *sysv_find_entry(struct dentry *, struct page **);
-extern int sysv_add_link(struct dentry *, struct inode *);
-extern int sysv_delete_entry(struct sysv_dir_entry *, struct page *);
-extern int sysv_make_empty(struct inode *, struct inode *);
-extern int sysv_empty_dir(struct inode *);
-extern int sysv_set_link(struct sysv_dir_entry *, struct page *,
+struct sysv_dir_entry *sysv_find_entry(struct dentry *, struct folio **);
+int sysv_add_link(struct dentry *, struct inode *);
+int sysv_delete_entry(struct sysv_dir_entry *, struct folio *);
+int sysv_make_empty(struct inode *, struct inode *);
+int sysv_empty_dir(struct inode *);
+int sysv_set_link(struct sysv_dir_entry *, struct folio *,
struct inode *);
-extern struct sysv_dir_entry *sysv_dotdot(struct inode *, struct page **);
-extern ino_t sysv_inode_by_name(struct dentry *);
+struct sysv_dir_entry *sysv_dotdot(struct inode *, struct folio **);
+ino_t sysv_inode_by_name(struct dentry *);
extern const struct inode_operations sysv_file_inode_operations;
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index 01e99e98457d..8705c77a9e75 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -862,7 +862,7 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
list_for_each_entry(ei_child, &ei->children, list)
eventfs_remove_rec(ei_child, level + 1);
- list_del(&ei->list);
+ list_del_rcu(&ei->list);
free_ei(ei);
}
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 68e104423a48..5130123005e4 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -211,7 +211,7 @@ static void release_existing_page_budget(struct ubifs_info *c)
}
static int write_begin_slow(struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep)
+ loff_t pos, unsigned len, struct folio **foliop)
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -298,7 +298,7 @@ static int write_begin_slow(struct address_space *mapping,
ubifs_release_dirty_inode_budget(c, ui);
}
- *pagep = &folio->page;
+ *foliop = folio;
return 0;
}
@@ -414,7 +414,7 @@ static int allocate_budget(struct ubifs_info *c, struct folio *folio,
*/
static int ubifs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -483,7 +483,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
folio_unlock(folio);
folio_put(folio);
- return write_begin_slow(mapping, pos, len, pagep);
+ return write_begin_slow(mapping, pos, len, foliop);
}
/*
@@ -492,7 +492,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* with @ui->ui_mutex locked if we are appending pages, and unlocked
* otherwise. This is an optimization (slightly hacky though).
*/
- *pagep = &folio->page;
+ *foliop = folio;
return 0;
}
@@ -524,9 +524,8 @@ static void cancel_budget(struct ubifs_info *c, struct folio *folio,
static int ubifs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_info *c = inode->i_sb->s_fs_info;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 3a4179de316b..412fe7c4d348 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -62,7 +62,7 @@ static vm_fault_t udf_page_mkwrite(struct vm_fault *vmf)
end = size & ~PAGE_MASK;
else
end = PAGE_SIZE;
- err = __block_write_begin(&folio->page, 0, end, udf_get_block);
+ err = __block_write_begin(folio, 0, end, udf_get_block);
if (err) {
folio_unlock(folio);
ret = vmf_fs_error(err);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4726a4d014b6..eaee57b91c6c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -246,14 +246,14 @@ static void udf_readahead(struct readahead_control *rac)
static int udf_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct udf_inode_info *iinfo = UDF_I(file_inode(file));
struct folio *folio;
int ret;
if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
- ret = block_write_begin(mapping, pos, len, pagep,
+ ret = block_write_begin(mapping, pos, len, foliop,
udf_get_block);
if (unlikely(ret))
udf_write_failed(mapping, pos + len);
@@ -265,7 +265,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
mapping_gfp_mask(mapping));
if (IS_ERR(folio))
return PTR_ERR(folio);
- *pagep = &folio->page;
+ *foliop = folio;
if (!folio_test_uptodate(folio))
udf_adinicb_read_folio(folio);
return 0;
@@ -273,16 +273,14 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
static int udf_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = file_inode(file);
- struct folio *folio;
loff_t last_pos;
if (UDF_I(inode)->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB)
- return generic_write_end(file, mapping, pos, len, copied, page,
+ return generic_write_end(file, mapping, pos, len, copied, folio,
fsdata);
- folio = page_folio(page);
last_pos = pos + copied;
if (last_pos > inode->i_size)
i_size_write(inode, last_pos);
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 335f0ae529b4..d6e6a2198971 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -42,18 +42,18 @@ static inline int ufs_match(struct super_block *sb, int len,
return !memcmp(name, de->d_name, len);
}
-static void ufs_commit_chunk(struct page *page, loff_t pos, unsigned len)
+static void ufs_commit_chunk(struct folio *folio, loff_t pos, unsigned len)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = folio->mapping;
struct inode *dir = mapping->host;
inode_inc_iversion(dir);
- block_write_end(NULL, mapping, pos, len, len, page, NULL);
+ block_write_end(NULL, mapping, pos, len, len, folio, NULL);
if (pos+len > dir->i_size) {
i_size_write(dir, pos+len);
mark_inode_dirty(dir);
}
- unlock_page(page);
+ folio_unlock(folio);
}
static int ufs_handle_dirsync(struct inode *dir)
@@ -66,22 +66,16 @@ static int ufs_handle_dirsync(struct inode *dir)
return err;
}
-static inline void ufs_put_page(struct page *page)
-{
- kunmap(page);
- put_page(page);
-}
-
ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
{
ino_t res = 0;
struct ufs_dir_entry *de;
- struct page *page;
+ struct folio *folio;
- de = ufs_find_entry(dir, qstr, &page);
+ de = ufs_find_entry(dir, qstr, &folio);
if (de) {
res = fs32_to_cpu(dir->i_sb, de->d_ino);
- ufs_put_page(page);
+ folio_release_kmap(folio, de);
}
return res;
}
@@ -89,43 +83,40 @@ ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
/* Releases the page */
void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
- struct page *page, struct inode *inode,
+ struct folio *folio, struct inode *inode,
bool update_times)
{
- loff_t pos = page_offset(page) +
- (char *) de - (char *) page_address(page);
+ loff_t pos = folio_pos(folio) + offset_in_folio(folio, de);
unsigned len = fs16_to_cpu(dir->i_sb, de->d_reclen);
int err;
- lock_page(page);
- err = ufs_prepare_chunk(page, pos, len);
+ folio_lock(folio);
+ err = ufs_prepare_chunk(folio, pos, len);
BUG_ON(err);
de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
ufs_set_de_type(dir->i_sb, de, inode->i_mode);
- ufs_commit_chunk(page, pos, len);
- ufs_put_page(page);
+ ufs_commit_chunk(folio, pos, len);
+ folio_release_kmap(folio, de);
if (update_times)
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
mark_inode_dirty(dir);
ufs_handle_dirsync(dir);
}
-
-static bool ufs_check_page(struct page *page)
+static bool ufs_check_folio(struct folio *folio, char *kaddr)
{
- struct inode *dir = page->mapping->host;
+ struct inode *dir = folio->mapping->host;
struct super_block *sb = dir->i_sb;
- char *kaddr = page_address(page);
unsigned offs, rec_len;
- unsigned limit = PAGE_SIZE;
+ unsigned limit = folio_size(folio);
const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1;
struct ufs_dir_entry *p;
char *error;
- if ((dir->i_size >> PAGE_SHIFT) == page->index) {
- limit = dir->i_size & ~PAGE_MASK;
+ if (dir->i_size < folio_pos(folio) + limit) {
+ limit = offset_in_folio(folio, dir->i_size);
if (limit & chunk_mask)
goto Ebadsize;
if (!limit)
@@ -150,13 +141,13 @@ static bool ufs_check_page(struct page *page)
if (offs != limit)
goto Eend;
out:
- SetPageChecked(page);
+ folio_set_checked(folio);
return true;
/* Too bad, we had an error */
Ebadsize:
- ufs_error(sb, "ufs_check_page",
+ ufs_error(sb, __func__,
"size of directory #%lu is not a multiple of chunk size",
dir->i_ino
);
@@ -176,36 +167,40 @@ Espan:
Einumber:
error = "inode out of bounds";
bad_entry:
- ufs_error (sb, "ufs_check_page", "bad entry in directory #%lu: %s - "
- "offset=%lu, rec_len=%d, name_len=%d",
- dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
+ ufs_error(sb, __func__, "bad entry in directory #%lu: %s - "
+ "offset=%llu, rec_len=%d, name_len=%d",
+ dir->i_ino, error, folio_pos(folio) + offs,
rec_len, ufs_get_de_namlen(sb, p));
goto fail;
Eend:
p = (struct ufs_dir_entry *)(kaddr + offs);
ufs_error(sb, __func__,
"entry in directory #%lu spans the page boundary"
- "offset=%lu",
- dir->i_ino, (page->index<<PAGE_SHIFT)+offs);
+ "offset=%llu",
+ dir->i_ino, folio_pos(folio) + offs);
fail:
return false;
}
-static struct page *ufs_get_page(struct inode *dir, unsigned long n)
+static void *ufs_get_folio(struct inode *dir, unsigned long n,
+ struct folio **foliop)
{
struct address_space *mapping = dir->i_mapping;
- struct page *page = read_mapping_page(mapping, n, NULL);
- if (!IS_ERR(page)) {
- kmap(page);
- if (unlikely(!PageChecked(page))) {
- if (!ufs_check_page(page))
- goto fail;
- }
+ struct folio *folio = read_mapping_folio(mapping, n, NULL);
+ void *kaddr;
+
+ if (IS_ERR(folio))
+ return ERR_CAST(folio);
+ kaddr = kmap_local_folio(folio, 0);
+ if (unlikely(!folio_test_checked(folio))) {
+ if (!ufs_check_folio(folio, kaddr))
+ goto fail;
}
- return page;
+ *foliop = folio;
+ return kaddr;
fail:
- ufs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return ERR_PTR(-EIO);
}
@@ -231,17 +226,14 @@ ufs_next_entry(struct super_block *sb, struct ufs_dir_entry *p)
fs16_to_cpu(sb, p->d_reclen));
}
-struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
+struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct folio **foliop)
{
- struct page *page = ufs_get_page(dir, 0);
- struct ufs_dir_entry *de = NULL;
+ struct ufs_dir_entry *de = ufs_get_folio(dir, 0, foliop);
- if (!IS_ERR(page)) {
- de = ufs_next_entry(dir->i_sb,
- (struct ufs_dir_entry *)page_address(page));
- *p = page;
- }
- return de;
+ if (!IS_ERR(de))
+ return ufs_next_entry(dir->i_sb, de);
+
+ return NULL;
}
/*
@@ -253,7 +245,7 @@ struct ufs_dir_entry *ufs_dotdot(struct inode *dir, struct page **p)
* Entry is guaranteed to be valid.
*/
struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
- struct page **res_page)
+ struct folio **foliop)
{
struct super_block *sb = dir->i_sb;
const unsigned char *name = qstr->name;
@@ -261,7 +253,6 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
unsigned reclen = UFS_DIR_REC_LEN(namelen);
unsigned long start, n;
unsigned long npages = dir_pages(dir);
- struct page *page = NULL;
struct ufs_inode_info *ui = UFS_I(dir);
struct ufs_dir_entry *de;
@@ -270,27 +261,23 @@ struct ufs_dir_entry *ufs_find_entry(struct inode *dir, const struct qstr *qstr,
if (npages == 0 || namelen > UFS_MAXNAMLEN)
goto out;
- /* OFFSET_CACHE */
- *res_page = NULL;
-
start = ui->i_dir_start_lookup;
if (start >= npages)
start = 0;
n = start;
do {
- char *kaddr;
- page = ufs_get_page(dir, n);
- if (!IS_ERR(page)) {
- kaddr = page_address(page);
- de = (struct ufs_dir_entry *) kaddr;
+ char *kaddr = ufs_get_folio(dir, n, foliop);
+
+ if (!IS_ERR(kaddr)) {
+ de = (struct ufs_dir_entry *)kaddr;
kaddr += ufs_last_byte(dir, n) - reclen;
while ((char *) de <= kaddr) {
if (ufs_match(sb, namelen, name, de))
goto found;
de = ufs_next_entry(sb, de);
}
- ufs_put_page(page);
+ folio_release_kmap(*foliop, kaddr);
}
if (++n >= npages)
n = 0;
@@ -299,7 +286,6 @@ out:
return NULL;
found:
- *res_page = page;
ui->i_dir_start_lookup = n;
return de;
}
@@ -316,11 +302,10 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
unsigned reclen = UFS_DIR_REC_LEN(namelen);
const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
unsigned short rec_len, name_len;
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct ufs_dir_entry *de;
unsigned long npages = dir_pages(dir);
unsigned long n;
- char *kaddr;
loff_t pos;
int err;
@@ -328,21 +313,19 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
/*
* We take care of directory expansion in the same loop.
- * This code plays outside i_size, so it locks the page
+ * This code plays outside i_size, so it locks the folio
* to protect that region.
*/
for (n = 0; n <= npages; n++) {
+ char *kaddr = ufs_get_folio(dir, n, &folio);
char *dir_end;
- page = ufs_get_page(dir, n);
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto out;
- lock_page(page);
- kaddr = page_address(page);
+ if (IS_ERR(kaddr))
+ return PTR_ERR(kaddr);
+ folio_lock(folio);
dir_end = kaddr + ufs_last_byte(dir, n);
de = (struct ufs_dir_entry *)kaddr;
- kaddr += PAGE_SIZE - reclen;
+ kaddr += folio_size(folio) - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
/* We hit i_size */
@@ -369,16 +352,15 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
goto got_it;
de = (struct ufs_dir_entry *) ((char *) de + rec_len);
}
- unlock_page(page);
- ufs_put_page(page);
+ folio_unlock(folio);
+ folio_release_kmap(folio, kaddr);
}
BUG();
return -EINVAL;
got_it:
- pos = page_offset(page) +
- (char*)de - (char*)page_address(page);
- err = ufs_prepare_chunk(page, pos, rec_len);
+ pos = folio_pos(folio) + offset_in_folio(folio, de);
+ err = ufs_prepare_chunk(folio, pos, rec_len);
if (err)
goto out_unlock;
if (de->d_ino) {
@@ -395,18 +377,17 @@ got_it:
de->d_ino = cpu_to_fs32(sb, inode->i_ino);
ufs_set_de_type(sb, de, inode->i_mode);
- ufs_commit_chunk(page, pos, rec_len);
+ ufs_commit_chunk(folio, pos, rec_len);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
mark_inode_dirty(dir);
err = ufs_handle_dirsync(dir);
/* OFFSET_CACHE */
out_put:
- ufs_put_page(page);
-out:
+ folio_release_kmap(folio, de);
return err;
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
goto out_put;
}
@@ -444,19 +425,18 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
return 0;
for ( ; n < npages; n++, offset = 0) {
- char *kaddr, *limit;
struct ufs_dir_entry *de;
+ struct folio *folio;
+ char *kaddr = ufs_get_folio(inode, n, &folio);
+ char *limit;
- struct page *page = ufs_get_page(inode, n);
-
- if (IS_ERR(page)) {
+ if (IS_ERR(kaddr)) {
ufs_error(sb, __func__,
"bad page in #%lu",
inode->i_ino);
ctx->pos += PAGE_SIZE - offset;
- return -EIO;
+ return PTR_ERR(kaddr);
}
- kaddr = page_address(page);
if (unlikely(need_revalidate)) {
if (offset) {
offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask);
@@ -482,13 +462,13 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
ufs_get_de_namlen(sb, de),
fs32_to_cpu(sb, de->d_ino),
d_type)) {
- ufs_put_page(page);
+ folio_release_kmap(folio, de);
return 0;
}
}
ctx->pos += fs16_to_cpu(sb, de->d_reclen);
}
- ufs_put_page(page);
+ folio_release_kmap(folio, kaddr);
}
return 0;
}
@@ -499,19 +479,23 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
* previous entry.
*/
int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
- struct page * page)
+ struct folio *folio)
{
struct super_block *sb = inode->i_sb;
- char *kaddr = page_address(page);
- unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
- unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
+ size_t from, to;
+ char *kaddr;
loff_t pos;
- struct ufs_dir_entry *pde = NULL;
- struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from);
+ struct ufs_dir_entry *de, *pde = NULL;
int err;
UFSD("ENTER\n");
+ from = offset_in_folio(folio, dir);
+ to = from + fs16_to_cpu(sb, dir->d_reclen);
+ kaddr = (char *)dir - from;
+ from &= ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
+ de = (struct ufs_dir_entry *) (kaddr + from);
+
UFSD("ino %u, reclen %u, namlen %u, name %s\n",
fs32_to_cpu(sb, de->d_ino),
fs16_to_cpu(sb, de->d_reclen),
@@ -528,21 +512,20 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
de = ufs_next_entry(sb, de);
}
if (pde)
- from = (char*)pde - (char*)page_address(page);
-
- pos = page_offset(page) + from;
- lock_page(page);
- err = ufs_prepare_chunk(page, pos, to - from);
+ from = offset_in_folio(folio, pde);
+ pos = folio_pos(folio) + from;
+ folio_lock(folio);
+ err = ufs_prepare_chunk(folio, pos, to - from);
BUG_ON(err);
if (pde)
pde->d_reclen = cpu_to_fs16(sb, to - from);
dir->d_ino = 0;
- ufs_commit_chunk(page, pos, to - from);
+ ufs_commit_chunk(folio, pos, to - from);
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
mark_inode_dirty(inode);
err = ufs_handle_dirsync(inode);
out:
- ufs_put_page(page);
+ folio_release_kmap(folio, kaddr);
UFSD("EXIT\n");
return err;
}
@@ -551,26 +534,25 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
{
struct super_block * sb = dir->i_sb;
struct address_space *mapping = inode->i_mapping;
- struct page *page = grab_cache_page(mapping, 0);
+ struct folio *folio = filemap_grab_folio(mapping, 0);
const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
struct ufs_dir_entry * de;
- char *base;
int err;
+ char *kaddr;
- if (!page)
- return -ENOMEM;
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- err = ufs_prepare_chunk(page, 0, chunk_size);
+ err = ufs_prepare_chunk(folio, 0, chunk_size);
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
goto fail;
}
- kmap(page);
- base = (char*)page_address(page);
- memset(base, 0, PAGE_SIZE);
+ kaddr = kmap_local_folio(folio, 0);
+ memset(kaddr, 0, folio_size(folio));
- de = (struct ufs_dir_entry *) base;
+ de = (struct ufs_dir_entry *)kaddr;
de->d_ino = cpu_to_fs32(sb, inode->i_ino);
ufs_set_de_type(sb, de, inode->i_mode);
@@ -584,12 +566,12 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1));
ufs_set_de_namlen(sb, de, 2);
strcpy (de->d_name, "..");
- kunmap(page);
+ kunmap_local(kaddr);
- ufs_commit_chunk(page, 0, chunk_size);
+ ufs_commit_chunk(folio, 0, chunk_size);
err = ufs_handle_dirsync(inode);
fail:
- put_page(page);
+ folio_put(folio);
return err;
}
@@ -599,18 +581,17 @@ fail:
int ufs_empty_dir(struct inode * inode)
{
struct super_block *sb = inode->i_sb;
- struct page *page = NULL;
+ struct folio *folio;
+ char *kaddr;
unsigned long i, npages = dir_pages(inode);
for (i = 0; i < npages; i++) {
- char *kaddr;
struct ufs_dir_entry *de;
- page = ufs_get_page(inode, i);
- if (IS_ERR(page))
+ kaddr = ufs_get_folio(inode, i, &folio);
+ if (IS_ERR(kaddr))
continue;
- kaddr = page_address(page);
de = (struct ufs_dir_entry *)kaddr;
kaddr += ufs_last_byte(inode, i) - UFS_DIR_REC_LEN(1);
@@ -637,12 +618,12 @@ int ufs_empty_dir(struct inode * inode)
}
de = ufs_next_entry(sb, de);
}
- ufs_put_page(page);
+ folio_release_kmap(folio, kaddr);
}
return 1;
not_empty:
- ufs_put_page(page);
+ folio_release_kmap(folio, kaddr);
return 0;
}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a7bb2e63cdde..5331ae7ebf3e 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -479,9 +479,9 @@ static int ufs_read_folio(struct file *file, struct folio *folio)
return block_read_full_folio(folio, ufs_getfrag_block);
}
-int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
+int ufs_prepare_chunk(struct folio *folio, loff_t pos, unsigned len)
{
- return __block_write_begin(page, pos, len, ufs_getfrag_block);
+ return __block_write_begin(folio, pos, len, ufs_getfrag_block);
}
static void ufs_truncate_blocks(struct inode *);
@@ -498,11 +498,11 @@ static void ufs_write_failed(struct address_space *mapping, loff_t to)
static int ufs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
- ret = block_write_begin(mapping, pos, len, pagep, ufs_getfrag_block);
+ ret = block_write_begin(mapping, pos, len, foliop, ufs_getfrag_block);
if (unlikely(ret))
ufs_write_failed(mapping, pos + len);
@@ -511,11 +511,11 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping,
static int ufs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
int ret;
- ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ ret = generic_write_end(file, mapping, pos, len, copied, folio, fsdata);
if (ret < len)
ufs_write_failed(mapping, pos + len);
return ret;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 9cad29463791..24bd12186647 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -209,14 +209,14 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode * inode = d_inode(dentry);
struct ufs_dir_entry *de;
- struct page *page;
+ struct folio *folio;
int err = -ENOENT;
- de = ufs_find_entry(dir, &dentry->d_name, &page);
+ de = ufs_find_entry(dir, &dentry->d_name, &folio);
if (!de)
goto out;
- err = ufs_delete_entry(dir, de, page);
+ err = ufs_delete_entry(dir, de, folio);
if (err)
goto out;
@@ -249,28 +249,28 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct page *dir_page = NULL;
+ struct folio *dir_folio = NULL;
struct ufs_dir_entry * dir_de = NULL;
- struct page *old_page;
+ struct folio *old_folio;
struct ufs_dir_entry *old_de;
int err = -ENOENT;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
- old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
+ old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_folio);
if (!old_de)
goto out;
if (S_ISDIR(old_inode->i_mode)) {
err = -EIO;
- dir_de = ufs_dotdot(old_inode, &dir_page);
+ dir_de = ufs_dotdot(old_inode, &dir_folio);
if (!dir_de)
goto out_old;
}
if (new_inode) {
- struct page *new_page;
+ struct folio *new_folio;
struct ufs_dir_entry *new_de;
err = -ENOTEMPTY;
@@ -278,10 +278,10 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
goto out_dir;
err = -ENOENT;
- new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
+ new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_folio);
if (!new_de)
goto out_dir;
- ufs_set_link(new_dir, new_de, new_page, old_inode, 1);
+ ufs_set_link(new_dir, new_de, new_folio, old_inode, 1);
inode_set_ctime_current(new_inode);
if (dir_de)
drop_nlink(new_inode);
@@ -300,29 +300,24 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
*/
inode_set_ctime_current(old_inode);
- ufs_delete_entry(old_dir, old_de, old_page);
+ ufs_delete_entry(old_dir, old_de, old_folio);
mark_inode_dirty(old_inode);
if (dir_de) {
if (old_dir != new_dir)
- ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0);
- else {
- kunmap(dir_page);
- put_page(dir_page);
- }
+ ufs_set_link(old_inode, dir_de, dir_folio, new_dir, 0);
+ else
+ folio_release_kmap(dir_folio, new_dir);
inode_dec_link_count(old_dir);
}
return 0;
out_dir:
- if (dir_de) {
- kunmap(dir_page);
- put_page(dir_page);
- }
+ if (dir_de)
+ folio_release_kmap(dir_folio, dir_de);
out_old:
- kunmap(old_page);
- put_page(old_page);
+ folio_release_kmap(old_folio, old_de);
out:
return err;
}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 6b499180643b..a2c762cb65a0 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -99,15 +99,17 @@ extern void ufs_put_cylinder (struct super_block *, unsigned);
/* dir.c */
extern const struct inode_operations ufs_dir_inode_operations;
-extern int ufs_add_link (struct dentry *, struct inode *);
-extern ino_t ufs_inode_by_name(struct inode *, const struct qstr *);
-extern int ufs_make_empty(struct inode *, struct inode *);
-extern struct ufs_dir_entry *ufs_find_entry(struct inode *, const struct qstr *, struct page **);
-extern int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct page *);
-extern int ufs_empty_dir (struct inode *);
-extern struct ufs_dir_entry *ufs_dotdot(struct inode *, struct page **);
-extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
- struct page *page, struct inode *inode, bool update_times);
+
+int ufs_add_link(struct dentry *, struct inode *);
+ino_t ufs_inode_by_name(struct inode *, const struct qstr *);
+int ufs_make_empty(struct inode *, struct inode *);
+struct ufs_dir_entry *ufs_find_entry(struct inode *, const struct qstr *,
+ struct folio **);
+int ufs_delete_entry(struct inode *, struct ufs_dir_entry *, struct folio *);
+int ufs_empty_dir(struct inode *);
+struct ufs_dir_entry *ufs_dotdot(struct inode *, struct folio **);
+void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
+ struct folio *folio, struct inode *inode, bool update_times);
/* file.c */
extern const struct inode_operations ufs_file_inode_operations;
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 0ecd2ed792f5..bf708b68f150 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -250,9 +250,9 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
}
}
-extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
-extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
-extern int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len);
+dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
+void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
+int ufs_prepare_chunk(struct folio *folio, loff_t pos, unsigned len);
/*
* These functions manipulate ufs buffers
diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c
index fdb4da24d662..b780deb81b02 100644
--- a/fs/vboxsf/file.c
+++ b/fs/vboxsf/file.c
@@ -300,23 +300,23 @@ static int vboxsf_writepage(struct page *page, struct writeback_control *wbc)
static int vboxsf_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned int len, unsigned int copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct inode *inode = mapping->host;
struct vboxsf_handle *sf_handle = file->private_data;
- unsigned int from = pos & ~PAGE_MASK;
+ size_t from = offset_in_folio(folio, pos);
u32 nwritten = len;
u8 *buf;
int err;
- /* zero the stale part of the page if we did a short copy */
- if (!PageUptodate(page) && copied < len)
- zero_user(page, from + copied, len - copied);
+ /* zero the stale part of the folio if we did a short copy */
+ if (!folio_test_uptodate(folio) && copied < len)
+ folio_zero_range(folio, from + copied, len - copied);
- buf = kmap(page);
+ buf = kmap(&folio->page);
err = vboxsf_write(sf_handle->root, sf_handle->handle,
pos, &nwritten, buf + from);
- kunmap(page);
+ kunmap(&folio->page);
if (err) {
nwritten = 0;
@@ -326,16 +326,16 @@ static int vboxsf_write_end(struct file *file, struct address_space *mapping,
/* mtime changed */
VBOXSF_I(inode)->force_restat = 1;
- if (!PageUptodate(page) && nwritten == PAGE_SIZE)
- SetPageUptodate(page);
+ if (!folio_test_uptodate(folio) && nwritten == folio_size(folio))
+ folio_mark_uptodate(folio);
pos += nwritten;
if (pos > inode->i_size)
i_size_write(inode, pos);
out:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return nwritten;
}
@@ -343,7 +343,7 @@ out:
/*
* Note simple_write_begin does not read the page from disk on partial writes
* this is ok since vboxsf_write_end only writes the written parts of the
- * page and it does not call SetPageUptodate for partial writes.
+ * page and it does not call folio_mark_uptodate for partial writes.
*/
const struct address_space_operations vboxsf_reg_aops = {
.read_folio = vboxsf_read_folio,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 496e2f72a85b..797d5b5f7b72 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -749,7 +749,7 @@ xfs_finobt_count_blocks(
if (error)
return error;
- cur = xfs_inobt_init_cursor(pag, tp, agbp);
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_btree_count_blocks(cur, tree_blocks);
xfs_btree_del_cursor(cur, error);
xfs_trans_brelse(tp, agbp);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 513b50da6215..79babeac9d75 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -514,12 +514,18 @@ xfs_dinode_verify(
return __this_address;
}
- if (dip->di_version > 1) {
+ /*
+ * Historical note: xfsprogs in the 3.2 era set up its incore inodes to
+ * have di_nlink track the link count, even if the actual filesystem
+ * only supported V1 inodes (i.e. di_onlink). When writing out the
+ * ondisk inode, it would set both the ondisk di_nlink and di_onlink to
+ * the the incore di_nlink value, which is why we cannot check for
+ * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with
+ * di_onlink==0, so we can check that.
+ */
+ if (dip->di_version >= 2) {
if (dip->di_onlink)
return __this_address;
- } else {
- if (dip->di_nlink)
- return __this_address;
}
/* don't allow invalid i_size */
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
index d848222f802b..9b5d98fe1f8a 100644
--- a/fs/xfs/scrub/xfile.c
+++ b/fs/xfs/scrub/xfile.c
@@ -293,7 +293,7 @@ xfile_get_folio(
* (potentially last) reference in xfile_put_folio.
*/
if (flags & XFILE_ALLOC)
- folio_set_dirty(folio);
+ folio_mark_dirty(folio);
return folio;
}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 6f0fc7fe1f2b..25f5dffeab2a 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -158,8 +158,7 @@ static int
xfs_trim_gather_extents(
struct xfs_perag *pag,
struct xfs_trim_cur *tcur,
- struct xfs_busy_extents *extents,
- uint64_t *blocks_trimmed)
+ struct xfs_busy_extents *extents)
{
struct xfs_mount *mp = pag->pag_mount;
struct xfs_trans *tp;
@@ -280,7 +279,6 @@ xfs_trim_gather_extents(
xfs_extent_busy_insert_discard(pag, fbno, flen,
&extents->extent_list);
- *blocks_trimmed += flen;
next_extent:
if (tcur->by_bno)
error = xfs_btree_increment(cur, 0, &i);
@@ -327,8 +325,7 @@ xfs_trim_perag_extents(
struct xfs_perag *pag,
xfs_agblock_t start,
xfs_agblock_t end,
- xfs_extlen_t minlen,
- uint64_t *blocks_trimmed)
+ xfs_extlen_t minlen)
{
struct xfs_trim_cur tcur = {
.start = start,
@@ -354,8 +351,7 @@ xfs_trim_perag_extents(
extents->owner = extents;
INIT_LIST_HEAD(&extents->extent_list);
- error = xfs_trim_gather_extents(pag, &tcur, extents,
- blocks_trimmed);
+ error = xfs_trim_gather_extents(pag, &tcur, extents);
if (error) {
kfree(extents);
break;
@@ -389,8 +385,7 @@ xfs_trim_datadev_extents(
struct xfs_mount *mp,
xfs_daddr_t start,
xfs_daddr_t end,
- xfs_extlen_t minlen,
- uint64_t *blocks_trimmed)
+ xfs_extlen_t minlen)
{
xfs_agnumber_t start_agno, end_agno;
xfs_agblock_t start_agbno, end_agbno;
@@ -411,8 +406,7 @@ xfs_trim_datadev_extents(
if (start_agno == end_agno)
agend = end_agbno;
- error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen,
- blocks_trimmed);
+ error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen);
if (error)
last_error = error;
@@ -431,9 +425,6 @@ struct xfs_trim_rtdev {
/* list of rt extents to free */
struct list_head extent_list;
- /* pointer to count of blocks trimmed */
- uint64_t *blocks_trimmed;
-
/* minimum length that caller allows us to trim */
xfs_rtblock_t minlen_fsb;
@@ -551,7 +542,6 @@ xfs_trim_gather_rtextent(
busyp->length = rlen;
INIT_LIST_HEAD(&busyp->list);
list_add_tail(&busyp->list, &tr->extent_list);
- *tr->blocks_trimmed += rlen;
tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
return 0;
@@ -562,13 +552,11 @@ xfs_trim_rtdev_extents(
struct xfs_mount *mp,
xfs_daddr_t start,
xfs_daddr_t end,
- xfs_daddr_t minlen,
- uint64_t *blocks_trimmed)
+ xfs_daddr_t minlen)
{
struct xfs_rtalloc_rec low = { };
struct xfs_rtalloc_rec high = { };
struct xfs_trim_rtdev tr = {
- .blocks_trimmed = blocks_trimmed,
.minlen_fsb = XFS_BB_TO_FSB(mp, minlen),
};
struct xfs_trans *tp;
@@ -634,7 +622,7 @@ xfs_trim_rtdev_extents(
return error;
}
#else
-# define xfs_trim_rtdev_extents(m,s,e,n,b) (-EOPNOTSUPP)
+# define xfs_trim_rtdev_extents(...) (-EOPNOTSUPP)
#endif /* CONFIG_XFS_RT */
/*
@@ -661,7 +649,6 @@ xfs_ioc_trim(
xfs_daddr_t start, end;
xfs_extlen_t minlen;
xfs_rfsblock_t max_blocks;
- uint64_t blocks_trimmed = 0;
int error, last_error = 0;
if (!capable(CAP_SYS_ADMIN))
@@ -706,15 +693,13 @@ xfs_ioc_trim(
end = start + BTOBBT(range.len) - 1;
if (bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev)) {
- error = xfs_trim_datadev_extents(mp, start, end, minlen,
- &blocks_trimmed);
+ error = xfs_trim_datadev_extents(mp, start, end, minlen);
if (error)
last_error = error;
}
if (rt_bdev && !xfs_trim_should_stop()) {
- error = xfs_trim_rtdev_extents(mp, start, end, minlen,
- &blocks_trimmed);
+ error = xfs_trim_rtdev_extents(mp, start, end, minlen);
if (error)
last_error = error;
}
@@ -722,7 +707,8 @@ xfs_ioc_trim(
if (last_error)
return last_error;
- range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
+ range.len = min_t(unsigned long long, range.len,
+ XFS_FSB_TO_B(mp, max_blocks));
if (copy_to_user(urange, &range, sizeof(range)))
return -EFAULT;
return 0;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 85dbb46452ca..71f32354944e 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -71,7 +71,7 @@ xfs_fsmap_owner_to_rmap(
switch (src->fmr_owner) {
case 0: /* "lowest owner id possible" */
case -1ULL: /* "highest owner id possible" */
- dest->rm_owner = 0;
+ dest->rm_owner = src->fmr_owner;
break;
case XFS_FMR_OWN_FREE:
dest->rm_owner = XFS_RMAP_OWN_NULL;
@@ -162,6 +162,7 @@ struct xfs_getfsmap_info {
xfs_daddr_t next_daddr; /* next daddr we expect */
/* daddr of low fsmap key when we're using the rtbitmap */
xfs_daddr_t low_daddr;
+ xfs_daddr_t end_daddr; /* daddr of high fsmap key */
u64 missing_owner; /* owner of holes */
u32 dev; /* device id */
/*
@@ -182,6 +183,7 @@ struct xfs_getfsmap_dev {
int (*fn)(struct xfs_trans *tp,
const struct xfs_fsmap *keys,
struct xfs_getfsmap_info *info);
+ sector_t nr_sectors;
};
/* Compare two getfsmap device handlers. */
@@ -252,7 +254,7 @@ xfs_getfsmap_rec_before_start(
const struct xfs_rmap_irec *rec,
xfs_daddr_t rec_daddr)
{
- if (info->low_daddr != -1ULL)
+ if (info->low_daddr != XFS_BUF_DADDR_NULL)
return rec_daddr < info->low_daddr;
if (info->low.rm_blockcount)
return xfs_rmap_compare(rec, &info->low) < 0;
@@ -294,6 +296,18 @@ xfs_getfsmap_helper(
return 0;
}
+ /*
+ * For an info->last query, we're looking for a gap between the last
+ * mapping emitted and the high key specified by userspace. If the
+ * user's query spans less than 1 fsblock, then info->high and
+ * info->low will have the same rm_startblock, which causes rec_daddr
+ * and next_daddr to be the same. Therefore, use the end_daddr that
+ * we calculated from userspace's high key to synthesize the record.
+ * Note that if the btree query found a mapping, there won't be a gap.
+ */
+ if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
+ rec_daddr = info->end_daddr;
+
/* Are we just counting mappings? */
if (info->head->fmh_count == 0) {
if (info->head->fmh_entries == UINT_MAX)
@@ -904,17 +918,21 @@ xfs_getfsmap(
/* Set up our device handlers. */
memset(handlers, 0, sizeof(handlers));
+ handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
if (use_rmap)
handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
else
handlers[0].fn = xfs_getfsmap_datadev_bnobt;
if (mp->m_logdev_targp != mp->m_ddev_targp) {
+ handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
+ mp->m_sb.sb_logblocks);
handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
handlers[1].fn = xfs_getfsmap_logdev;
}
#ifdef CONFIG_XFS_RT
if (mp->m_rtdev_targp) {
+ handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
}
@@ -946,6 +964,7 @@ xfs_getfsmap(
info.next_daddr = head->fmh_keys[0].fmr_physical +
head->fmh_keys[0].fmr_length;
+ info.end_daddr = XFS_BUF_DADDR_NULL;
info.fsmap_recs = fsmap_recs;
info.head = head;
@@ -966,8 +985,11 @@ xfs_getfsmap(
* low key, zero out the low key so that we get
* everything from the beginning.
*/
- if (handlers[i].dev == head->fmh_keys[1].fmr_device)
+ if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
dkeys[1] = head->fmh_keys[1];
+ info.end_daddr = min(handlers[i].nr_sectors - 1,
+ dkeys[1].fmr_physical);
+ }
if (handlers[i].dev > head->fmh_keys[0].fmr_device)
memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
@@ -983,7 +1005,7 @@ xfs_getfsmap(
info.dev = handlers[i].dev;
info.last = false;
info.pag = NULL;
- info.low_daddr = -1ULL;
+ info.low_daddr = XFS_BUF_DADDR_NULL;
info.low.rm_blockcount = 0;
error = handlers[i].fn(tp, dkeys, &info);
if (error)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 0c3e96c621a6..ebeab8e4dab1 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -785,6 +785,39 @@ xfs_alloc_rsum_cache(
}
/*
+ * If we changed the rt extent size (meaning there was no rt volume previously)
+ * and the root directory had EXTSZINHERIT and RTINHERIT set, it's possible
+ * that the extent size hint on the root directory is no longer congruent with
+ * the new rt extent size. Log the rootdir inode to fix this.
+ */
+static int
+xfs_growfs_rt_fixup_extsize(
+ struct xfs_mount *mp)
+{
+ struct xfs_inode *ip = mp->m_rootip;
+ struct xfs_trans *tp;
+ int error = 0;
+
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ if (!(ip->i_diflags & XFS_DIFLAG_RTINHERIT) ||
+ !(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT))
+ goto out_iolock;
+
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, 0, 0, false,
+ &tp);
+ if (error)
+ goto out_iolock;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+out_iolock:
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return error;
+}
+
+/*
* Visible (exported) functions.
*/
@@ -812,6 +845,7 @@ xfs_growfs_rt(
xfs_extlen_t rsumblocks; /* current number of rt summary blks */
xfs_sb_t *sbp; /* old superblock */
uint8_t *rsum_cache; /* old summary cache */
+ xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize;
sbp = &mp->m_sb;
@@ -821,34 +855,39 @@ xfs_growfs_rt(
/* Needs to have been mounted with an rt device. */
if (!XFS_IS_REALTIME_MOUNT(mp))
return -EINVAL;
+
+ if (!mutex_trylock(&mp->m_growlock))
+ return -EWOULDBLOCK;
/*
* Mount should fail if the rt bitmap/summary files don't load, but
* we'll check anyway.
*/
+ error = -EINVAL;
if (!mp->m_rbmip || !mp->m_rsumip)
- return -EINVAL;
+ goto out_unlock;
/* Shrink not supported. */
if (in->newblocks <= sbp->sb_rblocks)
- return -EINVAL;
+ goto out_unlock;
/* Can only change rt extent size when adding rt volume. */
if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
- return -EINVAL;
+ goto out_unlock;
/* Range check the extent size. */
if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
- return -EINVAL;
+ goto out_unlock;
/* Unsupported realtime features. */
+ error = -EOPNOTSUPP;
if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
- return -EOPNOTSUPP;
+ goto out_unlock;
nrblocks = in->newblocks;
error = xfs_sb_validate_fsb_count(sbp, nrblocks);
if (error)
- return error;
+ goto out_unlock;
/*
* Read in the last block of the device, make sure it exists.
*/
@@ -856,7 +895,7 @@ xfs_growfs_rt(
XFS_FSB_TO_BB(mp, nrblocks - 1),
XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL);
if (error)
- return error;
+ goto out_unlock;
xfs_buf_relse(bp);
/*
@@ -864,8 +903,10 @@ xfs_growfs_rt(
*/
nrextents = nrblocks;
do_div(nrextents, in->extsize);
- if (!xfs_validate_rtextents(nrextents))
- return -EINVAL;
+ if (!xfs_validate_rtextents(nrextents)) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents);
nrextslog = xfs_compute_rextslog(nrextents);
nrsumlevels = nrextslog + 1;
@@ -876,8 +917,11 @@ xfs_growfs_rt(
* the log. This prevents us from getting a log overflow,
* since we'll log basically the whole summary file at once.
*/
- if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1))
- return -EINVAL;
+ if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Get the old block counts for bitmap and summary inodes.
* These can't change since other growfs callers are locked out.
@@ -889,10 +933,10 @@ xfs_growfs_rt(
*/
error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
if (error)
- return error;
+ goto out_unlock;
error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
if (error)
- return error;
+ goto out_unlock;
rsum_cache = mp->m_rsum_cache;
if (nrbmblocks != sbp->sb_rbmblocks)
@@ -1036,6 +1080,12 @@ error_cancel:
if (error)
goto out_free;
+ if (old_rextsize != in->extsize) {
+ error = xfs_growfs_rt_fixup_extsize(mp);
+ if (error)
+ goto out_free;
+ }
+
/* Update secondary superblocks now the physical grow has completed */
error = xfs_update_secondary_sbs(mp);
@@ -1059,6 +1109,8 @@ out_free:
}
}
+out_unlock:
+ mutex_unlock(&mp->m_growlock);
return error;
}