summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-12-29 18:34:34 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2024-12-29 18:34:34 +0100
commitc059361673e487fe33bb736fb944f313024ad726 (patch)
treeb35bd7ecf61bf7ce15c585fa3330461f8c0675d3 /fs/btrfs
parentMerge tag 'i2c-for-6.13-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git... (diff)
parentbtrfs: sysfs: fix direct super block member reads (diff)
downloadlinux-c059361673e487fe33bb736fb944f313024ad726.tar.xz
linux-c059361673e487fe33bb736fb944f313024ad726.zip
Merge tag 'for-6.13-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: "A few more fixes that accumulated over the last two weeks, fixing some user reported problems: - swapfile fixes: - conditional reschedule in the activation loop - fix race with memory mapped file when activating - make activation loop interruptible - rework and fix extent sharing checks - folio fixes: - in send, recheck folio mapping after unlock - in relocation, recheck folio mapping after unlock - fix waiting for encoded read io_uring requests - fix transaction atomicity when enabling simple quotas - move COW block trace point before the block gets freed - print various sizes in sysfs with correct endianity" * tag 'for-6.13-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: sysfs: fix direct super block member reads btrfs: fix transaction atomicity bug when enabling simple quotas btrfs: avoid monopolizing a core when activating a swap file btrfs: allow swap activation to be interruptible btrfs: fix swap file activation failure due to extents that used to be shared btrfs: fix race with memory mapped writes when activating swap file btrfs: check folio mapping after unlock in put_file_data() btrfs: check folio mapping after unlock in relocate_one_folio() btrfs: fix use-after-free when COWing tree bock and tracing is enabled btrfs: fix use-after-free waiting for encoded read endios
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.c11
-rw-r--r--fs/btrfs/inode.c154
-rw-r--r--fs/btrfs/qgroup.c3
-rw-r--r--fs/btrfs/relocation.c6
-rw-r--r--fs/btrfs/send.c6
-rw-r--r--fs/btrfs/sysfs.c6
6 files changed, 130 insertions, 56 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 693dc27ffb89..185985a337b3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -654,6 +654,8 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
goto error_unlock_cow;
}
}
+
+ trace_btrfs_cow_block(root, buf, cow);
if (unlock_orig)
btrfs_tree_unlock(buf);
free_extent_buffer_stale(buf);
@@ -710,7 +712,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 search_start;
- int ret;
if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
btrfs_abort_transaction(trans, -EUCLEAN);
@@ -751,12 +752,8 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
* Also We don't care about the error, as it's handled internally.
*/
btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
- ret = btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
- cow_ret, search_start, 0, nest);
-
- trace_btrfs_cow_block(root, buf, *cow_ret);
-
- return ret;
+ return btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
+ cow_ret, search_start, 0, nest);
}
ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 488edca8333a..27b2fe7f735d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9078,9 +9078,9 @@ out:
}
struct btrfs_encoded_read_private {
- wait_queue_head_t wait;
+ struct completion done;
void *uring_ctx;
- atomic_t pending;
+ refcount_t pending_refs;
blk_status_t status;
};
@@ -9099,14 +9099,14 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
*/
WRITE_ONCE(priv->status, bbio->bio.bi_status);
}
- if (atomic_dec_and_test(&priv->pending)) {
+ if (refcount_dec_and_test(&priv->pending_refs)) {
int err = blk_status_to_errno(READ_ONCE(priv->status));
if (priv->uring_ctx) {
btrfs_uring_read_extent_endio(priv->uring_ctx, err);
kfree(priv);
} else {
- wake_up(&priv->wait);
+ complete(&priv->done);
}
}
bio_put(&bbio->bio);
@@ -9126,8 +9126,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
if (!priv)
return -ENOMEM;
- init_waitqueue_head(&priv->wait);
- atomic_set(&priv->pending, 1);
+ init_completion(&priv->done);
+ refcount_set(&priv->pending_refs, 1);
priv->status = 0;
priv->uring_ctx = uring_ctx;
@@ -9140,7 +9140,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
- atomic_inc(&priv->pending);
+ refcount_inc(&priv->pending_refs);
btrfs_submit_bbio(bbio, 0);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
@@ -9155,11 +9155,11 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
disk_io_size -= bytes;
} while (disk_io_size);
- atomic_inc(&priv->pending);
+ refcount_inc(&priv->pending_refs);
btrfs_submit_bbio(bbio, 0);
if (uring_ctx) {
- if (atomic_dec_return(&priv->pending) == 0) {
+ if (refcount_dec_and_test(&priv->pending_refs)) {
ret = blk_status_to_errno(READ_ONCE(priv->status));
btrfs_uring_read_extent_endio(uring_ctx, ret);
kfree(priv);
@@ -9168,8 +9168,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
return -EIOCBQUEUED;
} else {
- if (atomic_dec_return(&priv->pending) != 0)
- io_wait_event(priv->wait, !atomic_read(&priv->pending));
+ if (!refcount_dec_and_test(&priv->pending_refs))
+ wait_for_completion_io(&priv->done);
/* See btrfs_encoded_read_endio() for ordering. */
ret = blk_status_to_errno(READ_ONCE(priv->status));
kfree(priv);
@@ -9799,15 +9799,25 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
- struct extent_map *em = NULL;
struct btrfs_chunk_map *map = NULL;
struct btrfs_device *device = NULL;
struct btrfs_swap_info bsi = {
.lowest_ppage = (sector_t)-1ULL,
};
+ struct btrfs_backref_share_check_ctx *backref_ctx = NULL;
+ struct btrfs_path *path = NULL;
int ret = 0;
u64 isize;
- u64 start;
+ u64 prev_extent_end = 0;
+
+ /*
+ * Acquire the inode's mmap lock to prevent races with memory mapped
+ * writes, as they could happen after we flush delalloc below and before
+ * we lock the extent range further below. The inode was already locked
+ * up in the call chain.
+ */
+ btrfs_assert_inode_locked(BTRFS_I(inode));
+ down_write(&BTRFS_I(inode)->i_mmap_lock);
/*
* If the swap file was just created, make sure delalloc is done. If the
@@ -9816,22 +9826,32 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
*/
ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
if (ret)
- return ret;
+ goto out_unlock_mmap;
/*
* The inode is locked, so these flags won't change after we check them.
*/
if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
btrfs_warn(fs_info, "swapfile must not be compressed");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock_mmap;
}
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
btrfs_warn(fs_info, "swapfile must not be copy-on-write");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock_mmap;
}
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
btrfs_warn(fs_info, "swapfile must not be checksummed");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock_mmap;
+ }
+
+ path = btrfs_alloc_path();
+ backref_ctx = btrfs_alloc_backref_share_check_ctx();
+ if (!path || !backref_ctx) {
+ ret = -ENOMEM;
+ goto out_unlock_mmap;
}
/*
@@ -9846,7 +9866,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
btrfs_warn(fs_info,
"cannot activate swapfile while exclusive operation is running");
- return -EBUSY;
+ ret = -EBUSY;
+ goto out_unlock_mmap;
}
/*
@@ -9860,7 +9881,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
btrfs_exclop_finish(fs_info);
btrfs_warn(fs_info,
"cannot activate swapfile because snapshot creation is in progress");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock_mmap;
}
/*
* Snapshots can create extents which require COW even if NODATACOW is
@@ -9881,7 +9903,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
btrfs_warn(fs_info,
"cannot activate swapfile because subvolume %llu is being deleted",
btrfs_root_id(root));
- return -EPERM;
+ ret = -EPERM;
+ goto out_unlock_mmap;
}
atomic_inc(&root->nr_swapfiles);
spin_unlock(&root->root_item_lock);
@@ -9889,24 +9912,39 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
lock_extent(io_tree, 0, isize - 1, &cached_state);
- start = 0;
- while (start < isize) {
- u64 logical_block_start, physical_block_start;
+ while (prev_extent_end < isize) {
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *ei;
struct btrfs_block_group *bg;
- u64 len = isize - start;
+ u64 logical_block_start;
+ u64 physical_block_start;
+ u64 extent_gen;
+ u64 disk_bytenr;
+ u64 len;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
+ key.objectid = btrfs_ino(BTRFS_I(inode));
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = prev_extent_end;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
goto out;
- }
- if (em->disk_bytenr == EXTENT_MAP_HOLE) {
+ /*
+ * If key not found it means we have an implicit hole (NO_HOLES
+ * is enabled).
+ */
+ if (ret > 0) {
btrfs_warn(fs_info, "swapfile must not have holes");
ret = -EINVAL;
goto out;
}
- if (em->disk_bytenr == EXTENT_MAP_INLINE) {
+
+ leaf = path->nodes[0];
+ ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
/*
* It's unlikely we'll ever actually find ourselves
* here, as a file small enough to fit inline won't be
@@ -9918,23 +9956,45 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
ret = -EINVAL;
goto out;
}
- if (extent_map_is_compressed(em)) {
+
+ if (btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
btrfs_warn(fs_info, "swapfile must not be compressed");
ret = -EINVAL;
goto out;
}
- logical_block_start = extent_map_block_start(em) + (start - em->start);
- len = min(len, em->len - (start - em->start));
- free_extent_map(em);
- em = NULL;
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
+ if (disk_bytenr == 0) {
+ btrfs_warn(fs_info, "swapfile must not have holes");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ logical_block_start = disk_bytenr + btrfs_file_extent_offset(leaf, ei);
+ extent_gen = btrfs_file_extent_generation(leaf, ei);
+ prev_extent_end = btrfs_file_extent_end(path);
- ret = can_nocow_extent(inode, start, &len, NULL, false, true);
+ if (prev_extent_end > isize)
+ len = isize - key.offset;
+ else
+ len = btrfs_file_extent_num_bytes(leaf, ei);
+
+ backref_ctx->curr_leaf_bytenr = leaf->start;
+
+ /*
+ * Don't need the path anymore, release to avoid deadlocks when
+ * calling btrfs_is_data_extent_shared() because when joining a
+ * transaction it can block waiting for the current one's commit
+ * which in turn may be trying to lock the same leaf to flush
+ * delayed items for example.
+ */
+ btrfs_release_path(path);
+
+ ret = btrfs_is_data_extent_shared(BTRFS_I(inode), disk_bytenr,
+ extent_gen, backref_ctx);
if (ret < 0) {
goto out;
- } else if (ret) {
- ret = 0;
- } else {
+ } else if (ret > 0) {
btrfs_warn(fs_info,
"swapfile must not be copy-on-write");
ret = -EINVAL;
@@ -9969,7 +10029,6 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
physical_block_start = (map->stripes[0].physical +
(logical_block_start - map->start));
- len = min(len, map->chunk_len - (logical_block_start - map->start));
btrfs_free_chunk_map(map);
map = NULL;
@@ -10010,20 +10069,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (ret)
goto out;
}
- bsi.start = start;
+ bsi.start = key.offset;
bsi.block_start = physical_block_start;
bsi.block_len = len;
}
- start += len;
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
+
+ cond_resched();
}
if (bsi.block_len)
ret = btrfs_add_swap_extent(sis, &bsi);
out:
- if (!IS_ERR_OR_NULL(em))
- free_extent_map(em);
if (!IS_ERR_OR_NULL(map))
btrfs_free_chunk_map(map);
@@ -10036,6 +10098,10 @@ out:
btrfs_exclop_finish(fs_info);
+out_unlock_mmap:
+ up_write(&BTRFS_I(inode)->i_mmap_lock);
+ btrfs_free_backref_share_ctx(backref_ctx);
+ btrfs_free_path(path);
if (ret)
return ret;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a6f92836c9b1..f9b214992212 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1121,6 +1121,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON;
if (simple) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
+ btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid);
} else {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -1254,8 +1255,6 @@ out_add_root:
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_root = quota_root;
set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
- if (simple)
- btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
spin_unlock(&fs_info->qgroup_lock);
/* Skip rescan for simple qgroups. */
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index bf267bdfa8f8..db8b42f674b7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2902,6 +2902,7 @@ static int relocate_one_folio(struct reloc_control *rc,
const bool use_rst = btrfs_need_stripe_tree_update(fs_info, rc->block_group->flags);
ASSERT(index <= last_index);
+again:
folio = filemap_lock_folio(inode->i_mapping, index);
if (IS_ERR(folio)) {
@@ -2937,6 +2938,11 @@ static int relocate_one_folio(struct reloc_control *rc,
ret = -EIO;
goto release_folio;
}
+ if (folio->mapping != inode->i_mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
+ goto again;
+ }
}
/*
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7254279c3cc9..498c84323253 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -5280,6 +5280,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
unsigned cur_len = min_t(unsigned, len,
PAGE_SIZE - pg_offset);
+again:
folio = filemap_lock_folio(mapping, index);
if (IS_ERR(folio)) {
page_cache_sync_readahead(mapping,
@@ -5312,6 +5313,11 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
ret = -EIO;
break;
}
+ if (folio->mapping != mapping) {
+ folio_unlock(folio);
+ folio_put(folio);
+ goto again;
+ }
}
memcpy_from_folio(sctx->send_buf + sctx->send_size, folio,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index fdcbf650ac31..7f09b6c9cc2d 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1118,7 +1118,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize);
+ return sysfs_emit(buf, "%u\n", fs_info->nodesize);
}
BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
@@ -1128,7 +1128,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
+ return sysfs_emit(buf, "%u\n", fs_info->sectorsize);
}
BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
@@ -1180,7 +1180,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
- return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
+ return sysfs_emit(buf, "%u\n", fs_info->sectorsize);
}
BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);