diff options
author | Josef Bacik <josef@toxicpanda.com> | 2024-08-16 21:16:24 +0200 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2024-09-10 16:51:20 +0200 |
commit | ac325fc2aad513072722387a71bf857c938aae4e (patch) | |
tree | f00b90af5f961b99f3dc2b4af9b28af2d752e5b1 /fs/btrfs/direct-io.c | |
parent | btrfs: take the dio extent lock during O_DIRECT operations (diff) | |
download | linux-ac325fc2aad513072722387a71bf857c938aae4e.tar.xz linux-ac325fc2aad513072722387a71bf857c938aae4e.zip |
btrfs: do not hold the extent lock for entire read
Historically we've held the extent lock throughout the entire read.
There's been a few reasons for this, but it's mostly just caused us
problems. For example, this prevents us from allowing page faults
during direct io reads, because we could deadlock. This has forced us
to only allow 4k reads at a time for io_uring NOWAIT requests because we
have no idea if we'll be forced to page fault and thus have to do a
whole lot of work.
On the buffered side we are protected by the page lock, as long as we're
reading things like buffered writes, punch hole, and even direct IO to a
certain degree will get hung up on the page lock while the page is in
flight.
On the direct side we have the dio extent lock, which acts much like the
way the extent lock worked previously to this patch, however just for
direct reads. This protects direct reads from concurrent direct writes,
while we're protected from buffered writes via the inode lock.
Now that we're protected in all cases, narrow the extent lock to the
part where we're getting the extent map to submit the reads, no longer
holding the extent lock for the entire read operation. Push the extent
lock down into do_readpage() so that we're only grabbing it when looking
up the extent map. This portion was contributed by Goldwyn.
Co-developed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/direct-io.c')
-rw-r--r-- | fs/btrfs/direct-io.c | 49 |
1 files changed, 24 insertions, 25 deletions
diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index 4a5f9b2632f2..bd38df5647e3 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -365,7 +365,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, int ret = 0; u64 len = length; const u64 data_alloc_len = length; - bool unlock_extents = false; + u32 unlock_bits = EXTENT_LOCKED; /* * We could potentially fault if we have a buffer > PAGE_SIZE, and if @@ -526,7 +526,6 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, start, &len, flags); if (ret < 0) goto unlock_err; - unlock_extents = true; /* Recalc len in case the new em is smaller than requested */ len = min(len, em->len - (start - em->start)); if (dio_data->data_space_reserved) { @@ -547,23 +546,8 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, release_offset, release_len); } - } else { - /* - * We need to unlock only the end area that we aren't using. - * The rest is going to be unlocked by the endio routine. - */ - lockstart = start + len; - if (lockstart < lockend) - unlock_extents = true; } - if (unlock_extents) - clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, - EXTENT_LOCKED | EXTENT_DIO_LOCKED, - &cached_state); - else - free_extent_state(cached_state); - /* * Translate extent map information to iomap. * We trim the extents (and move the addr) even though iomap code does @@ -582,6 +566,23 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, iomap->length = len; free_extent_map(em); + /* + * Reads will hold the EXTENT_DIO_LOCKED bit until the io is completed, + * writes only hold it for this part. We hold the extent lock until + * we're completely done with the extent map to make sure it remains + * valid. + */ + if (write) + unlock_bits |= EXTENT_DIO_LOCKED; + + clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, + unlock_bits, &cached_state); + + /* We didn't use everything, unlock the dio extent for the remainder. */ + if (!write && (start + len) < lockend) + unlock_dio_extent(&BTRFS_I(inode)->io_tree, start + len, + lockend, NULL); + return 0; unlock_err: @@ -614,8 +615,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length, if (!write && (iomap->type == IOMAP_HOLE)) { /* If reading from a hole, unlock and return */ - clear_extent_bit(&BTRFS_I(inode)->io_tree, pos, pos + length - 1, - EXTENT_LOCKED | EXTENT_DIO_LOCKED, NULL); + unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos, + pos + length - 1, NULL); return 0; } @@ -626,9 +627,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length, btrfs_finish_ordered_extent(dio_data->ordered, NULL, pos, length, false); else - clear_extent_bit(&BTRFS_I(inode)->io_tree, pos, - pos + length - 1, - EXTENT_LOCKED | EXTENT_DIO_LOCKED, NULL); + unlock_dio_extent(&BTRFS_I(inode)->io_tree, pos, + pos + length - 1, NULL); ret = -ENOTBLK; } if (write) { @@ -660,9 +660,8 @@ static void btrfs_dio_end_io(struct btrfs_bio *bbio) dip->file_offset, dip->bytes, !bio->bi_status); } else { - clear_extent_bit(&inode->io_tree, dip->file_offset, - dip->file_offset + dip->bytes - 1, - EXTENT_LOCKED | EXTENT_DIO_LOCKED, NULL); + unlock_dio_extent(&inode->io_tree, dip->file_offset, + dip->file_offset + dip->bytes - 1, NULL); } bbio->bio.bi_private = bbio->private; |