summaryrefslogtreecommitdiffstats
path: root/block/blk-throttle.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 13:33:06 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-16 13:33:06 +0200
commit26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4 (patch)
treea08d01893b603d2f611a617f6055b54a835c03f0 /block/blk-throttle.c
parentMerge tag 'for-6.12/io_uring-20240913' of git://git.kernel.dk/linux (diff)
parentMerge tag 'nvme-6.12-2024-09-13' of git://git.infradead.org/nvme into for-6.1... (diff)
downloadlinux-26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4.tar.xz
linux-26bb0d3f38a764b743a3ad5c8b6e5b5044d7ceb4.zip
Merge tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - MD changes via Song: - md-bitmap refactoring (Yu Kuai) - raid5 performance optimization (Artur Paszkiewicz) - Other small fixes (Yu Kuai, Chen Ni) - Add a sysfs entry 'new_level' (Xiao Ni) - Improve information reported in /proc/mdstat (Mateusz Kusiak) - NVMe changes via Keith: - Asynchronous namespace scanning (Stuart) - TCP TLS updates (Hannes) - RDMA queue controller validation (Niklas) - Align field names to the spec (Anuj) - Metadata support validation (Puranjay) - A syntax cleanup (Shen) - Fix a Kconfig linking error (Arnd) - New queue-depth quirk (Keith) - Add missing unplug trace event (Keith) - blk-iocost fixes (Colin, Konstantin) - t10-pi modular removal and fixes (Alexey) - Fix for potential BLKSECDISCARD overflow (Alexey) - bio splitting cleanups and fixes (Christoph) - Deal with folios rather than rather than pages, speeding up how the block layer handles bigger IOs (Kundan) - Use spinlocks rather than bit spinlocks in zram (Sebastian, Mike) - Reduce zoned device overhead in ublk (Ming) - Add and use sendpages_ok() for drbd and nvme-tcp (Ofir) - Fix regression in partition error pointer checking (Riyan) - Add support for write zeroes and rotational status in nbd (Wouter) - Add Yu Kuai as new BFQ maintainer. The scheduler has been unmaintained for quite a while. - Various sets of fixes for BFQ (Yu Kuai) - Misc fixes and cleanups (Alvaro, Christophe, Li, Md Haris, Mikhail, Yang) * tag 'for-6.12/block-20240913' of git://git.kernel.dk/linux: (120 commits) nvme-pci: qdepth 1 quirk block: fix potential invalid pointer dereference in blk_add_partition blk_iocost: make read-only static array vrate_adj_pct const block: unpin user pages belonging to a folio at once mm: release number of pages of a folio block: introduce folio awareness and add a bigger size from folio block: Added folio-ized version of bio_add_hw_page() block, bfq: factor out a helper to split bfqq in bfq_init_rq() block, bfq: remove local variable 'bfqq_already_existing' in bfq_init_rq() block, bfq: remove local variable 'split' in bfq_init_rq() block, bfq: remove bfq_log_bfqg() block, bfq: merge bfq_release_process_ref() into bfq_put_cooperator() block, bfq: fix procress reference leakage for bfqq in merge chain block, bfq: fix uaf for accessing waker_bfqq after splitting blk-throttle: support prioritized processing of metadata blk-throttle: remove last_low_overflow_time drbd: Add NULL check for net_conf to prevent dereference in state validation nvme-tcp: fix link failure for TCP auth blk-mq: add missing unplug trace event mtip32xx: Remove redundant null pointer checks in mtip_hw_debugfs_init() ...
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--block/blk-throttle.c69
1 files changed, 42 insertions, 27 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 6943ec720f39..2c4192e12efa 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1584,6 +1584,22 @@ void blk_throtl_cancel_bios(struct gendisk *disk)
spin_unlock_irq(&q->queue_lock);
}
+static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw)
+{
+ /* throtl is FIFO - if bios are already queued, should queue */
+ if (tg->service_queue.nr_queued[rw])
+ return false;
+
+ return tg_may_dispatch(tg, bio, NULL);
+}
+
+static void tg_dispatch_in_debt(struct throtl_grp *tg, struct bio *bio, bool rw)
+{
+ if (!bio_flagged(bio, BIO_BPS_THROTTLED))
+ tg->carryover_bytes[rw] -= throtl_bio_data_size(bio);
+ tg->carryover_ios[rw]--;
+}
+
bool __blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
@@ -1600,34 +1616,35 @@ bool __blk_throtl_bio(struct bio *bio)
sq = &tg->service_queue;
while (true) {
- if (tg->last_low_overflow_time[rw] == 0)
- tg->last_low_overflow_time[rw] = jiffies;
- /* throtl is FIFO - if bios are already queued, should queue */
- if (sq->nr_queued[rw])
- break;
-
- /* if above limits, break to queue */
- if (!tg_may_dispatch(tg, bio, NULL)) {
- tg->last_low_overflow_time[rw] = jiffies;
+ if (tg_within_limit(tg, bio, rw)) {
+ /* within limits, let's charge and dispatch directly */
+ throtl_charge_bio(tg, bio);
+
+ /*
+ * We need to trim slice even when bios are not being
+ * queued otherwise it might happen that a bio is not
+ * queued for a long time and slice keeps on extending
+ * and trim is not called for a long time. Now if limits
+ * are reduced suddenly we take into account all the IO
+ * dispatched so far at new low rate and * newly queued
+ * IO gets a really long dispatch time.
+ *
+ * So keep on trimming slice even if bio is not queued.
+ */
+ throtl_trim_slice(tg, rw);
+ } else if (bio_issue_as_root_blkg(bio)) {
+ /*
+ * IOs which may cause priority inversions are
+ * dispatched directly, even if they're over limit.
+ * Debts are handled by carryover_bytes/ios while
+ * calculating wait time.
+ */
+ tg_dispatch_in_debt(tg, bio, rw);
+ } else {
+ /* if above limits, break to queue */
break;
}
- /* within limits, let's charge and dispatch directly */
- throtl_charge_bio(tg, bio);
-
- /*
- * We need to trim slice even when bios are not being queued
- * otherwise it might happen that a bio is not queued for
- * a long time and slice keeps on extending and trim is not
- * called for a long time. Now if limits are reduced suddenly
- * we take into account all the IO dispatched so far at new
- * low rate and * newly queued IO gets a really long dispatch
- * time.
- *
- * So keep on trimming slice even if bio is not queued.
- */
- throtl_trim_slice(tg, rw);
-
/*
* @bio passed through this layer without being throttled.
* Climb up the ladder. If we're already at the top, it
@@ -1650,8 +1667,6 @@ bool __blk_throtl_bio(struct bio *bio)
tg->io_disp[rw], tg_iops_limit(tg, rw),
sq->nr_queued[READ], sq->nr_queued[WRITE]);
- tg->last_low_overflow_time[rw] = jiffies;
-
td->nr_queued[rw]++;
throtl_add_bio_tg(bio, qn, tg);
throttled = true;