From 9778369a2d6c5ed2b81a04164c4aa9da1bdb193d Mon Sep 17 00:00:00 2001
From: Paolo Valente
Date: Tue, 3 Jan 2023 15:54:56 +0100
Subject: block, bfq: split sync bfq_queues on a per-actuator basis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Single-LUN multi-actuator SCSI drives, as well as all multi-actuator
SATA drives, appear as a single device to the I/O subsystem [1]. Yet
they address commands to different actuators internally, as a function
of Logical Block Addresses (LBAs). A given sector is reachable by only
one of the actuators. For example, Seagate’s Serial Advanced Technology
Attachment (SATA) version contains two actuators and maps the lower
half of the SATA LBA space to the lower actuator and the upper half to
the upper actuator.

Evidently, to fully utilize actuators, no actuator should be left idle
or underutilized while there is pending I/O for it. The block layer
must somehow control the load of each actuator individually. This
commit lays the groundwork for allowing BFQ to provide such
per-actuator control.

BFQ associates an I/O-request sync bfq_queue with each process doing
synchronous I/O, or with a group of processes, in case of queue
merging. Then BFQ serves one bfq_queue at a time. While in service, a
bfq_queue is emptied in request-position order. Yet the same process,
or group of processes, may generate I/O for different actuators. In
this case, different streams of I/O (each for a different actuator)
all get inserted into the same sync bfq_queue. So there is basically no
individual control on when each stream is served, i.e., on when the I/O
requests of the stream are picked from the bfq_queue and dispatched to
the drive.

This commit enables BFQ to control the service of each actuator
individually for synchronous I/O, by simply splitting each sync
bfq_queue into N queues, one for each actuator. In other words, a sync
bfq_queue is now associated with a pair (process, actuator). As a
consequence of this split, the per-queue proportional-share policy
implemented by BFQ will guarantee that the sync I/O generated for each
actuator, by each process, receives its fair share of service.

This is just a preparatory patch. If the I/O of the same process
happens to be sent to different queues, then each of these queues may
undergo queue merging. To handle this event, the bfq_io_cq data
structure must be properly extended. In addition, stable merging must
be disabled to avoid loss of control on individual actuators. Finally,
async queues must be split as well. These issues are described in
detail and addressed in the next commits. As for this commit, although
multiple per-process bfq_queues are provided, the I/O of each process
or group of processes is still sent to only one queue, regardless of
the actuator the I/O is for. The forwarding to distinct bfq_queues will
be enabled after addressing the above issues.
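[Editor's illustration] To make the (process, actuator) association described above concrete, here is a minimal sketch of how the per-actuator sync queue could be picked at request-insertion time. It is an illustration only, not code from this series: the helper names example_actuator_index() and example_get_sync_bfqq() are invented for this sketch, and it assumes that the per-actuator sector[] and nr_sectors[] fields of struct bfq_data (shown later in this series) hold each actuator's start sector and length. Only the extended bic_to_bfqq(bic, is_sync, act_idx) signature comes from the patches below.

/*
 * Illustrative sketch (not part of the patch): derive the actuator
 * index of a request from the per-actuator sector ranges, then look
 * up the sync bfq_queue of the (process, actuator) pair.
 */
static unsigned int example_actuator_index(struct bfq_data *bfqd,
                                           sector_t sec)
{
        unsigned int i;

        for (i = 0; i < bfqd->num_actuators; i++)
                if (sec >= bfqd->sector[i] &&
                    sec < bfqd->sector[i] + bfqd->nr_sectors[i])
                        return i;

        return 0; /* fall back to the first actuator */
}

static struct bfq_queue *example_get_sync_bfqq(struct bfq_data *bfqd,
                                               struct bfq_io_cq *bic,
                                               struct request *rq)
{
        unsigned int idx = example_actuator_index(bfqd, blk_rq_pos(rq));

        /* one sync bfq_queue per (process, actuator) pair */
        return bic_to_bfqq(bic, true, idx);
}
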
[1] https://www.linaro.org/blog/budget-fair-queueing-bfq-linux-io-scheduler-optimizations-for-multi-actuator-sata-hard-drives/ Reviewed-by: Damien Le Moal Signed-off-by: Gabriele Felici Signed-off-by: Carmine Zaccagnino Signed-off-by: Paolo Valente Link: https://lore.kernel.org/r/20230103145503.71712-2-paolo.valente@linaro.org Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 91 +++++++++++++++++++++++++++++------------------------- 1 file changed, 49 insertions(+), 42 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 7d9b15f0dbd5..5f081f4d51fb 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -712,6 +712,46 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_put_queue(bfqq); } +static void bfq_sync_bfqq_move(struct bfq_data *bfqd, + struct bfq_queue *sync_bfqq, + struct bfq_io_cq *bic, + struct bfq_group *bfqg, + unsigned int act_idx) +{ + struct bfq_queue *bfqq; + + if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { + /* We are the only user of this bfqq, just move it */ + if (sync_bfqq->entity.sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + return; + } + + /* + * The queue was merged to a different queue. Check + * that the merge chain still belongs to the same + * cgroup. + */ + for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) + if (bfqq->entity.sched_data != &bfqg->sched_data) + break; + if (bfqq) { + /* + * Some queue changed cgroup so the merge is not valid + * anymore. We cannot easily just cancel the merge (by + * clearing new_bfqq) as there may be other processes + * using this queue and holding refs to all queues + * below sync_bfqq->new_bfqq. Similarly if the merge + * already happened, we need to detach from bfqq now + * so that we cannot merge bio to a request from the + * old cgroup. + */ + bfq_put_cooperator(sync_bfqq); + bfq_release_process_ref(bfqd, sync_bfqq); + bic_set_bfqq(bic, NULL, true, act_idx); + } +} + /** * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. @@ -726,53 +766,20 @@ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_group *bfqg) { - struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false); - struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true); - struct bfq_entity *entity; + unsigned int act_idx; - if (async_bfqq) { - entity = &async_bfqq->entity; + for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { + struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); + struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); - if (entity->sched_data != &bfqg->sched_data) { - bic_set_bfqq(bic, NULL, false); + if (async_bfqq && + async_bfqq->entity.sched_data != &bfqg->sched_data) { + bic_set_bfqq(bic, NULL, false, act_idx); bfq_release_process_ref(bfqd, async_bfqq); } - } - if (sync_bfqq) { - if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { - /* We are the only user of this bfqq, just move it */ - if (sync_bfqq->entity.sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, bfqg); - } else { - struct bfq_queue *bfqq; - - /* - * The queue was merged to a different queue. Check - * that the merge chain still belongs to the same - * cgroup. - */ - for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) - if (bfqq->entity.sched_data != - &bfqg->sched_data) - break; - if (bfqq) { - /* - * Some queue changed cgroup so the merge is - * not valid anymore. 
We cannot easily just - * cancel the merge (by clearing new_bfqq) as - * there may be other processes using this - * queue and holding refs to all queues below - * sync_bfqq->new_bfqq. Similarly if the merge - * already happened, we need to detach from - * bfqq now so that we cannot merge bio to a - * request from the old cgroup. - */ - bfq_put_cooperator(sync_bfqq); - bfq_release_process_ref(bfqd, sync_bfqq); - bic_set_bfqq(bic, NULL, true); - } - } + if (sync_bfqq) + bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx); } }

-- cgit v1.2.3

From 2d31c684a053b46f75b230899d00c0f56708243d Mon Sep 17 00:00:00 2001
From: Davide Zini
Date: Tue, 3 Jan 2023 15:55:02 +0100
Subject: block, bfq: inject I/O to underutilized actuators

The main service scheme of BFQ for sync I/O is serving one sync
bfq_queue at a time, for a while. In particular, BFQ enforces this
scheme when it deems the latter necessary to boost throughput or to
preserve service guarantees. Unfortunately, when BFQ enforces this
policy, only one actuator at a time gets served for a while, because
each bfq_queue contains I/O only for one actuator. The other actuators
may remain underutilized.

Actually, BFQ may serve (inject) extra I/O, taken from other
bfq_queues, in parallel with that of the in-service queue. This
injection mechanism may provide the ground for dealing also with the
above actuator-underutilization problem. Yet BFQ does not take the
actuator load into account when choosing which queue to pick extra I/O
from. In addition, BFQ may happen to inject extra I/O only when the
in-service queue is temporarily empty.

In view of these facts, this commit extends the injection mechanism in
such a way that the latter: (1) takes into account also the actuator
load; (2) checks such a load on each dispatch, and injects I/O for an
underutilized actuator, if there is one and there is I/O for it.

To perform the check in (2), this commit introduces a load threshold,
currently set to 4. A linear scan of each actuator is performed, until
an actuator is found for which the following two conditions hold: the
load of the actuator is below the threshold, and there is at least one
non-in-service queue that contains I/O for that actuator. If such a
pair (actuator, queue) is found, then the head request of that queue is
returned for dispatch, instead of the head request of the in-service
queue.

We have set the threshold, empirically, to the minimum possible value
for which an actuator is fully utilized, or close to being fully
utilized. By doing so, injected I/O 'steals' as few drive-queue slots
as possible from the in-service queue. This reduces as much as possible
the probability that the service of I/O from the in-service bfq_queue
gets delayed because of slot exhaustion, i.e., because all the slots of
the drive queue are filled with I/O injected from other queues (NCQ
provides for 32 slots).

This new mechanism also counters actuator underutilization in the case
of asymmetric configurations of bfq_queues: namely, when there are few
bfq_queues containing I/O for some actuators and many bfq_queues
containing I/O for other actuators, or when the bfq_queues containing
I/O for some actuators have lower weights than the other bfq_queues.
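[Editor's illustration] The per-dispatch check described above is implemented later in this patch (see the bfq_find_bfqq_for_underused_actuator() hunk in bfq-iosched.c below); the condensed sketch here merely restates its logic in one place, using the field and helper names from the diff. The function name example_pick_underused_actuator_queue() is invented for the sketch.

/*
 * Condensed restatement of the per-dispatch injection check: scan the
 * actuators in order and pick an active queue for the first actuator
 * that is both below the load threshold and has pending I/O.
 * rq_in_driver[i] counts requests dispatched to actuator i and not yet
 * completed; actuator_load_threshold is set to 4 in this series.
 */
static struct bfq_queue *
example_pick_underused_actuator_queue(struct bfq_data *bfqd)
{
        int i;

        for (i = 0; i < bfqd->num_actuators; i++) {
                struct bfq_queue *bfqq;

                if (bfqd->rq_in_driver[i] >= bfqd->actuator_load_threshold)
                        continue; /* this actuator is already loaded enough */

                /* any active queue with pending I/O for actuator i? */
                bfqq = bfq_find_active_bfqq_for_actuator(bfqd, i);
                if (bfqq)
                        return bfqq; /* inject its head request */
        }

        return NULL; /* nothing worth injecting */
}
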
Reviewed-by: Damien Le Moal Signed-off-by: Paolo Valente Signed-off-by: Davide Zini Link: https://lore.kernel.org/r/20230103145503.71712-8-paolo.valente@linaro.org Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 2 +- block/bfq-iosched.c | 136 ++++++++++++++++++++++++++++++++++++++-------------- block/bfq-iosched.h | 39 +++++++++++++-- block/bfq-wf2q.c | 2 +- 4 files changed, 139 insertions(+), 40 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 5f081f4d51fb..b42956ab5550 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -706,7 +706,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_activate_bfqq(bfqd, bfqq); } - if (!bfqd->in_service_queue && !bfqd->rq_in_driver) + if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 56486f24b4c5..d42a229b5a86 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2259,9 +2259,9 @@ static void bfq_add_request(struct request *rq) * elapsed. */ if (bfqq == bfqd->in_service_queue && - (bfqd->rq_in_driver == 0 || + (bfqd->tot_rq_in_driver == 0 || (bfqq->last_serv_time_ns > 0 && - bfqd->rqs_injected && bfqd->rq_in_driver > 0)) && + bfqd->rqs_injected && bfqd->tot_rq_in_driver > 0)) && time_is_before_eq_jiffies(bfqq->decrease_time_jif + msecs_to_jiffies(10))) { bfqd->last_empty_occupied_ns = ktime_get_ns(); @@ -2285,7 +2285,7 @@ static void bfq_add_request(struct request *rq) * will be set in case injection is performed * on bfqq before rq is completed). */ - if (bfqd->rq_in_driver == 0) + if (bfqd->tot_rq_in_driver == 0) bfqd->rqs_injected = false; } } @@ -2650,11 +2650,14 @@ void bfq_end_wr_async_queues(struct bfq_data *bfqd, static void bfq_end_wr(struct bfq_data *bfqd) { struct bfq_queue *bfqq; + int i; spin_lock_irq(&bfqd->lock); - list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) - bfq_bfqq_end_wr(bfqq); + for (i = 0; i < bfqd->num_actuators; i++) { + list_for_each_entry(bfqq, &bfqd->active_list[i], bfqq_list) + bfq_bfqq_end_wr(bfqq); + } list_for_each_entry(bfqq, &bfqd->idle_list, bfqq_list) bfq_bfqq_end_wr(bfqq); bfq_end_wr_async(bfqd); @@ -3611,13 +3614,13 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq) * - start a new observation interval with this dispatch */ if (now_ns - bfqd->last_dispatch > 100*NSEC_PER_MSEC && - bfqd->rq_in_driver == 0) + bfqd->tot_rq_in_driver == 0) goto update_rate_and_reset; /* Update sampling information */ bfqd->peak_rate_samples++; - if ((bfqd->rq_in_driver > 0 || + if ((bfqd->tot_rq_in_driver > 0 || now_ns - bfqd->last_completion < BFQ_MIN_TT) && !BFQ_RQ_SEEKY(bfqd, bfqd->last_position, rq)) bfqd->sequential_samples++; @@ -3882,10 +3885,8 @@ static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, return false; return (bfqq->wr_coeff > 1 && - (bfqd->wr_busy_queues < - tot_busy_queues || - bfqd->rq_in_driver >= - bfqq->dispatched + 4)) || + (bfqd->wr_busy_queues < tot_busy_queues || + bfqd->tot_rq_in_driver >= bfqq->dispatched + 4)) || bfq_asymmetric_scenario(bfqd, bfqq) || tot_busy_queues == 1; } @@ -4656,6 +4657,8 @@ bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) { struct bfq_queue *bfqq, *in_serv_bfqq = bfqd->in_service_queue; unsigned int limit = in_serv_bfqq->inject_limit; + int i; + /* * If * - bfqq is not weight-raised and therefore does not carry @@ -4687,7 +4690,7 @@ 
bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) ) limit = 1; - if (bfqd->rq_in_driver >= limit) + if (bfqd->tot_rq_in_driver >= limit) return NULL; /* @@ -4702,11 +4705,12 @@ bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) * (and re-added only if it gets new requests, but then it * is assigned again enough budget for its new backlog). */ - list_for_each_entry(bfqq, &bfqd->active_list, bfqq_list) - if (!RB_EMPTY_ROOT(&bfqq->sort_list) && - (in_serv_always_inject || bfqq->wr_coeff > 1) && - bfq_serv_to_charge(bfqq->next_rq, bfqq) <= - bfq_bfqq_budget_left(bfqq)) { + for (i = 0; i < bfqd->num_actuators; i++) { + list_for_each_entry(bfqq, &bfqd->active_list[i], bfqq_list) + if (!RB_EMPTY_ROOT(&bfqq->sort_list) && + (in_serv_always_inject || bfqq->wr_coeff > 1) && + bfq_serv_to_charge(bfqq->next_rq, bfqq) <= + bfq_bfqq_budget_left(bfqq)) { /* * Allow for only one large in-flight request * on non-rotational devices, for the @@ -4731,22 +4735,69 @@ bfq_choose_bfqq_for_injection(struct bfq_data *bfqd) else limit = in_serv_bfqq->inject_limit; - if (bfqd->rq_in_driver < limit) { + if (bfqd->tot_rq_in_driver < limit) { bfqd->rqs_injected = true; return bfqq; } } + } + + return NULL; +} + +static struct bfq_queue * +bfq_find_active_bfqq_for_actuator(struct bfq_data *bfqd, int idx) +{ + struct bfq_queue *bfqq; + + if (bfqd->in_service_queue && + bfqd->in_service_queue->actuator_idx == idx) + return bfqd->in_service_queue; + + list_for_each_entry(bfqq, &bfqd->active_list[idx], bfqq_list) { + if (!RB_EMPTY_ROOT(&bfqq->sort_list) && + bfq_serv_to_charge(bfqq->next_rq, bfqq) <= + bfq_bfqq_budget_left(bfqq)) { + return bfqq; + } + } return NULL; } +/* + * Perform a linear scan of each actuator, until an actuator is found + * for which the following two conditions hold: the load of the + * actuator is below the threshold (see comments on actuator_load_threshold + * for details), and there is a queue that contains I/O for that + * actuator. On success, return that queue. + */ +static struct bfq_queue * +bfq_find_bfqq_for_underused_actuator(struct bfq_data *bfqd) +{ + int i; + + for (i = 0 ; i < bfqd->num_actuators; i++) { + if (bfqd->rq_in_driver[i] < bfqd->actuator_load_threshold) { + struct bfq_queue *bfqq = + bfq_find_active_bfqq_for_actuator(bfqd, i); + + if (bfqq) + return bfqq; + } + } + + return NULL; +} + + /* * Select a queue for service. If we have a current queue in service, * check whether to continue servicing it, or retrieve and set a new one. */ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) { - struct bfq_queue *bfqq; + struct bfq_queue *bfqq, *inject_bfqq; struct request *next_rq; enum bfqq_expiration reason = BFQQE_BUDGET_TIMEOUT; @@ -4768,6 +4819,15 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) goto expire; check_queue: + /* + * If some actuator is underutilized, but the in-service + * queue does not contain I/O for that actuator, then try to + * inject I/O for that actuator. + */ + inject_bfqq = bfq_find_bfqq_for_underused_actuator(bfqd); + if (inject_bfqq && inject_bfqq != bfqq) + return inject_bfqq; + /* * This loop is rarely executed more than once. Even when it * happens, it is much more convenient to re-execute this loop @@ -5123,11 +5183,11 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) /* * We exploit the bfq_finish_requeue_request hook to - * decrement rq_in_driver, but + * decrement tot_rq_in_driver, but * bfq_finish_requeue_request will not be invoked on * this request. 
So, to avoid unbalance, just start - * this request, without incrementing rq_in_driver. As - * a negative consequence, rq_in_driver is deceptively + * this request, without incrementing tot_rq_in_driver. As + * a negative consequence, tot_rq_in_driver is deceptively * lower than it should be while this request is in * service. This may cause bfq_schedule_dispatch to be * invoked uselessly. @@ -5136,7 +5196,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * bfq_finish_requeue_request hook, if defined, is * probably invoked also on this request. So, by * exploiting this hook, we could 1) increment - * rq_in_driver here, and 2) decrement it in + * tot_rq_in_driver here, and 2) decrement it in * bfq_finish_requeue_request. Such a solution would * let the value of the counter be always accurate, * but it would entail using an extra interface @@ -5165,7 +5225,7 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) * Of course, serving one request at a time may cause loss of * throughput. */ - if (bfqd->strict_guarantees && bfqd->rq_in_driver > 0) + if (bfqd->strict_guarantees && bfqd->tot_rq_in_driver > 0) goto exit; bfqq = bfq_select_queue(bfqd); @@ -5176,7 +5236,8 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx) if (rq) { inc_in_driver_start_rq: - bfqd->rq_in_driver++; + bfqd->rq_in_driver[bfqq->actuator_idx]++; + bfqd->tot_rq_in_driver++; start_rq: rq->rq_flags |= RQF_STARTED; } @@ -6243,7 +6304,7 @@ static void bfq_update_hw_tag(struct bfq_data *bfqd) struct bfq_queue *bfqq = bfqd->in_service_queue; bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver, - bfqd->rq_in_driver); + bfqd->tot_rq_in_driver); if (bfqd->hw_tag == 1) return; @@ -6254,7 +6315,7 @@ static void bfq_update_hw_tag(struct bfq_data *bfqd) * sum is not exact, as it's not taking into account deactivated * requests. */ - if (bfqd->rq_in_driver + bfqd->queued <= BFQ_HW_QUEUE_THRESHOLD) + if (bfqd->tot_rq_in_driver + bfqd->queued <= BFQ_HW_QUEUE_THRESHOLD) return; /* @@ -6265,7 +6326,7 @@ static void bfq_update_hw_tag(struct bfq_data *bfqd) if (bfqq && bfq_bfqq_has_short_ttime(bfqq) && bfqq->dispatched + bfqq->queued[0] + bfqq->queued[1] < BFQ_HW_QUEUE_THRESHOLD && - bfqd->rq_in_driver < BFQ_HW_QUEUE_THRESHOLD) + bfqd->tot_rq_in_driver < BFQ_HW_QUEUE_THRESHOLD) return; if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES) @@ -6286,7 +6347,8 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) bfq_update_hw_tag(bfqd); - bfqd->rq_in_driver--; + bfqd->rq_in_driver[bfqq->actuator_idx]--; + bfqd->tot_rq_in_driver--; bfqq->dispatched--; if (!bfqq->dispatched && !bfq_bfqq_busy(bfqq)) { @@ -6406,7 +6468,7 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) BFQQE_NO_MORE_REQUESTS); } - if (!bfqd->rq_in_driver) + if (!bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); } @@ -6537,13 +6599,13 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd, * conditions to do it, or we can lower the last base value * computed. * - * NOTE: (bfqd->rq_in_driver == 1) means that there is no I/O + * NOTE: (bfqd->tot_rq_in_driver == 1) means that there is no I/O * request in flight, because this function is in the code * path that handles the completion of a request of bfqq, and, * in particular, this function is executed before - * bfqd->rq_in_driver is decremented in such a code path. + * bfqd->tot_rq_in_driver is decremented in such a code path. 
*/ - if ((bfqq->last_serv_time_ns == 0 && bfqd->rq_in_driver == 1) || + if ((bfqq->last_serv_time_ns == 0 && bfqd->tot_rq_in_driver == 1) || tot_time_ns < bfqq->last_serv_time_ns) { if (bfqq->last_serv_time_ns == 0) { /* @@ -6553,7 +6615,7 @@ static void bfq_update_inject_limit(struct bfq_data *bfqd, bfqq->inject_limit = max_t(unsigned int, 1, old_limit); } bfqq->last_serv_time_ns = tot_time_ns; - } else if (!bfqd->rqs_injected && bfqd->rq_in_driver == 1) + } else if (!bfqd->rqs_injected && bfqd->tot_rq_in_driver == 1) /* * No I/O injected and no request still in service in * the drive: these are the exact conditions for @@ -7208,7 +7270,8 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->num_groups_with_pending_reqs = 0; #endif - INIT_LIST_HEAD(&bfqd->active_list); + INIT_LIST_HEAD(&bfqd->active_list[0]); + INIT_LIST_HEAD(&bfqd->active_list[1]); INIT_LIST_HEAD(&bfqd->idle_list); INIT_HLIST_HEAD(&bfqd->burst_list); @@ -7253,6 +7316,9 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) ref_wr_duration[blk_queue_nonrot(bfqd->queue)]; bfqd->peak_rate = ref_rate[blk_queue_nonrot(bfqd->queue)] * 2 / 3; + /* see comments on the definition of next field inside bfq_data */ + bfqd->actuator_load_threshold = 4; + spin_lock_init(&bfqd->lock); /* diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ba2ece8b6646..2b4893a68b43 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -590,7 +590,12 @@ struct bfq_data { /* number of queued requests */ int queued; /* number of requests dispatched and waiting for completion */ - int rq_in_driver; + int tot_rq_in_driver; + /* + * number of requests dispatched and waiting for completion + * for each actuator + */ + int rq_in_driver[BFQ_MAX_ACTUATORS]; /* true if the device is non rotational and performs queueing */ bool nonrot_with_queueing; @@ -684,8 +689,13 @@ struct bfq_data { /* maximum budget allotted to a bfq_queue before rescheduling */ int bfq_max_budget; - /* list of all the bfq_queues active on the device */ - struct list_head active_list; + /* + * List of all the bfq_queues active for a specific actuator + * on the device. Keeping active queues separate on a + * per-actuator basis helps implementing per-actuator + * injection more efficiently. + */ + struct list_head active_list[BFQ_MAX_ACTUATORS]; /* list of all the bfq_queues idle on the device */ struct list_head idle_list; @@ -821,6 +831,29 @@ struct bfq_data { sector_t sector[BFQ_MAX_ACTUATORS]; sector_t nr_sectors[BFQ_MAX_ACTUATORS]; struct blk_independent_access_range ia_ranges[BFQ_MAX_ACTUATORS]; + + /* + * If the number of I/O requests queued in the device for a + * given actuator is below next threshold, then the actuator + * is deemed as underutilized. If this condition is found to + * hold for some actuator upon a dispatch, but (i) the + * in-service queue does not contain I/O for that actuator, + * while (ii) some other queue does contain I/O for that + * actuator, then the head I/O request of the latter queue is + * returned (injected), instead of the head request of the + * currently in-service queue. + * + * We set the threshold, empirically, to the minimum possible + * value for which an actuator is fully utilized, or close to + * be fully utilized. By doing so, injected I/O 'steals' as + * few drive-queue slots as possibile to the in-service + * queue. 
This reduces as much as possible the probability + * that the service of I/O from the in-service bfq_queue gets + * delayed because of slot exhaustion, i.e., because all the + * slots of the drive queue are filled with I/O injected from + * other queues (NCQ provides for 32 slots). + */ + unsigned int actuator_load_threshold; }; enum bfqq_state_flags { diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index ea4c3d757fdd..7941b6f07391 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -493,7 +493,7 @@ static void bfq_active_insert(struct bfq_service_tree *st, bfq_update_active_tree(node); if (bfqq) - list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list); + list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list[bfqq->actuator_idx]); bfq_inc_active_entities(entity); } -- cgit v1.2.3 From 84d7d462b16dd5f0bf7c7ca9254bf81db2c952a2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:03:47 +0100 Subject: blk-cgroup: pin the gendisk in struct blkcg_gq Currently each blkcg_gq holds a request_queue reference, which is what is used in the policies. But a lot of these interfaces will move over to use a gendisk, so store a disk in struct blkcg_gq and hold a reference to it. Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Herrmann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230203150400.3199230-7-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 6 +++--- block/blk-cgroup-rwstat.c | 2 +- block/blk-cgroup.c | 35 +++++++++++++++++------------------ block/blk-cgroup.h | 11 +++++------ block/blk-iocost.c | 2 +- block/blk-iolatency.c | 4 ++-- block/blk-throttle.c | 4 ++-- 7 files changed, 31 insertions(+), 33 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index b42956ab5550..1d4a3f15049b 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -405,7 +405,7 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) parent = bfqg_parent(bfqg); - lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock); + lockdep_assert_held(&bfqg_to_blkg(bfqg)->disk->queue->queue_lock); if (unlikely(!parent)) return; @@ -536,7 +536,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct bfq_group *bfqg = blkg_to_bfqg(blkg); - struct bfq_data *bfqd = blkg->q->elevator->elevator_data; + struct bfq_data *bfqd = blkg->disk->queue->elevator->elevator_data; struct bfq_entity *entity = &bfqg->entity; struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); @@ -1201,7 +1201,7 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct cgroup_subsys_state *pos_css; u64 sum = 0; - lockdep_assert_held(&blkg->q->queue_lock); + lockdep_assert_held(&blkg->disk->queue->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c index 3304e841df7c..b8b8c82e667a 100644 --- a/block/blk-cgroup-rwstat.c +++ b/block/blk-cgroup-rwstat.c @@ -107,7 +107,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, struct cgroup_subsys_state *pos_css; unsigned int i; - lockdep_assert_held(&blkg->q->queue_lock); + lockdep_assert_held(&blkg->disk->queue->queue_lock); memset(sum, 0, sizeof(*sum)); rcu_read_lock(); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0b3226cbf3f2..0e368387497d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -116,7 +116,6 @@ static bool blkcg_policy_enabled(struct request_queue *q, static void blkg_free(struct blkcg_gq *blkg) { - 
struct request_queue *q = blkg->q; int i; /* @@ -126,16 +125,16 @@ static void blkg_free(struct blkcg_gq *blkg) * blkcg_mutex is used to synchronize blkg_free_workfn() and * blkcg_deactivate_policy(). */ - mutex_lock(&q->blkcg_mutex); + mutex_lock(&blkg->disk->queue->blkcg_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->parent) blkg_put(blkg->parent); list_del_init(&blkg->q_node); - mutex_unlock(&q->blkcg_mutex); + mutex_unlock(&blkg->disk->queue->blkcg_mutex); - blk_put_queue(q); + put_disk(blkg->disk); free_percpu(blkg->iostat_cpu); percpu_ref_exit(&blkg->refcnt); kfree(blkg); @@ -251,10 +250,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask); if (!blkg->iostat_cpu) goto out_exit_refcnt; - if (!blk_get_queue(disk->queue)) + + if (test_bit(GD_DEAD, &disk->state)) goto out_free_iostat; + get_device(disk_to_dev(disk)); + blkg->disk = disk; - blkg->q = disk->queue; INIT_LIST_HEAD(&blkg->q_node); spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); @@ -290,7 +291,7 @@ out_free_pds: while (--i >= 0) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); - blk_put_queue(disk->queue); + put_disk(blkg->disk); out_free_iostat: free_percpu(blkg->iostat_cpu); out_exit_refcnt: @@ -461,7 +462,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) struct blkcg *blkcg = blkg->blkcg; int i; - lockdep_assert_held(&blkg->q->queue_lock); + lockdep_assert_held(&blkg->disk->queue->queue_lock); lockdep_assert_held(&blkcg->lock); /* @@ -485,7 +486,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) blkg->online = false; - radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); + radix_tree_delete(&blkcg->blkg_tree, blkg->disk->queue->id); hlist_del_init_rcu(&blkg->blkcg_node); /* @@ -572,9 +573,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, const char *blkg_dev_name(struct blkcg_gq *blkg) { - if (!blkg->q->disk) - return NULL; - return bdi_dev_name(blkg->q->disk->bdi); + return bdi_dev_name(blkg->disk->bdi); } /** @@ -606,10 +605,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(&blkg->q->queue_lock); - if (blkcg_policy_enabled(blkg->q, pol)) + spin_lock_irq(&blkg->disk->queue->queue_lock); + if (blkcg_policy_enabled(blkg->disk->queue, pol)) total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(&blkg->q->queue_lock); + spin_unlock_irq(&blkg->disk->queue->queue_lock); } rcu_read_unlock(); @@ -1033,9 +1032,9 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(&blkg->q->queue_lock); + spin_lock_irq(&blkg->disk->queue->queue_lock); blkcg_print_one_stat(blkg, sf); - spin_unlock_irq(&blkg->q->queue_lock); + spin_unlock_irq(&blkg->disk->queue->queue_lock); } rcu_read_unlock(); return 0; @@ -1105,7 +1104,7 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg) while (!hlist_empty(&blkcg->blkg_list)) { struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, struct blkcg_gq, blkcg_node); - struct request_queue *q = blkg->q; + struct request_queue *q = blkg->disk->queue; if (need_resched() || !spin_trylock(&q->queue_lock)) { /* diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index b13ee84f358e..996572a9a0b7 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -53,8 +53,7 
@@ struct blkg_iostat_set { /* association between a blk cgroup and a request queue */ struct blkcg_gq { - /* Pointer to the associated request_queue */ - struct request_queue *q; + struct gendisk *disk; struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; @@ -255,11 +254,11 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, return q->root_blkg; blkg = rcu_dereference(blkcg->blkg_hint); - if (blkg && blkg->q == q) + if (blkg && blkg->disk->queue == q) return blkg; blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); - if (blkg && blkg->q != q) + if (blkg && blkg->disk->queue != q) blkg = NULL; return blkg; } @@ -359,7 +358,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q))) + (p_blkg)->disk->queue))) /** * blkg_for_each_descendant_post - post-order walk of a blkg's descendants @@ -374,7 +373,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->q))) + (p_blkg)->disk->queue))) bool __blkcg_punt_bio_submit(struct bio *bio); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 9694394ed883..a2b4e7146be5 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2953,7 +2953,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd) { struct ioc_gq *iocg = pd_to_iocg(pd); struct blkcg_gq *blkg = pd_to_blkg(&iocg->pd); - struct ioc *ioc = q_to_ioc(blkg->q); + struct ioc *ioc = q_to_ioc(blkg->disk->queue); struct ioc_now now; struct blkcg_gq *tblkg; unsigned long flags; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index ecdc10741836..b55eac2cf919 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -974,12 +974,12 @@ static void iolatency_pd_init(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); - struct rq_qos *rqos = blkcg_rq_qos(blkg->q); + struct rq_qos *rqos = blkcg_rq_qos(blkg->disk->queue); struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); u64 now = ktime_to_ns(ktime_get()); int cpu; - if (blk_queue_nonrot(blkg->q)) + if (blk_queue_nonrot(blkg->disk->queue)) iolat->ssd = true; else iolat->ssd = false; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 6fb5a2f9e1ee..f802d8f90994 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -388,7 +388,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd) { struct throtl_grp *tg = pd_to_tg(pd); struct blkcg_gq *blkg = tg_to_blkg(tg); - struct throtl_data *td = blkg->q->td; + struct throtl_data *td = blkg->disk->queue->td; struct throtl_service_queue *sq = &tg->service_queue; /* @@ -1175,7 +1175,7 @@ static void throtl_pending_timer_fn(struct timer_list *t) /* throtl_data may be gone, so figure out request queue by blkg */ if (tg) - q = tg->pd.blkg->q; + q = tg->pd.blkg->disk->queue; else q = td->queue; -- cgit v1.2.3 From 40e4996ec099a301083eb7e29095ebdfc31443da Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:03:57 +0100 Subject: blk-cgroup: pass a gendisk to blkcg_{de,}activate_policy Prepare for storing the blkcg information in the gendisk instead of the request_queue. 
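[Editor's illustration] As a usage sketch of the reworked interface (the policy name blkcg_policy_example and the two wrapper functions are hypothetical, not part of this series): callers now hand the helpers a gendisk, and the request_queue is derived internally via disk->queue, as the diff below shows.

/*
 * Hypothetical caller, for illustration only: a policy being switched
 * from the old request_queue-based calls to the gendisk-based ones
 * introduced here.
 */
static int example_policy_init(struct gendisk *disk)
{
        int ret;

        ret = blkcg_activate_policy(disk, &blkcg_policy_example);
        if (ret)
                return ret;

        /* ... policy-specific setup would go here ... */
        return 0;
}

static void example_policy_exit(struct gendisk *disk)
{
        blkcg_deactivate_policy(disk, &blkcg_policy_example);
}
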
Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Herrmann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230203150400.3199230-17-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 2 +- block/bfq-iosched.c | 2 +- block/blk-cgroup.c | 21 +++++++++++---------- block/blk-cgroup.h | 9 ++++----- block/blk-iocost.c | 4 ++-- block/blk-iolatency.c | 4 ++-- block/blk-ioprio.c | 4 ++-- block/blk-throttle.c | 4 ++-- 8 files changed, 25 insertions(+), 25 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 1d4a3f15049b..032c14f0451a 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -1291,7 +1291,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { int ret; - ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq); + ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq); if (ret) return NULL; diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 5afa661fa2ea..777dcab73c8e 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7146,7 +7146,7 @@ static void bfq_exit_queue(struct elevator_queue *e) bfqg_and_blkg_put(bfqd->root_group); #ifdef CONFIG_BFQ_GROUP_IOSCHED - blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); + blkcg_deactivate_policy(bfqd->queue->disk, &blkcg_policy_bfq); #else spin_lock_irq(&bfqd->lock); bfq_put_async_queues(bfqd, bfqd->root_group); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 168b2f803238..c20929bce812 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1390,14 +1390,14 @@ struct cgroup_subsys io_cgrp_subsys = { EXPORT_SYMBOL_GPL(io_cgrp_subsys); /** - * blkcg_activate_policy - activate a blkcg policy on a request_queue - * @q: request_queue of interest + * blkcg_activate_policy - activate a blkcg policy on a gendisk + * @disk: gendisk of interest * @pol: blkcg policy to activate * - * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through + * Activate @pol on @disk. Requires %GFP_KERNEL context. @disk goes through * bypass mode to populate its blkgs with policy_data for @pol. * - * Activation happens with @q bypassed, so nobody would be accessing blkgs + * Activation happens with @disk bypassed, so nobody would be accessing blkgs * from IO path. Update of each blkg is protected by both queue and blkcg * locks so that holding either lock and testing blkcg_policy_enabled() is * always enough for dereferencing policy data. @@ -1405,9 +1405,9 @@ EXPORT_SYMBOL_GPL(io_cgrp_subsys); * The caller is responsible for synchronizing [de]activations and policy * [un]registerations. Returns 0 on success, -errno on failure. */ -int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol) +int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { + struct request_queue *q = disk->queue; struct blkg_policy_data *pd_prealloc = NULL; struct blkcg_gq *blkg, *pinned_blkg = NULL; int ret; @@ -1508,16 +1508,17 @@ enomem: EXPORT_SYMBOL_GPL(blkcg_activate_policy); /** - * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue - * @q: request_queue of interest + * blkcg_deactivate_policy - deactivate a blkcg policy on a gendisk + * @disk: gendisk of interest * @pol: blkcg policy to deactivate * - * Deactivate @pol on @q. Follows the same synchronization rules as + * Deactivate @pol on @disk. Follows the same synchronization rules as * blkcg_activate_policy(). 
*/ -void blkcg_deactivate_policy(struct request_queue *q, +void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { + struct request_queue *q = disk->queue; struct blkcg_gq *blkg; if (!blkcg_policy_enabled(q, pol)) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 996572a9a0b7..27068faa2cd0 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -194,9 +194,8 @@ void blkcg_exit_disk(struct gendisk *disk); /* Blkio controller policy registration */ int blkcg_policy_register(struct blkcg_policy *pol); void blkcg_policy_unregister(struct blkcg_policy *pol); -int blkcg_activate_policy(struct request_queue *q, - const struct blkcg_policy *pol); -void blkcg_deactivate_policy(struct request_queue *q, +int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol); +void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol); const char *blkg_dev_name(struct blkcg_gq *blkg); @@ -495,9 +494,9 @@ static inline int blkcg_init_disk(struct gendisk *disk) { return 0; } static inline void blkcg_exit_disk(struct gendisk *disk) { } static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } -static inline int blkcg_activate_policy(struct request_queue *q, +static inline int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { return 0; } -static inline void blkcg_deactivate_policy(struct request_queue *q, +static inline void blkcg_deactivate_policy(struct gendisk *disk, const struct blkcg_policy *pol) { } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, diff --git a/block/blk-iocost.c b/block/blk-iocost.c index a2e9bf30039b..078b77709519 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2821,7 +2821,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos) { struct ioc *ioc = rqos_to_ioc(rqos); - blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iocost); + blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iocost); spin_lock_irq(&ioc->lock); ioc->running = IOC_STOP; @@ -2893,7 +2893,7 @@ static int blk_iocost_init(struct gendisk *disk) if (ret) goto err_free_ioc; - ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iocost); + ret = blkcg_activate_policy(disk, &blkcg_policy_iocost); if (ret) goto err_del_qos; return 0; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 8e1e43bbde6f..39853fc5c2b0 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -646,7 +646,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos) timer_shutdown_sync(&blkiolat->timer); flush_work(&blkiolat->enable_work); - blkcg_deactivate_policy(rqos->disk->queue, &blkcg_policy_iolatency); + blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iolatency); kfree(blkiolat); } @@ -768,7 +768,7 @@ int blk_iolatency_init(struct gendisk *disk) &blkcg_iolatency_ops); if (ret) goto err_free; - ret = blkcg_activate_policy(disk->queue, &blkcg_policy_iolatency); + ret = blkcg_activate_policy(disk, &blkcg_policy_iolatency); if (ret) goto err_qos_del; diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c index 8bb6b8eba4ce..8194826cc824 100644 --- a/block/blk-ioprio.c +++ b/block/blk-ioprio.c @@ -204,12 +204,12 @@ void blkcg_set_ioprio(struct bio *bio) void blk_ioprio_exit(struct gendisk *disk) { - blkcg_deactivate_policy(disk->queue, &ioprio_policy); + blkcg_deactivate_policy(disk, &ioprio_policy); } int blk_ioprio_init(struct gendisk *disk) { - return blkcg_activate_policy(disk->queue, 
&ioprio_policy); + return blkcg_activate_policy(disk, &ioprio_policy); } static int __init ioprio_init(void) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f802d8f90994..efc0a9092c69 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2395,7 +2395,7 @@ int blk_throtl_init(struct gendisk *disk) td->low_downgrade_time = jiffies; /* activate policy */ - ret = blkcg_activate_policy(q, &blkcg_policy_throtl); + ret = blkcg_activate_policy(disk, &blkcg_policy_throtl); if (ret) { free_percpu(td->latency_buckets[READ]); free_percpu(td->latency_buckets[WRITE]); @@ -2411,7 +2411,7 @@ void blk_throtl_exit(struct gendisk *disk) BUG_ON(!q->td); del_timer_sync(&q->td->service_queue.pending_timer); throtl_shutdown_wq(q); - blkcg_deactivate_policy(q, &blkcg_policy_throtl); + blkcg_deactivate_policy(disk, &blkcg_policy_throtl); free_percpu(q->td->latency_buckets[READ]); free_percpu(q->td->latency_buckets[WRITE]); kfree(q->td); -- cgit v1.2.3 From 0a0b4f79db2e6e745672aa3852cf5fdf7af14a0f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:03:58 +0100 Subject: blk-cgroup: pass a gendisk to pd_alloc_fn No need to the request_queue here, pass a gendisk and extract the node ids from that. Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Herrmann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230203150400.3199230-18-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 6 +++--- block/blk-cgroup.c | 10 +++++----- block/blk-cgroup.h | 4 ++-- block/blk-iocost.c | 7 ++++--- block/blk-iolatency.c | 7 +++---- block/blk-ioprio.c | 2 +- block/blk-throttle.c | 7 +++---- 7 files changed, 21 insertions(+), 22 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 032c14f0451a..37333c164ed4 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -513,12 +513,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd) kfree(cpd_to_bfqgd(cpd)); } -static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q, - struct blkcg *blkcg) +static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk, + struct blkcg *blkcg, gfp_t gfp) { struct bfq_group *bfqg; - bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node); + bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id); if (!bfqg) return NULL; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c20929bce812..fc264b155882 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -276,7 +276,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, continue; /* alloc per-policy data and attach it to blkg */ - pd = pol->pd_alloc_fn(gfp_mask, disk->queue, blkcg); + pd = pol->pd_alloc_fn(disk, blkcg, gfp_mask); if (!pd) goto out_free_pds; blkg->pd[i] = pd; @@ -1432,8 +1432,8 @@ retry: pd = pd_prealloc; pd_prealloc = NULL; } else { - pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, - blkg->blkcg); + pd = pol->pd_alloc_fn(disk, blkg->blkcg, + GFP_NOWAIT | __GFP_NOWARN); } if (!pd) { @@ -1450,8 +1450,8 @@ retry: if (pd_prealloc) pol->pd_free_fn(pd_prealloc); - pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, - blkg->blkcg); + pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg, + GFP_KERNEL); if (pd_prealloc) goto retry; else diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 27068faa2cd0..3d9e42c519db 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -154,8 +154,8 @@ typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data 
*cpd); typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); -typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, - struct request_queue *q, struct blkcg *blkcg); +typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(struct gendisk *disk, + struct blkcg *blkcg, gfp_t gfp); typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 078b77709519..7a2dc9dc8e3b 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2923,13 +2923,14 @@ static void ioc_cpd_free(struct blkcg_policy_data *cpd) kfree(container_of(cpd, struct ioc_cgrp, cpd)); } -static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q, - struct blkcg *blkcg) +static struct blkg_policy_data *ioc_pd_alloc(struct gendisk *disk, + struct blkcg *blkcg, gfp_t gfp) { int levels = blkcg->css.cgroup->level + 1; struct ioc_gq *iocg; - iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node); + iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, + disk->node_id); if (!iocg) return NULL; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 39853fc5c2b0..bc0d217f5c17 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -946,13 +946,12 @@ static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) iolat->max_depth, avg_lat, cur_win); } -static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, - struct request_queue *q, - struct blkcg *blkcg) +static struct blkg_policy_data *iolatency_pd_alloc(struct gendisk *disk, + struct blkcg *blkcg, gfp_t gfp) { struct iolatency_grp *iolat; - iolat = kzalloc_node(sizeof(*iolat), gfp, q->node); + iolat = kzalloc_node(sizeof(*iolat), gfp, disk->node_id); if (!iolat) return NULL; iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat), diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c index 8194826cc824..055529b9b92b 100644 --- a/block/blk-ioprio.c +++ b/block/blk-ioprio.c @@ -116,7 +116,7 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf, } static struct blkg_policy_data * -ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg) +ioprio_alloc_pd(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { struct ioprio_blkg *ioprio_blkg; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index efc0a9092c69..74bb1e753ea0 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -335,14 +335,13 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq) timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0); } -static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, - struct request_queue *q, - struct blkcg *blkcg) +static struct blkg_policy_data *throtl_pd_alloc(struct gendisk *disk, + struct blkcg *blkcg, gfp_t gfp) { struct throtl_grp *tg; int rw; - tg = kzalloc_node(sizeof(*tg), gfp, q->node); + tg = kzalloc_node(sizeof(*tg), gfp, disk->node_id); if (!tg) return NULL; -- cgit v1.2.3 From 3f13ab7c80fdb0ada86a8e3e818960bc1ccbaa59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Feb 2023 16:04:00 +0100 Subject: blk-cgroup: move the cgroup information to struct gendisk cgroup information only makes sense on a live gendisk that allows file system I/O (which includes the raw block device). 
So move over the cgroup related members. Signed-off-by: Christoph Hellwig Reviewed-by: Andreas Herrmann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230203150400.3199230-20-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 +-- block/blk-cgroup.c | 66 +++++++++++++++++++++++++------------------------- block/blk-cgroup.h | 4 +-- block/blk-iolatency.c | 2 +- block/blk-throttle.c | 16 +++++++----- include/linux/blkdev.h | 12 ++++----- 6 files changed, 54 insertions(+), 50 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 37333c164ed4..4fdbbec71647 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -1001,7 +1001,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) { struct blkcg_gq *blkg; - list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { + list_for_each_entry(blkg, &bfqd->queue->disk->blkg_list, entry) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); bfq_end_wr_async_queues(bfqd, bfqg); @@ -1295,7 +1295,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) if (ret) return NULL; - return blkg_to_bfqg(bfqd->queue->root_blkg); + return blkg_to_bfqg(bfqd->queue->disk->root_blkg); } struct blkcg_policy blkcg_policy_bfq = { diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 45a683e88bca..8faeca6022be 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -108,10 +108,10 @@ static struct cgroup_subsys_state *blkcg_css(void) return task_css(current, io_cgrp_id); } -static bool blkcg_policy_enabled(struct request_queue *q, +static bool blkcg_policy_enabled(struct gendisk *disk, const struct blkcg_policy *pol) { - return pol && test_bit(pol->plid, q->blkcg_pols); + return pol && test_bit(pol->plid, disk->blkcg_pols); } static void blkg_free(struct blkcg_gq *blkg) @@ -121,18 +121,18 @@ static void blkg_free(struct blkcg_gq *blkg) /* * pd_free_fn() can also be called from blkcg_deactivate_policy(), * in order to make sure pd_free_fn() is called in order, the deletion - * of the list blkg->q_node is delayed to here from blkg_destroy(), and + * of the list blkg->entry is delayed to here from blkg_destroy(), and * blkcg_mutex is used to synchronize blkg_free_workfn() and * blkcg_deactivate_policy(). 
*/ - mutex_lock(&blkg->disk->queue->blkcg_mutex); + mutex_lock(&blkg->disk->blkcg_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->parent) blkg_put(blkg->parent); - list_del_init(&blkg->q_node); - mutex_unlock(&blkg->disk->queue->blkcg_mutex); + list_del_init(&blkg->entry); + mutex_unlock(&blkg->disk->blkcg_mutex); put_disk(blkg->disk); free_percpu(blkg->iostat_cpu); @@ -256,7 +256,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, get_device(disk_to_dev(disk)); blkg->disk = disk; - INIT_LIST_HEAD(&blkg->q_node); + INIT_LIST_HEAD(&blkg->entry); spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); @@ -272,7 +272,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; - if (!blkcg_policy_enabled(disk->queue, pol)) + if (!blkcg_policy_enabled(disk, pol)) continue; /* alloc per-policy data and attach it to blkg */ @@ -358,7 +358,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); - list_add(&blkg->q_node, &disk->queue->blkg_list); + list_add(&blkg->entry, &disk->blkg_list); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -430,7 +430,7 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - struct blkcg_gq *ret_blkg = q->root_blkg; + struct blkcg_gq *ret_blkg = disk->root_blkg; while (parent) { blkg = blkg_lookup(parent, disk); @@ -512,7 +512,7 @@ static void blkg_destroy_all(struct gendisk *disk) restart: spin_lock_irq(&q->queue_lock); - list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { + list_for_each_entry_safe(blkg, n, &disk->blkg_list, entry) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -531,7 +531,7 @@ restart: } } - q->root_blkg = NULL; + disk->root_blkg = NULL; spin_unlock_irq(&q->queue_lock); } @@ -606,7 +606,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { spin_lock_irq(&blkg->disk->queue->queue_lock); - if (blkcg_policy_enabled(blkg->disk->queue, pol)) + if (blkcg_policy_enabled(blkg->disk, pol)) total += prfill(sf, blkg->pd[pol->plid], data); spin_unlock_irq(&blkg->disk->queue->queue_lock); } @@ -714,7 +714,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, rcu_read_lock(); spin_lock_irq(&q->queue_lock); - if (!blkcg_policy_enabled(q, pol)) { + if (!blkcg_policy_enabled(disk, pol)) { ret = -EOPNOTSUPP; goto fail_unlock; } @@ -757,7 +757,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, rcu_read_lock(); spin_lock_irq(&q->queue_lock); - if (!blkcg_policy_enabled(q, pol)) { + if (!blkcg_policy_enabled(disk, pol)) { blkg_free(new_blkg); ret = -EOPNOTSUPP; goto fail_preloaded; @@ -937,7 +937,7 @@ static void blkcg_fill_root_iostats(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct block_device *bdev = dev_to_bdev(dev); - struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg; + struct blkcg_gq *blkg = bdev->bd_disk->root_blkg; struct blkg_iostat tmp; int cpu; 
unsigned long flags; @@ -1284,8 +1284,8 @@ int blkcg_init_disk(struct gendisk *disk) bool preloaded; int ret; - INIT_LIST_HEAD(&q->blkg_list); - mutex_init(&q->blkcg_mutex); + INIT_LIST_HEAD(&disk->blkg_list); + mutex_init(&disk->blkcg_mutex); new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL); if (!new_blkg) @@ -1299,7 +1299,7 @@ int blkcg_init_disk(struct gendisk *disk) blkg = blkg_create(&blkcg_root, disk, new_blkg); if (IS_ERR(blkg)) goto err_unlock; - q->root_blkg = blkg; + disk->root_blkg = blkg; spin_unlock_irq(&q->queue_lock); if (preloaded) @@ -1412,7 +1412,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) struct blkcg_gq *blkg, *pinned_blkg = NULL; int ret; - if (blkcg_policy_enabled(q, pol)) + if (blkcg_policy_enabled(disk, pol)) return 0; if (queue_is_mq(q)) @@ -1421,7 +1421,7 @@ retry: spin_lock_irq(&q->queue_lock); /* blkg_list is pushed at the head, reverse walk to allocate parents first */ - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { + list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) { struct blkg_policy_data *pd; if (blkg->pd[pol->plid]) @@ -1466,16 +1466,16 @@ retry: /* all allocated, init in the same order */ if (pol->pd_init_fn) - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) + list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) pol->pd_init_fn(blkg->pd[pol->plid]); - list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { + list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) { if (pol->pd_online_fn) pol->pd_online_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid]->online = true; } - __set_bit(pol->plid, q->blkcg_pols); + __set_bit(pol->plid, disk->blkcg_pols); ret = 0; spin_unlock_irq(&q->queue_lock); @@ -1491,7 +1491,7 @@ out: enomem: /* alloc failed, nothing's initialized yet, free everything */ spin_lock_irq(&q->queue_lock); - list_for_each_entry(blkg, &q->blkg_list, q_node) { + list_for_each_entry(blkg, &disk->blkg_list, entry) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -1521,18 +1521,18 @@ void blkcg_deactivate_policy(struct gendisk *disk, struct request_queue *q = disk->queue; struct blkcg_gq *blkg; - if (!blkcg_policy_enabled(q, pol)) + if (!blkcg_policy_enabled(disk, pol)) return; if (queue_is_mq(q)) blk_mq_freeze_queue(q); - mutex_lock(&q->blkcg_mutex); + mutex_lock(&disk->blkcg_mutex); spin_lock_irq(&q->queue_lock); - __clear_bit(pol->plid, q->blkcg_pols); + __clear_bit(pol->plid, disk->blkcg_pols); - list_for_each_entry(blkg, &q->blkg_list, q_node) { + list_for_each_entry(blkg, &disk->blkg_list, entry) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -1546,7 +1546,7 @@ void blkcg_deactivate_policy(struct gendisk *disk, } spin_unlock_irq(&q->queue_lock); - mutex_unlock(&q->blkcg_mutex); + mutex_unlock(&disk->blkcg_mutex); if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); @@ -1943,7 +1943,7 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio, * Associate @bio with the blkg found by combining the css's blkg and the * request_queue of the @bio. An association failure is handled by walking up * the blkg tree. Therefore, the blkg associated can be anything between @blkg - * and q->root_blkg. This situation only happens when a cgroup is dying and + * and disk->root_blkg. This situation only happens when a cgroup is dying and * then the remaining bios will spill to the closest alive blkg. 
* * A reference will be taken on the blkg and will be released when @bio is @@ -1958,8 +1958,8 @@ void bio_associate_blkg_from_css(struct bio *bio, if (css && css->parent) { bio->bi_blkg = blkg_tryget_closest(bio, css); } else { - blkg_get(bdev_get_queue(bio->bi_bdev)->root_blkg); - bio->bi_blkg = bdev_get_queue(bio->bi_bdev)->root_blkg; + blkg_get(bio->bi_bdev->bd_disk->root_blkg); + bio->bi_blkg = bio->bi_bdev->bd_disk->root_blkg; } } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 151f24de2539..e442b406ca0d 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -54,7 +54,7 @@ struct blkg_iostat_set { /* association between a blk cgroup and a request queue */ struct blkcg_gq { struct gendisk *disk; - struct list_head q_node; + struct list_head entry; struct hlist_node blkcg_node; struct blkcg *blkcg; @@ -250,7 +250,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, WARN_ON_ONCE(!rcu_read_lock_held()); if (blkcg == &blkcg_root) - return disk->queue->root_blkg; + return disk->root_blkg; blkg = rcu_dereference(blkcg->blkg_hint); if (blkg && blkg->disk == disk) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index bc0d217f5c17..5d5aa1e526b7 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -665,7 +665,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) rcu_read_lock(); blkg_for_each_descendant_pre(blkg, pos_css, - blkiolat->rqos.disk->queue->root_blkg) { + blkiolat->rqos.disk->root_blkg) { struct iolatency_grp *iolat; struct child_latency_info *lat_info; unsigned long flags; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 74bb1e753ea0..902203bdddb4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -451,7 +451,8 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td) bool low_valid = false; rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, + td->queue->disk->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] || @@ -1180,7 +1181,7 @@ static void throtl_pending_timer_fn(struct timer_list *t) spin_lock_irq(&q->queue_lock); - if (!q->root_blkg) + if (!q->disk->root_blkg) goto out_unlock; if (throtl_can_upgrade(td, NULL)) @@ -1322,7 +1323,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) * blk-throttle. */ blkg_for_each_descendant_pre(blkg, pos_css, - global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) { + global ? tg->td->queue->disk->root_blkg : + tg_to_blkg(tg)) { struct throtl_grp *this_tg = blkg_to_tg(blkg); struct throtl_grp *parent_tg; @@ -1717,7 +1719,7 @@ void blk_throtl_cancel_bios(struct gendisk *disk) * path need RCU protection and to prevent warning from lockdep. 
*/ rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, disk->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq = &tg->service_queue; @@ -1871,7 +1873,8 @@ static bool throtl_can_upgrade(struct throtl_data *td, return false; rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, + td->queue->disk->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); if (tg == this_tg) @@ -1917,7 +1920,8 @@ static void throtl_upgrade_state(struct throtl_data *td) td->low_upgrade_time = jiffies; td->scale = 0; rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, + td->queue->disk->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq = &tg->service_queue; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9637d63e6f0..79aec4ebadb9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -163,6 +163,12 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev; +#ifdef CONFIG_BLK_CGROUP + DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); + struct blkcg_gq *root_blkg; + struct list_head blkg_list; + struct mutex blkcg_mutex; +#endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -481,12 +487,6 @@ struct request_queue { struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; -#ifdef CONFIG_BLK_CGROUP - DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); - struct blkcg_gq *root_blkg; - struct list_head blkg_list; - struct mutex blkcg_mutex; -#endif struct queue_limits limits; -- cgit v1.2.3 From f37bf75ca73d523ebaa7ceb44c45d8ecd05374fe Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 2 Feb 2023 21:49:13 +0800 Subject: block, bfq: cleanup 'bfqg->online' After commit dfd6200a0954 ("blk-cgroup: support to track if policy is online"), there is no need to track this again in bfq. However, 'pd->online' is not protected by 'bfqd->lock'; in order to make sure bfq won't see that 'pd->online' is still set after bfq_pd_offline(), clear it before bfq_pd_offline() is called. This is fine because other policies don't use 'pd->online' and bfq_pd_offline() will move active bfqqs to the root cgroup anyway.
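For reference, the teardown order that results in blkg_destroy() can be sketched as follows (a minimal sketch, condensed from the blk-cgroup.c hunk of this patch; the surrounding locking and list manipulation are omitted):

    for (i = 0; i < BLKCG_MAX_POLS; i++) {
            struct blkcg_policy *pol = blkcg_policy[i];

            if (blkg->pd[i] && blkg->pd[i]->online) {
                    /* clear the flag before the offline callback runs */
                    blkg->pd[i]->online = false;
                    if (pol->pd_offline_fn)
                            pol->pd_offline_fn(blkg->pd[i]);
            }
    }

With this ordering, bfq_bio_bfqg() can test 'pd.online' directly and will not see the flag still set once bfq_pd_offline() has been called.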
Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230202134913.2364549-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 +--- block/bfq-iosched.h | 2 -- block/blk-cgroup.c | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 4fdbbec71647..624530643a05 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -551,7 +551,6 @@ static void bfq_pd_init(struct blkg_policy_data *pd) bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->num_queues_with_pending_reqs = 0; - bfqg->online = true; bfqg->rq_pos_tree = RB_ROOT; } @@ -614,7 +613,7 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) continue; } bfqg = blkg_to_bfqg(blkg); - if (bfqg->online) { + if (bfqg->pd.online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); return bfqg; } @@ -985,7 +984,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) put_async_queues: bfq_put_async_queues(bfqd, bfqg); - bfqg->online = false; spin_unlock_irqrestore(&bfqd->lock, flags); /* diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 75cc6a324267..69aaee52285a 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -1009,8 +1009,6 @@ struct bfq_group { /* reference counter (see comments in bfq_bic_update_cgroup) */ refcount_t ref; - /* Is bfq_group still online? */ - bool online; struct bfq_entity entity; struct bfq_sched_data sched_data; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c46778d1f3c2..d8fe607138b9 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -479,9 +479,9 @@ static void blkg_destroy(struct blkcg_gq *blkg) struct blkcg_policy *pol = blkcg_policy[i]; if (blkg->pd[i] && blkg->pd[i]->online) { + blkg->pd[i]->online = false; if (pol->pd_offline_fn) pol->pd_offline_fn(blkg->pd[i]); - blkg->pd[i]->online = false; } } -- cgit v1.2.3 From 1231039db31cf0703996d0b1797c2702e25a110a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2023 19:33:04 +0100 Subject: Revert "blk-cgroup: move the cgroup information to struct gendisk" This reverts commit 3f13ab7c80fdb0ada86a8e3e818960bc1ccbaa59 as a patch it depends on caused a few problems. 
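The immediate effect, visible in the hunks below, is that the blk-cgroup state (policy bitmap, blkg list, root blkg and mutex) moves back from struct gendisk to struct request_queue, so the helpers key off the queue again. A minimal sketch of the restored helper as it reads after this revert:

    static bool blkcg_policy_enabled(struct request_queue *q,
                                     const struct blkcg_policy *pol)
    {
            return pol && test_bit(pol->plid, q->blkcg_pols);
    }

Callers correspondingly walk q->blkg_list via blkg->q_node again instead of disk->blkg_list via blkg->entry.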
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230214183308.1658775-2-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 4 +-- block/blk-cgroup.c | 66 +++++++++++++++++++++++++------------------------- block/blk-cgroup.h | 4 +-- block/blk-iolatency.c | 2 +- block/blk-throttle.c | 16 +++++------- include/linux/blkdev.h | 12 ++++----- 6 files changed, 50 insertions(+), 54 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 624530643a05..935a497b5ded 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -999,7 +999,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) { struct blkcg_gq *blkg; - list_for_each_entry(blkg, &bfqd->queue->disk->blkg_list, entry) { + list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); bfq_end_wr_async_queues(bfqd, bfqg); @@ -1293,7 +1293,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) if (ret) return NULL; - return blkg_to_bfqg(bfqd->queue->disk->root_blkg); + return blkg_to_bfqg(bfqd->queue->root_blkg); } struct blkcg_policy blkcg_policy_bfq = { diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 935028912e7a..1653786644ea 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -108,10 +108,10 @@ static struct cgroup_subsys_state *blkcg_css(void) return task_css(current, io_cgrp_id); } -static bool blkcg_policy_enabled(struct gendisk *disk, +static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { - return pol && test_bit(pol->plid, disk->blkcg_pols); + return pol && test_bit(pol->plid, q->blkcg_pols); } static void blkg_free_workfn(struct work_struct *work) @@ -123,18 +123,18 @@ static void blkg_free_workfn(struct work_struct *work) /* * pd_free_fn() can also be called from blkcg_deactivate_policy(), * in order to make sure pd_free_fn() is called in order, the deletion - * of the list blkg->entry is delayed to here from blkg_destroy(), and + * of the list blkg->q_node is delayed to here from blkg_destroy(), and * blkcg_mutex is used to synchronize blkg_free_workfn() and * blkcg_deactivate_policy(). 
*/ - mutex_lock(&blkg->disk->blkcg_mutex); + mutex_lock(&blkg->disk->queue->blkcg_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->parent) blkg_put(blkg->parent); - list_del_init(&blkg->entry); - mutex_unlock(&blkg->disk->blkcg_mutex); + list_del_init(&blkg->q_node); + mutex_unlock(&blkg->disk->queue->blkcg_mutex); put_disk(blkg->disk); free_percpu(blkg->iostat_cpu); @@ -269,7 +269,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, get_device(disk_to_dev(disk)); blkg->disk = disk; - INIT_LIST_HEAD(&blkg->entry); + INIT_LIST_HEAD(&blkg->q_node); spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); @@ -285,7 +285,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, struct blkcg_policy *pol = blkcg_policy[i]; struct blkg_policy_data *pd; - if (!blkcg_policy_enabled(disk, pol)) + if (!blkcg_policy_enabled(disk->queue, pol)) continue; /* alloc per-policy data and attach it to blkg */ @@ -371,7 +371,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk, ret = radix_tree_insert(&blkcg->blkg_tree, disk->queue->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); - list_add(&blkg->entry, &disk->blkg_list); + list_add(&blkg->q_node, &disk->queue->blkg_list); for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -444,7 +444,7 @@ static struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, while (true) { struct blkcg *pos = blkcg; struct blkcg *parent = blkcg_parent(blkcg); - struct blkcg_gq *ret_blkg = disk->root_blkg; + struct blkcg_gq *ret_blkg = q->root_blkg; while (parent) { blkg = blkg_lookup(parent, disk); @@ -526,7 +526,7 @@ static void blkg_destroy_all(struct gendisk *disk) restart: spin_lock_irq(&q->queue_lock); - list_for_each_entry_safe(blkg, n, &disk->blkg_list, entry) { + list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -545,7 +545,7 @@ restart: } } - disk->root_blkg = NULL; + q->root_blkg = NULL; spin_unlock_irq(&q->queue_lock); } @@ -620,7 +620,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { spin_lock_irq(&blkg->disk->queue->queue_lock); - if (blkcg_policy_enabled(blkg->disk, pol)) + if (blkcg_policy_enabled(blkg->disk->queue, pol)) total += prfill(sf, blkg->pd[pol->plid], data); spin_unlock_irq(&blkg->disk->queue->queue_lock); } @@ -728,7 +728,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, rcu_read_lock(); spin_lock_irq(&q->queue_lock); - if (!blkcg_policy_enabled(disk, pol)) { + if (!blkcg_policy_enabled(q, pol)) { ret = -EOPNOTSUPP; goto fail_unlock; } @@ -771,7 +771,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, rcu_read_lock(); spin_lock_irq(&q->queue_lock); - if (!blkcg_policy_enabled(disk, pol)) { + if (!blkcg_policy_enabled(q, pol)) { blkg_free(new_blkg); ret = -EOPNOTSUPP; goto fail_preloaded; @@ -951,7 +951,7 @@ static void blkcg_fill_root_iostats(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct block_device *bdev = dev_to_bdev(dev); - struct blkcg_gq *blkg = bdev->bd_disk->root_blkg; + struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg; struct blkg_iostat tmp; int cpu; 
unsigned long flags; @@ -1298,8 +1298,8 @@ int blkcg_init_disk(struct gendisk *disk) bool preloaded; int ret; - INIT_LIST_HEAD(&disk->blkg_list); - mutex_init(&disk->blkcg_mutex); + INIT_LIST_HEAD(&q->blkg_list); + mutex_init(&q->blkcg_mutex); new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL); if (!new_blkg) @@ -1313,7 +1313,7 @@ int blkcg_init_disk(struct gendisk *disk) blkg = blkg_create(&blkcg_root, disk, new_blkg); if (IS_ERR(blkg)) goto err_unlock; - disk->root_blkg = blkg; + q->root_blkg = blkg; spin_unlock_irq(&q->queue_lock); if (preloaded) @@ -1426,7 +1426,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol) struct blkcg_gq *blkg, *pinned_blkg = NULL; int ret; - if (blkcg_policy_enabled(disk, pol)) + if (blkcg_policy_enabled(q, pol)) return 0; if (queue_is_mq(q)) @@ -1435,7 +1435,7 @@ retry: spin_lock_irq(&q->queue_lock); /* blkg_list is pushed at the head, reverse walk to allocate parents first */ - list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) { + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { struct blkg_policy_data *pd; if (blkg->pd[pol->plid]) @@ -1480,16 +1480,16 @@ retry: /* all allocated, init in the same order */ if (pol->pd_init_fn) - list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) pol->pd_init_fn(blkg->pd[pol->plid]); - list_for_each_entry_reverse(blkg, &disk->blkg_list, entry) { + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { if (pol->pd_online_fn) pol->pd_online_fn(blkg->pd[pol->plid]); blkg->pd[pol->plid]->online = true; } - __set_bit(pol->plid, disk->blkcg_pols); + __set_bit(pol->plid, q->blkcg_pols); ret = 0; spin_unlock_irq(&q->queue_lock); @@ -1505,7 +1505,7 @@ out: enomem: /* alloc failed, nothing's initialized yet, free everything */ spin_lock_irq(&q->queue_lock); - list_for_each_entry(blkg, &disk->blkg_list, entry) { + list_for_each_entry(blkg, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -1535,18 +1535,18 @@ void blkcg_deactivate_policy(struct gendisk *disk, struct request_queue *q = disk->queue; struct blkcg_gq *blkg; - if (!blkcg_policy_enabled(disk, pol)) + if (!blkcg_policy_enabled(q, pol)) return; if (queue_is_mq(q)) blk_mq_freeze_queue(q); - mutex_lock(&disk->blkcg_mutex); + mutex_lock(&q->blkcg_mutex); spin_lock_irq(&q->queue_lock); - __clear_bit(pol->plid, disk->blkcg_pols); + __clear_bit(pol->plid, q->blkcg_pols); - list_for_each_entry(blkg, &disk->blkg_list, entry) { + list_for_each_entry(blkg, &q->blkg_list, q_node) { struct blkcg *blkcg = blkg->blkcg; spin_lock(&blkcg->lock); @@ -1560,7 +1560,7 @@ void blkcg_deactivate_policy(struct gendisk *disk, } spin_unlock_irq(&q->queue_lock); - mutex_unlock(&disk->blkcg_mutex); + mutex_unlock(&q->blkcg_mutex); if (queue_is_mq(q)) blk_mq_unfreeze_queue(q); @@ -1957,7 +1957,7 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct bio *bio, * Associate @bio with the blkg found by combining the css's blkg and the * request_queue of the @bio. An association failure is handled by walking up * the blkg tree. Therefore, the blkg associated can be anything between @blkg - * and disk->root_blkg. This situation only happens when a cgroup is dying and + * and q->root_blkg. This situation only happens when a cgroup is dying and * then the remaining bios will spill to the closest alive blkg. 
* * A reference will be taken on the blkg and will be released when @bio is @@ -1972,8 +1972,8 @@ void bio_associate_blkg_from_css(struct bio *bio, if (css && css->parent) { bio->bi_blkg = blkg_tryget_closest(bio, css); } else { - blkg_get(bio->bi_bdev->bd_disk->root_blkg); - bio->bi_blkg = bio->bi_bdev->bd_disk->root_blkg; + blkg_get(bdev_get_queue(bio->bi_bdev)->root_blkg); + bio->bi_blkg = bdev_get_queue(bio->bi_bdev)->root_blkg; } } EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index e442b406ca0d..151f24de2539 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -54,7 +54,7 @@ struct blkg_iostat_set { /* association between a blk cgroup and a request queue */ struct blkcg_gq { struct gendisk *disk; - struct list_head entry; + struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; @@ -250,7 +250,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, WARN_ON_ONCE(!rcu_read_lock_held()); if (blkcg == &blkcg_root) - return disk->root_blkg; + return disk->queue->root_blkg; blkg = rcu_dereference(blkcg->blkg_hint); if (blkg && blkg->disk == disk) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 5d5aa1e526b7..bc0d217f5c17 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -665,7 +665,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) rcu_read_lock(); blkg_for_each_descendant_pre(blkg, pos_css, - blkiolat->rqos.disk->root_blkg) { + blkiolat->rqos.disk->queue->root_blkg) { struct iolatency_grp *iolat; struct child_latency_info *lat_info; unsigned long flags; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e7bd7050d684..21c8d5e871ea 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -451,8 +451,7 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td) bool low_valid = false; rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, - td->queue->disk->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] || @@ -1181,7 +1180,7 @@ static void throtl_pending_timer_fn(struct timer_list *t) spin_lock_irq(&q->queue_lock); - if (!q->disk->root_blkg) + if (!q->root_blkg) goto out_unlock; if (throtl_can_upgrade(td, NULL)) @@ -1323,8 +1322,7 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global) * blk-throttle. */ blkg_for_each_descendant_pre(blkg, pos_css, - global ? tg->td->queue->disk->root_blkg : - tg_to_blkg(tg)) { + global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) { struct throtl_grp *this_tg = blkg_to_tg(blkg); struct throtl_grp *parent_tg; @@ -1719,7 +1717,7 @@ void blk_throtl_cancel_bios(struct gendisk *disk) * path need RCU protection and to prevent warning from lockdep. 
*/ rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, disk->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq = &tg->service_queue; @@ -1873,8 +1871,7 @@ static bool throtl_can_upgrade(struct throtl_data *td, return false; rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, - td->queue->disk->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); if (tg == this_tg) @@ -1920,8 +1917,7 @@ static void throtl_upgrade_state(struct throtl_data *td) td->low_upgrade_time = jiffies; td->scale = 0; rcu_read_lock(); - blkg_for_each_descendant_post(blkg, pos_css, - td->queue->disk->root_blkg) { + blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); struct throtl_service_queue *sq = &tg->service_queue; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 79aec4ebadb9..b9637d63e6f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -163,12 +163,6 @@ struct gendisk { struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev; -#ifdef CONFIG_BLK_CGROUP - DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); - struct blkcg_gq *root_blkg; - struct list_head blkg_list; - struct mutex blkcg_mutex; -#endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -487,6 +481,12 @@ struct request_queue { struct blk_mq_tags *sched_shared_tags; struct list_head icq_list; +#ifdef CONFIG_BLK_CGROUP + DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS); + struct blkcg_gq *root_blkg; + struct list_head blkg_list; + struct mutex blkcg_mutex; +#endif struct queue_limits limits; -- cgit v1.2.3 From a06377c5d01eeeaa52ad979b62c3c72efcc3eff0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Feb 2023 19:33:08 +0100 Subject: Revert "blk-cgroup: pin the gendisk in struct blkcg_gq" This reverts commit 84d7d462b16dd5f0bf7c7ca9254bf81db2c952a2. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230214183308.1658775-6-hch@lst.de Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 6 +++--- block/blk-cgroup-rwstat.c | 2 +- block/blk-cgroup.c | 35 ++++++++++++++++++----------------- block/blk-cgroup.h | 11 ++++++----- block/blk-iocost.c | 2 +- block/blk-iolatency.c | 4 ++-- block/blk-throttle.c | 4 ++-- 7 files changed, 33 insertions(+), 31 deletions(-) (limited to 'block/bfq-cgroup.c') diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 935a497b5ded..ea3638e06e04 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -405,7 +405,7 @@ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) parent = bfqg_parent(bfqg); - lockdep_assert_held(&bfqg_to_blkg(bfqg)->disk->queue->queue_lock); + lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock); if (unlikely(!parent)) return; @@ -536,7 +536,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct bfq_group *bfqg = blkg_to_bfqg(blkg); - struct bfq_data *bfqd = blkg->disk->queue->elevator->elevator_data; + struct bfq_data *bfqd = blkg->q->elevator->elevator_data; struct bfq_entity *entity = &bfqg->entity; struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); @@ -1199,7 +1199,7 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct cgroup_subsys_state *pos_css; u64 sum = 0; - lockdep_assert_held(&blkg->disk->queue->queue_lock); + lockdep_assert_held(&blkg->q->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c index b8b8c82e667a..3304e841df7c 100644 --- a/block/blk-cgroup-rwstat.c +++ b/block/blk-cgroup-rwstat.c @@ -107,7 +107,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, struct cgroup_subsys_state *pos_css; unsigned int i; - lockdep_assert_held(&blkg->disk->queue->queue_lock); + lockdep_assert_held(&blkg->q->queue_lock); memset(sum, 0, sizeof(*sum)); rcu_read_lock(); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 157456632124..981ebe003b1c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -118,6 +118,7 @@ static void blkg_free_workfn(struct work_struct *work) { struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, free_work); + struct request_queue *q = blkg->q; int i; /* @@ -127,16 +128,16 @@ static void blkg_free_workfn(struct work_struct *work) * blkcg_mutex is used to synchronize blkg_free_workfn() and * blkcg_deactivate_policy(). 
*/ - mutex_lock(&blkg->disk->queue->blkcg_mutex); + mutex_lock(&q->blkcg_mutex); for (i = 0; i < BLKCG_MAX_POLS; i++) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->parent) blkg_put(blkg->parent); list_del_init(&blkg->q_node); - mutex_unlock(&blkg->disk->queue->blkcg_mutex); + mutex_unlock(&q->blkcg_mutex); - put_disk(blkg->disk); + blk_put_queue(q); free_percpu(blkg->iostat_cpu); percpu_ref_exit(&blkg->refcnt); kfree(blkg); @@ -263,12 +264,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask); if (!blkg->iostat_cpu) goto out_exit_refcnt; - - if (test_bit(GD_DEAD, &disk->state)) + if (!blk_get_queue(disk->queue)) goto out_free_iostat; - get_device(disk_to_dev(disk)); - blkg->disk = disk; + blkg->q = disk->queue; INIT_LIST_HEAD(&blkg->q_node); spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); @@ -304,7 +303,7 @@ out_free_pds: while (--i >= 0) if (blkg->pd[i]) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); - put_disk(blkg->disk); + blk_put_queue(disk->queue); out_free_iostat: free_percpu(blkg->iostat_cpu); out_exit_refcnt: @@ -476,7 +475,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) struct blkcg *blkcg = blkg->blkcg; int i; - lockdep_assert_held(&blkg->disk->queue->queue_lock); + lockdep_assert_held(&blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); /* @@ -500,7 +499,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) blkg->online = false; - radix_tree_delete(&blkcg->blkg_tree, blkg->disk->queue->id); + radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); hlist_del_init_rcu(&blkg->blkcg_node); /* @@ -587,7 +586,9 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, const char *blkg_dev_name(struct blkcg_gq *blkg) { - return bdi_dev_name(blkg->disk->bdi); + if (!blkg->q->disk) + return NULL; + return bdi_dev_name(blkg->q->disk->bdi); } /** @@ -619,10 +620,10 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(&blkg->disk->queue->queue_lock); - if (blkcg_policy_enabled(blkg->disk->queue, pol)) + spin_lock_irq(&blkg->q->queue_lock); + if (blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(&blkg->disk->queue->queue_lock); + spin_unlock_irq(&blkg->q->queue_lock); } rcu_read_unlock(); @@ -1046,9 +1047,9 @@ static int blkcg_print_stat(struct seq_file *sf, void *v) rcu_read_lock(); hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { - spin_lock_irq(&blkg->disk->queue->queue_lock); + spin_lock_irq(&blkg->q->queue_lock); blkcg_print_one_stat(blkg, sf); - spin_unlock_irq(&blkg->disk->queue->queue_lock); + spin_unlock_irq(&blkg->q->queue_lock); } rcu_read_unlock(); return 0; @@ -1118,7 +1119,7 @@ static void blkcg_destroy_blkgs(struct blkcg *blkcg) while (!hlist_empty(&blkcg->blkg_list)) { struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first, struct blkcg_gq, blkcg_node); - struct request_queue *q = blkg->disk->queue; + struct request_queue *q = blkg->q; if (need_resched() || !spin_trylock(&q->queue_lock)) { /* diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 3d9e42c519db..9c5078755e5e 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -53,7 +53,8 @@ struct blkg_iostat_set { /* association between a blk cgroup and a request queue */ struct blkcg_gq { - struct gendisk *disk; + /* Pointer to the associated request_queue */ + struct request_queue 
*q; struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; @@ -253,11 +254,11 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, return q->root_blkg; blkg = rcu_dereference(blkcg->blkg_hint); - if (blkg && blkg->disk->queue == q) + if (blkg && blkg->q == q) return blkg; blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); - if (blkg && blkg->disk->queue != q) + if (blkg && blkg->q != q) blkg = NULL; return blkg; } @@ -357,7 +358,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->disk->queue))) + (p_blkg)->q))) /** * blkg_for_each_descendant_post - post-order walk of a blkg's descendants @@ -372,7 +373,7 @@ static inline void blkg_put(struct blkcg_gq *blkg) #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \ - (p_blkg)->disk->queue))) + (p_blkg)->q))) bool __blkcg_punt_bio_submit(struct bio *bio); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 7a2dc9dc8e3b..ff534e9d92dc 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2947,7 +2947,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd) { struct ioc_gq *iocg = pd_to_iocg(pd); struct blkcg_gq *blkg = pd_to_blkg(&iocg->pd); - struct ioc *ioc = q_to_ioc(blkg->disk->queue); + struct ioc *ioc = q_to_ioc(blkg->q); struct ioc_now now; struct blkcg_gq *tblkg; unsigned long flags; diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index bc0d217f5c17..0dc910568b31 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -967,12 +967,12 @@ static void iolatency_pd_init(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); - struct rq_qos *rqos = blkcg_rq_qos(blkg->disk->queue); + struct rq_qos *rqos = blkcg_rq_qos(blkg->q); struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); u64 now = ktime_to_ns(ktime_get()); int cpu; - if (blk_queue_nonrot(blkg->disk->queue)) + if (blk_queue_nonrot(blkg->q)) iolat->ssd = true; else iolat->ssd = false; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 74bb1e753ea0..47e9d8be68f3 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -387,7 +387,7 @@ static void throtl_pd_init(struct blkg_policy_data *pd) { struct throtl_grp *tg = pd_to_tg(pd); struct blkcg_gq *blkg = tg_to_blkg(tg); - struct throtl_data *td = blkg->disk->queue->td; + struct throtl_data *td = blkg->q->td; struct throtl_service_queue *sq = &tg->service_queue; /* @@ -1174,7 +1174,7 @@ static void throtl_pending_timer_fn(struct timer_list *t) /* throtl_data may be gone, so figure out request queue by blkg */ if (tg) - q = tg->pd.blkg->disk->queue; + q = tg->pd.blkg->q; else q = td->queue; -- cgit v1.2.3
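Taken together, these reverts leave a blkg keyed by its request_queue again. A minimal sketch of the blkg_lookup() fast path as restored above (the caller holds the RCU read lock, as before):

    blkg = rcu_dereference(blkcg->blkg_hint);
    if (blkg && blkg->q == q)
            return blkg;

    blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
    if (blkg && blkg->q != q)
            blkg = NULL;
    return blkg;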