diff options
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r-- | block/blk-cgroup.c | 78 |
1 files changed, 44 insertions, 34 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 0ecb4cce8af2..0ce64dd73cfe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -55,7 +55,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; -static struct workqueue_struct *blkcg_punt_bio_wq; #define BLKG_DESTROY_BATCH_SIZE 64 @@ -165,7 +164,9 @@ static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); +#ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); +#endif /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -187,6 +188,9 @@ static void blkg_release(struct percpu_ref *ref) call_rcu(&blkg->rcu_head, __blkg_release); } +#ifdef CONFIG_BLK_CGROUP_PUNT_BIO +static struct workqueue_struct *blkcg_punt_bio_wq; + static void blkg_async_bio_workfn(struct work_struct *work) { struct blkcg_gq *blkg = container_of(work, struct blkcg_gq, @@ -197,10 +201,10 @@ static void blkg_async_bio_workfn(struct work_struct *work) bool need_plug = false; /* as long as there are pending bios, @blkg can't go away */ - spin_lock_bh(&blkg->async_bio_lock); + spin_lock(&blkg->async_bio_lock); bio_list_merge(&bios, &blkg->async_bios); bio_list_init(&blkg->async_bios); - spin_unlock_bh(&blkg->async_bio_lock); + spin_unlock(&blkg->async_bio_lock); /* start plug only when bio_list contains at least 2 bios */ if (bios.head && bios.head->bi_next) { @@ -213,6 +217,40 @@ static void blkg_async_bio_workfn(struct work_struct *work) blk_finish_plug(&plug); } +/* + * When a shared kthread issues a bio for a cgroup, doing so synchronously can + * lead to priority inversions as the kthread can be trapped waiting for that + * cgroup. Use this helper instead of submit_bio to punt the actual issuing to + * a dedicated per-blkcg work item to avoid such priority inversions. + */ +void blkcg_punt_bio_submit(struct bio *bio) +{ + struct blkcg_gq *blkg = bio->bi_blkg; + + if (blkg->parent) { + spin_lock(&blkg->async_bio_lock); + bio_list_add(&blkg->async_bios, bio); + spin_unlock(&blkg->async_bio_lock); + queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work); + } else { + /* never bounce for the root cgroup */ + submit_bio(bio); + } +} +EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit); + +static int __init blkcg_punt_bio_init(void) +{ + blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", + WQ_MEM_RECLAIM | WQ_FREEZABLE | + WQ_UNBOUND | WQ_SYSFS, 0); + if (!blkcg_punt_bio_wq) + return -ENOMEM; + return 0; +} +subsys_initcall(blkcg_punt_bio_init); +#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */ + /** * bio_blkcg_css - return the blkcg CSS associated with a bio * @bio: target bio @@ -268,10 +306,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk, blkg->q = disk->queue; INIT_LIST_HEAD(&blkg->q_node); + blkg->blkcg = blkcg; +#ifdef CONFIG_BLK_CGROUP_PUNT_BIO spin_lock_init(&blkg->async_bio_lock); bio_list_init(&blkg->async_bios); INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn); - blkg->blkcg = blkcg; +#endif u64_stats_init(&blkg->iostat.sync); for_each_possible_cpu(cpu) { @@ -1682,25 +1722,6 @@ out_unlock: } EXPORT_SYMBOL_GPL(blkcg_policy_unregister); -bool __blkcg_punt_bio_submit(struct bio *bio) -{ - struct blkcg_gq *blkg = bio->bi_blkg; - - /* consume the flag first */ - bio->bi_opf &= ~REQ_CGROUP_PUNT; - - /* never bounce for the root cgroup */ - if (!blkg->parent) - return false; - - spin_lock_bh(&blkg->async_bio_lock); - bio_list_add(&blkg->async_bios, bio); - spin_unlock_bh(&blkg->async_bio_lock); - - queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work); - return true; -} - /* * Scale the accumulated delay based on how long it has been since we updated * the delay. We only call this when we are adding delay, in case it's been a @@ -2079,16 +2100,5 @@ bool blk_cgroup_congested(void) return ret; } -static int __init blkcg_init(void) -{ - blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio", - WQ_MEM_RECLAIM | WQ_FREEZABLE | - WQ_UNBOUND | WQ_SYSFS, 0); - if (!blkcg_punt_bio_wq) - return -ENOMEM; - return 0; -} -subsys_initcall(blkcg_init); - module_param(blkcg_debug_stats, bool, 0644); MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not"); |