Commit 2c275afe authored by Christoph Hellwig, committed by David Sterba

block: make blkcg_punt_bio_submit optional

Guard all the code that punts bios to a per-cgroup submission helper behind a
new CONFIG_BLK_CGROUP_PUNT_BIO symbol that is selected by btrfs.
This way non-btrfs kernel builds don't need to carry this code.
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 12be09fe
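
With CONFIG_BLK_CGROUP_PUNT_BIO unset, blkcg_punt_bio_submit() presumably collapses to a plain submit_bio() call via a static inline stub next to its declaration; the header hunk is not included in the diff below, so the following is only a minimal sketch of such a fallback:

#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
void blkcg_punt_bio_submit(struct bio *bio);
#else
/* assumed fallback: without the punt machinery, issue the bio directly */
static inline void blkcg_punt_bio_submit(struct bio *bio)
{
        submit_bio(bio);
}
#endif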
@@ -41,6 +41,9 @@ config BLK_RQ_ALLOC_TIME
 config BLK_CGROUP_RWSTAT
        bool
 
+config BLK_CGROUP_PUNT_BIO
+       bool
+
 config BLK_DEV_BSG_COMMON
        tristate
...
@@ -56,7 +56,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 static LIST_HEAD(all_blkcgs);          /* protected by blkcg_pol_mutex */
 
 bool blkcg_debug_stats = false;
-static struct workqueue_struct *blkcg_punt_bio_wq;
 
 #define BLKG_DESTROY_BATCH_SIZE  64
@@ -166,7 +165,9 @@ static void __blkg_release(struct rcu_head *rcu)
 {
        struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
 
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
        WARN_ON(!bio_list_empty(&blkg->async_bios));
+#endif
 
        /* release the blkcg and parent blkg refs this blkg has been holding */
        css_put(&blkg->blkcg->css);
@@ -188,6 +189,9 @@ static void blkg_release(struct percpu_ref *ref)
        call_rcu(&blkg->rcu_head, __blkg_release);
 }
 
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
+static struct workqueue_struct *blkcg_punt_bio_wq;
+
 static void blkg_async_bio_workfn(struct work_struct *work)
 {
        struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
@@ -214,6 +218,40 @@ static void blkg_async_bio_workfn(struct work_struct *work)
        blk_finish_plug(&plug);
 }
 
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup.  Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
+{
+       struct blkcg_gq *blkg = bio->bi_blkg;
+
+       if (blkg->parent) {
+               spin_lock(&blkg->async_bio_lock);
+               bio_list_add(&blkg->async_bios, bio);
+               spin_unlock(&blkg->async_bio_lock);
+               queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+       } else {
+               /* never bounce for the root cgroup */
+               submit_bio(bio);
+       }
+}
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
+
+static int __init blkcg_punt_bio_init(void)
+{
+       blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
+                                           WQ_MEM_RECLAIM | WQ_FREEZABLE |
+                                           WQ_UNBOUND | WQ_SYSFS, 0);
+       if (!blkcg_punt_bio_wq)
+               return -ENOMEM;
+       return 0;
+}
+subsys_initcall(blkcg_punt_bio_init);
+#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */
+
 /**
  * bio_blkcg_css - return the blkcg CSS associated with a bio
  * @bio: target bio
@@ -269,10 +307,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
        blkg->q = disk->queue;
        INIT_LIST_HEAD(&blkg->q_node);
+       blkg->blkcg = blkcg;
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
        spin_lock_init(&blkg->async_bio_lock);
        bio_list_init(&blkg->async_bios);
        INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
-       blkg->blkcg = blkcg;
+#endif
 
        u64_stats_init(&blkg->iostat.sync);
        for_each_possible_cpu(cpu) {
@@ -1688,28 +1728,6 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
-/*
- * When a shared kthread issues a bio for a cgroup, doing so synchronously can
- * lead to priority inversions as the kthread can be trapped waiting for that
- * cgroup.  Use this helper instead of submit_bio to punt the actual issuing to
- * a dedicated per-blkcg work item to avoid such priority inversions.
- */
-void blkcg_punt_bio_submit(struct bio *bio)
-{
-       struct blkcg_gq *blkg = bio->bi_blkg;
-
-       if (blkg->parent) {
-               spin_lock(&blkg->async_bio_lock);
-               bio_list_add(&blkg->async_bios, bio);
-               spin_unlock(&blkg->async_bio_lock);
-               queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
-       } else {
-               /* never bounce for the root cgroup */
-               submit_bio(bio);
-       }
-}
-EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
-
 /*
  * Scale the accumulated delay based on how long it has been since we updated
  * the delay. We only call this when we are adding delay, in case it's been a
@@ -2088,16 +2106,5 @@ bool blk_cgroup_congested(void)
        return ret;
 }
 
-static int __init blkcg_init(void)
-{
-       blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
-                                           WQ_MEM_RECLAIM | WQ_FREEZABLE |
-                                           WQ_UNBOUND | WQ_SYSFS, 0);
-       if (!blkcg_punt_bio_wq)
-               return -ENOMEM;
-       return 0;
-}
-subsys_initcall(blkcg_init);
-
 module_param(blkcg_debug_stats, bool, 0644);
 MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
@@ -72,9 +72,10 @@ struct blkcg_gq {
        struct blkg_iostat_set          iostat;
 
        struct blkg_policy_data         *pd[BLKCG_MAX_POLS];
 
+#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
        spinlock_t                      async_bio_lock;
        struct bio_list                 async_bios;
+#endif
        union {
                struct work_struct      async_bio_work;
                struct work_struct      free_work;
...
@@ -2,6 +2,7 @@
 config BTRFS_FS
        tristate "Btrfs filesystem support"
+       select BLK_CGROUP_PUNT_BIO
        select CRYPTO
        select CRYPTO_CRC32C
        select LIBCRC32C
...
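
To illustrate the intent described in the comment above blkcg_punt_bio_submit(), here is a hypothetical caller; the function example_writeback_bio() and its setup are illustrative only, not part of this patch:

/* Illustrative only: a shared worker issuing a bio on behalf of a cgroup. */
static void example_writeback_bio(struct bio *bio,
                                  struct cgroup_subsys_state *blkcg_css)
{
        /* associate the bio with the target cgroup's blkg */
        bio_associate_blkg_from_css(bio, blkcg_css);

        /*
         * Punt instead of calling submit_bio() directly: the bio is queued
         * on the blkg's async_bios list and issued from the per-blkcg work
         * item, so this shared context is not throttled against the cgroup.
         */
        blkcg_punt_bio_submit(bio);
}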