Commit 2a68165d authored by Daniel Bristot de Oliveira's avatar Daniel Bristot de Oliveira Committed by Khalid Elmously

cgroup: Disable IRQs while holding css_set_lock

BugLink: https://bugs.launchpad.net/bugs/1845036

commit 82d6489d upstream.

While testing the deadline scheduler + cgroup setup I hit this
warning.

[  132.612935] ------------[ cut here ]------------
[  132.612951] WARNING: CPU: 5 PID: 0 at kernel/softirq.c:150 __local_bh_enable_ip+0x6b/0x80
[  132.612952] Modules linked in: (a ton of modules...)
[  132.612981] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc2 #2
[  132.612981] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.2-20150714_191134- 04/01/2014
[  132.612982]  0000000000000086 45c8bb5effdd088b ffff88013fd43da0 ffffffff813d229e
[  132.612984]  0000000000000000 0000000000000000 ffff88013fd43de0 ffffffff810a652b
[  132.612985]  00000096811387b5 0000000000000200 ffff8800bab29d80 ffff880034c54c00
[  132.612986] Call Trace:
[  132.612987]  <IRQ>  [<ffffffff813d229e>] dump_stack+0x63/0x85
[  132.612994]  [<ffffffff810a652b>] __warn+0xcb/0xf0
[  132.612997]  [<ffffffff810e76a0>] ? push_dl_task.part.32+0x170/0x170
[  132.612999]  [<ffffffff810a665d>] warn_slowpath_null+0x1d/0x20
[  132.613000]  [<ffffffff810aba5b>] __local_bh_enable_ip+0x6b/0x80
[  132.613008]  [<ffffffff817d6c8a>] _raw_write_unlock_bh+0x1a/0x20
[  132.613010]  [<ffffffff817d6c9e>] _raw_spin_unlock_bh+0xe/0x10
[  132.613015]  [<ffffffff811388ac>] put_css_set+0x5c/0x60
[  132.613016]  [<ffffffff8113dc7f>] cgroup_free+0x7f/0xa0
[  132.613017]  [<ffffffff810a3912>] __put_task_struct+0x42/0x140
[  132.613018]  [<ffffffff810e776a>] dl_task_timer+0xca/0x250
[  132.613027]  [<ffffffff810e76a0>] ? push_dl_task.part.32+0x170/0x170
[  132.613030]  [<ffffffff8111371e>] __hrtimer_run_queues+0xee/0x270
[  132.613031]  [<ffffffff81113ec8>] hrtimer_interrupt+0xa8/0x190
[  132.613034]  [<ffffffff81051a58>] local_apic_timer_interrupt+0x38/0x60
[  132.613035]  [<ffffffff817d9b0d>] smp_apic_timer_interrupt+0x3d/0x50
[  132.613037]  [<ffffffff817d7c5c>] apic_timer_interrupt+0x8c/0xa0
[  132.613038]  <EOI>  [<ffffffff81063466>] ? native_safe_halt+0x6/0x10
[  132.613043]  [<ffffffff81037a4e>] default_idle+0x1e/0xd0
[  132.613044]  [<ffffffff810381cf>] arch_cpu_idle+0xf/0x20
[  132.613046]  [<ffffffff810e8fda>] default_idle_call+0x2a/0x40
[  132.613047]  [<ffffffff810e92d7>] cpu_startup_entry+0x2e7/0x340
[  132.613048]  [<ffffffff81050235>] start_secondary+0x155/0x190
[  132.613049] ---[ end trace f91934d162ce9977 ]---

The warn is the spin_(lock|unlock)_bh(&css_set_lock) in the interrupt
context. Converting the spin_lock_bh to spin_lock_irq(save) to avoid
this problem - and other problems of sharing a spinlock with an
interrupt.

Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: cgroups@vger.kernel.org
Cc: stable@vger.kernel.org # 4.5+
Cc: linux-kernel@vger.kernel.org
Reviewed-by: default avatarRik van Riel <riel@redhat.com>
Reviewed-by: default avatar"Luis Claudio R. Goncalves" <lgoncalv@redhat.com>
Signed-off-by: default avatarDaniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: default avatarZefan Li <lizefan@huawei.com>
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Signed-off-by: default avatarZubin Mithra <zsm@chromium.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: default avatarConnor Kuehl <connor.kuehl@canonical.com>
Signed-off-by: default avatarKhalid Elmously <khalid.elmously@canonical.com>
parent da070c47
......@@ -797,6 +797,8 @@ static void put_css_set_locked(struct css_set *cset)
static void put_css_set(struct css_set *cset)
{
unsigned long flags;
/*
* Ensure that the refcount doesn't hit zero while any readers
* can see it. Similar to atomic_dec_and_lock(), but for an
......@@ -805,9 +807,9 @@ static void put_css_set(struct css_set *cset)
if (atomic_add_unless(&cset->refcount, -1, 1))
return;
spin_lock_bh(&css_set_lock);
spin_lock_irqsave(&css_set_lock, flags);
put_css_set_locked(cset);
spin_unlock_bh(&css_set_lock);
spin_unlock_irqrestore(&css_set_lock, flags);
}
/*
......@@ -1030,11 +1032,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
/* First see if we already have a cgroup group that matches
* the desired set */
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
cset = find_existing_css_set(old_cset, cgrp, template);
if (cset)
get_css_set(cset);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
if (cset)
return cset;
......@@ -1062,7 +1064,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
* find_existing_css_set() */
memcpy(cset->subsys, template, sizeof(cset->subsys));
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
/* Add reference counts and links from the new css_set. */
list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
struct cgroup *c = link->cgrp;
......@@ -1088,7 +1090,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
css_get(css);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
return cset;
}
......@@ -1152,7 +1154,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
* Release all the links from cset_links to this hierarchy's
* root cgroup
*/
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
list_del(&link->cset_link);
......@@ -1160,7 +1162,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
kfree(link);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
if (!list_empty(&root->root_list)) {
list_del(&root->root_list);
......@@ -1599,11 +1601,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
ss->root = dst_root;
css->cgroup = dcgrp;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
hash_for_each(css_set_table, i, cset, hlist)
list_move_tail(&cset->e_cset_node[ss->id],
&dcgrp->e_csets[ss->id]);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
src_root->subsys_mask &= ~(1 << ssid);
scgrp->subtree_control &= ~(1 << ssid);
......@@ -1907,7 +1909,7 @@ static void cgroup_enable_task_cg_lists(void)
{
struct task_struct *p, *g;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
if (use_task_css_set_links)
goto out_unlock;
......@@ -1932,8 +1934,12 @@ static void cgroup_enable_task_cg_lists(void)
* entry won't be deleted though the process has exited.
* Do it while holding siglock so that we don't end up
* racing against cgroup_exit().
*
* Interrupts were already disabled while acquiring
* the css_set_lock, so we do not need to disable it
* again when acquiring the sighand->siglock here.
*/
spin_lock_irq(&p->sighand->siglock);
spin_lock(&p->sighand->siglock);
if (!(p->flags & PF_EXITING)) {
struct css_set *cset = task_css_set(p);
......@@ -1942,11 +1948,11 @@ static void cgroup_enable_task_cg_lists(void)
list_add_tail(&p->cg_list, &cset->tasks);
get_css_set(cset);
}
spin_unlock_irq(&p->sighand->siglock);
spin_unlock(&p->sighand->siglock);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
out_unlock:
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
}
static void init_cgroup_housekeeping(struct cgroup *cgrp)
......@@ -2051,13 +2057,13 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
* Link the root cgroup in this hierarchy into all the css_set
* objects.
*/
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
hash_for_each(css_set_table, i, cset, hlist) {
link_css_set(&tmp_links, cset, root_cgrp);
if (css_set_populated(cset))
cgroup_update_populated(root_cgrp, true);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
BUG_ON(!list_empty(&root_cgrp->self.children));
BUG_ON(atomic_read(&root->nr_cgrps) != 1);
......@@ -2366,7 +2372,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
char *path = NULL;
mutex_lock(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
......@@ -2380,7 +2386,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
path = buf;
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex);
return path;
}
......@@ -2555,7 +2561,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
* the new cgroup. There are no failure cases after here, so this
* is the commit point.
*/
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry(cset, &tset->src_csets, mg_node) {
list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
struct css_set *from_cset = task_css_set(task);
......@@ -2566,7 +2572,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
put_css_set_locked(from_cset);
}
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
/*
* Migration is committed, all target tasks are now on dst_csets.
......@@ -2595,13 +2601,13 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
}
}
out_release_tset:
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_splice_init(&tset->dst_csets, &tset->src_csets);
list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
list_del_init(&cset->mg_node);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
return ret;
}
......@@ -2618,14 +2624,14 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
lockdep_assert_held(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cset = NULL;
list_del_init(&cset->mg_preload_node);
put_css_set_locked(cset);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
}
/**
......@@ -2775,7 +2781,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
* already PF_EXITING could be freed from underneath us unless we
* take an rcu_read_lock.
*/
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
rcu_read_lock();
task = leader;
do {
......@@ -2784,7 +2790,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
break;
} while_each_thread(leader, task);
rcu_read_unlock();
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
return cgroup_taskset_migrate(&tset, cgrp);
}
......@@ -2805,7 +2811,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
int ret;
/* look up all src csets */
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
rcu_read_lock();
task = leader;
do {
......@@ -2815,7 +2821,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
break;
} while_each_thread(leader, task);
rcu_read_unlock();
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
/* prepare dst csets and commit */
ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets);
......@@ -2848,9 +2854,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
struct cgroup *cgrp;
struct inode *inode;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
while (!cgroup_is_descendant(dst_cgrp, cgrp))
cgrp = cgroup_parent(cgrp);
......@@ -2952,9 +2958,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
if (root == &cgrp_dfl_root)
continue;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
from_cgrp = task_cgroup_from_root(from, root);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
retval = cgroup_attach_task(from_cgrp, tsk, false);
if (retval)
......@@ -3079,7 +3085,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
percpu_down_write(&cgroup_threadgroup_rwsem);
/* look up all csses currently attached to @cgrp's subtree */
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
struct cgrp_cset_link *link;
......@@ -3091,14 +3097,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
cgroup_migrate_add_src(link->cset, cgrp,
&preloaded_csets);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
/* NULL dst indicates self on default hierarchy */
ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
if (ret)
goto out_finish;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
struct task_struct *task, *ntask;
......@@ -3110,7 +3116,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
cgroup_taskset_add(task, &tset);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
ret = cgroup_taskset_migrate(&tset, cgrp);
out_finish:
......@@ -3793,10 +3799,10 @@ static int cgroup_task_count(const struct cgroup *cgrp)
int count = 0;
struct cgrp_cset_link *link;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += atomic_read(&link->cset->refcount);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
return count;
}
......@@ -4134,7 +4140,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
memset(it, 0, sizeof(*it));
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
it->ss = css->ss;
......@@ -4147,7 +4153,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
css_task_iter_advance_css_set(it);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
}
/**
......@@ -4165,7 +4171,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
it->cur_task = NULL;
}
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
if (it->task_pos) {
it->cur_task = list_entry(it->task_pos, struct task_struct,
......@@ -4174,7 +4180,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
css_task_iter_advance(it);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
return it->cur_task;
}
......@@ -4188,10 +4194,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
void css_task_iter_end(struct css_task_iter *it)
{
if (it->cur_cset) {
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_del(&it->iters_node);
put_css_set_locked(it->cur_cset);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
}
if (it->cur_task)
......@@ -4220,10 +4226,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
mutex_lock(&cgroup_mutex);
/* all tasks in @from are being moved, all csets are source */
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &from->cset_links, cset_link)
cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
ret = cgroup_migrate_prepare_dst(to, &preloaded_csets);
if (ret)
......@@ -5332,10 +5338,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
*/
cgrp->self.flags &= ~CSS_ONLINE;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
link->cset->dead = true;
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
/* initiate massacre of all css's */
for_each_css(css, ssid, cgrp)
......@@ -5591,7 +5597,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
goto out;
mutex_lock(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
for_each_root(root) {
struct cgroup_subsys *ss;
......@@ -5644,7 +5650,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
retval = 0;
out_unlock:
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
mutex_unlock(&cgroup_mutex);
kfree(buf);
out:
......@@ -5805,13 +5811,13 @@ void cgroup_post_fork(struct task_struct *child,
if (use_task_css_set_links) {
struct css_set *cset;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
cset = task_css_set(current);
if (list_empty(&child->cg_list)) {
get_css_set(cset);
css_set_move_task(child, NULL, cset, false);
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
}
/*
......@@ -5855,9 +5861,9 @@ void cgroup_exit(struct task_struct *tsk)
cset = task_css_set(tsk);
if (!list_empty(&tsk->cg_list)) {
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
} else {
get_css_set(cset);
}
......@@ -6200,7 +6206,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
if (!name_buf)
return -ENOMEM;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
rcu_read_lock();
cset = rcu_dereference(current->cgroups);
list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
......@@ -6211,7 +6217,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
c->root->hierarchy_id, name_buf);
}
rcu_read_unlock();
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
kfree(name_buf);
return 0;
}
......@@ -6222,7 +6228,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
struct cgroup_subsys_state *css = seq_css(seq);
struct cgrp_cset_link *link;
spin_lock_bh(&css_set_lock);
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
struct css_set *cset = link->cset;
struct task_struct *task;
......@@ -6245,7 +6251,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
overflow:
seq_puts(seq, " ...\n");
}
spin_unlock_bh(&css_set_lock);
spin_unlock_irq(&css_set_lock);
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment