Commit 763cfc86 authored by Linus Torvalds

Merge branch 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "Two patches to fix a deadlock which can be easily triggered if memcg
  charge moving is used.

  This bug was introduced while converting threadgroup locking to a
  global percpu_rwsem and is caused by cgroup controller task migration
  path depending on the ability to create new kthreads.  cpuset had a
  similar issue which was fixed by performing heavy-lifting operations
  asynchronously to task migration.  The two patches fix the same issue in
  memcg in a similar way.  The first patch makes the mechanism generic
  and the second relocates memcg charge moving outside the migration
  path.

  Given that we don't want to perform heavy operations while
  writelocking threadgroup lock anyway, moving them out of the way is a
  desirable solution.  One thing to note is that the problem was
  difficult to debug because lockdep couldn't figure out the deadlock
  condition.  Looking into how to improve that."

* 'for-4.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  memcg: relocate charge moving from ->attach to ->post_attach
  cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback
parents 3118e5f9 264a0ae1
...@@ -444,6 +444,7 @@ struct cgroup_subsys { ...@@ -444,6 +444,7 @@ struct cgroup_subsys {
int (*can_attach)(struct cgroup_taskset *tset); int (*can_attach)(struct cgroup_taskset *tset);
void (*cancel_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset);
void (*attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset);
void (*post_attach)(void);
int (*can_fork)(struct task_struct *task); int (*can_fork)(struct task_struct *task);
void (*cancel_fork)(struct task_struct *task); void (*cancel_fork)(struct task_struct *task);
void (*fork)(struct task_struct *task); void (*fork)(struct task_struct *task);
......
...@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask) ...@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
task_unlock(current); task_unlock(current);
} }
extern void cpuset_post_attach_flush(void);
#else /* !CONFIG_CPUSETS */ #else /* !CONFIG_CPUSETS */
static inline bool cpusets_enabled(void) { return false; } static inline bool cpusets_enabled(void) { return false; }
...@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq) ...@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
return false; return false;
} }
static inline void cpuset_post_attach_flush(void)
{
}
#endif /* !CONFIG_CPUSETS */ #endif /* !CONFIG_CPUSETS */
#endif /* _LINUX_CPUSET_H */ #endif /* _LINUX_CPUSET_H */
...@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, ...@@ -2825,9 +2825,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup) size_t nbytes, loff_t off, bool threadgroup)
{ {
struct task_struct *tsk; struct task_struct *tsk;
struct cgroup_subsys *ss;
struct cgroup *cgrp; struct cgroup *cgrp;
pid_t pid; pid_t pid;
int ret; int ssid, ret;
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL; return -EINVAL;
...@@ -2875,8 +2876,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, ...@@ -2875,8 +2876,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
rcu_read_unlock(); rcu_read_unlock();
out_unlock_threadgroup: out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem); percpu_up_write(&cgroup_threadgroup_rwsem);
for_each_subsys(ss, ssid)
if (ss->post_attach)
ss->post_attach();
cgroup_kn_unlock(of->kn); cgroup_kn_unlock(of->kn);
cpuset_post_attach_flush();
return ret ?: nbytes; return ret ?: nbytes;
} }
......
...@@ -58,7 +58,6 @@ ...@@ -58,7 +58,6 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h> #include <linux/cgroup.h>
#include <linux/wait.h> #include <linux/wait.h>
...@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, ...@@ -1016,7 +1015,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
} }
} }
void cpuset_post_attach_flush(void) static void cpuset_post_attach(void)
{ {
flush_workqueue(cpuset_migrate_mm_wq); flush_workqueue(cpuset_migrate_mm_wq);
} }
...@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { ...@@ -2087,6 +2086,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.can_attach = cpuset_can_attach, .can_attach = cpuset_can_attach,
.cancel_attach = cpuset_cancel_attach, .cancel_attach = cpuset_cancel_attach,
.attach = cpuset_attach, .attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind, .bind = cpuset_bind,
.legacy_cftypes = files, .legacy_cftypes = files,
.early_init = true, .early_init = true,
......
...@@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); ...@@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
/* "mc" and its members are protected by cgroup_mutex */ /* "mc" and its members are protected by cgroup_mutex */
static struct move_charge_struct { static struct move_charge_struct {
spinlock_t lock; /* for from, to */ spinlock_t lock; /* for from, to */
struct mm_struct *mm;
struct mem_cgroup *from; struct mem_cgroup *from;
struct mem_cgroup *to; struct mem_cgroup *to;
unsigned long flags; unsigned long flags;
...@@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void) ...@@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)
static void mem_cgroup_clear_mc(void) static void mem_cgroup_clear_mc(void)
{ {
struct mm_struct *mm = mc.mm;
/* /*
* we must clear moving_task before waking up waiters at the end of * we must clear moving_task before waking up waiters at the end of
* task migration. * task migration.
...@@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void) ...@@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
spin_lock(&mc.lock); spin_lock(&mc.lock);
mc.from = NULL; mc.from = NULL;
mc.to = NULL; mc.to = NULL;
mc.mm = NULL;
spin_unlock(&mc.lock); spin_unlock(&mc.lock);
mmput(mm);
} }
static int mem_cgroup_can_attach(struct cgroup_taskset *tset) static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
...@@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) ...@@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
VM_BUG_ON(mc.moved_swap); VM_BUG_ON(mc.moved_swap);
spin_lock(&mc.lock); spin_lock(&mc.lock);
mc.mm = mm;
mc.from = from; mc.from = from;
mc.to = memcg; mc.to = memcg;
mc.flags = move_flags; mc.flags = move_flags;
...@@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) ...@@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
ret = mem_cgroup_precharge_mc(mm); ret = mem_cgroup_precharge_mc(mm);
if (ret) if (ret)
mem_cgroup_clear_mc(); mem_cgroup_clear_mc();
} } else {
mmput(mm); mmput(mm);
}
return ret; return ret;
} }
...@@ -4852,11 +4860,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, ...@@ -4852,11 +4860,11 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
return ret; return ret;
} }
static void mem_cgroup_move_charge(struct mm_struct *mm) static void mem_cgroup_move_charge(void)
{ {
struct mm_walk mem_cgroup_move_charge_walk = { struct mm_walk mem_cgroup_move_charge_walk = {
.pmd_entry = mem_cgroup_move_charge_pte_range, .pmd_entry = mem_cgroup_move_charge_pte_range,
.mm = mm, .mm = mc.mm,
}; };
lru_add_drain_all(); lru_add_drain_all();
...@@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) ...@@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
atomic_inc(&mc.from->moving_account); atomic_inc(&mc.from->moving_account);
synchronize_rcu(); synchronize_rcu();
retry: retry:
if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
/* /*
* Someone who are holding the mmap_sem might be waiting in * Someone who are holding the mmap_sem might be waiting in
* waitq. So we cancel all extra charges, wake up all waiters, * waitq. So we cancel all extra charges, wake up all waiters,
...@@ -4885,23 +4893,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) ...@@ -4885,23 +4893,16 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
* additional charge, the page walk just aborts. * additional charge, the page walk just aborts.
*/ */
walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk); walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
up_read(&mm->mmap_sem); up_read(&mc.mm->mmap_sem);
atomic_dec(&mc.from->moving_account); atomic_dec(&mc.from->moving_account);
} }
static void mem_cgroup_move_task(struct cgroup_taskset *tset) static void mem_cgroup_move_task(void)
{ {
struct cgroup_subsys_state *css; if (mc.to) {
struct task_struct *p = cgroup_taskset_first(tset, &css); mem_cgroup_move_charge();
struct mm_struct *mm = get_task_mm(p);
if (mm) {
if (mc.to)
mem_cgroup_move_charge(mm);
mmput(mm);
}
if (mc.to)
mem_cgroup_clear_mc(); mem_cgroup_clear_mc();
}
} }
#else /* !CONFIG_MMU */ #else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset) static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
...@@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) ...@@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset) static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{ {
} }
static void mem_cgroup_move_task(struct cgroup_taskset *tset) static void mem_cgroup_move_task(void)
{ {
} }
#endif #endif
...@@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = { ...@@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset, .css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach, .can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach, .cancel_attach = mem_cgroup_cancel_attach,
.attach = mem_cgroup_move_task, .post_attach = mem_cgroup_move_task,
.bind = mem_cgroup_bind, .bind = mem_cgroup_bind,
.dfl_cftypes = memory_files, .dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files, .legacy_cftypes = mem_cgroup_legacy_files,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment