Commit 502b24c2 authored by Linus Torvalds

Merge branch 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup changes from Tejun Heo:
 "Nothing too drastic.

   - Removal of synchronize_rcu() from userland visible paths.

   - Various fixes and cleanups from Li.

   - cgroup_rightmost_descendant() added which will be used by cpuset
     changes (it will be a separate pull request)."

* 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: fail if monitored file and event_control are in different cgroup
  cgroup: fix cgroup_rmdir() vs close(eventfd) race
  cpuset: fix cpuset_print_task_mems_allowed() vs rename() race
  cgroup: fix exit() vs rmdir() race
  cgroup: remove bogus comments in cgroup_diput()
  cgroup: remove synchronize_rcu() from cgroup_diput()
  cgroup: remove duplicate RCU free on struct cgroup
  sched: remove redundant NULL cgroup check in task_group_path()
  sched: split out css_online/css_offline from tg creation/destruction
  cgroup: initialize cgrp->dentry before css_alloc()
  cgroup: remove a NULL check in cgroup_exit()
  cgroup: fix bogus kernel warnings when cgroup_create() failed
  cgroup: remove synchronize_rcu() from rebind_subsystems()
  cgroup: remove synchronize_rcu() from cgroup_attach_{task|proc}()
  cgroup: use new hashtable implementation
  cgroups: fix cgroup_event_listener error handling
  cgroups: move cgroup_event_listener.c to tools/cgroup
  cgroup: implement cgroup_rightmost_descendant()
  cgroup: remove unused dummy cgroup_fork_callbacks()
parents ece8e0b2 f169007b
...@@ -4,8 +4,6 @@ blkio-controller.txt ...@@ -4,8 +4,6 @@ blkio-controller.txt
- Description for Block IO Controller, implementation and usage details. - Description for Block IO Controller, implementation and usage details.
cgroups.txt cgroups.txt
- Control Groups definition, implementation details, examples and API. - Control Groups definition, implementation details, examples and API.
cgroup_event_listener.c
- A user program for cgroup listener.
cpuacct.txt cpuacct.txt
- CPU Accounting Controller; account CPU usage for groups of tasks. - CPU Accounting Controller; account CPU usage for groups of tasks.
cpusets.txt cpusets.txt
......
...@@ -399,8 +399,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. ...@@ -399,8 +399,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
9.10 Memory thresholds 9.10 Memory thresholds
Memory controller implements memory thresholds using cgroups notification Memory controller implements memory thresholds using cgroups notification
API. You can use Documentation/cgroups/cgroup_event_listener.c to test API. You can use tools/cgroup/cgroup_event_listener.c to test it.
it.
(Shell-A) Create cgroup and run event listener (Shell-A) Create cgroup and run event listener
# mkdir /cgroup/A # mkdir /cgroup/A
......
...@@ -203,6 +203,7 @@ struct cgroup { ...@@ -203,6 +203,7 @@ struct cgroup {
/* For RCU-protected deletion */ /* For RCU-protected deletion */
struct rcu_head rcu_head; struct rcu_head rcu_head;
struct work_struct free_work;
/* List of events which userspace want to receive */ /* List of events which userspace want to receive */
struct list_head event_list; struct list_head event_list;
...@@ -558,6 +559,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task, ...@@ -558,6 +559,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,
struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
struct cgroup *cgroup); struct cgroup *cgroup);
struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);
/** /**
* cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
...@@ -706,7 +708,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id); ...@@ -706,7 +708,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; } static inline int cgroup_init(void) { return 0; }
static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_fork(struct task_struct *p) {}
static inline void cgroup_fork_callbacks(struct task_struct *p) {}
static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_post_fork(struct task_struct *p) {}
static inline void cgroup_exit(struct task_struct *p, int callbacks) {} static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
......
...@@ -2659,7 +2659,10 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask); ...@@ -2659,7 +2659,10 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
extern struct task_group root_task_group; extern struct task_group root_task_group;
extern struct task_group *sched_create_group(struct task_group *parent); extern struct task_group *sched_create_group(struct task_group *parent);
extern void sched_online_group(struct task_group *tg,
struct task_group *parent);
extern void sched_destroy_group(struct task_group *tg); extern void sched_destroy_group(struct task_group *tg);
extern void sched_offline_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk); extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
......
This diff is collapsed.
...@@ -2511,8 +2511,16 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk) ...@@ -2511,8 +2511,16 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk)
dentry = task_cs(tsk)->css.cgroup->dentry; dentry = task_cs(tsk)->css.cgroup->dentry;
spin_lock(&cpuset_buffer_lock); spin_lock(&cpuset_buffer_lock);
snprintf(cpuset_name, CPUSET_NAME_LEN,
dentry ? (const char *)dentry->d_name.name : "/"); if (!dentry) {
strcpy(cpuset_name, "/");
} else {
spin_lock(&dentry->d_lock);
strlcpy(cpuset_name, (const char *)dentry->d_name.name,
CPUSET_NAME_LEN);
spin_unlock(&dentry->d_lock);
}
nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
tsk->mems_allowed); tsk->mems_allowed);
printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n", printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
......
...@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref) ...@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref)
ag->tg->rt_se = NULL; ag->tg->rt_se = NULL;
ag->tg->rt_rq = NULL; ag->tg->rt_rq = NULL;
#endif #endif
sched_offline_group(ag->tg);
sched_destroy_group(ag->tg); sched_destroy_group(ag->tg);
} }
...@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void) ...@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void)
if (IS_ERR(tg)) if (IS_ERR(tg))
goto out_free; goto out_free;
sched_online_group(tg, &root_task_group);
kref_init(&ag->kref); kref_init(&ag->kref);
init_rwsem(&ag->lock); init_rwsem(&ag->lock);
ag->id = atomic_inc_return(&autogroup_seq_nr); ag->id = atomic_inc_return(&autogroup_seq_nr);
......
...@@ -7161,7 +7161,6 @@ static void free_sched_group(struct task_group *tg) ...@@ -7161,7 +7161,6 @@ static void free_sched_group(struct task_group *tg)
struct task_group *sched_create_group(struct task_group *parent) struct task_group *sched_create_group(struct task_group *parent)
{ {
struct task_group *tg; struct task_group *tg;
unsigned long flags;
tg = kzalloc(sizeof(*tg), GFP_KERNEL); tg = kzalloc(sizeof(*tg), GFP_KERNEL);
if (!tg) if (!tg)
...@@ -7173,6 +7172,17 @@ struct task_group *sched_create_group(struct task_group *parent) ...@@ -7173,6 +7172,17 @@ struct task_group *sched_create_group(struct task_group *parent)
if (!alloc_rt_sched_group(tg, parent)) if (!alloc_rt_sched_group(tg, parent))
goto err; goto err;
return tg;
err:
free_sched_group(tg);
return ERR_PTR(-ENOMEM);
}
void sched_online_group(struct task_group *tg, struct task_group *parent)
{
unsigned long flags;
spin_lock_irqsave(&task_group_lock, flags); spin_lock_irqsave(&task_group_lock, flags);
list_add_rcu(&tg->list, &task_groups); list_add_rcu(&tg->list, &task_groups);
...@@ -7182,12 +7192,6 @@ struct task_group *sched_create_group(struct task_group *parent) ...@@ -7182,12 +7192,6 @@ struct task_group *sched_create_group(struct task_group *parent)
INIT_LIST_HEAD(&tg->children); INIT_LIST_HEAD(&tg->children);
list_add_rcu(&tg->siblings, &parent->children); list_add_rcu(&tg->siblings, &parent->children);
spin_unlock_irqrestore(&task_group_lock, flags); spin_unlock_irqrestore(&task_group_lock, flags);
return tg;
err:
free_sched_group(tg);
return ERR_PTR(-ENOMEM);
} }
/* rcu callback to free various structures associated with a task group */ /* rcu callback to free various structures associated with a task group */
...@@ -7199,6 +7203,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp) ...@@ -7199,6 +7203,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
/* Destroy runqueue etc associated with a task group */ /* Destroy runqueue etc associated with a task group */
void sched_destroy_group(struct task_group *tg) void sched_destroy_group(struct task_group *tg)
{
/* wait for possible concurrent references to cfs_rqs complete */
call_rcu(&tg->rcu, free_sched_group_rcu);
}
void sched_offline_group(struct task_group *tg)
{ {
unsigned long flags; unsigned long flags;
int i; int i;
...@@ -7211,9 +7221,6 @@ void sched_destroy_group(struct task_group *tg) ...@@ -7211,9 +7221,6 @@ void sched_destroy_group(struct task_group *tg)
list_del_rcu(&tg->list); list_del_rcu(&tg->list);
list_del_rcu(&tg->siblings); list_del_rcu(&tg->siblings);
spin_unlock_irqrestore(&task_group_lock, flags); spin_unlock_irqrestore(&task_group_lock, flags);
/* wait for possible concurrent references to cfs_rqs complete */
call_rcu(&tg->rcu, free_sched_group_rcu);
} }
/* change task's runqueue when it moves between groups. /* change task's runqueue when it moves between groups.
...@@ -7584,6 +7591,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp) ...@@ -7584,6 +7591,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
return &tg->css; return &tg->css;
} }
/*
 * css_online callback for the "cpu" cgroup subsystem: bring the task group
 * backing @cgrp online by handing it, together with its parent's task group,
 * to sched_online_group().
 *
 * Always returns 0.
 */
static int cpu_cgroup_css_online(struct cgroup *cgrp)
{
struct task_group *tg = cgroup_tg(cgrp);
struct task_group *parent;
/*
 * A cgroup without a parent is not passed to sched_online_group().
 * NOTE(review): presumably this is the root cgroup - confirm.
 */
if (!cgrp->parent)
return 0;
parent = cgroup_tg(cgrp->parent);
sched_online_group(tg, parent);
return 0;
}
static void cpu_cgroup_css_free(struct cgroup *cgrp) static void cpu_cgroup_css_free(struct cgroup *cgrp)
{ {
struct task_group *tg = cgroup_tg(cgrp); struct task_group *tg = cgroup_tg(cgrp);
...@@ -7591,6 +7611,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp) ...@@ -7591,6 +7611,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
sched_destroy_group(tg); sched_destroy_group(tg);
} }
/*
 * css_offline callback for the "cpu" cgroup subsystem: take the task group
 * backing @cgrp offline via sched_offline_group(). Freeing the group is not
 * done here; cpu_cgroup_css_free() calls sched_destroy_group() for that.
 */
static void cpu_cgroup_css_offline(struct cgroup *cgrp)
{
struct task_group *tg = cgroup_tg(cgrp);
sched_offline_group(tg);
}
static int cpu_cgroup_can_attach(struct cgroup *cgrp, static int cpu_cgroup_can_attach(struct cgroup *cgrp,
struct cgroup_taskset *tset) struct cgroup_taskset *tset)
{ {
...@@ -7946,6 +7973,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { ...@@ -7946,6 +7973,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
.name = "cpu", .name = "cpu",
.css_alloc = cpu_cgroup_css_alloc, .css_alloc = cpu_cgroup_css_alloc,
.css_free = cpu_cgroup_css_free, .css_free = cpu_cgroup_css_free,
.css_online = cpu_cgroup_css_online,
.css_offline = cpu_cgroup_css_offline,
.can_attach = cpu_cgroup_can_attach, .can_attach = cpu_cgroup_can_attach,
.attach = cpu_cgroup_attach, .attach = cpu_cgroup_attach,
.exit = cpu_cgroup_exit, .exit = cpu_cgroup_exit,
......
...@@ -110,13 +110,6 @@ static char *task_group_path(struct task_group *tg) ...@@ -110,13 +110,6 @@ static char *task_group_path(struct task_group *tg)
if (autogroup_path(tg, group_path, PATH_MAX)) if (autogroup_path(tg, group_path, PATH_MAX))
return group_path; return group_path;
/*
* May be NULL if the underlying cgroup isn't fully-created yet
*/
if (!tg->css.cgroup) {
group_path[0] = '\0';
return group_path;
}
cgroup_path(tg->css.cgroup, group_path, PATH_MAX); cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
return group_path; return group_path;
} }
......
...@@ -3,6 +3,7 @@ include scripts/Makefile.include ...@@ -3,6 +3,7 @@ include scripts/Makefile.include
help: help:
@echo 'Possible targets:' @echo 'Possible targets:'
@echo '' @echo ''
@echo ' cgroup - cgroup tools'
@echo ' cpupower - a tool for all things x86 CPU power' @echo ' cpupower - a tool for all things x86 CPU power'
@echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
@echo ' lguest - a minimal 32-bit x86 hypervisor' @echo ' lguest - a minimal 32-bit x86 hypervisor'
...@@ -33,7 +34,7 @@ help: ...@@ -33,7 +34,7 @@ help:
cpupower: FORCE cpupower: FORCE
$(call descend,power/$@) $(call descend,power/$@)
firewire lguest perf usb virtio vm: FORCE cgroup firewire lguest perf usb virtio vm: FORCE
$(call descend,$@) $(call descend,$@)
selftests: FORCE selftests: FORCE
...@@ -45,7 +46,7 @@ turbostat x86_energy_perf_policy: FORCE ...@@ -45,7 +46,7 @@ turbostat x86_energy_perf_policy: FORCE
cpupower_install: cpupower_install:
$(call descend,power/$(@:_install=),install) $(call descend,power/$(@:_install=),install)
firewire_install lguest_install perf_install usb_install virtio_install vm_install: cgroup_install firewire_install lguest_install perf_install usb_install virtio_install vm_install:
$(call descend,$(@:_install=),install) $(call descend,$(@:_install=),install)
selftests_install: selftests_install:
...@@ -54,14 +55,14 @@ selftests_install: ...@@ -54,14 +55,14 @@ selftests_install:
turbostat_install x86_energy_perf_policy_install: turbostat_install x86_energy_perf_policy_install:
$(call descend,power/x86/$(@:_install=),install) $(call descend,power/x86/$(@:_install=),install)
install: cpupower_install firewire_install lguest_install perf_install \ install: cgroup_install cpupower_install firewire_install lguest_install \
selftests_install turbostat_install usb_install virtio_install \ perf_install selftests_install turbostat_install usb_install \
vm_install x86_energy_perf_policy_install virtio_install vm_install x86_energy_perf_policy_install
cpupower_clean: cpupower_clean:
$(call descend,power/cpupower,clean) $(call descend,power/cpupower,clean)
firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean: cgroup_clean firewire_clean lguest_clean perf_clean usb_clean virtio_clean vm_clean:
$(call descend,$(@:_clean=),clean) $(call descend,$(@:_clean=),clean)
selftests_clean: selftests_clean:
...@@ -70,8 +71,8 @@ selftests_clean: ...@@ -70,8 +71,8 @@ selftests_clean:
turbostat_clean x86_energy_perf_policy_clean: turbostat_clean x86_energy_perf_policy_clean:
$(call descend,power/x86/$(@:_clean=),clean) $(call descend,power/x86/$(@:_clean=),clean)
clean: cpupower_clean firewire_clean lguest_clean perf_clean selftests_clean \ clean: cgroup_clean cpupower_clean firewire_clean lguest_clean perf_clean \
turbostat_clean usb_clean virtio_clean vm_clean \ selftests_clean turbostat_clean usb_clean virtio_clean \
x86_energy_perf_policy_clean vm_clean x86_energy_perf_policy_clean
.PHONY: FORCE .PHONY: FORCE
cgroup_event_listener
# Makefile for cgroup tools
CC = $(CROSS_COMPILE)gcc
CFLAGS = -Wall -Wextra
all: cgroup_event_listener
%: %.c
$(CC) $(CFLAGS) -o $@ $^
clean:
$(RM) cgroup_event_listener
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
*/ */
#include <assert.h> #include <assert.h>
#include <err.h>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <libgen.h> #include <libgen.h>
...@@ -15,7 +16,7 @@ ...@@ -15,7 +16,7 @@
#include <sys/eventfd.h> #include <sys/eventfd.h>
#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>\n" #define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>"
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
...@@ -26,49 +27,33 @@ int main(int argc, char **argv) ...@@ -26,49 +27,33 @@ int main(int argc, char **argv)
char line[LINE_MAX]; char line[LINE_MAX];
int ret; int ret;
if (argc != 3) { if (argc != 3)
fputs(USAGE_STR, stderr); errx(1, "%s", USAGE_STR);
return 1;
}
cfd = open(argv[1], O_RDONLY); cfd = open(argv[1], O_RDONLY);
if (cfd == -1) { if (cfd == -1)
fprintf(stderr, "Cannot open %s: %s\n", argv[1], err(1, "Cannot open %s", argv[1]);
strerror(errno));
goto out;
}
ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control", ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
dirname(argv[1])); dirname(argv[1]));
if (ret >= PATH_MAX) { if (ret >= PATH_MAX)
fputs("Path to cgroup.event_control is too long\n", stderr); errx(1, "Path to cgroup.event_control is too long");
goto out;
}
event_control = open(event_control_path, O_WRONLY); event_control = open(event_control_path, O_WRONLY);
if (event_control == -1) { if (event_control == -1)
fprintf(stderr, "Cannot open %s: %s\n", event_control_path, err(1, "Cannot open %s", event_control_path);
strerror(errno));
goto out;
}
efd = eventfd(0, 0); efd = eventfd(0, 0);
if (efd == -1) { if (efd == -1)
perror("eventfd() failed"); err(1, "eventfd() failed");
goto out;
}
ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]); ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
if (ret >= LINE_MAX) { if (ret >= LINE_MAX)
fputs("Arguments string is too long\n", stderr); errx(1, "Arguments string is too long");
goto out;
}
ret = write(event_control, line, strlen(line) + 1); ret = write(event_control, line, strlen(line) + 1);
if (ret == -1) { if (ret == -1)
perror("Cannot write to cgroup.event_control"); err(1, "Cannot write to cgroup.event_control");
goto out;
}
while (1) { while (1) {
uint64_t result; uint64_t result;
...@@ -77,34 +62,21 @@ int main(int argc, char **argv) ...@@ -77,34 +62,21 @@ int main(int argc, char **argv)
if (ret == -1) { if (ret == -1) {
if (errno == EINTR) if (errno == EINTR)
continue; continue;
perror("Cannot read from eventfd"); err(1, "Cannot read from eventfd");
break;
} }
assert(ret == sizeof(result)); assert(ret == sizeof(result));
ret = access(event_control_path, W_OK); ret = access(event_control_path, W_OK);
if ((ret == -1) && (errno == ENOENT)) { if ((ret == -1) && (errno == ENOENT)) {
puts("The cgroup seems to have removed."); puts("The cgroup seems to have removed.");
ret = 0;
break; break;
} }
if (ret == -1) { if (ret == -1)
perror("cgroup.event_control " err(1, "cgroup.event_control is not accessible any more");
"is not accessible any more");
break;
}
printf("%s %s: crossed\n", argv[1], argv[2]); printf("%s %s: crossed\n", argv[1], argv[2]);
} }
out: return 0;
if (efd >= 0)
close(efd);
if (event_control >= 0)
close(event_control);
if (cfd >= 0)
close(cfd);
return (ret != 0);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment