Commit 728dba3a authored by Eric W. Biederman

namespaces: Use task_lock and not rcu to protect nsproxy

The synchronous synchronize_rcu in switch_task_namespaces makes setns
a sufficiently expensive system call that people have complained.

Upon inspection nsproxy no longer needs rcu protection for remote reads.
Remote reads are rare.  So optimize for same process reads and writes
by switching to using task_lock instead.

This yields a simpler to understand lock, and a faster setns system call.

In particular this fixes a performance regression observed
by Rafael David Tinoco <rafael.tinoco@canonical.com>.

This is effectively a revert of Pavel Emelyanov's commit
cf7b708c Make access to task's nsproxy lighter
from 2007.  The race this originally fixed no longer exists as
do_notify_parent uses task_active_pid_ns(parent) instead of
parent->nsproxy.
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
parent 9a3c4145
...@@ -2972,13 +2972,13 @@ static void *mntns_get(struct task_struct *task) ...@@ -2972,13 +2972,13 @@ static void *mntns_get(struct task_struct *task)
struct mnt_namespace *ns = NULL; struct mnt_namespace *ns = NULL;
struct nsproxy *nsproxy; struct nsproxy *nsproxy;
rcu_read_lock(); task_lock(task);
nsproxy = task_nsproxy(task); nsproxy = task->nsproxy;
if (nsproxy) { if (nsproxy) {
ns = nsproxy->mnt_ns; ns = nsproxy->mnt_ns;
get_mnt_ns(ns); get_mnt_ns(ns);
} }
rcu_read_unlock(); task_unlock(task);
return ns; return ns;
} }
......
...@@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir) ...@@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir)
rcu_read_lock(); rcu_read_lock();
task = pid_task(proc_pid(dir), PIDTYPE_PID); task = pid_task(proc_pid(dir), PIDTYPE_PID);
if (task != NULL) { if (task != NULL) {
ns = task_nsproxy(task); task_lock(task);
ns = task->nsproxy;
if (ns != NULL) if (ns != NULL)
net = get_net(ns->net_ns); net = get_net(ns->net_ns);
task_unlock(task);
} }
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file, ...@@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file,
if (!task) if (!task)
goto err; goto err;
rcu_read_lock(); task_lock(task);
nsp = task_nsproxy(task); nsp = task->nsproxy;
if (!nsp || !nsp->mnt_ns) { if (!nsp || !nsp->mnt_ns) {
rcu_read_unlock(); task_unlock(task);
put_task_struct(task); put_task_struct(task);
goto err; goto err;
} }
ns = nsp->mnt_ns; ns = nsp->mnt_ns;
get_mnt_ns(ns); get_mnt_ns(ns);
rcu_read_unlock();
task_lock(task);
if (!task->fs) { if (!task->fs) {
task_unlock(task); task_unlock(task);
put_task_struct(task); put_task_struct(task);
......
...@@ -40,32 +40,28 @@ extern struct nsproxy init_nsproxy; ...@@ -40,32 +40,28 @@ extern struct nsproxy init_nsproxy;
* the namespaces access rules are: * the namespaces access rules are:
* *
* 1. only current task is allowed to change tsk->nsproxy pointer or * 1. only current task is allowed to change tsk->nsproxy pointer or
* any pointer on the nsproxy itself * any pointer on the nsproxy itself. Current must hold the task_lock
* when changing tsk->nsproxy.
* *
* 2. when accessing (i.e. reading) current task's namespaces - no * 2. when accessing (i.e. reading) current task's namespaces - no
* precautions should be taken - just dereference the pointers * precautions should be taken - just dereference the pointers
* *
* 3. the access to other task namespaces is performed like this * 3. the access to other task namespaces is performed like this
* rcu_read_lock(); * task_lock(task);
* nsproxy = task_nsproxy(tsk); * nsproxy = task->nsproxy;
* if (nsproxy != NULL) { * if (nsproxy != NULL) {
* / * * / *
* * work with the namespaces here * * work with the namespaces here
* * e.g. get the reference on one of them * * e.g. get the reference on one of them
* * / * * /
* } / * * } / *
* * NULL task_nsproxy() means that this task is * * NULL task->nsproxy means that this task is
* * almost dead (zombie) * * almost dead (zombie)
* * / * * /
* rcu_read_unlock(); * task_unlock(task);
* *
*/ */
static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
{
return rcu_dereference(tsk->nsproxy);
}
int copy_namespaces(unsigned long flags, struct task_struct *tsk); int copy_namespaces(unsigned long flags, struct task_struct *tsk);
void exit_task_namespaces(struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
......
...@@ -154,11 +154,11 @@ static void *ipcns_get(struct task_struct *task) ...@@ -154,11 +154,11 @@ static void *ipcns_get(struct task_struct *task)
struct ipc_namespace *ns = NULL; struct ipc_namespace *ns = NULL;
struct nsproxy *nsproxy; struct nsproxy *nsproxy;
rcu_read_lock(); task_lock(task);
nsproxy = task_nsproxy(task); nsproxy = task->nsproxy;
if (nsproxy) if (nsproxy)
ns = get_ipc_ns(nsproxy->ipc_ns); ns = get_ipc_ns(nsproxy->ipc_ns);
rcu_read_unlock(); task_unlock(task);
return ns; return ns;
} }
......
...@@ -204,20 +204,13 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) ...@@ -204,20 +204,13 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
might_sleep(); might_sleep();
task_lock(p);
ns = p->nsproxy; ns = p->nsproxy;
p->nsproxy = new;
task_unlock(p);
rcu_assign_pointer(p->nsproxy, new); if (ns && atomic_dec_and_test(&ns->count))
if (ns && atomic_dec_and_test(&ns->count)) {
/*
* wait for others to get what they want from this nsproxy.
*
* cannot release this nsproxy via the call_rcu() since
* put_mnt_ns() will want to sleep
*/
synchronize_rcu();
free_nsproxy(ns); free_nsproxy(ns);
}
} }
void exit_task_namespaces(struct task_struct *p) void exit_task_namespaces(struct task_struct *p)
......
...@@ -93,13 +93,13 @@ static void *utsns_get(struct task_struct *task) ...@@ -93,13 +93,13 @@ static void *utsns_get(struct task_struct *task)
struct uts_namespace *ns = NULL; struct uts_namespace *ns = NULL;
struct nsproxy *nsproxy; struct nsproxy *nsproxy;
rcu_read_lock(); task_lock(task);
nsproxy = task_nsproxy(task); nsproxy = task->nsproxy;
if (nsproxy) { if (nsproxy) {
ns = nsproxy->uts_ns; ns = nsproxy->uts_ns;
get_uts_ns(ns); get_uts_ns(ns);
} }
rcu_read_unlock(); task_unlock(task);
return ns; return ns;
} }
......
...@@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid) ...@@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid)
tsk = find_task_by_vpid(pid); tsk = find_task_by_vpid(pid);
if (tsk) { if (tsk) {
struct nsproxy *nsproxy; struct nsproxy *nsproxy;
nsproxy = task_nsproxy(tsk); task_lock(tsk);
nsproxy = tsk->nsproxy;
if (nsproxy) if (nsproxy)
net = get_net(nsproxy->net_ns); net = get_net(nsproxy->net_ns);
task_unlock(tsk);
} }
rcu_read_unlock(); rcu_read_unlock();
return net; return net;
...@@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task) ...@@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task)
struct net *net = NULL; struct net *net = NULL;
struct nsproxy *nsproxy; struct nsproxy *nsproxy;
rcu_read_lock(); task_lock(task);
nsproxy = task_nsproxy(task); nsproxy = task->nsproxy;
if (nsproxy) if (nsproxy)
net = get_net(nsproxy->net_ns); net = get_net(nsproxy->net_ns);
rcu_read_unlock(); task_unlock(task);
return net; return net;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment