Commit cca26e80, authored by Kirill Tkhai, committed by Ingo Molnar

sched: Teach scheduler to understand TASK_ON_RQ_MIGRATING state

This is a new p->on_rq state which will be used to indicate that a task
is in the process of migrating between two RQs. It allows us to get
rid of double_rq_lock(), which we previously used to change the rq of
a queued task.

Let's consider an example. To move a task between src_rq and
dst_rq we will do the following:

	raw_spin_lock(&src_rq->lock);
	/* p is a task which is queued on src_rq */
	p = ...;

	dequeue_task(src_rq, p, 0);
	p->on_rq = TASK_ON_RQ_MIGRATING;
	set_task_cpu(p, dst_cpu);
	raw_spin_unlock(&src_rq->lock);

    	/*
    	 * Both RQs are unlocked here.
    	 * Task p is dequeued from src_rq
    	 * but its on_rq value is not zero.
    	 */

	raw_spin_lock(&dst_rq->lock);
	p->on_rq = TASK_ON_RQ_QUEUED;
	enqueue_task(dst_rq, p, 0);
	raw_spin_unlock(&dst_rq->lock);

While p->on_rq is TASK_ON_RQ_MIGRATING, the task is considered to be
"migrating", and other scheduler actions on it are not available to
parallel callers. A parallel caller spins until the migration is
completed.

The unavailable actions are changing of cpu affinity, changing
of priority etc, in other words all the functionality which used
to require task_rq(p)->lock before (and related to the task).

To implement TASK_ON_RQ_MIGRATING support we primarily rely on
the following fact: most scheduler users (from which we are
protecting a migrating task) use task_rq_lock() and
__task_rq_lock() to get the lock of task_rq(p). These primitives
know that the task's cpu may change, and they spin until the
lock of the right RQ is held. We add one more condition to
them, so that they also spin until the migration is
finished.
Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1408528062.23412.88.camel@tkhai
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent da0c1e65
...@@ -333,9 +333,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) ...@@ -333,9 +333,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
for (;;) { for (;;) {
rq = task_rq(p); rq = task_rq(p);
raw_spin_lock(&rq->lock); raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p))) if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
return rq; return rq;
raw_spin_unlock(&rq->lock); raw_spin_unlock(&rq->lock);
while (unlikely(task_on_rq_migrating(p)))
cpu_relax();
} }
} }
...@@ -352,10 +355,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) ...@@ -352,10 +355,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
raw_spin_lock_irqsave(&p->pi_lock, *flags); raw_spin_lock_irqsave(&p->pi_lock, *flags);
rq = task_rq(p); rq = task_rq(p);
raw_spin_lock(&rq->lock); raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p))) if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
return rq; return rq;
raw_spin_unlock(&rq->lock); raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags); raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
while (unlikely(task_on_rq_migrating(p)))
cpu_relax();
} }
} }
...@@ -1678,7 +1684,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) ...@@ -1678,7 +1684,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
success = 1; /* we're going to change ->state */ success = 1; /* we're going to change ->state */
cpu = task_cpu(p); cpu = task_cpu(p);
if (task_on_rq_queued(p) && ttwu_remote(p, wake_flags)) if (p->on_rq && ttwu_remote(p, wake_flags))
goto stat; goto stat;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
...@@ -17,6 +17,7 @@ struct rq; ...@@ -17,6 +17,7 @@ struct rq;
/* task_struct::on_rq states: */ /* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED 1 #define TASK_ON_RQ_QUEUED 1
#define TASK_ON_RQ_MIGRATING 2
extern __read_mostly int scheduler_running; extern __read_mostly int scheduler_running;
...@@ -950,6 +951,11 @@ static inline int task_on_rq_queued(struct task_struct *p) ...@@ -950,6 +951,11 @@ static inline int task_on_rq_queued(struct task_struct *p)
return p->on_rq == TASK_ON_RQ_QUEUED; return p->on_rq == TASK_ON_RQ_QUEUED;
} }
/*
 * task_on_rq_migrating - test whether @p is marked as migrating between RQs.
 *
 * Returns 1 when p->on_rq holds TASK_ON_RQ_MIGRATING and 0 for any other
 * value of p->on_rq.
 */
static inline int task_on_rq_migrating(struct task_struct *p)
{
	if (p->on_rq != TASK_ON_RQ_MIGRATING)
		return 0;

	return 1;
}
#ifndef prepare_arch_switch #ifndef prepare_arch_switch
# define prepare_arch_switch(next) do { } while (0) # define prepare_arch_switch(next) do { } while (0)
#endif #endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment