Commit 90d3ac15 authored by David S. Miller

Merge commit '317f3941'

Conflicts:
	arch/sparc/kernel/smp_32.c

With merge conflict help from Daniel Hellstrom.
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9fafbd80 317f3941
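The merged series routes every architecture's reschedule IPI through the new scheduler_ipi() hook and, when SCHED_TTWU_QUEUE/TTWU_QUEUE is enabled, lets try_to_wake_up() push remote wakeups onto the target CPU's rq->wake_list instead of taking the remote rq->lock (see ttwu_queue_remote() and sched_ttwu_pending() in the kernel/sched.c hunks below). The following is only a minimal standalone sketch of that lock-free wake-list pattern; the names are hypothetical and C11 atomics stand in for the kernel's cmpxchg() and smp_send_reschedule():

/* Illustrative model only, not the kernel code. */
#include <stdatomic.h>
#include <stdio.h>

struct task {
	struct task *wake_entry;	/* next pointer in the pending wake list */
	int pid;
};

static _Atomic(struct task *) wake_list;	/* per-CPU rq->wake_list in the real code */

/* Producer side (waking CPU): push the task and send an IPI only when the
 * list goes from empty to non-empty, mirroring ttwu_queue_remote(). */
static void queue_remote_wakeup(struct task *p)
{
	struct task *next = atomic_load(&wake_list);

	do {
		p->wake_entry = next;
	} while (!atomic_compare_exchange_weak(&wake_list, &next, p));

	if (!next)
		printf("send reschedule IPI\n");	/* smp_send_reschedule(cpu) */
}

/* Consumer side (target CPU, scheduler_ipi): atomically take the whole list
 * and activate each task, as sched_ttwu_pending() does under its own rq->lock. */
static void drain_pending_wakeups(void)
{
	struct task *p = atomic_exchange(&wake_list, (struct task *)NULL);

	while (p) {
		struct task *next = p->wake_entry;
		printf("activate task %d\n", p->pid);	/* ttwu_do_activate() */
		p = next;
	}
}

int main(void)
{
	struct task a = { .pid = 1 }, b = { .pid = 2 };

	queue_remote_wakeup(&a);
	queue_remote_wakeup(&b);
	drain_pending_wakeups();
	return 0;
}

The empty-to-non-empty check means only the first producer has to send an IPI; later wakeups ride on the one already pending, which is what reduces the cross-CPU rq->lock contention mentioned in the TTWU_QUEUE feature comment below.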
......@@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs)
switch (which) {
case IPI_RESCHEDULE:
/* Reschedule callback. Everything to be done
is done by the interrupt return path. */
scheduler_ipi();
break;
case IPI_CALL_FUNC:
......
......@@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
break;
case IPI_RESCHEDULE:
/*
* nothing more to do - eveything is
* done on the interrupt return path
*/
scheduler_ipi();
break;
case IPI_CALL_FUNC:
......
......@@ -177,6 +177,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
while (msg_queue->count) {
msg = &msg_queue->ipi_message[msg_queue->head];
switch (msg->type) {
case BFIN_IPI_RESCHEDULE:
scheduler_ipi();
break;
case BFIN_IPI_CALL_FUNC:
spin_unlock_irqrestore(&msg_queue->lock, flags);
ipi_call_function(cpu, msg);
......
......@@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
if (ipi.vector & IPI_SCHEDULE) {
scheduler_ipi();
}
if (ipi.vector & IPI_CALL) {
func(info);
func(info);
}
if (ipi.vector & IPI_FLUSH_TLB) {
if (flush_mm == FLUSH_ALL)
__flush_tlb_all();
else if (flush_vma == FLUSH_ALL)
if (flush_mm == FLUSH_ALL)
__flush_tlb_all();
else if (flush_vma == FLUSH_ALL)
__flush_tlb_mm(flush_mm);
else
else
__flush_tlb_page(flush_vma, flush_addr);
}
......
......@@ -31,6 +31,7 @@
#include <linux/irq.h>
#include <linux/ratelimit.h>
#include <linux/acpi.h>
#include <linux/sched.h>
#include <asm/delay.h>
#include <asm/intrinsics.h>
......@@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
smp_local_flush_tlb();
kstat_incr_irqs_this_cpu(irq, desc);
} else if (unlikely(IS_RESCHEDULE(vector))) {
scheduler_ipi();
kstat_incr_irqs_this_cpu(irq, desc);
} else {
ia64_setreg(_IA64_REG_CR_TPR, vector);
......
......@@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt;
static int xen_slab_ready;
#ifdef CONFIG_SMP
#include <linux/sched.h>
/* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
* it ends up to issue several memory accesses upon percpu data and
* thus adds unnecessary traffic to other paths.
......@@ -99,7 +101,13 @@ static int xen_slab_ready;
static irqreturn_t
xen_dummy_handler(int irq, void *dev_id)
{
return IRQ_HANDLED;
}
static irqreturn_t
xen_resched_handler(int irq, void *dev_id)
{
scheduler_ipi();
return IRQ_HANDLED;
}
......@@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = {
};
static struct irqaction xen_resched_irqaction = {
.handler = xen_dummy_handler,
.handler = xen_resched_handler,
.flags = IRQF_DISABLED,
.name = "resched"
};
......
......@@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id)
*
* Description: This routine executes on CPU which received
* 'RESCHEDULE_IPI'.
* Rescheduling is processed at the exit of interrupt
* operation.
*
* Born on Date: 2002.02.05
*
......@@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id)
*==========================================================================*/
void smp_reschedule_interrupt(void)
{
/* nothing to do */
scheduler_ipi();
}
/*==========================================================================*
......
......@@ -44,6 +44,8 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
if (action & SMP_CALL_FUNCTION)
smp_call_function_interrupt();
if (action & SMP_RESCHEDULE_YOURSELF)
scheduler_ipi();
/* Check if we've been told to flush the icache */
if (action & SMP_ICACHE_FLUSH)
......
......@@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
static void ipi_resched_interrupt(void)
{
/* Return from interrupt should be enough to cause scheduler check */
scheduler_ipi();
}
static void ipi_call_interrupt(void)
......
......@@ -309,6 +309,8 @@ static void ipi_call_dispatch(void)
static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
{
scheduler_ipi();
return IRQ_HANDLED;
}
......
......@@ -55,6 +55,8 @@ void titan_mailbox_irq(void)
if (status & 0x2)
smp_call_function_interrupt();
if (status & 0x4)
scheduler_ipi();
break;
case 1:
......@@ -63,6 +65,8 @@ void titan_mailbox_irq(void)
if (status & 0x2)
smp_call_function_interrupt();
if (status & 0x4)
scheduler_ipi();
break;
}
}
......
......@@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void)
#ifdef CONFIG_SMP
if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
scheduler_ipi();
} else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
scheduler_ipi();
} else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
smp_call_function_interrupt();
......
......@@ -20,6 +20,7 @@
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/sched.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
......@@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void)
/* Clear the mailbox to clear the interrupt */
__raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]);
/*
* Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
* interrupt will do the reschedule for us
*/
if (action & SMP_RESCHEDULE_YOURSELF)
scheduler_ipi();
if (action & SMP_CALL_FUNCTION)
smp_call_function_interrupt();
......
......@@ -21,6 +21,7 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/sched.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
......@@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void)
/* Clear the mailbox to clear the interrupt */
____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]);
/*
* Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
* interrupt will do the reschedule for us
*/
if (action & SMP_RESCHEDULE_YOURSELF)
scheduler_ipi();
if (action & SMP_CALL_FUNCTION)
smp_call_function_interrupt();
......
......@@ -494,14 +494,11 @@ void smp_send_stop(void)
* @irq: The interrupt number.
* @dev_id: The device ID.
*
* We need do nothing here, since the scheduling will be effected on our way
* back through entry.S.
*
* Returns IRQ_HANDLED to indicate we handled the interrupt successfully.
*/
static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
{
/* do nothing */
scheduler_ipi();
return IRQ_HANDLED;
}
......
......@@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id)
case IPI_RESCHEDULE:
smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
/*
* Reschedule callback. Everything to be
* done is done by the interrupt return path.
*/
scheduler_ipi();
break;
case IPI_CALL_FUNC:
......
......@@ -116,7 +116,7 @@ void smp_message_recv(int msg)
generic_smp_call_function_interrupt();
break;
case PPC_MSG_RESCHEDULE:
/* we notice need_resched on exit */
scheduler_ipi();
break;
case PPC_MSG_CALL_FUNC_SINGLE:
generic_smp_call_function_single_interrupt();
......@@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data)
static irqreturn_t reschedule_action(int irq, void *data)
{
/* we just need the return path side effect of checking need_resched */
scheduler_ipi();
return IRQ_HANDLED;
}
......
......@@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
/*
* handle bit signal external calls
*
* For the ec_schedule signal we have to do nothing. All the work
* is done automatically when we return from the interrupt.
*/
bits = xchg(&S390_lowcore.ext_call_fast, 0);
if (test_bit(ec_schedule, &bits))
scheduler_ipi();
if (test_bit(ec_call_function, &bits))
generic_smp_call_function_interrupt();
......
......@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <asm/processor.h>
#include <asm/system.h>
......@@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg)
generic_smp_call_function_interrupt();
break;
case SMP_MSG_RESCHEDULE:
scheduler_ipi();
break;
case SMP_MSG_FUNCTION_SINGLE:
generic_smp_call_function_single_interrupt();
......
......@@ -156,11 +156,11 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void smp_resched_interrupt(void)
{
irq_enter();
scheduler_ipi();
local_cpu_data().irq_resched_count++;
/*
* do nothing, since it all was about calling re-schedule
* routine called by interrupt return code.
*/
irq_exit();
/* re-schedule routine called by interrupt return code. */
}
void smp_call_function_single_interrupt(void)
......
......@@ -1368,6 +1368,7 @@ void smp_send_reschedule(int cpu)
void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
{
clear_softint(1 << irq);
scheduler_ipi();
}
/* This is a nop because we capture all other cpus
......
......@@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end)
/* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
static irqreturn_t handle_reschedule_ipi(int irq, void *token)
{
/*
* Nothing to do here; when we return from interrupt, the
* rescheduling will occur there. But do bump the interrupt
* profiler count in the meantime.
*/
__get_cpu_var(irq_stat).irq_resched_count++;
scheduler_ipi();
return IRQ_HANDLED;
}
......
......@@ -173,7 +173,7 @@ void IPI_handler(int cpu)
break;
case 'R':
set_tsk_need_resched(current);
scheduler_ipi();
break;
case 'S':
......
......@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait)
}
/*
* Reschedule call back. Nothing to do,
* all the work is done automatically when
* we return from the interrupt.
* Reschedule call back.
*/
void smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
inc_irq_stat(irq_resched_count);
scheduler_ipi();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
......
......@@ -46,13 +46,12 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
/*
* Reschedule call back. Nothing to do,
* all the work is done automatically when
* we return from the interrupt.
* Reschedule call back.
*/
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
inc_irq_stat(irq_resched_count);
scheduler_ipi();
return IRQ_HANDLED;
}
......
......@@ -51,7 +51,7 @@ struct mutex {
spinlock_t wait_lock;
struct list_head wait_list;
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
struct thread_info *owner;
struct task_struct *owner;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
const char *name;
......
......@@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
struct nsproxy;
struct user_namespace;
......@@ -1048,8 +1048,12 @@ struct sched_domain;
#define WF_FORK 0x02 /* child wakeup after fork */
#define ENQUEUE_WAKEUP 1
#define ENQUEUE_WAKING 2
#define ENQUEUE_HEAD 4
#define ENQUEUE_HEAD 2
#ifdef CONFIG_SMP
#define ENQUEUE_WAKING 4 /* sched_class::task_waking was called */
#else
#define ENQUEUE_WAKING 0
#endif
#define DEQUEUE_SLEEP 1
......@@ -1067,12 +1071,11 @@ struct sched_class {
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
int (*select_task_rq)(struct rq *rq, struct task_struct *p,
int sd_flag, int flags);
int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
void (*post_schedule) (struct rq *this_rq);
void (*task_waking) (struct rq *this_rq, struct task_struct *task);
void (*task_waking) (struct task_struct *task);
void (*task_woken) (struct rq *this_rq, struct task_struct *task);
void (*set_cpus_allowed)(struct task_struct *p,
......@@ -1200,10 +1203,10 @@ struct task_struct {
int lock_depth; /* BKL lock depth */
#ifdef CONFIG_SMP
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
int oncpu;
#endif
struct task_struct *wake_entry;
int on_cpu;
#endif
int on_rq;
int prio, static_prio, normal_prio;
unsigned int rt_priority;
......@@ -1274,6 +1277,7 @@ struct task_struct {
/* Revert to default priority/policy when forking */
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
pid_t pid;
pid_t tgid;
......@@ -2192,8 +2196,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
extern char *get_task_comm(char *to, struct task_struct *tsk);
#ifdef CONFIG_SMP
void scheduler_ipi(void);
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else
static inline void scheduler_ipi(void) { }
static inline unsigned long wait_task_inactive(struct task_struct *p,
long match_state)
{
......
......@@ -827,6 +827,11 @@ config SCHED_AUTOGROUP
desktop applications. Task group autogeneration is currently based
upon task session.
config SCHED_TTWU_QUEUE
bool
depends on !SPARC32
default y
config MM_OWNER
bool
......
......@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock)
return;
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
DEBUG_LOCKS_WARN_ON(lock->owner != current);
DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
mutex_clear_owner(lock);
}
......
......@@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current_thread_info();
lock->owner = current;
}
static inline void mutex_clear_owner(struct mutex *lock)
......
......@@ -160,7 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
*/
for (;;) {
struct thread_info *owner;
struct task_struct *owner;
/*
* If we own the BKL, then don't spin. The owner of
......
......@@ -19,7 +19,7 @@
#ifdef CONFIG_SMP
static inline void mutex_set_owner(struct mutex *lock)
{
lock->owner = current_thread_info();
lock->owner = current;
}
static inline void mutex_clear_owner(struct mutex *lock)
......
......@@ -312,6 +312,9 @@ struct cfs_rq {
u64 exec_clock;
u64 min_vruntime;
#ifndef CONFIG_64BIT
u64 min_vruntime_copy;
#endif
struct rb_root tasks_timeline;
struct rb_node *rb_leftmost;
......@@ -553,6 +556,10 @@ struct rq {
unsigned int ttwu_count;
unsigned int ttwu_local;
#endif
#ifdef CONFIG_SMP
struct task_struct *wake_list;
#endif
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
......@@ -596,7 +603,7 @@ static inline int cpu_of(struct rq *rq)
* Return the group to which this tasks belongs.
*
* We use task_subsys_state_check() and extend the RCU verification
* with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
* with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
* holds that lock for each task it moves into the cgroup. Therefore
* by holding that lock, we pin the task to the current cgroup.
*/
......@@ -606,7 +613,7 @@ static inline struct task_group *task_group(struct task_struct *p)
struct cgroup_subsys_state *css;
css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
lockdep_is_held(&task_rq(p)->lock));
lockdep_is_held(&p->pi_lock));
tg = container_of(css, struct task_group, css);
return autogroup_task_group(p, tg);
......@@ -838,18 +845,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
return rq->curr == p;
}
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline int task_running(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
return p->on_cpu;
#else
return task_current(rq, p);
#endif
}
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
/*
* We can optimise this out completely for !SMP, because the
* SMP rebalancing from interrupt is the only thing that cares
* here.
*/
next->on_cpu = 1;
#endif
}
static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
/*
* After ->on_cpu is cleared, the task can be moved to a different CPU.
* We must ensure this doesn't happen until the switch is completely
* finished.
*/
smp_wmb();
prev->on_cpu = 0;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
rq->lock.owner = current;
......@@ -865,15 +893,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
}
#else /* __ARCH_WANT_UNLOCKED_CTXSW */
static inline int task_running(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
return p->oncpu;
#else
return task_current(rq, p);
#endif
}
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
......@@ -882,7 +901,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
* SMP rebalancing from interrupt is the only thing that cares
* here.
*/
next->oncpu = 1;
next->on_cpu = 1;
#endif
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
raw_spin_unlock_irq(&rq->lock);
......@@ -895,12 +914,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
/*
* After ->oncpu is cleared, the task can be moved to a different CPU.
* After ->on_cpu is cleared, the task can be moved to a different CPU.
* We must ensure this doesn't happen until the switch is completely
* finished.
*/
smp_wmb();
prev->oncpu = 0;
prev->on_cpu = 0;
#endif
#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
local_irq_enable();
......@@ -909,23 +928,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
/*
* Check whether the task is waking, we use this to synchronize ->cpus_allowed
* against ttwu().
*/
static inline int task_is_waking(struct task_struct *p)
{
return unlikely(p->state == TASK_WAKING);
}
/*
* __task_rq_lock - lock the runqueue a given task resides on.
* Must be called interrupts disabled.
* __task_rq_lock - lock the rq @p resides on.
*/
static inline struct rq *__task_rq_lock(struct task_struct *p)
__acquires(rq->lock)
{
struct rq *rq;
lockdep_assert_held(&p->pi_lock);
for (;;) {
rq = task_rq(p);
raw_spin_lock(&rq->lock);
......@@ -936,22 +947,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
}
/*
* task_rq_lock - lock the runqueue a given task resides on and disable
* interrupts. Note the ordering: we can safely lookup the task_rq without
* explicitly disabling preemption.
* task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
*/
static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
__acquires(p->pi_lock)
__acquires(rq->lock)
{
struct rq *rq;
for (;;) {
local_irq_save(*flags);
raw_spin_lock_irqsave(&p->pi_lock, *flags);
rq = task_rq(p);
raw_spin_lock(&rq->lock);
if (likely(rq == task_rq(p)))
return rq;
raw_spin_unlock_irqrestore(&rq->lock, *flags);
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
}
......@@ -961,10 +972,13 @@ static void __task_rq_unlock(struct rq *rq)
raw_spin_unlock(&rq->lock);
}
static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
__releases(rq->lock)
__releases(p->pi_lock)
{
raw_spin_unlock_irqrestore(&rq->lock, *flags);
raw_spin_unlock(&rq->lock);
raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}
/*
......@@ -1773,7 +1787,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
update_rq_clock(rq);
sched_info_queued(p);
p->sched_class->enqueue_task(rq, p, flags);
p->se.on_rq = 1;
}
static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
......@@ -1781,7 +1794,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
update_rq_clock(rq);
sched_info_dequeued(p);
p->sched_class->dequeue_task(rq, p, flags);
p->se.on_rq = 0;
}
/*
......@@ -2116,7 +2128,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
* A queue event has occurred, and we're going to schedule. In
* this case, we can save a useless back to back clock update.
*/
if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
rq->skip_clock_update = 1;
}
......@@ -2162,6 +2174,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
*/
WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
#ifdef CONFIG_LOCKDEP
WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
lockdep_is_held(&task_rq(p)->lock)));
#endif
#endif
trace_sched_migrate_task(p, new_cpu);
......@@ -2185,13 +2202,15 @@ static int migration_cpu_stop(void *data);
* The task's runqueue lock must be held.
* Returns true if you have to wait for migration thread.
*/
static bool migrate_task(struct task_struct *p, struct rq *rq)
static bool need_migrate_task(struct task_struct *p)
{
/*
* If the task is not on a runqueue (and not running), then
* the next wake-up will properly place the task.
*/
return p->se.on_rq || task_running(rq, p);
bool running = p->on_rq || p->on_cpu;
smp_rmb(); /* finish_lock_switch() */
return running;
}
/*
......@@ -2251,11 +2270,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
rq = task_rq_lock(p, &flags);
trace_sched_wait_task(p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
on_rq = p->on_rq;
ncsw = 0;
if (!match_state || p->state == match_state)
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
/*
* If it changed from the expected state, bail out now.
......@@ -2330,7 +2349,7 @@ EXPORT_SYMBOL_GPL(kick_process);
#ifdef CONFIG_SMP
/*
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
* ->cpus_allowed is protected by both rq->lock and p->pi_lock
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
{
......@@ -2363,12 +2382,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
}
/*
* The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
* The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
*/
static inline
int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
{
int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
/*
* In order not to call set_task_cpu() on a blocking task we need
......@@ -2394,27 +2413,60 @@ static void update_avg(u64 *avg, u64 sample)
}
#endif
static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
bool is_sync, bool is_migrate, bool is_local,
unsigned long en_flags)
static void
ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
{
#ifdef CONFIG_SCHEDSTATS
struct rq *rq = this_rq();
#ifdef CONFIG_SMP
int this_cpu = smp_processor_id();
if (cpu == this_cpu) {
schedstat_inc(rq, ttwu_local);
schedstat_inc(p, se.statistics.nr_wakeups_local);
} else {
struct sched_domain *sd;
schedstat_inc(p, se.statistics.nr_wakeups_remote);
for_each_domain(this_cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
schedstat_inc(sd, ttwu_wake_remote);
break;
}
}
}
#endif /* CONFIG_SMP */
schedstat_inc(rq, ttwu_count);
schedstat_inc(p, se.statistics.nr_wakeups);
if (is_sync)
if (wake_flags & WF_SYNC)
schedstat_inc(p, se.statistics.nr_wakeups_sync);
if (is_migrate)
if (cpu != task_cpu(p))
schedstat_inc(p, se.statistics.nr_wakeups_migrate);
if (is_local)
schedstat_inc(p, se.statistics.nr_wakeups_local);
else
schedstat_inc(p, se.statistics.nr_wakeups_remote);
#endif /* CONFIG_SCHEDSTATS */
}
static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
{
activate_task(rq, p, en_flags);
p->on_rq = 1;
/* if a worker is waking up, notify workqueue */
if (p->flags & PF_WQ_WORKER)
wq_worker_waking_up(p, cpu_of(rq));
}
static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
int wake_flags, bool success)
/*
* Mark the task runnable and perform wakeup-preemption.
*/
static void
ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
{
trace_sched_wakeup(p, success);
trace_sched_wakeup(p, true);
check_preempt_curr(rq, p, wake_flags);
p->state = TASK_RUNNING;
......@@ -2433,9 +2485,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
rq->idle_stamp = 0;
}
#endif
/* if a worker is waking up, notify workqueue */
if ((p->flags & PF_WQ_WORKER) && success)
wq_worker_waking_up(p, cpu_of(rq));
}
static void
ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
{
#ifdef CONFIG_SMP
if (p->sched_contributes_to_load)
rq->nr_uninterruptible--;
#endif
ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
ttwu_do_wakeup(rq, p, wake_flags);
}
/*
* Called in case the task @p isn't fully descheduled from its runqueue,
* in this case we must do a remote wakeup. Its a 'light' wakeup though,
* since all we need to do is flip p->state to TASK_RUNNING, since
* the task is still ->on_rq.
*/
static int ttwu_remote(struct task_struct *p, int wake_flags)
{
struct rq *rq;
int ret = 0;
rq = __task_rq_lock(p);
if (p->on_rq) {
ttwu_do_wakeup(rq, p, wake_flags);
ret = 1;
}
__task_rq_unlock(rq);
return ret;
}
#ifdef CONFIG_SMP
static void sched_ttwu_pending(void)
{
struct rq *rq = this_rq();
struct task_struct *list = xchg(&rq->wake_list, NULL);
if (!list)
return;
raw_spin_lock(&rq->lock);
while (list) {
struct task_struct *p = list;
list = list->wake_entry;
ttwu_do_activate(rq, p, 0);
}
raw_spin_unlock(&rq->lock);
}
void scheduler_ipi(void)
{
sched_ttwu_pending();
}
static void ttwu_queue_remote(struct task_struct *p, int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct task_struct *next = rq->wake_list;
for (;;) {
struct task_struct *old = next;
p->wake_entry = next;
next = cmpxchg(&rq->wake_list, old, p);
if (next == old)
break;
}
if (!next)
smp_send_reschedule(cpu);
}
#endif
static void ttwu_queue(struct task_struct *p, int cpu)
{
struct rq *rq = cpu_rq(cpu);
#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
ttwu_queue_remote(p, cpu);
return;
}
#endif
raw_spin_lock(&rq->lock);
ttwu_do_activate(rq, p, 0);
raw_spin_unlock(&rq->lock);
}
/**
......@@ -2453,92 +2595,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
* Returns %true if @p was woken up, %false if it was already running
* or @state didn't match @p's state.
*/
static int try_to_wake_up(struct task_struct *p, unsigned int state,
int wake_flags)
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
int cpu, orig_cpu, this_cpu, success = 0;
unsigned long flags;
unsigned long en_flags = ENQUEUE_WAKEUP;
struct rq *rq;
this_cpu = get_cpu();
int cpu, success = 0;
smp_wmb();
rq = task_rq_lock(p, &flags);
raw_spin_lock_irqsave(&p->pi_lock, flags);
if (!(p->state & state))
goto out;
if (p->se.on_rq)
goto out_running;
success = 1; /* we're going to change ->state */
cpu = task_cpu(p);
orig_cpu = cpu;
#ifdef CONFIG_SMP
if (unlikely(task_running(rq, p)))
goto out_activate;
if (p->on_rq && ttwu_remote(p, wake_flags))
goto stat;
#ifdef CONFIG_SMP
/*
* In order to handle concurrent wakeups and release the rq->lock
* we put the task in TASK_WAKING state.
*
* First fix up the nr_uninterruptible count:
* If the owning (remote) cpu is still in the middle of schedule() with
* this task as prev, wait until its done referencing the task.
*/
if (task_contributes_to_load(p)) {
if (likely(cpu_online(orig_cpu)))
rq->nr_uninterruptible--;
else
this_rq()->nr_uninterruptible--;
}
p->state = TASK_WAKING;
if (p->sched_class->task_waking) {
p->sched_class->task_waking(rq, p);
en_flags |= ENQUEUE_WAKING;
while (p->on_cpu) {
#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
/*
* If called from interrupt context we could have landed in the
* middle of schedule(), in this case we should take care not
* to spin on ->on_cpu if p is current, since that would
* deadlock.
*/
if (p == current) {
ttwu_queue(p, cpu);
goto stat;
}
#endif
cpu_relax();
}
cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
if (cpu != orig_cpu)
set_task_cpu(p, cpu);
__task_rq_unlock(rq);
rq = cpu_rq(cpu);
raw_spin_lock(&rq->lock);
/*
* We migrated the task without holding either rq->lock, however
* since the task is not on the task list itself, nobody else
* will try and migrate the task, hence the rq should match the
* cpu we just moved it to.
* Pairs with the smp_wmb() in finish_lock_switch().
*/
WARN_ON(task_cpu(p) != cpu);
WARN_ON(p->state != TASK_WAKING);
smp_rmb();
#ifdef CONFIG_SCHEDSTATS
schedstat_inc(rq, ttwu_count);
if (cpu == this_cpu)
schedstat_inc(rq, ttwu_local);
else {
struct sched_domain *sd;
for_each_domain(this_cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
schedstat_inc(sd, ttwu_wake_remote);
break;
}
}
}
#endif /* CONFIG_SCHEDSTATS */
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;
if (p->sched_class->task_waking)
p->sched_class->task_waking(p);
out_activate:
cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
if (task_cpu(p) != cpu)
set_task_cpu(p, cpu);
#endif /* CONFIG_SMP */
ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
cpu == this_cpu, en_flags);
success = 1;
out_running:
ttwu_post_activation(p, rq, wake_flags, success);
ttwu_queue(p, cpu);
stat:
ttwu_stat(p, cpu, wake_flags);
out:
task_rq_unlock(rq, &flags);
put_cpu();
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
return success;
}
......@@ -2547,31 +2661,34 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
* try_to_wake_up_local - try to wake up a local task with rq lock held
* @p: the thread to be awakened
*
* Put @p on the run-queue if it's not already there. The caller must
* Put @p on the run-queue if it's not already there. The caller must
* ensure that this_rq() is locked, @p is bound to this_rq() and not
* the current task. this_rq() stays locked over invocation.
* the current task.
*/
static void try_to_wake_up_local(struct task_struct *p)
{
struct rq *rq = task_rq(p);
bool success = false;
BUG_ON(rq != this_rq());
BUG_ON(p == current);
lockdep_assert_held(&rq->lock);
if (!raw_spin_trylock(&p->pi_lock)) {
raw_spin_unlock(&rq->lock);
raw_spin_lock(&p->pi_lock);
raw_spin_lock(&rq->lock);
}
if (!(p->state & TASK_NORMAL))
return;
goto out;
if (!p->se.on_rq) {
if (likely(!task_running(rq, p))) {
schedstat_inc(rq, ttwu_count);
schedstat_inc(rq, ttwu_local);
}
ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
success = true;
}
ttwu_post_activation(p, rq, 0, success);
if (!p->on_rq)
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
ttwu_do_wakeup(rq, p, 0);
ttwu_stat(p, smp_processor_id(), 0);
out:
raw_spin_unlock(&p->pi_lock);
}
/**
......@@ -2604,19 +2721,21 @@ int wake_up_state(struct task_struct *p, unsigned int state)
*/
static void __sched_fork(struct task_struct *p)
{
p->on_rq = 0;
p->se.on_rq = 0;
p->se.exec_start = 0;
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
INIT_LIST_HEAD(&p->rt.run_list);
p->se.on_rq = 0;
INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
......@@ -2628,6 +2747,7 @@ static void __sched_fork(struct task_struct *p)
*/
void sched_fork(struct task_struct *p, int clone_flags)
{
unsigned long flags;
int cpu = get_cpu();
__sched_fork(p);
......@@ -2678,16 +2798,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
*
* Silence PROVE_RCU.
*/
rcu_read_lock();
raw_spin_lock_irqsave(&p->pi_lock, flags);
set_task_cpu(p, cpu);
rcu_read_unlock();
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
p->oncpu = 0;
#if defined(CONFIG_SMP)
p->on_cpu = 0;
#endif
#ifdef CONFIG_PREEMPT
/* Want to start with kernel preemption disabled. */
......@@ -2711,37 +2831,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
unsigned long flags;
struct rq *rq;
int cpu __maybe_unused = get_cpu();
raw_spin_lock_irqsave(&p->pi_lock, flags);
#ifdef CONFIG_SMP
rq = task_rq_lock(p, &flags);
p->state = TASK_WAKING;
/*
* Fork balancing, do it here and not earlier because:
* - cpus_allowed can change in the fork path
* - any previously selected cpu might disappear through hotplug
*
* We set TASK_WAKING so that select_task_rq() can drop rq->lock
* without people poking at ->cpus_allowed.
*/
cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
set_task_cpu(p, cpu);
p->state = TASK_RUNNING;
task_rq_unlock(rq, &flags);
set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
#endif
rq = task_rq_lock(p, &flags);
rq = __task_rq_lock(p);
activate_task(rq, p, 0);
trace_sched_wakeup_new(p, 1);
p->on_rq = 1;
trace_sched_wakeup_new(p, true);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
if (p->sched_class->task_woken)
p->sched_class->task_woken(rq, p);
#endif
task_rq_unlock(rq, &flags);
put_cpu();
task_rq_unlock(rq, p, &flags);
}
#ifdef CONFIG_PREEMPT_NOTIFIERS
......@@ -3450,27 +3560,22 @@ void sched_exec(void)
{
struct task_struct *p = current;
unsigned long flags;
struct rq *rq;
int dest_cpu;
rq = task_rq_lock(p, &flags);
dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
raw_spin_lock_irqsave(&p->pi_lock, flags);
dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
if (dest_cpu == smp_processor_id())
goto unlock;
/*
* select_task_rq() can race against ->cpus_allowed
*/
if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
if (likely(cpu_active(dest_cpu))) {
struct migration_arg arg = { p, dest_cpu };
task_rq_unlock(rq, &flags);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
return;
}
unlock:
task_rq_unlock(rq, &flags);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
}
#endif
......@@ -3507,7 +3612,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
rq = task_rq_lock(p, &flags);
ns = do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
return ns;
}
......@@ -3525,7 +3630,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
rq = task_rq_lock(p, &flags);
ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
return ns;
}
......@@ -3549,7 +3654,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
rq = task_rq_lock(p, &flags);
thread_group_cputime(p, &totals);
ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
return ns;
}
......@@ -4035,7 +4140,7 @@ static inline void schedule_debug(struct task_struct *prev)
static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
if (prev->se.on_rq)
if (prev->on_rq)
update_rq_clock(rq);
prev->sched_class->put_prev_task(rq, prev);
}
......@@ -4097,11 +4202,13 @@ asmlinkage void __sched schedule(void)
if (unlikely(signal_pending_state(prev->state, prev))) {
prev->state = TASK_RUNNING;
} else {
deactivate_task(rq, prev, DEQUEUE_SLEEP);
prev->on_rq = 0;
/*
* If a worker is going to sleep, notify and
* ask workqueue whether it wants to wake up a
* task to maintain concurrency. If so, wake
* up the task.
* If a worker went to sleep, notify and ask workqueue
* whether it wants to wake up a task to maintain
* concurrency.
*/
if (prev->flags & PF_WQ_WORKER) {
struct task_struct *to_wakeup;
......@@ -4110,11 +4217,10 @@ asmlinkage void __sched schedule(void)
if (to_wakeup)
try_to_wake_up_local(to_wakeup);
}
deactivate_task(rq, prev, DEQUEUE_SLEEP);
/*
* If we are going to sleep and we have plugged IO queued, make
* sure to submit it to avoid deadlocks.
* If we are going to sleep and we have plugged IO
* queued, make sure to submit it to avoid deadlocks.
*/
if (blk_needs_flush_plug(prev)) {
raw_spin_unlock(&rq->lock);
......@@ -4161,70 +4267,53 @@ asmlinkage void __sched schedule(void)
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
* Look out! "owner" is an entirely speculative pointer
* access and not reliable.
*/
int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
{
unsigned int cpu;
struct rq *rq;
if (!sched_feat(OWNER_SPIN))
return 0;
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
{
bool ret = false;
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
* Need to access the cpu field knowing that
* DEBUG_PAGEALLOC could have unmapped it if
* the mutex owner just released it and exited.
*/
if (probe_kernel_address(&owner->cpu, cpu))
return 0;
#else
cpu = owner->cpu;
#endif
rcu_read_lock();
if (lock->owner != owner)
goto fail;
/*
* Even if the access succeeded (likely case),
* the cpu field may no longer be valid.
* Ensure we emit the owner->on_cpu, dereference _after_ checking
* lock->owner still matches owner, if that fails, owner might
* point to free()d memory, if it still matches, the rcu_read_lock()
* ensures the memory stays valid.
*/
if (cpu >= nr_cpumask_bits)
return 0;
barrier();
/*
* We need to validate that we can do a
* get_cpu() and that we have the percpu area.
*/
if (!cpu_online(cpu))
return 0;
ret = owner->on_cpu;
fail:
rcu_read_unlock();
rq = cpu_rq(cpu);
return ret;
}
for (;;) {
/*
* Owner changed, break to re-assess state.
*/
if (lock->owner != owner) {
/*
* If the lock has switched to a different owner,
* we likely have heavy contention. Return 0 to quit
* optimistic spinning and not contend further:
*/
if (lock->owner)
return 0;
break;
}
/*
* Look out! "owner" is an entirely speculative pointer
* access and not reliable.
*/
int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
{
if (!sched_feat(OWNER_SPIN))
return 0;
/*
* Is that owner really running on that cpu?
*/
if (task_thread_info(rq->curr) != owner || need_resched())
while (owner_running(lock, owner)) {
if (need_resched())
return 0;
arch_mutex_cpu_relax();
}
/*
* If the owner changed to another task there is likely
* heavy contention, stop spinning.
*/
if (lock->owner)
return 0;
return 1;
}
#endif
......@@ -4684,19 +4773,18 @@ EXPORT_SYMBOL(sleep_on_timeout);
*/
void rt_mutex_setprio(struct task_struct *p, int prio)
{
unsigned long flags;
int oldprio, on_rq, running;
struct rq *rq;
const struct sched_class *prev_class;
BUG_ON(prio < 0 || prio > MAX_PRIO);
rq = task_rq_lock(p, &flags);
rq = __task_rq_lock(p);
trace_sched_pi_setprio(p, prio);
oldprio = p->prio;
prev_class = p->sched_class;
on_rq = p->se.on_rq;
on_rq = p->on_rq;
running = task_current(rq, p);
if (on_rq)
dequeue_task(rq, p, 0);
......@@ -4716,7 +4804,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
check_class_changed(rq, p, prev_class, oldprio);
task_rq_unlock(rq, &flags);
__task_rq_unlock(rq);
}
#endif
......@@ -4744,7 +4832,7 @@ void set_user_nice(struct task_struct *p, long nice)
p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock;
}
on_rq = p->se.on_rq;
on_rq = p->on_rq;
if (on_rq)
dequeue_task(rq, p, 0);
......@@ -4764,7 +4852,7 @@ void set_user_nice(struct task_struct *p, long nice)
resched_task(rq->curr);
}
out_unlock:
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
}
EXPORT_SYMBOL(set_user_nice);
......@@ -4878,8 +4966,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
static void
__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
{
BUG_ON(p->se.on_rq);
p->policy = policy;
p->rt_priority = prio;
p->normal_prio = normal_prio(p);
......@@ -4994,20 +5080,17 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
/*
* make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task:
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
/*
*
* To be able to change p->policy safely, the appropriate
* runqueue lock must be held.
*/
rq = __task_rq_lock(p);
rq = task_rq_lock(p, &flags);
/*
* Changing the policy of the stop threads its a very bad idea
*/
if (p == rq->stop) {
__task_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
task_rq_unlock(rq, p, &flags);
return -EINVAL;
}
......@@ -5031,8 +5114,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
if (rt_bandwidth_enabled() && rt_policy(policy) &&
task_group(p)->rt_bandwidth.rt_runtime == 0 &&
!task_group_is_autogroup(task_group(p))) {
__task_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
task_rq_unlock(rq, p, &flags);
return -EPERM;
}
}
......@@ -5041,11 +5123,10 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
/* recheck policy now with rq lock held */
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1;
__task_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
task_rq_unlock(rq, p, &flags);
goto recheck;
}
on_rq = p->se.on_rq;
on_rq = p->on_rq;
running = task_current(rq, p);
if (on_rq)
deactivate_task(rq, p, 0);
......@@ -5064,8 +5145,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
activate_task(rq, p, 0);
check_class_changed(rq, p, prev_class, oldprio);
__task_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
task_rq_unlock(rq, p, &flags);
rt_mutex_adjust_pi(p);
......@@ -5316,7 +5396,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
{
struct task_struct *p;
unsigned long flags;
struct rq *rq;
int retval;
get_online_cpus();
......@@ -5331,9 +5410,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
if (retval)
goto out_unlock;
rq = task_rq_lock(p, &flags);
raw_spin_lock_irqsave(&p->pi_lock, flags);
cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
task_rq_unlock(rq, &flags);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out_unlock:
rcu_read_unlock();
......@@ -5658,7 +5737,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
rq = task_rq_lock(p, &flags);
time_slice = p->sched_class->get_rr_interval(rq, p);
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
rcu_read_unlock();
jiffies_to_timespec(time_slice, &t);
......@@ -5776,8 +5855,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
rcu_read_unlock();
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
idle->oncpu = 1;
#if defined(CONFIG_SMP)
idle->on_cpu = 1;
#endif
raw_spin_unlock_irqrestore(&rq->lock, flags);
......@@ -5881,18 +5960,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
unsigned int dest_cpu;
int ret = 0;
/*
* Serialize against TASK_WAKING so that ttwu() and wunt() can
* drop the rq->lock and still rely on ->cpus_allowed.
*/
again:
while (task_is_waking(p))
cpu_relax();
rq = task_rq_lock(p, &flags);
if (task_is_waking(p)) {
task_rq_unlock(rq, &flags);
goto again;
}
if (!cpumask_intersects(new_mask, cpu_active_mask)) {
ret = -EINVAL;
......@@ -5917,16 +5985,16 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
goto out;
dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
if (migrate_task(p, rq)) {
if (need_migrate_task(p)) {
struct migration_arg arg = { p, dest_cpu };
/* Need help from migration thread: drop lock and wait. */
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
tlb_migrate_finish(p->mm);
return 0;
}
out:
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, p, &flags);
return ret;
}
......@@ -5954,6 +6022,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
rq_src = cpu_rq(src_cpu);
rq_dest = cpu_rq(dest_cpu);
raw_spin_lock(&p->pi_lock);
double_rq_lock(rq_src, rq_dest);
/* Already moved. */
if (task_cpu(p) != src_cpu)
......@@ -5966,7 +6035,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
* If we're not on a rq, the next wake-up will ensure we're
* placed properly.
*/
if (p->se.on_rq) {
if (p->on_rq) {
deactivate_task(rq_src, p, 0);
set_task_cpu(p, dest_cpu);
activate_task(rq_dest, p, 0);
......@@ -5976,6 +6045,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
ret = 1;
fail:
double_rq_unlock(rq_src, rq_dest);
raw_spin_unlock(&p->pi_lock);
return ret;
}
......@@ -6316,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DYING:
sched_ttwu_pending();
/* Update our root-domain */
raw_spin_lock_irqsave(&rq->lock, flags);
if (rq->rd) {
......@@ -8340,7 +8411,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
int old_prio = p->prio;
int on_rq;
on_rq = p->se.on_rq;
on_rq = p->on_rq;
if (on_rq)
deactivate_task(rq, p, 0);
__setscheduler(rq, p, SCHED_NORMAL, 0);
......@@ -8683,7 +8754,7 @@ void sched_move_task(struct task_struct *tsk)
rq = task_rq_lock(tsk, &flags);
running = task_current(rq, tsk);
on_rq = tsk->se.on_rq;
on_rq = tsk->on_rq;
if (on_rq)
dequeue_task(rq, tsk, 0);
......@@ -8702,7 +8773,7 @@ void sched_move_task(struct task_struct *tsk)
if (on_rq)
enqueue_task(rq, tsk, 0);
task_rq_unlock(rq, &flags);
task_rq_unlock(rq, tsk, &flags);
}
#endif /* CONFIG_CGROUP_SCHED */
......
......@@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
read_lock_irqsave(&tasklist_lock, flags);
do_each_thread(g, p) {
if (!p->se.on_rq || task_cpu(p) != rq_cpu)
if (!p->on_rq || task_cpu(p) != rq_cpu)
continue;
print_task(m, rq, p);
......
......@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
}
cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
#ifndef CONFIG_64BIT
smp_wmb();
cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
#endif
}
/*
......@@ -1372,12 +1376,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_SMP
static void task_waking_fair(struct rq *rq, struct task_struct *p)
static void task_waking_fair(struct task_struct *p)
{
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);
u64 min_vruntime;
se->vruntime -= cfs_rq->min_vruntime;
#ifndef CONFIG_64BIT
u64 min_vruntime_copy;
do {
min_vruntime_copy = cfs_rq->min_vruntime_copy;
smp_rmb();
min_vruntime = cfs_rq->min_vruntime;
} while (min_vruntime != min_vruntime_copy);
#else
min_vruntime = cfs_rq->min_vruntime;
#endif
se->vruntime -= min_vruntime;
}
#ifdef CONFIG_FAIR_GROUP_SCHED
......@@ -1657,7 +1674,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
* preempt must be disabled.
*/
static int
select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
{
struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
int cpu = smp_processor_id();
......@@ -1789,10 +1806,7 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
* This is especially important for buddies when the leftmost
* task is higher priority than the buddy.
*/
if (unlikely(se->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, se);
return gran;
return calc_delta_fair(gran, se);
}
/*
......
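The sched_fair.c hunk above adds a min_vruntime_copy shadow for !CONFIG_64BIT so that task_waking_fair() can read a consistent 64-bit min_vruntime without holding rq->lock: the writer stores the value, issues smp_wmb(), then stores the copy; the lockless reader retries until both halves match. A minimal standalone model of that publish/retry pattern follows; names are hypothetical and __sync_synchronize() stands in for the kernel barriers:

/* Illustrative model only, not the kernel code. */
#include <stdint.h>
#include <stdio.h>

#define model_wmb() __sync_synchronize()	/* stand-in for smp_wmb() */
#define model_rmb() __sync_synchronize()	/* stand-in for smp_rmb() */

struct cfs_rq_model {
	volatile uint64_t min_vruntime;
	volatile uint64_t min_vruntime_copy;	/* only needed where 64-bit stores can tear */
};

/* Writer (update_min_vruntime): publish the value, then the copy. */
static void publish_min_vruntime(struct cfs_rq_model *cfs_rq, uint64_t v)
{
	cfs_rq->min_vruntime = v;
	model_wmb();
	cfs_rq->min_vruntime_copy = v;
}

/* Lockless reader (task_waking_fair): loop until copy and value agree,
 * guaranteeing a consistent 64-bit snapshot on 32-bit machines. */
static uint64_t read_min_vruntime(struct cfs_rq_model *cfs_rq)
{
	uint64_t copy, v;

	do {
		copy = cfs_rq->min_vruntime_copy;
		model_rmb();
		v = cfs_rq->min_vruntime;
	} while (v != copy);

	return v;
}

int main(void)
{
	struct cfs_rq_model rq = { 0, 0 };

	publish_min_vruntime(&rq, 1000);
	printf("min_vruntime = %llu\n", (unsigned long long)read_min_vruntime(&rq));
	return 0;
}

On 64-bit kernels the copy and the retry loop compile away (the #ifndef CONFIG_64BIT branches above), since a single aligned 64-bit load is already atomic there.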
......@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1)
* Decrement CPU power based on irq activity
*/
SCHED_FEAT(NONIRQ_POWER, 1)
/*
* Queue remote wakeups on the target CPU and process them
* using the scheduler IPI. Reduces rq->lock contention/bounces.
*/
SCHED_FEAT(TTWU_QUEUE, 1)
......@@ -7,7 +7,7 @@
#ifdef CONFIG_SMP
static int
select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
{
return task_cpu(p); /* IDLE tasks as never migrated */
}
......
......@@ -977,13 +977,23 @@ static void yield_task_rt(struct rq *rq)
static int find_lowest_rq(struct task_struct *task);
static int
select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
int cpu;
if (sd_flag != SD_BALANCE_WAKE)
return smp_processor_id();
cpu = task_cpu(p);
rq = cpu_rq(cpu);
rcu_read_lock();
curr = ACCESS_ONCE(rq->curr); /* unlocked access */
/*
* If the current task is an RT task, then
* If the current task on @p's runqueue is an RT task, then
* try to see if we can wake this RT task up on another
* runqueue. Otherwise simply start this RT task
* on its current runqueue.
......@@ -997,21 +1007,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
* lock?
*
* For equal prio tasks, we just let the scheduler sort it out.
*
* Otherwise, just let it ride on the affined RQ and the
* post-schedule router will push the preempted task away
*
* This test is optimistic, if we get it wrong the load-balancer
* will have to sort it out.
*/
if (unlikely(rt_task(rq->curr)) &&
(rq->curr->rt.nr_cpus_allowed < 2 ||
rq->curr->prio < p->prio) &&
if (curr && unlikely(rt_task(curr)) &&
(curr->rt.nr_cpus_allowed < 2 ||
curr->prio < p->prio) &&
(p->rt.nr_cpus_allowed > 1)) {
int cpu = find_lowest_rq(p);
int target = find_lowest_rq(p);
return (cpu == -1) ? task_cpu(p) : cpu;
if (target != -1)
cpu = target;
}
rcu_read_unlock();
/*
* Otherwise, just let it ride on the affined RQ and the
* post-schedule router will push the preempted task away
*/
return task_cpu(p);
return cpu;
}
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
......@@ -1136,7 +1150,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
* The previous task needs to be made eligible for pushing
* if it is still active
*/
if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
}
......@@ -1287,7 +1301,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
!cpumask_test_cpu(lowest_rq->cpu,
&task->cpus_allowed) ||
task_running(rq, task) ||
!task->se.on_rq)) {
!task->on_rq)) {
raw_spin_unlock(&lowest_rq->lock);
lowest_rq = NULL;
......@@ -1321,7 +1335,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
BUG_ON(task_current(rq, p));
BUG_ON(p->rt.nr_cpus_allowed <= 1);
BUG_ON(!p->se.on_rq);
BUG_ON(!p->on_rq);
BUG_ON(!rt_task(p));
return p;
......@@ -1467,7 +1481,7 @@ static int pull_rt_task(struct rq *this_rq)
*/
if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
WARN_ON(p == src_rq->curr);
WARN_ON(!p->se.on_rq);
WARN_ON(!p->on_rq);
/*
* There's a chance that p is higher in priority
......@@ -1538,7 +1552,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
* Update the migration status of the RQ if we have an RT task
* which is running AND changing its weight value.
*/
if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
struct rq *rq = task_rq(p);
if (!task_current(rq, p)) {
......@@ -1608,7 +1622,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
* we may need to handle the pulling of RT tasks
* now.
*/
if (p->se.on_rq && !rq->rt.rt_nr_running)
if (p->on_rq && !rq->rt.rt_nr_running)
pull_rt_task(rq);
}
......@@ -1638,7 +1652,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
* If that current running task is also an RT task
* then see if we can move to another run queue.
*/
if (p->se.on_rq && rq->curr != p) {
if (p->on_rq && rq->curr != p) {
#ifdef CONFIG_SMP
if (rq->rt.overloaded && push_rt_task(rq) &&
/* Don't resched if we changed runqueues */
......@@ -1657,7 +1671,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
if (!p->se.on_rq)
if (!p->on_rq)
return;
if (rq->curr == p) {
......
......@@ -9,8 +9,7 @@
#ifdef CONFIG_SMP
static int
select_task_rq_stop(struct rq *rq, struct task_struct *p,
int sd_flag, int flags)
select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
{
return task_cpu(p); /* stop tasks as never migrate */
}
......@@ -26,7 +25,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
{
struct task_struct *stop = rq->stop;
if (stop && stop->se.on_rq)
if (stop && stop->on_rq)
return stop;
return NULL;
......