Commit 1799e35d authored by Ingo Molnar

sched: add /proc/sys/kernel/sched_compat_yield

add /proc/sys/kernel/sched_compat_yield to make sys_sched_yield()
more aggressive, by moving the yielding task to the last position
in the rbtree.

with sched_compat_yield=0:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  2539 mingo     20   0  1576  252  204 R   50  0.0   0:02.03 loop_yield
  2541 mingo     20   0  1576  244  196 R   50  0.0   0:02.05 loop

with sched_compat_yield=1:

   PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
  2584 mingo     20   0  1576  248  196 R   99  0.0   0:52.45 loop
  2582 mingo     20   0  1576  256  204 R    0  0.0   0:00.00 loop_yield
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
parent a88a8eff
...@@ -1406,6 +1406,7 @@ extern unsigned int sysctl_sched_wakeup_granularity; ...@@ -1406,6 +1406,7 @@ extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_batch_wakeup_granularity; extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_stat_granularity; extern unsigned int sysctl_sched_stat_granularity;
extern unsigned int sysctl_sched_runtime_limit; extern unsigned int sysctl_sched_runtime_limit;
extern unsigned int sysctl_sched_compat_yield;
extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_features;
......
...@@ -4550,10 +4550,7 @@ asmlinkage long sys_sched_yield(void) ...@@ -4550,10 +4550,7 @@ asmlinkage long sys_sched_yield(void)
struct rq *rq = this_rq_lock(); struct rq *rq = this_rq_lock();
schedstat_inc(rq, yld_cnt); schedstat_inc(rq, yld_cnt);
if (unlikely(rq->nr_running == 1)) current->sched_class->yield_task(rq, current);
schedstat_inc(rq, yld_act_empty);
else
current->sched_class->yield_task(rq, current);
/* /*
* Since we are going to call schedule() anyway, there's * Since we are going to call schedule() anyway, there's
......
...@@ -42,6 +42,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL; ...@@ -42,6 +42,14 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
*/ */
unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL; unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
/*
* sys_sched_yield() compat mode
*
* This option switches the aggressive yield implementation of the
* old scheduler back on.
*/
unsigned int __read_mostly sysctl_sched_compat_yield;
/* /*
* SCHED_BATCH wake-up granularity. * SCHED_BATCH wake-up granularity.
* (default: 25 msec, units: nanoseconds) * (default: 25 msec, units: nanoseconds)
...@@ -897,19 +905,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) ...@@ -897,19 +905,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
} }
/* /*
* sched_yield() support is very simple - we dequeue and enqueue * sched_yield() support is very simple - we dequeue and enqueue.
*
* If compat_yield is turned on then we requeue to the end of the tree.
*/ */
static void yield_task_fair(struct rq *rq, struct task_struct *p) static void yield_task_fair(struct rq *rq, struct task_struct *p)
{ {
struct cfs_rq *cfs_rq = task_cfs_rq(p); struct cfs_rq *cfs_rq = task_cfs_rq(p);
struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
struct sched_entity *rightmost, *se = &p->se;
struct rb_node *parent;
__update_rq_clock(rq);
/* /*
* Dequeue and enqueue the task to update its * Are we the only task in the tree?
* position within the tree: */
if (unlikely(cfs_rq->nr_running == 1))
return;
if (likely(!sysctl_sched_compat_yield)) {
__update_rq_clock(rq);
/*
* Dequeue and enqueue the task to update its
* position within the tree:
*/
dequeue_entity(cfs_rq, &p->se, 0);
enqueue_entity(cfs_rq, &p->se, 0);
return;
}
/*
* Find the rightmost entry in the rbtree:
*/ */
dequeue_entity(cfs_rq, &p->se, 0); do {
enqueue_entity(cfs_rq, &p->se, 0); parent = *link;
link = &parent->rb_right;
} while (*link);
rightmost = rb_entry(parent, struct sched_entity, run_node);
/*
* Already in the rightmost position?
*/
if (unlikely(rightmost == se))
return;
/*
* Minimally necessary key value to be last in the tree:
*/
se->fair_key = rightmost->fair_key + 1;
if (cfs_rq->rb_leftmost == &se->run_node)
cfs_rq->rb_leftmost = rb_next(&se->run_node);
/*
* Relink the task to the rightmost position:
*/
rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
rb_link_node(&se->run_node, parent, link);
rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
} }
/* /*
......
...@@ -303,6 +303,14 @@ static ctl_table kern_table[] = { ...@@ -303,6 +303,14 @@ static ctl_table kern_table[] = {
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
#endif #endif
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_compat_yield",
.data = &sysctl_sched_compat_yield,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#ifdef CONFIG_PROVE_LOCKING #ifdef CONFIG_PROVE_LOCKING
{ {
.ctl_name = CTL_UNNUMBERED, .ctl_name = CTL_UNNUMBERED,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment