Commit 9bd721c5 authored by Jason Low, committed by Ingo Molnar

sched/balancing: Consider max cost of idle balance per sched domain

In this patch, we keep track of the max cost we spend doing idle load balancing
for each sched domain. If the avg time the CPU remains idle is less than the
time we have already spent on idle balancing plus the max cost of idle balancing
in the sched domain, then we don't continue to attempt the balance. We also
keep a per-rq variable, max_idle_balance_cost, which tracks the max time spent
on newidle load balances throughout all its domains so that we can determine
avg_idle's max value.

By using the max, we avoid overrunning the average. This further reduces the
chance we attempt balancing when the CPU is not idle for longer than the cost
to balance.
Signed-off-by: Jason Low <jason.low2@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1379096813-3032-3-git-send-email-jason.low2@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent abfafa54
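The core of the change is the cutoff in idle_balance(): each sched domain remembers the worst cost ever observed for a newidle balance at that level, and the loop over domains stops once the expected idle time no longer covers the cost already spent plus that worst case. The following plain-C fragment is a minimal sketch of that heuristic; the domain names, cost numbers, and the struct dom type are invented for illustration, and it models the logic of the fair.c hunks below rather than being kernel code.

#include <stdio.h>

/* Toy model of the newidle-balance cutoff: walk the domains from
 * smallest to largest and stop as soon as the expected remaining idle
 * time (avg_idle) no longer covers the time already spent balancing
 * (curr_cost) plus the worst cost ever observed for the next domain.
 */
struct dom {
	const char *name;
	unsigned long long max_newidle_lb_cost;	/* worst observed cost, ns */
	unsigned long long this_cost;		/* cost of this attempt, ns */
};

int main(void)
{
	struct dom doms[] = {			/* illustrative numbers */
		{ "SMT",    2000,   1500 },
		{ "MC",    10000,   8000 },
		{ "NUMA", 120000,  90000 },
	};
	unsigned long long avg_idle = 40000;	/* expected idle time, ns */
	unsigned long long curr_cost = 0;

	for (int i = 0; i < 3; i++) {
		struct dom *d = &doms[i];

		if (avg_idle < curr_cost + d->max_newidle_lb_cost) {
			printf("%s: skipped, balance would outlast idle\n",
			       d->name);
			break;
		}
		/* "balance" the domain and account what it cost us */
		curr_cost += d->this_cost;
		if (d->this_cost > d->max_newidle_lb_cost)
			d->max_newidle_lb_cost = d->this_cost;
		printf("%s: balanced, curr_cost now %llu ns\n",
		       d->name, curr_cost);
	}
	return 0;
}

Run as-is, it balances the SMT and MC levels but skips NUMA, since 40000 < 9500 + 120000.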
@@ -26,6 +26,7 @@
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
+	.max_newidle_lb_cost	= 0,			\
 }
 #define cpu_to_node(cpu)	((void)(cpu), 0)
@@ -810,6 +810,7 @@ struct sched_domain {
 	unsigned int nr_balance_failed; /* initialise to 0 */
 	u64 last_update;
+	u64 max_newidle_lb_cost;

 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
@@ -106,6 +106,7 @@ int arch_update_cpu_topology(void);
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.smt_gain		= 1178,	/* 15% */	\
+	.max_newidle_lb_cost	= 0,			\
 }
 #endif
 #endif /* CONFIG_SCHED_SMT */
@@ -135,6 +136,7 @@ int arch_update_cpu_topology(void);
 	,						\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
+	.max_newidle_lb_cost	= 0,			\
 }
 #endif
 #endif /* CONFIG_SCHED_MC */
@@ -166,6 +168,7 @@ int arch_update_cpu_topology(void);
 	,						\
 	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
+	.max_newidle_lb_cost	= 0,			\
 }
 #endif
@@ -1330,7 +1330,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 	if (rq->idle_stamp) {
 		u64 delta = rq_clock(rq) - rq->idle_stamp;
-		u64 max = 2*sysctl_sched_migration_cost;
+		u64 max = 2*rq->max_idle_balance_cost;

 		update_avg(&rq->avg_idle, delta);
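For context, update_avg() folds each observed idle period into rq->avg_idle, and the code just after this hunk (not shown) clamps rq->avg_idle to max. The change replaces the fixed ceiling of 2*sysctl_sched_migration_cost with 2*rq->max_idle_balance_cost, so the ceiling follows what newidle balancing actually costs on this runqueue. Below is a minimal user-space sketch of that clamp; the 1/8-weight running average is an assumption for illustration, since the hunk does not show update_avg()'s real decay factor.

#include <stdio.h>

static unsigned long long avg_idle;

/* Assumed running average: move 1/8 of the way toward each sample. */
static void update_avg(unsigned long long *avg, unsigned long long sample)
{
	long long diff = (long long)sample - (long long)*avg;

	*avg += diff >> 3;
}

/* Fold one observed idle period into avg_idle, then cap it so that a
 * few very long idle spells cannot make every future newidle balance
 * look affordable forever.
 */
static void note_idle_period(unsigned long long delta,
			     unsigned long long max_idle_balance_cost)
{
	unsigned long long max = 2 * max_idle_balance_cost;

	update_avg(&avg_idle, delta);
	if (avg_idle > max)
		avg_idle = max;
}

int main(void)
{
	/* Illustrative: long idle periods against a 10000 ns max cost. */
	for (int i = 0; i < 5; i++) {
		note_idle_period(1000000, 10000);
		printf("avg_idle = %llu ns\n", avg_idle);
	}
	return 0;
}

Without the cap, a few very long idle periods would inflate avg_idle and make every subsequent newidle balance look affordable; clamping it at twice the measured max cost keeps the comparison in idle_balance() meaningful.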
@@ -6506,6 +6506,7 @@ void __init sched_init(void)
 		rq->online = 0;
 		rq->idle_stamp = 0;
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
+		rq->max_idle_balance_cost = sysctl_sched_migration_cost;

 		INIT_LIST_HEAD(&rq->cfs_tasks);
@@ -5396,6 +5396,7 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 	struct sched_domain *sd;
 	int pulled_task = 0;
 	unsigned long next_balance = jiffies + HZ;
+	u64 curr_cost = 0;

 	this_rq->idle_stamp = rq_clock(this_rq);
@@ -5412,15 +5413,27 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
 		int continue_balancing = 1;
+		u64 t0, domain_cost;

 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;

+		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+			break;
+
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
+			t0 = sched_clock_cpu(this_cpu);
+
 			/* If we've pulled tasks over stop searching: */
 			pulled_task = load_balance(this_cpu, this_rq,
 						   sd, CPU_NEWLY_IDLE,
 						   &continue_balancing);
+
+			domain_cost = sched_clock_cpu(this_cpu) - t0;
+			if (domain_cost > sd->max_newidle_lb_cost)
+				sd->max_newidle_lb_cost = domain_cost;
+
+			curr_cost += domain_cost;
 		}

 		interval = msecs_to_jiffies(sd->balance_interval);
@@ -5442,6 +5455,9 @@ void idle_balance(int this_cpu, struct rq *this_rq)
 		 */
 		this_rq->next_balance = next_balance;
 	}
+
+	if (curr_cost > this_rq->max_idle_balance_cost)
+		this_rq->max_idle_balance_cost = curr_cost;
 }
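To make the cutoff concrete with illustrative numbers: if this_rq->avg_idle is 40000 ns, the SMT domain's max_newidle_lb_cost is 2000 ns and the NUMA domain's is 120000 ns, then the SMT domain is balanced (40000 >= 0 + 2000) and, if that attempt costs 1500 ns, curr_cost becomes 1500 ns; the NUMA domain is then skipped, since 40000 < 1500 + 120000. On exit, curr_cost updates this_rq->max_idle_balance_cost if it is a new maximum, and 2*max_idle_balance_cost in turn becomes the ceiling for avg_idle in the ttwu_do_wakeup() hunk above.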
@@ -476,6 +476,9 @@ struct rq {
 	u64 age_stamp;
 	u64 idle_stamp;
 	u64 avg_idle;
+
+	/* This is used to determine avg_idle's max value */
+	u64 max_idle_balance_cost;
 #endif

 #ifdef CONFIG_IRQ_TIME_ACCOUNTING