Commit 2a1ed24c authored by Srikar Dronamraju, committed by Ingo Molnar

sched/numa: Prefer NUMA hotness over cache hotness

The current load balancer may not try to prevent a task from moving
out of its preferred node to a less preferred node. The reasons for
this are:

 - Since sched features NUMA and NUMA_RESIST_LOWER are disabled by
   default, migrate_degrades_locality() always returns false.

 - Even if NUMA_RESIST_LOWER were enabled, migrate_degrades_locality()
   is never called when the task is cache hot.

The above behaviour means that tasks can move off their preferred
node, only to be brought back to it later by the NUMA balancer (due
to the higher NUMA faults recorded there).

To avoid the above, this commit merges migrate_degrades_locality() and
migrate_improves_locality(). It also replaces the three sched features
NUMA, NUMA_FAVOUR_HIGHER and NUMA_RESIST_LOWER with a single sched
feature NUMA.
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Mike Galbraith <efault@gmx.de>
Link: http://lkml.kernel.org/r/1434455762-30857-2-git-send-email-srikar@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 6dfec8d9
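
Before the diff itself, a minimal, self-contained sketch of the decision flow the merged helper implements may help. This is not the kernel code: toy_task, toy_env and toy_migrate_degrades_locality are hypothetical stand-ins for the real task_struct, lb_env and runqueue state, sched_feat(NUMA) is modelled as a plain boolean, and the numa_group vs per-task fault distinction is dropped.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the scheduler state used by the real helper. */
struct toy_task {
        bool has_numa_faults;   /* models p->numa_faults != NULL */
        int  preferred_nid;     /* models p->numa_preferred_nid */
        long faults[2];         /* per-node NUMA hinting fault counts */
};

struct toy_env {
        bool sd_numa;           /* models env->sd->flags & SD_NUMA */
        int  src_nid, dst_nid;  /* nodes of the source / destination CPU */
        int  src_nr_running;    /* models env->src_rq->nr_running */
        int  src_nr_preferred;  /* models env->src_rq->nr_preferred_running */
};

static bool numa_feat_enabled = true;   /* models sched_feat(NUMA) */

/*
 * Mirrors the merged migrate_degrades_locality():
 *   1  -> migration degrades locality
 *   0  -> migration improves locality, i.e. migration preferred
 *  -1  -> locality has no opinion; fall back to cache hotness
 */
static int toy_migrate_degrades_locality(const struct toy_task *p,
                                         const struct toy_env *env)
{
        if (!p->has_numa_faults || !env->sd_numa)
                return -1;

        if (!numa_feat_enabled)
                return -1;

        if (env->src_nid == env->dst_nid)
                return -1;

        /*
         * Leaving the preferred node only counts as degrading locality
         * when that node is overloaded with tasks that want to be there.
         */
        if (env->src_nid == p->preferred_nid)
                return env->src_nr_running > env->src_nr_preferred ? 1 : -1;

        /* Moving onto the preferred node always improves locality. */
        if (env->dst_nid == p->preferred_nid)
                return 0;

        /* Otherwise compare recorded hinting faults on the two nodes. */
        return p->faults[env->dst_nid] < p->faults[env->src_nid];
}

int main(void)
{
        struct toy_task p = { .has_numa_faults = true, .preferred_nid = 0,
                              .faults = { 100, 10 } };
        struct toy_env env = { .sd_numa = true, .src_nid = 0, .dst_nid = 1,
                               .src_nr_running = 4, .src_nr_preferred = 1 };

        printf("verdict = %d\n", toy_migrate_degrades_locality(&p, &env));
        return 0;
}

Compiled as ordinary userspace C, the example prints verdict = 1: the task prefers node 0 and node 0 is overloaded (4 running tasks, only 1 of which prefers it), so moving the task away is reported as degrading locality.
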
kernel/sched/fair.c
@@ -5670,72 +5670,39 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 #ifdef CONFIG_NUMA_BALANCING
 /*
- * Returns true if the destination node is the preferred node.
- * Needs to match fbq_classify_rq(): if there is a runnable task
- * that is not on its preferred node, we should identify it.
+ * Returns 1, if task migration degrades locality
+ * Returns 0, if task migration improves locality i.e migration preferred.
+ * Returns -1, if task migration is not affected by locality.
  */
-static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env)
+static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 {
         struct numa_group *numa_group = rcu_dereference(p->numa_group);
         unsigned long src_faults, dst_faults;
         int src_nid, dst_nid;
 
-        if (!sched_feat(NUMA) || !sched_feat(NUMA_FAVOUR_HIGHER) ||
-            !p->numa_faults || !(env->sd->flags & SD_NUMA)) {
-                return false;
-        }
-
-        src_nid = cpu_to_node(env->src_cpu);
-        dst_nid = cpu_to_node(env->dst_cpu);
-
-        if (src_nid == dst_nid)
-                return false;
-
-        /* Encourage migration to the preferred node. */
-        if (dst_nid == p->numa_preferred_nid)
-                return true;
-
-        /* Migrating away from the preferred node is bad. */
-        if (src_nid == p->numa_preferred_nid)
-                return false;
-
-        if (numa_group) {
-                src_faults = group_faults(p, src_nid);
-                dst_faults = group_faults(p, dst_nid);
-        } else {
-                src_faults = task_faults(p, src_nid);
-                dst_faults = task_faults(p, dst_nid);
-        }
-
-        return dst_faults > src_faults;
-}
-
-static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
-{
-        struct numa_group *numa_group = rcu_dereference(p->numa_group);
-        unsigned long src_faults, dst_faults;
-        int src_nid, dst_nid;
-
-        if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER))
-                return false;
-
         if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
-                return false;
+                return -1;
+
+        if (!sched_feat(NUMA))
+                return -1;
 
         src_nid = cpu_to_node(env->src_cpu);
         dst_nid = cpu_to_node(env->dst_cpu);
 
         if (src_nid == dst_nid)
-                return false;
+                return -1;
 
-        /* Migrating away from the preferred node is bad. */
-        if (src_nid == p->numa_preferred_nid)
-                return true;
+        /* Migrating away from the preferred node is always bad. */
+        if (src_nid == p->numa_preferred_nid) {
+                if (env->src_rq->nr_running > env->src_rq->nr_preferred_running)
+                        return 1;
+                else
+                        return -1;
+        }
 
         /* Encourage migration to the preferred node. */
         if (dst_nid == p->numa_preferred_nid)
-                return false;
+                return 0;
 
         if (numa_group) {
                 src_faults = group_faults(p, src_nid);
@@ -5749,16 +5716,10 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
 }
 
 #else
-static inline bool migrate_improves_locality(struct task_struct *p,
+static inline int migrate_degrades_locality(struct task_struct *p,
                                              struct lb_env *env)
 {
-        return false;
-}
-
-static inline bool migrate_degrades_locality(struct task_struct *p,
-                                             struct lb_env *env)
-{
-        return false;
+        return -1;
 }
 #endif
@@ -5768,7 +5729,7 @@ static inline bool migrate_degrades_locality(struct task_struct *p,
 static
 int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
-        int tsk_cache_hot = 0;
+        int tsk_cache_hot;
 
         lockdep_assert_held(&env->src_rq->lock);
@@ -5826,13 +5787,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
          * 2) task is cache cold, or
          * 3) too many balance attempts have failed.
          */
-        tsk_cache_hot = task_hot(p, env);
-        if (!tsk_cache_hot)
-                tsk_cache_hot = migrate_degrades_locality(p, env);
+        tsk_cache_hot = migrate_degrades_locality(p, env);
+        if (tsk_cache_hot == -1)
+                tsk_cache_hot = task_hot(p, env);
 
-        if (migrate_improves_locality(p, env) || !tsk_cache_hot ||
+        if (tsk_cache_hot <= 0 ||
             env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
-                if (tsk_cache_hot) {
+                if (tsk_cache_hot == 1) {
                         schedstat_inc(env->sd, lb_hot_gained[env->idle]);
                         schedstat_inc(p, se.statistics.nr_forced_migrations);
                 }
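
Note how the updated can_migrate_task() consumes the tri-state value: a return of 1 is treated like a cache-hot task, so migration is resisted unless too many balance attempts have already failed; a return of 0 marks the move as improving locality and lets it proceed immediately; and -1 means locality has no opinion, in which case the existing task_hot() check decides. That is why the combined condition reads tsk_cache_hot <= 0.
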
kernel/sched/features.h
@@ -79,20 +79,12 @@ SCHED_FEAT(LB_MIN, false)
  * numa_balancing=
  */
 #ifdef CONFIG_NUMA_BALANCING
-SCHED_FEAT(NUMA, false)
-
 /*
- * NUMA_FAVOUR_HIGHER will favor moving tasks towards nodes where a
- * higher number of hinting faults are recorded during active load
- * balancing.
+ * NUMA will favor moving tasks towards nodes where a higher number of
+ * hinting faults are recorded during active load balancing. It will
+ * resist moving tasks towards nodes where a lower number of hinting
+ * faults have been recorded.
  */
-SCHED_FEAT(NUMA_FAVOUR_HIGHER, true)
-
-/*
- * NUMA_RESIST_LOWER will resist moving tasks towards nodes where a
- * lower number of hinting faults have been recorded. As this has
- * the potential to prevent a task ever migrating to a new node
- * due to CPU overload it is disabled by default.
- */
-SCHED_FEAT(NUMA_RESIST_LOWER, false)
+SCHED_FEAT(NUMA, true)
 #endif
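
With the three knobs folded into a single NUMA feature that defaults to true, the fault-based resistance previously gated by NUMA_RESIST_LOWER is now active out of the box. On kernels built with CONFIG_SCHED_DEBUG the feature can still be toggled at runtime through the sched_features debugfs file, for example by writing NO_NUMA to /sys/kernel/debug/sched_features to disable it and NUMA to re-enable it (assuming debugfs is mounted at its usual location).
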