Commit 6499b1b2 authored by Vincent Guittot, committed by Ingo Molnar

sched/numa: Replace runnable_load_avg by load_avg

Similarly to what has been done for the normal load balancer, we can
replace runnable_load_avg by load_avg in NUMA load balancing and track
other statistics such as the utilization and the number of running tasks,
to get a better view of the current state of a node.
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Phil Auld <pauld@redhat.com>
Cc: Hillf Danton <hdanton@sina.com>
Link: https://lore.kernel.org/r/20200224095223.13361-6-mgorman@techsingularity.net
parent 6d4d2246
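
For readers skimming the diff, the node classification this patch introduces can be summarized by the small standalone sketch below. It mirrors the numa_classify()/numa_stats code added to kernel/sched/fair.c further down, but the struct is trimmed to the fields the classification uses, and the sample node (4 CPUs of capacity 1024, 6 runnable tasks, utilization 3900) and the imbalance percentage of 115 are made-up values for illustration only, not kernel defaults.

/*
 * Illustrative sketch only: mirrors the classification added by this patch,
 * with a trimmed struct and invented sample values. The real code is in the
 * diff below (kernel/sched/fair.c).
 */
#include <stdio.h>

enum numa_type {
	node_has_spare = 0,	/* fewer tasks than CPUs, or enough idle capacity */
	node_fully_busy,	/* fully used, but tasks are not starved of cycles */
	node_overloaded		/* more tasks than CPUs and not enough capacity */
};

struct numa_stats {
	unsigned long util;		/* sum of per-CPU utilization on the node */
	unsigned long compute_capacity;	/* sum of per-CPU compute capacity */
	unsigned int nr_running;	/* sum of cfs h_nr_running */
	unsigned int weight;		/* number of CPUs in the node */
};

/* Same comparisons as the kernel's numa_classify() introduced below. */
static enum numa_type numa_classify(unsigned int imbalance_pct,
				    const struct numa_stats *ns)
{
	if ((ns->nr_running > ns->weight) &&
	    ((ns->compute_capacity * 100) < (ns->util * imbalance_pct)))
		return node_overloaded;

	if ((ns->nr_running < ns->weight) ||
	    ((ns->compute_capacity * 100) > (ns->util * imbalance_pct)))
		return node_has_spare;

	return node_fully_busy;
}

int main(void)
{
	/* Hypothetical node: 4 CPUs of capacity 1024 each, 6 runnable tasks. */
	struct numa_stats ns = {
		.util = 3900,
		.compute_capacity = 4096,
		.nr_running = 6,
		.weight = 4,
	};

	/* 115 is a sample imbalance percentage, not a kernel default. */
	printf("node type: %d (2 == node_overloaded)\n", numa_classify(115, &ns));
	return 0;
}

With these sample numbers both conditions of the first check hold (more runnable tasks than CPUs, and utilization above the capacity threshold), so the node is reported as overloaded. In the patch itself, load_too_imbalanced() uses the analogous node_has_spare result on the destination node to skip the imbalance check entirely.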
@@ -1473,38 +1473,35 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 	       group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }

-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq);
-
-static unsigned long cpu_runnable_load(struct rq *rq)
-{
-	return cfs_rq_runnable_load_avg(&rq->cfs);
-}
+/*
+ * 'numa_type' describes the node at the moment of load balancing.
+ */
+enum numa_type {
+	/* The node has spare capacity that can be used to run more tasks. */
+	node_has_spare = 0,
+	/*
+	 * The node is fully used and the tasks don't compete for more CPU
+	 * cycles. Nevertheless, some tasks might wait before running.
+	 */
+	node_fully_busy,
+	/*
+	 * The node is overloaded and can't provide expected CPU cycles to all
+	 * tasks.
+	 */
+	node_overloaded
+};

 /* Cached statistics for all CPUs within a node */
 struct numa_stats {
 	unsigned long load;
-
+	unsigned long util;
 	/* Total compute capacity of CPUs on a node */
 	unsigned long compute_capacity;
+	unsigned int nr_running;
+	unsigned int weight;
+	enum numa_type node_type;
 };

-/*
- * XXX borrowed from update_sg_lb_stats
- */
-static void update_numa_stats(struct numa_stats *ns, int nid)
-{
-	int cpu;
-
-	memset(ns, 0, sizeof(*ns));
-	for_each_cpu(cpu, cpumask_of_node(nid)) {
-		struct rq *rq = cpu_rq(cpu);
-
-		ns->load += cpu_runnable_load(rq);
-		ns->compute_capacity += capacity_of(cpu);
-	}
-
-}
-
 struct task_numa_env {
 	struct task_struct *p;

@@ -1521,6 +1518,47 @@ struct task_numa_env {
 	int best_cpu;
 };

+static unsigned long cpu_load(struct rq *rq);
+static unsigned long cpu_util(int cpu);
+
+static inline enum
+numa_type numa_classify(unsigned int imbalance_pct,
+			 struct numa_stats *ns)
+{
+	if ((ns->nr_running > ns->weight) &&
+	    ((ns->compute_capacity * 100) < (ns->util * imbalance_pct)))
+		return node_overloaded;
+
+	if ((ns->nr_running < ns->weight) ||
+	    ((ns->compute_capacity * 100) > (ns->util * imbalance_pct)))
+		return node_has_spare;
+
+	return node_fully_busy;
+}
+
+/*
+ * XXX borrowed from update_sg_lb_stats
+ */
+static void update_numa_stats(struct task_numa_env *env,
+			      struct numa_stats *ns, int nid)
+{
+	int cpu;
+
+	memset(ns, 0, sizeof(*ns));
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		struct rq *rq = cpu_rq(cpu);
+
+		ns->load += cpu_load(rq);
+		ns->util += cpu_util(cpu);
+		ns->nr_running += rq->cfs.h_nr_running;
+		ns->compute_capacity += capacity_of(cpu);
+	}
+
+	ns->weight = cpumask_weight(cpumask_of_node(nid));
+
+	ns->node_type = numa_classify(env->imbalance_pct, ns);
+}
+
 static void task_numa_assign(struct task_numa_env *env,
			     struct task_struct *p, long imp)
 {
@@ -1556,6 +1594,11 @@ static bool load_too_imbalanced(long src_load, long dst_load,
 	long orig_src_load, orig_dst_load;
 	long src_capacity, dst_capacity;

+	/* If dst node has spare capacity, there is no real load imbalance */
+	if (env->dst_stats.node_type == node_has_spare)
+		return false;
+
 	/*
 	 * The load is corrected for the CPU capacity available on each node.
 	 *
@@ -1788,10 +1831,10 @@ static int task_numa_migrate(struct task_struct *p)
 	dist = env.dist = node_distance(env.src_nid, env.dst_nid);
 	taskweight = task_weight(p, env.src_nid, dist);
 	groupweight = group_weight(p, env.src_nid, dist);
-	update_numa_stats(&env.src_stats, env.src_nid);
+	update_numa_stats(&env, &env.src_stats, env.src_nid);
 	taskimp = task_weight(p, env.dst_nid, dist) - taskweight;
 	groupimp = group_weight(p, env.dst_nid, dist) - groupweight;
-	update_numa_stats(&env.dst_stats, env.dst_nid);
+	update_numa_stats(&env, &env.dst_stats, env.dst_nid);

 	/* Try to find a spot on the preferred nid. */
 	task_numa_find_cpu(&env, taskimp, groupimp);
@@ -1824,7 +1867,7 @@ static int task_numa_migrate(struct task_struct *p)

 			env.dist = dist;
 			env.dst_nid = nid;
-			update_numa_stats(&env.dst_stats, env.dst_nid);
+			update_numa_stats(&env, &env.dst_stats, env.dst_nid);
 			task_numa_find_cpu(&env, taskimp, groupimp);
 		}
 	}
@@ -3686,11 +3729,6 @@ static void remove_entity_load_avg(struct sched_entity *se)
 	raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
 }

-static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->avg.runnable_load_avg;
-}
-
 static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
 {
 	return cfs_rq->avg.load_avg;