Commit 8a7a2318 authored by Nick Piggin, committed by Linus Torvalds

[PATCH] sched: consolidate sched domains

  Teach the generic domains builder about SMT, and consolidate all
  architecture specific domain code into it.  Also, the SD_*_INIT macros can
  now be redefined by arch code without duplicating the entire setup code.
  This can be done by defining ARCH_HAS_SCHED_TUNE.

  The generic builder has been simplified with the addition of a helper
  function which will probably prove to be useful to arch specific code as
  well, and should be exported if that is the case.
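
  (Editor's sketch, not part of the patch: one way arch code could reuse the
  new helper, init_sched_build_groups() from the kernel/sched.c hunk below,
  for a grouping level of its own. The "board" names and the four-CPUs-per-
  board assumption are invented purely for illustration.)

  static struct sched_group sched_group_boards[NR_CPUS];

  /* Map a CPU to an index into sched_group_boards[]; pretend that every
   * four consecutive CPUs share a board. */
  __init static int cpu_to_board_group(int cpu)
  {
          return cpu / 4;
  }

  static void __init example_build_board_groups(void)
  {
          /* Links the covered groups into a circular list and fills in each
           * group's ->cpumask; ->cpu_power is left at 0 for the caller. */
          init_sched_build_groups(sched_group_boards, cpu_possible_map,
                                          &cpu_to_board_group);
  }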
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>

From: Matthew Dobson <colpatch@us.ibm.com>

  The attached patch is against 2.6.8-rc2-mm2, and removes Nick's
  conditional definition & population of cpu_sibling_map[] in favor of my
  unconditional ones.  This does not affect how cpu_sibling_map is used, just
  gives it broader scope.

From: Nick Piggin <nickpiggin@yahoo.com.au>

  Small fix to sched-consolidate-domains.patch, picked up by Suresh Siddha.

From: Suresh <suresh.b.siddha@intel.com>

  another sched consolidate domains fix

From: Nick Piggin <nickpiggin@yahoo.com.au>

  Don't use cpu_sibling_map if !CONFIG_SCHED_SMT

  This one spotted by Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent c62e7cdb
@@ -5,12 +5,13 @@ MUST be NULL terminated, and domain structures should be per-CPU as they
are locklessly updated.

Each scheduling domain spans a number of CPUs (stored in the ->span field).
A domain's span MUST be a superset of its child's span (this restriction could
be relaxed if the need arises), and a base domain for CPU i MUST span at least
i. The top domain for each CPU will generally span all CPUs in the system
although strictly it doesn't have to, but this could lead to a case where some
CPUs will never be given tasks to run unless the CPUs allowed mask is
explicitly set. A sched domain's span means "balance process load among these
CPUs".
Each scheduling domain must have one or more CPU groups (struct sched_group)
which are organised as a circular one way linked list from the ->groups
@@ -46,6 +47,20 @@ The implementor should read comments in include/linux/sched.h:
struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
the specifics and what to tune.
For SMT, the architecture must define CONFIG_SCHED_SMT and provide a
cpumask_t cpu_sibling_map[NR_CPUS], where cpu_sibling_map[i] is the mask of
all "i"'s siblings as well as "i" itself.
Architectures may override the default SD_*_INIT flags while using the
generic domain builder in kernel/sched.c if they wish to retain the
traditional SMT->SMP->NUMA topology (or some subset of that). This can be
done by #define'ing ARCH_HAS_SCHED_TUNE.
Alternatively, the architecture may completely override the generic domain
builder by #define'ing ARCH_HAS_SCHED_DOMAIN, and exporting its
arch_init_sched_domains function. This function will attach domains to all
CPUs using cpu_attach_domain.
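
A minimal sketch of those two override levels as they might appear in an
architecture header (the header itself is hypothetical; the macro and
function names are the ones used by this patch):

/* Keep the generic builder, but supply the architecture's own
 * SD_SIBLING_INIT/SD_CPU_INIT/SD_NODE_INIT tuning values: */
#define ARCH_HAS_SCHED_TUNE

/* ...or bypass the generic builder entirely; the architecture then builds
 * its own sched domains and attaches them with cpu_attach_domain(): */
#define ARCH_HAS_SCHED_DOMAIN
extern void __init arch_init_sched_domains(void);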
Implementors should change the line
#undef SCHED_DOMAIN_DEBUG
to
...
@@ -1135,213 +1135,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
synchronize_tsc_bp();
}
#ifdef CONFIG_SCHED_SMT
#ifdef CONFIG_NUMA
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static DEFINE_PER_CPU(struct sched_domain, node_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
struct sched_domain *node_domain = &per_cpu(node_domains, i);
int node = cpu_to_node(i);
cpumask_t nodemask = node_to_cpumask(node);
*cpu_domain = SD_SIBLING_INIT;
cpu_domain->span = cpu_sibling_map[i];
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = nodemask;
phys_domain->parent = node_domain;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
*node_domain = SD_NODE_INIT;
node_domain->span = cpu_possible_map;
node_domain->groups = &sched_group_nodes[cpu_to_node(i)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpu->cpumask = CPU_MASK_NONE;
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
for (i = 0; i < MAX_NUMNODES; i++) {
int j;
cpumask_t nodemask;
struct sched_group *node = &sched_group_nodes[i];
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
first = last = NULL;
/* Set up physical groups */
for_each_cpu_mask(j, nodemask) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
struct sched_group *cpu = &sched_group_phys[j];
if (j != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/*
* Make each extra sibling increase power by 10% of
* the basic CPU. This is very arbitrary.
*/
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
node->cpu_power += cpu->cpu_power;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
/* Set up nodes */
first = last = NULL;
for (i = 0; i < MAX_NUMNODES; i++) {
struct sched_group *cpu = &sched_group_nodes[i];
cpumask_t nodemask;
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
cpu->cpumask = nodemask;
/* ->cpu_power already setup */
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#else /* !CONFIG_NUMA */
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
*cpu_domain = SD_SIBLING_INIT;
cpu_domain->span = cpu_sibling_map[i];
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = cpu_possible_map;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
first = last = NULL;
/* Set up physical groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_group *cpu = &sched_group_phys[i];
if (i != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/* See SMT+NUMA setup for comment */
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_SCHED_SMT */
/* These are wrappers to interface to the new boot process. Someone
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
...
@@ -1006,218 +1006,3 @@ void __init smp_cpus_done(unsigned int max_cpus)
set_cpus_allowed(current, old_mask);
}
#ifdef CONFIG_SCHED_SMT
#ifdef CONFIG_NUMA
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static DEFINE_PER_CPU(struct sched_domain, node_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
struct sched_domain *node_domain = &per_cpu(node_domains, i);
int node = cpu_to_node(i);
cpumask_t nodemask = node_to_cpumask(node);
cpumask_t my_cpumask = cpumask_of_cpu(i);
cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
*cpu_domain = SD_SIBLING_INIT;
if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
else
cpu_domain->span = my_cpumask;
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = nodemask;
phys_domain->parent = node_domain;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
*node_domain = SD_NODE_INIT;
node_domain->span = cpu_possible_map;
node_domain->groups = &sched_group_nodes[node];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
for (i = 0; i < MAX_NUMNODES; i++) {
int j;
cpumask_t nodemask;
struct sched_group *node = &sched_group_nodes[i];
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
first = last = NULL;
/* Set up physical groups */
for_each_cpu_mask(j, nodemask) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j);
struct sched_group *cpu = &sched_group_phys[j];
if (j != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/*
* Make each extra sibling increase power by 10% of
* the basic CPU. This is very arbitrary.
*/
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
node->cpu_power += cpu->cpu_power;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
/* Set up nodes */
first = last = NULL;
for (i = 0; i < MAX_NUMNODES; i++) {
struct sched_group *cpu = &sched_group_nodes[i];
cpumask_t nodemask;
cpumask_t node_cpumask = node_to_cpumask(i);
cpus_and(nodemask, node_cpumask, cpu_possible_map);
if (cpus_empty(nodemask))
continue;
cpu->cpumask = nodemask;
/* ->cpu_power already setup */
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#else /* !CONFIG_NUMA */
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
cpumask_t my_cpumask = cpumask_of_cpu(i);
cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1);
*cpu_domain = SD_SIBLING_INIT;
if (cur_cpu_spec->cpu_features & CPU_FTR_SMT)
cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask);
else
cpu_domain->span = my_cpumask;
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = cpu_possible_map;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
first = last = NULL;
/* Set up physical groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_group *cpu = &sched_group_phys[i];
if (i != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/* See SMT+NUMA setup for comment */
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_SCHED_SMT */
@@ -25,7 +25,6 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_SCHED_SMT) += domain.o
obj-$(CONFIG_MODULES) += module.o
...
#include <linux/init.h>
#include <linux/sched.h>
/* Don't do any NUMA setup on Opteron right now. They seem to be
better off with flat scheduling. This is just for SMT. */
#ifdef CONFIG_SCHED_SMT
static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
__init void arch_init_sched_domains(void)
{
int i;
struct sched_group *first = NULL, *last = NULL;
/* Set up domains */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
*cpu_domain = SD_SIBLING_INIT;
/* Disable SMT NICE for CMP */
/* RED-PEN use a generic flag */
if (cpu_data[i].x86_vendor == X86_VENDOR_AMD)
cpu_domain->flags &= ~SD_SHARE_CPUPOWER;
cpu_domain->span = cpu_sibling_map[i];
cpu_domain->parent = phys_domain;
cpu_domain->groups = &sched_group_cpus[i];
*phys_domain = SD_CPU_INIT;
phys_domain->span = cpu_possible_map;
phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
}
/* Set up CPU (sibling) groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
int j;
first = last = NULL;
if (i != first_cpu(cpu_domain->span))
continue;
for_each_cpu_mask(j, cpu_domain->span) {
struct sched_group *cpu = &sched_group_cpus[j];
cpus_clear(cpu->cpumask);
cpu_set(j, cpu->cpumask);
cpu->cpu_power = SCHED_LOAD_SCALE;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
}
first = last = NULL;
/* Set up physical groups */
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
struct sched_group *cpu = &sched_group_phys[i];
if (i != first_cpu(cpu_domain->span))
continue;
cpu->cpumask = cpu_domain->span;
/*
* Make each extra sibling increase power by 10% of
* the basic CPU. This is very arbitrary.
*/
cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
if (!first)
first = cpu;
if (last)
last->next = cpu;
last = cpu;
}
last->next = first;
mb();
for_each_cpu(i) {
struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
cpu_attach_domain(cpu_domain, i);
}
}
#endif
@@ -647,9 +647,4 @@ extern void select_idle_routine(const struct cpuinfo_x86 *c);
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_IDLE
#endif
#endif /* __ASM_I386_PROCESSOR_H */
@@ -626,11 +626,6 @@ static inline void prefetchw(const void *x)
#define spin_lock_prefetch(x) prefetchw(x)
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_IDLE
#endif
#endif /* ASSEMBLY */
/*
...
@@ -456,9 +456,4 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_IDLE
#endif
#endif /* __ASM_X86_64_PROCESSOR_H */
@@ -612,6 +612,9 @@ struct sched_domain {
unsigned int nr_balance_failed; /* initialise to 0 */
};
#ifndef ARCH_HAS_SCHED_TUNE
#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_WAKE_IDLE
/* Common values for SMT siblings */
#define SD_SIBLING_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
@@ -633,6 +636,7 @@ struct sched_domain {
.balance_interval = 1, \
.nr_balance_failed = 0, \
}
#endif
/* Common values for CPUs */
#define SD_CPU_INIT (struct sched_domain) { \
@@ -675,6 +679,7 @@ struct sched_domain {
.nr_balance_failed = 0, \
}
#endif
#endif /* ARCH_HAS_SCHED_TUNE */
extern void cpu_attach_domain(struct sched_domain *sd, int cpu);
...
@@ -3659,118 +3659,182 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu)
#ifdef ARCH_HAS_SCHED_DOMAIN
extern void __init arch_init_sched_domains(void);
#else

static struct sched_group sched_group_cpus[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);

#ifdef CONFIG_NUMA
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static void __init arch_init_sched_domains(void)
{
        int i;
        struct sched_group *first_node = NULL, *last_node = NULL;

        /* Set up domains */
        for_each_cpu(i) {
                int node = cpu_to_node(i);
                cpumask_t nodemask = node_to_cpumask(node);
                struct sched_domain *node_sd = &per_cpu(node_domains, i);
                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);

                *node_sd = SD_NODE_INIT;
                node_sd->span = cpu_possible_map;
                node_sd->groups = &sched_group_nodes[cpu_to_node(i)];

                *cpu_sd = SD_CPU_INIT;
                cpus_and(cpu_sd->span, nodemask, cpu_possible_map);
                cpu_sd->groups = &sched_group_cpus[i];
                cpu_sd->parent = node_sd;
        }

        /* Set up groups */
        for (i = 0; i < MAX_NUMNODES; i++) {
                cpumask_t tmp = node_to_cpumask(i);
                cpumask_t nodemask;
                struct sched_group *first_cpu = NULL, *last_cpu = NULL;
                struct sched_group *node = &sched_group_nodes[i];
                int j;

                cpus_and(nodemask, tmp, cpu_possible_map);

                if (cpus_empty(nodemask))
                        continue;

                node->cpumask = nodemask;
                node->cpu_power = SCHED_LOAD_SCALE * cpus_weight(node->cpumask);

                for_each_cpu_mask(j, node->cpumask) {
                        struct sched_group *cpu = &sched_group_cpus[j];

                        cpus_clear(cpu->cpumask);
                        cpu_set(j, cpu->cpumask);
                        cpu->cpu_power = SCHED_LOAD_SCALE;

                        if (!first_cpu)
                                first_cpu = cpu;
                        if (last_cpu)
                                last_cpu->next = cpu;
                        last_cpu = cpu;
                }
                last_cpu->next = first_cpu;

                if (!first_node)
                        first_node = node;
                if (last_node)
                        last_node->next = node;
                last_node = node;
        }
        last_node->next = first_node;

        mb();
        for_each_cpu(i) {
                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
                cpu_attach_domain(cpu_sd, i);
        }
}

#else /* !CONFIG_NUMA */
static void __init arch_init_sched_domains(void)
{
        int i;
        struct sched_group *first_cpu = NULL, *last_cpu = NULL;

        /* Set up domains */
        for_each_cpu(i) {
                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);

                *cpu_sd = SD_CPU_INIT;
                cpu_sd->span = cpu_possible_map;
                cpu_sd->groups = &sched_group_cpus[i];
        }

        /* Set up CPU groups */
        for_each_cpu_mask(i, cpu_possible_map) {
                struct sched_group *cpu = &sched_group_cpus[i];

                cpus_clear(cpu->cpumask);
                cpu_set(i, cpu->cpumask);
                cpu->cpu_power = SCHED_LOAD_SCALE;

                if (!first_cpu)
                        first_cpu = cpu;
                if (last_cpu)
                        last_cpu->next = cpu;
                last_cpu = cpu;
        }
        last_cpu->next = first_cpu;

        mb(); /* domains were modified outside the lock */
        for_each_cpu(i) {
                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
                cpu_attach_domain(cpu_sd, i);
        }
}
#endif /* CONFIG_NUMA */

#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
__init static int cpu_to_cpu_group(int cpu)
{
        return cpu;
}
#endif

static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group sched_group_phys[NR_CPUS];
__init static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
        return first_cpu(cpu_sibling_map[cpu]);
#else
        return cpu;
#endif
}

#ifdef CONFIG_NUMA
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group sched_group_nodes[MAX_NUMNODES];
__init static int cpu_to_node_group(int cpu)
{
        return cpu_to_node(cpu);
}
#endif

/*
 * init_sched_build_groups takes an array of groups, the cpumask we wish
 * to span, and a pointer to a function which identifies what group a CPU
 * belongs to. The return value of group_fn must be a valid index into the
 * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we
 * keep track of groups covered with a cpumask_t).
 *
 * init_sched_build_groups will build a circular linked list of the groups
 * covered by the given span, and will set each group's ->cpumask correctly,
 * and ->cpu_power to 0.
 */
__init static void init_sched_build_groups(struct sched_group groups[],
                        cpumask_t span, int (*group_fn)(int cpu))
{
        struct sched_group *first = NULL, *last = NULL;
        cpumask_t covered = CPU_MASK_NONE;
        int i;

        for_each_cpu_mask(i, span) {
                int group = group_fn(i);
                struct sched_group *sg = &groups[group];
                int j;

                if (cpu_isset(i, covered))
                        continue;

                sg->cpumask = CPU_MASK_NONE;
                sg->cpu_power = 0;

                for_each_cpu_mask(j, span) {
                        if (group_fn(j) != group)
                                continue;

                        cpu_set(j, covered);
                        cpu_set(j, sg->cpumask);
                }
                if (!first)
                        first = sg;
                if (last)
                        last->next = sg;
                last = sg;
        }
        last->next = first;
}

__init static void arch_init_sched_domains(void)
{
        int i;

        /* Set up domains */
        for_each_cpu(i) {
                int group;
                struct sched_domain *sd = NULL, *p;
                cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));

#ifdef CONFIG_NUMA
                sd = &per_cpu(node_domains, i);
                group = cpu_to_node_group(i);
                *sd = SD_NODE_INIT;
                sd->span = cpu_possible_map;
                sd->groups = &sched_group_nodes[group];
#endif

                p = sd;
                sd = &per_cpu(phys_domains, i);
                group = cpu_to_phys_group(i);
                *sd = SD_CPU_INIT;
                sd->span = nodemask;
                sd->parent = p;
                sd->groups = &sched_group_phys[group];

#ifdef CONFIG_SCHED_SMT
                p = sd;
                sd = &per_cpu(cpu_domains, i);
                group = cpu_to_cpu_group(i);
                *sd = SD_SIBLING_INIT;
                sd->span = cpu_sibling_map[i];
                sd->parent = p;
                sd->groups = &sched_group_cpus[group];
#endif
        }

#ifdef CONFIG_SCHED_SMT
        /* Set up CPU (sibling) groups */
        for_each_cpu(i) {
                if (i != first_cpu(cpu_sibling_map[i]))
                        continue;

                init_sched_build_groups(sched_group_cpus, cpu_sibling_map[i],
                                                &cpu_to_cpu_group);
        }
#endif

        /* Set up physical groups */
        for (i = 0; i < MAX_NUMNODES; i++) {
                cpumask_t nodemask = node_to_cpumask(i);

                cpus_and(nodemask, nodemask, cpu_possible_map);
                if (cpus_empty(nodemask))
                        continue;

                init_sched_build_groups(sched_group_phys, nodemask,
                                                &cpu_to_phys_group);
        }

#ifdef CONFIG_NUMA
        /* Set up node groups */
        init_sched_build_groups(sched_group_nodes, cpu_possible_map,
                                        &cpu_to_node_group);
#endif

        /* Calculate CPU power for physical packages and nodes */
        for_each_cpu(i) {
                int power;
                struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
                power = SCHED_LOAD_SCALE;
                sd->groups->cpu_power = power;
#endif

                sd = &per_cpu(phys_domains, i);
                power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
                                (cpus_weight(sd->groups->cpumask)-1) / 10;
                sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
                if (i == first_cpu(sd->groups->cpumask)) {
                        /* Only add "power" once for each physical package. */
                        sd = &per_cpu(node_domains, i);
                        sd->groups->cpu_power += power;
                }
#endif
        }

        /* Attach the domains */
        for_each_cpu(i) {
                struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
                sd = &per_cpu(cpu_domains, i);
#else
                sd = &per_cpu(phys_domains, i);
#endif
                cpu_attach_domain(sd, i);
        }
}

#endif /* ARCH_HAS_SCHED_DOMAIN */

#define SCHED_DOMAIN_DEBUG
...