Commit 5034465f authored by Andrew Morton, committed by Linus Torvalds

[PATCH] Add SMT setup for domain scheduler on x86-64

From: Andi Kleen <ak@muc.de>

Set up SMT for the domain scheduler on x86-64.  This makes scheduling
work better on HyperThreading-aware systems; in particular, it will use
both physical CPUs before sharing two virtual CPUs on the same package.
This improves performance considerably in some cases.

Based on the i386 code and a previous patch from Suresh B. Siddha.
parent b5c58730
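For illustration only (not part of this patch): a minimal user-space sketch of the placement preference that the sibling/physical domain pair gives the scheduler. The 2-package x 2-sibling topology, the sibling[] table and the pick_cpu() helper are assumptions made up for this example; in the kernel the actual decision is made by the generic domain balancing code using the domains set up below.

/*
 * Toy model: prefer a CPU whose whole physical package is idle before
 * doubling up two tasks on the siblings of one package.
 */
#include <stdio.h>

#define NCPUS 4

/* sibling[i] = the other logical CPU on CPU i's physical package */
static const int sibling[NCPUS] = { 1, 0, 3, 2 };
static int nr_running[NCPUS];	/* tasks currently on each CPU */

static int pick_cpu(void)
{
	int i;

	for (i = 0; i < NCPUS; i++)	/* fully idle package first */
		if (!nr_running[i] && !nr_running[sibling[i]])
			return i;
	for (i = 0; i < NCPUS; i++)	/* then any idle sibling */
		if (!nr_running[i])
			return i;
	return 0;			/* everything busy: fall back to CPU 0 */
}

int main(void)
{
	int t;

	for (t = 0; t < NCPUS; t++) {
		int cpu = pick_cpu();
		nr_running[cpu]++;
		printf("task %d -> cpu %d\n", t, cpu);
	}
	/* Picks CPUs 0, 2, 1, 3: one task per package before sibling sharing. */
	return 0;
}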
arch/x86_64/Kconfig
@@ -239,6 +239,16 @@ config PREEMPT
	  Say Y here if you are feeling brave and building a kernel for a
	  desktop, embedded or real-time system. Say N if you are unsure.

config SCHED_SMT
	bool "SMT (Hyperthreading) scheduler support"
	depends on SMP
	default off
	help
	  SMT scheduler support improves the CPU scheduler's decision making
	  when dealing with Intel Pentium 4 chips with HyperThreading at a
	  cost of slightly increased overhead in some places. If unsure say
	  N here.

# someone write a better help text please.
config K8_NUMA
	bool "K8 NUMA support"
...
arch/x86_64/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_SCHED_SMT) += domain.o
obj-$(CONFIG_MODULES) += module.o
...
arch/x86_64/kernel/domain.c
#include <linux/init.h>
#include <linux/sched.h>

/* Don't do any NUMA setup on Opteron right now. They seem to be
   better off with flat scheduling. This is just for SMT. */

#ifdef CONFIG_SCHED_SMT

static struct sched_group sched_group_cpus[NR_CPUS];
static struct sched_group sched_group_phys[NR_CPUS];
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static DEFINE_PER_CPU(struct sched_domain, phys_domains);

__init void arch_init_sched_domains(void)
{
	int i;
	struct sched_group *first = NULL, *last = NULL;

	/* Set up domains */
	for_each_cpu(i) {
		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
		struct sched_domain *phys_domain = &per_cpu(phys_domains, i);

		*cpu_domain = SD_SIBLING_INIT;
		cpu_domain->span = cpu_sibling_map[i];
		cpu_domain->parent = phys_domain;
		cpu_domain->groups = &sched_group_cpus[i];

		*phys_domain = SD_CPU_INIT;
		phys_domain->span = cpu_possible_map;
		phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)];
	}

	/* Set up CPU (sibling) groups */
	for_each_cpu(i) {
		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
		int j;

		first = last = NULL;
		if (i != first_cpu(cpu_domain->span))
			continue;

		for_each_cpu_mask(j, cpu_domain->span) {
			struct sched_group *cpu = &sched_group_cpus[j];

			cpus_clear(cpu->cpumask);
			cpu_set(j, cpu->cpumask);
			cpu->cpu_power = SCHED_LOAD_SCALE;

			if (!first)
				first = cpu;
			if (last)
				last->next = cpu;
			last = cpu;
		}
		last->next = first;
	}

	first = last = NULL;
	/* Set up physical groups */
	for_each_cpu(i) {
		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
		struct sched_group *cpu = &sched_group_phys[i];

		if (i != first_cpu(cpu_domain->span))
			continue;

		cpu->cpumask = cpu_domain->span;
		/*
		 * Make each extra sibling increase power by 10% of
		 * the basic CPU. This is very arbitrary.
		 */
		cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10;
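		/*
		 * Worked example (assuming SCHED_LOAD_SCALE is 128, as in
		 * kernels of this era): a package with two siblings gets
		 * 128 + 128*(2-1)/10 = 140, i.e. roughly 1.1 CPUs worth of
		 * capacity rather than 2.
		 */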

		if (!first)
			first = cpu;
		if (last)
			last->next = cpu;
		last = cpu;
	}
	last->next = first;

	mb();

	for_each_cpu(i) {
		struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i);
		cpu_attach_domain(cpu_domain, i);
	}
}

#endif

include/asm-x86_64/processor.h
@@ -456,4 +456,9 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)

#ifdef CONFIG_SCHED_SMT
#define ARCH_HAS_SCHED_DOMAIN
#define ARCH_HAS_SCHED_WAKE_IDLE
#endif

#endif /* __ASM_X86_64_PROCESSOR_H */