Commit 13588209 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (50 commits)
  x86, mm: Allow ZONE_DMA to be configurable
  x86, NUMA: Trim numa meminfo with max_pfn in a separate loop
  x86, NUMA: Rename setup_node_bootmem() to setup_node_data()
  x86, NUMA: Enable emulation on 32bit too
  x86, NUMA: Enable CONFIG_AMD_NUMA on 32bit too
  x86, NUMA: Rename amdtopology_64.c to amdtopology.c
  x86, NUMA: Make numa_init_array() static
  x86, NUMA: Make 32bit use common NUMA init path
  x86, NUMA: Initialize and use remap allocator from setup_node_bootmem()
  x86-32, NUMA: Add @start and @end to init_alloc_remap()
  x86, NUMA: Remove long 64bit assumption from numa.c
  x86, NUMA: Enable build of generic NUMA init code on 32bit
  x86, NUMA: Move NUMA init logic from numa_64.c to numa.c
  x86-32, NUMA: Update numaq to use new NUMA init protocol
  x86-32, NUMA: Replace srat_32.c with srat.c
  x86-32, NUMA: implement temporary NUMA init shims
  x86, NUMA: Move numa_nodes_parsed to numa.[hc]
  x86-32, NUMA: Move get_memcfg_numa() into numa_32.c
  x86, NUMA: make srat.c 32bit safe
  x86, NUMA: rename srat_64.c to srat.c
  ...
parents ac2941f5 dc382fd5
......@@ -112,7 +112,14 @@ config MMU
def_bool y
config ZONE_DMA
def_bool y
bool "DMA memory allocation support" if EXPERT
default y
help
DMA memory allocation support allows devices with less than 32-bit
addressing to allocate within the first 16MB of address space.
Disable if no such devices will be used.
If unsure, say Y.
config SBUS
bool
......@@ -1164,7 +1171,7 @@ comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
config AMD_NUMA
def_bool y
prompt "Old style AMD Opteron NUMA detection"
depends on X86_64 && NUMA && PCI
depends on NUMA && PCI
---help---
Enable AMD NUMA node topology detection. You should say Y here if
you have a multi processor AMD system. This uses an old method to
......@@ -1191,7 +1198,7 @@ config NODES_SPAN_OTHER_NODES
config NUMA_EMU
bool "NUMA emulation"
depends on X86_64 && NUMA
depends on NUMA
---help---
Enable NUMA emulation. A flat machine will be split
into virtual nodes when booted with "numa=fake=N", where N is the
......@@ -1213,6 +1220,10 @@ config HAVE_ARCH_BOOTMEM
def_bool y
depends on X86_32 && NUMA
config HAVE_ARCH_ALLOC_REMAP
def_bool y
depends on X86_32 && NUMA
config ARCH_HAVE_MEMORY_PRESENT
def_bool y
depends on X86_32 && DISCONTIGMEM
......@@ -1221,13 +1232,9 @@ config NEED_NODE_MEMMAP_SIZE
def_bool y
depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
config HAVE_ARCH_ALLOC_REMAP
def_bool y
depends on X86_32 && NUMA
config ARCH_FLATMEM_ENABLE
def_bool y
depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
depends on X86_32 && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
......@@ -1237,20 +1244,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
def_bool y
depends on NUMA && X86_32
config ARCH_PROC_KCORE_TEXT
def_bool y
depends on X86_64 && PROC_KCORE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on ARCH_SPARSEMEM_ENABLE
......@@ -1259,6 +1262,10 @@ config ARCH_MEMORY_PROBE
def_bool X86_64
depends on MEMORY_HOTPLUG
config ARCH_PROC_KCORE_TEXT
def_bool y
depends on X86_64 && PROC_KCORE
config ILLEGAL_POINTER_VALUE
hex
default 0 if X86_32
......@@ -1693,10 +1700,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
depends on MEMORY_HOTPLUG
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool X86_64
depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
def_bool y
depends on NUMA
......
......@@ -183,8 +183,6 @@ static inline void disable_acpi(void) { }
#define ARCH_HAS_POWER_INIT 1
struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
extern int x86_acpi_numa_init(void);
......
......@@ -11,7 +11,6 @@ struct amd_nb_bus_dev_range {
extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
extern bool early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
......
......@@ -363,7 +363,12 @@ struct apic {
*/
int (*x86_32_early_logical_apicid)(int cpu);
/* determine CPU -> NUMA node mapping */
/*
* Optional method called from setup_local_APIC() after logical
* apicid is guaranteed to be known to initialize apicid -> node
* mapping if NUMA initialization hasn't done so already. Don't
* add new users.
*/
int (*x86_32_numa_cpu_node)(int cpu);
#endif
};
......@@ -537,8 +542,6 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
extern int default_x86_32_numa_cpu_node(int cpu);
#endif
static inline unsigned int
......
......@@ -208,8 +208,7 @@ extern const char * const x86_power_flags[32];
#define test_cpu_cap(c, bit) \
test_bit(bit, (unsigned long *)((c)->x86_capability))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && \
#define REQUIRED_MASK_BIT_SET(bit) \
( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
......@@ -219,10 +218,16 @@ extern const char * const x86_power_flags[32];
(((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \
? 1 : \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
test_cpu_cap(c, bit))
#define this_cpu_has(bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
......
......@@ -69,22 +69,18 @@
#define MAX_DMA_CHANNELS 8
#ifdef CONFIG_X86_32
/* The maximum address that we can perform a DMA transfer to on this platform */
#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
#else
/* 16MB ISA DMA zone */
#define MAX_DMA_PFN ((16 * 1024 * 1024) >> PAGE_SHIFT)
/* 4GB broken PCI/AGP hardware bus master zone */
#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT)
#ifdef CONFIG_X86_32
/* The maximum address that we can perform a DMA transfer to on this platform */
#define MAX_DMA_ADDRESS (PAGE_OFFSET + 0x1000000)
#else
/* Compat define for old dma zone */
#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT))
#endif
/* 8237 DMA controllers */
......
......@@ -13,31 +13,11 @@ extern struct pglist_data *node_data[];
#define NODE_DATA(nid) (node_data[nid])
#include <asm/numaq.h>
/* summit or generic arch */
#include <asm/srat.h>
extern int get_memcfg_numa_flat(void);
/*
* This allows any one NUMA architecture to be compiled
* for, and still fall back to the flat function if it
* fails.
*/
static inline void get_memcfg_numa(void)
{
if (get_memcfg_numaq())
return;
if (get_memcfg_from_srat())
return;
get_memcfg_numa_flat();
}
extern void resume_map_numa_kva(pgd_t *pgd);
#else /* !CONFIG_NUMA */
#define get_memcfg_numa get_memcfg_numa_flat
static inline void resume_map_numa_kva(pgd_t *pgd) {}
#endif /* CONFIG_NUMA */
......
......@@ -4,36 +4,13 @@
#ifndef _ASM_X86_MMZONE_64_H
#define _ASM_X86_MMZONE_64_H
#ifdef CONFIG_NUMA
#include <linux/mmdebug.h>
#include <asm/smp.h>
/* Simple perfect hash to map physical addresses to node numbers */
struct memnode {
int shift;
unsigned int mapsize;
s16 *map;
s16 embedded_map[64 - 8];
} ____cacheline_aligned; /* total size = 128 bytes */
extern struct memnode memnode;
#define memnode_shift memnode.shift
#define memnodemap memnode.map
#define memnodemapsize memnode.mapsize
extern struct pglist_data *node_data[];
static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
{
unsigned nid;
VIRTUAL_BUG_ON(!memnodemap);
nid = memnodemap[addr >> memnode_shift];
VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
return nid;
}
#define NODE_DATA(nid) (node_data[nid])
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
......
#ifndef _ASM_X86_NUMA_H
#define _ASM_X86_NUMA_H
#include <linux/nodemask.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
#ifdef CONFIG_NUMA
#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
/*
* Too small node sizes may confuse the VM badly. Usually they
* result from BIOS bugs. So dont recognize nodes as standalone
* NUMA entities that have less than this amount of RAM listed:
*/
#define NODE_MIN_SIZE (4*1024*1024)
extern int numa_off;
/*
* __apicid_to_node[] stores the raw mapping between physical apicid and
......@@ -17,15 +29,27 @@
* numa_cpu_node().
*/
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
extern void __init numa_set_distance(int from, int to, int distance);
static inline void set_apicid_to_node(int apicid, s16 node)
{
__apicid_to_node[apicid] = node;
}
extern int __cpuinit numa_cpu_node(int cpu);
#else /* CONFIG_NUMA */
static inline void set_apicid_to_node(int apicid, s16 node)
{
}
static inline int numa_cpu_node(int cpu)
{
return NUMA_NO_NODE;
}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_X86_32
......@@ -37,14 +61,12 @@ static inline void set_apicid_to_node(int apicid, s16 node)
#ifdef CONFIG_NUMA
extern void __cpuinit numa_set_node(int cpu, int node);
extern void __cpuinit numa_clear_node(int cpu);
extern void __init numa_init_array(void);
extern void __init init_cpu_to_node(void);
extern void __cpuinit numa_add_cpu(int cpu);
extern void __cpuinit numa_remove_cpu(int cpu);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void numa_init_array(void) { }
static inline void init_cpu_to_node(void) { }
static inline void numa_add_cpu(int cpu) { }
static inline void numa_remove_cpu(int cpu) { }
......@@ -54,4 +76,10 @@ static inline void numa_remove_cpu(int cpu) { }
void debug_cpumask_set_cpu(int cpu, int node, bool enable);
#endif
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#endif /* _ASM_X86_NUMA_H */
#ifndef _ASM_X86_NUMA_32_H
#define _ASM_X86_NUMA_32_H
extern int numa_off;
extern int pxm_to_nid(int pxm);
#ifdef CONFIG_NUMA
extern int __cpuinit numa_cpu_node(int cpu);
#else /* CONFIG_NUMA */
static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_HIGHMEM
extern void set_highmem_pages_init(void);
#else
......
#ifndef _ASM_X86_NUMA_64_H
#define _ASM_X86_NUMA_64_H
#include <linux/nodemask.h>
struct bootnode {
u64 start;
u64 end;
};
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
extern int numa_off;
extern unsigned long numa_free_all_bootmem(void);
extern void setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end);
#ifdef CONFIG_NUMA
/*
* Too small node sizes may confuse the VM badly. Usually they
* result from BIOS bugs. So dont recognize nodes as standalone
* NUMA entities that have less than this amount of RAM listed:
*/
#define NODE_MIN_SIZE (4*1024*1024)
extern nodemask_t numa_nodes_parsed __initdata;
extern int __cpuinit numa_cpu_node(int cpu);
extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
extern void __init numa_set_distance(int from, int to, int distance);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#else
static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif
#endif /* _ASM_X86_NUMA_64_H */
......@@ -29,7 +29,7 @@
#ifdef CONFIG_X86_NUMAQ
extern int found_numaq;
extern int get_memcfg_numaq(void);
extern int numaq_numa_init(void);
extern int pci_numaq_init(void);
extern void *xquad_portio;
......@@ -166,11 +166,6 @@ struct sys_cfg_data {
void numaq_tsc_disable(void);
#else
static inline int get_memcfg_numaq(void)
{
return 0;
}
#endif /* CONFIG_X86_NUMAQ */
#endif /* _ASM_X86_NUMAQ_H */
......@@ -542,6 +542,33 @@ do { \
old__; \
})
static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
const unsigned long __percpu *addr)
{
unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
}
static inline int x86_this_cpu_variable_test_bit(int nr,
const unsigned long __percpu *addr)
{
int oldbit;
asm volatile("bt "__percpu_arg(2)",%1\n\t"
"sbb %0,%0"
: "=r" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
return oldbit;
}
#define x86_this_cpu_test_bit(nr, addr) \
(__builtin_constant_p((nr)) \
? x86_this_cpu_constant_test_bit((nr), (addr)) \
: x86_this_cpu_variable_test_bit((nr), (addr)))
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
......
/*
* Some of the code in this file has been gleaned from the 64 bit
* discontigmem support code base.
*
* Copyright (C) 2002, IBM Corp.
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to Pat Gaughen <gone@us.ibm.com>
*/
#ifndef _ASM_X86_SRAT_H
#define _ASM_X86_SRAT_H
#ifdef CONFIG_ACPI_NUMA
extern int get_memcfg_from_srat(void);
#else
static inline int get_memcfg_from_srat(void)
{
return 0;
}
#endif
#endif /* _ASM_X86_SRAT_H */
......@@ -93,19 +93,11 @@ extern void setup_node_to_cpumask_map(void);
#define pcibus_to_node(bus) __pcibus_to_node(bus)
#ifdef CONFIG_X86_32
extern unsigned long node_start_pfn[];
extern unsigned long node_end_pfn[];
extern unsigned long node_remap_size[];
#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
# define SD_CACHE_NICE_TRIES 1
# define SD_IDLE_IDX 1
#else
# define SD_CACHE_NICE_TRIES 2
# define SD_IDLE_IDX 2
#endif
/* sched_domains SD_NODE_INIT for NUMA machines */
......
......@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
if (this_cpu_has(X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
/* Make LAPIC timer preferrable over percpu HPET */
lapic_clockevent.rating = 150;
......@@ -1237,6 +1237,17 @@ void __cpuinit setup_local_APIC(void)
/* always use the value from LDR */
early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
logical_smp_processor_id();
/*
* Some NUMA implementations (NUMAQ) don't initialize apicid to
* node mapping during NUMA init. Now that logical apicid is
* guaranteed to be known, give it another chance. This is already
* a bit too late - percpu allocation has already happened without
* proper NUMA affinity.
*/
if (apic->x86_32_numa_cpu_node)
set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
apic->x86_32_numa_cpu_node(cpu));
#endif
/*
......@@ -2014,21 +2025,6 @@ void default_init_apic_ldr(void)
apic_write(APIC_LDR, val);
}
#ifdef CONFIG_X86_32
int default_x86_32_numa_cpu_node(int cpu)
{
#ifdef CONFIG_NUMA
int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
if (apicid != BAD_APICID)
return __apicid_to_node[apicid];
return NUMA_NO_NODE;
#else
return 0;
#endif
}
#endif
/*
* Power management
*/
......
......@@ -119,14 +119,6 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(cpu_has_apic && !disable_apic);
}
#ifdef CONFIG_X86_32
static int noop_x86_32_numa_cpu_node(int cpu)
{
/* we're always on node 0 */
return 0;
}
#endif
struct apic apic_noop = {
.name = "noop",
.probe = noop_probe,
......@@ -195,6 +187,5 @@ struct apic apic_noop = {
#ifdef CONFIG_X86_32
.x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
.x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
#endif
};
......@@ -253,5 +253,4 @@ struct apic apic_bigsmp = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
.x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
......@@ -510,11 +510,6 @@ static void es7000_setup_apic_routing(void)
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
}
static int es7000_numa_cpu_node(int cpu)
{
return 0;
}
static int es7000_cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
......@@ -688,7 +683,6 @@ struct apic __refdata apic_es7000_cluster = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = es7000_early_logical_apicid,
.x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
struct apic __refdata apic_es7000 = {
......@@ -752,5 +746,4 @@ struct apic __refdata apic_es7000 = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = es7000_early_logical_apicid,
.x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
......@@ -48,8 +48,6 @@
#include <asm/e820.h>
#include <asm/ipi.h>
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
int found_numaq;
/*
......@@ -79,31 +77,20 @@ int quad_local_to_mp_bus_id[NR_CPUS/4][4];
static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
{
struct eachquadmem *eq = scd->eq + node;
u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
int ret;
node_set_online(node);
/* Convert to pages */
node_start_pfn[node] =
MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
node_end_pfn[node] =
MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
memblock_x86_register_active_regions(node, node_start_pfn[node],
node_end_pfn[node]);
memory_present(node, node_start_pfn[node], node_end_pfn[node]);
node_remap_size[node] = node_memmap_size_bytes(node,
node_start_pfn[node],
node_end_pfn[node]);
node_set(node, numa_nodes_parsed);
ret = numa_add_memblk(node, start, end);
BUG_ON(ret < 0);
}
/*
* Function: smp_dump_qct()
*
* Description: gets memory layout from the quad config table. This
* function also updates node_online_map with the nodes (quads) present.
* function also updates numa_nodes_parsed with the nodes (quads) present.
*/
static void __init smp_dump_qct(void)
{
......@@ -112,7 +99,6 @@ static void __init smp_dump_qct(void)
scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
nodes_clear(node_online_map);
for_each_node(node) {
if (scd->quads_present31_0 & (1 << node))
numaq_register_node(node, scd);
......@@ -282,14 +268,14 @@ static __init void early_check_numaq(void)
}
}
int __init get_memcfg_numaq(void)
int __init numaq_numa_init(void)
{
early_check_numaq();
if (!found_numaq)
return 0;
return -ENOENT;
smp_dump_qct();
return 1;
return 0;
}
#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
......
......@@ -172,7 +172,6 @@ struct apic apic_default = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
.x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
extern struct apic apic_numaq;
......
......@@ -551,5 +551,4 @@ struct apic apic_summit = {
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
.x86_32_early_logical_apicid = summit_early_logical_apicid,
.x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
......@@ -353,7 +353,6 @@ static void notify_thresholds(__u64 msr_val)
static void intel_thermal_interrupt(void)
{
__u64 msr_val;
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
......@@ -365,19 +364,19 @@ static void intel_thermal_interrupt(void)
CORE_LEVEL) != 0)
mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
if (cpu_has(c, X86_FEATURE_PLN))
if (this_cpu_has(X86_FEATURE_PLN))
if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
CORE_LEVEL) != 0)
mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
if (cpu_has(c, X86_FEATURE_PTS)) {
if (this_cpu_has(X86_FEATURE_PTS)) {
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
PACKAGE_LEVEL) != 0)
mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
if (cpu_has(c, X86_FEATURE_PLN))
if (this_cpu_has(X86_FEATURE_PLN))
if (therm_throt_process(msr_val &
PACKAGE_THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
......
......@@ -715,7 +715,7 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
}
}
static int
static int __init
check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
{
if (!mpc_new_phys || count <= mpc_new_length) {
......
......@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
if (!need_resched()) {
if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);
......@@ -465,7 +465,7 @@ static void mwait_idle(void)
if (!need_resched()) {
trace_power_start(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle(1, smp_processor_id());
if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);
......
......@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void)
void *mwait_ptr;
struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)))
if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
return;
if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
if (!this_cpu_has(X86_FEATURE_CLFLSH))
return;
if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
return;
......
......@@ -23,8 +23,8 @@ mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
......
......@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/io.h>
#include <linux/pci_ids.h>
......@@ -69,10 +70,10 @@ static __init void early_get_boot_cpu_id(void)
int __init amd_numa_init(void)
{
unsigned long start = PFN_PHYS(0);
unsigned long end = PFN_PHYS(max_pfn);
u64 start = PFN_PHYS(0);
u64 end = PFN_PHYS(max_pfn);
unsigned numnodes;
unsigned long prevbase;
u64 prevbase;
int i, j, nb;
u32 nodeid, reg;
unsigned int bits, cores, apicid_base;
......@@ -95,7 +96,7 @@ int __init amd_numa_init(void)
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base, limit;
u64 base, limit;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
......@@ -107,18 +108,18 @@ int __init amd_numa_init(void)
continue;
}
if (nodeid >= numnodes) {
pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
pr_info("Ignoring excess node %d (%Lx:%Lx)\n", nodeid,
base, limit);
continue;
}
if (!limit) {
pr_info("Skipping node entry %d (base %lx)\n",
pr_info("Skipping node entry %d (base %Lx)\n",
i, base);
continue;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
pr_err("Node %d using interleaving mode %lx/%lx\n",
pr_err("Node %d using interleaving mode %Lx/%Lx\n",
nodeid, (base >> 8) & 3, (limit >> 8) & 3);
return -EINVAL;
}
......@@ -150,19 +151,19 @@ int __init amd_numa_init(void)
continue;
}
if (limit < base) {
pr_err("Node %d bogus settings %lx-%lx.\n",
pr_err("Node %d bogus settings %Lx-%Lx.\n",
nodeid, base, limit);
continue;
}
/* Could sort here, but pun for now. Should not happen anyroads. */
if (prevbase > base) {
pr_err("Node map not sorted %lx,%lx\n",
pr_err("Node map not sorted %Lx,%Lx\n",
prevbase, base);
return -EINVAL;
}
pr_info("Node %d MemBase %016lx Limit %016lx\n",
pr_info("Node %d MemBase %016Lx Limit %016Lx\n",
nodeid, base, limit);
prevbase = base;
......
......@@ -678,8 +678,10 @@ static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] =
virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
......@@ -716,6 +718,7 @@ void __init paging_init(void)
* NOTE: at this point the bootmem allocator is fully available.
*/
olpc_dt_build_devicetree();
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
zone_sizes_init();
}
......
......@@ -616,7 +616,9 @@ void __init paging_init(void)
unsigned long max_zone_pfns[MAX_NR_ZONES];
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
#endif
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
max_zone_pfns[ZONE_NORMAL] = max_pfn;
......@@ -679,14 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
}
EXPORT_SYMBOL_GPL(arch_add_memory);
#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
int memory_add_physaddr_to_nid(u64 start)
{
return 0;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
static struct kcore_list kcore_vsyscall;
......
......@@ -90,13 +90,6 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
if (is_ISA_range(phys_addr, last_addr))
return (__force void __iomem *)phys_to_virt(phys_addr);
/*
* Check if the request spans more than any BAR in the iomem resource
* tree.
*/
WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
......@@ -170,6 +163,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
ret_addr = (void __iomem *) (vaddr + offset);
mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
/*
* Check if the request spans more than any BAR in the iomem resource
* tree.
*/
WARN_ONCE(iomem_map_sanity_check(unaligned_phys_addr, unaligned_size),
KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
return ret_addr;
err_free_area:
free_vm_area(area);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -5,6 +5,7 @@
#include <linux/errno.h>
#include <linux/topology.h>
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/dma.h>
#include "numa_internal.h"
......@@ -84,7 +85,13 @@ static int __init split_nodes_interleave(struct numa_meminfo *ei,
nr_nodes = MAX_NUMNODES;
}
size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
/*
* Calculate target node size. x86_32 freaks on __udivdi3() so do
* the division in ulong number of pages and convert back.
*/
size = max_addr - addr - memblock_x86_hole_size(addr, max_addr);
size = PFN_PHYS((unsigned long)(size >> PAGE_SHIFT) / nr_nodes);
/*
* Calculate the number of big nodes that can be allocated as a result
* of consolidating the remainder.
......@@ -226,7 +233,7 @@ static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
*/
while (nodes_weight(physnode_mask)) {
for_each_node_mask(i, physnode_mask) {
u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
u64 start, limit, end;
int phys_blk;
......@@ -298,7 +305,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
{
static struct numa_meminfo ei __initdata;
static struct numa_meminfo pi __initdata;
const u64 max_addr = max_pfn << PAGE_SHIFT;
const u64 max_addr = PFN_PHYS(max_pfn);
u8 *phys_dist = NULL;
size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
int max_emu_nid, dfl_phys_nid;
......@@ -342,8 +349,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
if (numa_dist_cnt) {
u64 phys;
phys = memblock_find_in_range(0,
(u64)max_pfn_mapped << PAGE_SHIFT,
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
phys_size, PAGE_SIZE);
if (phys == MEMBLOCK_ERROR) {
pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
......
......@@ -19,6 +19,14 @@ void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
void __init numa_reset_distance(void);
void __init x86_numa_init(void);
#ifdef CONFIG_X86_64
static inline void init_alloc_remap(int nid, u64 start, u64 end) { }
#else
void __init init_alloc_remap(int nid, u64 start, u64 end);
#endif
#ifdef CONFIG_NUMA_EMU
void __init numa_emulation(struct numa_meminfo *numa_meminfo,
int numa_dist_cnt);
......
......@@ -26,8 +26,6 @@
int acpi_numa __initdata;
static struct bootnode nodes_add[MAX_NUMNODES];
static __init int setup_node(int pxm)
{
return acpi_map_pxm_to_node(pxm);
......@@ -37,7 +35,6 @@ static __init void bad_srat(void)
{
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
memset(nodes_add, 0, sizeof(nodes_add));
}
static __init inline int srat_disabled(void)
......@@ -131,73 +128,17 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
pxm, apic_id, node);
}
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
#ifdef CONFIG_MEMORY_HOTPLUG
static inline int save_add_info(void) {return 1;}
#else
static inline int save_add_info(void) {return 0;}
#endif
/*
* Update nodes_add[]
* This code supports one contiguous hot add area per node
*/
static void __init
update_nodes_add(int node, unsigned long start, unsigned long end)
{
unsigned long s_pfn = start >> PAGE_SHIFT;
unsigned long e_pfn = end >> PAGE_SHIFT;
int changed = 0;
struct bootnode *nd = &nodes_add[node];
/* I had some trouble with strange memory hotadd regions breaking
the boot. Be very strict here and reject anything unexpected.
If you want working memory hotadd write correct SRATs.
The node size check is a basic sanity check to guard against
mistakes */
if ((signed long)(end - start) < NODE_MIN_SIZE) {
printk(KERN_ERR "SRAT: Hotplug area too small\n");
return;
}
/* This check might be a bit too strict, but I'm keeping it for now. */
if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
printk(KERN_ERR
"SRAT: Hotplug area %lu -> %lu has existing memory\n",
s_pfn, e_pfn);
return;
}
/* Looks good */
if (nd->start == nd->end) {
nd->start = start;
nd->end = end;
changed = 1;
} else {
if (nd->start == end) {
nd->start = start;
changed = 1;
}
if (nd->end == start) {
nd->end = end;
changed = 1;
}
if (!changed)
printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
}
if (changed) {
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
nd->start, nd->end);
}
}
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
unsigned long start, end;
u64 start, end;
int node, pxm;
if (srat_disabled())
......@@ -226,11 +167,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
return;
}
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
start, end);
if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
update_nodes_add(node, start, end);
}
void __init acpi_numa_arch_fixup(void) {}
......@@ -244,17 +182,3 @@ int __init x86_acpi_numa_init(void)
return ret;
return srat_disabled() ? -EINVAL : 0;
}
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
int memory_add_physaddr_to_nid(u64 start)
{
int i, ret = 0;
for_each_node(i)
if (nodes_add[i].start <= start && nodes_add[i].end > start)
ret = i;
return ret;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
/*
* Some of the code in this file has been gleaned from the 64 bit
* discontigmem support code base.
*
* Copyright (C) 2002, IBM Corp.
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to Pat Gaughen <gone@us.ibm.com>
*/
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/acpi.h>
#include <linux/nodemask.h>
#include <asm/srat.h>
#include <asm/topology.h>
#include <asm/smp.h>
#include <asm/e820.h>
/*
* proximity macros and definitions
*/
#define NODE_ARRAY_INDEX(x) ((x) / 8) /* 8 bits/char */
#define NODE_ARRAY_OFFSET(x) ((x) % 8) /* 8 bits/char */
#define BMAP_SET(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] |= 1 << NODE_ARRAY_OFFSET(bit))
#define BMAP_TEST(bmap, bit) ((bmap)[NODE_ARRAY_INDEX(bit)] & (1 << NODE_ARRAY_OFFSET(bit)))
/* bitmap length; _PXM is at most 255 */
#define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8)
static u8 __initdata pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */
#define MAX_CHUNKS_PER_NODE 3
#define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES)
struct node_memory_chunk_s {
unsigned long start_pfn;
unsigned long end_pfn;
u8 pxm; // proximity domain of node
u8 nid; // which cnode contains this chunk?
u8 bank; // which mem bank on this node
};
static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
static int __initdata num_memory_chunks; /* total number of memory chunks */
static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
int acpi_numa __initdata;
static __init void bad_srat(void)
{
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
num_memory_chunks = 0;
}
static __init inline int srat_disabled(void)
{
return numa_off || acpi_numa < 0;
}
/* Identify CPU proximity domains */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *cpu_affinity)
{
if (srat_disabled())
return;
if (cpu_affinity->header.length !=
sizeof(struct acpi_srat_cpu_affinity)) {
bad_srat();
return;
}
if ((cpu_affinity->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return; /* empty entry */
/* mark this node as "seen" in node bitmap */
BMAP_SET(pxm_bitmap, cpu_affinity->proximity_domain_lo);
/* don't need to check apic_id here, because it is always 8 bits */
apicid_to_pxm[cpu_affinity->apic_id] = cpu_affinity->proximity_domain_lo;
printk(KERN_DEBUG "CPU %02x in proximity domain %02x\n",
cpu_affinity->apic_id, cpu_affinity->proximity_domain_lo);
}
/*
* Identify memory proximity domains and hot-remove capabilities.
* Fill node memory chunk list structure.
*/
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
{
unsigned long long paddr, size;
unsigned long start_pfn, end_pfn;
u8 pxm;
struct node_memory_chunk_s *p, *q, *pend;
if (srat_disabled())
return;
if (memory_affinity->header.length !=
sizeof(struct acpi_srat_mem_affinity)) {
bad_srat();
return;
}
if ((memory_affinity->flags & ACPI_SRAT_MEM_ENABLED) == 0)
return; /* empty entry */
pxm = memory_affinity->proximity_domain & 0xff;
/* mark this node as "seen" in node bitmap */
BMAP_SET(pxm_bitmap, pxm);
/* calculate info for memory chunk structure */
paddr = memory_affinity->base_address;
size = memory_affinity->length;
start_pfn = paddr >> PAGE_SHIFT;
end_pfn = (paddr + size) >> PAGE_SHIFT;
if (num_memory_chunks >= MAXCHUNKS) {
printk(KERN_WARNING "Too many mem chunks in SRAT."
" Ignoring %lld MBytes at %llx\n",
size/(1024*1024), paddr);
return;
}
/* Insertion sort based on base address */
pend = &node_memory_chunk[num_memory_chunks];
for (p = &node_memory_chunk[0]; p < pend; p++) {
if (start_pfn < p->start_pfn)
break;
}
if (p < pend) {
for (q = pend; q >= p; q--)
*(q + 1) = *q;
}
p->start_pfn = start_pfn;
p->end_pfn = end_pfn;
p->pxm = pxm;
num_memory_chunks++;
printk(KERN_DEBUG "Memory range %08lx to %08lx"
" in proximity domain %02x %s\n",
start_pfn, end_pfn,
pxm,
((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
"enabled and removable" : "enabled" ) );
}
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
}
void acpi_numa_arch_fixup(void)
{
}
/*
* The SRAT table always lists ascending addresses, so can always
* assume that the first "start" address that you see is the real
* start of the node, and that the current "end" address is after
* the previous one.
*/
static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
{
/*
* Only add present memory as told by the e820.
* There is no guarantee from the SRAT that the memory it
* enumerates is present at boot time because it represents
* *possible* memory hotplug areas the same as normal RAM.
*/
if (memory_chunk->start_pfn >= max_pfn) {
printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
memory_chunk->start_pfn, memory_chunk->end_pfn);
return -1;
}
if (memory_chunk->nid != nid)
return -1;
if (!node_has_online_mem(nid))
node_start_pfn[nid] = memory_chunk->start_pfn;
if (node_start_pfn[nid] > memory_chunk->start_pfn)
node_start_pfn[nid] = memory_chunk->start_pfn;
if (node_end_pfn[nid] < memory_chunk->end_pfn)
node_end_pfn[nid] = memory_chunk->end_pfn;
return 0;
}
int __init get_memcfg_from_srat(void)
{
int i, j, nid;
if (srat_disabled())
goto out_fail;
if (acpi_numa_init() < 0)
goto out_fail;
if (num_memory_chunks == 0) {
printk(KERN_DEBUG
"could not find any ACPI SRAT memory areas.\n");
goto out_fail;
}
/* Calculate total number of nodes in system from PXM bitmap and create
* a set of sequential node IDs starting at zero. (ACPI doesn't seem
* to specify the range of _PXM values.)
*/
/*
* MCD - we no longer HAVE to number nodes sequentially. PXM domain
* numbers could go as high as 256, and MAX_NUMNODES for i386 is typically
* 32, so we will continue numbering them in this manner until MAX_NUMNODES
* approaches MAX_PXM_DOMAINS for i386.
*/
nodes_clear(node_online_map);
for (i = 0; i < MAX_PXM_DOMAINS; i++) {
if (BMAP_TEST(pxm_bitmap, i)) {
int nid = acpi_map_pxm_to_node(i);
node_set_online(nid);
}
}
BUG_ON(num_online_nodes() == 0);
/* set cnode id in memory chunk structure */
for (i = 0; i < num_memory_chunks; i++)
node_memory_chunk[i].nid = pxm_to_node(node_memory_chunk[i].pxm);
printk(KERN_DEBUG "pxm bitmap: ");
for (i = 0; i < sizeof(pxm_bitmap); i++) {
printk(KERN_CONT "%02x ", pxm_bitmap[i]);
}
printk(KERN_CONT "\n");
printk(KERN_DEBUG "Number of logical nodes in system = %d\n",
num_online_nodes());
printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
num_memory_chunks);
for (i = 0; i < MAX_LOCAL_APIC; i++)
set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
for (j = 0; j < num_memory_chunks; j++){
struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
printk(KERN_DEBUG
"chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
if (node_read_chunk(chunk->nid, chunk))
continue;
memblock_x86_register_active_regions(chunk->nid, chunk->start_pfn,
min(chunk->end_pfn, max_pfn));
}
/* for out of order entries in SRAT */
sort_node_map();
for_each_online_node(nid) {
unsigned long start = node_start_pfn[nid];
unsigned long end = min(node_end_pfn[nid], max_pfn);
memory_present(nid, start, end);
node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
}
return 1;
out_fail:
printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
" table\n");
return 0;
}
......@@ -710,20 +710,14 @@ static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr)
}
#ifdef CONFIG_X86
static int acpi_throttling_rdmsr(struct acpi_processor *pr,
u64 *value)
static int acpi_throttling_rdmsr(u64 *value)
{
struct cpuinfo_x86 *c;
u64 msr_high, msr_low;
unsigned int cpu;
u64 msr = 0;
int ret = -1;
cpu = pr->id;
c = &cpu_data(cpu);
if ((c->x86_vendor != X86_VENDOR_INTEL) ||
!cpu_has(c, X86_FEATURE_ACPI)) {
if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
!this_cpu_has(X86_FEATURE_ACPI)) {
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
} else {
......@@ -738,18 +732,13 @@ static int acpi_throttling_rdmsr(struct acpi_processor *pr,
return ret;
}
static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
static int acpi_throttling_wrmsr(u64 value)
{
struct cpuinfo_x86 *c;
unsigned int cpu;
int ret = -1;
u64 msr;
cpu = pr->id;
c = &cpu_data(cpu);
if ((c->x86_vendor != X86_VENDOR_INTEL) ||
!cpu_has(c, X86_FEATURE_ACPI)) {
if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
!this_cpu_has(X86_FEATURE_ACPI)) {
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
} else {
......@@ -761,15 +750,14 @@ static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
return ret;
}
#else
static int acpi_throttling_rdmsr(struct acpi_processor *pr,
u64 *value)
static int acpi_throttling_rdmsr(u64 *value)
{
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
return -1;
}
static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
static int acpi_throttling_wrmsr(u64 value)
{
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
......@@ -801,7 +789,7 @@ static int acpi_read_throttling_status(struct acpi_processor *pr,
ret = 0;
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
ret = acpi_throttling_rdmsr(pr, value);
ret = acpi_throttling_rdmsr(value);
break;
default:
printk(KERN_ERR PREFIX "Unknown addr space %d\n",
......@@ -834,7 +822,7 @@ static int acpi_write_throttling_state(struct acpi_processor *pr,
ret = 0;
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
ret = acpi_throttling_wrmsr(pr, value);
ret = acpi_throttling_wrmsr(value);
break;
default:
printk(KERN_ERR PREFIX "Unknown addr space %d\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment