Commit 43aa05c1 authored by Kimio Suganuma, committed by David Mosberger

[PATCH] ia64: discontigmem patch for 2.5 ia64

Here is the latest discontigmem patch for ia64 against
2.5.39 + ia64 patch + Erich's acpi_numa patch.
parent 4ed8eeb9
@@ -66,6 +66,10 @@ fi
if [ "$CONFIG_IA64_GENERIC" = "y" -o "$CONFIG_IA64_DIG" = "y" -o "$CONFIG_IA64_HP_ZX1" = "y" ];
then
bool ' Enable NUMA support' CONFIG_NUMA
if [ "$CONFIG_NUMA" = "y" ]; then
define_bool CONFIG_DISCONTIGMEM y
fi
bool ' Enable IA-64 Machine Check Abort' CONFIG_IA64_MCA
define_bool CONFIG_PM y
define_bool CONFIG_IOSAPIC y
......
@@ -501,7 +501,7 @@ acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
void __init
acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
{
unsigned long paddr, size;
unsigned long paddr, size, hole_size, min_hole_size;
u8 pxm;
struct node_memblk_s *p, *q, *pend;
@@ -523,6 +523,34 @@ acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
if (!ma->flags.enabled)
return;
/*
* When the chunk is not the first one in the node, check the distance
* from the other chunks. If the hole is too large, ignore the chunk.
* This restriction should be removed when multiple chunks per node
* are supported.
*/
pend = &node_memblk[num_memblks];
min_hole_size = 0;
for (p = &node_memblk[0]; p < pend; p++) {
if (p->nid != pxm)
continue;
if (p->start_paddr < paddr)
hole_size = paddr - (p->start_paddr + p->size);
else
hole_size = p->start_paddr - (paddr + size);
if (!min_hole_size || hole_size < min_hole_size)
min_hole_size = hole_size;
}
if (min_hole_size) {
if (min_hole_size > size) {
printk("Too huge memory hole. Ignoring %ld MBytes at %lx\n",
size/(1024*1024), paddr);
return;
}
}
/* record this node in proximity bitmap */
pxm_bit_set(pxm);
......
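[Editorial note] The hole check above rejects a chunk when the nearest gap to the node's existing chunks is larger than the chunk itself. A minimal userspace sketch of the same heuristic, not part of the patch; the block values and the reduced struct are invented for illustration (assumes 64-bit longs):

#include <stdio.h>

/* reduced stand-in for struct node_memblk_s; only the fields used here */
struct memblk { unsigned long start, size; int nid; };

static struct memblk blks[] = {
        { 0x0UL, 0x40000000UL, 0 },     /* node 0: 1GB at physical 0 */
};

int main(void)
{
        /* candidate chunk: 512MB at 8GB, also claimed for node 0 */
        unsigned long paddr = 0x200000000UL, size = 0x20000000UL;
        unsigned long hole, min_hole = 0;
        int i, n = sizeof(blks)/sizeof(blks[0]);

        for (i = 0; i < n; i++) {
                if (blks[i].nid != 0)
                        continue;
                if (blks[i].start < paddr)
                        hole = paddr - (blks[i].start + blks[i].size);
                else
                        hole = blks[i].start - (paddr + size);
                if (!min_hole || hole < min_hole)
                        min_hole = hole;
        }
        /* the hole is 8GB - 1GB = 7GB, larger than the 512MB chunk: rejected */
        if (min_hole && min_hole > size)
                printf("reject: hole %lu MB > chunk %lu MB\n",
                       min_hole >> 20, size >> 20);
        return 0;
}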
@@ -34,6 +34,7 @@
#include <asm/ia32.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/machvec.h>
#include <asm/processor.h>
#include <asm/sal.h>
@@ -49,9 +50,6 @@
# error "struct cpuinfo_ia64 too big!"
#endif
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#define MAX(a,b) ((a) > (b) ? (a) : (b))
extern char _end;
#ifdef CONFIG_SMP
@@ -95,6 +93,10 @@ struct rsvd_region {
static struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
static int num_rsvd_regions;
#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
#ifndef CONFIG_DISCONTIGMEM
static unsigned long bootmap_start; /* physical address where the bootmem map is located */
static int
@@ -108,17 +110,60 @@ find_max_pfn (unsigned long start, unsigned long end, void *arg)
return 0;
}
-#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
#else /* CONFIG_DISCONTIGMEM */
/*
* efi_memmap_walk() knows nothing about layout of memory across nodes. Find
* out to which node a block of memory belongs. Ignore memory that we cannot
* identify, and split blocks that run across multiple nodes.
*
* Take this opportunity to round the start address up and the end address
* down to page boundaries.
*/
void
call_pernode_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long rs, re;
void (*func)(unsigned long, unsigned long, int, int);
int i;
start = PAGE_ALIGN(start);
end &= PAGE_MASK;
if (start >= end)
return;
func = arg;
if (!num_memblks) {
/* this machine doesn't have SRAT, so call func with nid=0, bank=0 */
if (start < end)
(*func)(start, end - start, 0, 0);
return;
}
for (i = 0; i < num_memblks; i++) {
rs = max(start, node_memblk[i].start_paddr);
re = min(end, node_memblk[i].start_paddr+node_memblk[i].size);
if (rs < re)
(*func)(rs, re-rs, node_memblk[i].nid,
node_memblk[i].bank);
}
}
#endif /* CONFIG_DISCONTIGMEM */
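[Editorial note] call_pernode_memory() above is plain interval intersection between an EFI range and the SRAT-derived node_memblk table. A standalone sketch, not part of the patch, with a hypothetical two-node table; the callback prints what the kernel version would hand to the per-node function:

#include <stdio.h>

struct memblk { unsigned long start, size; int nid, bank; };

/* invented SRAT-derived table: two nodes, one 256MB block each */
static struct memblk tbl[] = {
        { 0x00000000UL, 0x10000000UL, 0, 0 },
        { 0x10000000UL, 0x10000000UL, 1, 0 },
};

static void per_node(unsigned long start, unsigned long len, int nid, int bank)
{
        printf("node %d bank %d: %#lx + %#lx\n", nid, bank, start, len);
}

int main(void)
{
        /* an EFI range that straddles both nodes */
        unsigned long start = 0x0c000000UL, end = 0x14000000UL;
        int i, n = sizeof(tbl)/sizeof(tbl[0]);

        for (i = 0; i < n; i++) {
                unsigned long e = tbl[i].start + tbl[i].size;
                unsigned long rs = start > tbl[i].start ? start : tbl[i].start;
                unsigned long re = end < e ? end : e;
                if (rs < re)    /* block split across nodes, as in the patch */
                        per_node(rs, re - rs, tbl[i].nid, tbl[i].bank);
        }
        return 0;
}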
/*
- * Free available memory based on the primitive map created from
- * the boot parameters. This routine does not assume the incoming
- * segments are sorted.
+ * Filter incoming memory segments based on the primitive map created from
+ * the boot parameters. Segments contained in the map are removed from the
+ * memory ranges. A caller-specified function is called with the memory
+ * ranges that remain after filtering.
+ * This routine does not assume the incoming segments are sorted.
 */
-static int
-free_available_memory (unsigned long start, unsigned long end, void *arg)
+int
+filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
{
unsigned long range_start, range_end, prev_start;
void (*func)(unsigned long, unsigned long);
int i;
#if IGNORE_PFN0
@@ -132,13 +177,18 @@ free_available_memory (unsigned long start, unsigned long end, void *arg)
* lowest possible address (the walker uses virtual addresses)
*/
prev_start = PAGE_OFFSET;
func = arg;
for (i = 0; i < num_rsvd_regions; ++i) {
-range_start = MAX(start, prev_start);
-range_end = MIN(end, rsvd_region[i].start);
+range_start = max(start, prev_start);
+range_end = min(end, rsvd_region[i].start);
if (range_start < range_end)
-free_bootmem(__pa(range_start), range_end - range_start);
+#ifdef CONFIG_DISCONTIGMEM
+call_pernode_memory(__pa(range_start), __pa(range_end), func);
+#else
+(*func)(__pa(range_start), range_end - range_start);
+#endif
/* nothing more available in this segment */
if (range_end == end) return 0;
@@ -150,6 +200,7 @@ free_available_memory (unsigned long start, unsigned long end, void *arg)
}
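[Editorial note] filter_rsvd_memory() depends on rsvd_region[] being sorted (sort_regions() below guarantees that): each pass clips the incoming segment against the gap between the previous region's end and the current region's start. A toy version, not part of the patch, over two invented reserved regions:

#include <stdio.h>

struct rsvd { unsigned long start, end; };

/* assumed already sorted, as sort_regions() guarantees in the patch */
static struct rsvd rr[] = {
        { 0x2000, 0x3000 },
        { 0x8000, 0x9000 },
};

int main(void)
{
        unsigned long seg_s = 0x1000, seg_e = 0xa000, prev = 0;
        int i, n = sizeof(rr)/sizeof(rr[0]);

        for (i = 0; i <= n; i++) {      /* one extra pass for the final tail */
                unsigned long gap_s = seg_s > prev ? seg_s : prev;
                unsigned long gap_e = i < n
                        ? (seg_e < rr[i].start ? seg_e : rr[i].start)
                        : seg_e;
                if (gap_s < gap_e)
                        printf("free: %#lx-%#lx\n", gap_s, gap_e);
                if (i < n)
                        prev = rr[i].end;
        }
        return 0;       /* prints 0x1000-0x2000, 0x3000-0x8000, 0x9000-0xa000 */
}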
#ifndef CONFIG_DISCONTIGMEM
/*
* Find a place to put the bootmap and return its starting address in bootmap_start.
* This address must be page-aligned.
@@ -171,8 +222,8 @@ find_bootmap_location (unsigned long start, unsigned long end, void *arg)
free_start = PAGE_OFFSET;
for (i = 0; i < num_rsvd_regions; i++) {
-range_start = MAX(start, free_start);
-range_end = MIN(end, rsvd_region[i].start & PAGE_MASK);
+range_start = max(start, free_start);
+range_end = min(end, rsvd_region[i].start & PAGE_MASK);
if (range_end <= range_start) continue; /* skip over empty range */
@@ -188,6 +239,7 @@ find_bootmap_location (unsigned long start, unsigned long end, void *arg)
}
return 0;
}
#endif /* CONFIG_DISCONTIGMEM */
static void
sort_regions (struct rsvd_region *rsvd_region, int max)
@@ -252,6 +304,14 @@ find_memory (void)
sort_regions(rsvd_region, num_rsvd_regions);
#ifdef CONFIG_DISCONTIGMEM
{
extern void discontig_mem_init(void);
bootmap_size = max_pfn = 0; /* stop gcc warnings */
discontig_mem_init();
}
#else /* !CONFIG_DISCONTIGMEM */
/* first find highest page frame number */
max_pfn = 0;
efi_memmap_walk(find_max_pfn, &max_pfn);
@@ -268,8 +328,9 @@ find_memory (void)
bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
/* Free all available memory, then mark bootmem-map as being in use. */
-efi_memmap_walk(free_available_memory, 0);
+efi_memmap_walk(filter_rsvd_memory, free_bootmem);
reserve_bootmem(bootmap_start, bootmap_size);
#endif /* !CONFIG_DISCONTIGMEM */
#ifdef CONFIG_BLK_DEV_INITRD
if (ia64_boot_param->initrd_start) {
@@ -540,6 +601,9 @@ setup_per_cpu_areas (void)
/* start_kernel() requires this... */
}
static unsigned long boot_cpu_data;
/*
* cpu_init() initializes state that is per-CPU. This function acts
* as a 'CPU state barrier'; nothing should get across.
@@ -563,6 +627,21 @@ cpu_init (void)
panic("Per-cpu data area too big! (%Zu > %Zu)",
__per_cpu_end - __per_cpu_start, PAGE_SIZE);
#ifdef CONFIG_NUMA
/*
* get_free_pages() cannot be used before cpu_init() is done. The BSP
* allocates NR_CPUS pages up front so that the APs don't have to call
* get_free_pages().
*/
if (cpu == 0)
boot_cpu_data = (unsigned long)alloc_bootmem_pages(PAGE_SIZE * NR_CPUS);
my_cpu_data = (void *)(boot_cpu_data + (cpu * PAGE_SIZE));
memcpy(my_cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char *) my_cpu_data - __per_cpu_start;
my_cpu_info = my_cpu_data + ((char *) &__get_cpu_var(cpu_info) - __per_cpu_start);
my_cpu_info->node_data = get_node_data_ptr();
my_cpu_info->nodeid = boot_get_local_nodeid();
#else /* !CONFIG_NUMA */
/*
* On the BSP, the page allocator isn't initialized by the time we get here. On
* the APs, the bootmem allocator is no longer available...
@@ -574,9 +653,10 @@ cpu_init (void)
memcpy(my_cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
__per_cpu_offset[cpu] = (char *) my_cpu_data - __per_cpu_start;
my_cpu_info = my_cpu_data + ((char *) &__get_cpu_var(cpu_info) - __per_cpu_start);
-#else
+#endif /* !CONFIG_NUMA */
+#else /* !CONFIG_SMP */
my_cpu_data = __phys_per_cpu_start;
-#endif
+#endif /* !CONFIG_SMP */
my_cpu_info = my_cpu_data + ((char *) &__get_cpu_var(cpu_info) - __per_cpu_start);
/*
......
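[Editorial note] The NUMA branch of cpu_init() above is mostly pointer arithmetic over one bootmem allocation: the BSP grabs NR_CPUS pages, each CPU takes the page at boot_cpu_data + cpu * PAGE_SIZE, copies the per-CPU template into it, and records the offset from the template's start. A userspace mock of that layout, not part of the patch; PAGE_SIZE, NR_CPUS, and the template are stand-ins:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 16384         /* assumed 16KB pages */
#define NR_CPUS 4

/* stands in for the kernel's per-CPU data section (__phys_per_cpu_start) */
static char pcpu_template[64] = "per-cpu data";

int main(void)
{
        /* the BSP allocates one page per CPU up front, like alloc_bootmem_pages() */
        char *boot_cpu_data = malloc((size_t)PAGE_SIZE * NR_CPUS);
        long offset[NR_CPUS];
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                char *my_cpu_data = boot_cpu_data + (long)cpu * PAGE_SIZE;
                memcpy(my_cpu_data, pcpu_template, sizeof(pcpu_template));
                /* analogue of __per_cpu_offset[cpu] = my_cpu_data - __per_cpu_start */
                offset[cpu] = my_cpu_data - pcpu_template;
                printf("cpu %d: area %p, offset %ld\n",
                       cpu, (void *)my_cpu_data, offset[cpu]);
        }
        free(boot_cpu_data);
        return 0;
}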
@@ -10,5 +10,6 @@
obj-y := init.o fault.o tlb.o extable.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_DISCONTIGMEM) += discontig.o
include $(TOPDIR)/Rules.make
/*
* Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2001 Intel Corp.
* Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
* Copyright (c) 2002 NEC Corp.
* Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
*/
/*
* Platform initialization for Discontig Memory
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/acpi.h>
#include <linux/efi.h>
/*
* Round an address upward to the next multiple of GRANULE size.
*/
#define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1))
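[Editorial note] GRANULEROUNDUP is the standard power-of-two round-up. A two-line check, not part of the patch, assuming a 16MB granule (IA64_GRANULE_SIZE is configuration-dependent; 16MB is just an example):

#include <assert.h>

#define GRANULE (1UL << 24)     /* assumed 16MB granule */
#define GRANULEROUNDUP(n) (((n) + GRANULE - 1) & ~(GRANULE - 1))

int main(void)
{
        assert(GRANULEROUNDUP(0x1000000UL) == 0x1000000UL);     /* aligned: unchanged */
        assert(GRANULEROUNDUP(0x1000001UL) == 0x2000000UL);     /* else: next granule */
        return 0;
}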
static struct ia64_node_data *node_data[NR_NODES];
static long boot_pg_data[8*NR_NODES+sizeof(pg_data_t)] __initdata;
static pg_data_t *pg_data_ptr[NR_NODES] __initdata;
static bootmem_data_t bdata[NR_NODES][NR_BANKS_PER_NODE+1] __initdata;
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
/*
* Return the compact node number of this cpu. Used prior to
* setting up the cpu_data area.
* Note - not fast, intended for boot use only!!
*/
int
boot_get_local_nodeid(void)
{
int i;
for (i = 0; i < NR_CPUS; i++)
if (node_cpuid[i].phys_id == hard_smp_processor_id())
return node_cpuid[i].nid;
/* node info missing, so nid should be 0 */
return 0;
}
/*
* Return a pointer to the pg_data structure for a node.
* This function is used ONLY in early boot before the cpu_data
* structure is available.
*/
pg_data_t* __init
boot_get_pg_data_ptr(long node)
{
return pg_data_ptr[node];
}
/*
* Return a pointer to the node data for the current node.
* (boottime initialization only)
*/
struct ia64_node_data *
get_node_data_ptr(void)
{
return node_data[boot_get_local_nodeid()];
}
/*
* We allocate one of the bootmem_data_t structs for each piece of memory
* that we wish to treat as a contiguous block. Each such block must start
* on a BANKSIZE boundary. Multiple banks per node are not supported.
*/
static int __init
build_maps(unsigned long pstart, unsigned long length, int node)
{
bootmem_data_t *bdp;
unsigned long cstart, epfn;
bdp = pg_data_ptr[node]->bdata;
epfn = GRANULEROUNDUP(pstart + length) >> PAGE_SHIFT;
cstart = pstart & ~(BANKSIZE - 1);
if (!bdp->node_low_pfn) {
bdp->node_boot_start = cstart;
bdp->node_low_pfn = epfn;
} else {
bdp->node_boot_start = min(cstart, bdp->node_boot_start);
bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
}
min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
return 0;
}
/*
* Find space on each node for the bootmem map.
*
* Called by efi_memmap_walk to find boot memory on each node. Note that
* only blocks that are free are passed to this routine (currently filtered by
* filter_rsvd_memory).
*/
static int __init
find_bootmap_space(unsigned long pstart, unsigned long length, int node)
{
unsigned long mapsize, pages, epfn;
bootmem_data_t *bdp;
epfn = (pstart + length) >> PAGE_SHIFT;
bdp = &pg_data_ptr[node]->bdata[0];
if (pstart < bdp->node_boot_start || epfn > bdp->node_low_pfn)
return 0;
if (!bdp->node_bootmem_map) {
pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
if (length > mapsize) {
init_bootmem_node(
BOOT_NODE_DATA(node),
pstart>>PAGE_SHIFT,
bdp->node_boot_start>>PAGE_SHIFT,
bdp->node_low_pfn);
}
}
return 0;
}
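[Editorial note] The mapsize comparison in find_bootmap_space() comes down to: the bootmem bitmap needs one bit per page, rounded up to whole pages, and the free block must be large enough to hold it. A sketch of that arithmetic, not part of the patch, for a 16GB node with 16KB pages (both values are examples):

#include <stdio.h>

#define PAGE_SHIFT 14                   /* assumed 16KB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long node_bytes = 16UL << 30;                 /* 16GB node */
        unsigned long pages = node_bytes >> PAGE_SHIFT;        /* 1M pages */
        /* one bit per page, rounded up to a whole number of pages */
        unsigned long map_bytes = (pages + 7) / 8;
        unsigned long mapsize = (map_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);

        /* prints: 1048576 pages -> 131072-byte bitmap (8 pages) */
        printf("%lu pages -> %lu-byte bitmap (%lu pages)\n",
               pages, mapsize, mapsize >> PAGE_SHIFT);
        return 0;
}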
/*
* Free available memory to the bootmem allocator.
*
* Note that only blocks that are free are passed to this routine (currently
* filtered by filter_rsvd_memory).
*
*/
static int __init
discontig_free_bootmem_node(unsigned long pstart, unsigned long length, int node)
{
free_bootmem_node(BOOT_NODE_DATA(node), pstart, length);
return 0;
}
/*
* Reserve the space used by the bootmem maps.
*/
static void __init
discontig_reserve_bootmem(void)
{
int node;
unsigned long mapbase, mapsize, pages;
bootmem_data_t *bdp;
for (node = 0; node < numnodes; node++) {
bdp = BOOT_NODE_DATA(node)->bdata;
pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
mapbase = __pa(bdp->node_bootmem_map);
reserve_bootmem_node(BOOT_NODE_DATA(node), mapbase, mapsize);
}
}
/*
* Allocate per node tables.
* - the pg_data structure is allocated on each node. This minimizes offnode
* memory references.
* - the node data is allocated & initialized. Portions of this structure are
* read-only (after boot) and contain node-local pointers to useful data
* structures located on other nodes.
*
* We also switch to using the "real" pg_data structures at this point. Earlier
* in boot, we use a different structure. The only use for pg_data prior to this
* point in boot is to get the pointer to the bdata for the node.
*/
static void __init
allocate_pernode_structures(void)
{
pg_data_t *pgdat=0, *new_pgdat_list=0;
int node, mynode;
mynode = boot_get_local_nodeid();
for (node = numnodes - 1; node >= 0 ; node--) {
node_data[node] = alloc_bootmem_node(BOOT_NODE_DATA(node), sizeof (struct ia64_node_data));
pgdat = __alloc_bootmem_node(BOOT_NODE_DATA(node), sizeof(pg_data_t), SMP_CACHE_BYTES, 0);
pgdat->bdata = &(bdata[node][0]);
pg_data_ptr[node] = pgdat;
pgdat->pgdat_next = new_pgdat_list;
new_pgdat_list = pgdat;
}
memcpy(node_data[mynode]->pg_data_ptrs, pg_data_ptr, sizeof(pg_data_ptr));
memcpy(node_data[mynode]->node_data_ptrs, node_data, sizeof(node_data));
pgdat_list = new_pgdat_list;
}
/*
* Called early in boot to set up the boot memory allocator and to
* allocate the node-local pg_data & node-directory data structures.
*/
void __init
discontig_mem_init(void)
{
int node;
if (numnodes == 0) {
printk("node info missing!\n");
numnodes = 1;
}
for (node = 0; node < numnodes; node++) {
pg_data_ptr[node] = (pg_data_t*) &boot_pg_data[node];
pg_data_ptr[node]->bdata = &bdata[node][0];
}
min_low_pfn = -1;
max_low_pfn = 0;
efi_memmap_walk(filter_rsvd_memory, build_maps);
efi_memmap_walk(filter_rsvd_memory, find_bootmap_space);
efi_memmap_walk(filter_rsvd_memory, discontig_free_bootmem_node);
discontig_reserve_bootmem();
allocate_pernode_structures();
}
/*
* Initialize the paging system.
* - determine sizes of each node
* - initialize the paging system for the node
* - build the nodedir for the node. This contains pointers to
* the per-bank mem_map entries.
* - fix the page struct "virtual" pointers. These are bank-specific
* values that the paging system doesn't understand.
* - replicate the nodedir structure to other nodes
*/
void __init
discontig_paging_init(void)
{
int node, mynode;
unsigned long max_dma, zones_size[MAX_NR_ZONES];
unsigned long kaddr, ekaddr, bid;
struct page *page;
bootmem_data_t *bdp;
max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
mynode = boot_get_local_nodeid();
for (node = 0; node < numnodes; node++) {
long pfn, startpfn;
memset(zones_size, 0, sizeof(zones_size));
startpfn = -1;
bdp = BOOT_NODE_DATA(node)->bdata;
pfn = bdp->node_boot_start >> PAGE_SHIFT;
if (startpfn == -1)
startpfn = pfn;
if (pfn > max_dma)
zones_size[ZONE_NORMAL] += (bdp->node_low_pfn - pfn);
else if (bdp->node_low_pfn < max_dma)
zones_size[ZONE_DMA] += (bdp->node_low_pfn - pfn);
else {
zones_size[ZONE_DMA] += (max_dma - pfn);
zones_size[ZONE_NORMAL] += (bdp->node_low_pfn - max_dma);
}
free_area_init_node(node, NODE_DATA(node), NULL, zones_size, startpfn, 0);
page = NODE_DATA(node)->node_mem_map;
bdp = BOOT_NODE_DATA(node)->bdata;
kaddr = (unsigned long)__va(bdp->node_boot_start);
ekaddr = (unsigned long)__va(bdp->node_low_pfn << PAGE_SHIFT);
while (kaddr < ekaddr) {
bid = BANK_MEM_MAP_INDEX(kaddr);
node_data[mynode]->node_id_map[bid] = node;
node_data[mynode]->bank_mem_map_base[bid] = page;
kaddr += BANKSIZE;
page += BANKSIZE/PAGE_SIZE;
}
}
/*
* Finish setting up the node data for this node, then copy it to the other nodes.
*/
for (node=0; node < numnodes; node++)
if (mynode != node) {
memcpy(node_data[node], node_data[mynode], sizeof(struct ia64_node_data));
node_data[node]->node = node;
}
}
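[Editorial note] The zones_size computation in discontig_paging_init() above splits each node's pfn range at the DMA limit: wholly below goes to ZONE_DMA, wholly above to ZONE_NORMAL, and a straddling range is divided at the boundary. A standalone sketch of the three cases, not part of the patch, assuming a 4GB MAX_DMA_ADDRESS and 16KB pages:

#include <stdio.h>

#define PAGE_SHIFT 14   /* assumed 16KB pages */

static void split(unsigned long start_pfn, unsigned long end_pfn,
                  unsigned long max_dma)
{
        unsigned long dma = 0, normal = 0;

        if (start_pfn > max_dma)
                normal = end_pfn - start_pfn;           /* entirely above */
        else if (end_pfn < max_dma)
                dma = end_pfn - start_pfn;              /* entirely below */
        else {                                          /* straddles the boundary */
                dma = max_dma - start_pfn;
                normal = end_pfn - max_dma;
        }
        printf("dma %lu pages, normal %lu pages\n", dma, normal);
}

int main(void)
{
        unsigned long max_dma = (4UL << 30) >> PAGE_SHIFT;      /* 4GB in pfns */

        split(0, max_dma / 2, max_dma);                 /* all ZONE_DMA */
        split(max_dma / 2, 2 * max_dma, max_dma);       /* straddles */
        split(2 * max_dma, 3 * max_dma, max_dma);       /* all ZONE_NORMAL */
        return 0;
}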
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/efi.h>
#include <linux/mmzone.h>
#include <asm/a.out.h>
#include <asm/bitops.h>
@@ -347,6 +348,15 @@ extern long htlbzone_pages;
extern struct list_head htlbpage_freelist;
#endif
#ifdef CONFIG_DISCONTIGMEM
void
paging_init (void)
{
extern void discontig_paging_init(void);
discontig_paging_init();
}
#else /* !CONFIG_DISCONTIGMEM */
void
paging_init (void)
{
@@ -365,6 +375,7 @@ paging_init (void)
}
free_area_init(zones_size);
}
#endif /* !CONFIG_DISCONTIGMEM */
static int
count_pages (u64 start, u64 end, void *arg)
@@ -382,9 +393,15 @@ count_reserved_pages (u64 start, u64 end, void *arg)
unsigned long *count = arg;
struct page *pg;
#ifdef CONFIG_DISCONTIGMEM
for (; start < end; start += PAGE_SIZE)
if (PageReserved(virt_to_page(start)))
++num_reserved;
#else
for (pg = virt_to_page(start); pg < virt_to_page(end); ++pg)
if (PageReserved(pg))
++num_reserved;
#endif
*count += num_reserved;
return 0;
}
@@ -395,6 +412,7 @@ mem_init (void)
extern char __start_gate_section[];
long reserved_pages, codesize, datasize, initsize;
unsigned long num_pgt_pages;
pg_data_t *pgdat;
#ifdef CONFIG_PCI
/*
@@ -405,16 +423,19 @@ mem_init (void)
platform_pci_dma_init();
#endif
+#ifndef CONFIG_DISCONTIGMEM
if (!mem_map)
BUG();
max_mapnr = max_low_pfn;
+#endif
num_physpages = 0;
efi_memmap_walk(count_pages, &num_physpages);
-max_mapnr = max_low_pfn;
high_memory = __va(max_low_pfn * PAGE_SIZE);
-totalram_pages += free_all_bootmem();
+for_each_pgdat(pgdat)
+totalram_pages += free_all_bootmem_node(pgdat);
reserved_pages = 0;
efi_memmap_walk(count_reserved_pages, &reserved_pages);
@@ -425,7 +446,7 @@ mem_init (void)
printk("Memory: %luk/%luk available (%luk code, %luk reserved, %luk data, %luk init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
-max_mapnr << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10),
+num_physpages << (PAGE_SHIFT - 10), codesize >> 10, reserved_pages << (PAGE_SHIFT - 10),
datasize >> 10, initsize >> 10);
/*
@@ -441,6 +462,8 @@ mem_init (void)
if (num_pgt_pages > pgt_cache_water[1])
pgt_cache_water[1] = num_pgt_pages;
show_mem();
/* install the gate page in the global page table: */
put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);
......
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2002 NEC Corp.
* Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
* Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
*/
#ifndef _ASM_IA64_MMZONE_H
#define _ASM_IA64_MMZONE_H
#include <linux/config.h>
#include <linux/init.h>
/*
* Given a kaddr, find the base mem_map address for the start of the mem_map
* entries for the bank containing the kaddr.
*/
#define BANK_MEM_MAP_BASE(kaddr) local_node_data->bank_mem_map_base[BANK_MEM_MAP_INDEX(kaddr)]
/*
* Given a kaddr, this macro returns the relative map number
* within the bank.
*/
#define BANK_MAP_NR(kaddr) (BANK_OFFSET(kaddr) >> PAGE_SHIFT)
/*
* Given a pte, this macro returns a pointer to the page struct for the pte.
*/
#define pte_page(pte) virt_to_page(PAGE_OFFSET | (pte_val(pte)&_PFN_MASK))
/*
* Determine if a kaddr is a valid memory address of memory that
* actually exists.
*
* The check consists of 2 parts:
* - verify that the address is a region 7 address & does not
* contain any bits that preclude it from being a valid platform
* memory address
* - verify that the chunk actually exists.
*
* Note that IO addresses are NOT considered valid addresses.
*
* Note, many platforms can simply check if kaddr exceeds a specific size.
* (However, this won't work on SGI platforms since IO space is embedded
* within the range of valid memory addresses & nodes have holes in the
* address range between banks).
*/
#define kern_addr_valid(kaddr) ({long _kav=(long)(kaddr); \
VALID_MEM_KADDR(_kav);})
/*
* Given a kaddr, return a pointer to the page struct for the page.
* If the kaddr does not represent RAM memory that potentially exists, return
* NULL. IO addresses will also return NULL. Addresses in unpopulated RAM banks may
* return undefined results OR may panic the system.
*
*/
#define virt_to_page(kaddr) ({long _kvtp=(long)(kaddr); \
(VALID_MEM_KADDR(_kvtp)) \
? BANK_MEM_MAP_BASE(_kvtp) + BANK_MAP_NR(_kvtp) \
: NULL;})
/*
* Given a page struct entry, return the physical address that the page struct represents.
* Since IA64 has all memory in the DMA zone, the following works:
*/
#define page_to_phys(page) __pa(page_address(page))
#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map)
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
#define pfn_to_page(pfn) (struct page *)(node_mem_map(pfn_to_nid(pfn)) + node_localnr(pfn, pfn_to_nid(pfn)))
#define pfn_to_nid(pfn) local_node_data->node_id_map[((pfn) << PAGE_SHIFT) >> DIG_BANKSHIFT]
#define page_to_pfn(page) (long)((page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn)
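[Editorial note] pfn_to_nid() above reduces to shifting the pfn down by (DIG_BANKSHIFT - PAGE_SHIFT) and indexing the per-node bank map. A worked example, not part of the patch, using DIG_BANKSHIFT = 29 from the DIG section below and an assumed 16KB page size; the node_id_map contents are invented:

#include <stdio.h>

#define PAGE_SHIFT      14      /* assumed 16KB pages */
#define DIG_BANKSHIFT   29      /* 512MB banks, as in the DIG section */

/* toy node_id_map: first two banks on node 0, next two on node 1 */
static short node_id_map[] = { 0, 0, 1, 1 };

int main(void)
{
        unsigned long paddr = 0x30000000UL;     /* 768MB: inside bank 1 */
        unsigned long pfn = paddr >> PAGE_SHIFT;
        /* same as (pfn << PAGE_SHIFT) >> DIG_BANKSHIFT in pfn_to_nid() */
        unsigned long bank = pfn >> (DIG_BANKSHIFT - PAGE_SHIFT);

        /* prints: pfn 0xc000 -> bank 1 -> node 0 */
        printf("pfn %#lx -> bank %lu -> node %d\n",
               pfn, bank, node_id_map[bank]);
        return 0;
}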
/*
* pfn_valid should be made as fast as possible, and the current definition
* is valid for machines that are NUMA, but still contiguous, which is what
* is currently supported. A more generalised, but slower definition would
* be something like this - mbligh:
* ( pfn_to_pgdat(pfn) && (pfn < node_end_pfn(pfn_to_nid(pfn))) )
*/
#define pfn_valid(pfn) ((pfn) < max_low_pfn)
extern unsigned long max_low_pfn;
#ifdef CONFIG_IA64_DIG
/*
* Platform definitions for DIG platform with contiguous memory.
*/
#define MAX_PHYSNODE_ID 8 /* Maximum node number +1 */
#define NR_NODES 8 /* Maximum number of nodes in SSI */
#define MAX_PHYS_MEMORY (1UL << 40) /* 1 TB */
/*
* Bank definitions.
* Current settings for DIG: 512MB/bank, 16GB/node.
*/
#define NR_BANKS_PER_NODE 32
#define BANK_OFFSET(addr) ((unsigned long)(addr) & (BANKSIZE-1))
#define DIG_BANKSHIFT 29
#define BANKSIZE (1UL << DIG_BANKSHIFT)
#define NR_BANKS (NR_BANKS_PER_NODE * NR_NODES)
/*
* VALID_MEM_KADDR returns a boolean to indicate if a kaddr is
* potentially a valid cacheable identity mapped RAM memory address.
* Note that the RAM may or may not actually be present!!
*/
#define VALID_MEM_KADDR(kaddr) 1
/*
* Given a kaddr, find the index of its memory bank. The index is used
* to look up the bank's mem_map base and node id.
*/
#define BANK_MEM_MAP_INDEX(kaddr) \
(((unsigned long)(kaddr) & (MAX_PHYS_MEMORY-1)) >> DIG_BANKSHIFT)
#endif /* CONFIG_IA64_DIG */
#endif /* _ASM_IA64_MMZONE_H */
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved.
* Copyright (c) 2002 NEC Corp.
* Copyright (c) 2002 Erich Focht <efocht@ess.nec.de>
* Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
*/
#ifndef _ASM_IA64_NODEDATA_H
#define _ASM_IA64_NODEDATA_H
#include <asm/mmzone.h>
/*
* Node Data. One of these structures is located on each node of a NUMA system.
*/
struct pglist_data;
struct ia64_node_data {
short node;
struct pglist_data *pg_data_ptrs[NR_NODES];
struct page *bank_mem_map_base[NR_BANKS];
struct ia64_node_data *node_data_ptrs[NR_NODES];
short node_id_map[NR_BANKS];
};
/*
* Return a pointer to the node_data structure for the executing cpu.
*/
#define local_node_data (local_cpu_data->node_data)
/*
* Return a pointer to the node_data structure for the specified node.
*/
#define node_data(node) (local_node_data->node_data_ptrs[node])
/*
* Get a pointer to the node_id/node_data for the current cpu.
* (boot time only)
*/
extern int boot_get_local_nodeid(void);
extern struct ia64_node_data *get_node_data_ptr(void);
/*
* Given a node id, return a pointer to the pg_data_t for the node.
* The following 2 macros are similar.
*
* NODE_DATA - should be used in all code not related to system
* initialization. It uses pernode data structures to minimize
* offnode memory references. However, these structures are not
* present during boot. This macro can be used once cpu_init
* completes.
*
* BOOT_NODE_DATA
* - should be used during system initialization
* prior to freeing __initdata. It does not depend on the percpu
* area being present.
*
* NOTE: The names of these macros are misleading but are difficult to change
* since they are used in generic linux & on other architectures.
*/
#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid])
#define BOOT_NODE_DATA(nid) boot_get_pg_data_ptr((long)(nid))
struct pglist_data;
extern struct pglist_data * __init boot_get_pg_data_ptr(long);
#endif /* _ASM_IA64_NODEDATA_H */
@@ -15,8 +15,7 @@
#ifdef CONFIG_DISCONTIGMEM
# include <asm/mmzone.h>
# define NR_NODES (PLAT_MAX_COMPACT_NODES)
-# define NR_MEMBLKS (PLAT_MAXCLUMPS)
+# define NR_MEMBLKS (NR_BANKS)
#else
# define NR_NODES (8)
# define NR_MEMBLKS (NR_NODES * 8)
......
#ifndef _ASM_MAX_NUMNODES_H
#define _ASM_MAX_NUMNODES_H
#include <asm/mmzone.h>
#define MAX_NUMNODES NR_NODES
#endif /* _ASM_MAX_NUMNODES_H */
@@ -82,12 +82,15 @@ do { \
flush_dcache_page(page); \
} while (0)
-#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#ifndef CONFIG_DISCONTIGMEM
+#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define page_to_pfn(page) ((unsigned long) (page - mem_map))
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+#endif
typedef union ia64_va {
struct {
......
@@ -89,6 +89,9 @@
#include <asm/rse.h>
#include <asm/unwind.h>
#include <asm/atomic.h>
#ifdef CONFIG_NUMA
#include <asm/nodedata.h>
#endif
/* like above but expressed as bitfields for more efficient access: */
struct ia64_psr {
@@ -174,6 +177,10 @@ struct cpuinfo_ia64 {
__u64 prof_counter;
__u64 prof_multiplier;
#endif
#ifdef CONFIG_NUMA
struct ia64_node_data *node_data;
int nodeid;
#endif
};
DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
@@ -185,6 +192,10 @@ DECLARE_PER_CPU(struct cpuinfo_ia64, cpu_info);
#define local_cpu_data (&__get_cpu_var(cpu_info))
#define cpu_data(cpu) (&per_cpu(cpu_info, cpu))
#ifdef CONFIG_NUMA
#define numa_node_id() (local_cpu_data->nodeid)
#endif
extern void identify_cpu (struct cpuinfo_ia64 *);
extern void print_cpu_info (struct cpuinfo_ia64 *);
......