Commit 2ad452ff authored by Nicholas Piggin, committed by Michael Ellerman

powerpc/64s/radix: Allocate kernel page tables node-local if possible

Try to allocate kernel page tables for the direct mapping and vmemmap
according to the node of the memory they will map. The node is not yet
known for the linear map in early boot, so in that case use range
allocation to place the page tables within the region they map, which
is effectively node-local.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Fix build error in radix__create_section_mapping()]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 0633dafc
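
The heart of the patch is the three-step fallback added to early_alloc_pgtable(). As a reading aid, here is that function distilled from the diff below with the fallback order annotated; every identifier is taken from the patch itself, and the memblock calls (memblock_alloc_range(), memblock_alloc_base_nid(), memblock_alloc_base()) are the allocator API of this kernel generation:

/* Distilled from the diff below: allocation order for early page tables. */
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
                unsigned long region_start, unsigned long region_end)
{
        unsigned long pa = 0;
        void *pt;

        /*
         * 1) Region hint (early linear map): allocate from the physical
         *    range being mapped, which is node-local by construction.
         */
        if (region_start || region_end)
                pa = memblock_alloc_range(size, size, region_start,
                                region_end, MEMBLOCK_NONE);
        /*
         * 2) Node hint (vmemmap, where early_pfn_to_nid() works):
         *    allocate anywhere on that node.
         */
        else if (nid != -1)
                pa = memblock_alloc_base_nid(size, size,
                                MEMBLOCK_ALLOC_ANYWHERE,
                                nid, MEMBLOCK_NONE);

        /* 3) Hints are best-effort: fall back to any memory at all. */
        if (!pa)
                pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
        BUG_ON(!pa);

        pt = __va(pa);
        memset(pt, 0, size);    /* new page tables must start out clear */
        return pt;
}

Callers encode which hint they have: the linear map and section hotplug go through create_physical_mapping(), which forwards the physical range being mapped; radix__vmemmap_create_mapping() passes a real node from early_pfn_to_nid(); and the process-table allocation passes neither, relying on the final fallback.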
@@ -46,11 +46,26 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
 	return 0;
 }
 
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+			unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pa = 0;
 	void *pt;
 
-	pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+	if (region_start || region_end) /* has region hint */
+		pa = memblock_alloc_range(size, size, region_start, region_end,
+						MEMBLOCK_NONE);
+	else if (nid != -1) /* has node hint */
+		pa = memblock_alloc_base_nid(size, size,
+						MEMBLOCK_ALLOC_ANYWHERE,
+						nid, MEMBLOCK_NONE);
+
+	if (!pa)
+		pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+	BUG_ON(!pa);
+
+	pt = __va(pa);
 	memset(pt, 0, size);
 
 	return pt;
@@ -58,8 +73,11 @@ static __ref void *early_alloc_pgtable(unsigned long size)
 
 static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 			  pgprot_t flags,
-			  unsigned int map_page_size)
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pfn = pa >> PAGE_SHIFT;
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
@@ -67,8 +85,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 
 	pgdp = pgd_offset_k(ea);
 	if (pgd_none(*pgdp)) {
-		pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-		BUG_ON(pudp == NULL);
+		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+						region_start, region_end);
 		pgd_populate(&init_mm, pgdp, pudp);
 	}
 	pudp = pud_offset(pgdp, ea);
@@ -77,8 +95,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 		goto set_the_pte;
 	}
 	if (pud_none(*pudp)) {
-		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-		BUG_ON(pmdp == NULL);
+		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+						region_start, region_end);
 		pud_populate(&init_mm, pudp, pmdp);
 	}
 	pmdp = pmd_offset(pudp, ea);
@@ -87,23 +105,29 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 		goto set_the_pte;
 	}
 	if (!pmd_present(*pmdp)) {
-		ptep = early_alloc_pgtable(PAGE_SIZE);
-		BUG_ON(ptep == NULL);
+		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+						region_start, region_end);
 		pmd_populate_kernel(&init_mm, pmdp, ptep);
 	}
 	ptep = pte_offset_kernel(pmdp, ea);
 
 set_the_pte:
-	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
 	smp_wmb();
 	return 0;
 }
 
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+/*
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
 			  pgprot_t flags,
-			  unsigned int map_page_size)
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pfn = pa >> PAGE_SHIFT;
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
@@ -113,9 +137,15 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 	 */
 	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
 
-	if (!slab_is_available())
-		return early_map_kernel_page(ea, pa, flags, map_page_size);
+	if (unlikely(!slab_is_available()))
+		return early_map_kernel_page(ea, pa, flags, map_page_size,
+						nid, region_start, region_end);
 
+	/*
+	 * Should make page table allocation functions be able to take a
+	 * node, so we can place kernel page tables on the right nodes after
+	 * boot.
+	 */
 	pgdp = pgd_offset_k(ea);
 	pudp = pud_alloc(&init_mm, pgdp, ea);
 	if (!pudp)
@@ -136,11 +166,25 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 		return -ENOMEM;
 
 set_the_pte:
-	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
 	smp_wmb();
 	return 0;
 }
 
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size, int nid)
+{
+	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size)
+{
+	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void radix__change_memory_range(unsigned long start, unsigned long end,
 				unsigned long clear)
@@ -227,7 +271,8 @@ static inline void __meminit print_mapping(unsigned long start,
 }
 
 static int __meminit create_physical_mapping(unsigned long start,
-					     unsigned long end)
+					     unsigned long end,
+					     int nid)
 {
 	unsigned long vaddr, addr, mapping_size = 0;
 	pgprot_t prot;
@@ -283,7 +328,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 		else
 			prot = PAGE_KERNEL;
 
-		rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
 		if (rc)
 			return rc;
 	}
@@ -292,7 +337,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 	return 0;
 }
 
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
 {
 	unsigned long rts_field;
 	struct memblock_region *reg;
@@ -302,9 +347,16 @@ static void __init radix_init_pgtable(void)
 	/*
 	 * Create the linear mapping, using standard page size for now
 	 */
-	for_each_memblock(memory, reg)
+	for_each_memblock(memory, reg) {
+		/*
+		 * The memblock allocator is up at this point, so the
+		 * page tables will be allocated within the range. No
+		 * need for a node (which we don't have yet).
+		 */
 		WARN_ON(create_physical_mapping(reg->base,
-						reg->base + reg->size));
+						reg->base + reg->size,
+						-1));
+	}
 
 	/* Find out how many PID bits are supported */
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
@@ -333,7 +385,7 @@ static void __init radix_init_pgtable(void)
 	 * host.
 	 */
 	BUG_ON(PRTB_SIZE_SHIFT > 36);
-	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
 	/*
 	 * Fill in the process table.
 	 */
@@ -810,7 +862,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
 
 int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
 {
-	return create_physical_mapping(start, end);
+	return create_physical_mapping(start, end, -1);
 }
 
 int radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -827,8 +879,12 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
 {
 	/* Create a PTE encoding */
 	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+	int ret;
+
+	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+	BUG_ON(ret);
 
-	BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
 	return 0;
 }