Commit 907cd439 authored by Xiong Zhang, committed by Ingo Molnar

x86/xen: Change __xen_pgd_walk() and xen_cleanmfnmap() to support p4d

Split these helpers into a couple of per-level functions and add support for
an additional page table level.
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
[ Split off into separate patch ]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-arch@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/20170317185515.8636-6-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent d691a3cf
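
The change replaces one deeply nested walker with a chain of per-level helpers: each helper scans its own table, caps the scan at the limit only when it is walking the last table of the range, and hands populated entries down to the next level. The sketch below illustrates that pattern with two hypothetical table levels and an index-based limit (the kernel passes an address and derives each level's index from it); it is an illustration only, not the kernel code. The actual diff follows.

/*
 * Per-level table walk, reduced to two levels with made-up types and
 * names. Mirrors the shape of xen_pud_walk()/xen_pmd_walk() only.
 */
#include <stdio.h>

#define ENTRIES_PER_TABLE 4

struct leaf_table  { int present[ENTRIES_PER_TABLE]; };
struct upper_table { struct leaf_table *next[ENTRIES_PER_TABLE]; };

/* Walk one leaf table; if 'last', stop after entry 'limit'. */
static int walk_leaf(const struct leaf_table *lt, int last, int limit)
{
        int i, nr, flush = 0;

        nr = last ? limit + 1 : ENTRIES_PER_TABLE;
        for (i = 0; i < nr; i++)
                if (lt->present[i])
                        flush |= 1;     /* stand-in for the per-page callback */
        return flush;
}

/* Walk the upper table and delegate populated slots to the leaf walker. */
static int walk_upper(const struct upper_table *ut, int last, int limit)
{
        int i, nr, flush = 0;

        nr = last ? limit + 1 : ENTRIES_PER_TABLE;
        for (i = 0; i < nr; i++) {
                if (!ut->next[i])
                        continue;
                /* Only the table behind the last scanned slot honours the limit. */
                flush |= walk_leaf(ut->next[i], last && i == nr - 1, limit);
        }
        return flush;
}

int main(void)
{
        struct leaf_table lt = { .present = { 1, 0, 1, 0 } };
        struct upper_table ut = { .next = { &lt } };

        printf("flush = %d\n", walk_upper(&ut, 1 /* last */, 2 /* limit */));
        return 0;
}

As in the kernel helpers, the limit is propagated downward only for the table reached through the last slot of the range; every earlier table is covered in full.
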
@@ -593,6 +593,64 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val)
 }
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
+static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
+                int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+                bool last, unsigned long limit)
+{
+        int i, nr, flush = 0;
+
+        nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
+        for (i = 0; i < nr; i++) {
+                if (!pmd_none(pmd[i]))
+                        flush |= (*func)(mm, pmd_page(pmd[i]), PT_PTE);
+        }
+        return flush;
+}
+
+static int xen_pud_walk(struct mm_struct *mm, pud_t *pud,
+                int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+                bool last, unsigned long limit)
+{
+        int i, nr, flush = 0;
+
+        nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
+        for (i = 0; i < nr; i++) {
+                pmd_t *pmd;
+
+                if (pud_none(pud[i]))
+                        continue;
+
+                pmd = pmd_offset(&pud[i], 0);
+                if (PTRS_PER_PMD > 1)
+                        flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
+                flush |= xen_pmd_walk(mm, pmd, func,
+                                last && i == nr - 1, limit);
+        }
+        return flush;
+}
+
+static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
+                int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
+                bool last, unsigned long limit)
+{
+        int i, nr, flush = 0;
+
+        nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
+        for (i = 0; i < nr; i++) {
+                pud_t *pud;
+
+                if (p4d_none(p4d[i]))
+                        continue;
+
+                pud = pud_offset(&p4d[i], 0);
+                if (PTRS_PER_PUD > 1)
+                        flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+                flush |= xen_pud_walk(mm, pud, func,
+                                last && i == nr - 1, limit);
+        }
+        return flush;
+}
+
 /*
  * (Yet another) pagetable walker. This one is intended for pinning a
  * pagetable. This means that it walks a pagetable and calls the
@@ -613,10 +671,8 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
                               enum pt_level),
                   unsigned long limit)
 {
-        int flush = 0;
+        int i, nr, flush = 0;
         unsigned hole_low, hole_high;
-        unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
-        unsigned pgdidx, pudidx, pmdidx;
 
         /* The limit is the last byte to be touched */
         limit--;
@@ -633,65 +689,22 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
         hole_low = pgd_index(USER_LIMIT);
         hole_high = pgd_index(PAGE_OFFSET);
 
-        pgdidx_limit = pgd_index(limit);
-#if PTRS_PER_PUD > 1
-        pudidx_limit = pud_index(limit);
-#else
-        pudidx_limit = 0;
-#endif
-#if PTRS_PER_PMD > 1
-        pmdidx_limit = pmd_index(limit);
-#else
-        pmdidx_limit = 0;
-#endif
-
-        for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
-                pud_t *pud;
-
-                if (pgdidx >= hole_low && pgdidx < hole_high)
-                        continue;
-
-                if (!pgd_val(pgd[pgdidx]))
-                        continue;
-
-                pud = pud_offset(&pgd[pgdidx], 0);
-
-                if (PTRS_PER_PUD > 1) /* not folded */
-                        flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-
-                for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
-                        pmd_t *pmd;
-
-                        if (pgdidx == pgdidx_limit &&
-                            pudidx > pudidx_limit)
-                                goto out;
-
-                        if (pud_none(pud[pudidx]))
-                                continue;
-
-                        pmd = pmd_offset(&pud[pudidx], 0);
-
-                        if (PTRS_PER_PMD > 1) /* not folded */
-                                flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
-
-                        for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
-                                struct page *pte;
-
-                                if (pgdidx == pgdidx_limit &&
-                                    pudidx == pudidx_limit &&
-                                    pmdidx > pmdidx_limit)
-                                        goto out;
-
-                                if (pmd_none(pmd[pmdidx]))
-                                        continue;
-
-                                pte = pmd_page(pmd[pmdidx]);
-                                flush |= (*func)(mm, pte, PT_PTE);
-                        }
-                }
-        }
-
-out:
+        nr = pgd_index(limit) + 1;
+        for (i = 0; i < nr; i++) {
+                p4d_t *p4d;
+
+                if (i >= hole_low && i < hole_high)
+                        continue;
+
+                if (pgd_none(pgd[i]))
+                        continue;
+
+                p4d = p4d_offset(&pgd[i], 0);
+                if (PTRS_PER_P4D > 1)
+                        flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
+                flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
+        }
+
         /* Do the top level last, so that the callbacks can use it as
            a cue to do final things like tlb flushes. */
         flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
@@ -1150,57 +1163,97 @@ static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
         xen_free_ro_pages(pa, PAGE_SIZE);
 }
 
-/*
- * Since it is well isolated we can (and since it is perhaps large we should)
- * also free the page tables mapping the initial P->M table.
- */
-static void __init xen_cleanmfnmap(unsigned long vaddr)
-{
-        unsigned long va = vaddr & PMD_MASK;
-        unsigned long pa;
-        pgd_t *pgd = pgd_offset_k(va);
-        pud_t *pud_page = pud_offset(pgd, 0);
-        pud_t *pud;
-        pmd_t *pmd;
-        pte_t *pte;
-        unsigned int i;
-        bool unpin;
-
-        unpin = (vaddr == 2 * PGDIR_SIZE);
-        set_pgd(pgd, __pgd(0));
-        do {
-                pud = pud_page + pud_index(va);
-                if (pud_none(*pud)) {
-                        va += PUD_SIZE;
-                } else if (pud_large(*pud)) {
-                        pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
-                        xen_free_ro_pages(pa, PUD_SIZE);
-                        va += PUD_SIZE;
-                } else {
-                        pmd = pmd_offset(pud, va);
-                        if (pmd_large(*pmd)) {
-                                pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
-                                xen_free_ro_pages(pa, PMD_SIZE);
-                        } else if (!pmd_none(*pmd)) {
-                                pte = pte_offset_kernel(pmd, va);
-                                set_pmd(pmd, __pmd(0));
-                                for (i = 0; i < PTRS_PER_PTE; ++i) {
-                                        if (pte_none(pte[i]))
-                                                break;
-                                        pa = pte_pfn(pte[i]) << PAGE_SHIFT;
-                                        xen_free_ro_pages(pa, PAGE_SIZE);
-                                }
-                                xen_cleanmfnmap_free_pgtbl(pte, unpin);
-                        }
-                        va += PMD_SIZE;
-                        if (pmd_index(va))
-                                continue;
-                        set_pud(pud, __pud(0));
-                        xen_cleanmfnmap_free_pgtbl(pmd, unpin);
-                }
-        } while (pud_index(va) || pmd_index(va));
-        xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
-}
+static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
+{
+        unsigned long pa;
+        pte_t *pte_tbl;
+        int i;
+
+        if (pmd_large(*pmd)) {
+                pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
+                xen_free_ro_pages(pa, PMD_SIZE);
+                return;
+        }
+
+        pte_tbl = pte_offset_kernel(pmd, 0);
+        for (i = 0; i < PTRS_PER_PTE; i++) {
+                if (pte_none(pte_tbl[i]))
+                        continue;
+                pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
+                xen_free_ro_pages(pa, PAGE_SIZE);
+        }
+        set_pmd(pmd, __pmd(0));
+        xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
+{
+        unsigned long pa;
+        pmd_t *pmd_tbl;
+        int i;
+
+        if (pud_large(*pud)) {
+                pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
+                xen_free_ro_pages(pa, PUD_SIZE);
+                return;
+        }
+
+        pmd_tbl = pmd_offset(pud, 0);
+        for (i = 0; i < PTRS_PER_PMD; i++) {
+                if (pmd_none(pmd_tbl[i]))
+                        continue;
+                xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
+        }
+        set_pud(pud, __pud(0));
+        xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
+}
+
+static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
+{
+        unsigned long pa;
+        pud_t *pud_tbl;
+        int i;
+
+        if (p4d_large(*p4d)) {
+                pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
+                xen_free_ro_pages(pa, P4D_SIZE);
+                return;
+        }
+
+        pud_tbl = pud_offset(p4d, 0);
+        for (i = 0; i < PTRS_PER_PUD; i++) {
+                if (pud_none(pud_tbl[i]))
+                        continue;
+                xen_cleanmfnmap_pud(pud_tbl + i, unpin);
+        }
+        set_p4d(p4d, __p4d(0));
+        xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
+}
+
+/*
+ * Since it is well isolated we can (and since it is perhaps large we should)
+ * also free the page tables mapping the initial P->M table.
+ */
+static void __init xen_cleanmfnmap(unsigned long vaddr)
+{
+        pgd_t *pgd;
+        p4d_t *p4d;
+        unsigned int i;
+        bool unpin;
+
+        unpin = (vaddr == 2 * PGDIR_SIZE);
+        vaddr &= PMD_MASK;
+        pgd = pgd_offset_k(vaddr);
+        p4d = p4d_offset(pgd, 0);
+        for (i = 0; i < PTRS_PER_P4D; i++) {
+                if (p4d_none(p4d[i]))
+                        continue;
+                xen_cleanmfnmap_p4d(p4d + i, unpin);
+        }
+        if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+                set_pgd(pgd, __pgd(0));
+                xen_cleanmfnmap_free_pgtbl(p4d, unpin);
+        }
+}
 
 static void __init xen_pagetable_p2m_free(void)
@@ -5,6 +5,7 @@
 enum pt_level {
         PT_PGD,
+        PT_P4D,
         PT_PUD,
         PT_PMD,
         PT_PTE