Commit 2ba3e694 authored by Joerg Roedel, committed by Linus Torvalds

mm/vmalloc: track which page-table levels were modified

Track at which page-table levels entries were modified by vmap/vunmap.

After the page-table has been modified, use that information to decide
whether the new arch_sync_kernel_mappings() needs to be called.

[akpm@linux-foundation.org: map_kernel_range_noflush() needs the arch_sync_kernel_mappings() call]
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: http://lkml.kernel.org/r/20200515140023.25469-3-joro@8bytes.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d8626138
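The pattern the diff below introduces can be summarized in a short, self-contained sketch (illustrative only; the walk is simplified and example_modify_kernel_range() is a made-up name, but pgtbl_mod_mask, the PGTBL_*_MODIFIED bits, ARCH_PAGE_TABLE_SYNC_MASK and arch_sync_kernel_mappings() are the identifiers used by this series):

/* Illustrative sketch, not code from this patch. */
static void example_modify_kernel_range(unsigned long start, unsigned long end)
{
        pgtbl_mod_mask mask = 0;
        unsigned long addr;

        for (addr = start; addr < end; addr += PMD_SIZE) {
                /* ... create or tear down kernel mappings for this range ... */
                mask |= PGTBL_PMD_MODIFIED | PGTBL_PTE_MODIFIED;
        }

        /*
         * Architectures that do not define ARCH_PAGE_TABLE_SYNC_MASK see 0
         * here, so the compiler can optimize the call away entirely.
         */
        if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
                arch_sync_kernel_mappings(start, end);
}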
include/linux/vmalloc.h
@@ -133,6 +133,22 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 void vmalloc_sync_mappings(void);
 void vmalloc_sync_unmappings(void);
 
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
+ * needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+
 /*
  * Lowlevel-APIs (not for driver use!)
  */
...
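For context, an architecture would opt into this hook roughly as sketched below. This is a hypothetical example and not part of this patch: the file paths and the body of arch_sync_kernel_mappings() are placeholders. An architecture selects the PGTBL_*_MODIFIED bits it cares about and provides the sync function; everything else stays in generic code.

/* arch/example/include/asm/page.h (hypothetical) */
#define ARCH_PAGE_TABLE_SYNC_MASK       PGTBL_PMD_MODIFIED

/* arch/example/mm/init.c (hypothetical) */
void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
        /*
         * Called by generic vmalloc/ioremap code after a page-table level
         * named in ARCH_PAGE_TABLE_SYNC_MASK was modified in [start, end).
         * A real implementation would propagate the affected kernel PMD
         * entries into every other page-table that mirrors them.
         */
}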
mm/vmalloc.c
@@ -69,7 +69,8 @@ static void free_work(struct work_struct *w)
 /*** Page table manipulation functions ***/
 
-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+                             pgtbl_mod_mask *mask)
 {
         pte_t *pte;
 
@@ -78,59 +79,81 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
                 pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
                 WARN_ON(!pte_none(ptent) && !pte_present(ptent));
         } while (pte++, addr += PAGE_SIZE, addr != end);
+        *mask |= PGTBL_PTE_MODIFIED;
 }
 
-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+                             pgtbl_mod_mask *mask)
 {
         pmd_t *pmd;
         unsigned long next;
+        int cleared;
 
         pmd = pmd_offset(pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
-                if (pmd_clear_huge(pmd))
+                cleared = pmd_clear_huge(pmd);
+                if (cleared || pmd_bad(*pmd))
+                        *mask |= PGTBL_PMD_MODIFIED;
+                if (cleared)
                         continue;
                 if (pmd_none_or_clear_bad(pmd))
                         continue;
-                vunmap_pte_range(pmd, addr, next);
+                vunmap_pte_range(pmd, addr, next, mask);
         } while (pmd++, addr = next, addr != end);
 }
 
-static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
+static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
+                             pgtbl_mod_mask *mask)
 {
         pud_t *pud;
         unsigned long next;
+        int cleared;
 
         pud = pud_offset(p4d, addr);
         do {
                 next = pud_addr_end(addr, end);
-                if (pud_clear_huge(pud))
+                cleared = pud_clear_huge(pud);
+                if (cleared || pud_bad(*pud))
+                        *mask |= PGTBL_PUD_MODIFIED;
+                if (cleared)
                         continue;
                 if (pud_none_or_clear_bad(pud))
                         continue;
-                vunmap_pmd_range(pud, addr, next);
+                vunmap_pmd_range(pud, addr, next, mask);
         } while (pud++, addr = next, addr != end);
 }
 
-static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+                             pgtbl_mod_mask *mask)
 {
         p4d_t *p4d;
         unsigned long next;
+        int cleared;
 
         p4d = p4d_offset(pgd, addr);
         do {
                 next = p4d_addr_end(addr, end);
-                if (p4d_clear_huge(p4d))
+                cleared = p4d_clear_huge(p4d);
+                if (cleared || p4d_bad(*p4d))
+                        *mask |= PGTBL_P4D_MODIFIED;
+                if (cleared)
                         continue;
                 if (p4d_none_or_clear_bad(p4d))
                         continue;
-                vunmap_pud_range(p4d, addr, next);
+                vunmap_pud_range(p4d, addr, next, mask);
         } while (p4d++, addr = next, addr != end);
 }
 
 /**
  * unmap_kernel_range_noflush - unmap kernel VM area
- * @addr: start of the VM area to unmap
+ * @start: start of the VM area to unmap
  * @size: size of the VM area to unmap
  *
  * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size specify
@@ -141,24 +164,33 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
  * for calling flush_cache_vunmap() on to-be-mapped areas before calling this
  * function and flush_tlb_kernel_range() after.
  */
-void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
+void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
 {
-        unsigned long end = addr + size;
+        unsigned long end = start + size;
         unsigned long next;
         pgd_t *pgd;
+        unsigned long addr = start;
+        pgtbl_mod_mask mask = 0;
 
         BUG_ON(addr >= end);
+        start = addr;
         pgd = pgd_offset_k(addr);
         do {
                 next = pgd_addr_end(addr, end);
+                if (pgd_bad(*pgd))
+                        mask |= PGTBL_PGD_MODIFIED;
                 if (pgd_none_or_clear_bad(pgd))
                         continue;
-                vunmap_p4d_range(pgd, addr, next);
+                vunmap_p4d_range(pgd, addr, next, &mask);
         } while (pgd++, addr = next, addr != end);
+
+        if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+                arch_sync_kernel_mappings(start, end);
 }
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-                unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+                unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+                pgtbl_mod_mask *mask)
 {
         pte_t *pte;
 
@@ -167,7 +199,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
          * callers keep track of where we're up to.
          */
-        pte = pte_alloc_kernel(pmd, addr);
+        pte = pte_alloc_kernel_track(pmd, addr, mask);
         if (!pte)
                 return -ENOMEM;
         do {
@@ -180,55 +212,59 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
                 set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
                 (*nr)++;
         } while (pte++, addr += PAGE_SIZE, addr != end);
+        *mask |= PGTBL_PTE_MODIFIED;
         return 0;
 }
 
 static int vmap_pmd_range(pud_t *pud, unsigned long addr,
-                unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+                unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+                pgtbl_mod_mask *mask)
 {
         pmd_t *pmd;
         unsigned long next;
 
-        pmd = pmd_alloc(&init_mm, pud, addr);
+        pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
         if (!pmd)
                 return -ENOMEM;
         do {
                 next = pmd_addr_end(addr, end);
-                if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
+                if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
                         return -ENOMEM;
         } while (pmd++, addr = next, addr != end);
         return 0;
 }
 
 static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
-                unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+                unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+                pgtbl_mod_mask *mask)
 {
         pud_t *pud;
         unsigned long next;
 
-        pud = pud_alloc(&init_mm, p4d, addr);
+        pud = pud_alloc_track(&init_mm, p4d, addr, mask);
         if (!pud)
                 return -ENOMEM;
         do {
                 next = pud_addr_end(addr, end);
-                if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
+                if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
                         return -ENOMEM;
         } while (pud++, addr = next, addr != end);
         return 0;
 }
 
 static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
-                unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+                unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+                pgtbl_mod_mask *mask)
 {
         p4d_t *p4d;
         unsigned long next;
 
-        p4d = p4d_alloc(&init_mm, pgd, addr);
+        p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
         if (!p4d)
                 return -ENOMEM;
         do {
                 next = p4d_addr_end(addr, end);
-                if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
+                if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
                         return -ENOMEM;
         } while (p4d++, addr = next, addr != end);
         return 0;
@@ -255,21 +291,28 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
 int map_kernel_range_noflush(unsigned long addr, unsigned long size,
                              pgprot_t prot, struct page **pages)
 {
+        unsigned long start = addr;
         unsigned long end = addr + size;
         unsigned long next;
         pgd_t *pgd;
         int err = 0;
         int nr = 0;
+        pgtbl_mod_mask mask = 0;
 
         BUG_ON(addr >= end);
         pgd = pgd_offset_k(addr);
         do {
                 next = pgd_addr_end(addr, end);
+                if (pgd_bad(*pgd))
+                        mask |= PGTBL_PGD_MODIFIED;
-                err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
+                err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
                 if (err)
                         return err;
         } while (pgd++, addr = next, addr != end);
+
+        if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+                arch_sync_kernel_mappings(start, end);
+
         return 0;
 }
...