Commit 0b5a5063 authored by Stefan Bader's avatar Stefan Bader Committed by David Vrabel

x86/xen: don't copy bogus duplicate entries into kernel page tables

When RANDOMIZE_BASE (KASLR) is enabled; or the sum of all loaded
modules exceeds 512 MiB, then loading modules fails with a warning
(and hence a vmalloc allocation failure) because the PTEs for the
newly-allocated vmalloc address space are not zero.

  WARNING: CPU: 0 PID: 494 at linux/mm/vmalloc.c:128
           vmap_page_range_noflush+0x2a1/0x360()

This is caused by xen_setup_kernel_pagetables() copying
level2_kernel_pgt into level2_fixmap_pgt, overwriting many non-present
entries.

Without KASLR, the normal kernel image size only covers the first half
of level2_kernel_pgt and module space starts after that.

L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[  0..255]->kernel
                                                  [256..511]->module
                          [511]->level2_fixmap_pgt[  0..505]->module

This allows 512 MiB of of module vmalloc space to be used before
having to use the corrupted level2_fixmap_pgt entries.

With KASLR enabled, the kernel image uses the full PUD range of 1G and
module space starts in the level2_fixmap_pgt. So basically:

L4[511]->level3_kernel_pgt[510]->level2_kernel_pgt[0..511]->kernel
                          [511]->level2_fixmap_pgt[0..505]->module

And now no module vmalloc space can be used without using the corrupt
level2_fixmap_pgt entries.

Fix this by properly converting the level2_fixmap_pgt entries to MFNs,
and setting level1_fixmap_pgt as read-only.

A number of comments were also using the the wrong L3 offset for
level2_kernel_pgt.  These have been corrected.
Signed-off-by: default avatarStefan Bader <stefan.bader@canonical.com>
Signed-off-by: default avatarDavid Vrabel <david.vrabel@citrix.com>
Reviewed-by: default avatarBoris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: stable@vger.kernel.org
parent 5903c6bd
...@@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512]; ...@@ -19,6 +19,7 @@ extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512]; extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512]; extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512]; extern pmd_t level2_ident_pgt[512];
extern pte_t level1_fixmap_pgt[512];
extern pgd_t init_level4_pgt[]; extern pgd_t init_level4_pgt[];
#define swapper_pg_dir init_level4_pgt #define swapper_pg_dir init_level4_pgt
......
...@@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, ...@@ -1866,12 +1866,11 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
* *
* We can construct this by grafting the Xen provided pagetable into * We can construct this by grafting the Xen provided pagetable into
* head_64.S's preconstructed pagetables. We copy the Xen L2's into * head_64.S's preconstructed pagetables. We copy the Xen L2's into
* level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This * level2_ident_pgt, and level2_kernel_pgt. This means that only the
* means that only the kernel has a physical mapping to start with - * kernel has a physical mapping to start with - but that's enough to
* but that's enough to get __va working. We need to fill in the rest * get __va working. We need to fill in the rest of the physical
* of the physical mapping once some sort of allocator has been set * mapping once some sort of allocator has been set up. NOTE: for
* up. * PVH, the page tables are native.
* NOTE: for PVH, the page tables are native.
*/ */
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{ {
...@@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) ...@@ -1902,8 +1901,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* L3_i[0] -> level2_ident_pgt */ /* L3_i[0] -> level2_ident_pgt */
convert_pfn_mfn(level3_ident_pgt); convert_pfn_mfn(level3_ident_pgt);
/* L3_k[510] -> level2_kernel_pgt /* L3_k[510] -> level2_kernel_pgt
* L3_i[511] -> level2_fixmap_pgt */ * L3_k[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt); convert_pfn_mfn(level3_kernel_pgt);
/* L3_k[511][506] -> level1_fixmap_pgt */
convert_pfn_mfn(level2_fixmap_pgt);
} }
/* We get [511][511] and have Xen's version of level2_kernel_pgt */ /* We get [511][511] and have Xen's version of level2_kernel_pgt */
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
...@@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) ...@@ -1913,21 +1915,15 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
addr[1] = (unsigned long)l3; addr[1] = (unsigned long)l3;
addr[2] = (unsigned long)l2; addr[2] = (unsigned long)l2;
/* Graft it onto L4[272][0]. Note that we creating an aliasing problem: /* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
* Both L4[272][0] and L4[511][511] have entries that point to the same * Both L4[272][0] and L4[511][510] have entries that point to the same
* L2 (PMD) tables. Meaning that if you modify it in __va space * L2 (PMD) tables. Meaning that if you modify it in __va space
* it will be also modified in the __ka space! (But if you just * it will be also modified in the __ka space! (But if you just
* modify the PMD table to point to other PTE's or none, then you * modify the PMD table to point to other PTE's or none, then you
* are OK - which is what cleanup_highmap does) */ * are OK - which is what cleanup_highmap does) */
copy_page(level2_ident_pgt, l2); copy_page(level2_ident_pgt, l2);
/* Graft it onto L4[511][511] */ /* Graft it onto L4[511][510] */
copy_page(level2_kernel_pgt, l2); copy_page(level2_kernel_pgt, l2);
/* Get [511][510] and graft that in level2_fixmap_pgt */
l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
copy_page(level2_fixmap_pgt, l2);
/* Note that we don't do anything with level1_fixmap_pgt which
* we don't need. */
if (!xen_feature(XENFEAT_auto_translated_physmap)) { if (!xen_feature(XENFEAT_auto_translated_physmap)) {
/* Make pagetable pieces RO */ /* Make pagetable pieces RO */
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
...@@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) ...@@ -1937,6 +1933,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
/* Pin down new L4 */ /* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment