Commit 98104c34 authored by Konrad Rzeszutek Wilk's avatar Konrad Rzeszutek Wilk

Merge branch 'stable/128gb.v5.1' into stable/for-linus-3.7

* stable/128gb.v5.1:
  xen/mmu: If the revector fails, don't attempt to revector anything else.
  xen/p2m: When revectoring deal with holes in the P2M array.
  xen/mmu: Release just the MFN list, not MFN list and part of pagetables.
  xen/mmu: Remove from __ka space PMD entries for pagetables.
  xen/mmu: Copy and revector the P2M tree.
  xen/p2m: Add logic to revector a P2M tree to use __va leafs.
  xen/mmu: Recycle the Xen provided L4, L3, and L2 pages
  xen/mmu: For 64-bit do not call xen_map_identity_early
  xen/mmu: use copy_page instead of memcpy.
  xen/mmu: Provide comments describing the _ka and _va aliasing issue
  xen/mmu: The xen_setup_kernel_pagetable doesn't need to return anything.
  Revert "xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain." and "xen/x86: Use memblock_reserve for sensitive areas."
  xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain.
  xen/x86: Use memblock_reserve for sensitive areas.
  xen/p2m: Fix the comment describing the P2M tree.

Conflicts:
	arch/x86/xen/mmu.c

The pagetable_init is the old xen_pagetable_setup_done and xen_pagetable_setup_start
rolled in one.
Signed-off-by: default avatarKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>
parents 25a765b7 32873187
...@@ -1290,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1290,7 +1290,6 @@ asmlinkage void __init xen_start_kernel(void)
{ {
struct physdev_set_iopl set_iopl; struct physdev_set_iopl set_iopl;
int rc; int rc;
pgd_t *pgd;
if (!xen_start_info) if (!xen_start_info)
return; return;
...@@ -1382,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1382,8 +1381,6 @@ asmlinkage void __init xen_start_kernel(void)
acpi_numa = -1; acpi_numa = -1;
#endif #endif
pgd = (pgd_t *)xen_start_info->pt_base;
/* Don't do the full vcpu_info placement stuff until we have a /* Don't do the full vcpu_info placement stuff until we have a
possible map and a non-dummy shared_info. */ possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
...@@ -1392,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void) ...@@ -1392,7 +1389,7 @@ asmlinkage void __init xen_start_kernel(void)
early_boot_irqs_disabled = true; early_boot_irqs_disabled = true;
xen_raw_console_write("mapping kernel into physical memory\n"); xen_raw_console_write("mapping kernel into physical memory\n");
pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
/* Allocate and initialize top and mid mfn levels for p2m structure */ /* Allocate and initialize top and mid mfn levels for p2m structure */
xen_build_mfn_list_list(); xen_build_mfn_list_list();
......
...@@ -84,6 +84,7 @@ ...@@ -84,6 +84,7 @@
*/ */
DEFINE_SPINLOCK(xen_reservation_lock); DEFINE_SPINLOCK(xen_reservation_lock);
#ifdef CONFIG_X86_32
/* /*
* Identity map, in addition to plain kernel map. This needs to be * Identity map, in addition to plain kernel map. This needs to be
* large enough to allocate page table pages to allocate the rest. * large enough to allocate page table pages to allocate the rest.
...@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock); ...@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
*/ */
#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
#endif
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */ /* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
...@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm) ...@@ -1176,13 +1177,6 @@ static void xen_exit_mmap(struct mm_struct *mm)
static void xen_post_allocator_init(void); static void xen_post_allocator_init(void);
static void __init xen_pagetable_init(void)
{
paging_init();
xen_setup_shared_info();
xen_post_allocator_init();
}
static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
{ {
/* reserve the range used */ /* reserve the range used */
...@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) ...@@ -1197,6 +1191,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
} }
} }
#ifdef CONFIG_X86_64
static void __init xen_cleanhighmap(unsigned long vaddr,
unsigned long vaddr_end)
{
unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
/* NOTE: The loop is more greedy than the cleanup_highmap variant.
* We include the PMD passed in on _both_ boundaries. */
for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
pmd++, vaddr += PMD_SIZE) {
if (pmd_none(*pmd))
continue;
if (vaddr < (unsigned long) _text || vaddr > kernel_end)
set_pmd(pmd, __pmd(0));
}
/* In case we did something silly, we should crash in this function
* instead of somewhere later and be confusing. */
xen_mc_flush();
}
#endif
static void __init xen_pagetable_init(void)
{
#ifdef CONFIG_X86_64
unsigned long size;
unsigned long addr;
#endif
paging_init();
xen_setup_shared_info();
#ifdef CONFIG_X86_64
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long new_mfn_list;
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
/* On 32-bit, we get zero so this never gets executed. */
new_mfn_list = xen_revector_p2m_tree();
if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
/* using __ka address and sticking INVALID_P2M_ENTRY! */
memset((void *)xen_start_info->mfn_list, 0xff, size);
/* We should be in __ka space. */
BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
addr = xen_start_info->mfn_list;
/* We roundup to the PMD, which means that if anybody at this stage is
* using the __ka address of xen_start_info or xen_start_info->shared_info
* they are in going to crash. Fortunatly we have already revectored
* in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
size = roundup(size, PMD_SIZE);
xen_cleanhighmap(addr, addr + size);
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
memblock_free(__pa(xen_start_info->mfn_list), size);
/* And revector! Bye bye old array */
xen_start_info->mfn_list = new_mfn_list;
} else
goto skip;
}
/* At this stage, cleanup_highmap has already cleaned __ka space
* from _brk_limit way up to the max_pfn_mapped (which is the end of
* the ramdisk). We continue on, erasing PMD entries that point to page
* tables - do note that they are accessible at this stage via __va.
* For good measure we also round up to the PMD - which means that if
* anybody is using __ka address to the initial boot-stack - and try
* to use it - they are going to crash. The xen_start_info has been
* taken care of already in xen_setup_kernel_pagetable. */
addr = xen_start_info->pt_base;
size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
xen_cleanhighmap(addr, addr + size);
xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
#ifdef DEBUG
/* This is superflous and is not neccessary, but you know what
* lets do it. The MODULES_VADDR -> MODULES_END should be clear of
* anything at this stage. */
xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
#endif
skip:
#endif
xen_post_allocator_init();
}
static void xen_write_cr2(unsigned long cr2) static void xen_write_cr2(unsigned long cr2)
{ {
this_cpu_read(xen_vcpu)->arch.cr2 = cr2; this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
...@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot) ...@@ -1652,7 +1727,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
BUG(); BUG();
} }
#ifdef CONFIG_X86_32
static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{ {
unsigned pmdidx, pteidx; unsigned pmdidx, pteidx;
...@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) ...@@ -1703,7 +1778,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
set_page_prot(pmd, PAGE_KERNEL_RO); set_page_prot(pmd, PAGE_KERNEL_RO);
} }
#endif
void __init xen_setup_machphys_mapping(void) void __init xen_setup_machphys_mapping(void)
{ {
struct xen_machphys_mapping mapping; struct xen_machphys_mapping mapping;
...@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v) ...@@ -1731,7 +1806,20 @@ static void convert_pfn_mfn(void *v)
for (i = 0; i < PTRS_PER_PTE; i++) for (i = 0; i < PTRS_PER_PTE; i++)
pte[i] = xen_make_pte(pte[i].pte); pte[i] = xen_make_pte(pte[i].pte);
} }
static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
unsigned long addr)
{
if (*pt_base == PFN_DOWN(__pa(addr))) {
set_page_prot((void *)addr, PAGE_KERNEL);
clear_page((void *)addr);
(*pt_base)++;
}
if (*pt_end == PFN_DOWN(__pa(addr))) {
set_page_prot((void *)addr, PAGE_KERNEL);
clear_page((void *)addr);
(*pt_end)--;
}
}
/* /*
* Set up the initial kernel pagetable. * Set up the initial kernel pagetable.
* *
...@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v) ...@@ -1743,11 +1831,13 @@ static void convert_pfn_mfn(void *v)
* of the physical mapping once some sort of allocator has been set * of the physical mapping once some sort of allocator has been set
* up. * up.
*/ */
pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
unsigned long max_pfn)
{ {
pud_t *l3; pud_t *l3;
pmd_t *l2; pmd_t *l2;
unsigned long addr[3];
unsigned long pt_base, pt_end;
unsigned i;
/* max_pfn_mapped is the last pfn mapped in the initial memory /* max_pfn_mapped is the last pfn mapped in the initial memory
* mappings. Considering that on Xen after the kernel mappings we * mappings. Considering that on Xen after the kernel mappings we
...@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, ...@@ -1755,32 +1845,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
* set max_pfn_mapped to the last real pfn mapped. */ * set max_pfn_mapped to the last real pfn mapped. */
max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
pt_end = pt_base + xen_start_info->nr_pt_frames;
/* Zap identity mapping */ /* Zap identity mapping */
init_level4_pgt[0] = __pgd(0); init_level4_pgt[0] = __pgd(0);
/* Pre-constructed entries are in pfn, so convert to mfn */ /* Pre-constructed entries are in pfn, so convert to mfn */
/* L4[272] -> level3_ident_pgt
* L4[511] -> level3_kernel_pgt */
convert_pfn_mfn(init_level4_pgt); convert_pfn_mfn(init_level4_pgt);
/* L3_i[0] -> level2_ident_pgt */
convert_pfn_mfn(level3_ident_pgt); convert_pfn_mfn(level3_ident_pgt);
/* L3_k[510] -> level2_kernel_pgt
* L3_i[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt); convert_pfn_mfn(level3_kernel_pgt);
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); addr[0] = (unsigned long)pgd;
memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); addr[1] = (unsigned long)l3;
addr[2] = (unsigned long)l2;
/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
* Both L4[272][0] and L4[511][511] have entries that point to the same
* L2 (PMD) tables. Meaning that if you modify it in __va space
* it will be also modified in the __ka space! (But if you just
* modify the PMD table to point to other PTE's or none, then you
* are OK - which is what cleanup_highmap does) */
copy_page(level2_ident_pgt, l2);
/* Graft it onto L4[511][511] */
copy_page(level2_kernel_pgt, l2);
/* Get [511][510] and graft that in level2_fixmap_pgt */
l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); copy_page(level2_fixmap_pgt, l2);
/* Note that we don't do anything with level1_fixmap_pgt which
/* Set up identity map */ * we don't need. */
xen_map_identity_early(level2_ident_pgt, max_pfn);
/* Make pagetable pieces RO */ /* Make pagetable pieces RO */
set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
...@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, ...@@ -1791,22 +1902,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
/* Unpin Xen-provided one */ /* Unpin Xen-provided one */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
/* Switch over */
pgd = init_level4_pgt;
/* /*
* At this stage there can be no user pgd, and no page * At this stage there can be no user pgd, and no page
* structure to attach it to, so make sure we just set kernel * structure to attach it to, so make sure we just set kernel
* pgd. * pgd.
*/ */
xen_mc_batch(); xen_mc_batch();
__xen_write_cr3(true, __pa(pgd)); __xen_write_cr3(true, __pa(init_level4_pgt));
xen_mc_issue(PARAVIRT_LAZY_CPU); xen_mc_issue(PARAVIRT_LAZY_CPU);
memblock_reserve(__pa(xen_start_info->pt_base), /* We can't that easily rip out L3 and L2, as the Xen pagetables are
xen_start_info->nr_pt_frames * PAGE_SIZE); * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
* the initial domain. For guests using the toolstack, they are in:
* [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
* rip out the [L4] (pgd), but for guests we shave off three pages.
*/
for (i = 0; i < ARRAY_SIZE(addr); i++)
check_pt_base(&pt_base, &pt_end, addr[i]);
return pgd; /* Our (by three pages) smaller Xen pagetable that we are using */
memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
/* Revector the xen_start_info */
xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
} }
#else /* !CONFIG_X86_64 */ #else /* !CONFIG_X86_64 */
static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
...@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) ...@@ -1831,8 +1948,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
*/ */
swapper_kernel_pmd = swapper_kernel_pmd =
extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
memcpy(swapper_kernel_pmd, initial_kernel_pmd, copy_page(swapper_kernel_pmd, initial_kernel_pmd);
sizeof(pmd_t) * PTRS_PER_PMD);
swapper_pg_dir[KERNEL_PGD_BOUNDARY] = swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
...@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) ...@@ -1849,8 +1965,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
pv_mmu_ops.write_cr3 = &xen_write_cr3; pv_mmu_ops.write_cr3 = &xen_write_cr3;
} }
pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
unsigned long max_pfn)
{ {
pmd_t *kernel_pmd; pmd_t *kernel_pmd;
...@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, ...@@ -1862,11 +1977,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
512*1024); 512*1024);
kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); copy_page(initial_kernel_pmd, kernel_pmd);
xen_map_identity_early(initial_kernel_pmd, max_pfn); xen_map_identity_early(initial_kernel_pmd, max_pfn);
memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); copy_page(initial_page_table, pgd);
initial_page_table[KERNEL_PGD_BOUNDARY] = initial_page_table[KERNEL_PGD_BOUNDARY] =
__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
...@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, ...@@ -1882,8 +1997,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
memblock_reserve(__pa(xen_start_info->pt_base), memblock_reserve(__pa(xen_start_info->pt_base),
xen_start_info->nr_pt_frames * PAGE_SIZE); xen_start_info->nr_pt_frames * PAGE_SIZE);
return initial_page_table;
} }
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
* *
* P2M_PER_PAGE depends on the architecture, as a mfn is always * P2M_PER_PAGE depends on the architecture, as a mfn is always
* unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
* 512 and 1024 entries respectively. * 512 and 1024 entries respectively.
* *
* In short, these structures contain the Machine Frame Number (MFN) of the PFN. * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
* *
...@@ -139,11 +139,11 @@ ...@@ -139,11 +139,11 @@
* / | ~0, ~0, .... | * / | ~0, ~0, .... |
* | \---------------/ * | \---------------/
* | * |
* p2m_missing p2m_missing * p2m_mid_missing p2m_missing
* /------------------\ /------------\ * /-----------------\ /------------\
* | [p2m_mid_missing]+---->| ~0, ~0, ~0 | * | [p2m_missing] +---->| ~0, ~0, ~0 |
* | [p2m_mid_missing]+---->| ..., ~0 | * | [p2m_missing] +---->| ..., ~0 |
* \------------------/ \------------/ * \-----------------/ \------------/
* *
* where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
*/ */
...@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void) ...@@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void)
m2p_override_init(); m2p_override_init();
} }
#ifdef CONFIG_X86_64
#include <linux/bootmem.h>
unsigned long __init xen_revector_p2m_tree(void)
{
unsigned long va_start;
unsigned long va_end;
unsigned long pfn;
unsigned long pfn_free = 0;
unsigned long *mfn_list = NULL;
unsigned long size;
va_start = xen_start_info->mfn_list;
/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
* so make sure it is rounded up to that */
size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
va_end = va_start + size;
/* If we were revectored already, don't do it again. */
if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
return 0;
mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
if (!mfn_list) {
pr_warn("Could not allocate space for a new P2M tree!\n");
return xen_start_info->mfn_list;
}
/* Fill it out with INVALID_P2M_ENTRY value */
memset(mfn_list, 0xFF, size);
for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
unsigned topidx = p2m_top_index(pfn);
unsigned mididx;
unsigned long *mid_p;
if (!p2m_top[topidx])
continue;
if (p2m_top[topidx] == p2m_mid_missing)
continue;
mididx = p2m_mid_index(pfn);
mid_p = p2m_top[topidx][mididx];
if (!mid_p)
continue;
if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
continue;
if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
continue;
/* The old va. Rebase it on mfn_list */
if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
unsigned long *new;
if (pfn_free > (size / sizeof(unsigned long))) {
WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
size / sizeof(unsigned long), pfn_free);
return 0;
}
new = &mfn_list[pfn_free];
copy_page(new, mid_p);
p2m_top[topidx][mididx] = &mfn_list[pfn_free];
p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
pfn_free += P2M_PER_PAGE;
}
/* This should be the leafs allocated for identity from _brk. */
}
return (unsigned long)mfn_list;
}
#else
unsigned long __init xen_revector_p2m_tree(void)
{
return 0;
}
#endif
unsigned long get_phys_to_machine(unsigned long pfn) unsigned long get_phys_to_machine(unsigned long pfn)
{ {
unsigned topidx, mididx, idx; unsigned topidx, mididx, idx;
...@@ -430,7 +508,7 @@ static void free_p2m_page(void *p) ...@@ -430,7 +508,7 @@ static void free_p2m_page(void *p)
free_page((unsigned long)p); free_page((unsigned long)p);
} }
/* /*
* Fully allocate the p2m structure for a given pfn. We need to check * Fully allocate the p2m structure for a given pfn. We need to check
* that both the top and mid levels are allocated, and make sure the * that both the top and mid levels are allocated, and make sure the
* parallel mfn tree is kept in sync. We may race with other cpus, so * parallel mfn tree is kept in sync. We may race with other cpus, so
......
...@@ -431,6 +431,24 @@ char * __init xen_memory_setup(void) ...@@ -431,6 +431,24 @@ char * __init xen_memory_setup(void)
* - mfn_list * - mfn_list
* - xen_start_info * - xen_start_info
* See comment above "struct start_info" in <xen/interface/xen.h> * See comment above "struct start_info" in <xen/interface/xen.h>
* We tried to make the the memblock_reserve more selective so
* that it would be clear what region is reserved. Sadly we ran
* in the problem wherein on a 64-bit hypervisor with a 32-bit
* initial domain, the pt_base has the cr3 value which is not
* neccessarily where the pagetable starts! As Jan put it: "
* Actually, the adjustment turns out to be correct: The page
* tables for a 32-on-64 dom0 get allocated in the order "first L1",
* "first L2", "first L3", so the offset to the page table base is
* indeed 2. When reading xen/include/public/xen.h's comment
* very strictly, this is not a violation (since there nothing is said
* that the first thing in the page table space is pointed to by
* pt_base; I admit that this seems to be implied though, namely
* do I think that it is implied that the page table space is the
* range [pt_base, pt_base + nt_pt_frames), whereas that
* range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
* which - without a priori knowledge - the kernel would have
* difficulty to figure out)." - so lets just fall back to the
* easy way and reserve the whole region.
*/ */
memblock_reserve(__pa(xen_start_info->mfn_list), memblock_reserve(__pa(xen_start_info->mfn_list),
xen_start_info->pt_base - xen_start_info->mfn_list); xen_start_info->pt_base - xen_start_info->mfn_list);
......
...@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void); ...@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void); void xen_setup_shared_info(void);
void xen_build_mfn_list_list(void); void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void); void xen_setup_machphys_mapping(void);
pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
void xen_reserve_top(void); void xen_reserve_top(void);
extern unsigned long xen_max_p2m_pfn; extern unsigned long xen_max_p2m_pfn;
...@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void); ...@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
void xen_unplug_emulated_devices(void); void xen_unplug_emulated_devices(void);
void __init xen_build_dynamic_phys_to_machine(void); void __init xen_build_dynamic_phys_to_machine(void);
unsigned long __init xen_revector_p2m_tree(void);
void xen_init_irq_ops(void); void xen_init_irq_ops(void);
void xen_setup_timer(int cpu); void xen_setup_timer(int cpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment