Commit 4f28b187 authored by David Mosberger

ia64: hugepage_free_pgtables() bug-fix

	When there are two huge page mappings, like the two in the example
	below, where the first one ends at a PGDIR_SIZE boundary and the
	second one starts right at that boundary, in the next PGDIR_SIZE
	region (PGDIR_SIZE is 64GB with a 16KB page size):

	8000000ff0000000-8000001000000000 rw-s
	8000001000000000-8000001010000000 rw-s

	Unmapping the first vma tricks free_pgtables() into thinking it
	can remove the set of page tables hanging off the pgd entry at
	index 0x400, so it goes ahead and purges the pmd/pte that are
	still in use by the second mapping.  Any subsequent access to the
	pmd/pte of the second, still-active mapping then triggers the bug.
	We have seen hard kernel hangs on some platforms and MCAs on
	others, plus all kinds of other unpleasant results.
parent d87af207
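
For illustration only (not part of the patch): below is a minimal user-space sketch of the index arithmetic behind the bug, under the assumptions of a 16KB base page (PAGE_SHIFT=14, so PGDIR_SIZE is 64GB), a 256MB huge page (HPAGE_SHIFT=28), and a 64-bit host. pgd_index() and htlbpage_to_page() are simplified models of the ia64 definitions this patch touches, not the kernel code itself. It shows that the generic free_pgtables() rounding for the first vma selects pgd slot 0x400, which is exactly the slot the second, still-mapped vma's page tables hang off once the huge-page address is compressed through htlbpage_to_page().

/* Hypothetical user-space sketch (not kernel code): reproduces the index
 * arithmetic from the commit message.  Assumes PAGE_SHIFT=14, HPAGE_SHIFT=28,
 * 64-bit unsigned long.
 */
#include <stdio.h>

#define PAGE_SHIFT	14
#define HPAGE_SHIFT	28				/* assumption: 256MB huge pages */
#define PGDIR_SHIFT	(PAGE_SHIFT + 2*(PAGE_SHIFT-3))	/* 36 -> 64GB per pgd entry */
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))
#define PTRS_PER_PGD	(1UL << (PAGE_SHIFT - 3))

#define REGION_NUMBER(x)	((x) >> 61)
#define REGION_OFFSET(x)	((x) & 0x1fffffffffffffffUL)

/* modeled after ia64's pgd_index(): the 3 region bits sit above the level-1 index */
static unsigned long pgd_index(unsigned long addr)
{
	unsigned long region = addr >> 61;
	unsigned long l1index = (addr >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1);
	return (region << (PAGE_SHIFT - 6)) | l1index;
}

/* hugetlb VAs are compressed before they index the page tables (see page.h below) */
static unsigned long htlbpage_to_page(unsigned long x)
{
	return (REGION_NUMBER(x) << 61)
		| (REGION_OFFSET(x) >> (HPAGE_SHIFT - PAGE_SHIFT));
}

int main(void)
{
	unsigned long start = 0x8000000ff0000000UL;	/* first vma  [start, end) */
	unsigned long end   = 0x8000001000000000UL;
	unsigned long vma2  = 0x8000001000000000UL;	/* second vma starts here */

	/* what the generic free_pgtables() rounding would clear for the first vma */
	unsigned long first = start & PGDIR_MASK;
	unsigned long last  = end + PGDIR_SIZE - 1;
	if (last > vma2)		/* clipped against the next vma, as in free_pgtables() */
		last = vma2;

	printf("generic rounding clears pgd slots [0x%lx, 0x%lx)\n",
	       pgd_index(first), pgd_index(last));
	printf("second mapping's tables actually live under pgd slot 0x%lx\n",
	       pgd_index(htlbpage_to_page(vma2)));
	return 0;
}

Under these assumptions the first printf reports [0x400, 0x401), i.e. slot 0x400 gets cleared, while the second printf reports that the still-live second mapping also sits under slot 0x400. With the hugetlb_free_pgtables() added below, first and last are rounded with HUGETLB_PGDIR_SIZE and indexed via htlbpage_to_page(), so both resolve to slot 0x400, end_index is no longer greater than start_index, and nothing is cleared while the neighbouring mapping is still in use.
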
......
@@ -144,17 +144,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-/* This function checks if the address and address+len falls out of HugeTLB region. It
- * return -EINVAL if any part of address range falls in HugeTLB region.
- */
-int check_valid_hugepage_range(unsigned long addr, unsigned long len)
-{
-	if (REGION_NUMBER(addr) == REGION_HPAGE)
-		return -EINVAL;
-	if (REGION_NUMBER(addr+len) == REGION_HPAGE)
-		return -EINVAL;
-	return 0;
-}
-
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma)
......
@@ -272,6 +261,59 @@ void huge_page_release(struct page *page)
 	free_huge_page(page);
 }
 
+/*
+ * Same as generic free_pgtables(), except constant PGDIR_* and pgd_offset
+ * are hugetlb region specific.
+ */
+void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
+	unsigned long start, unsigned long end)
+{
+	unsigned long first = start & HUGETLB_PGDIR_MASK;
+	unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
+	unsigned long start_index, end_index;
+	struct mm_struct *mm = tlb->mm;
+
+	if (!prev) {
+		prev = mm->mmap;
+		if (!prev)
+			goto no_mmaps;
+		if (prev->vm_end > start) {
+			if (last > prev->vm_start)
+				last = prev->vm_start;
+			goto no_mmaps;
+		}
+	}
+	for (;;) {
+		struct vm_area_struct *next = prev->vm_next;
+
+		if (next) {
+			if (next->vm_start < start) {
+				prev = next;
+				continue;
+			}
+			if (last > next->vm_start)
+				last = next->vm_start;
+		}
+		if (prev->vm_end > first)
+			first = prev->vm_end + HUGETLB_PGDIR_SIZE - 1;
+		break;
+	}
+no_mmaps:
+	if (last < first)	/* for arches with discontiguous pgd indices */
+		return;
+	/*
+	 * If the PGD bits are not consecutive in the virtual address, the
+	 * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
+	 */
+	start_index = pgd_index(htlbpage_to_page(first));
+	end_index = pgd_index(htlbpage_to_page(last));
+	if (end_index > start_index) {
+		clear_page_tables(tlb, start_index, end_index - start_index);
+	}
+}
+
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
......
......
@@ -63,7 +63,7 @@
 # define HPAGE_SIZE	(__IA64_UL_CONST(1) << HPAGE_SHIFT)
 # define HPAGE_MASK	(~(HPAGE_SIZE - 1))
 # define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-# define ARCH_HAS_VALID_HUGEPAGE_RANGE
+# define ARCH_HAS_HUGEPAGE_ONLY_RANGE
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #ifdef __ASSEMBLY__
......
@@ -137,7 +137,9 @@ typedef union ia64_va {
 # define htlbpage_to_page(x)	((REGION_NUMBER(x) << 61)	\
 				 | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
 # define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
-extern int check_valid_hugepage_range(unsigned long addr, unsigned long len);
+# define is_hugepage_only_range(addr, len)	\
+	(REGION_NUMBER(addr) == REGION_HPAGE &&	\
+	 REGION_NUMBER((addr)+(len)) == REGION_HPAGE)
 #endif
 
 static __inline__ int
......
......
@@ -459,6 +459,15 @@ extern struct page *zero_page_memmap_ptr;
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
 
+#ifdef CONFIG_HUGETLB_PAGE
+#define HUGETLB_PGDIR_SHIFT	(HPAGE_SHIFT + 2*(PAGE_SHIFT-3))
+#define HUGETLB_PGDIR_SIZE	(__IA64_UL(1) << HUGETLB_PGDIR_SHIFT)
+#define HUGETLB_PGDIR_MASK	(~(HUGETLB_PGDIR_SIZE-1))
+struct mmu_gather;
+extern void hugetlb_free_pgtables(struct mmu_gather *tlb,
+	struct vm_area_struct * prev, unsigned long start, unsigned long end);
+#endif
+
 typedef pte_t *pte_addr_t;
 
 /*
......
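
One more illustrative sketch (again not part of the patch, and again assuming PAGE_SHIFT=14 and HPAGE_SHIFT=28) of why HUGETLB_PGDIR_SHIFT is defined as HPAGE_SHIFT + 2*(PAGE_SHIFT-3): one HUGETLB_PGDIR_SIZE span of huge-page virtual addresses compresses through htlbpage_to_page() into exactly one ordinary PGDIR_SIZE span, i.e. one pgd entry, which is what allows hugetlb_free_pgtables() to round first/last with the HUGETLB_PGDIR_* constants.

/* Hypothetical user-space check, not kernel code.  Assumes PAGE_SHIFT=14,
 * HPAGE_SHIFT=28, 64-bit unsigned long.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT		14
#define HPAGE_SHIFT		28	/* assumption: 256MB huge pages */
#define PGDIR_SHIFT		(PAGE_SHIFT + 2*(PAGE_SHIFT-3))
#define PGDIR_SIZE		(1UL << PGDIR_SHIFT)
#define HUGETLB_PGDIR_SHIFT	(HPAGE_SHIFT + 2*(PAGE_SHIFT-3))
#define HUGETLB_PGDIR_SIZE	(1UL << HUGETLB_PGDIR_SHIFT)

#define REGION_NUMBER(x)	((x) >> 61)
#define REGION_OFFSET(x)	((x) & 0x1fffffffffffffffUL)

static unsigned long htlbpage_to_page(unsigned long x)
{
	return (REGION_NUMBER(x) << 61)
		| (REGION_OFFSET(x) >> (HPAGE_SHIFT - PAGE_SHIFT));
}

int main(void)
{
	unsigned long hpage_base = 0x8000000000000000UL;	/* start of the hugetlb region */

	/* one hugetlb pgd span compresses to exactly one ordinary pgd span */
	assert(htlbpage_to_page(hpage_base + HUGETLB_PGDIR_SIZE)
	       - htlbpage_to_page(hpage_base) == PGDIR_SIZE);
	printf("one pgd entry covers 2^%d bytes of huge-page VA\n", HUGETLB_PGDIR_SHIFT);
	return 0;
}
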