Commit dff11abe authored by Mike Kravetz's avatar Mike Kravetz Committed by Greg Kroah-Hartman

hugetlb: take PMD sharing into account when flushing tlb/caches

When fixing an issue with PMD sharing and migration, it was discovered via
code inspection that other callers of huge_pmd_unshare potentially have an
issue with cache and tlb flushing.

Use the routine adjust_range_if_pmd_sharing_possible() to calculate worst
case ranges for mmu notifiers.  Ensure that this range is flushed if
huge_pmd_unshare succeeds and unmaps a PUD_SUZE area.

Link: http://lkml.kernel.org/r/20180823205917.16297-3-mike.kravetz@oracle.comSigned-off-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Acked-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: default avatarNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 017b1660
...@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct page *page; struct page *page;
struct hstate *h = hstate_vma(vma); struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h); unsigned long sz = huge_page_size(h);
const unsigned long mmun_start = start; /* For mmu_notifiers */ unsigned long mmun_start = start; /* For mmu_notifiers */
const unsigned long mmun_end = end; /* For mmu_notifiers */ unsigned long mmun_end = end; /* For mmu_notifiers */
WARN_ON(!is_vm_hugetlb_page(vma)); WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h)); BUG_ON(start & ~huge_page_mask(h));
...@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/ */
tlb_remove_check_page_size_change(tlb, sz); tlb_remove_check_page_size_change(tlb, sz);
tlb_start_vma(tlb, vma); tlb_start_vma(tlb, vma);
/*
* If sharing possible, alert mmu notifiers of worst case.
*/
adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start; address = start;
for (; address < end; address += sz) { for (; address < end; address += sz) {
...@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
ptl = huge_pte_lock(h, mm, ptep); ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, &address, ptep)) { if (huge_pmd_unshare(mm, &address, ptep)) {
spin_unlock(ptl); spin_unlock(ptl);
/*
* We just unmapped a page of PMDs by clearing a PUD.
* The caller's TLB flush range should cover this area.
*/
continue; continue;
} }
...@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, ...@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
{ {
struct mm_struct *mm; struct mm_struct *mm;
struct mmu_gather tlb; struct mmu_gather tlb;
unsigned long tlb_start = start;
unsigned long tlb_end = end;
/*
* If shared PMDs were possibly used within this vma range, adjust
* start/end for worst case tlb flushing.
* Note that we can not be sure if PMDs are shared until we try to
* unmap pages. However, we want to make sure TLB flushing covers
* the largest possible range.
*/
adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
mm = vma->vm_mm; mm = vma->vm_mm;
tlb_gather_mmu(&tlb, mm, start, end); tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
__unmap_hugepage_range(&tlb, vma, start, end, ref_page); __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
tlb_finish_mmu(&tlb, start, end); tlb_finish_mmu(&tlb, tlb_start, tlb_end);
} }
/* /*
...@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t pte; pte_t pte;
struct hstate *h = hstate_vma(vma); struct hstate *h = hstate_vma(vma);
unsigned long pages = 0; unsigned long pages = 0;
unsigned long f_start = start;
unsigned long f_end = end;
bool shared_pmd = false;
/*
* In the case of shared PMDs, the area to flush could be beyond
* start/end. Set f_start/f_end to cover the maximum possible
* range if PMD sharing is possible.
*/
adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
BUG_ON(address >= end); BUG_ON(address >= end);
flush_cache_range(vma, address, end); flush_cache_range(vma, f_start, f_end);
mmu_notifier_invalidate_range_start(mm, start, end); mmu_notifier_invalidate_range_start(mm, f_start, f_end);
i_mmap_lock_write(vma->vm_file->f_mapping); i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += huge_page_size(h)) { for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl; spinlock_t *ptl;
...@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
if (huge_pmd_unshare(mm, &address, ptep)) { if (huge_pmd_unshare(mm, &address, ptep)) {
pages++; pages++;
spin_unlock(ptl); spin_unlock(ptl);
shared_pmd = true;
continue; continue;
} }
pte = huge_ptep_get(ptep); pte = huge_ptep_get(ptep);
...@@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4348,9 +4379,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
* may have cleared our pud entry and done put_page on the page table: * may have cleared our pud entry and done put_page on the page table:
* once we release i_mmap_rwsem, another task can do the final put_page * once we release i_mmap_rwsem, another task can do the final put_page
* and that page table be reused and filled with junk. * and that page table be reused and filled with junk. If we actually
* did unshare a page of pmds, flush the range corresponding to the pud.
*/ */
flush_hugetlb_tlb_range(vma, start, end); if (shared_pmd)
flush_hugetlb_tlb_range(vma, f_start, f_end);
else
flush_hugetlb_tlb_range(vma, start, end);
/* /*
* No need to call mmu_notifier_invalidate_range() we are downgrading * No need to call mmu_notifier_invalidate_range() we are downgrading
* page table protection not changing it to point to a new page. * page table protection not changing it to point to a new page.
...@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* See Documentation/vm/mmu_notifier.rst * See Documentation/vm/mmu_notifier.rst
*/ */
i_mmap_unlock_write(vma->vm_file->f_mapping); i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end); mmu_notifier_invalidate_range_end(mm, f_start, f_end);
return pages << h->order; return pages << h->order;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment