Commit d3f42511 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 14: i_shared_lock fixes

From: Hugh Dickins <hugh@veritas.com>

First of a batch of six patches which introduce Rajesh Venkatasubramanian's
implementation of a radix priority search tree of vmas, to handle object-based
reverse mapping corner cases well.

rmap 14 i_shared_lock fixes

Start the sequence with a couple of outstanding i_shared_lock fixes.

Since i_shared_sem became i_shared_lock, we've had to shift and then
temporarily remove mremap move's protection against concurrent truncation: if
mremap moves ptes while unmap_mapping_range_list is making its way through the
vmas, there's a danger we'd move a pte from an area yet to be cleaned back
into an area already cleared.
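To make the window concrete, the bad interleaving looks roughly like this (an
illustrative timeline only, not code from the patch; "truncator" stands for
unmap_mapping_range_list walking the mapping's vma list):

/*
 *   truncator                            mremap move
 *   ---------                            -----------
 *   clears ptes in the dst vma
 *                                        moves a pte from the src vma (not
 *                                        yet visited) into the dst vma
 *                                        (already cleared)
 *   clears ptes in the src vma
 *   (the moved pte is no longer there)
 *
 *   Result: a pte mapping a page of the truncated range survives in dst.
 */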

Now take the i_shared_lock together with the page_table_lock in move_one_page.
Replace page_table_present by get_one_pte_map, so we know when it's necessary
to allocate a new page table: in that case we have to drop i_shared_lock,
trylock it after the allocation, and perhaps reorder the locks on the way
back.  Yet another fix: we must check for NULL dst before pte_unmap(dst).
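The resulting lock-ordering dance is easiest to see in the move_one_page hunk
below; distilled here as a simplified excerpt, with explanatory comments added
(the comments are not in the patch):

	/* Entered holding mapping->i_mmap_lock (file vmas) and mm->page_table_lock. */
	dst = get_one_pte_map(mm, new_addr);
	if (unlikely(!dst)) {
		/*
		 * No page table at new_addr yet.  Allocating one may drop
		 * page_table_lock and sleep, so the i_mmap_lock spinlock
		 * cannot stay held across the allocation.
		 */
		pte_unmap_nested(src);
		if (mapping)
			spin_unlock(&mapping->i_mmap_lock);
		dst = alloc_one_pte_map(mm, new_addr);
		if (mapping && !spin_trylock(&mapping->i_mmap_lock)) {
			/*
			 * Couldn't retake it cheaply: back off and reacquire
			 * both locks in the correct order.
			 */
			spin_unlock(&mm->page_table_lock);
			spin_lock(&mapping->i_mmap_lock);
			spin_lock(&mm->page_table_lock);
		}
		/* Locks were dropped meanwhile: re-look up the src pte. */
		src = get_one_pte_map_nested(mm, old_addr);
	}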

And over in rmap.c, try_to_unmap_file's cond_resched amidst its lengthy
nonlinear swapping was now causing might_sleep warnings: it has been moved to
a rather unsatisfactory and less frequent cond_resched_lock on i_shared_lock
when we reach the end of the list, with another before starting on the
nonlinears too.  The "cursor" may become out-of-date if we do schedule, but I
doubt it's worth bothering about.
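For reference, cond_resched_lock behaves roughly as follows (a simplified
sketch of the scheduler helper, not its exact code), which is why anything
cached across the call, such as the nonlinear cursor, can go stale:

#include <linux/sched.h>
#include <linux/spinlock.h>

/* Simplified sketch of cond_resched_lock() semantics. */
static inline int cond_resched_lock_sketch(spinlock_t *lock)
{
	if (need_resched()) {
		spin_unlock(lock);	/* the list can change under us while unlocked */
		cond_resched();		/* give the CPU up */
		spin_lock(lock);	/* retake before resuming the walk */
		return 1;		/* tell the caller the lock was dropped */
	}
	return 0;
}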
parent c0868962
mm/mremap.c
@@ -56,16 +56,18 @@ static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
-static inline int page_table_present(struct mm_struct *mm, unsigned long addr)
+static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
 	if (pgd_none(*pgd))
-		return 0;
+		return NULL;
 	pmd = pmd_offset(pgd, addr);
-	return pmd_present(*pmd);
+	if (!pmd_present(*pmd))
+		return NULL;
+	return pte_offset_map(pmd, addr);
 }
 
 static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
@@ -98,11 +100,23 @@ static int
 move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 		unsigned long new_addr)
 {
+	struct address_space *mapping = NULL;
 	struct mm_struct *mm = vma->vm_mm;
 	int error = 0;
 	pte_t *src, *dst;
 
+	if (vma->vm_file) {
+		/*
+		 * Subtle point from Rajesh Venkatasubramanian: before
+		 * moving file-based ptes, we must lock vmtruncate out,
+		 * since it might clean the dst vma before the src vma,
+		 * and we propagate stale pages into the dst afterward.
+		 */
+		mapping = vma->vm_file->f_mapping;
+		spin_lock(&mapping->i_mmap_lock);
+	}
 	spin_lock(&mm->page_table_lock);
 	src = get_one_pte_map_nested(mm, old_addr);
 	if (src) {
 		/*
@@ -110,13 +124,19 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 		 * memory allocation. If it does then we need to drop the
 		 * atomic kmap
 		 */
-		if (!page_table_present(mm, new_addr)) {
+		dst = get_one_pte_map(mm, new_addr);
+		if (unlikely(!dst)) {
 			pte_unmap_nested(src);
-			src = NULL;
-		}
-		dst = alloc_one_pte_map(mm, new_addr);
-		if (src == NULL)
+			if (mapping)
+				spin_unlock(&mapping->i_mmap_lock);
+			dst = alloc_one_pte_map(mm, new_addr);
+			if (mapping && !spin_trylock(&mapping->i_mmap_lock)) {
+				spin_unlock(&mm->page_table_lock);
+				spin_lock(&mapping->i_mmap_lock);
+				spin_lock(&mm->page_table_lock);
+			}
 			src = get_one_pte_map_nested(mm, old_addr);
+		}
 		/*
 		 * Since alloc_one_pte_map can drop and re-acquire
 		 * page_table_lock, we should re-check the src entry...
@@ -137,6 +157,8 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 		pte_unmap(dst);
 	}
 	spin_unlock(&mm->page_table_lock);
+	if (mapping)
+		spin_unlock(&mapping->i_mmap_lock);
 	return error;
 }
 
mm/rmap.c
@@ -794,6 +794,7 @@ static inline int try_to_unmap_file(struct page *page)
 	 * but even so use it as a guide to how hard we should try?
 	 */
 	page_map_unlock(page);
+	cond_resched_lock(&mapping->i_mmap_lock);
 
 	max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
 	if (max_nl_cursor == 0)
@@ -816,13 +817,13 @@ static inline int try_to_unmap_file(struct page *page)
 				vma->vm_private_data = (void *) cursor;
 				if ((int)mapcount <= 0)
 					goto relock;
-				cond_resched();
 			}
 			if (ret != SWAP_FAIL)
 				vma->vm_private_data =
 					(void *) max_nl_cursor;
 			ret = SWAP_AGAIN;
 		}
+		cond_resched_lock(&mapping->i_mmap_lock);
 		max_nl_cursor += CLUSTER_SIZE;
 	} while (max_nl_cursor <= max_nl_size);
 