Commit cab971db authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 7 object-based rmap

From: Hugh Dickins <hugh@veritas.com>

Dave McCracken's object-based reverse mapping scheme for file pages: why
build up and tear down chains of pte pointers for file pages, when
page->mapping has i_mmap and i_mmap_shared lists of all the vmas which
might contain that page, and it appears at one deterministic position
within the vma (unless vma is nonlinear - see next patch)?

Has some drawbacks: more work to locate the ptes from page_referenced and
try_to_unmap, especially if the i_mmap lists contain a lot of vmas covering
different ranges; has to down_trylock the i_shared_sem, and hope that
doesn't fail too often.  But attractive in that it uses less lowmem, and
shifts the rmap burden away from the hot paths, to swapout.
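
A minimal sketch of the i_shared_sem step, assuming kernel context and the 2.6-era names used in the message above (struct address_space, i_mmap, i_mmap_shared, down_trylock, up); the helper name and its return convention are invented here for illustration only. The point is that the unmap and reference paths cannot sleep on the semaphore, so a busy semaphore means the walk is simply skipped this time.

#include <linux/fs.h>		/* struct address_space with i_shared_sem (2.6.5-era) */
#include <asm/semaphore.h>	/* down_trylock(), up() */

/* Hypothetical helper: returns 1 if the vma lists could be walked,
 * 0 if the semaphore was busy and the caller should give up on this
 * page for now. */
static int file_rmap_walk_sketch(struct address_space *mapping)
{
	if (down_trylock(&mapping->i_shared_sem))
		return 0;	/* contended: "hope that doesn't fail too often" */

	/* ... walk mapping->i_mmap and mapping->i_mmap_shared here,
	 *     checking each vma for a pte that maps the page ... */

	up(&mapping->i_shared_sem);
	return 1;
}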

Hybrid scheme for the moment: carry on with pte_chains for anonymous pages,
that's unchanged; but file pages keep mapcount in the pte union of struct
page, where anonymous pages keep chain pointer or direct pte address: so
page_mapped(page) works on both.
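
A small standalone model of that hybrid union, assuming nothing beyond what the message and the struct page hunk below state (the toy types and the explicit anon flag are illustrative; the kernel decides via PageAnon on the page itself). Whichever representation a page uses, "mapped" means its entry in the union is non-zero, which is the property that lets one page_mapped() test cover both kinds of page.

#include <stdio.h>
#include <stdbool.h>

/* Toy model of the hybrid pte union; field names follow the struct page
 * hunk further down, everything else is a stand-in, not kernel code. */
struct toy_page {
	bool anon;			/* stand-in for PageAnon(page) */
	union {
		void *chain;		/* anon: pte_chain pointer or direct pte */
		unsigned int mapcount;	/* file: count of ptes mapping the page */
	} pte;
};

static bool toy_page_mapped(const struct toy_page *page)
{
	if (page->anon)
		return page->pte.chain != NULL;
	return page->pte.mapcount != 0;
}

int main(void)
{
	struct toy_page file = { .anon = false };
	struct toy_page anon = { .anon = true };

	file.pte.mapcount = 3;	/* mapped by three ptes */
	anon.pte.chain = &anon;	/* any non-NULL chain/direct entry */

	printf("file mapped: %d, anon mapped: %d\n",
	       (int)toy_page_mapped(&file), (int)toy_page_mapped(&anon));
	return 0;
}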

Hugh massaged it a little: distinct page_add_file_rmap entry point; list
searches check rss so as not to waste time on mms fully swapped out; check
mapcount to terminate once all ptes have been found; and a WARN_ON if
page_referenced should have but couldn't find all the ptes.
parent d61ae266
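
The two list-search refinements described above (skip mms that are fully swapped out, stop once mapcount ptes are accounted for) amount to the loop shape below, shown as a standalone toy model; the structures, the maps_the_page flag and the list linkage are stand-ins for the real mm/vma/i_mmap machinery, not the patch's code.

#include <stdio.h>

struct toy_mm {
	unsigned long rss;	/* resident pages; 0 = fully swapped out */
};

struct toy_vma {
	struct toy_mm *vm_mm;
	int maps_the_page;	/* stand-in for "this vma has a pte for the page" */
	struct toy_vma *next;	/* stand-in for the i_mmap list linkage */
};

/* Walk the vmas, skipping mms with no resident pages and stopping once
 * 'mapcount' ptes have been accounted for.  page_referenced's WARN_ON
 * corresponds to the return value coming up short of mapcount. */
static unsigned int walk_i_mmap(struct toy_vma *head, unsigned int mapcount)
{
	unsigned int found = 0;
	struct toy_vma *vma;

	for (vma = head; vma && found < mapcount; vma = vma->next) {
		if (vma->vm_mm->rss == 0)
			continue;	/* fully swapped out: no pte to find */
		if (vma->maps_the_page)
			found++;
	}
	return found;
}

int main(void)
{
	struct toy_mm resident = { 1234 }, swapped = { 0 };
	struct toy_vma c = { &resident, 1, NULL };
	struct toy_vma b = { &swapped,  0, &c };
	struct toy_vma a = { &resident, 1, &b };

	/* The page's mapcount says two ptes exist, so the walk stops at 'c'. */
	printf("found %u of 2 ptes\n", walk_i_mmap(&a, 2));
	return 0;
}
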
@@ -102,7 +102,7 @@
* can map.
*/
#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3))
#define PMD_SIZE (__IA64_UL(1) << PMD_SHIFT)
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PTRS_PER_PMD (__IA64_UL(1) << (PAGE_SHIFT-3))
......
@@ -185,6 +185,7 @@ struct page {
struct pte_chain *chain;/* Reverse pte mapping pointer.
* protected by PG_chainlock */
pte_addr_t direct;
unsigned int mapcount; /* Count ptes mapped into mms */
} pte;
unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
......
@@ -27,6 +27,7 @@ static inline void pte_chain_free(struct pte_chain *pte_chain)
struct pte_chain * fastcall
page_add_rmap(struct page *, pte_t *, struct pte_chain *);
void fastcall page_add_file_rmap(struct page *);
void fastcall page_remove_rmap(struct page *, pte_t *);
/*
......
@@ -49,7 +49,7 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
* Install a page to a given virtual memory address, release any
* Install a file page to a given virtual memory address, release any
* previously existing mapping.
*/
int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -60,11 +60,13 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
pgd_t *pgd;
pmd_t *pmd;
pte_t pte_val;
struct pte_chain *pte_chain;
pte_chain = pte_chain_alloc(GFP_KERNEL);
if (!pte_chain)
goto err;
/*
* We use page_add_file_rmap below: if install_page is
* ever extended to anonymous pages, this will warn us.
*/
BUG_ON(!page_mapping(page));
pgd = pgd_offset(mm, addr);
spin_lock(&mm->page_table_lock);
@@ -81,18 +83,14 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
mm->rss++;
flush_icache_page(vma, page);
set_pte(pte, mk_pte(page, prot));
pte_chain = page_add_rmap(page, pte, pte_chain);
page_add_file_rmap(page);
pte_val = *pte;
pte_unmap(pte);
update_mmu_cache(vma, addr, pte_val);
spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
return 0;
err = 0;
err_unlock:
spin_unlock(&mm->page_table_lock);
pte_chain_free(pte_chain);
err:
return err;
}
EXPORT_SYMBOL(install_page);
......
@@ -331,8 +331,11 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
dst->rss++;
set_pte(dst_pte, pte);
pte_chain = page_add_rmap(page, dst_pte,
pte_chain);
if (PageAnon(page))
pte_chain = page_add_rmap(page,
dst_pte, pte_chain);
else
page_add_file_rmap(page);
if (pte_chain)
goto cont_copy_pte_range_noset;
pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN);
@@ -1489,6 +1492,7 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct pte_chain *pte_chain;
int sequence = 0;
int ret = VM_FAULT_MINOR;
int anon = 0;
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, page_table,
@@ -1523,8 +1527,8 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto oom;
copy_user_highpage(page, new_page, address);
page_cache_release(new_page);
lru_cache_add_active(page);
new_page = page;
anon = 1;
}
spin_lock(&mm->page_table_lock);
@@ -1562,7 +1566,12 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte(page_table, entry);
pte_chain = page_add_rmap(new_page, page_table, pte_chain);
if (anon) {
lru_cache_add_active(new_page);
pte_chain = page_add_rmap(new_page,
page_table, pte_chain);
} else
page_add_file_rmap(new_page);
pte_unmap(page_table);
} else {
/* One of our sibling threads was faster, back out. */
......
@@ -90,10 +90,12 @@ copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long pfn = pte_pfn(pte);
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
if (PageAnon(page)) {
page_remove_rmap(page, src);
*pte_chainp = page_add_rmap(page, dst, *pte_chainp);
}
}
}
}
static int
......
This diff is collapsed.