Commit edcc56dc authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] rmaplock: kill page_map_lock

The pte_chains rmap used pte_chain_lock (bit_spin_lock on PG_chainlock) to
lock its pte_chains.  We kept this (as page_map_lock: bit_spin_lock on
PG_maplock) when we moved to objrmap.  But the file objrmap locks its vma tree
with mapping->i_mmap_lock, and the anon objrmap locks its vma list with
anon_vma->lock: so isn't the page_map_lock superfluous?

Pretty much, yes.  The mapcount was protected by it, and needs to become an
atomic: starting at -1 like page _count, so nr_mapped can be tracked precisely
up and down.  The last page_remove_rmap can't clear anon page mapping any
more, because of races with page_add_rmap; some BUG_ONs must go for the
same reason, but they've served their purpose.
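
To recap the counting scheme for readers skimming the diff below (illustrative
fragments of the helpers this patch adds, not a separate API): with _mapcount
starting at -1, both interesting transitions fall out of single atomic
operations, so nr_mapped needs no extra lock:

        atomic_set(&page->_mapcount, -1);               /* reset: page not mapped */

        if (atomic_inc_and_test(&page->_mapcount))      /* -1 -> 0: first pte maps the page */
                inc_page_state(nr_mapped);

        if (atomic_add_negative(-1, &page->_mapcount))  /* 0 -> -1: last pte has gone */
                dec_page_state(nr_mapped);

        /* page_mapcount(page) == atomic_read(&page->_mapcount) + 1 */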

vmscan decisions are naturally racy; little change there beyond removing
page_map_lock/unlock.  But to stabilize the file-backed page->mapping against
truncation while acquiring i_mmap_lock, page_referenced_file now needs page
lock to be held even for refill_inactive_zone.  There's a similar issue in
acquiring anon_vma->lock, where page lock doesn't help: which this patch
pretends to handle, but actually it needs the next.
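
Concretely, page_referenced() now takes an is_locked argument and trylocks the
page itself on the refill_inactive_zone path (abbreviated from the diff below;
the comments here are editorial):

        if (page_mapped(page) && page->mapping) {
                if (PageAnon(page))
                        referenced += page_referenced_anon(page);
                else if (is_locked)
                        referenced += page_referenced_file(page);
                else if (TestSetPageLocked(page))
                        referenced++;           /* page busy elsewhere: treat as referenced */
                else if (page->mapping) {       /* recheck: truncation may have cleared it */
                        referenced += page_referenced_file(page);
                        unlock_page(page);
                }
        }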

Roughly 10% cut off lmbench fork numbers on my 2*HT*P4.  Must confess my
testing failed to show the races even while they were knowingly exposed: would
benefit from testing on racier equipment.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 6f055bc1
@@ -201,10 +201,9 @@ struct page {
         page_flags_t flags;             /* Atomic flags, some possibly
                                          * updated asynchronously */
         atomic_t _count;                /* Usage count, see below. */
-        unsigned int mapcount;          /* Count of ptes mapped in mms,
+        atomic_t _mapcount;             /* Count of ptes mapped in mms,
                                          * to show when page is mapped
-                                         * & limit reverse map searches,
-                                         * protected by PG_maplock.
+                                         * & limit reverse map searches.
                                          */
         unsigned long private;          /* Mapping-private opaque data:
                                          * usually used for buffer_heads
@@ -477,12 +476,27 @@ static inline pgoff_t page_index(struct page *page)
         return page->index;
 }
 
+/*
+ * The atomic page->_mapcount, like _count, starts from -1:
+ * so that transitions both from it and to it can be tracked,
+ * using atomic_inc_and_test and atomic_add_negative(-1).
+ */
+static inline void reset_page_mapcount(struct page *page)
+{
+        atomic_set(&(page)->_mapcount, -1);
+}
+
+static inline int page_mapcount(struct page *page)
+{
+        return atomic_read(&(page)->_mapcount) + 1;
+}
+
 /*
  * Return true if this page is mapped into pagetables.
  */
 static inline int page_mapped(struct page *page)
 {
-        return page->mapcount != 0;
+        return atomic_read(&(page)->_mapcount) >= 0;
 }
 
 /*
...
@@ -69,12 +69,11 @@
 #define PG_private              12      /* Has something at ->private */
 #define PG_writeback            13      /* Page is under writeback */
 #define PG_nosave               14      /* Used for system suspend/resume */
-#define PG_maplock              15      /* Lock bit for rmap to ptes */
+#define PG_compound             15      /* Part of a compound page */
 
 #define PG_swapcache            16      /* Swap page: swp_entry_t in private */
 #define PG_mappedtodisk         17      /* Has blocks allocated on-disk */
 #define PG_reclaim              18      /* To be reclaimed asap */
-#define PG_compound             19      /* Part of a compound page */
 
 /*
...
@@ -9,11 +9,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
-#define page_map_lock(page) \
-        bit_spin_lock(PG_maplock, (unsigned long *)&(page)->flags)
-#define page_map_unlock(page) \
-        bit_spin_unlock(PG_maplock, (unsigned long *)&(page)->flags)
-
 /*
  * The anon_vma heads a list of private "related" vmas, to scan if
  * an anonymous page pointing to this anon_vma needs to be unmapped:
@@ -87,15 +82,13 @@ void page_remove_rmap(struct page *);
  */
 static inline void page_dup_rmap(struct page *page)
 {
-        page_map_lock(page);
-        page->mapcount++;
-        page_map_unlock(page);
+        atomic_inc(&page->_mapcount);
 }
 
 /*
  * Called from mm/vmscan.c to handle paging out
  */
-int page_referenced(struct page *);
+int page_referenced(struct page *, int is_locked);
 int try_to_unmap(struct page *);
 
 #else /* !CONFIG_MMU */
@@ -104,7 +97,7 @@ int try_to_unmap(struct page *);
 #define anon_vma_prepare(vma)   (0)
 #define anon_vma_link(vma)      do {} while (0)
 
-#define page_referenced(page)   TestClearPageReferenced(page)
+#define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page)      SWAP_FAIL
 
 #endif /* CONFIG_MMU */
...
@@ -78,7 +78,7 @@ static void bad_page(const char *function, struct page *page)
                 function, current->comm, page);
         printk(KERN_EMERG "flags:0x%08lx mapping:%p mapcount:%d count:%d\n",
                 (unsigned long)page->flags, page->mapping,
-                (int)page->mapcount, page_count(page));
+                page_mapcount(page), page_count(page));
         printk(KERN_EMERG "Backtrace:\n");
         dump_stack();
         printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
@@ -87,12 +87,11 @@ static void bad_page(const char *function, struct page *page)
                         1 << PG_lru |
                         1 << PG_active |
                         1 << PG_dirty |
-                        1 << PG_maplock |
                         1 << PG_swapcache |
                         1 << PG_writeback);
         set_page_count(page, 0);
+        reset_page_mapcount(page);
         page->mapping = NULL;
-        page->mapcount = 0;
 }
 
 #ifndef CONFIG_HUGETLB_PAGE
@@ -228,7 +227,6 @@ static inline void free_pages_check(const char *function, struct page *page)
                         1 << PG_active |
                         1 << PG_reclaim |
                         1 << PG_slab |
-                        1 << PG_maplock |
                         1 << PG_swapcache |
                         1 << PG_writeback )))
                 bad_page(function, page);
@@ -350,7 +348,6 @@ static void prep_new_page(struct page *page, int order)
                         1 << PG_active |
                         1 << PG_dirty |
                         1 << PG_reclaim |
-                        1 << PG_maplock |
                         1 << PG_swapcache |
                         1 << PG_writeback )))
                 bad_page(__FUNCTION__, page);
@@ -512,6 +509,8 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
         kernel_map_pages(page, 1, 0);
         inc_page_state(pgfree);
+        if (PageAnon(page))
+                page->mapping = NULL;
         free_pages_check(__FUNCTION__, page);
         pcp = &zone->pageset[get_cpu()].pcp[cold];
         local_irq_save(flags);
@@ -1389,6 +1388,7 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         for (page = start; page < (start + size); page++) {
                 set_page_zone(page, NODEZONE(nid, zone));
                 set_page_count(page, 0);
+                reset_page_mapcount(page);
                 SetPageReserved(page);
                 INIT_LIST_HEAD(&page->lru);
 #ifdef WANT_PAGE_VIRTUAL
...
@@ -163,13 +163,6 @@ void __init anon_vma_init(void)
                 sizeof(struct anon_vma), 0, SLAB_PANIC, anon_vma_ctor, NULL);
 }
 
-/* this needs the page->flags PG_maplock held */
-static inline void clear_page_anon(struct page *page)
-{
-        BUG_ON(!page->mapping);
-        page->mapping = NULL;
-}
-
 /*
  * At what user virtual address is page expected in vma?
  */
@@ -242,15 +235,22 @@ static int page_referenced_one(struct page *page,
         return referenced;
 }
 
-static inline int page_referenced_anon(struct page *page)
+static int page_referenced_anon(struct page *page)
 {
-        unsigned int mapcount = page->mapcount;
+        unsigned int mapcount;
         struct anon_vma *anon_vma = (void *) page->mapping - PAGE_MAPPING_ANON;
         struct vm_area_struct *vma;
         int referenced = 0;
 
+        /*
+         * Recheck mapcount: it is not safe to take anon_vma->lock after
+         * last page_remove_rmap, since struct anon_vma might be reused.
+         */
+        mapcount = page_mapcount(page);
+        if (!mapcount)
+                return referenced;
+
         spin_lock(&anon_vma->lock);
-        BUG_ON(list_empty(&anon_vma->head));
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
                 referenced += page_referenced_one(page, vma, &mapcount);
                 if (!mapcount)
@@ -274,18 +274,39 @@ static inline int page_referenced_anon(struct page *page)
  * The spinlock address_space->i_mmap_lock is tried. If it can't be gotten,
  * assume a reference count of 0, so try_to_unmap will then have a go.
  */
-static inline int page_referenced_file(struct page *page)
+static int page_referenced_file(struct page *page)
 {
-        unsigned int mapcount = page->mapcount;
+        unsigned int mapcount;
         struct address_space *mapping = page->mapping;
         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
         struct vm_area_struct *vma;
         struct prio_tree_iter iter;
         int referenced = 0;
 
+        /*
+         * The caller's checks on page->mapping and !PageAnon have made
+         * sure that this is a file page: the check for page->mapping
+         * excludes the case just before it gets set on an anon page.
+         */
+        BUG_ON(PageAnon(page));
+
+        /*
+         * The page lock not only makes sure that page->mapping cannot
+         * suddenly be NULLified by truncation, it makes sure that the
+         * structure at mapping cannot be freed and reused yet,
+         * so we can safely take mapping->i_mmap_lock.
+         */
+        BUG_ON(!PageLocked(page));
+
         if (!spin_trylock(&mapping->i_mmap_lock))
                 return 0;
 
+        /*
+         * i_mmap_lock does not stabilize mapcount at all, but mapcount
+         * is more likely to be accurate if we note it after spinning.
+         */
+        mapcount = page_mapcount(page);
+
         vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                 if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
                                   == (VM_LOCKED|VM_MAYSHARE)) {
@@ -304,12 +325,12 @@ static inline int page_referenced_file(struct page *page)
 /**
  * page_referenced - test if the page was referenced
  * @page: the page to test
+ * @is_locked: caller holds lock on the page
  *
  * Quick test_and_clear_referenced for all mappings to a page,
  * returns the number of ptes which referenced the page.
- * Caller needs to hold the rmap lock.
  */
-int page_referenced(struct page *page)
+int page_referenced(struct page *page, int is_locked)
 {
         int referenced = 0;
 
@@ -319,11 +340,17 @@ int page_referenced(struct page *page)
         if (TestClearPageReferenced(page))
                 referenced++;
 
-        if (page->mapcount && page->mapping) {
+        if (page_mapped(page) && page->mapping) {
                 if (PageAnon(page))
                         referenced += page_referenced_anon(page);
-                else
+                else if (is_locked)
+                        referenced += page_referenced_file(page);
+                else if (TestSetPageLocked(page))
+                        referenced++;
+                else if (page->mapping) {
                         referenced += page_referenced_file(page);
+                        unlock_page(page);
+                }
         }
         return referenced;
 }
@@ -350,18 +377,12 @@ void page_add_anon_rmap(struct page *page,
         index += vma->vm_pgoff;
         index >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
 
-        page_map_lock(page);
-        if (!page->mapcount) {
-                BUG_ON(page->mapping);
+        if (atomic_inc_and_test(&page->_mapcount)) {
                 page->index = index;
                 page->mapping = (struct address_space *) anon_vma;
                 inc_page_state(nr_mapped);
-        } else {
-                BUG_ON(page->index != index);
-                BUG_ON(page->mapping != (struct address_space *) anon_vma);
         }
-        page->mapcount++;
-        page_map_unlock(page);
+        /* else checking page index and mapping is racy */
 }
 
 /**
@@ -376,11 +397,8 @@ void page_add_file_rmap(struct page *page)
         if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
                 return;
 
-        page_map_lock(page);
-        if (!page->mapcount)
+        if (atomic_inc_and_test(&page->_mapcount))
                 inc_page_state(nr_mapped);
-        page->mapcount++;
-        page_map_unlock(page);
 }
 
 /**
@@ -392,18 +410,20 @@ void page_add_file_rmap(struct page *page)
 void page_remove_rmap(struct page *page)
 {
         BUG_ON(PageReserved(page));
-        BUG_ON(!page->mapcount);
 
-        page_map_lock(page);
-        page->mapcount--;
-        if (!page->mapcount) {
+        if (atomic_add_negative(-1, &page->_mapcount)) {
+                BUG_ON(page_mapcount(page) < 0);
+                /*
+                 * It would be tidy to reset the PageAnon mapping here,
+                 * but that might overwrite a racing page_add_anon_rmap
+                 * which increments mapcount after us but sets mapping
+                 * before us: so leave the reset to free_hot_cold_page,
+                 * and remember that it's only reliable while mapped.
+                 */
                 if (page_test_and_clear_dirty(page))
                         set_page_dirty(page);
-                if (PageAnon(page))
-                        clear_page_anon(page);
                 dec_page_state(nr_mapped);
         }
-        page_map_unlock(page);
 }
 
 /*
@@ -475,7 +495,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
          * ptes from being unmapped, so swapoff can make progress.
          */
         if (PageSwapCache(page) &&
-                        page_count(page) != page->mapcount + 2) {
+                        page_count(page) != page_mapcount(page) + 2) {
                 ret = SWAP_FAIL;
                 goto out_unmap;
         }
@@ -501,8 +521,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
         }
 
         mm->rss--;
-        BUG_ON(!page->mapcount);
-        page->mapcount--;
+        page_remove_rmap(page);
         page_cache_release(page);
 
 out_unmap:
@@ -613,17 +632,23 @@ static int try_to_unmap_cluster(unsigned long cursor,
         return SWAP_AGAIN;
 }
 
-static inline int try_to_unmap_anon(struct page *page)
+static int try_to_unmap_anon(struct page *page)
 {
         struct anon_vma *anon_vma = (void *) page->mapping - PAGE_MAPPING_ANON;
         struct vm_area_struct *vma;
         int ret = SWAP_AGAIN;
 
+        /*
+         * Recheck mapped: it is not safe to take anon_vma->lock after
+         * last page_remove_rmap, since struct anon_vma might be reused.
+         */
+        if (!page_mapped(page))
+                return ret;
+
         spin_lock(&anon_vma->lock);
-        BUG_ON(list_empty(&anon_vma->head));
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
                 ret = try_to_unmap_one(page, vma);
-                if (ret == SWAP_FAIL || !page->mapcount)
+                if (ret == SWAP_FAIL || !page_mapped(page))
                         break;
         }
         spin_unlock(&anon_vma->lock);
@@ -642,7 +667,7 @@ static inline int try_to_unmap_anon(struct page *page)
  * The spinlock address_space->i_mmap_lock is tried. If it can't be gotten,
  * return a temporary error.
  */
-static inline int try_to_unmap_file(struct page *page)
+static int try_to_unmap_file(struct page *page)
 {
         struct address_space *mapping = page->mapping;
         pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -659,7 +684,7 @@ static inline int try_to_unmap_file(struct page *page)
         vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
                 ret = try_to_unmap_one(page, vma);
-                if (ret == SWAP_FAIL || !page->mapcount)
+                if (ret == SWAP_FAIL || !page_mapped(page))
                         goto out;
         }
@@ -688,8 +713,9 @@ static inline int try_to_unmap_file(struct page *page)
          * The mapcount of the page we came in with is irrelevant,
          * but even so use it as a guide to how hard we should try?
          */
-        mapcount = page->mapcount;
-        page_map_unlock(page);
+        mapcount = page_mapcount(page);
+        if (!mapcount)
+                goto out;
         cond_resched_lock(&mapping->i_mmap_lock);
 
         max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
@@ -712,7 +738,7 @@ static inline int try_to_unmap_file(struct page *page)
                         cursor += CLUSTER_SIZE;
                         vma->vm_private_data = (void *) cursor;
                         if ((int)mapcount <= 0)
-                                goto relock;
+                                goto out;
                 }
                 if (ret != SWAP_FAIL)
                         vma->vm_private_data =
@@ -733,8 +759,6 @@ static inline int try_to_unmap_file(struct page *page)
                 if (!(vma->vm_flags & VM_RESERVED))
                         vma->vm_private_data = NULL;
         }
-relock:
-        page_map_lock(page);
 out:
         spin_unlock(&mapping->i_mmap_lock);
         return ret;
@@ -745,11 +769,11 @@ static inline int try_to_unmap_file(struct page *page)
  * @page: the page to get unmapped
  *
  * Tries to remove all the page table entries which are mapping this
- * page, used in the pageout path. Caller must hold the page lock
- * and its rmap lock. Return values are:
+ * page, used in the pageout path. Caller must hold the page lock.
+ * Return values are:
  *
  * SWAP_SUCCESS - we succeeded in removing all mappings
- * SWAP_AGAIN   - we missed a trylock, try again later
+ * SWAP_AGAIN   - we missed a mapping, try again later
 * SWAP_FAIL    - the page is unswappable
 */
 int try_to_unmap(struct page *page)
@@ -758,20 +782,13 @@ int try_to_unmap(struct page *page)
         BUG_ON(PageReserved(page));
         BUG_ON(!PageLocked(page));
-        BUG_ON(!page->mapcount);
 
         if (PageAnon(page))
                 ret = try_to_unmap_anon(page);
         else
                 ret = try_to_unmap_file(page);
 
-        if (!page->mapcount) {
-                if (page_test_and_clear_dirty(page))
-                        set_page_dirty(page);
-                if (PageAnon(page))
-                        clear_page_anon(page);
-                dec_page_state(nr_mapped);
+        if (!page_mapped(page))
                 ret = SWAP_SUCCESS;
-        }
         return ret;
 }
@@ -219,7 +219,7 @@ static int shrink_slab(unsigned long scanned, unsigned int gfp_mask,
         return 0;
 }
 
-/* Must be called with page's rmap lock held. */
+/* Called without lock on whether page is mapped, so answer is unstable */
 static inline int page_mapping_inuse(struct page *page)
 {
         struct address_space *mapping;
@@ -377,26 +377,19 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                 if (page_mapped(page) || PageSwapCache(page))
                         sc->nr_scanned++;
 
-                page_map_lock(page);
-                referenced = page_referenced(page);
-                if (referenced && page_mapping_inuse(page)) {
-                        /* In active use or really unfreeable. Activate it. */
-                        page_map_unlock(page);
+                referenced = page_referenced(page, 1);
+                /* In active use or really unfreeable? Activate it. */
+                if (referenced && page_mapping_inuse(page))
                         goto activate_locked;
-                }
 
 #ifdef CONFIG_SWAP
                 /*
                  * Anonymous process memory has backing store?
                  * Try to allocate it some swap space here.
-                 *
-                 * XXX: implement swap clustering ?
                  */
                 if (PageAnon(page) && !PageSwapCache(page)) {
-                        page_map_unlock(page);
                         if (!add_to_swap(page))
                                 goto activate_locked;
-                        page_map_lock(page);
                 }
 #endif /* CONFIG_SWAP */
@@ -411,16 +404,13 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                 if (page_mapped(page) && mapping) {
                         switch (try_to_unmap(page)) {
                         case SWAP_FAIL:
-                                page_map_unlock(page);
                                 goto activate_locked;
                         case SWAP_AGAIN:
-                                page_map_unlock(page);
                                 goto keep_locked;
                         case SWAP_SUCCESS:
                                 ; /* try to free the page below */
                         }
                 }
-                page_map_unlock(page);
 
                 if (PageDirty(page)) {
                         if (referenced)
@@ -723,25 +713,12 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
                 page = lru_to_page(&l_hold);
                 list_del(&page->lru);
                 if (page_mapped(page)) {
-                        if (!reclaim_mapped) {
-                                list_add(&page->lru, &l_active);
-                                continue;
-                        }
-                        page_map_lock(page);
-                        if (page_referenced(page)) {
-                                page_map_unlock(page);
+                        if (!reclaim_mapped ||
+                            (total_swap_pages == 0 && PageAnon(page)) ||
+                            page_referenced(page, 0)) {
                                 list_add(&page->lru, &l_active);
                                 continue;
                         }
-                        page_map_unlock(page);
-                }
-                /*
-                 * FIXME: need to consider page_count(page) here if/when we
-                 * reap orphaned pages via the LRU (Daniel's locking stuff)
-                 */
-                if (total_swap_pages == 0 && PageAnon(page)) {
-                        list_add(&page->lru, &l_active);
-                        continue;
                 }
                 list_add(&page->lru, &l_inactive);
         }
...