Commit 4875a601 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 2 anon and swapcache

From: Hugh Dickins <hugh@veritas.com>

Tracking anonymous pages by anon_vma,pgoff or mm,address needs a
pointer,offset pair in struct page: mapping,index is the natural choice.  But
swapcache currently uses those fields for &swapper_space,swp_entry_t.

It's trivial to separate swapcache from pagecache with the radix tree; most of
swapper_space is actually unused, just a fiction to pretend swap is file-like;
and page->private is a good place to keep the swp_entry_t, now that swap never
uses buffer_heads.
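Concretely, once swapcache stops borrowing mapping,index, the swap location
of a swapcache page is recovered from page->private.  A minimal sketch of the
new convention; swap_entry_of() is a hypothetical name for illustration only,
the patch open-codes this at each site (e.g. get_swap_bio, exclusive_swap_page):

	/* Hypothetical helper: recover the swap location of a swapcache page. */
	static inline swp_entry_t swap_entry_of(struct page *page)
	{
		swp_entry_t entry;

		BUG_ON(!PageSwapCache(page));
		entry.val = page->private;	/* before this patch: page->index */
		return entry;
	}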

Define a PG_anon bit; have page_add_rmap SetPageAnon and put an oopsable
address in page->mapping, to test that we're not confused by it.  Define a
page_mapping(page) helper to give NULL when PageAnon, whatever may be in
page->mapping.  Define a PG_swapcache bit, and deduce swapper_space from that
in the few places we need it.
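In code terms, the new bits and the lookup condense to this (excerpted,
lightly condensed, from the diff below):

	#define PG_anon		20	/* Anonymous page: anon_vma in mapping */
	#define PG_swapcache	21	/* Swap page: swp_entry_t in private */

	#define PageAnon(page)		test_bit(PG_anon, &(page)->flags)
	#define PageSwapCache(page)	test_bit(PG_swapcache, &(page)->flags)

	/* Hide an anon page's rmap pointer from pagecache paths. */
	static inline struct address_space *page_mapping(struct page *page)
	{
		return PageAnon(page)? NULL: page->mapping;
	}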

add_to_swap_cache is now distinct from add_to_page_cache.  Separating the
caches somewhat simplifies the tmpfs swizzling in swap_state.c: the page can
now briefly be in both caches.
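Condensed from add_to_page_cache() and __add_to_swap_cache() in the diff
below, the two insertion paths now differ only in which fields they fill in
after a successful radix_tree_insert():

	/* pagecache insertion (add_to_page_cache, mm/filemap.c): */
	SetPageLocked(page);
	page->mapping = mapping;
	page->index = offset;
	mapping->nrpages++;
	pagecache_acct(1);

	/* swapcache insertion (__add_to_swap_cache, mm/swap_state.c): */
	SetPageLocked(page);
	SetPageSwapCache(page);
	page->private = entry.val;
	total_swapcache_pages++;
	pagecache_acct(1);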

The rmap method remains pte chains; no change to that yet.  But there is one
small functional difference: the use of PageAnon means that a page truncated
while still mapped will no longer be found and freed (swapped out) by
try_to_unmap; it will only be freed by exit or munmap.  Normally, though,
pages are unmapped by vmtruncate: this should only affect nonlinear mappings,
and a later patch (not in this batch) will fix that.
parent 4c4acd24
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -836,19 +836,10 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  *
  * FIXME: may need to call ->reservepage here as well.  That's rather up to the
  * address_space though.
- *
- * For now, we treat swapper_space specially.  It doesn't use the normal
- * block a_ops.
  */
 int __set_page_dirty_buffers(struct page *page)
 {
 	struct address_space * const mapping = page->mapping;
-	int ret = 0;
-
-	if (mapping == NULL) {
-		SetPageDirty(page);
-		goto out;
-	}
 
 	spin_lock(&mapping->private_lock);
 	if (page_has_buffers(page)) {
@@ -877,8 +868,7 @@ int __set_page_dirty_buffers(struct page *page)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 	}
 
-out:
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(__set_page_dirty_buffers);
@@ -1577,8 +1567,7 @@ int try_to_release_page(struct page *page, int gfp_mask)
 {
 	struct address_space * const mapping = page->mapping;
 
-	if (!PageLocked(page))
-		BUG();
+	BUG_ON(!PageLocked(page));
 	if (PageWriteback(page))
 		return 0;
@@ -2895,14 +2884,14 @@ int try_to_free_buffers(struct page *page)
 	if (PageWriteback(page))
 		return 0;
 
-	if (mapping == NULL) {		/* swapped-in anon page */
+	if (mapping == NULL) {		/* can this still happen? */
 		ret = drop_buffers(page, &buffers_to_free);
 		goto out;
 	}
 
 	spin_lock(&mapping->private_lock);
 	ret = drop_buffers(page, &buffers_to_free);
-	if (ret && !PageSwapCache(page)) {
+	if (ret) {
 		/*
 		 * If the filesystem writes its buffers by hand (eg ext3)
 		 * then we can have clean buffers against a dirty page.  We
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -189,8 +189,11 @@ struct page {
 					 * protected by PG_chainlock */
 		pte_addr_t direct;
 	} pte;
-	unsigned long private;		/* mapping-private opaque data */
+	unsigned long private;		/* Mapping-private opaque data:
+					 * usually used for buffer_heads
+					 * if PagePrivate set; used for
+					 * swp_entry_t if PageSwapCache
+					 */
 	/*
 	 * On machines where all RAM is mapped into kernel address space,
 	 * we can simply calculate the virtual address. On machines with
@@ -402,6 +405,19 @@ void page_address_init(void);
 #define page_address_init()  do { } while(0)
 #endif
 
+/*
+ * On an anonymous page mapped into a user virtual memory area,
+ * page->mapping points to its anon_vma, not to a struct address_space.
+ *
+ * Please note that, confusingly, "page_mapping" refers to the inode
+ * address_space which maps the page from disk; whereas "page_mapped"
+ * refers to user virtual address space into which the page is mapped.
+ */
+static inline struct address_space *page_mapping(struct page *page)
+{
+	return PageAnon(page)? NULL: page->mapping;
+}
+
 /*
  * Return true if this page is mapped into pagetables.  Subtle: test pte.direct
  * rather than pte.chain.  Because sometimes pte.direct is 64-bit, and .chain
@@ -471,6 +487,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
 
 int __set_page_dirty_buffers(struct page *page);
 int __set_page_dirty_nobuffers(struct page *page);
+int FASTCALL(set_page_dirty(struct page *page));
 int set_page_dirty_lock(struct page *page);
 int clear_page_dirty_for_io(struct page *page);
@@ -497,23 +514,6 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
-/*
- * If the mapping doesn't provide a set_page_dirty a_op, then
- * just fall through and assume that it wants buffer_heads.
- * FIXME: make the method unconditional.
- */
-static inline int set_page_dirty(struct page *page)
-{
-	if (page->mapping) {
-		int (*spd)(struct page *);
-
-		spd = page->mapping->a_ops->set_page_dirty;
-		if (spd)
-			return (*spd)(page);
-	}
-	return __set_page_dirty_buffers(page);
-}
-
 /*
  * On a two-level page table, this ends up being trivial. Thus the
  * inlining and the symmetry break with pte_alloc_map() that does all
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -75,6 +75,8 @@
 #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_compound		19	/* Part of a compound page */
+#define PG_anon			20	/* Anonymous page: anon_vma in mapping */
+#define PG_swapcache		21	/* Swap page: swp_entry_t in private */
 
 /*
@@ -298,13 +300,14 @@ extern void get_full_page_state(struct page_state *ret);
 #define SetPageCompound(page)	set_bit(PG_compound, &(page)->flags)
 #define ClearPageCompound(page)	clear_bit(PG_compound, &(page)->flags)
 
-/*
- * The PageSwapCache predicate doesn't use a PG_flag at this time,
- * but it may again do so one day.
- */
+#define PageAnon(page)		test_bit(PG_anon, &(page)->flags)
+#define SetPageAnon(page)	set_bit(PG_anon, &(page)->flags)
+#define ClearPageAnon(page)	clear_bit(PG_anon, &(page)->flags)
+
 #ifdef CONFIG_SWAP
-extern struct address_space swapper_space;
-#define PageSwapCache(page) ((page)->mapping == &swapper_space)
+#define PageSwapCache(page)	test_bit(PG_swapcache, &(page)->flags)
+#define SetPageSwapCache(page)	set_bit(PG_swapcache, &(page)->flags)
+#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags)
 #else
 #define PageSwapCache(page)	0
 #endif
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -122,9 +122,13 @@ static inline int sync_page(struct page *page)
 	struct address_space *mapping;
 
 	smp_mb();
-	mapping = page->mapping;
-	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-		return mapping->a_ops->sync_page(page);
+	mapping = page_mapping(page);
+	if (mapping) {
+		if (mapping->a_ops && mapping->a_ops->sync_page)
+			return mapping->a_ops->sync_page(page);
+	} else if (PageSwapCache(page)) {
+		swap_unplug_io_fn(NULL);
+	}
 	return 0;
 }
@@ -242,13 +246,9 @@ int filemap_write_and_wait(struct address_space *mapping)
  * This function is used for two things: adding newly allocated pagecache
  * pages and for moving existing anon pages into swapcache.
  *
- * In the case of pagecache pages, the page is new, so we can just run
- * SetPageLocked() against it.  The other page state flags were set by
- * rmqueue()
- *
- * In the case of swapcache, try_to_swap_out() has already locked the page, so
- * SetPageLocked() is ugly-but-OK there too.  The required page state has been
- * set up by swap_out_add_to_swap_cache().
+ * This function is used to add newly allocated pagecache pages:
+ * the page is new, so we can just run SetPageLocked() against it.
+ * The other page state flags were set by rmqueue().
  *
  * This function does not add the page to the LRU.  The caller must do that.
  */
@@ -263,7 +263,10 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
 		error = radix_tree_insert(&mapping->page_tree, offset, page);
 		if (!error) {
 			SetPageLocked(page);
-			___add_to_page_cache(page, mapping, offset);
+			page->mapping = mapping;
+			page->index = offset;
+			mapping->nrpages++;
+			pagecache_acct(1);
 		} else {
 			page_cache_release(page);
 		}
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -417,8 +417,8 @@ zap_pte_range(struct mmu_gather *tlb, pmd_t * pmd,
 			if (!PageReserved(page)) {
 				if (pte_dirty(pte))
 					set_page_dirty(page);
-				if (page->mapping && pte_young(pte) &&
-						!PageSwapCache(page))
+				if (pte_young(pte) &&
+						page_mapping(page))
 					mark_page_accessed(page);
 				tlb->freed++;
 				page_remove_rmap(page, ptep);
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -579,6 +579,24 @@ int __set_page_dirty_nobuffers(struct page *page)
 }
 EXPORT_SYMBOL(__set_page_dirty_nobuffers);
 
+/*
+ * If the mapping doesn't provide a set_page_dirty a_op, then
+ * just fall through and assume that it wants buffer_heads.
+ */
+int fastcall set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	int (*spd)(struct page *);
+
+	if (!mapping) {
+		SetPageDirty(page);
+		return 0;
+	}
+	spd = mapping->a_ops->set_page_dirty;
+	return spd? (*spd)(page): __set_page_dirty_buffers(page);
+}
+EXPORT_SYMBOL(set_page_dirty);
+
 /*
  * set_page_dirty() is racy if the caller has no reference against
  * page->mapping->host, and if the page is unlocked.  This is because another
@@ -606,7 +624,7 @@ EXPORT_SYMBOL(set_page_dirty_lock);
  */
 int test_clear_page_dirty(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page_mapping(page);
 	unsigned long flags;
 
 	if (mapping) {
@@ -642,7 +660,7 @@ EXPORT_SYMBOL(test_clear_page_dirty);
  */
 int clear_page_dirty_for_io(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page_mapping(page);
 
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
@@ -661,7 +679,7 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
  */
 int __clear_page_dirty(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page_mapping(page);
 
 	if (mapping) {
 		unsigned long flags;
@@ -681,7 +699,7 @@ int __clear_page_dirty(struct page *page)
 int test_clear_page_writeback(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page_mapping(page);
 	int ret;
 
 	if (mapping) {
@@ -701,7 +719,7 @@ int test_clear_page_writeback(struct page *page)
 int test_set_page_writeback(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = page_mapping(page);
 	int ret;
 
 	if (mapping) {
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -84,6 +84,9 @@ static void bad_page(const char *function, struct page *page)
 			1 << PG_lru	|
 			1 << PG_active	|
 			1 << PG_dirty	|
+			1 << PG_maplock	|
+			1 << PG_anon	|
+			1 << PG_swapcache |
 			1 << PG_writeback);
 	set_page_count(page, 0);
 	page->mapping = NULL;
@@ -224,6 +227,9 @@ static inline void free_pages_check(const char *function, struct page *page)
 			1 << PG_active	|
 			1 << PG_reclaim	|
 			1 << PG_slab	|
+			1 << PG_maplock	|
+			1 << PG_anon	|
+			1 << PG_swapcache |
 			1 << PG_writeback )))
 		bad_page(function, page);
 	if (PageDirty(page))
@@ -331,6 +337,9 @@ static void prep_new_page(struct page *page, int order)
 			1 << PG_active	|
 			1 << PG_dirty	|
 			1 << PG_reclaim	|
+			1 << PG_maplock	|
+			1 << PG_anon	|
+			1 << PG_swapcache |
 			1 << PG_writeback )))
 		bad_page(__FUNCTION__, page);
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -16,8 +16,6 @@
 #include <linux/swap.h>
 #include <linux/bio.h>
 #include <linux/swapops.h>
-#include <linux/buffer_head.h>	/* for block_sync_page() */
-#include <linux/mpage.h>
 #include <linux/writeback.h>
 #include <asm/pgtable.h>
@@ -32,7 +30,7 @@ get_swap_bio(int gfp_flags, struct page *page, bio_end_io_t end_io)
 		swp_entry_t entry;
 
 		BUG_ON(!PageSwapCache(page));
-		entry.val = page->index;
+		entry.val = page->private;
 		sis = get_swap_info_struct(swp_type(entry));
 		bio->bi_sector = map_swap_page(sis, swp_offset(entry)) *
@@ -132,13 +130,6 @@ int swap_readpage(struct file *file, struct page *page)
 	return ret;
 }
 
-struct address_space_operations swap_aops = {
-	.writepage	= swap_writepage,
-	.readpage	= swap_readpage,
-	.sync_page	= block_sync_page,
-	.set_page_dirty	= __set_page_dirty_nobuffers,
-};
-
 #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_PM_DISK)
 
 /*
@@ -148,25 +139,15 @@ struct address_space_operations swap_aops = {
 int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
 {
 	int ret;
+	unsigned long save_private;
 	struct writeback_control swap_wbc = {
 		.sync_mode = WB_SYNC_ALL,
 	};
 
 	lock_page(page);
-	BUG_ON(page->mapping);
-	ret = add_to_page_cache(page, &swapper_space,
-			entry.val, GFP_NOIO|__GFP_NOFAIL);
-	if (ret) {
-		unlock_page(page);
-		goto out;
-	}
-	/*
-	 * get one more reference to make page non-exclusive so
-	 * remove_exclusive_swap_page won't mess with it.
-	 */
-	page_cache_get(page);
+	SetPageSwapCache(page);
+	save_private = page->private;
+	page->private = entry.val;
 
 	if (rw == READ) {
 		ret = swap_readpage(NULL, page);
@@ -176,15 +157,10 @@ int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page)
 		wait_on_page_writeback(page);
 	}
 
-	lock_page(page);
-	remove_from_page_cache(page);
-	unlock_page(page);
-	page_cache_release(page);
-	page_cache_release(page);	/* For add_to_page_cache() */
+	ClearPageSwapCache(page);
+	page->private = save_private;
 
 	if (ret == 0 && (!PageUptodate(page) || PageError(page)))
 		ret = -EIO;
-out:
 	return ret;
 }
 #endif
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -35,7 +35,18 @@
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
-/* #define DEBUG_RMAP */
+/*
+ * Something oopsable to put for now in the page->mapping
+ * of an anonymous page, to test that it is ignored.
+ */
+#define ANON_MAPPING_DEBUG	((struct address_space *) 0xADB)
+
+static inline void clear_page_anon(struct page *page)
+{
+	BUG_ON(page->mapping != ANON_MAPPING_DEBUG);
+	page->mapping = NULL;
+	ClearPageAnon(page);
+}
 
 /*
  * Shared pages have a chain of pte_chain structures, used to locate
@@ -180,6 +191,10 @@ page_add_rmap(struct page *page, pte_t *ptep, struct pte_chain *pte_chain)
 	if (page->pte.direct == 0) {
 		page->pte.direct = pte_paddr;
 		SetPageDirect(page);
+		if (!page->mapping) {
+			SetPageAnon(page);
+			page->mapping = ANON_MAPPING_DEBUG;
+		}
 		inc_page_state(nr_mapped);
 		goto out;
 	}
@@ -271,10 +286,13 @@ void fastcall page_remove_rmap(struct page *page, pte_t *ptep)
 		}
 	}
 out:
-	if (page->pte.direct == 0 && page_test_and_clear_dirty(page))
-		set_page_dirty(page);
-	if (!page_mapped(page))
+	if (!page_mapped(page)) {
+		if (page_test_and_clear_dirty(page))
+			set_page_dirty(page);
+		if (PageAnon(page))
+			clear_page_anon(page);
 		dec_page_state(nr_mapped);
+	}
 out_unlock:
 	rmap_unlock(page);
 }
@@ -330,12 +348,13 @@ static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr)
 	flush_cache_page(vma, address);
 	pte = ptep_clear_flush(vma, address, ptep);
 
-	if (PageSwapCache(page)) {
+	if (PageAnon(page)) {
+		swp_entry_t entry = { .val = page->private };
 		/*
 		 * Store the swap location in the pte.
 		 * See handle_pte_fault() ...
 		 */
-		swp_entry_t entry = { .val = page->index };
+		BUG_ON(!PageSwapCache(page));
 		swap_duplicate(entry);
 		set_pte(ptep, swp_entry_to_pte(entry));
 		BUG_ON(pte_file(*ptep));
@@ -345,6 +364,7 @@ static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr)
 		 * If a nonlinear mapping then store the file page offset
 		 * in the pte.
 		 */
+		BUG_ON(!page->mapping);
 		pgidx = (address - vma->vm_start) >> PAGE_SHIFT;
 		pgidx += vma->vm_pgoff;
 		pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
@@ -391,15 +411,10 @@ int fastcall try_to_unmap(struct page * page)
 		BUG();
 	if (!PageLocked(page))
 		BUG();
-	/* We need backing store to swap out a page. */
-	if (!page->mapping)
-		BUG();
 
 	if (PageDirect(page)) {
 		ret = try_to_unmap_one(page, page->pte.direct);
 		if (ret == SWAP_SUCCESS) {
-			if (page_test_and_clear_dirty(page))
-				set_page_dirty(page);
 			page->pte.direct = 0;
 			ClearPageDirect(page);
 		}
@@ -436,9 +451,6 @@ int fastcall try_to_unmap(struct page * page)
 			} else {
 				start->next_and_idx++;
 			}
-			if (page->pte.direct == 0 &&
-			    page_test_and_clear_dirty(page))
-				set_page_dirty(page);
 			break;
 		case SWAP_AGAIN:
 			/* Skip this pte, remembering status. */
@@ -451,8 +463,14 @@ int fastcall try_to_unmap(struct page * page)
 		}
 	}
 out:
-	if (!page_mapped(page))
+	if (!page_mapped(page)) {
+		if (page_test_and_clear_dirty(page))
+			set_page_dirty(page);
+		if (PageAnon(page))
+			clear_page_anon(page);
 		dec_page_state(nr_mapped);
+		ret = SWAP_SUCCESS;
+	}
 	return ret;
 }
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -16,25 +16,24 @@
 #include <asm/pgtable.h>
 
+/*
+ * swapper_space is a fiction, retained to simplify the path through
+ * vmscan's shrink_list.  Only those fields initialized below are used.
+ */
+static struct address_space_operations swap_aops = {
+	.writepage	= swap_writepage,
+};
+
 static struct backing_dev_info swap_backing_dev_info = {
+	.ra_pages	= 0,	/* No readahead */
 	.memory_backed	= 1,	/* Does not contribute to dirty memory */
 	.unplug_io_fn	= swap_unplug_io_fn,
 };
 
-extern struct address_space_operations swap_aops;
-
 struct address_space swapper_space = {
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
 	.tree_lock	= SPIN_LOCK_UNLOCKED,
 	.a_ops		= &swap_aops,
 	.backing_dev_info = &swap_backing_dev_info,
-	.i_mmap		= LIST_HEAD_INIT(swapper_space.i_mmap),
-	.i_mmap_shared	= LIST_HEAD_INIT(swapper_space.i_mmap_shared),
-	.i_shared_sem	= __MUTEX_INITIALIZER(swapper_space.i_shared_sem),
-	.truncate_count	= ATOMIC_INIT(0),
-	.private_lock	= SPIN_LOCK_UNLOCKED,
-	.private_list	= LIST_HEAD_INIT(swapper_space.private_list),
 };
 
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
@@ -56,30 +55,55 @@ void show_swap_cache_info(void)
 		swap_cache_info.noent_race, swap_cache_info.exist_race);
 }
 
+/*
+ * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
+ * but sets SwapCache flag and private instead of mapping and index.
+ */
+static int __add_to_swap_cache(struct page *page,
+		swp_entry_t entry, int gfp_mask)
+{
+	int error;
+
+	BUG_ON(PageSwapCache(page));
+	BUG_ON(PagePrivate(page));
+	error = radix_tree_preload(gfp_mask);
+	if (!error) {
+		page_cache_get(page);
+		spin_lock(&swapper_space.tree_lock);
+		error = radix_tree_insert(&swapper_space.page_tree,
+						entry.val, page);
+		if (!error) {
+			SetPageLocked(page);
+			SetPageSwapCache(page);
+			page->private = entry.val;
+			total_swapcache_pages++;
+			pagecache_acct(1);
+		} else
+			page_cache_release(page);
+		spin_unlock(&swapper_space.tree_lock);
+		radix_tree_preload_end();
+	}
+	return error;
+}
+
 static int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
-	if (page->mapping)
-		BUG();
 	if (!swap_duplicate(entry)) {
 		INC_CACHE_INFO(noent_race);
 		return -ENOENT;
 	}
-	error = add_to_page_cache(page, &swapper_space, entry.val, GFP_KERNEL);
+	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
 	/*
 	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
 	 */
-	if (error != 0) {
+	if (error) {
 		swap_free(entry);
 		if (error == -EEXIST)
 			INC_CACHE_INFO(exist_race);
 		return error;
 	}
-	if (!PageLocked(page))
-		BUG();
-	if (!PageSwapCache(page))
-		BUG();
 	INC_CACHE_INFO(add_total);
 	return 0;
 }
@@ -93,7 +117,12 @@ void __delete_from_swap_cache(struct page *page)
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!PageSwapCache(page));
 	BUG_ON(PageWriteback(page));
-	__remove_from_page_cache(page);
+
+	radix_tree_delete(&swapper_space.page_tree, page->private);
+	page->private = 0;
+	ClearPageSwapCache(page);
+	total_swapcache_pages--;
+	pagecache_acct(-1);
 	INC_CACHE_INFO(del_total);
 }
@@ -137,8 +166,7 @@ int add_to_swap(struct page * page)
 	/*
 	 * Add it to the swap cache and mark it dirty
 	 */
-	err = add_to_page_cache(page, &swapper_space,
-			entry.val, GFP_ATOMIC);
+	err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
 
 	if (pf_flags & PF_MEMALLOC)
 		current->flags |= PF_MEMALLOC;
@@ -146,8 +174,7 @@ int add_to_swap(struct page * page)
 	switch (err) {
 	case 0:				/* Success */
 		SetPageUptodate(page);
-		__clear_page_dirty(page);
-		set_page_dirty(page);
+		SetPageDirty(page);
 		INC_CACHE_INFO(add_total);
 		return 1;
 	case -EEXIST:
@@ -173,81 +200,55 @@ void delete_from_swap_cache(struct page *page)
 {
 	swp_entry_t entry;
 
+	BUG_ON(!PageSwapCache(page));
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
 	BUG_ON(PagePrivate(page));
 
-	entry.val = page->index;
+	entry.val = page->private;
 
-	spin_lock_irq(&swapper_space.tree_lock);
+	spin_lock(&swapper_space.tree_lock);
 	__delete_from_swap_cache(page);
-	spin_unlock_irq(&swapper_space.tree_lock);
+	spin_unlock(&swapper_space.tree_lock);
 
 	swap_free(entry);
 	page_cache_release(page);
 }
 
+/*
+ * Strange swizzling function only for use by shmem_writepage
+ */
 int move_to_swap_cache(struct page *page, swp_entry_t entry)
 {
-	struct address_space *mapping = page->mapping;
-	int err;
-
-	spin_lock_irq(&swapper_space.tree_lock);
-	spin_lock(&mapping->tree_lock);
-
-	err = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
-	if (!err) {
-		__remove_from_page_cache(page);
-		___add_to_page_cache(page, &swapper_space, entry.val);
-	}
-
-	spin_unlock(&mapping->tree_lock);
-	spin_unlock_irq(&swapper_space.tree_lock);
-
+	int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
 	if (!err) {
+		remove_from_page_cache(page);
+		page_cache_release(page);	/* pagecache ref */
 		if (!swap_duplicate(entry))
 			BUG();
-		BUG_ON(PageDirty(page));
-		set_page_dirty(page);
+		SetPageDirty(page);
 		INC_CACHE_INFO(add_total);
 	} else if (err == -EEXIST)
 		INC_CACHE_INFO(exist_race);
 	return err;
 }
 
+/*
+ * Strange swizzling function for shmem_getpage (and shmem_unuse)
+ */
 int move_from_swap_cache(struct page *page, unsigned long index,
 		struct address_space *mapping)
 {
-	swp_entry_t entry;
-	int err;
-
-	BUG_ON(!PageLocked(page));
-	BUG_ON(PageWriteback(page));
-	BUG_ON(PagePrivate(page));
-
-	entry.val = page->index;
-
-	spin_lock_irq(&swapper_space.tree_lock);
-	spin_lock(&mapping->tree_lock);
-
-	err = radix_tree_insert(&mapping->page_tree, index, page);
+	int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
 	if (!err) {
-		__delete_from_swap_cache(page);
-		___add_to_page_cache(page, mapping, index);
-	}
-
-	spin_unlock(&mapping->tree_lock);
-	spin_unlock_irq(&swapper_space.tree_lock);
-
-	if (!err) {
-		swap_free(entry);
-		__clear_page_dirty(page);
+		delete_from_swap_cache(page);
+		/* shift page from clean_pages to dirty_pages list */
+		ClearPageDirty(page);
 		set_page_dirty(page);
 	}
 	return err;
 }
 
 /*
  * If we are the only user, then try to free up the swap cache. 
  *
@@ -305,19 +306,17 @@ void free_pages_and_swap_cache(struct page **pages, int nr)
  */
 struct page * lookup_swap_cache(swp_entry_t entry)
 {
-	struct page *found;
+	struct page *page;
 
-	found = find_get_page(&swapper_space, entry.val);
-	/*
-	 * Unsafe to assert PageSwapCache and mapping on page found:
-	 * if SMP nothing prevents swapoff from deleting this page from
-	 * the swap cache at this moment.  find_lock_page would prevent
-	 * that, but no need to change: we _have_ got the right page.
-	 */
-	INC_CACHE_INFO(find_total);
-	if (found)
+	spin_lock(&swapper_space.tree_lock);
+	page = radix_tree_lookup(&swapper_space.page_tree, entry.val);
+	if (page) {
+		page_cache_get(page);
 		INC_CACHE_INFO(find_success);
-	return found;
+	}
+	spin_unlock(&swapper_space.tree_lock);
+	INC_CACHE_INFO(find_total);
+	return page;
 }
@@ -335,10 +334,14 @@ struct page * read_swap_cache_async(swp_entry_t entry)
 		/*
 		 * First check the swap cache.  Since this is normally
 		 * called after lookup_swap_cache() failed, re-calling
-		 * that would confuse statistics: use find_get_page()
-		 * directly.
+		 * that would confuse statistics.
 		 */
-		found_page = find_get_page(&swapper_space, entry.val);
+		spin_lock(&swapper_space.tree_lock);
+		found_page = radix_tree_lookup(&swapper_space.page_tree,
+						entry.val);
+		if (found_page)
+			page_cache_get(found_page);
+		spin_unlock(&swapper_space.tree_lock);
 		if (found_page)
 			break;
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -304,16 +304,16 @@ static int exclusive_swap_page(struct page *page)
 	struct swap_info_struct * p;
 	swp_entry_t entry;
 
-	entry.val = page->index;
+	entry.val = page->private;
 	p = swap_info_get(entry);
 	if (p) {
 		/* Is the only swap cache user the cache itself? */
 		if (p->swap_map[swp_offset(entry)] == 1) {
-			/* Recheck the page count with the pagecache lock held.. */
-			spin_lock_irq(&swapper_space.tree_lock);
-			if (page_count(page) - !!PagePrivate(page) == 2)
+			/* Recheck the page count with the swapcache lock held.. */
+			spin_lock(&swapper_space.tree_lock);
+			if (page_count(page) == 2)
 				retval = 1;
-			spin_unlock_irq(&swapper_space.tree_lock);
+			spin_unlock(&swapper_space.tree_lock);
 		}
 		swap_info_put(p);
 	}
@@ -372,7 +372,7 @@ int remove_exclusive_swap_page(struct page *page)
 	if (page_count(page) != 2) /* 2: us + cache */
 		return 0;
 
-	entry.val = page->index;
+	entry.val = page->private;
 	p = swap_info_get(entry);
 	if (!p)
 		return 0;
@@ -380,14 +380,14 @@ int remove_exclusive_swap_page(struct page *page)
 	/* Is the only swap cache user the cache itself? */
 	retval = 0;
 	if (p->swap_map[swp_offset(entry)] == 1) {
-		/* Recheck the page count with the pagecache lock held.. */
-		spin_lock_irq(&swapper_space.tree_lock);
+		/* Recheck the page count with the swapcache lock held.. */
+		spin_lock(&swapper_space.tree_lock);
 		if ((page_count(page) == 2) && !PageWriteback(page)) {
 			__delete_from_swap_cache(page);
 			SetPageDirty(page);
 			retval = 1;
 		}
-		spin_unlock_irq(&swapper_space.tree_lock);
+		spin_unlock(&swapper_space.tree_lock);
 	}
 	swap_info_put(p);
@@ -410,8 +410,14 @@ void free_swap_and_cache(swp_entry_t entry)
 	p = swap_info_get(entry);
 	if (p) {
-		if (swap_entry_free(p, swp_offset(entry)) == 1)
-			page = find_trylock_page(&swapper_space, entry.val);
+		if (swap_entry_free(p, swp_offset(entry)) == 1) {
+			spin_lock(&swapper_space.tree_lock);
+			page = radix_tree_lookup(&swapper_space.page_tree,
+						entry.val);
+			if (page && TestSetPageLocked(page))
+				page = NULL;
+			spin_unlock(&swapper_space.tree_lock);
+		}
 		swap_info_put(p);
 	}
 	if (page) {
@@ -1053,14 +1059,14 @@ int page_queue_congested(struct page *page)
 	BUG_ON(!PageLocked(page));	/* It pins the swap_info_struct */
 
-	bdi = page->mapping->backing_dev_info;
 	if (PageSwapCache(page)) {
-		swp_entry_t entry = { .val = page->index };
+		swp_entry_t entry = { .val = page->private };
 		struct swap_info_struct *sis;
 
 		sis = get_swap_info_struct(swp_type(entry));
 		bdi = sis->bdev->bd_inode->i_mapping->backing_dev_info;
-	}
+	} else
+		bdi = page->mapping->backing_dev_info;
 	return bdi_write_congested(bdi);
 }
 #endif
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -176,20 +176,20 @@ static int shrink_slab(unsigned long scanned, unsigned int gfp_mask)
 /* Must be called with page's rmap lock held. */
 static inline int page_mapping_inuse(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping;
 
 	/* Page is in somebody's page tables. */
 	if (page_mapped(page))
 		return 1;
 
-	/* XXX: does this happen ? */
-	if (!mapping)
-		return 0;
-
 	/* Be more reluctant to reclaim swapcache than pagecache */
 	if (PageSwapCache(page))
 		return 1;
 
+	mapping = page_mapping(page);
+	if (!mapping)
+		return 0;
+
 	/* File is mmap'd by somebody. */
 	if (!list_empty(&mapping->i_mmap))
 		return 1;
@@ -233,7 +233,7 @@ static void handle_write_error(struct address_space *mapping,
 				struct page *page, int error)
 {
 	lock_page(page);
-	if (page->mapping == mapping) {
+	if (page_mapping(page) == mapping) {
 		if (error == -ENOSPC)
 			set_bit(AS_ENOSPC, &mapping->flags);
 		else
@@ -286,27 +286,28 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask,
 			goto activate_locked;
 		}
 
-		mapping = page->mapping;
+		mapping = page_mapping(page);
+		may_enter_fs = (gfp_mask & __GFP_FS);
 
 #ifdef CONFIG_SWAP
 		/*
-		 * Anonymous process memory without backing store. Try to
-		 * allocate it some swap space here.
+		 * Anonymous process memory has backing store?
+		 * Try to allocate it some swap space here.
 		 *
 		 * XXX: implement swap clustering ?
 		 */
-		if (page_mapped(page) && !mapping && !PagePrivate(page)) {
+		if (PageAnon(page) && !PageSwapCache(page)) {
 			rmap_unlock(page);
 			if (!add_to_swap(page))
 				goto activate_locked;
 			rmap_lock(page);
-			mapping = page->mapping;
+		}
+		if (PageSwapCache(page)) {
+			mapping = &swapper_space;
+			may_enter_fs = (gfp_mask & __GFP_IO);
 		}
 #endif /* CONFIG_SWAP */
 
-		may_enter_fs = (gfp_mask & __GFP_FS) ||
-			(PageSwapCache(page) && (gfp_mask & __GFP_IO));
-
 		/*
 		 * The page is mapped into the page tables of one or more
 		 * processes. Try to unmap it here.
@@ -427,7 +428,7 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask,
 #ifdef CONFIG_SWAP
 		if (PageSwapCache(page)) {
-			swp_entry_t swap = { .val = page->index };
+			swp_entry_t swap = { .val = page->private };
 			__delete_from_swap_cache(page);
 			spin_unlock_irq(&mapping->tree_lock);
 			swap_free(swap);
@@ -669,8 +670,7 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in,
 		 * FIXME: need to consider page_count(page) here if/when we
 		 * reap orphaned pages via the LRU (Daniel's locking stuff)
 		 */
-		if (total_swap_pages == 0 && !page->mapping &&
-						!PagePrivate(page)) {
+		if (total_swap_pages == 0 && PageAnon(page)) {
 			list_add(&page->lru, &l_active);
 			continue;
 		}