Commit 1f90eedd, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] exact dirty state accounting

Some adjustments to global dirty page accounting.

Previously, dirty page accounting counted all dirty pages.  Even dirty
anonymous pages.  This has potential to upset the throttling logic in
balance_dirty_pages().  Particularly as I suspect we should decrease
the dirty memory writeback thresholds by a lot.

So this patch changes it so that we only account for dirty pagecache
pages which have backing store.  Not anonymous pages, not swapcache,
not in-memory filesystem pages.

To support this, the `memory_backed' boolean has been added to struct
backing_dev_info.  When an address space's backing device is marked as
memory-backed, the core kernel knows to not include that mapping's
pages in the dirty memory accounting.

For memory-backed mappings, dirtiness is a way of pinning the page, and
there's nothing the kernel can do to clean the page to make it freeable.

driverfs, tmpfs, and ramfs have been converted to mark their mappings as
memory-backed.

The ramdisk driver hasn't been converted.  I have a separate patch for
ramdisk, which fails to fix the longstanding problems in there :(

With this patch, /bin/sync now sends /proc/meminfo:Dirty to zero, which
is rather comforting.
parent 6a0fb424
...@@ -2513,7 +2513,7 @@ int try_to_free_buffers(struct page *page) ...@@ -2513,7 +2513,7 @@ int try_to_free_buffers(struct page *page)
* This only applies in the rare case where try_to_free_buffers * This only applies in the rare case where try_to_free_buffers
* succeeds but the page is not freed. * succeeds but the page is not freed.
*/ */
ClearPageDirty(page); clear_page_dirty(page);
} }
spin_unlock(&mapping->private_lock); spin_unlock(&mapping->private_lock);
out: out:
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/driverfs_fs.h> #include <linux/driverfs_fs.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -56,6 +57,11 @@ static struct vfsmount *driverfs_mount; ...@@ -56,6 +57,11 @@ static struct vfsmount *driverfs_mount;
static spinlock_t mount_lock = SPIN_LOCK_UNLOCKED; static spinlock_t mount_lock = SPIN_LOCK_UNLOCKED;
static int mount_count = 0; static int mount_count = 0;
/*
 * Backing-device description installed on driverfs inode mappings by
 * driverfs_get_inode().  Readahead is turned off, and memory_backed is set
 * so that pages of these mappings are not counted in nr_dirty.
 */
static struct backing_dev_info driverfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
.memory_backed = 1, /* Does not contribute to dirty memory */
};
static int driverfs_readpage(struct file *file, struct page * page) static int driverfs_readpage(struct file *file, struct page * page)
{ {
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
...@@ -108,6 +114,7 @@ struct inode *driverfs_get_inode(struct super_block *sb, int mode, int dev) ...@@ -108,6 +114,7 @@ struct inode *driverfs_get_inode(struct super_block *sb, int mode, int dev)
inode->i_rdev = NODEV; inode->i_rdev = NODEV;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->a_ops = &driverfs_aops; inode->i_mapping->a_ops = &driverfs_aops;
inode->i_mapping->backing_dev_info = &driverfs_backing_dev_info;
switch (mode & S_IFMT) { switch (mode & S_IFMT) {
default: default:
init_special_inode(inode, mode, dev); init_special_inode(inode, mode, dev);
......
...@@ -571,7 +571,7 @@ mpage_writepages(struct address_space *mapping, ...@@ -571,7 +571,7 @@ mpage_writepages(struct address_space *mapping,
wait_on_page_writeback(page); wait_on_page_writeback(page);
if (page->mapping && !PageWriteback(page) && if (page->mapping && !PageWriteback(page) &&
TestClearPageDirty(page)) { test_clear_page_dirty(page)) {
if (writepage) { if (writepage) {
ret = (*writepage)(page); ret = (*writepage)(page);
} else { } else {
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -41,6 +42,11 @@ static struct address_space_operations ramfs_aops; ...@@ -41,6 +42,11 @@ static struct address_space_operations ramfs_aops;
static struct file_operations ramfs_file_operations; static struct file_operations ramfs_file_operations;
static struct inode_operations ramfs_dir_inode_operations; static struct inode_operations ramfs_dir_inode_operations;
/*
 * Backing-device description installed on ramfs inode mappings by
 * ramfs_get_inode().  Readahead is turned off, and memory_backed is set
 * so that pages of these mappings are not counted in nr_dirty.
 */
static struct backing_dev_info ramfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
.memory_backed = 1, /* Does not contribute to dirty memory */
};
/* /*
* Read a page. Again trivial. If it didn't already exist * Read a page. Again trivial. If it didn't already exist
* in the page cache, it is zero-filled. * in the page cache, it is zero-filled.
...@@ -69,7 +75,7 @@ static int ramfs_prepare_write(struct file *file, struct page *page, unsigned of ...@@ -69,7 +75,7 @@ static int ramfs_prepare_write(struct file *file, struct page *page, unsigned of
kunmap_atomic(kaddr, KM_USER0); kunmap_atomic(kaddr, KM_USER0);
SetPageUptodate(page); SetPageUptodate(page);
} }
SetPageDirty(page); set_page_dirty(page);
return 0; return 0;
} }
...@@ -95,6 +101,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, int dev) ...@@ -95,6 +101,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, int dev)
inode->i_blocks = 0; inode->i_blocks = 0;
inode->i_rdev = NODEV; inode->i_rdev = NODEV;
inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) { switch (mode & S_IFMT) {
default: default:
......
...@@ -19,6 +19,7 @@ enum bdi_state { ...@@ -19,6 +19,7 @@ enum bdi_state {
struct backing_dev_info { struct backing_dev_info {
unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
unsigned long state; /* Always use atomic bitops on this */ unsigned long state; /* Always use atomic bitops on this */
int memory_backed; /* Cannot clean pages with writepage */
}; };
extern struct backing_dev_info default_backing_dev_info; extern struct backing_dev_info default_backing_dev_info;
......
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
#define PG_referenced 2 #define PG_referenced 2
#define PG_uptodate 3 #define PG_uptodate 3
#define PG_dirty_dontuse 4 #define PG_dirty 4
#define PG_lru 5 #define PG_lru 5
#define PG_active 6 #define PG_active 6
#define PG_slab 7 /* slab debug (Suparna wants this) */ #define PG_slab 7 /* slab debug (Suparna wants this) */
...@@ -120,37 +120,11 @@ extern void get_page_state(struct page_state *ret); ...@@ -120,37 +120,11 @@ extern void get_page_state(struct page_state *ret);
#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags) #define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags)
#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) #define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
#define PageDirty(page) test_bit(PG_dirty_dontuse, &(page)->flags) #define PageDirty(page) test_bit(PG_dirty, &(page)->flags)
#define SetPageDirty(page) \ #define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags)
do { \ #define TestSetPageDirty(page) test_and_set_bit(PG_dirty, &(page)->flags)
if (!test_and_set_bit(PG_dirty_dontuse, \ #define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags)
&(page)->flags)) \ #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
inc_page_state(nr_dirty); \
} while (0)
#define TestSetPageDirty(page) \
({ \
int ret; \
ret = test_and_set_bit(PG_dirty_dontuse, \
&(page)->flags); \
if (!ret) \
inc_page_state(nr_dirty); \
ret; \
})
#define ClearPageDirty(page) \
do { \
if (test_and_clear_bit(PG_dirty_dontuse, \
&(page)->flags)) \
dec_page_state(nr_dirty); \
} while (0)
#define TestClearPageDirty(page) \
({ \
int ret; \
ret = test_and_clear_bit(PG_dirty_dontuse, \
&(page)->flags); \
if (ret) \
dec_page_state(nr_dirty); \
ret; \
})
#define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) #define SetPageLRU(page) set_bit(PG_lru, &(page)->flags)
#define PageLRU(page) test_bit(PG_lru, &(page)->flags) #define PageLRU(page) test_bit(PG_lru, &(page)->flags)
...@@ -235,4 +209,11 @@ extern void get_page_state(struct page_state *ret); ...@@ -235,4 +209,11 @@ extern void get_page_state(struct page_state *ret);
extern struct address_space swapper_space; extern struct address_space swapper_space;
#define PageSwapCache(page) ((page)->mapping == &swapper_space) #define PageSwapCache(page) ((page)->mapping == &swapper_space)
int test_clear_page_dirty(struct page *page);
/*
 * Clear a page's dirty flag when the caller does not care whether the page
 * was dirty beforehand.  All the work, including the nr_dirty adjustment,
 * is done by test_clear_page_dirty(); its result is simply discarded.
 */
static inline void clear_page_dirty(struct page *page)
{
	(void)test_clear_page_dirty(page);
}
#endif /* PAGE_FLAGS_H */ #endif /* PAGE_FLAGS_H */
...@@ -181,7 +181,7 @@ static void truncate_complete_page(struct page *page) ...@@ -181,7 +181,7 @@ static void truncate_complete_page(struct page *page)
if (PagePrivate(page)) if (PagePrivate(page))
do_invalidatepage(page, 0); do_invalidatepage(page, 0);
ClearPageDirty(page); clear_page_dirty(page);
ClearPageUptodate(page); ClearPageUptodate(page);
remove_from_page_cache(page); remove_from_page_cache(page);
page_cache_release(page); page_cache_release(page);
...@@ -280,7 +280,7 @@ static void clean_list_pages(struct address_space *mapping, ...@@ -280,7 +280,7 @@ static void clean_list_pages(struct address_space *mapping,
for (curr = head->next; curr != head; curr = curr->next) { for (curr = head->next; curr != head; curr = curr->next) {
page = list_entry(curr, struct page, list); page = list_entry(curr, struct page, list);
if (page->index > start) if (page->index > start)
ClearPageDirty(page); clear_page_dirty(page);
} }
} }
...@@ -348,7 +348,7 @@ static inline int invalidate_this_page2(struct address_space * mapping, ...@@ -348,7 +348,7 @@ static inline int invalidate_this_page2(struct address_space * mapping,
} else } else
unlocked = 0; unlocked = 0;
ClearPageDirty(page); clear_page_dirty(page);
ClearPageUptodate(page); ClearPageUptodate(page);
} }
...@@ -557,8 +557,8 @@ int add_to_page_cache(struct page *page, ...@@ -557,8 +557,8 @@ int add_to_page_cache(struct page *page,
error = radix_tree_insert(&mapping->page_tree, offset, page); error = radix_tree_insert(&mapping->page_tree, offset, page);
if (!error) { if (!error) {
SetPageLocked(page); SetPageLocked(page);
ClearPageDirty(page);
___add_to_page_cache(page, mapping, offset); ___add_to_page_cache(page, mapping, offset);
ClearPageDirty(page);
} else { } else {
page_cache_release(page); page_cache_release(page);
} }
......
...@@ -350,7 +350,7 @@ int generic_vm_writeback(struct page *page, int *nr_to_write) ...@@ -350,7 +350,7 @@ int generic_vm_writeback(struct page *page, int *nr_to_write)
#if 0 #if 0
if (!PageWriteback(page) && PageDirty(page)) { if (!PageWriteback(page) && PageDirty(page)) {
lock_page(page); lock_page(page);
if (!PageWriteback(page) && TestClearPageDirty(page)) { if (!PageWriteback(page)&&test_clear_page_dirty(page)) {
int ret; int ret;
ret = page->mapping->a_ops->writepage(page); ret = page->mapping->a_ops->writepage(page);
...@@ -395,7 +395,7 @@ int write_one_page(struct page *page, int wait) ...@@ -395,7 +395,7 @@ int write_one_page(struct page *page, int wait)
write_lock(&mapping->page_lock); write_lock(&mapping->page_lock);
list_del(&page->list); list_del(&page->list);
if (TestClearPageDirty(page)) { if (test_clear_page_dirty(page)) {
list_add(&page->list, &mapping->locked_pages); list_add(&page->list, &mapping->locked_pages);
page_cache_get(page); page_cache_get(page);
write_unlock(&mapping->page_lock); write_unlock(&mapping->page_lock);
...@@ -487,6 +487,8 @@ int __set_page_dirty_buffers(struct page *page) ...@@ -487,6 +487,8 @@ int __set_page_dirty_buffers(struct page *page)
if (!TestSetPageDirty(page)) { if (!TestSetPageDirty(page)) {
write_lock(&mapping->page_lock); write_lock(&mapping->page_lock);
if (page->mapping) { /* Race with truncate? */ if (page->mapping) { /* Race with truncate? */
if (!mapping->backing_dev_info->memory_backed)
inc_page_state(nr_dirty);
list_del(&page->list); list_del(&page->list);
list_add(&page->list, &mapping->dirty_pages); list_add(&page->list, &mapping->dirty_pages);
} }
...@@ -523,6 +525,8 @@ int __set_page_dirty_nobuffers(struct page *page) ...@@ -523,6 +525,8 @@ int __set_page_dirty_nobuffers(struct page *page)
if (mapping) { if (mapping) {
write_lock(&mapping->page_lock); write_lock(&mapping->page_lock);
if (page->mapping) { /* Race with truncate? */ if (page->mapping) { /* Race with truncate? */
if (!mapping->backing_dev_info->memory_backed)
inc_page_state(nr_dirty);
list_del(&page->list); list_del(&page->list);
list_add(&page->list, &mapping->dirty_pages); list_add(&page->list, &mapping->dirty_pages);
} }
...@@ -534,4 +538,18 @@ int __set_page_dirty_nobuffers(struct page *page) ...@@ -534,4 +538,18 @@ int __set_page_dirty_nobuffers(struct page *page)
} }
EXPORT_SYMBOL(__set_page_dirty_nobuffers); EXPORT_SYMBOL(__set_page_dirty_nobuffers);
/*
 * Clear PG_dirty on @page and keep the global dirty-page count in step.
 *
 * nr_dirty is decremented only when the flag was actually set and the page
 * still belongs to a mapping whose backing device is not memory-backed;
 * pages without a mapping, or on memory-backed mappings, leave the counter
 * untouched.
 *
 * Returns 1 if the page was dirty before the call, 0 otherwise.
 */
int test_clear_page_dirty(struct page *page)
{
	struct address_space *mapping;

	/* Nothing to account for if the flag was already clear. */
	if (!TestClearPageDirty(page))
		return 0;

	mapping = page->mapping;
	if (mapping && !mapping->backing_dev_info->memory_backed)
		dec_page_state(nr_dirty);

	return 1;
}
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/string.h> #include <linux/string.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h> #include <linux/shmem_fs.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -56,6 +57,11 @@ static struct inode_operations shmem_inode_operations; ...@@ -56,6 +57,11 @@ static struct inode_operations shmem_inode_operations;
static struct inode_operations shmem_dir_inode_operations; static struct inode_operations shmem_dir_inode_operations;
static struct vm_operations_struct shmem_vm_ops; static struct vm_operations_struct shmem_vm_ops;
/*
 * Backing-device description installed on shmem inode mappings by
 * shmem_get_inode().  Readahead is turned off, and memory_backed is set
 * so that pages of these mappings are not counted in nr_dirty.
 */
static struct backing_dev_info shmem_backing_dev_info = {
.ra_pages = 0, /* No readahead */
.memory_backed = 1, /* Does not contribute to dirty memory */
};
LIST_HEAD (shmem_inodes); LIST_HEAD (shmem_inodes);
static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED; static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */ atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */
...@@ -789,6 +795,7 @@ struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev) ...@@ -789,6 +795,7 @@ struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
inode->i_blocks = 0; inode->i_blocks = 0;
inode->i_rdev = NODEV; inode->i_rdev = NODEV;
inode->i_mapping->a_ops = &shmem_aops; inode->i_mapping->a_ops = &shmem_aops;
inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
info = SHMEM_I(inode); info = SHMEM_I(inode);
spin_lock_init (&info->lock); spin_lock_init (&info->lock);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h> /* block_sync_page() */ #include <linux/buffer_head.h> /* block_sync_page() */
#include <asm/pgtable.h> #include <asm/pgtable.h>
...@@ -25,20 +26,26 @@ static struct inode swapper_inode = { ...@@ -25,20 +26,26 @@ static struct inode swapper_inode = {
.i_mapping = &swapper_space, .i_mapping = &swapper_space,
}; };
/*
 * Backing-device description referenced by swapper_space.  Readahead is
 * turned off, and memory_backed is set so that swapcache pages are not
 * counted in nr_dirty.
 */
static struct backing_dev_info swap_backing_dev_info = {
.ra_pages = 0, /* No readahead */
.memory_backed = 1, /* Does not contribute to dirty memory */
};
extern struct address_space_operations swap_aops; extern struct address_space_operations swap_aops;
struct address_space swapper_space = { struct address_space swapper_space = {
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC), .page_tree = RADIX_TREE_INIT(GFP_ATOMIC),
.page_lock = RW_LOCK_UNLOCKED, .page_lock = RW_LOCK_UNLOCKED,
.clean_pages = LIST_HEAD_INIT(swapper_space.clean_pages), .clean_pages = LIST_HEAD_INIT(swapper_space.clean_pages),
.dirty_pages = LIST_HEAD_INIT(swapper_space.dirty_pages), .dirty_pages = LIST_HEAD_INIT(swapper_space.dirty_pages),
.io_pages = LIST_HEAD_INIT(swapper_space.io_pages), .io_pages = LIST_HEAD_INIT(swapper_space.io_pages),
.locked_pages = LIST_HEAD_INIT(swapper_space.locked_pages), .locked_pages = LIST_HEAD_INIT(swapper_space.locked_pages),
.host = &swapper_inode, .host = &swapper_inode,
.a_ops = &swap_aops, .a_ops = &swap_aops,
.i_shared_lock = SPIN_LOCK_UNLOCKED, .backing_dev_info = &swap_backing_dev_info,
.private_lock = SPIN_LOCK_UNLOCKED, .i_shared_lock = SPIN_LOCK_UNLOCKED,
.private_list = LIST_HEAD_INIT(swapper_space.private_list), .private_lock = SPIN_LOCK_UNLOCKED,
.private_list = LIST_HEAD_INIT(swapper_space.private_list),
}; };
#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment