Commit 3e9afe4c authored by Andrew Morton, committed by Jaroslav Kysela

[PATCH] Remove fail_writepage, redux

fail_writepage() does not work.  Its activate_page() call cannot
activate the page because it is not on the LRU.

So perform that function (more efficiently) in the VM.  Remove
fail_writepage() and, if the filesystem does not implement
->writepage(), activate the page from shrink_list().

A special case is tmpfs, which does have a writepage, but which
sometimes wants to activate the pages anyway.  The most important case
is when there is no swap online and we don't want to keep all those
pages on the inactive list.  So just as a tmpfs special-case, allow
writepage() to return WRITEPAGE_ACTIVATE, and handle that in the VM.
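
As a rough sketch of that tmpfs convention (modelled on the
shmem_writepage() change in this patch; the function name and the elided
swap-cache logic are illustrative only):

	static int shmem_style_writepage(struct page *page)
	{
		swp_entry_t swap = get_swap_page();

		if (!swap.val) {
			/* No swap online (or none free): don't start
			 * I/O.  Redirty the page and hand it back to
			 * the VM still locked, asking for activation. */
			set_page_dirty(page);
			return WRITEPAGE_ACTIVATE;
		}
		/* ... otherwise move the page into the swap cache and
		 * start writeout, unlocking the page before return ... */
		return 0;
	}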

Also, the whole idea of allowing ->writepage() to return -EAGAIN and
handling that in the caller has been reverted.  If a writepage()
implementation wants to back out and not write the page, it must
redirty the page, unlock it and return zero.  (This is Hugh's preferred
way.)
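
A minimal sketch of that back-out path (the would_block() predicate is
hypothetical; __set_page_dirty_nobuffers() is the redirty helper used
throughout this patch):

	static int backout_writepage(struct page *page)
	{
		if (would_block(page)) {
			/* Cannot write without blocking: redirty the
			 * page, unlock it and return zero -- do NOT
			 * return -EAGAIN. */
			__set_page_dirty_nobuffers(page);
			unlock_page(page);
			return 0;
		}
		/* ... normal path: SetPageWriteback(), start the I/O,
		 * then unlock the page ... */
		return 0;
	}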

And remove the now-unneeded shmem_writepages() - shmem inodes are
marked as `memory backed' so it will not be called.

And remove the test for non-null ->writepage() in generic_file_mmap().
Memory-backed files _are_ mmappable, and they do not have a
writepage().  It just isn't called.

So the locking rules for writepage() are unchanged.  They are:

- Called with the page locked
- Returns with the page unlocked
- Must redirty the page itself if it wasn't all written.

But there is a new, special, hidden, undocumented, secret hack for
tmpfs: writepage may return WRITEPAGE_ACTIVATE to tell the VM to move
the page to the active list.  The page must be kept locked in this one
case.
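
The VM's side of that contract then looks like the shrink_list() hunk at
the bottom of this diff (excerpted shape only; error paths elided):

	res = mapping->a_ops->writepage(page);
	if (res == WRITEPAGE_ACTIVATE) {
		/* No I/O was started and the page is still locked:
		 * clear the reclaim hint and activate the page. */
		ClearPageReclaim(page);
		goto activate_locked;
	}
	if (!PageWriteback(page)) {
		/* synchronous write or broken a_ops? */
		ClearPageReclaim(page);
	}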
parent 660282aa
......@@ -147,7 +147,7 @@ locking rules:
All except set_page_dirty may block
		BKL	PageLocked(page)
writepage:	no	yes, unlocks
writepage:	no	yes, unlocks (see below)
readpage:	no	yes, unlocks
readpages:	no
sync_page:	no	maybe
......@@ -165,16 +165,37 @@ may be called from the request handler (/dev/loop).
->readpage() unlocks the page, either synchronously or via I/O
completion.
->readpages() populates the pagecache with the passed pages and starts I/O against them. They come unlocked upon I/O completion.
->readpages() populates the pagecache with the passed pages and starts
I/O against them. They come unlocked upon I/O completion.
->writepage() unlocks the page synchronously, before returning to
the caller. If the page has write I/O underway against it, writepage()
should run SetPageWriteback() against the page prior to unlocking it.
The write I/O completion handler should run ClearPageWriteback against
the page.
->writepage() is used for two purposes: for "memory cleansing" and for
"sync". These are quite different operations and the behaviour may differ
depending upon the mode. (Yes, there should be two a_ops for this, or
writepage should take a writeback_control*)
That is: after 2.5.12, pages which are under writeout are *not*
locked.
If writepage is called for sync (current->flags & PF_SYNC) then it *must*
write the page, even if that would involve blocking on in-progress I/O.
If writepage is called for memory cleansing (!(current->flags & PF_SYNC))
then its role is to get as much writeout underway as possible. So writepage
should try to avoid blocking against currently-in-progress I/O.
If the filesystem is not called for "sync" and it determines that it
would need to block against in-progress I/O to be able to start new I/O
against the page, the filesystem should redirty the page (usually with
__set_page_dirty_nobuffers()), then unlock the page and return zero.
This may also be done to avoid internal deadlocks, but rarely.
If the filesystem is called for sync then it must wait on any
in-progress I/O and then start new I/O.
The filesystem should unlock the page synchronously, before returning
to the caller. If the page has write I/O underway against it,
writepage() should run SetPageWriteback() against the page prior to
unlocking it. The write I/O completion handler should run
end_page_writeback() against the page.
That is: after 2.5.12, pages which are under writeout are *not* locked.
->sync_page() locking rules are not well-defined - usually it is called
with lock on page, but that is not guaranteed. Considering the currently
......
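A minimal sketch of the two writepage modes described above (the
in_flight() predicate is hypothetical; PF_SYNC, __set_page_dirty_nobuffers()
and end_page_writeback() are as in the documentation text):

	static int example_writepage(struct page *page)
	{
		if (!(current->flags & PF_SYNC) && in_flight(page)) {
			/* Memory cleansing: avoid blocking against
			 * currently-in-progress I/O.  Redirty, unlock
			 * and return zero. */
			__set_page_dirty_nobuffers(page);
			unlock_page(page);
			return 0;
		}
		/* Sync (or nothing in flight): wait on any in-progress
		 * I/O, then start new I/O.  Mark the page as under
		 * writeback before unlocking; the I/O completion
		 * handler runs end_page_writeback(). */
		wait_on_page_writeback(page);
		SetPageWriteback(page);
		unlock_page(page);
		/* ... submit the new I/O here ... */
		return 0;
	}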
......@@ -132,7 +132,6 @@ static int ramdisk_commit_write(struct file *file, struct page *page, unsigned o
static struct address_space_operations ramdisk_aops = {
.readpage = ramdisk_readpage,
.writepage = fail_writepage,
.prepare_write = ramdisk_prepare_write,
.commit_write = ramdisk_commit_write,
};
......
......@@ -1622,7 +1622,7 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
* state inside lock_buffer().
*
* If block_write_full_page() is called for regular writeback
* (called_for_sync() is false) then it will return -EAGAIN for a locked
* (called_for_sync() is false) then it will redirty a page which has a locked
* buffer. This only can happen if someone has written the buffer directly,
* with submit_bh(). At the address_space level PageWriteback prevents this
* contention from occurring.
......@@ -1631,7 +1631,6 @@ static int __block_write_full_page(struct inode *inode,
struct page *page, get_block_t *get_block)
{
int err;
int ret = 0;
unsigned long block;
unsigned long last_block;
struct buffer_head *bh, *head;
......@@ -1705,7 +1704,7 @@ static int __block_write_full_page(struct inode *inode,
lock_buffer(bh);
} else {
if (test_set_buffer_locked(bh)) {
ret = -EAGAIN;
__set_page_dirty_nobuffers(page);
continue;
}
}
......@@ -1757,8 +1756,6 @@ static int __block_write_full_page(struct inode *inode,
SetPageUptodate(page);
end_page_writeback(page);
}
if (err == 0)
return ret;
return err;
recover:
......
......@@ -1371,11 +1371,10 @@ static int ext3_writepage(struct page *page)
/*
* We have to fail this writepage to avoid cross-fs transactions.
* Return -EAGAIN so the caller will put the page back on
* mapping->dirty_pages. The page's buffers' dirty state will be left
* as-is.
* Put the page back on mapping->dirty_pages. The page's buffers'
* dirty state will be left as-is.
*/
ret = -EAGAIN;
__set_page_dirty_nobuffers(page);
unlock_page(page);
return ret;
}
......
......@@ -448,7 +448,6 @@ static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const ch
static struct address_space_operations hugetlbfs_aops = {
.readpage = hugetlbfs_readpage,
.writepage = fail_writepage,
.prepare_write = hugetlbfs_prepare_write,
.commit_write = hugetlbfs_commit_write
};
......
......@@ -610,10 +610,6 @@ mpage_writepages(struct address_space *mapping,
test_clear_page_dirty(page)) {
if (writepage) {
ret = (*writepage)(page);
if (ret == -EAGAIN) {
__set_page_dirty_nobuffers(page);
ret = 0;
}
} else {
bio = mpage_writepage(bio, page, get_block,
&last_block_in_bio, &ret);
......
......@@ -135,7 +135,6 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
static struct address_space_operations ramfs_aops = {
.readpage = simple_readpage,
.writepage = fail_writepage,
.prepare_write = simple_prepare_write,
.commit_write = simple_commit_write
};
......
......@@ -382,7 +382,6 @@ static struct file_operations sysfs_file_operations = {
static struct address_space_operations sysfs_aops = {
.readpage = simple_readpage,
.writepage = fail_writepage,
.prepare_write = simple_prepare_write,
.commit_write = simple_commit_write
};
......
......@@ -206,8 +206,7 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err)
else
UDF_I_ALLOCTYPE(inode) = ICBTAG_FLAG_AD_LONG;
if (inode->i_data.a_ops->writepage(page) == -EAGAIN)
__set_page_dirty_nobuffers(page);
inode->i_data.a_ops->writepage(page);
page_cache_release(page);
mark_inode_dirty(inode);
......
......@@ -355,7 +355,6 @@ extern struct page *mem_map;
extern void show_free_areas(void);
extern int fail_writepage(struct page *);
struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused);
struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags);
extern void shmem_lock(struct file * file, int lock);
......
/*
* include/linux/writeback.h.
*
* These declarations are private to fs/ and mm/.
* Declarations which are exported to filesystems do not
* get placed here.
*/
#ifndef WRITEBACK_H
#define WRITEBACK_H
......@@ -47,6 +43,15 @@ struct writeback_control {
int encountered_congestion; /* An output: a queue is full */
};
/*
* ->writepage() return values (make these much larger than a pagesize, in
* case some fs is returning number-of-bytes-written from writepage)
*/
#define WRITEPAGE_ACTIVATE 0x80000 /* IO was not started: activate page */
/*
* fs/fs-writeback.c
*/
void writeback_inodes(struct writeback_control *wbc);
void wake_up_inode(struct inode *inode);
void __wait_on_inode(struct inode * inode);
......
......@@ -108,37 +108,6 @@ static inline int sync_page(struct page *page)
return 0;
}
/*
* In-memory filesystems have to fail their
* writepage function - and this has to be
* worked around in the VM layer..
*
* We
* - mark the page dirty again (but do NOT
* add it back to the inode dirty list, as
* that would livelock in fdatasync)
* - activate the page so that the page stealer
* doesn't try to write it out over and over
* again.
*
* NOTE! The livelock in fdatasync went away, due to io_pages.
* So this function can now call set_page_dirty().
*/
int fail_writepage(struct page *page)
{
/* Only activate on memory-pressure, not fsync.. */
if (current->flags & PF_MEMALLOC) {
if (!PageActive(page))
activate_page(page);
if (!PageReferenced(page))
SetPageReferenced(page);
}
unlock_page(page);
return -EAGAIN; /* It will be set dirty again */
}
EXPORT_SYMBOL(fail_writepage);
/**
* filemap_fdatawrite - start writeback against all of a mapping's dirty pages
* @mapping: address space structure to write
......@@ -160,6 +129,9 @@ int filemap_fdatawrite(struct address_space *mapping)
.nr_to_write = mapping->nrpages * 2,
};
if (mapping->backing_dev_info->memory_backed)
return 0;
current->flags |= PF_SYNC;
ret = do_writepages(mapping, &wbc);
current->flags &= ~PF_SYNC;
......@@ -1327,10 +1299,6 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host;
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
if (!mapping->a_ops->writepage)
return -EINVAL;
}
if (!mapping->a_ops->readpage)
return -ENOEXEC;
UPDATE_ATIME(inode);
......
......@@ -424,10 +424,6 @@ int write_one_page(struct page *page, int wait)
page_cache_get(page);
write_unlock(&mapping->page_lock);
ret = mapping->a_ops->writepage(page);
if (ret == -EAGAIN) {
__set_page_dirty_nobuffers(page);
ret = 0;
}
if (ret == 0 && wait) {
wait_on_page_writeback(page);
if (PageError(page))
......
......@@ -31,6 +31,7 @@
#include <linux/backing-dev.h>
#include <linux/shmem_fs.h>
#include <linux/mount.h>
#include <linux/writeback.h>
#include <asm/uaccess.h>
......@@ -686,10 +687,10 @@ static int shmem_writepage(struct page *page)
inode = mapping->host;
info = SHMEM_I(inode);
if (info->flags & VM_LOCKED)
return fail_writepage(page);
goto redirty;
swap = get_swap_page();
if (!swap.val)
return fail_writepage(page);
goto redirty;
spin_lock(&info->lock);
shmem_recalc_inode(inode);
......@@ -709,12 +710,9 @@ static int shmem_writepage(struct page *page)
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
swap_free(swap);
return fail_writepage(page);
}
static int shmem_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
return 0;
redirty:
set_page_dirty(page);
return WRITEPAGE_ACTIVATE; /* Return with the page locked */
}
/*
......@@ -1802,7 +1800,6 @@ static void destroy_inodecache(void)
static struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
.writepages = shmem_writepages,
.set_page_dirty = __set_page_dirty_nobuffers,
#ifdef CONFIG_TMPFS
.readpage = shmem_readpage,
......
......@@ -309,7 +309,7 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask,
goto keep_locked;
if (!mapping)
goto keep_locked;
if (mapping->a_ops->writepage == fail_writepage)
if (mapping->a_ops->writepage == NULL)
goto activate_locked;
if (!may_enter_fs)
goto keep_locked;
......@@ -327,14 +327,12 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask,
SetPageReclaim(page);
res = mapping->a_ops->writepage(page);
if (res == -EAGAIN) {
if (res == WRITEPAGE_ACTIVATE) {
ClearPageReclaim(page);
__set_page_dirty_nobuffers(page);
} else if (!PageWriteback(page)) {
/*
* synchronous writeout or broken
* a_ops?
*/
goto activate_locked;
}
if (!PageWriteback(page)) {
/* synchronous write or broken a_ops? */
ClearPageReclaim(page);
}
goto keep;
......