Commit 994fc28c authored by Zach Brown, committed by Joel Becker

[PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE

readpage(), prepare_write(), and commit_write() callers are updated to
understand the special return code AOP_TRUNCATED_PAGE in the style of
writepage() and WRITEPAGE_ACTIVATE.  AOP_TRUNCATED_PAGE tells the caller that
the callee has unlocked the page and that the operation should be tried again
with a new page.  OCFS2 uses this to detect and work around a lock inversion in
its aop methods.  There should be no change in behaviour for methods that don't
return AOP_TRUNCATED_PAGE.

WRITEPAGE_ACTIVATE is also prefixed with AOP_ for consistency, and both
constants are made members of an enum so that kerneldoc can be used to
document their semantics.
Signed-off-by: Zach Brown <zach.brown@oracle.com>
parent 7063fbf2
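
Seen from a readpage() caller, the retry protocol described above amounts to a
small loop. The sketch below is illustrative only (the read_one_page() helper
is invented for this example, not part of the patch) and is modeled on the
page_cache_read() change further down:

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Illustrative sketch; read_one_page() is not part of this patch. */
static int read_one_page(struct file *file, struct address_space *mapping,
                         pgoff_t index)
{
        struct page *page;
        int error;

        do {
                /* grab_cache_page() returns the page locked */
                page = grab_cache_page(mapping, index);
                if (!page)
                        return -ENOMEM;
                /*
                 * readpage() consumes the page lock on every path; on
                 * AOP_TRUNCATED_PAGE the aop has already unlocked the
                 * page, so only our reference needs dropping before we
                 * retry with a freshly grabbed page.
                 */
                error = mapping->a_ops->readpage(file, page);
                page_cache_release(page);
        } while (error == AOP_TRUNCATED_PAGE);

        return error;
}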
@@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
         struct address_space_operations *aops = mapping->a_ops;
         pgoff_t index;
         unsigned offset, bv_offs;
-        int len, ret = 0;
+        int len, ret;
 
         down(&mapping->host->i_sem);
         index = pos >> PAGE_CACHE_SHIFT;
@@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
                 page = grab_cache_page(mapping, index);
                 if (unlikely(!page))
                         goto fail;
-                if (unlikely(aops->prepare_write(file, page, offset,
-                                offset + size)))
+                ret = aops->prepare_write(file, page, offset,
+                                offset + size);
+                if (unlikely(ret)) {
+                        if (ret == AOP_TRUNCATED_PAGE) {
+                                page_cache_release(page);
+                                continue;
+                        }
                         goto unlock;
+                }
                 transfer_result = lo_do_transfer(lo, WRITE, page, offset,
                                 bvec->bv_page, bv_offs, size, IV);
                 if (unlikely(transfer_result)) {
@@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
                         kunmap_atomic(kaddr, KM_USER0);
                 }
                 flush_dcache_page(page);
-                if (unlikely(aops->commit_write(file, page, offset,
-                                offset + size)))
+                ret = aops->commit_write(file, page, offset,
+                                offset + size);
+                if (unlikely(ret)) {
+                        if (ret == AOP_TRUNCATED_PAGE) {
+                                page_cache_release(page);
+                                continue;
+                        }
                         goto unlock;
+                }
                 if (unlikely(transfer_result))
                         goto unlock;
                 bv_offs += size;
@@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
                 unlock_page(page);
                 page_cache_release(page);
         }
+        ret = 0;
 out:
         up(&mapping->host->i_sem);
         return ret;
...
@@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page,
 
 /*
  * ->writepage to the the blockdev's mapping has to redirty the page so that the
- * VM doesn't go and steal it.  We return WRITEPAGE_ACTIVATE so that the VM
+ * VM doesn't go and steal it.  We return AOP_WRITEPAGE_ACTIVATE so that the VM
  * won't try to (pointlessly) write the page again for a while.
  *
  * Really, these pages should not be on the LRU at all.
@@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
         make_page_uptodate(page);
         SetPageDirty(page);
         if (wbc->for_reclaim)
-                return WRITEPAGE_ACTIVATE;
+                return AOP_WRITEPAGE_ACTIVATE;
         unlock_page(page);
         return 0;
 }
...
@@ -721,7 +721,7 @@ mpage_writepages(struct address_space *mapping,
                                         &last_block_in_bio, &ret, wbc,
                                         page->mapping->a_ops->writepage);
                         }
-                        if (unlikely(ret == WRITEPAGE_ACTIVATE))
+                        if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
                                 unlock_page(page);
                         if (ret || (--(wbc->nr_to_write) <= 0))
                                 done = 1;
...
@@ -302,6 +302,37 @@ struct iattr {
  */
 #include <linux/quota.h>
 
+/**
+ * enum positive_aop_returns - aop return codes with specific semantics
+ *
+ * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
+ *                          completed, that the page is still locked, and
+ *                          should be considered active. The VM uses this hint
+ *                          to return the page to the active list -- it won't
+ *                          be a candidate for writeback again in the near
+ *                          future. Other callers must be careful to unlock
+ *                          the page if they get this return. Returned by
+ *                          writepage().
+ *
+ * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
+ *                      unlocked it and the page might have been truncated.
+ *                      The caller should back up to acquiring a new page and
+ *                      trying again. The aop will be taking reasonable
+ *                      precautions not to livelock. If the caller held a page
+ *                      reference, it should drop it before retrying. Returned
+ *                      by readpage(), prepare_write(), and commit_write().
+ *
+ * address_space_operation functions return these large constants to indicate
+ * special semantics to the caller. These are much larger than the bytes in a
+ * page to allow for functions that return the number of bytes operated on in a
+ * given page.
+ */
+enum positive_aop_returns {
+        AOP_WRITEPAGE_ACTIVATE  = 0x80000,
+        AOP_TRUNCATED_PAGE      = 0x80001,
+};
+
 /*
  * oh the beauties of C type declarations.
  */
...
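
The kerneldoc above places a matching duty on writepage() callers outside VM
reclaim: an AOP_WRITEPAGE_ACTIVATE return means the page is still locked, and
the caller must unlock it itself, as the mpage_writepages() hunk above does.
A minimal, hypothetical caller might look like this (write_one_page_checked()
is a name invented for illustration, not part of the patch):

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

/* Illustrative sketch; write_one_page_checked() is not part of this patch. */
static int write_one_page_checked(struct page *page,
                                  struct writeback_control *wbc)
{
        int ret = page->mapping->a_ops->writepage(page, wbc);

        if (ret == AOP_WRITEPAGE_ACTIVATE) {
                /*
                 * No I/O was started: the aop kept the page locked and
                 * dirty, so the lock is ours to drop and the page should
                 * be treated as active, not as an error.
                 */
                unlock_page(page);
                ret = 0;
        }
        return ret;
}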
@@ -59,12 +59,6 @@ struct writeback_control {
         unsigned for_reclaim:1;         /* Invoked from the page allocator */
 };
 
-/*
- * ->writepage() return values (make these much larger than a pagesize, in
- * case some fs is returning number-of-bytes-written from writepage)
- */
-#define WRITEPAGE_ACTIVATE      0x80000 /* IO was not started: activate page */
-
 /*
  * fs/fs-writeback.c
  */
...
@@ -831,8 +831,13 @@ void do_generic_mapping_read(struct address_space *mapping,
                 /* Start the actual read. The read will unlock the page. */
                 error = mapping->a_ops->readpage(filp, page);
 
-                if (unlikely(error))
+                if (unlikely(error)) {
+                        if (error == AOP_TRUNCATED_PAGE) {
+                                page_cache_release(page);
+                                goto find_page;
+                        }
                         goto readpage_error;
+                }
 
                 if (!PageUptodate(page)) {
                         lock_page(page);
@@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 {
         struct address_space *mapping = file->f_mapping;
         struct page *page;
-        int error;
+        int ret;
 
-        page = page_cache_alloc_cold(mapping);
-        if (!page)
-                return -ENOMEM;
+        do {
+                page = page_cache_alloc_cold(mapping);
+                if (!page)
+                        return -ENOMEM;
+
+                ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
+                if (ret == 0)
+                        ret = mapping->a_ops->readpage(file, page);
+                else if (ret == -EEXIST)
+                        ret = 0; /* losing race to add is OK */
 
-        error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
-        if (!error) {
-                error = mapping->a_ops->readpage(file, page);
                 page_cache_release(page);
-                return error;
-        }
 
-        /*
-         * We arrive here in the unlikely event that someone
-         * raced with us and added our page to the cache first
-         * or we are out of memory for radix-tree nodes.
-         */
-        page_cache_release(page);
-        return error == -EEXIST ? 0 : error;
+        } while (ret == AOP_TRUNCATED_PAGE);
+
+        return ret;
 }
 
 #define MMAP_LOTSAMISS  (100)
@@ -1331,10 +1334,14 @@ struct page *filemap_nopage(struct vm_area_struct *area,
                         goto success;
         }
 
-        if (!mapping->a_ops->readpage(file, page)) {
+        error = mapping->a_ops->readpage(file, page);
+        if (!error) {
                 wait_on_page_locked(page);
                 if (PageUptodate(page))
                         goto success;
+        } else if (error == AOP_TRUNCATED_PAGE) {
+                page_cache_release(page);
+                goto retry_find;
         }
 
         /*
@@ -1358,10 +1365,14 @@ struct page *filemap_nopage(struct vm_area_struct *area,
                         goto success;
         }
         ClearPageError(page);
-        if (!mapping->a_ops->readpage(file, page)) {
+        error = mapping->a_ops->readpage(file, page);
+        if (!error) {
                 wait_on_page_locked(page);
                 if (PageUptodate(page))
                         goto success;
+        } else if (error == AOP_TRUNCATED_PAGE) {
+                page_cache_release(page);
+                goto retry_find;
         }
 
         /*
@@ -1444,10 +1455,14 @@ static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
                         goto success;
         }
 
-        if (!mapping->a_ops->readpage(file, page)) {
+        error = mapping->a_ops->readpage(file, page);
+        if (!error) {
                 wait_on_page_locked(page);
                 if (PageUptodate(page))
                         goto success;
+        } else if (error == AOP_TRUNCATED_PAGE) {
+                page_cache_release(page);
+                goto retry_find;
         }
 
         /*
@@ -1470,10 +1485,14 @@ static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
         }
 
         ClearPageError(page);
-        if (!mapping->a_ops->readpage(file, page)) {
+        error = mapping->a_ops->readpage(file, page);
+        if (!error) {
                 wait_on_page_locked(page);
                 if (PageUptodate(page))
                         goto success;
+        } else if (error == AOP_TRUNCATED_PAGE) {
+                page_cache_release(page);
+                goto retry_find;
         }
 
         /*
@@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                 status = a_ops->prepare_write(file, page, offset, offset+bytes);
                 if (unlikely(status)) {
                         loff_t isize = i_size_read(inode);
+
+                        if (status != AOP_TRUNCATED_PAGE)
+                                unlock_page(page);
+                        page_cache_release(page);
+                        if (status == AOP_TRUNCATED_PAGE)
+                                continue;
                         /*
                          * prepare_write() may have instantiated a few blocks
                          * outside i_size.  Trim these off again.
                          */
-                        unlock_page(page);
-                        page_cache_release(page);
                         if (pos + bytes > isize)
                                 vmtruncate(inode, isize);
                         break;
@@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                                                 cur_iov, iov_base, bytes);
                 flush_dcache_page(page);
                 status = a_ops->commit_write(file, page, offset, offset+bytes);
+                if (status == AOP_TRUNCATED_PAGE) {
+                        page_cache_release(page);
+                        continue;
+                }
                 if (likely(copied > 0)) {
                         if (!status)
                                 status = copied;
...
@@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 {
         unsigned page_idx;
         struct pagevec lru_pvec;
-        int ret = 0;
+        int ret;
 
         if (mapping->a_ops->readpages) {
                 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
@@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp,
                 list_del(&page->lru);
                 if (!add_to_page_cache(page, mapping,
                                         page->index, GFP_KERNEL)) {
-                        mapping->a_ops->readpage(filp, page);
-                        if (!pagevec_add(&lru_pvec, page))
-                                __pagevec_lru_add(&lru_pvec);
-                } else {
-                        page_cache_release(page);
+                        ret = mapping->a_ops->readpage(filp, page);
+                        if (ret != AOP_TRUNCATED_PAGE) {
+                                if (!pagevec_add(&lru_pvec, page))
+                                        __pagevec_lru_add(&lru_pvec);
+                                continue;
+                        } /* else fall through to release */
                 }
+                page_cache_release(page);
         }
         pagevec_lru_add(&lru_pvec);
+        ret = 0;
 out:
         return ret;
 }
...
@@ -855,7 +855,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
         swap_free(swap);
 redirty:
         set_page_dirty(page);
-        return WRITEPAGE_ACTIVATE;      /* Return with the page locked */
+        return AOP_WRITEPAGE_ACTIVATE;  /* Return with the page locked */
 }
 
 #ifdef CONFIG_NUMA
...
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
                 res = mapping->a_ops->writepage(page, &wbc);
                 if (res < 0)
                         handle_write_error(mapping, page, res);
-                if (res == WRITEPAGE_ACTIVATE) {
+                if (res == AOP_WRITEPAGE_ACTIVATE) {
                         ClearPageReclaim(page);
                         return PAGE_ACTIVATE;
                 }
...