Commit 5cffff9e authored by Wengang Wang, committed by Sunil Mushran

ocfs2: Fix ocfs2_page_mkwrite()

This patch addresses two shortcomings in ocfs2_page_mkwrite():
1. It makes the function return better VM_FAULT_* errors.
2. It handles an error that is triggered when a page is dropped from the mapping
due to memory pressure. This patch locks the page to prevent that.

[Patch was cleaned up by Sunil Mushran.]
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
parent a035bff6
...@@ -862,6 +862,12 @@ struct ocfs2_write_ctxt { ...@@ -862,6 +862,12 @@ struct ocfs2_write_ctxt {
struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
struct page *w_target_page; struct page *w_target_page;
/*
* w_target_locked is used for page_mkwrite path indicating no unlocking
* against w_target_page in ocfs2_write_end_nolock.
*/
unsigned int w_target_locked:1;
/* /*
* ocfs2_write_end() uses this to know what the real range to * ocfs2_write_end() uses this to know what the real range to
* write in the target should be. * write in the target should be.
...@@ -895,6 +901,24 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) ...@@ -895,6 +901,24 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
{ {
int i;
/*
* w_target_locked is only set to true in the page_mkwrite() case.
* The intent is to allow us to lock the target page from write_begin()
* to write_end(). The caller must hold a ref on w_target_page.
*/
if (wc->w_target_locked) {
BUG_ON(!wc->w_target_page);
for (i = 0; i < wc->w_num_pages; i++) {
if (wc->w_target_page == wc->w_pages[i]) {
wc->w_pages[i] = NULL;
break;
}
}
mark_page_accessed(wc->w_target_page);
page_cache_release(wc->w_target_page);
}
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
brelse(wc->w_di_bh); brelse(wc->w_di_bh);
...@@ -1132,20 +1156,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, ...@@ -1132,20 +1156,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
*/ */
lock_page(mmap_page); lock_page(mmap_page);
/* Exit and let the caller retry */
if (mmap_page->mapping != mapping) { if (mmap_page->mapping != mapping) {
WARN_ON(mmap_page->mapping);
unlock_page(mmap_page); unlock_page(mmap_page);
/* ret = -EAGAIN;
* Sanity check - the locking in
* ocfs2_pagemkwrite() should ensure
* that this code doesn't trigger.
*/
ret = -EINVAL;
mlog_errno(ret);
goto out; goto out;
} }
page_cache_get(mmap_page); page_cache_get(mmap_page);
wc->w_pages[i] = mmap_page; wc->w_pages[i] = mmap_page;
wc->w_target_locked = true;
} else { } else {
wc->w_pages[i] = find_or_create_page(mapping, index, wc->w_pages[i] = find_or_create_page(mapping, index,
GFP_NOFS); GFP_NOFS);
...@@ -1160,6 +1181,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, ...@@ -1160,6 +1181,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
wc->w_target_page = wc->w_pages[i]; wc->w_target_page = wc->w_pages[i];
} }
out: out:
if (ret)
wc->w_target_locked = false;
return ret; return ret;
} }
...@@ -1817,11 +1840,23 @@ int ocfs2_write_begin_nolock(struct file *filp, ...@@ -1817,11 +1840,23 @@ int ocfs2_write_begin_nolock(struct file *filp,
*/ */
ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
cluster_of_pages, mmap_page); cluster_of_pages, mmap_page);
if (ret) { if (ret && ret != -EAGAIN) {
mlog_errno(ret); mlog_errno(ret);
goto out_quota; goto out_quota;
} }
/*
* ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
* the target page. In this case, we exit with no error and no target
* page. This will trigger the caller, page_mkwrite(), to re-try
* the operation.
*/
if (ret == -EAGAIN) {
BUG_ON(wc->w_target_page);
ret = 0;
goto out_quota;
}
ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
len); len);
if (ret) { if (ret) {
......
...@@ -61,7 +61,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) ...@@ -61,7 +61,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
struct page *page) struct page *page)
{ {
int ret; int ret = VM_FAULT_NOPAGE;
struct inode *inode = file->f_path.dentry->d_inode; struct inode *inode = file->f_path.dentry->d_inode;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
loff_t pos = page_offset(page); loff_t pos = page_offset(page);
...@@ -71,32 +71,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, ...@@ -71,32 +71,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
void *fsdata; void *fsdata;
loff_t size = i_size_read(inode); loff_t size = i_size_read(inode);
/*
* Another node might have truncated while we were waiting on
* cluster locks.
* We don't check size == 0 before the shift. This is borrowed
* from do_generic_file_read.
*/
last_index = (size - 1) >> PAGE_CACHE_SHIFT; last_index = (size - 1) >> PAGE_CACHE_SHIFT;
if (unlikely(!size || page->index > last_index)) {
ret = -EINVAL;
goto out;
}
/* /*
* The i_size check above doesn't catch the case where nodes * There are cases that lead to the page no longer belonging to the
* truncated and then re-extended the file. We'll re-check the * mapping.
* page mapping after taking the page lock inside of * 1) pagecache truncates locally due to memory pressure.
* ocfs2_write_begin_nolock(). * 2) pagecache truncates when another is taking EX lock against
* inode lock. see ocfs2_data_convert_worker.
*
* The i_size check doesn't catch the case where nodes truncated and
* then re-extended the file. We'll re-check the page mapping after
* taking the page lock inside of ocfs2_write_begin_nolock().
*
* Let VM retry with these cases.
*/ */
if (!PageUptodate(page) || page->mapping != inode->i_mapping) { if ((page->mapping != inode->i_mapping) ||
/* (!PageUptodate(page)) ||
* the page has been umapped in ocfs2_data_downconvert_worker. (page_offset(page) >= size))
* So return 0 here and let VFS retry.
*/
ret = 0;
goto out; goto out;
}
/* /*
* Call ocfs2_write_begin() and ocfs2_write_end() to take * Call ocfs2_write_begin() and ocfs2_write_end() to take
...@@ -116,17 +109,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, ...@@ -116,17 +109,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
if (ret) { if (ret) {
if (ret != -ENOSPC) if (ret != -ENOSPC)
mlog_errno(ret); mlog_errno(ret);
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
else
ret = VM_FAULT_SIGBUS;
goto out; goto out;
} }
ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, if (!locked_page) {
fsdata); ret = VM_FAULT_NOPAGE;
if (ret < 0) {
mlog_errno(ret);
goto out; goto out;
} }
ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
fsdata);
BUG_ON(ret != len); BUG_ON(ret != len);
ret = 0; ret = VM_FAULT_LOCKED;
out: out:
return ret; return ret;
} }
...@@ -168,8 +165,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -168,8 +165,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
out: out:
ocfs2_unblock_signals(&oldset); ocfs2_unblock_signals(&oldset);
if (ret)
ret = VM_FAULT_SIGBUS;
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment