Commit af9cc401 authored by Yan, Zheng; committed by Ilya Dryomov

ceph: invalidate pages that are beyond EOF in ceph_writepages_start()

Dirty pages can be associated with different capsnaps. Different capsnaps
may have different EOF values. So invalidating dirty pages according to
the largest EOF value is wrong. Dirty pages that are beyond EOF, but
associated with another capsnap, do not get invalidated.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent bc4b5ad3
...@@ -801,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -801,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping,
struct ceph_osd_request *req = NULL; struct ceph_osd_request *req = NULL;
struct ceph_writeback_ctl ceph_wbc; struct ceph_writeback_ctl ceph_wbc;
bool should_loop, range_whole = false; bool should_loop, range_whole = false;
bool stop, done = false; bool done = false;
dout("writepages_start %p (mode=%s)\n", inode, dout("writepages_start %p (mode=%s)\n", inode,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" : wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
...@@ -865,8 +865,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -865,8 +865,7 @@ static int ceph_writepages_start(struct address_space *mapping,
ceph_put_snap_context(last_snapc); ceph_put_snap_context(last_snapc);
last_snapc = snapc; last_snapc = snapc;
stop = false; while (!done && index <= end) {
while (!stop && index <= end) {
int num_ops = 0, op_idx; int num_ops = 0, op_idx;
unsigned i, pvec_pages, max_pages, locked_pages = 0; unsigned i, pvec_pages, max_pages, locked_pages = 0;
struct page **pages = NULL, **data_pages; struct page **pages = NULL, **data_pages;
...@@ -899,16 +898,26 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -899,16 +898,26 @@ static int ceph_writepages_start(struct address_space *mapping,
unlock_page(page); unlock_page(page);
continue; continue;
} }
if (strip_unit_end && (page->index > strip_unit_end)) { /* only if matching snap context */
dout("end of strip unit %p\n", page); pgsnapc = page_snap_context(page);
if (pgsnapc != snapc) {
dout("page snapc %p %lld != oldest %p %lld\n",
pgsnapc, pgsnapc->seq, snapc, snapc->seq);
unlock_page(page); unlock_page(page);
break; continue;
} }
if (page_offset(page) >= ceph_wbc.i_size) { if (page_offset(page) >= ceph_wbc.i_size) {
dout("%p page eof %llu\n", dout("%p page eof %llu\n",
page, ceph_wbc.i_size); page, ceph_wbc.i_size);
/* not done if range_cyclic */ if (ceph_wbc.size_stable ||
stop = true; page_offset(page) >= i_size_read(inode))
mapping->a_ops->invalidatepage(page,
0, PAGE_SIZE);
unlock_page(page);
continue;
}
if (strip_unit_end && (page->index > strip_unit_end)) {
dout("end of strip unit %p\n", page);
unlock_page(page); unlock_page(page);
break; break;
} }
...@@ -922,15 +931,6 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -922,15 +931,6 @@ static int ceph_writepages_start(struct address_space *mapping,
wait_on_page_writeback(page); wait_on_page_writeback(page);
} }
/* only if matching snap context */
pgsnapc = page_snap_context(page);
if (pgsnapc != snapc) {
dout("page snapc %p %lld != oldest %p %lld\n",
pgsnapc, pgsnapc->seq, snapc, snapc->seq);
unlock_page(page);
continue;
}
if (!clear_page_dirty_for_io(page)) { if (!clear_page_dirty_for_io(page)) {
dout("%p !clear_page_dirty_for_io\n", page); dout("%p !clear_page_dirty_for_io\n", page);
unlock_page(page); unlock_page(page);
...@@ -1143,7 +1143,7 @@ static int ceph_writepages_start(struct address_space *mapping, ...@@ -1143,7 +1143,7 @@ static int ceph_writepages_start(struct address_space *mapping,
* we tagged for writeback prior to entering this loop. * we tagged for writeback prior to entering this loop.
*/ */
if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
done = stop = true; done = true;
release_pvec_pages: release_pvec_pages:
dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr, dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
......
...@@ -1867,20 +1867,9 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) ...@@ -1867,20 +1867,9 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
* possibly truncate them.. so write AND block! * possibly truncate them.. so write AND block!
*/ */
if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
struct ceph_cap_snap *capsnap;
to = ci->i_truncate_size;
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
// MDS should have revoked Frw caps
WARN_ON_ONCE(capsnap->writing);
if (capsnap->dirty_pages && capsnap->size > to)
to = capsnap->size;
}
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
dout("__do_pending_vmtruncate %p flushing snaps first\n", dout("__do_pending_vmtruncate %p flushing snaps first\n",
inode); inode);
truncate_pagecache(inode, to);
filemap_write_and_wait_range(&inode->i_data, 0, filemap_write_and_wait_range(&inode->i_data, 0,
inode->i_sb->s_maxbytes); inode->i_sb->s_maxbytes);
goto retry; goto retry;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment