Commit c03cea42 authored by Christoph Hellwig, committed by Darrick J. Wong

iomap: add initial support for writes without buffer heads

For now this is limited to blocksize == PAGE_SIZE, where we can simply read
in the full page in write begin and set the whole page dirty after
copying data into it.  This code is enabled by default, and XFS will now
be fed pages without buffer heads in ->writepage and ->writepages.

If a file system sets the IOMAP_F_BUFFER_HEAD flag on the iomap, the old
path will still be used.  This both helps the transition in XFS and
prepares for the gfs2 migration to the iomap infrastructure.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 72b4daa2
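As context for the opt-out flag described above (illustration only, not part of the commit): a filesystem that still needs buffer heads sets IOMAP_F_BUFFER_HEAD from its ->iomap_begin method, much as XFS does in the hunks below. A minimal sketch, assuming the 4.18-era iomap_begin signature; myfs_iomap_begin is a hypothetical example:

static int
myfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned flags, struct iomap *iomap)
{
	/*
	 * Keep using the buffer-head based buffered write path for this
	 * mapping; omitting the flag selects the new bufferhead-free code
	 * added by this patch.
	 */
	iomap->flags |= IOMAP_F_BUFFER_HEAD;

	/* ... fill in the rest of the iomap (type, offset, length) ... */
	return 0;
}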
@@ -348,6 +348,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 	truncate_pagecache_range(inode, max(pos, i_size), pos + len);
 }
 
+static int
+iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
+		unsigned poff, unsigned plen, unsigned from, unsigned to,
+		struct iomap *iomap)
+{
+	struct bio_vec bvec;
+	struct bio bio;
+
+	if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
+		zero_user_segments(page, poff, from, to, poff + plen);
+		return 0;
+	}
+
+	bio_init(&bio, &bvec, 1);
+	bio.bi_opf = REQ_OP_READ;
+	bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
+	bio_set_dev(&bio, iomap->bdev);
+	__bio_add_page(&bio, page, plen, poff);
+	return submit_bio_wait(&bio);
+}
+
+static int
+__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+		struct page *page, struct iomap *iomap)
+{
+	loff_t block_size = i_blocksize(inode);
+	loff_t block_start = pos & ~(block_size - 1);
+	loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
+	unsigned poff = block_start & (PAGE_SIZE - 1);
+	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
+	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
+
+	WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+
+	if (PageUptodate(page))
+		return 0;
+	if (from <= poff && to >= poff + plen)
+		return 0;
+	return iomap_read_page_sync(inode, block_start, page,
+			poff, plen, from, to, iomap);
+}
+
 static int
 iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, struct iomap *iomap)
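A worked example of the rounding arithmetic in __iomap_write_begin above (illustration only, not part of the commit; plain userspace C, assuming a 4096-byte block size equal to PAGE_SIZE, the only case this patch enables):

#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
	unsigned long long block_size = 4096, pos = 5000, len = 300;
	unsigned long long block_start = pos & ~(block_size - 1);
	unsigned long long block_end =
			(pos + len + block_size - 1) & ~(block_size - 1);
	unsigned poff = block_start & (PAGE_SIZE - 1);
	unsigned plen = PAGE_SIZE - poff < block_end - block_start ?
			PAGE_SIZE - poff : block_end - block_start;
	unsigned from = pos & (PAGE_SIZE - 1), to = from + len;

	/* prints: block_start=4096 block_end=8192 poff=0 plen=4096 from=904 to=1204 */
	printf("block_start=%llu block_end=%llu poff=%u plen=%u from=%u to=%u\n",
			block_start, block_end, poff, plen, from, to);

	/*
	 * The write does not cover the whole block (from > poff), so unless
	 * the page is already uptodate it must be read in synchronously
	 * before the copy, via iomap_read_page_sync.
	 */
	return 0;
}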
@@ -367,9 +409,10 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 
 	if (iomap->type == IOMAP_INLINE)
 		iomap_read_inline_data(inode, page, iomap);
-	else
+	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
 		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	else
+		status = __iomap_write_begin(inode, pos, len, page, iomap);
 
 	if (unlikely(status)) {
 		unlock_page(page);
 		put_page(page);
@@ -382,6 +425,57 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
 	return status;
 }
 
+int
+iomap_set_page_dirty(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+	int newly_dirty;
+
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	/*
+	 * Lock out page->mem_cgroup migration to keep PageDirty
+	 * synchronized with per-memcg dirty page counters.
+	 */
+	lock_page_memcg(page);
+	newly_dirty = !TestSetPageDirty(page);
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 0);
+	unlock_page_memcg(page);
+
+	if (newly_dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	return newly_dirty;
+}
+EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
+
+static int
+__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+		unsigned copied, struct page *page, struct iomap *iomap)
+{
+	flush_dcache_page(page);
+
+	/*
+	 * The blocks that were entirely written will now be uptodate, so we
+	 * don't have to worry about a readpage reading them and overwriting a
+	 * partial write.  However if we have encountered a short write and only
+	 * partially written into a block, it will not be marked uptodate, so a
+	 * readpage might come in and destroy our partial write.
+	 *
+	 * Do the simplest thing, and just treat any short write to a non
+	 * uptodate page as a zero-length write, and force the caller to redo
+	 * the whole thing.
+	 */
+	if (unlikely(copied < len && !PageUptodate(page))) {
+		copied = 0;
+	} else {
+		SetPageUptodate(page);
+		iomap_set_page_dirty(page);
+	}
+
+	return __generic_write_end(inode, pos, copied, page);
+}
+
 static int
 iomap_write_end_inline(struct inode *inode, struct page *page,
 		struct iomap *iomap, loff_t pos, unsigned copied)
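To make the short-write rule in __iomap_write_end above concrete (illustration only, not part of the commit): with a 4096-byte block, a write that only manages to copy 1500 of 4096 bytes into a page that was never read in leaves the block partially written and not uptodate, so returning the truncated count would let a later readpage clobber the partial data. Reporting zero copied instead makes the caller fault the source pages in and retry:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative decision table for __iomap_write_end's short-write rule. */
static unsigned write_end_copied(unsigned len, unsigned copied, bool uptodate)
{
	if (copied < len && !uptodate)
		return 0;	/* treat as zero-length write; caller retries */
	return copied;		/* page is (or becomes) uptodate and dirty */
}

int main(void)
{
	printf("%u\n", write_end_copied(4096, 1500, false));	/* 0    */
	printf("%u\n", write_end_copied(4096, 1500, true));	/* 1500 */
	printf("%u\n", write_end_copied(4096, 4096, false));	/* 4096 */
	return 0;
}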
@@ -408,9 +502,11 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
 
 	if (iomap->type == IOMAP_INLINE) {
 		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
-	} else {
+	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
 		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
 				copied, page, NULL);
+	} else {
+		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
 	}
 
 	if (iomap->page_done)
@@ -703,11 +799,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct page *page = data;
 	int ret;
 
-	ret = __block_write_begin_int(page, pos, length, NULL, iomap);
-	if (ret)
-		return ret;
+	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		if (ret)
+			return ret;
+		block_commit_write(page, 0, length);
+	} else {
+		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	}
 
-	block_commit_write(page, 0, length);
 	return length;
 }
...
@@ -626,7 +626,7 @@ xfs_file_iomap_begin_delay(
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
 done:
 	if (isnullstartblock(got.br_startblock))
@@ -1019,6 +1019,8 @@ xfs_file_iomap_begin(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
+	iomap->flags |= IOMAP_F_BUFFER_HEAD;
+
 	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
 			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		/* Reserve delalloc blocks for regular writeback. */
@@ -1119,7 +1121,7 @@ xfs_file_iomap_begin(
 	if (error)
 		return error;
 
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
 	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 
 out_finish:
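A side note on the two `=` to `|=` conversions above (illustration only, not part of the commit): xfs_file_iomap_begin now sets IOMAP_F_BUFFER_HEAD up front, so a later plain assignment of IOMAP_F_NEW would silently clear it. A tiny runnable demo using the flag values from the header hunk below:

#include <stdio.h>

#define IOMAP_F_NEW		0x01
#define IOMAP_F_BUFFER_HEAD	0x04

int main(void)
{
	unsigned flags = IOMAP_F_BUFFER_HEAD;	/* set early in ->iomap_begin */

	flags = IOMAP_F_NEW;	/* old code: drops IOMAP_F_BUFFER_HEAD */
	printf("0x%02x\n", flags);		/* 0x01 */

	flags = IOMAP_F_BUFFER_HEAD;
	flags |= IOMAP_F_NEW;	/* new code: both flags survive */
	printf("0x%02x\n", flags);		/* 0x05 */
	return 0;
}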
...
@@ -30,6 +30,7 @@ struct vm_fault;
  */
 #define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
 #define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
+#define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
@@ -102,6 +103,7 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 int iomap_readpage(struct page *page, const struct iomap_ops *ops);
 int iomap_readpages(struct address_space *mapping, struct list_head *pages,
 		unsigned nr_pages, const struct iomap_ops *ops);
+int iomap_set_page_dirty(struct page *page);
 int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
 		const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
...