Commit 161fa27f authored by Linus Torvalds

Merge branch 'iomap-4.19-merge' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull fs iomap refactoring from Darrick Wong:
 "This is the first part of the XFS changes for 4.19.

  Christoph and Andreas coordinated some refactoring work on the iomap
  code in preparation for removing buffer heads from XFS and porting
  gfs2 to iomap. I'm sending this small pull request ahead of the main
  XFS merge to avoid holding up gfs2 unnecessarily"

* 'iomap-4.19-merge' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  iomap: add inline data support to iomap_readpage_actor
  iomap: support direct I/O to inline data
  iomap: refactor iomap_dio_actor
  iomap: add initial support for writes without buffer heads
  iomap: add an iomap-based readpage and readpages implementation
  iomap: add private pointer to struct iomap
  iomap: add a page_done callback
  iomap: generic inline data handling
  iomap: complete partial direct I/O writes synchronously
  iomap: mark newly allocated buffer heads as new
  fs: factor out a __generic_write_end helper
parents a1a4f841 806a1477
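For context, the buffered-read entry points exported by this series (iomap_readpage, iomap_readpages, iomap_set_page_dirty; see the include/linux/iomap.h hunk at the end of the diff) are meant to be wired directly into a filesystem's address_space_operations. The glue below is a minimal sketch, not code from this merge; foo_iomap_begin, foo_iomap_end and the other foo_* names are hypothetical stand-ins for a filesystem's existing iomap implementation:

/*
 * Hypothetical glue, not part of this merge: a filesystem that already
 * implements ->iomap_begin/->iomap_end can service ->readpage and
 * ->readpages through the new iomap helpers instead of the buffer-head
 * based block_read_full_page()/mpage_readpages().
 */

/* assumed to exist in the filesystem; signatures match struct iomap_ops */
extern int foo_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned flags, struct iomap *iomap);
extern int foo_iomap_end(struct inode *inode, loff_t offset, loff_t length,
		ssize_t written, unsigned flags, struct iomap *iomap);

static const struct iomap_ops foo_iomap_ops = {
	.iomap_begin	= foo_iomap_begin,	/* fills a struct iomap for the range */
	.iomap_end	= foo_iomap_end,
};

static int foo_readpage(struct file *unused, struct page *page)
{
	return iomap_readpage(page, &foo_iomap_ops);
}

static int foo_readpages(struct file *unused, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return iomap_readpages(mapping, pages, nr_pages, &foo_iomap_ops);
}

static const struct address_space_operations foo_aops = {
	.readpage	= foo_readpage,
	.readpages	= foo_readpages,
	.set_page_dirty	= iomap_set_page_dirty,
};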
@@ -1900,15 +1900,16 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
		break;
	case IOMAP_UNWRITTEN:
		/*
-		 * For unwritten regions, we always need to ensure that
-		 * sub-block writes cause the regions in the block we are not
-		 * writing to are zeroed. Set the buffer as new to ensure this.
+		 * For unwritten regions, we always need to ensure that regions
+		 * in the block we are not writing to are zeroed. Mark the
+		 * buffer as new to ensure this.
		 */
		set_buffer_new(bh);
		set_buffer_unwritten(bh);
		/* FALLTHRU */
	case IOMAP_MAPPED:
-		if (offset >= i_size_read(inode))
+		if ((iomap->flags & IOMAP_F_NEW) ||
+		    offset >= i_size_read(inode))
			set_buffer_new(bh);
		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
				inode->i_blkbits;

@@ -2076,6 +2077,40 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(block_write_begin);
int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
struct page *page)
{
loff_t old_size = inode->i_size;
bool i_size_changed = false;
/*
* No need to use i_size_read() here, the i_size cannot change under us
* because we hold i_rwsem.
*
* But it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size.
*/
if (pos + copied > inode->i_size) {
i_size_write(inode, pos + copied);
i_size_changed = true;
}
unlock_page(page);
put_page(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
* ordering of page lock and transaction start for journaling
* filesystems.
*/
if (i_size_changed)
mark_inode_dirty(inode);
return copied;
}
int block_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied,
		struct page *page, void *fsdata)

@@ -2116,39 +2151,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied,
		struct page *page, void *fsdata)
{
-	struct inode *inode = mapping->host;
-	loff_t old_size = inode->i_size;
-	int i_size_changed = 0;
	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-	/*
-	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold i_mutex.
-	 *
-	 * But it's important to update i_size while still holding page lock:
-	 * page writeout could otherwise come in and zero beyond i_size.
-	 */
-	if (pos+copied > inode->i_size) {
-		i_size_write(inode, pos+copied);
-		i_size_changed = 1;
-	}
-	unlock_page(page);
-	put_page(page);
-	if (old_size < pos)
-		pagecache_isize_extended(inode, old_size, pos);
-	/*
-	 * Don't mark the inode dirty under page lock. First, it unnecessarily
-	 * makes the holding time of page lock longer. Second, it forces lock
-	 * ordering of page lock and transaction start for journaling
-	 * filesystems.
-	 */
-	if (i_size_changed)
-		mark_inode_dirty(inode);
-	return copied;
+	return __generic_write_end(mapping->host, pos, copied, page);
}
EXPORT_SYMBOL(generic_write_end);

...
@@ -43,6 +43,8 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
extern void guard_bio_eod(int rw, struct bio *bio);
extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
		get_block_t *get_block, struct iomap *iomap);
+int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
+		struct page *page);
/*
 * char_dev.c

...
/*
 * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (c) 2016 Christoph Hellwig.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,

@@ -18,6 +18,7 @@
#include <linux/uaccess.h>
#include <linux/gfp.h>
#include <linux/mm.h>
+#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>

@@ -103,6 +104,243 @@ iomap_sector(struct iomap *iomap, loff_t pos)
	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
}
static void
iomap_read_inline_data(struct inode *inode, struct page *page,
struct iomap *iomap)
{
size_t size = i_size_read(inode);
void *addr;
if (PageUptodate(page))
return;
BUG_ON(page->index);
BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
addr = kmap_atomic(page);
memcpy(addr, iomap->inline_data, size);
memset(addr + size, 0, PAGE_SIZE - size);
kunmap_atomic(addr);
SetPageUptodate(page);
}
static void
iomap_read_end_io(struct bio *bio)
{
int error = blk_status_to_errno(bio->bi_status);
struct bio_vec *bvec;
int i;
bio_for_each_segment_all(bvec, bio, i)
page_endio(bvec->bv_page, false, error);
bio_put(bio);
}
struct iomap_readpage_ctx {
struct page *cur_page;
bool cur_page_in_bio;
bool is_readahead;
struct bio *bio;
struct list_head *pages;
};
static loff_t
iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
struct iomap *iomap)
{
struct iomap_readpage_ctx *ctx = data;
struct page *page = ctx->cur_page;
unsigned poff = pos & (PAGE_SIZE - 1);
unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
bool is_contig = false;
sector_t sector;
if (iomap->type == IOMAP_INLINE) {
WARN_ON_ONCE(poff);
iomap_read_inline_data(inode, page, iomap);
return PAGE_SIZE;
}
/* we don't support blocksize < PAGE_SIZE quite yet. */
WARN_ON_ONCE(pos != page_offset(page));
WARN_ON_ONCE(plen != PAGE_SIZE);
if (iomap->type != IOMAP_MAPPED || pos >= i_size_read(inode)) {
zero_user(page, poff, plen);
SetPageUptodate(page);
goto done;
}
ctx->cur_page_in_bio = true;
/*
* Try to merge into a previous segment if we can.
*/
sector = iomap_sector(iomap, pos);
if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
if (__bio_try_merge_page(ctx->bio, page, plen, poff))
goto done;
is_contig = true;
}
if (!ctx->bio || !is_contig || bio_full(ctx->bio)) {
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (ctx->bio)
submit_bio(ctx->bio);
if (ctx->is_readahead) /* same as readahead_gfp_mask */
gfp |= __GFP_NORETRY | __GFP_NOWARN;
ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
ctx->bio->bi_opf = REQ_OP_READ;
if (ctx->is_readahead)
ctx->bio->bi_opf |= REQ_RAHEAD;
ctx->bio->bi_iter.bi_sector = sector;
bio_set_dev(ctx->bio, iomap->bdev);
ctx->bio->bi_end_io = iomap_read_end_io;
}
__bio_add_page(ctx->bio, page, plen, poff);
done:
return plen;
}
int
iomap_readpage(struct page *page, const struct iomap_ops *ops)
{
struct iomap_readpage_ctx ctx = { .cur_page = page };
struct inode *inode = page->mapping->host;
unsigned poff;
loff_t ret;
WARN_ON_ONCE(page_has_buffers(page));
for (poff = 0; poff < PAGE_SIZE; poff += ret) {
ret = iomap_apply(inode, page_offset(page) + poff,
PAGE_SIZE - poff, 0, ops, &ctx,
iomap_readpage_actor);
if (ret <= 0) {
WARN_ON_ONCE(ret == 0);
SetPageError(page);
break;
}
}
if (ctx.bio) {
submit_bio(ctx.bio);
WARN_ON_ONCE(!ctx.cur_page_in_bio);
} else {
WARN_ON_ONCE(ctx.cur_page_in_bio);
unlock_page(page);
}
/*
* Just like mpage_readpages and block_read_full_page we always
* return 0 and just mark the page as PageError on errors. This
* should be cleaned up all through the stack eventually.
*/
return 0;
}
EXPORT_SYMBOL_GPL(iomap_readpage);
static struct page *
iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
loff_t length, loff_t *done)
{
while (!list_empty(pages)) {
struct page *page = lru_to_page(pages);
if (page_offset(page) >= (u64)pos + length)
break;
list_del(&page->lru);
if (!add_to_page_cache_lru(page, inode->i_mapping, page->index,
GFP_NOFS))
return page;
/*
* If we already have a page in the page cache at index we are
* done. Upper layers don't care if it is uptodate after the
* readpages call itself as every page gets checked again once
* actually needed.
*/
*done += PAGE_SIZE;
put_page(page);
}
return NULL;
}
static loff_t
iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap)
{
struct iomap_readpage_ctx *ctx = data;
loff_t done, ret;
for (done = 0; done < length; done += ret) {
if (ctx->cur_page && ((pos + done) & (PAGE_SIZE - 1)) == 0) {
if (!ctx->cur_page_in_bio)
unlock_page(ctx->cur_page);
put_page(ctx->cur_page);
ctx->cur_page = NULL;
}
if (!ctx->cur_page) {
ctx->cur_page = iomap_next_page(inode, ctx->pages,
pos, length, &done);
if (!ctx->cur_page)
break;
ctx->cur_page_in_bio = false;
}
ret = iomap_readpage_actor(inode, pos + done, length - done,
ctx, iomap);
}
return done;
}
int
iomap_readpages(struct address_space *mapping, struct list_head *pages,
unsigned nr_pages, const struct iomap_ops *ops)
{
struct iomap_readpage_ctx ctx = {
.pages = pages,
.is_readahead = true,
};
loff_t pos = page_offset(list_entry(pages->prev, struct page, lru));
loff_t last = page_offset(list_entry(pages->next, struct page, lru));
loff_t length = last - pos + PAGE_SIZE, ret = 0;
while (length > 0) {
ret = iomap_apply(mapping->host, pos, length, 0, ops,
&ctx, iomap_readpages_actor);
if (ret <= 0) {
WARN_ON_ONCE(ret == 0);
goto done;
}
pos += ret;
length -= ret;
}
ret = 0;
done:
if (ctx.bio)
submit_bio(ctx.bio);
if (ctx.cur_page) {
if (!ctx.cur_page_in_bio)
unlock_page(ctx.cur_page);
put_page(ctx.cur_page);
}
/*
 * Check that we didn't lose a page due to the arcane calling
* conventions..
*/
WARN_ON_ONCE(!ret && !list_empty(ctx.pages));
return ret;
}
EXPORT_SYMBOL_GPL(iomap_readpages);
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{

@@ -116,6 +354,48 @@ iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
	truncate_pagecache_range(inode, max(pos, i_size), pos + len);
}
static int
iomap_read_page_sync(struct inode *inode, loff_t block_start, struct page *page,
unsigned poff, unsigned plen, unsigned from, unsigned to,
struct iomap *iomap)
{
struct bio_vec bvec;
struct bio bio;
if (iomap->type != IOMAP_MAPPED || block_start >= i_size_read(inode)) {
zero_user_segments(page, poff, from, to, poff + plen);
return 0;
}
bio_init(&bio, &bvec, 1);
bio.bi_opf = REQ_OP_READ;
bio.bi_iter.bi_sector = iomap_sector(iomap, block_start);
bio_set_dev(&bio, iomap->bdev);
__bio_add_page(&bio, page, plen, poff);
return submit_bio_wait(&bio);
}
static int
__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
struct page *page, struct iomap *iomap)
{
loff_t block_size = i_blocksize(inode);
loff_t block_start = pos & ~(block_size - 1);
loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1);
unsigned poff = block_start & (PAGE_SIZE - 1);
unsigned plen = min_t(loff_t, PAGE_SIZE - poff, block_end - block_start);
unsigned from = pos & (PAGE_SIZE - 1), to = from + len;
WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
if (PageUptodate(page))
return 0;
if (from <= poff && to >= poff + plen)
return 0;
return iomap_read_page_sync(inode, block_start, page,
poff, plen, from, to, iomap);
}
static int
iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, struct iomap *iomap)

@@ -133,7 +413,12 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
	if (!page)
		return -ENOMEM;
-	status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	if (iomap->type == IOMAP_INLINE)
+		iomap_read_inline_data(inode, page, iomap);
+	else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
+		status = __block_write_begin_int(page, pos, len, NULL, iomap);
+	else
+		status = __iomap_write_begin(inode, pos, len, page, iomap);
	if (unlikely(status)) {
		unlock_page(page);
		put_page(page);

@@ -146,14 +431,93 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
	return status;
}
int
iomap_set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);
int newly_dirty;
if (unlikely(!mapping))
return !TestSetPageDirty(page);
/*
* Lock out page->mem_cgroup migration to keep PageDirty
* synchronized with per-memcg dirty page counters.
*/
lock_page_memcg(page);
newly_dirty = !TestSetPageDirty(page);
if (newly_dirty)
__set_page_dirty(page, mapping, 0);
unlock_page_memcg(page);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
return newly_dirty;
}
EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
static int
__iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
unsigned copied, struct page *page, struct iomap *iomap)
{
flush_dcache_page(page);
/*
* The blocks that were entirely written will now be uptodate, so we
* don't have to worry about a readpage reading them and overwriting a
* partial write. However if we have encountered a short write and only
* partially written into a block, it will not be marked uptodate, so a
* readpage might come in and destroy our partial write.
*
* Do the simplest thing, and just treat any short write to a non
* uptodate page as a zero-length write, and force the caller to redo
* the whole thing.
*/
if (unlikely(copied < len && !PageUptodate(page))) {
copied = 0;
} else {
SetPageUptodate(page);
iomap_set_page_dirty(page);
}
return __generic_write_end(inode, pos, copied, page);
}
static int
iomap_write_end_inline(struct inode *inode, struct page *page,
struct iomap *iomap, loff_t pos, unsigned copied)
{
void *addr;
WARN_ON_ONCE(!PageUptodate(page));
BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
addr = kmap_atomic(page);
memcpy(iomap->inline_data + pos, addr + pos, copied);
kunmap_atomic(addr);
mark_inode_dirty(inode);
__generic_write_end(inode, pos, copied, page);
return copied;
}
static int
iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
-		unsigned copied, struct page *page)
+		unsigned copied, struct page *page, struct iomap *iomap)
{
	int ret;
-	ret = generic_write_end(NULL, inode->i_mapping, pos, len,
-			copied, page, NULL);
+	if (iomap->type == IOMAP_INLINE) {
+		ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
+	} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
+		ret = generic_write_end(NULL, inode->i_mapping, pos, len,
+				copied, page, NULL);
+	} else {
+		ret = __iomap_write_end(inode, pos, len, copied, page, iomap);
+	}
+	if (iomap->page_done)
+		iomap->page_done(inode, pos, copied, page, iomap);
	if (ret < len)
		iomap_write_failed(inode, pos, len);
	return ret;

@@ -208,7 +572,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		flush_dcache_page(page);
-		status = iomap_write_end(inode, pos, bytes, copied, page);
+		status = iomap_write_end(inode, pos, bytes, copied, page,
+				iomap);
		if (unlikely(status < 0))
			break;
		copied = status;
@@ -302,7 +667,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
		WARN_ON_ONCE(!PageUptodate(page));
-		status = iomap_write_end(inode, pos, bytes, bytes, page);
+		status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
		if (unlikely(status <= 0)) {
			if (WARN_ON_ONCE(status == 0))
				return -EIO;
@@ -354,7 +719,7 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
	zero_user(page, offset, bytes);
	mark_page_accessed(page);
-	return iomap_write_end(inode, pos, bytes, bytes, page);
+	return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
}

static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,

@@ -440,11 +805,16 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
	struct page *page = data;
	int ret;
+	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
		if (ret)
			return ret;
		block_commit_write(page, 0, length);
+	} else {
+		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE);
+	}
	return length;
}
@@ -811,6 +1181,7 @@ struct iomap_dio {
	atomic_t		ref;
	unsigned		flags;
	int			error;
+	bool			wait_for_completion;
	union {
		/* used during submission and for synchronous completion: */

@@ -914,9 +1285,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
	if (atomic_dec_and_test(&dio->ref)) {
-		if (is_sync_kiocb(dio->iocb)) {
+		if (dio->wait_for_completion) {
			struct task_struct *waiter = dio->submit.waiter;
			WRITE_ONCE(dio->submit.waiter, NULL);
			wake_up_process(waiter);
		} else if (dio->flags & IOMAP_DIO_WRITE) {
@@ -963,10 +1333,9 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
}

static loff_t
-iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap)
+iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
+		struct iomap_dio *dio, struct iomap *iomap)
{
-	struct iomap_dio *dio = data;
	unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
	unsigned int fs_block_size = i_blocksize(inode), pad;
	unsigned int align = iov_iter_alignment(dio->submit.iter);

@@ -980,23 +1349,14 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
	if ((pos | length | align) & ((1 << blkbits) - 1))
		return -EINVAL;
-	switch (iomap->type) {
-	case IOMAP_HOLE:
-		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
-			return -EIO;
-		/*FALLTHRU*/
-	case IOMAP_UNWRITTEN:
-		if (!(dio->flags & IOMAP_DIO_WRITE)) {
-			length = iov_iter_zero(length, dio->submit.iter);
-			dio->size += length;
-			return length;
-		}
+	if (iomap->type == IOMAP_UNWRITTEN) {
		dio->flags |= IOMAP_DIO_UNWRITTEN;
		need_zeroout = true;
-		break;
-	case IOMAP_MAPPED:
+	}
	if (iomap->flags & IOMAP_F_SHARED)
		dio->flags |= IOMAP_DIO_COW;
	if (iomap->flags & IOMAP_F_NEW) {
		need_zeroout = true;
	} else {

@@ -1011,11 +1371,6 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
		    blk_queue_fua(bdev_get_queue(iomap->bdev)))
			use_fua = true;
	}
-		break;
-	default:
-		WARN_ON_ONCE(1);
-		return -EIO;
-	}

	/*
	 * Operate on a partial iter trimmed to the extent we were called for.

@@ -1093,6 +1448,66 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
	return copied;
}
static loff_t
iomap_dio_hole_actor(loff_t length, struct iomap_dio *dio)
{
length = iov_iter_zero(length, dio->submit.iter);
dio->size += length;
return length;
}
static loff_t
iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
struct iomap_dio *dio, struct iomap *iomap)
{
struct iov_iter *iter = dio->submit.iter;
size_t copied;
BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data));
if (dio->flags & IOMAP_DIO_WRITE) {
loff_t size = inode->i_size;
if (pos > size)
memset(iomap->inline_data + size, 0, pos - size);
copied = copy_from_iter(iomap->inline_data + pos, length, iter);
if (copied) {
if (pos + copied > size)
i_size_write(inode, pos + copied);
mark_inode_dirty(inode);
}
} else {
copied = copy_to_iter(iomap->inline_data + pos, length, iter);
}
dio->size += copied;
return copied;
}
static loff_t
iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
void *data, struct iomap *iomap)
{
struct iomap_dio *dio = data;
switch (iomap->type) {
case IOMAP_HOLE:
if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
return -EIO;
return iomap_dio_hole_actor(length, dio);
case IOMAP_UNWRITTEN:
if (!(dio->flags & IOMAP_DIO_WRITE))
return iomap_dio_hole_actor(length, dio);
return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
case IOMAP_MAPPED:
return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
case IOMAP_INLINE:
return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
default:
WARN_ON_ONCE(1);
return -EIO;
}
}
/*
 * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
 * is being issued as AIO or not. This allows us to optimise pure data writes

@@ -1131,13 +1546,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
	dio->end_io = end_io;
	dio->error = 0;
	dio->flags = 0;
+	dio->wait_for_completion = is_sync_kiocb(iocb);
	dio->submit.iter = iter;
-	if (is_sync_kiocb(iocb)) {
-		dio->submit.waiter = current;
-		dio->submit.cookie = BLK_QC_T_NONE;
-		dio->submit.last_queue = NULL;
-	}
+	dio->submit.waiter = current;
+	dio->submit.cookie = BLK_QC_T_NONE;
+	dio->submit.last_queue = NULL;
	if (iov_iter_rw(iter) == READ) {
		if (pos >= dio->i_size)

@@ -1187,7 +1601,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
			dio_warn_stale_pagecache(iocb->ki_filp);
	ret = 0;
-	if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+	if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
	    !inode->i_sb->s_dio_done_wq) {
		ret = sb_init_dio_done_wq(inode->i_sb);
		if (ret < 0)

@@ -1202,8 +1616,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
				iomap_dio_actor);
		if (ret <= 0) {
			/* magic error code to fall back to buffered I/O */
-			if (ret == -ENOTBLK)
+			if (ret == -ENOTBLK) {
+				dio->wait_for_completion = true;
				ret = 0;
+			}
			break;
		}
		pos += ret;

@@ -1224,7 +1640,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
	dio->flags &= ~IOMAP_DIO_NEED_SYNC;
	if (!atomic_dec_and_test(&dio->ref)) {
-		if (!is_sync_kiocb(iocb))
+		if (!dio->wait_for_completion)
			return -EIOCBQUEUED;
		for (;;) {

...
@@ -626,7 +626,7 @@ xfs_file_iomap_begin_delay(
	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
	 * them out if the write happens to fail.
	 */
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
	trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
	if (isnullstartblock(got.br_startblock))

@@ -1032,6 +1032,8 @@ xfs_file_iomap_begin(
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;
+	iomap->flags |= IOMAP_F_BUFFER_HEAD;
	if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) &&
			!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
		/* Reserve delalloc blocks for regular writeback. */

@@ -1132,7 +1134,7 @@ xfs_file_iomap_begin(
	if (error)
		return error;
-	iomap->flags = IOMAP_F_NEW;
+	iomap->flags |= IOMAP_F_NEW;
	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
out_finish:

...
@@ -9,6 +9,7 @@ struct fiemap_extent_info;
struct inode;
struct iov_iter;
struct kiocb;
+struct page;
struct vm_area_struct;
struct vm_fault;

@@ -29,6 +30,7 @@ struct vm_fault;
 */
#define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
#define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
+#define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */

/*
 * Flags that only need to be reported for IOMAP_REPORT requests:

@@ -55,6 +57,16 @@ struct iomap {
	u16			flags;	/* flags for mapping */
	struct block_device	*bdev;	/* block device for I/O */
	struct dax_device	*dax_dev; /* dax_dev for dax operations */
+	void			*inline_data;
+	void			*private; /* filesystem private */
+	/*
+	 * Called when finished processing a page in the mapping returned in
+	 * this iomap. At least for now this is only supported in the buffered
+	 * write path.
+	 */
+	void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
+			struct page *page, struct iomap *iomap);
};

/*

@@ -88,6 +100,10 @@ struct iomap_ops {
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
		const struct iomap_ops *ops);
+int iomap_readpage(struct page *page, const struct iomap_ops *ops);
+int iomap_readpages(struct address_space *mapping, struct list_head *pages,
+		unsigned nr_pages, const struct iomap_ops *ops);
+int iomap_set_page_dirty(struct page *page);
int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len,
		const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,

...
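As a usage note for the new struct iomap fields above, here is a hedged sketch, not code from this merge: a filesystem's ->iomap_begin decides per mapping whether the legacy buffer-head path is kept, can hand back inline data directly, and may register a page_done callback plus a private pointer that iomap_write_end() passes back after each page. All foo_* names below are hypothetical:

/* Hypothetical ->iomap_begin fragment illustrating the new fields. */
static void foo_page_done(struct inode *inode, loff_t pos, unsigned copied,
		struct page *page, struct iomap *iomap)
{
	/* e.g. close a per-page transaction using fs-private state */
	foo_write_done(iomap->private, pos, copied);
}

static int foo_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned flags, struct iomap *iomap)
{
	struct foo_inode_info *fi = FOO_I(inode);	/* hypothetical per-inode state */

	if (foo_inode_is_inline(fi)) {
		/* data lives inside the on-disk inode */
		iomap->type = IOMAP_INLINE;
		iomap->inline_data = fi->i_inline_buf;
		iomap->offset = 0;
		iomap->length = i_size_read(inode);
	} else {
		/* hypothetical block mapping helper; sets type/addr/bdev */
		foo_map_blocks(fi, offset, length, iomap);
		/* opt back into the legacy buffer-head write path if needed */
		iomap->flags |= IOMAP_F_BUFFER_HEAD;
	}

	iomap->private = fi;
	iomap->page_done = foo_page_done;	/* called after each written page */
	return 0;
}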