Commit 37187df4 authored by Linus Torvalds

Merge tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull iomap updates from Darrick Wong:
 "There's not a lot of new stuff going on here -- a little bit of code
  refactoring to make iomap workable with btrfs' fsync locking model,
  cleanups in preparation for adding THP support for filesystems, and
  fixing a data corruption issue for blocksize < pagesize filesystems.

  Summary:

   - Don't WARN_ON weird states that unprivileged users can create.

   - Don't invalidate page cache when direct writes want to fall back to
     buffered.

   - Fix some problems when readahead ios fail.

   - Fix a problem where inline data pages weren't getting flushed
     during an unshare operation.

   - Rework iomap to support arbitrarily many blocks per page in
     preparation to support THP for the page cache.

   - Fix a bug in the blocksize < pagesize buffered io path where we
     could fail to initialize the many-blocks-per-page uptodate bitmap
     correctly when the backing page is actually up to date. This could
     cause us to forget to write out dirty pages.

   - Split out the generic_write_sync at the end of the directio write
     path so that btrfs can drop the inode lock before sync'ing the
     file.

   - Call inode_dio_end before trying to sync the file after an O_DSYNC
     direct write (instead of afterwards) to match the behavior of the
     old directio code"

* tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  iomap: Call inode_dio_end() before generic_write_sync()
  iomap: Allow filesystem to call iomap_dio_complete without i_rwsem
  iomap: Set all uptodate bits for an Uptodate page
  iomap: Change calling convention for zeroing
  iomap: Convert iomap_write_end types
  iomap: Convert write_count to write_bytes_pending
  iomap: Convert read_count to read_bytes_pending
  iomap: Support arbitrarily many blocks per page
  iomap: Use bitmap ops to set uptodate bits
  iomap: Use kzalloc to allocate iomap_page
  fs: Introduce i_blocks_per_page
  iomap: Fix misplaced page flushing
  iomap: Use round_down/round_up macros in __iomap_write_begin
  iomap: Mark read blocks uptodate in write_begin
  iomap: Clear page error before beginning a write
  iomap: Fix direct I/O write consistency check
  iomap: fix WARN_ON_ONCE() from unprivileged users
parents 531d29b0 1a31182e
...@@ -1037,18 +1037,18 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, ...@@ -1037,18 +1037,18 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
return ret; return ret;
} }
int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
struct iomap *iomap)
{ {
sector_t sector = iomap_sector(iomap, pos & PAGE_MASK); sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
pgoff_t pgoff; pgoff_t pgoff;
long rc, id; long rc, id;
void *kaddr; void *kaddr;
bool page_aligned = false; bool page_aligned = false;
unsigned offset = offset_in_page(pos);
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) && if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
IS_ALIGNED(size, PAGE_SIZE)) (size == PAGE_SIZE))
page_aligned = true; page_aligned = true;
rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff); rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
...@@ -1058,8 +1058,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, ...@@ -1058,8 +1058,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
id = dax_read_lock(); id = dax_read_lock();
if (page_aligned) if (page_aligned)
rc = dax_zero_page_range(iomap->dax_dev, pgoff, rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
size >> PAGE_SHIFT);
else else
rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL); rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) { if (rc < 0) {
...@@ -1072,7 +1071,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, ...@@ -1072,7 +1071,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
dax_flush(iomap->dax_dev, kaddr + offset, size); dax_flush(iomap->dax_dev, kaddr + offset, size);
} }
dax_read_unlock(id); dax_read_unlock(id);
return 0; return size;
} }
static loff_t static loff_t
......
...@@ -22,18 +22,25 @@ ...@@ -22,18 +22,25 @@
#include "../internal.h" #include "../internal.h"
/* /*
* Structure allocated for each page when block size < PAGE_SIZE to track * Structure allocated for each page or THP when block size < page size
* sub-page uptodate status and I/O completions. * to track sub-page uptodate status and I/O completions.
*/ */
struct iomap_page { struct iomap_page {
atomic_t read_count; atomic_t read_bytes_pending;
atomic_t write_count; atomic_t write_bytes_pending;
spinlock_t uptodate_lock; spinlock_t uptodate_lock;
DECLARE_BITMAP(uptodate, PAGE_SIZE / 512); unsigned long uptodate[];
}; };
static inline struct iomap_page *to_iomap_page(struct page *page) static inline struct iomap_page *to_iomap_page(struct page *page)
{ {
/*
* per-block data is stored in the head page. Callers should
* not be dealing with tail pages (and if they are, they can
* call thp_head() first).
*/
VM_BUG_ON_PGFLAGS(PageTail(page), page);
if (page_has_private(page)) if (page_has_private(page))
return (struct iomap_page *)page_private(page); return (struct iomap_page *)page_private(page);
return NULL; return NULL;
...@@ -45,20 +52,16 @@ static struct iomap_page * ...@@ -45,20 +52,16 @@ static struct iomap_page *
iomap_page_create(struct inode *inode, struct page *page) iomap_page_create(struct inode *inode, struct page *page)
{ {
struct iomap_page *iop = to_iomap_page(page); struct iomap_page *iop = to_iomap_page(page);
unsigned int nr_blocks = i_blocks_per_page(inode, page);
if (iop || i_blocksize(inode) == PAGE_SIZE) if (iop || nr_blocks <= 1)
return iop; return iop;
iop = kmalloc(sizeof(*iop), GFP_NOFS | __GFP_NOFAIL); iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
atomic_set(&iop->read_count, 0); GFP_NOFS | __GFP_NOFAIL);
atomic_set(&iop->write_count, 0);
spin_lock_init(&iop->uptodate_lock); spin_lock_init(&iop->uptodate_lock);
bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE); if (PageUptodate(page))
bitmap_fill(iop->uptodate, nr_blocks);
/*
* migrate_page_move_mapping() assumes that pages with private data have
* their count elevated by 1.
*/
attach_page_private(page, iop); attach_page_private(page, iop);
return iop; return iop;
} }
...@@ -67,11 +70,14 @@ static void ...@@ -67,11 +70,14 @@ static void
iomap_page_release(struct page *page) iomap_page_release(struct page *page)
{ {
struct iomap_page *iop = detach_page_private(page); struct iomap_page *iop = detach_page_private(page);
unsigned int nr_blocks = i_blocks_per_page(page->mapping->host, page);
if (!iop) if (!iop)
return; return;
WARN_ON_ONCE(atomic_read(&iop->read_count)); WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
WARN_ON_ONCE(atomic_read(&iop->write_count)); WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
PageUptodate(page));
kfree(iop); kfree(iop);
} }
...@@ -142,19 +148,11 @@ iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len) ...@@ -142,19 +148,11 @@ iomap_iop_set_range_uptodate(struct page *page, unsigned off, unsigned len)
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
unsigned first = off >> inode->i_blkbits; unsigned first = off >> inode->i_blkbits;
unsigned last = (off + len - 1) >> inode->i_blkbits; unsigned last = (off + len - 1) >> inode->i_blkbits;
bool uptodate = true;
unsigned long flags; unsigned long flags;
unsigned int i;
spin_lock_irqsave(&iop->uptodate_lock, flags); spin_lock_irqsave(&iop->uptodate_lock, flags);
for (i = 0; i < PAGE_SIZE / i_blocksize(inode); i++) { bitmap_set(iop->uptodate, first, last - first + 1);
if (i >= first && i <= last) if (bitmap_full(iop->uptodate, i_blocks_per_page(inode, page)))
set_bit(i, iop->uptodate);
else if (!test_bit(i, iop->uptodate))
uptodate = false;
}
if (uptodate)
SetPageUptodate(page); SetPageUptodate(page);
spin_unlock_irqrestore(&iop->uptodate_lock, flags); spin_unlock_irqrestore(&iop->uptodate_lock, flags);
} }
...@@ -171,13 +169,6 @@ iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len) ...@@ -171,13 +169,6 @@ iomap_set_range_uptodate(struct page *page, unsigned off, unsigned len)
SetPageUptodate(page); SetPageUptodate(page);
} }
static void
iomap_read_finish(struct iomap_page *iop, struct page *page)
{
if (!iop || atomic_dec_and_test(&iop->read_count))
unlock_page(page);
}
static void static void
iomap_read_page_end_io(struct bio_vec *bvec, int error) iomap_read_page_end_io(struct bio_vec *bvec, int error)
{ {
...@@ -191,7 +182,8 @@ iomap_read_page_end_io(struct bio_vec *bvec, int error) ...@@ -191,7 +182,8 @@ iomap_read_page_end_io(struct bio_vec *bvec, int error)
iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len); iomap_set_range_uptodate(page, bvec->bv_offset, bvec->bv_len);
} }
iomap_read_finish(iop, page); if (!iop || atomic_sub_and_test(bvec->bv_len, &iop->read_bytes_pending))
unlock_page(page);
} }
static void static void
...@@ -271,30 +263,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ...@@ -271,30 +263,19 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
} }
ctx->cur_page_in_bio = true; ctx->cur_page_in_bio = true;
if (iop)
atomic_add(plen, &iop->read_bytes_pending);
/* /* Try to merge into a previous segment if we can */
* Try to merge into a previous segment if we can.
*/
sector = iomap_sector(iomap, pos); sector = iomap_sector(iomap, pos);
if (ctx->bio && bio_end_sector(ctx->bio) == sector) if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
if (__bio_try_merge_page(ctx->bio, page, plen, poff,
&same_page))
goto done;
is_contig = true; is_contig = true;
if (is_contig &&
__bio_try_merge_page(ctx->bio, page, plen, poff, &same_page)) {
if (!same_page && iop)
atomic_inc(&iop->read_count);
goto done;
} }
/* if (!is_contig || bio_full(ctx->bio, plen)) {
* If we start a new segment we need to increase the read count, and we
* need to do so before submitting any previous full bio to make sure
* that we don't prematurely unlock the page.
*/
if (iop)
atomic_inc(&iop->read_count);
if (!ctx->bio || !is_contig || bio_full(ctx->bio, plen)) {
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
gfp_t orig_gfp = gfp; gfp_t orig_gfp = gfp;
int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
...@@ -571,13 +552,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, ...@@ -571,13 +552,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
{ {
struct iomap_page *iop = iomap_page_create(inode, page); struct iomap_page *iop = iomap_page_create(inode, page);
loff_t block_size = i_blocksize(inode); loff_t block_size = i_blocksize(inode);
loff_t block_start = pos & ~(block_size - 1); loff_t block_start = round_down(pos, block_size);
loff_t block_end = (pos + len + block_size - 1) & ~(block_size - 1); loff_t block_end = round_up(pos + len, block_size);
unsigned from = offset_in_page(pos), to = from + len, poff, plen; unsigned from = offset_in_page(pos), to = from + len, poff, plen;
int status;
if (PageUptodate(page)) if (PageUptodate(page))
return 0; return 0;
ClearPageError(page);
do { do {
iomap_adjust_read_range(inode, iop, &block_start, iomap_adjust_read_range(inode, iop, &block_start,
...@@ -594,14 +575,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, ...@@ -594,14 +575,13 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE)) if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
return -EIO; return -EIO;
zero_user_segments(page, poff, from, to, poff + plen); zero_user_segments(page, poff, from, to, poff + plen);
iomap_set_range_uptodate(page, poff, plen); } else {
continue; int status = iomap_read_page_sync(block_start, page,
poff, plen, srcmap);
if (status)
return status;
} }
iomap_set_range_uptodate(page, poff, plen);
status = iomap_read_page_sync(block_start, page, poff, plen,
srcmap);
if (status)
return status;
} while ((block_start += plen) < block_end); } while ((block_start += plen) < block_end);
return 0; return 0;
...@@ -685,9 +665,8 @@ iomap_set_page_dirty(struct page *page) ...@@ -685,9 +665,8 @@ iomap_set_page_dirty(struct page *page)
} }
EXPORT_SYMBOL_GPL(iomap_set_page_dirty); EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
static int static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
__iomap_write_end(struct inode *inode, loff_t pos, unsigned len, size_t copied, struct page *page)
unsigned copied, struct page *page)
{ {
flush_dcache_page(page); flush_dcache_page(page);
...@@ -709,15 +688,15 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len, ...@@ -709,15 +688,15 @@ __iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
return copied; return copied;
} }
static int static size_t iomap_write_end_inline(struct inode *inode, struct page *page,
iomap_write_end_inline(struct inode *inode, struct page *page, struct iomap *iomap, loff_t pos, size_t copied)
struct iomap *iomap, loff_t pos, unsigned copied)
{ {
void *addr; void *addr;
WARN_ON_ONCE(!PageUptodate(page)); WARN_ON_ONCE(!PageUptodate(page));
BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data)); BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data));
flush_dcache_page(page);
addr = kmap_atomic(page); addr = kmap_atomic(page);
memcpy(iomap->inline_data + pos, addr + pos, copied); memcpy(iomap->inline_data + pos, addr + pos, copied);
kunmap_atomic(addr); kunmap_atomic(addr);
...@@ -726,13 +705,14 @@ iomap_write_end_inline(struct inode *inode, struct page *page, ...@@ -726,13 +705,14 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
return copied; return copied;
} }
static int /* Returns the number of bytes copied. May be 0. Cannot be an errno. */
iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied, static size_t iomap_write_end(struct inode *inode, loff_t pos, size_t len,
struct page *page, struct iomap *iomap, struct iomap *srcmap) size_t copied, struct page *page, struct iomap *iomap,
struct iomap *srcmap)
{ {
const struct iomap_page_ops *page_ops = iomap->page_ops; const struct iomap_page_ops *page_ops = iomap->page_ops;
loff_t old_size = inode->i_size; loff_t old_size = inode->i_size;
int ret; size_t ret;
if (srcmap->type == IOMAP_INLINE) { if (srcmap->type == IOMAP_INLINE) {
ret = iomap_write_end_inline(inode, page, iomap, pos, copied); ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
...@@ -811,13 +791,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ...@@ -811,13 +791,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
flush_dcache_page(page); copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
srcmap); srcmap);
if (unlikely(status < 0))
break;
copied = status;
cond_resched(); cond_resched();
...@@ -891,11 +866,8 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data, ...@@ -891,11 +866,8 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
status = iomap_write_end(inode, pos, bytes, bytes, page, iomap, status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
srcmap); srcmap);
if (unlikely(status <= 0)) { if (WARN_ON_ONCE(status == 0))
if (WARN_ON_ONCE(status == 0)) return -EIO;
return -EIO;
return status;
}
cond_resched(); cond_resched();
...@@ -928,11 +900,13 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, ...@@ -928,11 +900,13 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
} }
EXPORT_SYMBOL_GPL(iomap_file_unshare); EXPORT_SYMBOL_GPL(iomap_file_unshare);
static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, static s64 iomap_zero(struct inode *inode, loff_t pos, u64 length,
unsigned bytes, struct iomap *iomap, struct iomap *srcmap) struct iomap *iomap, struct iomap *srcmap)
{ {
struct page *page; struct page *page;
int status; int status;
unsigned offset = offset_in_page(pos);
unsigned bytes = min_t(u64, PAGE_SIZE - offset, length);
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap); status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
if (status) if (status)
...@@ -944,38 +918,33 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset, ...@@ -944,38 +918,33 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap); return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
} }
static loff_t static loff_t iomap_zero_range_actor(struct inode *inode, loff_t pos,
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, loff_t length, void *data, struct iomap *iomap,
void *data, struct iomap *iomap, struct iomap *srcmap) struct iomap *srcmap)
{ {
bool *did_zero = data; bool *did_zero = data;
loff_t written = 0; loff_t written = 0;
int status;
/* already zeroed? we're done. */ /* already zeroed? we're done. */
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
return count; return length;
do { do {
unsigned offset, bytes; s64 bytes;
offset = offset_in_page(pos);
bytes = min_t(loff_t, PAGE_SIZE - offset, count);
if (IS_DAX(inode)) if (IS_DAX(inode))
status = dax_iomap_zero(pos, offset, bytes, iomap); bytes = dax_iomap_zero(pos, length, iomap);
else else
status = iomap_zero(inode, pos, offset, bytes, iomap, bytes = iomap_zero(inode, pos, length, iomap, srcmap);
srcmap); if (bytes < 0)
if (status < 0) return bytes;
return status;
pos += bytes; pos += bytes;
count -= bytes; length -= bytes;
written += bytes; written += bytes;
if (did_zero) if (did_zero)
*did_zero = true; *did_zero = true;
} while (count > 0); } while (length > 0);
return written; return written;
} }
...@@ -1070,7 +1039,7 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite); ...@@ -1070,7 +1039,7 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
static void static void
iomap_finish_page_writeback(struct inode *inode, struct page *page, iomap_finish_page_writeback(struct inode *inode, struct page *page,
int error) int error, unsigned int len)
{ {
struct iomap_page *iop = to_iomap_page(page); struct iomap_page *iop = to_iomap_page(page);
...@@ -1079,10 +1048,10 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page, ...@@ -1079,10 +1048,10 @@ iomap_finish_page_writeback(struct inode *inode, struct page *page,
mapping_set_error(inode->i_mapping, -EIO); mapping_set_error(inode->i_mapping, -EIO);
} }
WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop); WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
WARN_ON_ONCE(iop && atomic_read(&iop->write_count) <= 0); WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
if (!iop || atomic_dec_and_test(&iop->write_count)) if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
end_page_writeback(page); end_page_writeback(page);
} }
...@@ -1116,7 +1085,8 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) ...@@ -1116,7 +1085,8 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
/* walk each page on bio, ending page IO on them */ /* walk each page on bio, ending page IO on them */
bio_for_each_segment_all(bv, bio, iter_all) bio_for_each_segment_all(bv, bio, iter_all)
iomap_finish_page_writeback(inode, bv->bv_page, error); iomap_finish_page_writeback(inode, bv->bv_page, error,
bv->bv_len);
bio_put(bio); bio_put(bio);
} }
/* The ioend has been freed by bio_put() */ /* The ioend has been freed by bio_put() */
...@@ -1332,8 +1302,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page, ...@@ -1332,8 +1302,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t offset, struct page *page,
merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
&same_page); &same_page);
if (iop && !same_page) if (iop)
atomic_inc(&iop->write_count); atomic_add(len, &iop->write_bytes_pending);
if (!merged) { if (!merged) {
if (bio_full(wpc->ioend->io_bio, len)) { if (bio_full(wpc->ioend->io_bio, len)) {
...@@ -1375,8 +1345,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, ...@@ -1375,8 +1345,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
int error = 0, count = 0, i; int error = 0, count = 0, i;
LIST_HEAD(submit_list); LIST_HEAD(submit_list);
WARN_ON_ONCE(i_blocksize(inode) < PAGE_SIZE && !iop); WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
WARN_ON_ONCE(iop && atomic_read(&iop->write_count) != 0); WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
/* /*
* Walk through the page to find areas to write back. If we run off the * Walk through the page to find areas to write back. If we run off the
......
...@@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap, ...@@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
dio->submit.cookie = submit_bio(bio); dio->submit.cookie = submit_bio(bio);
} }
static ssize_t iomap_dio_complete(struct iomap_dio *dio) ssize_t iomap_dio_complete(struct iomap_dio *dio)
{ {
const struct iomap_dio_ops *dops = dio->dops; const struct iomap_dio_ops *dops = dio->dops;
struct kiocb *iocb = dio->iocb; struct kiocb *iocb = dio->iocb;
...@@ -108,7 +108,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) ...@@ -108,7 +108,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
* ->end_io() when necessary, otherwise a racing buffer read would cache * ->end_io() when necessary, otherwise a racing buffer read would cache
* zeros from unwritten extents. * zeros from unwritten extents.
*/ */
if (!dio->error && if (!dio->error && dio->size &&
(dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
int err; int err;
err = invalidate_inode_pages2_range(inode->i_mapping, err = invalidate_inode_pages2_range(inode->i_mapping,
...@@ -118,6 +118,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) ...@@ -118,6 +118,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
dio_warn_stale_pagecache(iocb->ki_filp); dio_warn_stale_pagecache(iocb->ki_filp);
} }
inode_dio_end(file_inode(iocb->ki_filp));
/* /*
* If this is a DSYNC write, make sure we push it to stable storage now * If this is a DSYNC write, make sure we push it to stable storage now
* that we've written data. * that we've written data.
...@@ -125,11 +126,11 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) ...@@ -125,11 +126,11 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC)) if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
ret = generic_write_sync(iocb, ret); ret = generic_write_sync(iocb, ret);
inode_dio_end(file_inode(iocb->ki_filp));
kfree(dio); kfree(dio);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(iomap_dio_complete);
static void iomap_dio_complete_work(struct work_struct *work) static void iomap_dio_complete_work(struct work_struct *work)
{ {
...@@ -388,6 +389,16 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, ...@@ -388,6 +389,16 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
return iomap_dio_bio_actor(inode, pos, length, dio, iomap); return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
case IOMAP_INLINE: case IOMAP_INLINE:
return iomap_dio_inline_actor(inode, pos, length, dio, iomap); return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
case IOMAP_DELALLOC:
/*
* DIO is not serialised against mmap() access at all, and so
* if the page_mkwrite occurs between the writeback and the
* iomap_apply() call in the DIO path, then it will see the
* DELALLOC block that the page-mkwrite allocated.
*/
pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n",
dio->iocb->ki_filp, current->comm);
return -EIO;
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
return -EIO; return -EIO;
...@@ -406,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, ...@@ -406,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
* Returns -ENOTBLK in case of a page invalidation failure for * Returns -ENOTBLK in case of a page invalidation failure for
* writes. The caller needs to fall back to buffered I/O in this case. * writes. The caller needs to fall back to buffered I/O in this case.
*/ */
ssize_t struct iomap_dio *
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops, const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
bool wait_for_completion) bool wait_for_completion)
{ {
...@@ -421,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -421,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
struct iomap_dio *dio; struct iomap_dio *dio;
if (!count) if (!count)
return 0; return NULL;
if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion)) if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
return -EIO; return ERR_PTR(-EIO);
dio = kmalloc(sizeof(*dio), GFP_KERNEL); dio = kmalloc(sizeof(*dio), GFP_KERNEL);
if (!dio) if (!dio)
return -ENOMEM; return ERR_PTR(-ENOMEM);
dio->iocb = iocb; dio->iocb = iocb;
atomic_set(&dio->ref, 1); atomic_set(&dio->ref, 1);
...@@ -558,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -558,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->wait_for_completion = wait_for_completion; dio->wait_for_completion = wait_for_completion;
if (!atomic_dec_and_test(&dio->ref)) { if (!atomic_dec_and_test(&dio->ref)) {
if (!wait_for_completion) if (!wait_for_completion)
return -EIOCBQUEUED; return ERR_PTR(-EIOCBQUEUED);
for (;;) { for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
...@@ -574,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ...@@ -574,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
} }
return iomap_dio_complete(dio); return dio;
out_free_dio: out_free_dio:
kfree(dio); kfree(dio);
return ret; if (ret)
return ERR_PTR(ret);
return NULL;
}
EXPORT_SYMBOL_GPL(__iomap_dio_rw);
ssize_t
iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
bool wait_for_completion)
{
struct iomap_dio *dio;
dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
if (IS_ERR_OR_NULL(dio))
return PTR_ERR_OR_ZERO(dio);
return iomap_dio_complete(dio);
} }
EXPORT_SYMBOL_GPL(iomap_dio_rw); EXPORT_SYMBOL_GPL(iomap_dio_rw);
...@@ -473,7 +473,7 @@ static int metapage_readpage(struct file *fp, struct page *page) ...@@ -473,7 +473,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
struct bio *bio = NULL; struct bio *bio = NULL;
int block_offset; int block_offset;
int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; int blocks_per_page = i_blocks_per_page(inode, page);
sector_t page_start; /* address of page in fs blocks */ sector_t page_start; /* address of page in fs blocks */
sector_t pblock; sector_t pblock;
int xlen; int xlen;
......
...@@ -544,7 +544,7 @@ xfs_discard_page( ...@@ -544,7 +544,7 @@ xfs_discard_page(
page, ip->i_ino, offset); page, ip->i_ino, offset);
error = xfs_bmap_punch_delalloc_range(ip, start_fsb, error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
PAGE_SIZE / i_blocksize(inode)); i_blocks_per_page(inode, page));
if (error && !XFS_FORCED_SHUTDOWN(mp)) if (error && !XFS_FORCED_SHUTDOWN(mp))
xfs_alert(mp, "page discard unable to remove delalloc mapping."); xfs_alert(mp, "page discard unable to remove delalloc mapping.");
out_invalidate: out_invalidate:
......
...@@ -231,8 +231,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, ...@@ -231,8 +231,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping, int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
pgoff_t index); pgoff_t index);
int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size, s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
struct iomap *iomap);
static inline bool dax_mapping(struct address_space *mapping) static inline bool dax_mapping(struct address_space *mapping)
{ {
return mapping->host && IS_DAX(mapping->host); return mapping->host && IS_DAX(mapping->host);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
struct address_space; struct address_space;
struct fiemap_extent_info; struct fiemap_extent_info;
struct inode; struct inode;
struct iomap_dio;
struct iomap_writepage_ctx; struct iomap_writepage_ctx;
struct iov_iter; struct iov_iter;
struct kiocb; struct kiocb;
...@@ -258,6 +259,10 @@ struct iomap_dio_ops { ...@@ -258,6 +259,10 @@ struct iomap_dio_ops {
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops, const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
bool wait_for_completion); bool wait_for_completion);
struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
bool wait_for_completion);
ssize_t iomap_dio_complete(struct iomap_dio *dio);
int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
#ifdef CONFIG_SWAP #ifdef CONFIG_SWAP
......
...@@ -927,4 +927,20 @@ static inline int page_mkwrite_check_truncate(struct page *page, ...@@ -927,4 +927,20 @@ static inline int page_mkwrite_check_truncate(struct page *page,
return offset; return offset;
} }
/**
* i_blocks_per_page - How many blocks fit in this page.
* @inode: The inode which contains the blocks.
* @page: The page (head page if the page is a THP).
*
* If the block size is larger than the size of this page, return zero.
*
* Context: The caller should hold a refcount on the page to prevent it
* from being split.
* Return: The number of filesystem blocks covered by this page.
*/
static inline
unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
{
return thp_size(page) >> inode->i_blkbits;
}
#endif /* _LINUX_PAGEMAP_H */ #endif /* _LINUX_PAGEMAP_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment