Commit 73d3393a authored by Linus Torvalds

Merge tag 'xfs-4.14-fixes-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:

 - fix some more CONFIG_XFS_RT related build problems

 - fix data loss when writeback at eof races eofblocks gc and loses

 - invalidate page cache after fs finishes a dio write

 - remove dirty page state when invalidating pages so releasepage does
   the right thing when handed a dirty page

* tag 'xfs-4.14-fixes-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: move two more RT specific functions into CONFIG_XFS_RT
  xfs: trim writepage mapping to within eof
  fs: invalidate page cache after end_io() in dio completion
  xfs: cancel dirty pages on invalidation
parents 020b3023 785545c8
...@@ -265,12 +265,24 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 	if (ret == 0)
 		ret = transferred;
+	if (dio->end_io) {
+		// XXX: ki_pos??
+		err = dio->end_io(dio->iocb, offset, ret, dio->private);
+		if (err)
+			ret = err;
+	}
 	/*
 	 * Try again to invalidate clean pages which might have been cached by
 	 * non-direct readahead, or faulted in by get_user_pages() if the source
 	 * of the write was an mmap'ed region of the file we're writing. Either
 	 * one is a pretty crazy thing to do, so we don't support it 100%. If
 	 * this invalidation fails, tough, the write still worked...
+	 *
+	 * And this page cache invalidation has to be after dio->end_io(), as
+	 * some filesystems convert unwritten extents to real allocations in
+	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * zeros from unwritten extents.
 	 */
 	if (flags & DIO_COMPLETE_INVALIDATE &&
 	    ret > 0 && dio->op == REQ_OP_WRITE &&
...@@ -281,14 +293,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 		WARN_ON_ONCE(err);
 	}
-	if (dio->end_io) {
-		// XXX: ki_pos??
-		err = dio->end_io(dio->iocb, offset, ret, dio->private);
-		if (err)
-			ret = err;
-	}
 	if (!(dio->flags & DIO_SKIP_DIO_COUNT))
 		inode_dio_end(dio->inode);
...
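The comment added above is the crux of the fs/direct-io.c change: dio->end_io() may convert unwritten extents into real allocations, so the page cache over the written range must only be invalidated after it has run, otherwise a racing buffered read can repopulate the cache with zeros. As a rough illustration of the access pattern in question (a userspace sketch, not part of the patch; the file name, sizes and alignment are arbitrary assumptions):

#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 4096;	/* one 4 KiB block, suitably aligned for O_DIRECT */
	unsigned char check[4096];
	void *buf;
	int dfd, bfd;

	if (posix_memalign(&buf, 4096, len))	/* O_DIRECT wants an aligned buffer */
		return 1;
	memset(buf, 0xab, len);

	dfd = open("testfile", O_CREAT | O_WRONLY | O_DIRECT, 0644);
	bfd = open("testfile", O_RDONLY);	/* buffered fd on the same file */
	if (dfd < 0 || bfd < 0)
		return 1;

	if (pwrite(dfd, buf, len, 0) != (ssize_t)len)	/* direct write */
		return 1;

	/*
	 * Buffered read of the range just written: it must observe 0xab.
	 * Before this fix, a read racing with dio completion could cache
	 * zeros from an extent that end_io() had not yet converted.
	 */
	if (pread(bfd, check, len, 0) != (ssize_t)len)
		return 1;
	printf("first byte seen by buffered read: 0x%02x\n", check[0]);

	free(buf);
	close(dfd);
	close(bfd);
	return 0;
}

This sequential version obviously cannot trigger the race by itself; it only shows why a file written with O_DIRECT can still have page cache pages that need invalidating at completion time.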
...@@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	struct kiocb *iocb = dio->iocb;
 	struct inode *inode = file_inode(iocb->ki_filp);
+	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
-	/*
-	 * Try again to invalidate clean pages which might have been cached by
-	 * non-direct readahead, or faulted in by get_user_pages() if the source
-	 * of the write was an mmap'ed region of the file we're writing. Either
-	 * one is a pretty crazy thing to do, so we don't support it 100%. If
-	 * this invalidation fails, tough, the write still worked...
-	 */
-	if (!dio->error &&
-	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
-		ret = invalidate_inode_pages2_range(inode->i_mapping,
-				iocb->ki_pos >> PAGE_SHIFT,
-				(iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
 	if (dio->end_io) {
 		ret = dio->end_io(iocb,
 				dio->error ? dio->error : dio->size,
...@@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	if (likely(!ret)) {
 		ret = dio->size;
 		/* check for short read */
-		if (iocb->ki_pos + ret > dio->i_size &&
+		if (offset + ret > dio->i_size &&
 		    !(dio->flags & IOMAP_DIO_WRITE))
-			ret = dio->i_size - iocb->ki_pos;
+			ret = dio->i_size - offset;
 		iocb->ki_pos += ret;
 	}
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing. Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%. If
+	 * this invalidation fails, tough, the write still worked...
+	 *
+	 * And this page cache invalidation has to be after dio->end_io(), as
+	 * some filesystems convert unwritten extents to real allocations in
+	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * zeros from unwritten extents.
+	 */
+	if (!dio->error &&
+	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+		int err;
+		err = invalidate_inode_pages2_range(inode->i_mapping,
+				offset >> PAGE_SHIFT,
+				(offset + dio->size - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(err);
+	}
	inode_dio_end(file_inode(iocb->ki_filp));
	kfree(dio);
...
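One subtlety in the fs/iomap.c hunk: because the invalidation now runs after the completion path has already advanced iocb->ki_pos, the new code snapshots offset = iocb->ki_pos on entry and derives the page range from that snapshot rather than from ki_pos. A minimal arithmetic sketch of the difference, using made-up offsets and a hard-coded 4 KiB page size rather than the kernel definitions:

#include <stdio.h>

#define PAGE_SHIFT_SKETCH 12	/* assume 4 KiB pages */

int main(void)
{
	long long ki_pos = 1 << 20;	/* write starts at 1 MiB */
	long long size = 8192;		/* 8 KiB direct write */
	long long offset = ki_pos;	/* snapshot taken before completion */

	ki_pos += size;			/* what iocb->ki_pos looks like afterwards */

	printf("correct range:  pages %lld..%lld\n",
	       offset >> PAGE_SHIFT_SKETCH,
	       (offset + size - 1) >> PAGE_SHIFT_SKETCH);
	printf("advanced ki_pos would give: pages %lld..%lld\n",
	       ki_pos >> PAGE_SHIFT_SKETCH,
	       (ki_pos + size - 1) >> PAGE_SHIFT_SKETCH);
	return 0;
}

Computing the range from the advanced ki_pos would invalidate the pages just past the write rather than the ones it actually dirtied.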
...@@ -3852,6 +3852,17 @@ xfs_trim_extent(
 	}
 }
+/* trim extent to within eof */
+void
+xfs_trim_extent_eof(
+	struct xfs_bmbt_irec	*irec,
+	struct xfs_inode	*ip)
+{
+	xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
+					      i_size_read(VFS_I(ip))));
+}
 /*
  * Trim the returned map to the required bounds
  */
...
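xfs_trim_extent_eof() is just xfs_trim_extent() with the range pinned to [0, EOF) measured in filesystem blocks, so any part of a cached mapping that now lies beyond EOF gets clipped off. Below is a self-contained sketch of the same clamping arithmetic; the struct, helper and block numbers are simplified stand-ins for illustration, not the kernel types:

#include <stdint.h>
#include <stdio.h>

struct irec {
	uint64_t startoff;	/* first file offset covered, in blocks */
	uint64_t blockcount;	/* length of the mapping, in blocks */
};

/* Clamp the mapping to the range [bno, bno + len), like xfs_trim_extent(). */
static void trim_extent(struct irec *irec, uint64_t bno, uint64_t len)
{
	uint64_t end = bno + len;

	if (irec->startoff + irec->blockcount <= bno || irec->startoff >= end) {
		irec->blockcount = 0;	/* no overlap at all */
		return;
	}
	if (irec->startoff < bno) {
		irec->blockcount -= bno - irec->startoff;
		irec->startoff = bno;
	}
	if (irec->startoff + irec->blockcount > end)
		irec->blockcount = end - irec->startoff;
}

int main(void)
{
	/* Hypothetical numbers: a cached mapping of 8 blocks starting at
	 * block 10, but EOF now sits at block 12 after eofblocks trimming. */
	struct irec map = { .startoff = 10, .blockcount = 8 };
	uint64_t eof_fsb = 12;

	trim_extent(&map, 0, eof_fsb);	/* analogous to xfs_trim_extent_eof() */
	printf("startoff=%llu blockcount=%llu\n",
	       (unsigned long long)map.startoff,
	       (unsigned long long)map.blockcount);	/* prints 10 and 2 */
	return 0;
}

With mappings clipped like this, xfs_imap_valid() in the next hunk naturally rejects any offset beyond EOF, so writeback cannot reuse a mapping that eofblocks trimming has made stale.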
...@@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
 		xfs_filblks_t len);
+void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
...
...@@ -446,6 +446,19 @@ xfs_imap_valid(
 {
 	offset >>= inode->i_blkbits;
+	/*
+	 * We have to make sure the cached mapping is within EOF to protect
+	 * against eofblocks trimming on file release leaving us with a stale
+	 * mapping. Otherwise, a page for a subsequent file extending buffered
+	 * write could get picked up by this writeback cycle and written to the
+	 * wrong blocks.
+	 *
+	 * Note that what we really want here is a generic mapping invalidation
+	 * mechanism to protect us from arbitrary extent modifying contexts, not
+	 * just eofblocks.
+	 */
+	xfs_trim_extent_eof(imap, XFS_I(inode));
 	return offset >= imap->br_startoff &&
 		offset < imap->br_startoff + imap->br_blockcount;
 }
...@@ -735,6 +748,14 @@ xfs_vm_invalidatepage(
 {
 	trace_xfs_invalidatepage(page->mapping->host, page, offset,
 				 length);
+	/*
+	 * If we are invalidating the entire page, clear the dirty state from it
+	 * so that we can check for attempts to release dirty cached pages in
+	 * xfs_vm_releasepage().
+	 */
+	if (offset == 0 && length >= PAGE_SIZE)
+		cancel_dirty_page(page);
 	block_invalidatepage(page, offset, length);
 }
...@@ -1190,25 +1211,27 @@ xfs_vm_releasepage(
 	 * mm accommodates an old ext3 case where clean pages might not have had
 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
 	 * ->releasepage() via shrink_active_list(). Conversely,
-	 * block_invalidatepage() can send pages that are still marked dirty
-	 * but otherwise have invalidated buffers.
+	 * block_invalidatepage() can send pages that are still marked dirty but
+	 * otherwise have invalidated buffers.
 	 *
 	 * We want to release the latter to avoid unnecessary buildup of the
-	 * LRU, skip the former and warn if we've left any lingering
-	 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
-	 * or unwritten buffers and warn if the page is not dirty. Otherwise
-	 * try to release the buffers.
+	 * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
+	 * that are entirely invalidated and need to be released. Hence the
+	 * only time we should get dirty pages here is through
+	 * shrink_active_list() and so we can simply skip those now.
+	 *
+	 * warn if we've left any lingering delalloc/unwritten buffers on clean
+	 * or invalidated pages we are about to release.
 	 */
+	if (PageDirty(page))
+		return 0;
 	xfs_count_page_state(page, &delalloc, &unwritten);
-	if (delalloc) {
-		WARN_ON_ONCE(!PageDirty(page));
+	if (WARN_ON_ONCE(delalloc))
 		return 0;
-	}
-	if (unwritten) {
-		WARN_ON_ONCE(!PageDirty(page));
+	if (WARN_ON_ONCE(unwritten))
 		return 0;
-	}
 	return try_to_free_buffers(page);
 }
...
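Two pieces work together in the fs/xfs/xfs_aops.c change: xfs_vm_invalidatepage() now clears the dirty flag on fully invalidated pages, and xfs_vm_releasepage() switches to the if (WARN_ON_ONCE(cond)) form, which warns once per call site while still evaluating to the condition, so it can drive the early return directly. A userspace stand-in for that idiom and for the resulting decision flow (the macro and function names here are hypothetical sketches, not the kernel implementation):

#include <stdbool.h>
#include <stdio.h>

/* Rough userspace analogue of WARN_ON_ONCE(): report the unexpected
 * condition once per call site and evaluate to its truth value.
 * Uses a GNU C statement expression, as the kernel macro does. */
#define WARN_ON_ONCE_SKETCH(cond) ({				\
	static bool __warned;					\
	bool __c = (cond);					\
	if (__c && !__warned) {					\
		__warned = true;				\
		fprintf(stderr, "warning: %s\n", #cond);	\
	}							\
	__c;							\
})

/* Decision flow mirroring the rewritten xfs_vm_releasepage(). */
static int releasepage_like(bool dirty, bool delalloc, bool unwritten)
{
	if (dirty)
		return 0;			/* never touch dirty pages */
	if (WARN_ON_ONCE_SKETCH(delalloc))	/* should have been written back */
		return 0;
	if (WARN_ON_ONCE_SKETCH(unwritten))
		return 0;
	return 1;				/* ok to free the buffers */
}

int main(void)
{
	printf("%d\n", releasepage_like(false, false, false));	/* 1: free buffers */
	printf("%d\n", releasepage_like(false, true, false));	/* 0, warns */
	printf("%d\n", releasepage_like(false, true, false));	/* 0, silent: already warned */
	printf("%d\n", releasepage_like(true, false, false));	/* 0: dirty pages are skipped */
	return 0;
}

The third call returns 0 without another message, matching the warn-once behaviour the diff relies on.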
...@@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper(
 	return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
 }
-/* Transform a rtbitmap "record" into a fsmap */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_helper(
-	struct xfs_trans		*tp,
-	struct xfs_rtalloc_rec		*rec,
-	void				*priv)
-{
-	struct xfs_mount		*mp = tp->t_mountp;
-	struct xfs_getfsmap_info	*info = priv;
-	struct xfs_rmap_irec		irec;
-	xfs_daddr_t			rec_daddr;
-	rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
-	irec.rm_startblock = rec->ar_startblock;
-	irec.rm_blockcount = rec->ar_blockcount;
-	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
-	irec.rm_offset = 0;
-	irec.rm_flags = 0;
-	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
-}
 /* Transform a bnobt irec into a fsmap */
 STATIC int
 xfs_getfsmap_datadev_bnobt_helper(
...@@ -475,6 +452,30 @@ xfs_getfsmap_logdev(
 	return xfs_getfsmap_helper(tp, info, &rmap, 0);
 }
+#ifdef CONFIG_XFS_RT
+/* Transform a rtbitmap "record" into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+	struct xfs_trans		*tp,
+	struct xfs_rtalloc_rec		*rec,
+	void				*priv)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_getfsmap_info	*info = priv;
+	struct xfs_rmap_irec		irec;
+	xfs_daddr_t			rec_daddr;
+	rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
+	irec.rm_startblock = rec->ar_startblock;
+	irec.rm_blockcount = rec->ar_blockcount;
+	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
+	irec.rm_offset = 0;
+	irec.rm_flags = 0;
+	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+}
 /* Execute a getfsmap query against the realtime device. */
 STATIC int
 __xfs_getfsmap_rtdev(
...@@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev(
 	return query_fn(tp, info);
 }
-#ifdef CONFIG_XFS_RT
 /* Actually query the realtime bitmap. */
 STATIC int
 xfs_getfsmap_rtdev_rtbitmap_query(
...
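The fs/xfs/xfs_fsmap.c hunks hoist the #ifdef CONFIG_XFS_RT guard so that it also covers xfs_getfsmap_rtdev_rtbitmap_helper() and __xfs_getfsmap_rtdev(), which are only meaningful when realtime device support is built in. For readers unfamiliar with the pattern, here is a self-contained sketch of the usual config-gating shape: the real helper compiles only when the option is enabled, and a trivial stub keeps callers building otherwise. The names and the stub are illustrative assumptions, not how xfs_fsmap.c actually wires up its realtime handlers:

#include <errno.h>
#include <stdio.h>

/* #define CONFIG_XFS_RT_SKETCH 1	uncomment to "enable" the feature */

#ifdef CONFIG_XFS_RT_SKETCH
static int getfsmap_rtdev_sketch(void)
{
	/* ... walk the realtime bitmap and emit fsmap records ... */
	return 0;
}
#else
static inline int getfsmap_rtdev_sketch(void)
{
	return -EOPNOTSUPP;	/* realtime support not built in */
}
#endif

int main(void)
{
	int ret = getfsmap_rtdev_sketch();

	printf("rtdev query: %s\n", ret == -EOPNOTSUPP ?
	       "not supported in this build" : "ok");
	return 0;
}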