Commit a601e637 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs

Pull xfs bugfixes from Ben Myers:

 - fix for large transactions spanning multiple iclog buffers

 - zero the allocation_args structure on the stack before using it to
   determine whether to use a worker for allocation
 - move allocation stack switch to xfs_bmapi_allocate in order to
   prevent deadlock on AGF buffers

 - growfs no longer reads in garbage for new secondary superblocks

 - silence a build warning

 - ensure that invalid buffers never get written to disk while on free
   list

 - don't vmap inode cluster buffers during free

 - fix buffer shutdown reference count mismatch

 - fix reading of wrapped log data

* tag 'for-linus-v3.7-rc5' of git://oss.sgi.com/xfs/xfs:
  xfs: fix reading of wrapped log data
  xfs: fix buffer shudown reference count mismatch
  xfs: don't vmap inode cluster buffers during free
  xfs: invalidate allocbt blocks moved to the free list
  xfs: silence uninitialised f.file warning.
  xfs: growfs: don't read garbage for new secondary superblocks
  xfs: move allocation stack switch up to xfs_bmapi_allocate
  xfs: introduce XFS_BMAPI_STACK_SWITCH
  xfs: zero allocation_args on the kernel stack
  xfs: only update the last_sync_lsn when a transaction completes
parents 0e4a43ed 6ce377af
......@@ -1866,6 +1866,7 @@ xfs_alloc_fix_freelist(
/*
* Initialize the args structure.
*/
memset(&targs, 0, sizeof(targs));
targs.tp = tp;
targs.mp = mp;
targs.agbp = agbp;
......@@ -2207,7 +2208,7 @@ xfs_alloc_read_agf(
* group or loop over the allocation groups to find the result.
*/
int /* error */
__xfs_alloc_vextent(
xfs_alloc_vextent(
xfs_alloc_arg_t *args) /* allocation argument structure */
{
xfs_agblock_t agsize; /* allocation group size */
......@@ -2417,46 +2418,6 @@ __xfs_alloc_vextent(
return error;
}
static void
xfs_alloc_vextent_worker(
struct work_struct *work)
{
struct xfs_alloc_arg *args = container_of(work,
struct xfs_alloc_arg, work);
unsigned long pflags;
/* we are in a transaction context here */
current_set_flags_nested(&pflags, PF_FSTRANS);
args->result = __xfs_alloc_vextent(args);
complete(args->done);
current_restore_flags_nested(&pflags, PF_FSTRANS);
}
/*
* Data allocation requests often come in with little stack to work on. Push
* them off to a worker thread so there is lots of stack to use. Metadata
* requests, OTOH, are generally from low stack usage paths, so avoid the
* context switch overhead here.
*/
int
xfs_alloc_vextent(
struct xfs_alloc_arg *args)
{
DECLARE_COMPLETION_ONSTACK(done);
if (!args->userdata)
return __xfs_alloc_vextent(args);
args->done = &done;
INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
queue_work(xfs_alloc_wq, &args->work);
wait_for_completion(&done);
return args->result;
}
/*
* Free an extent.
* Just break up the extent address and hand off to xfs_free_ag_extent
......
......@@ -120,9 +120,6 @@ typedef struct xfs_alloc_arg {
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
xfs_fsblock_t firstblock; /* io first block allocated */
struct completion *done;
struct work_struct work;
int result;
} xfs_alloc_arg_t;
/*
......
......@@ -121,6 +121,8 @@ xfs_allocbt_free_block(
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
xfs_trans_binval(cur->bc_tp, bp);
return 0;
}
......
......@@ -2437,6 +2437,7 @@ xfs_bmap_btalloc(
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
memset(&args, 0, sizeof(args));
args.tp = ap->tp;
args.mp = mp;
args.fsbno = ap->blkno;
......@@ -3082,6 +3083,7 @@ xfs_bmap_extents_to_btree(
* Convert to a btree with two levels, one record in root.
*/
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = mp;
args.firstblock = *firstblock;
......@@ -3237,6 +3239,7 @@ xfs_bmap_local_to_extents(
xfs_buf_t *bp; /* buffer for extent block */
xfs_bmbt_rec_host_t *ep;/* extent record pointer */
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = ip->i_mount;
args.firstblock = *firstblock;
......@@ -4616,12 +4619,11 @@ xfs_bmapi_delay(
STATIC int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma,
int flags)
__xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
int whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
XFS_ATTR_FORK : XFS_DATA_FORK;
struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
int tmp_logflags = 0;
......@@ -4654,24 +4656,27 @@ xfs_bmapi_allocate(
* Indicate if this is the first user data in the file, or just any
* user data.
*/
if (!(flags & XFS_BMAPI_METADATA)) {
if (!(bma->flags & XFS_BMAPI_METADATA)) {
bma->userdata = (bma->offset == 0) ?
XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
}
bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
/*
* Only want to do the alignment at the eof if it is userdata and
* allocation length is larger than a stripe unit.
*/
if (mp->m_dalign && bma->length >= mp->m_dalign &&
!(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
!(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
error = xfs_bmap_isaeof(bma, whichfork);
if (error)
return error;
}
if (bma->flags & XFS_BMAPI_STACK_SWITCH)
bma->stack_switch = 1;
error = xfs_bmap_alloc(bma);
if (error)
return error;
......@@ -4706,7 +4711,7 @@ xfs_bmapi_allocate(
* A wasdelay extent has been initialized, so shouldn't be flagged
* as unwritten.
*/
if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
xfs_sb_version_hasextflgbit(&mp->m_sb))
bma->got.br_state = XFS_EXT_UNWRITTEN;
......@@ -4734,6 +4739,45 @@ xfs_bmapi_allocate(
return 0;
}
static void
xfs_bmapi_allocate_worker(
struct work_struct *work)
{
struct xfs_bmalloca *args = container_of(work,
struct xfs_bmalloca, work);
unsigned long pflags;
/* we are in a transaction context here */
current_set_flags_nested(&pflags, PF_FSTRANS);
args->result = __xfs_bmapi_allocate(args);
complete(args->done);
current_restore_flags_nested(&pflags, PF_FSTRANS);
}
/*
* Some allocation requests often come in with little stack to work on. Push
* them off to a worker thread so there is lots of stack to use. Otherwise just
* call directly to avoid the context switch overhead here.
*/
int
xfs_bmapi_allocate(
struct xfs_bmalloca *args)
{
DECLARE_COMPLETION_ONSTACK(done);
if (!args->stack_switch)
return __xfs_bmapi_allocate(args);
args->done = &done;
INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
queue_work(xfs_alloc_wq, &args->work);
wait_for_completion(&done);
return args->result;
}
STATIC int
xfs_bmapi_convert_unwritten(
struct xfs_bmalloca *bma,
......@@ -4919,6 +4963,7 @@ xfs_bmapi_write(
bma.conv = !!(flags & XFS_BMAPI_CONVERT);
bma.wasdel = wasdelay;
bma.offset = bno;
bma.flags = flags;
/*
* There's a 32/64 bit type mismatch between the
......@@ -4934,7 +4979,7 @@ xfs_bmapi_write(
ASSERT(len > 0);
ASSERT(bma.length > 0);
error = xfs_bmapi_allocate(&bma, flags);
error = xfs_bmapi_allocate(&bma);
if (error)
goto error0;
if (bma.blkno == NULLFSBLOCK)
......
......@@ -77,6 +77,7 @@ typedef struct xfs_bmap_free
* from written to unwritten, otherwise convert from unwritten to written.
*/
#define XFS_BMAPI_CONVERT 0x040
#define XFS_BMAPI_STACK_SWITCH 0x080
#define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
......@@ -85,7 +86,8 @@ typedef struct xfs_bmap_free
{ XFS_BMAPI_PREALLOC, "PREALLOC" }, \
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \
{ XFS_BMAPI_CONVERT, "CONVERT" }
{ XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" }
static inline int xfs_bmapi_aflag(int w)
......@@ -133,6 +135,11 @@ typedef struct xfs_bmalloca {
char userdata;/* set if is user data */
char aeof; /* allocated space at eof */
char conv; /* overwriting unwritten extents */
char stack_switch;
int flags;
struct completion *done;
struct work_struct work;
int result;
} xfs_bmalloca_t;
/*
......
......@@ -526,7 +526,25 @@ xfs_buf_item_unpin(
}
xfs_buf_relse(bp);
} else if (freed && remove) {
/*
* There are currently two references to the buffer - the active
* LRU reference and the buf log item. What we are about to do
* here - simulate a failed IO completion - requires 3
* references.
*
* The LRU reference is removed by the xfs_buf_stale() call. The
* buf item reference is removed by the xfs_buf_iodone()
* callback that is run by xfs_buf_do_callbacks() during ioend
* processing (via the bp->b_iodone callback), and then finally
* the ioend processing will drop the IO reference if the buffer
* is marked XBF_ASYNC.
*
* Hence we need to take an additional reference here so that IO
* completion processing doesn't free the buffer prematurely.
*/
xfs_buf_lock(bp);
xfs_buf_hold(bp);
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioerror(bp, EIO);
XFS_BUF_UNDONE(bp);
xfs_buf_stale(bp);
......
......@@ -399,9 +399,26 @@ xfs_growfs_data_private(
/* update secondary superblocks. */
for (agno = 1; agno < nagcount; agno++) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
error = 0;
/*
* new secondary superblocks need to be zeroed, not read from
* disk as the contents of the new area we are growing into is
* completely unknown.
*/
if (agno < oagcount) {
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp);
} else {
bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0);
if (bp)
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
else
error = ENOMEM;
}
if (error) {
xfs_warn(mp,
"error %d reading secondary superblock for ag %d",
......@@ -423,7 +440,7 @@ xfs_growfs_data_private(
break; /* no point in continuing */
}
}
return 0;
return error;
error0:
xfs_trans_cancel(tp, XFS_TRANS_ABORT);
......
......@@ -250,6 +250,7 @@ xfs_ialloc_ag_alloc(
/* boundary */
struct xfs_perag *pag;
memset(&args, 0, sizeof(args));
args.tp = tp;
args.mp = tp->t_mountp;
......
......@@ -1509,7 +1509,8 @@ xfs_ifree_cluster(
* to mark all the active inodes on the buffer stale.
*/
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
mp->m_bsize * blks_per_cluster, 0);
mp->m_bsize * blks_per_cluster,
XBF_UNMAPPED);
if (!bp)
return ENOMEM;
......
......@@ -70,7 +70,7 @@ xfs_find_handle(
int hsize;
xfs_handle_t handle;
struct inode *inode;
struct fd f;
struct fd f = {0};
struct path path;
int error;
struct xfs_inode *ip;
......
......@@ -584,7 +584,9 @@ xfs_iomap_write_allocate(
* pointer that the caller gave to us.
*/
error = xfs_bmapi_write(tp, ip, map_start_fsb,
count_fsb, 0, &first_block, 1,
count_fsb,
XFS_BMAPI_STACK_SWITCH,
&first_block, 1,
imap, &nimaps, &free_list);
if (error)
goto trans_cancel;
......
......@@ -2387,14 +2387,27 @@ xlog_state_do_callback(
/*
* update the last_sync_lsn before we drop the
* Completion of a iclog IO does not imply that
* a transaction has completed, as transactions
* can be large enough to span many iclogs. We
* cannot change the tail of the log half way
* through a transaction as this may be the only
* transaction in the log and moving th etail to
* point to the middle of it will prevent
* recovery from finding the start of the
* transaction. Hence we should only update the
* last_sync_lsn if this iclog contains
* transaction completion callbacks on it.
*
* We have to do this before we drop the
* icloglock to ensure we are the only one that
* can update it.
*/
ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
atomic64_set(&log->l_last_sync_lsn,
be64_to_cpu(iclog->ic_header.h_lsn));
if (iclog->ic_callback)
atomic64_set(&log->l_last_sync_lsn,
be64_to_cpu(iclog->ic_header.h_lsn));
} else
ioerrors++;
......
......@@ -3541,7 +3541,7 @@ xlog_do_recovery_pass(
* - order is important.
*/
error = xlog_bread_offset(log, 0,
bblks - split_bblks, hbp,
bblks - split_bblks, dbp,
offset + BBTOB(split_bblks));
if (error)
goto bread_err2;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment