Commit 27ea95cc, authored and committed by Stephen Lord

[XFS] Implement deletion of inode clusters in XFS.

SGI Modid: 2.5.x-xfs:slinx:159536a
parent 611e7dfb
...@@ -162,6 +162,7 @@ xfs_buf_item_log_check( ...@@ -162,6 +162,7 @@ xfs_buf_item_log_check(
#endif #endif
STATIC void xfs_buf_error_relse(xfs_buf_t *bp); STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
/* /*
* This returns the number of log iovecs needed to log the * This returns the number of log iovecs needed to log the
...@@ -417,22 +418,25 @@ xfs_buf_item_unpin( ...@@ -417,22 +418,25 @@ xfs_buf_item_unpin(
ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp)); ASSERT(XFS_BUF_ISSTALE(bp));
/**
ASSERT(bp->b_pincount == 0);
**/
ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL); ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
xfs_buf_item_trace("UNPIN STALE", bip); xfs_buf_item_trace("UNPIN STALE", bip);
xfs_buftrace("XFS_UNPIN STALE", bp); xfs_buftrace("XFS_UNPIN STALE", bp);
AIL_LOCK(mp,s);
/* /*
* If we get called here because of an IO error, we may * If we get called here because of an IO error, we may
* or may not have the item on the AIL. xfs_trans_delete_ail() * or may not have the item on the AIL. xfs_trans_delete_ail()
* will take care of that situation. * will take care of that situation.
* xfs_trans_delete_ail() drops the AIL lock. * xfs_trans_delete_ail() drops the AIL lock.
*/ */
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
XFS_BUF_FSPRIVATE(bp, void *) = NULL;
XFS_BUF_CLR_IODONE_FUNC(bp);
} else {
AIL_LOCK(mp,s);
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s); xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s);
xfs_buf_item_relse(bp); xfs_buf_item_relse(bp);
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
}
xfs_buf_relse(bp); xfs_buf_relse(bp);
} }
} }
......
...@@ -96,6 +96,7 @@ typedef struct xfs_buf_log_format_t { ...@@ -96,6 +96,7 @@ typedef struct xfs_buf_log_format_t {
#define XFS_BLI_STALE 0x04 #define XFS_BLI_STALE 0x04
#define XFS_BLI_LOGGED 0x08 #define XFS_BLI_LOGGED 0x08
#define XFS_BLI_INODE_ALLOC_BUF 0x10 #define XFS_BLI_INODE_ALLOC_BUF 0x10
#define XFS_BLI_STALE_INODE 0x20
#ifdef __KERNEL__ #ifdef __KERNEL__
......
...@@ -99,5 +99,6 @@ struct xfs_mount_args { ...@@ -99,5 +99,6 @@ struct xfs_mount_args {
#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */ #define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */ #define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
#define XFSMNT_NOLOGFLUSH 0x04000000 /* Don't flush for log blocks */ #define XFSMNT_NOLOGFLUSH 0x04000000 /* Don't flush for log blocks */
#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */
#endif /* __XFS_CLNT_H__ */ #endif /* __XFS_CLNT_H__ */
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
#include "xfs_bit.h" #include "xfs_bit.h"
#include "xfs_rtalloc.h" #include "xfs_rtalloc.h"
#include "xfs_error.h" #include "xfs_error.h"
#include "xfs_bmap.h"
/* /*
* Log specified fields for the inode given by bp and off. * Log specified fields for the inode given by bp and off.
...@@ -921,7 +922,10 @@ xfs_dialloc( ...@@ -921,7 +922,10 @@ xfs_dialloc(
int int
xfs_difree( xfs_difree(
xfs_trans_t *tp, /* transaction pointer */ xfs_trans_t *tp, /* transaction pointer */
xfs_ino_t inode) /* inode to be freed */ xfs_ino_t inode, /* inode to be freed */
xfs_bmap_free_t *flist, /* extents to free */
int *delete, /* set if inode cluster was deleted */
xfs_ino_t *first_ino) /* first inode in deleted cluster */
{ {
/* REFERENCED */ /* REFERENCED */
xfs_agblock_t agbno; /* block number containing inode */ xfs_agblock_t agbno; /* block number containing inode */
...@@ -932,6 +936,7 @@ xfs_difree( ...@@ -932,6 +936,7 @@ xfs_difree(
xfs_btree_cur_t *cur; /* inode btree cursor */ xfs_btree_cur_t *cur; /* inode btree cursor */
int error; /* error return value */ int error; /* error return value */
int i; /* result code */ int i; /* result code */
int ilen; /* inodes in an inode cluster */
xfs_mount_t *mp; /* mount structure for filesystem */ xfs_mount_t *mp; /* mount structure for filesystem */
int off; /* offset of inode in inode chunk */ int off; /* offset of inode in inode chunk */
xfs_inobt_rec_t rec; /* btree record */ xfs_inobt_rec_t rec; /* btree record */
...@@ -995,10 +1000,11 @@ xfs_difree( ...@@ -995,10 +1000,11 @@ xfs_difree(
if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))) &rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT)))
goto error0; goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (i) {
freecount += rec.ir_freecount; freecount += rec.ir_freecount;
if ((error = xfs_inobt_increment(cur, 0, &i))) if ((error = xfs_inobt_increment(cur, 0, &i)))
goto error0; goto error0;
}
} while (i == 1); } while (i == 1);
ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) || ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
XFS_FORCED_SHUTDOWN(mp)); XFS_FORCED_SHUTDOWN(mp));
...@@ -1033,6 +1039,43 @@ xfs_difree( ...@@ -1033,6 +1039,43 @@ xfs_difree(
*/ */
XFS_INOBT_SET_FREE(&rec, off, ARCH_NOCONVERT); XFS_INOBT_SET_FREE(&rec, off, ARCH_NOCONVERT);
rec.ir_freecount++; rec.ir_freecount++;
/*
* When an inode cluster is free, it becomes eligible for removal
*/
if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
(rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
*delete = 1;
*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
/*
* Remove the inode cluster from the AGI B+Tree, adjust the
* AGI and Superblock inode counts, and mark the disk space
* to be freed when the transaction is committed.
*/
ilen = XFS_IALLOC_INODES(mp);
INT_MOD(agi->agi_count, ARCH_CONVERT, -ilen);
INT_MOD(agi->agi_freecount, ARCH_CONVERT, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
down_read(&mp->m_peraglock);
mp->m_perag[agno].pagi_freecount -= ilen - 1;
up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
if ((error = xfs_inobt_delete(cur, &i))) {
cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
error, mp->m_fsname);
goto error0;
}
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
XFS_IALLOC_BLOCKS(mp), flist, mp);
} else {
*delete = 0;
if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) { if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
cmn_err(CE_WARN, cmn_err(CE_WARN,
"xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.", "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
...@@ -1047,6 +1090,9 @@ xfs_difree( ...@@ -1047,6 +1090,9 @@ xfs_difree(
down_read(&mp->m_peraglock); down_read(&mp->m_peraglock);
mp->m_perag[agno].pagi_freecount++; mp->m_perag[agno].pagi_freecount++;
up_read(&mp->m_peraglock); up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
}
#ifdef DEBUG #ifdef DEBUG
if (cur->bc_nlevels == 1) { if (cur->bc_nlevels == 1) {
int freecount = 0; int freecount = 0;
...@@ -1054,20 +1100,23 @@ xfs_difree( ...@@ -1054,20 +1100,23 @@ xfs_difree(
if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i))) if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
goto error0; goto error0;
do { do {
if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, if ((error = xfs_inobt_get_rec(cur,
&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT))) &rec.ir_startino,
&rec.ir_freecount,
&rec.ir_free, &i,
ARCH_NOCONVERT)))
goto error0; goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (i) {
freecount += rec.ir_freecount; freecount += rec.ir_freecount;
if ((error = xfs_inobt_increment(cur, 0, &i))) if ((error = xfs_inobt_increment(cur, 0, &i)))
goto error0; goto error0;
}
} while (i == 1); } while (i == 1);
ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) || ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
XFS_FORCED_SHUTDOWN(mp)); XFS_FORCED_SHUTDOWN(mp));
} }
#endif #endif
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
return 0; return 0;
error0: error0:
......
...@@ -134,7 +134,10 @@ xfs_dialloc( ...@@ -134,7 +134,10 @@ xfs_dialloc(
int /* error */ int /* error */
xfs_difree( xfs_difree(
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode); /* inode to be freed */ xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
int *delete, /* set if inode cluster was deleted */
xfs_ino_t *first_ino); /* first inode in deleted cluster */
/* /*
* Return the location of the inode in bno/len/off, * Return the location of the inode in bno/len/off,
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include "xfs_btree.h" #include "xfs_btree.h"
#include "xfs_ialloc.h" #include "xfs_ialloc.h"
#include "xfs_alloc.h" #include "xfs_alloc.h"
#include "xfs_error.h"
/* /*
* Inode allocation management for XFS. * Inode allocation management for XFS.
...@@ -73,7 +74,6 @@ STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int); ...@@ -73,7 +74,6 @@ STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
* Internal functions. * Internal functions.
*/ */
#ifdef _NOTYET_
/* /*
* Single level of the xfs_inobt_delete record deletion routine. * Single level of the xfs_inobt_delete record deletion routine.
* Delete record pointed to by cur/level. * Delete record pointed to by cur/level.
...@@ -87,8 +87,7 @@ xfs_inobt_delrec( ...@@ -87,8 +87,7 @@ xfs_inobt_delrec(
int *stat) /* fail/done/go-on */ int *stat) /* fail/done/go-on */
{ {
xfs_buf_t *agbp; /* buffer for a.g. inode header */ xfs_buf_t *agbp; /* buffer for a.g. inode header */
xfs_agnumber_t agfbno; /* agf block of freed btree block */ xfs_mount_t *mp; /* mount structure */
xfs_buf_t *agfbp; /* bp of agf block of freed block */
xfs_agi_t *agi; /* allocation group inode header */ xfs_agi_t *agi; /* allocation group inode header */
xfs_inobt_block_t *block; /* btree block record/key lives in */ xfs_inobt_block_t *block; /* btree block record/key lives in */
xfs_agblock_t bno; /* btree block number */ xfs_agblock_t bno; /* btree block number */
...@@ -96,15 +95,15 @@ xfs_inobt_delrec( ...@@ -96,15 +95,15 @@ xfs_inobt_delrec(
int error; /* error return value */ int error; /* error return value */
int i; /* loop index */ int i; /* loop index */
xfs_inobt_key_t key; /* kp points here if block is level 0 */ xfs_inobt_key_t key; /* kp points here if block is level 0 */
xfs_inobt_key_t *kp; /* pointer to btree keys */ xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */
xfs_agblock_t lbno; /* left block's block number */ xfs_agblock_t lbno; /* left block's block number */
xfs_buf_t *lbp; /* left block's buffer pointer */ xfs_buf_t *lbp; /* left block's buffer pointer */
xfs_inobt_block_t *left; /* left btree block */ xfs_inobt_block_t *left; /* left btree block */
xfs_inobt_key_t *lkp; /* left block key pointer */ xfs_inobt_key_t *lkp; /* left block key pointer */
xfs_inobt_ptr_t *lpp; /* left block address pointer */ xfs_inobt_ptr_t *lpp; /* left block address pointer */
int lrecs; /* number of records in left block */ int lrecs = 0; /* number of records in left block */
xfs_inobt_rec_t *lrp; /* left block record pointer */ xfs_inobt_rec_t *lrp; /* left block record pointer */
xfs_inobt_ptr_t *pp; /* pointer to btree addresses */ xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */
int ptr; /* index in btree block for this rec */ int ptr; /* index in btree block for this rec */
xfs_agblock_t rbno; /* right block's block number */ xfs_agblock_t rbno; /* right block's block number */
xfs_buf_t *rbp; /* right block's buffer pointer */ xfs_buf_t *rbp; /* right block's buffer pointer */
...@@ -112,10 +111,12 @@ xfs_inobt_delrec( ...@@ -112,10 +111,12 @@ xfs_inobt_delrec(
xfs_inobt_key_t *rkp; /* right block key pointer */ xfs_inobt_key_t *rkp; /* right block key pointer */
xfs_inobt_rec_t *rp; /* pointer to btree records */ xfs_inobt_rec_t *rp; /* pointer to btree records */
xfs_inobt_ptr_t *rpp; /* right block address pointer */ xfs_inobt_ptr_t *rpp; /* right block address pointer */
int rrecs; /* number of records in right block */ int rrecs = 0; /* number of records in right block */
int numrecs;
xfs_inobt_rec_t *rrp; /* right block record pointer */ xfs_inobt_rec_t *rrp; /* right block record pointer */
xfs_btree_cur_t *tcur; /* temporary btree cursor */ xfs_btree_cur_t *tcur; /* temporary btree cursor */
mp = cur->bc_mp;
/* /*
* Get the index of the entry being deleted, check for nothing there. * Get the index of the entry being deleted, check for nothing there.
...@@ -125,19 +126,22 @@ xfs_inobt_delrec( ...@@ -125,19 +126,22 @@ xfs_inobt_delrec(
*stat = 0; *stat = 0;
return 0; return 0;
} }
/* /*
* Get the buffer & block containing the record or key/ptr. * Get the buffer & block containing the record or key/ptr.
*/ */
bp = cur->bc_bufs[level]; bp = cur->bc_bufs[level];
block = XFS_BUF_TO_INOBT_BLOCK(bp); block = XFS_BUF_TO_INOBT_BLOCK(bp);
#ifdef DEBUG #ifdef DEBUG
if (error = xfs_btree_check_sblock(cur, block, level, bp)) if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
return error; return error;
#endif #endif
/* /*
* Fail if we're off the end of the block. * Fail if we're off the end of the block.
*/ */
if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
if (ptr > numrecs) {
*stat = 0; *stat = 0;
return 0; return 0;
} }
...@@ -150,18 +154,18 @@ xfs_inobt_delrec( ...@@ -150,18 +154,18 @@ xfs_inobt_delrec(
kp = XFS_INOBT_KEY_ADDR(block, 1, cur); kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
pp = XFS_INOBT_PTR_ADDR(block, 1, cur); pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
#ifdef DEBUG #ifdef DEBUG
for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) { for (i = ptr; i < numrecs; i++) {
if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)) if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)))
return error; return error;
} }
#endif #endif
if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { if (ptr < numrecs) {
memmove(&kp[ptr - 1], &kp[ptr], memmove(&kp[ptr - 1], &kp[ptr],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*kp)); (numrecs - ptr) * sizeof(*kp));
memmove(&pp[ptr - 1], &pp[ptr], memmove(&pp[ptr - 1], &pp[ptr],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*pp)); (numrecs - ptr) * sizeof(*kp));
xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
} }
} }
/* /*
...@@ -170,24 +174,25 @@ xfs_inobt_delrec( ...@@ -170,24 +174,25 @@ xfs_inobt_delrec(
*/ */
else { else {
rp = XFS_INOBT_REC_ADDR(block, 1, cur); rp = XFS_INOBT_REC_ADDR(block, 1, cur);
if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { if (ptr < numrecs) {
memmove(&rp[ptr - 1], &rp[ptr], memmove(&rp[ptr - 1], &rp[ptr],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*rp)); (numrecs - ptr) * sizeof(*rp));
xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1); xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
} }
/* /*
* If it's the first record in the block, we'll need a key * If it's the first record in the block, we'll need a key
* structure to pass up to the next level (updkey). * structure to pass up to the next level (updkey).
*/ */
if (ptr == 1) { if (ptr == 1) {
INT_COPY(key.ir_startino, rp->ir_startino, ARCH_CONVERT); key.ir_startino = rp->ir_startino;
kp = &key; kp = &key;
} }
} }
/* /*
* Decrement and log the number of entries in the block. * Decrement and log the number of entries in the block.
*/ */
INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1); numrecs--;
INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS); xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
/* /*
* Is this the root level? If so, we're almost done. * Is this the root level? If so, we're almost done.
...@@ -199,7 +204,7 @@ xfs_inobt_delrec( ...@@ -199,7 +204,7 @@ xfs_inobt_delrec(
* and it's NOT the leaf level, * and it's NOT the leaf level,
* then we can get rid of this level. * then we can get rid of this level.
*/ */
if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) { if (numrecs == 1 && level > 0) {
agbp = cur->bc_private.i.agbp; agbp = cur->bc_private.i.agbp;
agi = XFS_BUF_TO_AGI(agbp); agi = XFS_BUF_TO_AGI(agbp);
/* /*
...@@ -207,12 +212,13 @@ xfs_inobt_delrec( ...@@ -207,12 +212,13 @@ xfs_inobt_delrec(
* Make it the new root of the btree. * Make it the new root of the btree.
*/ */
bno = INT_GET(agi->agi_root, ARCH_CONVERT); bno = INT_GET(agi->agi_root, ARCH_CONVERT);
INT_COPY(agi->agi_root, *pp, ARCH_CONVERT); agi->agi_root = *pp;
INT_MOD(agi->agi_level, ARCH_CONVERT, -1); INT_MOD(agi->agi_level, ARCH_CONVERT, -1);
/* /*
* Free the block. * Free the block.
*/ */
if (error = xfs_free_extent(cur->bc_tp, bno, 1)) if ((error = xfs_free_extent(cur->bc_tp,
XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
return error; return error;
xfs_trans_binval(cur->bc_tp, bp); xfs_trans_binval(cur->bc_tp, bp);
xfs_ialloc_log_agi(cur->bc_tp, agbp, xfs_ialloc_log_agi(cur->bc_tp, agbp,
...@@ -222,21 +228,6 @@ xfs_inobt_delrec( ...@@ -222,21 +228,6 @@ xfs_inobt_delrec(
*/ */
cur->bc_bufs[level] = NULL; cur->bc_bufs[level] = NULL;
cur->bc_nlevels--; cur->bc_nlevels--;
/*
* To ensure that the freed block is not used for
* user data until this transaction is permanent,
* we lock the agf buffer for this ag until the
* transaction record makes it to the on-disk log.
*/
agfbno = XFS_AG_DADDR(cur->bc_mp,
cur->bc_private.i.agno,
XFS_AGF_DADDR(mp));
if (error = xfs_trans_read_buf(cur->bc_mp, cur->bc_tp,
cur->bc_mp->m_ddev_targp, agfbno,
XFS_FSS_TO_BB(mp, 1), 0, &agfbp))
return error;
ASSERT(!XFS_BUF_GETERROR(agfbp));
xfs_trans_bhold_until_committed(cur->bc_tp, agfbp);
} else if (level > 0 && } else if (level > 0 &&
(error = xfs_inobt_decrement(cur, level, &i))) (error = xfs_inobt_decrement(cur, level, &i)))
return error; return error;
...@@ -253,7 +244,7 @@ xfs_inobt_delrec( ...@@ -253,7 +244,7 @@ xfs_inobt_delrec(
* If the number of records remaining in the block is at least * If the number of records remaining in the block is at least
* the minimum, we're done. * the minimum, we're done.
*/ */
if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_INOBT_BLOCK_MINRECS(level, cur)) { if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
if (level > 0 && if (level > 0 &&
(error = xfs_inobt_decrement(cur, level, &i))) (error = xfs_inobt_decrement(cur, level, &i)))
return error; return error;
...@@ -273,7 +264,7 @@ xfs_inobt_delrec( ...@@ -273,7 +264,7 @@ xfs_inobt_delrec(
* Duplicate the cursor so our btree manipulations here won't * Duplicate the cursor so our btree manipulations here won't
* disrupt the next level up. * disrupt the next level up.
*/ */
if (error = xfs_btree_dup_cursor(cur, &tcur)) if ((error = xfs_btree_dup_cursor(cur, &tcur)))
return error; return error;
/* /*
* If there's a right sibling, see if it's ok to shift an entry * If there's a right sibling, see if it's ok to shift an entry
...@@ -286,7 +277,7 @@ xfs_inobt_delrec( ...@@ -286,7 +277,7 @@ xfs_inobt_delrec(
*/ */
i = xfs_btree_lastrec(tcur, level); i = xfs_btree_lastrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0); XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (error = xfs_inobt_increment(tcur, level, &i)) if ((error = xfs_inobt_increment(tcur, level, &i)))
goto error0; goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0); XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
i = xfs_btree_lastrec(tcur, level); i = xfs_btree_lastrec(tcur, level);
...@@ -297,7 +288,7 @@ xfs_inobt_delrec( ...@@ -297,7 +288,7 @@ xfs_inobt_delrec(
rbp = tcur->bc_bufs[level]; rbp = tcur->bc_bufs[level];
right = XFS_BUF_TO_INOBT_BLOCK(rbp); right = XFS_BUF_TO_INOBT_BLOCK(rbp);
#ifdef DEBUG #ifdef DEBUG
if (error = xfs_btree_check_sblock(cur, right, level, rbp)) if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
goto error0; goto error0;
#endif #endif
/* /*
...@@ -311,7 +302,7 @@ xfs_inobt_delrec( ...@@ -311,7 +302,7 @@ xfs_inobt_delrec(
*/ */
if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >= if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
XFS_INOBT_BLOCK_MINRECS(level, cur)) { XFS_INOBT_BLOCK_MINRECS(level, cur)) {
if (error = xfs_inobt_lshift(tcur, level, &i)) if ((error = xfs_inobt_lshift(tcur, level, &i)))
goto error0; goto error0;
if (i) { if (i) {
ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
...@@ -334,7 +325,7 @@ xfs_inobt_delrec( ...@@ -334,7 +325,7 @@ xfs_inobt_delrec(
rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT); rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
if (lbno != NULLAGBLOCK) { if (lbno != NULLAGBLOCK) {
xfs_btree_firstrec(tcur, level); xfs_btree_firstrec(tcur, level);
if (error = xfs_inobt_decrement(tcur, level, &i)) if ((error = xfs_inobt_decrement(tcur, level, &i)))
goto error0; goto error0;
} }
} }
...@@ -348,7 +339,7 @@ xfs_inobt_delrec( ...@@ -348,7 +339,7 @@ xfs_inobt_delrec(
* previous block. * previous block.
*/ */
xfs_btree_firstrec(tcur, level); xfs_btree_firstrec(tcur, level);
if (error = xfs_inobt_decrement(tcur, level, &i)) if ((error = xfs_inobt_decrement(tcur, level, &i)))
goto error0; goto error0;
xfs_btree_firstrec(tcur, level); xfs_btree_firstrec(tcur, level);
/* /*
...@@ -357,7 +348,7 @@ xfs_inobt_delrec( ...@@ -357,7 +348,7 @@ xfs_inobt_delrec(
lbp = tcur->bc_bufs[level]; lbp = tcur->bc_bufs[level];
left = XFS_BUF_TO_INOBT_BLOCK(lbp); left = XFS_BUF_TO_INOBT_BLOCK(lbp);
#ifdef DEBUG #ifdef DEBUG
if (error = xfs_btree_check_sblock(cur, left, level, lbp)) if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
goto error0; goto error0;
#endif #endif
/* /*
...@@ -371,7 +362,7 @@ xfs_inobt_delrec( ...@@ -371,7 +362,7 @@ xfs_inobt_delrec(
*/ */
if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >= if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
XFS_INOBT_BLOCK_MINRECS(level, cur)) { XFS_INOBT_BLOCK_MINRECS(level, cur)) {
if (error = xfs_inobt_rshift(tcur, level, &i)) if ((error = xfs_inobt_rshift(tcur, level, &i)))
goto error0; goto error0;
if (i) { if (i) {
ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >= ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
...@@ -402,41 +393,44 @@ xfs_inobt_delrec( ...@@ -402,41 +393,44 @@ xfs_inobt_delrec(
* See if we can join with the left neighbor block. * See if we can join with the left neighbor block.
*/ */
if (lbno != NULLAGBLOCK && if (lbno != NULLAGBLOCK &&
lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) { lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
/* /*
* Set "right" to be the starting block, * Set "right" to be the starting block,
* "left" to be the left neighbor. * "left" to be the left neighbor.
*/ */
rbno = bno; rbno = bno;
right = block; right = block;
rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
rbp = bp; rbp = bp;
if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.i.agno, lbno, 0, &lbp, cur->bc_private.i.agno, lbno, 0, &lbp,
XFS_INO_BTREE_REF)) XFS_INO_BTREE_REF)))
return error; return error;
left = XFS_BUF_TO_INOBT_BLOCK(lbp); left = XFS_BUF_TO_INOBT_BLOCK(lbp);
if (error = xfs_btree_check_sblock(cur, left, level, lbp)) lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
return error; return error;
} }
/* /*
* If that won't work, see if we can join with the right neighbor block. * If that won't work, see if we can join with the right neighbor block.
*/ */
else if (rbno != NULLAGBLOCK && else if (rbno != NULLAGBLOCK &&
rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
/* /*
* Set "left" to be the starting block, * Set "left" to be the starting block,
* "right" to be the right neighbor. * "right" to be the right neighbor.
*/ */
lbno = bno; lbno = bno;
left = block; left = block;
lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
lbp = bp; lbp = bp;
if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.i.agno, rbno, 0, &rbp, cur->bc_private.i.agno, rbno, 0, &rbp,
XFS_INO_BTREE_REF)) XFS_INO_BTREE_REF)))
return error; return error;
right = XFS_BUF_TO_INOBT_BLOCK(rbp); right = XFS_BUF_TO_INOBT_BLOCK(rbp);
if (error = xfs_btree_check_sblock(cur, right, level, rbp)) rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
return error; return error;
} }
/* /*
...@@ -457,40 +451,53 @@ xfs_inobt_delrec( ...@@ -457,40 +451,53 @@ xfs_inobt_delrec(
/* /*
* It's a non-leaf. Move keys and pointers. * It's a non-leaf. Move keys and pointers.
*/ */
lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
rkp = XFS_INOBT_KEY_ADDR(right, 1, cur); rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
rpp = XFS_INOBT_PTR_ADDR(right, 1, cur); rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
#ifdef DEBUG #ifdef DEBUG
for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) { for (i = 0; i < rrecs; i++) {
if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)) if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
return error; return error;
} }
#endif #endif
memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp)); memcpy(lkp, rkp, rrecs * sizeof(*lkp));
memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp)); memcpy(lpp, rpp, rrecs * sizeof(*lpp));
xfs_inobt_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
xfs_inobt_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
} else { } else {
/* /*
* It's a leaf. Move records. * It's a leaf. Move records.
*/ */
lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur); lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
rrp = XFS_INOBT_REC_ADDR(right, 1, cur); rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp)); memcpy(lrp, rrp, rrecs * sizeof(*lrp));
xfs_inobt_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT)); }
/*
* If we joined with the left neighbor, set the buffer in the
* cursor to the left block, and fix up the index.
*/
if (bp != lbp) {
xfs_btree_setbuf(cur, level, lbp);
cur->bc_ptrs[level] += lrecs;
} }
/*
* If we joined with the right neighbor and there's a level above
* us, increment the cursor at that level.
*/
else if (level + 1 < cur->bc_nlevels &&
(error = xfs_alloc_increment(cur, level + 1, &i)))
return error;
/* /*
* Fix up the number of records in the surviving block. * Fix up the number of records in the surviving block.
*/ */
INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT)); lrecs += rrecs;
INT_SET(left->bb_numrecs, ARCH_CONVERT, lrecs);
/* /*
* Fix up the right block pointer in the surviving block, and log it. * Fix up the right block pointer in the surviving block, and log it.
*/ */
INT_COPY(left->bb_rightsib, right->bb_rightsib, ARCH_CONVERT); left->bb_rightsib = right->bb_rightsib;
xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
/* /*
* If there is a right sibling now, make it point to the * If there is a right sibling now, make it point to the
...@@ -500,12 +507,12 @@ xfs_inobt_delrec( ...@@ -500,12 +507,12 @@ xfs_inobt_delrec(
xfs_inobt_block_t *rrblock; xfs_inobt_block_t *rrblock;
xfs_buf_t *rrbp; xfs_buf_t *rrbp;
if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
&rrbp, XFS_INO_BTREE_REF)) &rrbp, XFS_INO_BTREE_REF)))
return error; return error;
rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)) if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
return error; return error;
INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno); INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB); xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
...@@ -513,40 +520,10 @@ xfs_inobt_delrec( ...@@ -513,40 +520,10 @@ xfs_inobt_delrec(
/* /*
* Free the deleting block. * Free the deleting block.
*/ */
if (error = xfs_free_extent(cur->bc_tp, rbno, 1)) if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
cur->bc_private.i.agno, rbno), 1)))
return error; return error;
xfs_trans_binval(cur->bc_tp, rbp); xfs_trans_binval(cur->bc_tp, rbp);
/*
* To ensure that the freed block is not used for
* user data until this transaction is permanent,
* we lock the agf buffer for this ag until the
* transaction record makes it to the on-disk log.
*/
agfbno = XFS_AG_DADDR(cur->bc_mp, cur->bc_private.i.agno,
XFS_AGF_DADDR(mp));
if (error = xfs_trans_read_buf(cur->bc_mp, cur->bc_tp,
cur->bc_mp->m_ddev_targp, agfbno,
XFS_FSS_TO_BB(mp, 1), 0, &agfbp))
return error;
ASSERT(!XFS_BUF_GETERROR(agfbp));
xfs_trans_bhold_until_committed(cur->bc_tp, agfbp);
/*
* If we joined with the left neighbor, set the buffer in the
* cursor to the left block, and fix up the index.
*/
if (bp != lbp) {
cur->bc_bufs[level] = lbp;
cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT);
cur->bc_ra[level] = 0;
}
/*
* If we joined with the right neighbor and there's a level above
* us, increment the cursor at that level.
*/
else if (level + 1 < cur->bc_nlevels &&
(error = xfs_inobt_increment(cur, level + 1, &i))) {
return error;
}
/* /*
* Readjust the ptr at this level if it's not a leaf, since it's * Readjust the ptr at this level if it's not a leaf, since it's
* still pointing at the deletion point, which makes the cursor * still pointing at the deletion point, which makes the cursor
...@@ -565,7 +542,6 @@ xfs_inobt_delrec( ...@@ -565,7 +542,6 @@ xfs_inobt_delrec(
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error; return error;
} }
#endif /* _NOTYET_ */
/* /*
* Insert one record/level. Return information to the caller * Insert one record/level. Return information to the caller
...@@ -590,6 +566,7 @@ xfs_inobt_insrec( ...@@ -590,6 +566,7 @@ xfs_inobt_insrec(
xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */ xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
xfs_inobt_key_t nkey; /* new key value, from split */ xfs_inobt_key_t nkey; /* new key value, from split */
xfs_inobt_rec_t nrec; /* new record value, for caller */ xfs_inobt_rec_t nrec; /* new record value, for caller */
int numrecs;
int optr; /* old ptr value */ int optr; /* old ptr value */
xfs_inobt_ptr_t *pp; /* pointer to btree addresses */ xfs_inobt_ptr_t *pp; /* pointer to btree addresses */
int ptr; /* index in btree block for this rec */ int ptr; /* index in btree block for this rec */
...@@ -622,13 +599,14 @@ xfs_inobt_insrec( ...@@ -622,13 +599,14 @@ xfs_inobt_insrec(
*/ */
bp = cur->bc_bufs[level]; bp = cur->bc_bufs[level];
block = XFS_BUF_TO_INOBT_BLOCK(bp); block = XFS_BUF_TO_INOBT_BLOCK(bp);
numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
#ifdef DEBUG #ifdef DEBUG
if ((error = xfs_btree_check_sblock(cur, block, level, bp))) if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
return error; return error;
/* /*
* Check that the new entry is being inserted in the right place. * Check that the new entry is being inserted in the right place.
*/ */
if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) { if (ptr <= numrecs) {
if (level == 0) { if (level == 0) {
rp = XFS_INOBT_REC_ADDR(block, ptr, cur); rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
xfs_btree_check_rec(cur->bc_btnum, recp, rp); xfs_btree_check_rec(cur->bc_btnum, recp, rp);
...@@ -644,7 +622,7 @@ xfs_inobt_insrec( ...@@ -644,7 +622,7 @@ xfs_inobt_insrec(
* If the block is full, we can't insert the new entry until we * If the block is full, we can't insert the new entry until we
* make the block un-full. * make the block un-full.
*/ */
if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) { if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
/* /*
* First, try shifting an entry to the right neighbor. * First, try shifting an entry to the right neighbor.
*/ */
...@@ -695,6 +673,7 @@ xfs_inobt_insrec( ...@@ -695,6 +673,7 @@ xfs_inobt_insrec(
* At this point we know there's room for our new entry in the block * At this point we know there's room for our new entry in the block
* we're pointing at. * we're pointing at.
*/ */
numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
if (level > 0) { if (level > 0) {
/* /*
* It's a non-leaf entry. Make a hole for the new data * It's a non-leaf entry. Make a hole for the new data
...@@ -703,15 +682,15 @@ xfs_inobt_insrec( ...@@ -703,15 +682,15 @@ xfs_inobt_insrec(
kp = XFS_INOBT_KEY_ADDR(block, 1, cur); kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
pp = XFS_INOBT_PTR_ADDR(block, 1, cur); pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
#ifdef DEBUG #ifdef DEBUG
for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) { for (i = numrecs; i >= ptr; i--) {
if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level))) if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)))
return error; return error;
} }
#endif #endif
memmove(&kp[ptr], &kp[ptr - 1], memmove(&kp[ptr], &kp[ptr - 1],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp)); (numrecs - ptr + 1) * sizeof(*kp));
memmove(&pp[ptr], &pp[ptr - 1], memmove(&pp[ptr], &pp[ptr - 1],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp)); (numrecs - ptr + 1) * sizeof(*pp));
/* /*
* Now stuff the new data in, bump numrecs and log the new data. * Now stuff the new data in, bump numrecs and log the new data.
*/ */
...@@ -721,23 +700,25 @@ xfs_inobt_insrec( ...@@ -721,23 +700,25 @@ xfs_inobt_insrec(
#endif #endif
kp[ptr - 1] = key; /* INT_: struct copy */ kp[ptr - 1] = key; /* INT_: struct copy */
INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop); INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1); numrecs++;
xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); xfs_inobt_log_keys(cur, bp, ptr, numrecs);
xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
} else { } else {
/* /*
* It's a leaf entry. Make a hole for the new record. * It's a leaf entry. Make a hole for the new record.
*/ */
rp = XFS_INOBT_REC_ADDR(block, 1, cur); rp = XFS_INOBT_REC_ADDR(block, 1, cur);
memmove(&rp[ptr], &rp[ptr - 1], memmove(&rp[ptr], &rp[ptr - 1],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp)); (numrecs - ptr + 1) * sizeof(*rp));
/* /*
* Now stuff the new record in, bump numrecs * Now stuff the new record in, bump numrecs
* and log the new data. * and log the new data.
*/ */
rp[ptr - 1] = *recp; /* INT_: struct copy */ rp[ptr - 1] = *recp; /* INT_: struct copy */
INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1); numrecs++;
xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT)); INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
xfs_inobt_log_recs(cur, bp, ptr, numrecs);
} }
/* /*
* Log the new number of records in the btree header. * Log the new number of records in the btree header.
...@@ -747,7 +728,7 @@ xfs_inobt_insrec( ...@@ -747,7 +728,7 @@ xfs_inobt_insrec(
/* /*
* Check that the key/record is in the right place, now. * Check that the key/record is in the right place, now.
*/ */
if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) { if (ptr < numrecs) {
if (level == 0) if (level == 0)
xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1, xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
rp + ptr); rp + ptr);
...@@ -1774,7 +1755,6 @@ xfs_inobt_decrement( ...@@ -1774,7 +1755,6 @@ xfs_inobt_decrement(
return 0; return 0;
} }
#ifdef _NOTYET_
/* /*
* Delete the record pointed to by cur. * Delete the record pointed to by cur.
* The cursor refers to the place where the record was (could be inserted) * The cursor refers to the place where the record was (could be inserted)
...@@ -1795,13 +1775,13 @@ xfs_inobt_delete( ...@@ -1795,13 +1775,13 @@ xfs_inobt_delete(
* Otherwise we are done. * Otherwise we are done.
*/ */
for (level = 0, i = 2; i == 2; level++) { for (level = 0, i = 2; i == 2; level++) {
if (error = xfs_inobt_delrec(cur, level, &i)) if ((error = xfs_inobt_delrec(cur, level, &i)))
return error; return error;
} }
if (i == 0) { if (i == 0) {
for (level = 1; level < cur->bc_nlevels; level++) { for (level = 1; level < cur->bc_nlevels; level++) {
if (cur->bc_ptrs[level] == 0) { if (cur->bc_ptrs[level] == 0) {
if (error = xfs_inobt_decrement(cur, level, &i)) if ((error = xfs_inobt_decrement(cur, level, &i)))
return error; return error;
break; break;
} }
...@@ -1810,7 +1790,7 @@ xfs_inobt_delete( ...@@ -1810,7 +1790,7 @@ xfs_inobt_delete(
*stat = i; *stat = i;
return 0; return 0;
} }
#endif /* _NOTYET_ */
/* /*
* Get the data from the pointed-to record. * Get the data from the pointed-to record.
......
...@@ -225,7 +225,6 @@ xfs_inobt_decrement( ...@@ -225,7 +225,6 @@ xfs_inobt_decrement(
int level, /* level in btree, 0 is leaf */ int level, /* level in btree, 0 is leaf */
int *stat); /* success/failure */ int *stat); /* success/failure */
#ifdef _NOTYET_
/* /*
* Delete the record pointed to by cur. * Delete the record pointed to by cur.
* The cursor refers to the place where the record was (could be inserted) * The cursor refers to the place where the record was (could be inserted)
...@@ -235,7 +234,6 @@ int /* error */ ...@@ -235,7 +234,6 @@ int /* error */
xfs_inobt_delete( xfs_inobt_delete(
struct xfs_btree_cur *cur, /* btree cursor */ struct xfs_btree_cur *cur, /* btree cursor */
int *stat); /* success/failure */ int *stat); /* success/failure */
#endif /* _NOTYET_ */
/* /*
* Get the data from the pointed-to record. * Get the data from the pointed-to record.
......
...@@ -258,6 +258,7 @@ xfs_iget_core( ...@@ -258,6 +258,7 @@ xfs_iget_core(
if (newnode) { if (newnode) {
xfs_iocore_inode_reinit(ip); xfs_iocore_inode_reinit(ip);
} }
ip->i_flags &= ~XFS_ISTALE;
vn_trace_exit(vp, "xfs_iget.found", vn_trace_exit(vp, "xfs_iget.found",
(inst_t *)__return_address); (inst_t *)__return_address);
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include "xfs_inum.h" #include "xfs_inum.h"
#include "xfs_log.h" #include "xfs_log.h"
#include "xfs_trans.h" #include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_sb.h" #include "xfs_sb.h"
#include "xfs_ag.h" #include "xfs_ag.h"
#include "xfs_dir.h" #include "xfs_dir.h"
...@@ -2103,6 +2104,180 @@ xfs_iunlink_remove( ...@@ -2103,6 +2104,180 @@ xfs_iunlink_remove(
return 0; return 0;
} }
/*
 * Report whether an in-core inode has nothing pending to write back.
 *
 * Returns 1 when i_update_core is clear and the inode either has no
 * log item or its log item has none of the XFS_ILOG_ALL fields set;
 * returns 0 otherwise.
 */
static __inline__ int xfs_inode_clean(xfs_inode_t *ip)
{
	/* A pending core update always makes the inode dirty. */
	if (ip->i_update_core != 0)
		return 0;

	/* No log item at all: nothing has been logged against it. */
	if (ip->i_itemp == NULL)
		return 1;

	/* Otherwise it is clean only if no loggable field is flagged. */
	if (ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)
		return 0;

	return 1;
}
/*
 * Stale every buffer, and every dirty in-core inode, that backs the
 * inode chunk starting at inum.  Called by xfs_ifree() when xfs_difree()
 * reports that the chunk is to be deleted.
 *
 * free_ip - the inode being freed; it is treated as already locked by
 *           the caller, so its inode lock is not taken here
 * tp      - transaction in which the buffers are obtained and invalidated
 * inum    - first inode number of the chunk
 *
 * For each buffer of the chunk the work is done in three steps:
 *   1. scan the inode hash for in-core inodes of the buffer, mark the
 *      ones we can lock as XFS_ISTALE and remember the dirty ones;
 *   2. get the buffer and redirect the callbacks of inode log items
 *      already attached to it to xfs_istale_done;
 *   3. attach the remembered inodes to the buffer with the same
 *      callback, then invalidate the buffer in the transaction.
 */
void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
xfs_trans_t *tp,
xfs_ino_t inum)
{
xfs_mount_t *mp = free_ip->i_mount;
int blks_per_cluster;
int nbufs;
int ninodes;
int i, j, found, pre_flushed;
xfs_daddr_t blkno;
xfs_buf_t *bp;
xfs_ihash_t *ih;
xfs_inode_t *ip, **ip_found;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
SPLDECL(s);
/*
 * Work out the buffer geometry: a buffer covers one filesystem block
 * when the block is at least as large as an inode cluster, otherwise
 * it covers a whole cluster of blocks.  ninodes is the number of
 * inodes per buffer and nbufs the number of buffers in the chunk.
 */
if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
blks_per_cluster = 1;
ninodes = mp->m_sb.sb_inopblock;
nbufs = XFS_IALLOC_BLOCKS(mp);
} else {
blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
mp->m_sb.sb_blocksize;
ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
}
/* Scratch array of the locked, dirty inodes found for one buffer. */
ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
for (j = 0; j < nbufs; j++, inum += ninodes) {
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
/*
 * Look for each inode in memory and attempt to lock it;
 * we can be racing with flush and tail pushing here.
 * Any inode we get the locks on is added to an array of
 * inodes to process later.
 *
 * Then get the buffer lock.  We could beat a flush
 * or tail pushing thread to the lock here, in which
 * case they will go looking for the inode buffer
 * and fail; we need some other form of interlock
 * here.
 */
found = 0;
for (i = 0; i < ninodes; i++) {
ih = XFS_IHASH(mp, inum + i);
read_lock(&ih->ih_lock);
for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
if (ip->i_ino == inum + i)
break;
}
/* Inode not in memory, or already marked stale:
 * nothing to do.
 */
if (!ip || (ip->i_flags & XFS_ISTALE)) {
read_unlock(&ih->ih_lock);
continue;
}
/* A clean inode is skipped without being marked stale. */
if (xfs_inode_clean(ip)) {
read_unlock(&ih->ih_lock);
continue;
}
/* If we can get the locks then add it to the
 * list; otherwise, by the time we get the bp lock
 * below, it will already be attached to the
 * inode buffer.
 */
/* This inode will already be locked - by us; let's
 * keep it that way.  Only the flush lock is tried.
 */
if (ip == free_ip) {
if (xfs_iflock_nowait(ip)) {
ip->i_flags |= XFS_ISTALE;
/* Re-check under the flush lock; keep it only if still dirty. */
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
} else {
ip_found[found++] = ip;
}
}
read_unlock(&ih->ih_lock);
continue;
}
/*
 * Any other inode needs both the inode lock and the flush
 * lock, taken without blocking.  If either attempt fails
 * the inode is left alone.
 */
if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
if (xfs_iflock_nowait(ip)) {
ip->i_flags |= XFS_ISTALE;
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
} else {
ip_found[found++] = ip;
}
} else {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
}
read_unlock(&ih->ih_lock);
}
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
mp->m_bsize * blks_per_cluster,
XFS_BUF_LOCK);
/*
 * Walk the log items already attached to the buffer.  Each inode
 * item gets its callback replaced with xfs_istale_done, its flush
 * lsn set from the item's current lsn (read under the AIL lock),
 * and its inode marked stale.
 */
pre_flushed = 0;
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
while (lip) {
if (lip->li_type == XFS_LI_INODE) {
iip = (xfs_inode_log_item_t *)lip;
ASSERT(iip->ili_logged == 1);
lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
AIL_LOCK(mp,s);
iip->ili_flush_lsn = iip->ili_item.li_lsn;
AIL_UNLOCK(mp, s);
iip->ili_inode->i_flags |= XFS_ISTALE;
pre_flushed++;
}
lip = lip->li_bio_list;
}
/*
 * Now process the inodes we managed to lock above.
 */
for (i = 0; i < found; i++) {
ip = ip_found[i];
iip = ip->i_itemp;
/*
 * No log item: just clear the core-dirty flag and drop
 * the locks.
 * NOTE(review): this path drops the ILOCK even when
 * ip == free_ip; presumably free_ip always has a log item
 * by this point - confirm.
 */
if (!iip) {
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
/*
 * Move the pending fields to ili_last_fields and mark the item
 * logged, then attach it to the buffer.  The flush lock is not
 * released here; presumably xfs_istale_done releases it through
 * xfs_iflush_abort - confirm.
 */
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
iip->ili_logged = 1;
AIL_LOCK(mp,s);
iip->ili_flush_lsn = iip->ili_item.li_lsn;
AIL_UNLOCK(mp, s);
xfs_buf_attach_iodone(bp,
(void(*)(xfs_buf_t*,xfs_log_item_t*))
xfs_istale_done, (xfs_log_item_t *)iip);
/* free_ip stays locked for the caller. */
if (ip != free_ip) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
}
/*
 * Only buffers that carry inode items are flagged as stale inode
 * buffers; every buffer of the chunk is invalidated regardless.
 */
if (found || pre_flushed)
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
}
/* /*
* This is called to return an inode to the inode free list. * This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have * The inode should already be truncated to 0 length and have
...@@ -2116,9 +2291,12 @@ xfs_iunlink_remove( ...@@ -2116,9 +2291,12 @@ xfs_iunlink_remove(
int int
xfs_ifree( xfs_ifree(
xfs_trans_t *tp, xfs_trans_t *tp,
xfs_inode_t *ip) xfs_inode_t *ip,
xfs_bmap_free_t *flist)
{ {
int error; int error;
int delete;
xfs_ino_t first_ino;
ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE)); ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
ASSERT(ip->i_transp == tp); ASSERT(ip->i_transp == tp);
...@@ -2137,7 +2315,7 @@ xfs_ifree( ...@@ -2137,7 +2315,7 @@ xfs_ifree(
return error; return error;
} }
error = xfs_difree(tp, ip->i_ino); error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
if (error != 0) { if (error != 0) {
return error; return error;
} }
...@@ -2149,13 +2327,17 @@ xfs_ifree( ...@@ -2149,13 +2327,17 @@ xfs_ifree(
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t); XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
/* /*
* Bump the generation count so no one will be confused * Bump the generation count so no one will be confused
* by reincarnations of this inode. * by reincarnations of this inode.
*/ */
ip->i_d.di_gen++; ip->i_d.di_gen++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (delete) {
xfs_ifree_cluster(ip, tp, first_ino);
}
return 0; return 0;
} }
......
...@@ -179,7 +179,7 @@ typedef struct xfs_ihash { ...@@ -179,7 +179,7 @@ typedef struct xfs_ihash {
* Inode hashing and hash bucket locking. * Inode hashing and hash bucket locking.
*/ */
#define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1) #define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1)
#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)ino) % (mp)->m_ihsize)) #define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
/* /*
* This is the xfs inode cluster hash. This hash is used by xfs_iflush to * This is the xfs inode cluster hash. This hash is used by xfs_iflush to
...@@ -362,7 +362,8 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n); ...@@ -362,7 +362,8 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
#define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */ #define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */
#define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */ #define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */
#define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */ #define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */
#define XFS_IRECLAIMABLE 0x0010 /* inode can be reclaimed */ #define XFS_ISTALE 0x0010 /* inode has been staled */
#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
/* /*
* Flags for inode locking. * Flags for inode locking.
...@@ -487,7 +488,8 @@ int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t, ...@@ -487,7 +488,8 @@ int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t,
struct xfs_buf **, boolean_t *, xfs_inode_t **); struct xfs_buf **, boolean_t *, xfs_inode_t **);
void xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int, void xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int,
xfs_arch_t); xfs_arch_t);
int xfs_ifree(struct xfs_trans *, xfs_inode_t *); int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *);
int xfs_atruncate_start(xfs_inode_t *); int xfs_atruncate_start(xfs_inode_t *);
void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
......
...@@ -631,6 +631,14 @@ xfs_inode_item_trylock( ...@@ -631,6 +631,14 @@ xfs_inode_item_trylock(
} }
/* NOTREACHED */ /* NOTREACHED */
} }
/* Stale items should force out the iclog */
if (ip->i_flags & XFS_ISTALE) {
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
return XFS_ITEM_PINNED;
}
#ifdef DEBUG #ifdef DEBUG
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
ASSERT(iip->ili_format.ilf_fields != 0); ASSERT(iip->ili_format.ilf_fields != 0);
...@@ -1074,3 +1082,11 @@ xfs_iflush_abort( ...@@ -1074,3 +1082,11 @@ xfs_iflush_abort(
*/ */
xfs_ifunlock(ip); xfs_ifunlock(ip);
} }
/*
 * Buffer callback used for inode log items whose backing inode buffer
 * has been staled (installed by xfs_ifree_cluster).  It aborts the
 * flush of the inode that owns the log item.
 *
 * bp  - the buffer the item was attached to; not used here
 * iip - the inode log item being completed
 */
void
xfs_istale_done(
	xfs_buf_t		*bp,
	xfs_inode_log_item_t	*iip)
{
	struct xfs_inode	*stale_ip = iip->ili_inode;

	xfs_iflush_abort(stale_ip);
}
...@@ -189,6 +189,7 @@ int xfs_ilog_fext(int w); ...@@ -189,6 +189,7 @@ int xfs_ilog_fext(int w);
void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
void xfs_inode_item_destroy(struct xfs_inode *); void xfs_inode_item_destroy(struct xfs_inode *);
void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *); void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *);
void xfs_iflush_abort(struct xfs_inode *); void xfs_iflush_abort(struct xfs_inode *);
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
...@@ -1529,17 +1529,35 @@ xlog_recover_reorder_trans( ...@@ -1529,17 +1529,35 @@ xlog_recover_reorder_trans(
xlog_recover_t *trans) xlog_recover_t *trans)
{ {
xlog_recover_item_t *first_item, *itemq, *itemq_next; xlog_recover_item_t *first_item, *itemq, *itemq_next;
xfs_buf_log_format_t *buf_f;
xfs_buf_log_format_v1_t *obuf_f;
ushort flags;
first_item = itemq = trans->r_itemq; first_item = itemq = trans->r_itemq;
trans->r_itemq = NULL; trans->r_itemq = NULL;
do { do {
itemq_next = itemq->ri_next; itemq_next = itemq->ri_next;
buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
switch (ITEM_TYPE(itemq)) { switch (ITEM_TYPE(itemq)) {
case XFS_LI_BUF: case XFS_LI_BUF:
flags = buf_f->blf_flags;
break;
case XFS_LI_6_1_BUF: case XFS_LI_6_1_BUF:
case XFS_LI_5_3_BUF: case XFS_LI_5_3_BUF:
xlog_recover_insert_item_frontq(&trans->r_itemq, itemq); obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
flags = obuf_f->blf_flags;
break; break;
}
switch (ITEM_TYPE(itemq)) {
case XFS_LI_BUF:
case XFS_LI_6_1_BUF:
case XFS_LI_5_3_BUF:
/*
 * Buffer items that are NOT cancelled are moved to the front of
 * the queue.  The cancel bit has to be masked out of flags before
 * negating: the previous "!flags & XFS_BLI_CANCEL" negated flags
 * first, so it tested the 0/1 result of "!flags" against the bit
 * instead of testing the bit itself.
 */
if (!(flags & XFS_BLI_CANCEL)) {
	xlog_recover_insert_item_frontq(&trans->r_itemq,
					itemq);
	break;
}
/* Cancelled buffer items fall through to the cases that follow. */
case XFS_LI_INODE: case XFS_LI_INODE:
case XFS_LI_6_1_INODE: case XFS_LI_6_1_INODE:
case XFS_LI_5_3_INODE: case XFS_LI_5_3_INODE:
...@@ -1668,32 +1686,16 @@ xlog_recover_do_buffer_pass1( ...@@ -1668,32 +1686,16 @@ xlog_recover_do_buffer_pass1(
* made at that point. * made at that point.
*/ */
STATIC int STATIC int
xlog_recover_do_buffer_pass2( xlog_check_buffer_cancelled(
xlog_t *log, xlog_t *log,
xfs_buf_log_format_t *buf_f) xfs_daddr_t blkno,
uint len,
ushort flags)
{ {
xfs_buf_cancel_t *bcp; xfs_buf_cancel_t *bcp;
xfs_buf_cancel_t *prevp; xfs_buf_cancel_t *prevp;
xfs_buf_cancel_t **bucket; xfs_buf_cancel_t **bucket;
xfs_buf_log_format_v1_t *obuf_f;
xfs_daddr_t blkno = 0;
ushort flags = 0;
uint len = 0;
switch (buf_f->blf_type) {
case XFS_LI_BUF:
blkno = buf_f->blf_blkno;
flags = buf_f->blf_flags;
len = buf_f->blf_len;
break;
case XFS_LI_6_1_BUF:
case XFS_LI_5_3_BUF:
obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
blkno = (xfs_daddr_t) obuf_f->blf_blkno;
flags = obuf_f->blf_flags;
len = (xfs_daddr_t) obuf_f->blf_len;
break;
}
if (log->l_buf_cancel_table == NULL) { if (log->l_buf_cancel_table == NULL) {
/* /*
* There is nothing in the table built in pass one, * There is nothing in the table built in pass one,
...@@ -1755,6 +1757,34 @@ xlog_recover_do_buffer_pass2( ...@@ -1755,6 +1757,34 @@ xlog_recover_do_buffer_pass2(
return 0; return 0;
} }
/*
 * Pass-2 check for a logged buffer: pull the block number, length and
 * flags out of the buffer log format (current or v1 layout, chosen by
 * blf_type) and hand them to xlog_check_buffer_cancelled().
 *
 * For any other blf_type all three values stay zero.  Returns whatever
 * xlog_check_buffer_cancelled() returns.
 */
STATIC int
xlog_recover_do_buffer_pass2(
	xlog_t			*log,
	xfs_buf_log_format_t	*buf_f)
{
	xfs_daddr_t		bno = 0;
	uint			nblks = 0;
	ushort			bflags = 0;

	if (buf_f->blf_type == XFS_LI_BUF) {
		/* Current format: fields are read directly. */
		bno = buf_f->blf_blkno;
		bflags = buf_f->blf_flags;
		nblks = buf_f->blf_len;
	} else if (buf_f->blf_type == XFS_LI_6_1_BUF ||
		   buf_f->blf_type == XFS_LI_5_3_BUF) {
		/* Older formats share the v1 layout. */
		xfs_buf_log_format_v1_t	*old_f;

		old_f = (xfs_buf_log_format_v1_t *)buf_f;
		bno = (xfs_daddr_t) old_f->blf_blkno;
		bflags = old_f->blf_flags;
		nblks = (xfs_daddr_t) old_f->blf_len;
	}

	return xlog_check_buffer_cancelled(log, bno, nblks, bflags);
}
/* /*
* Perform recovery for a buffer full of inodes. In these buffers, * Perform recovery for a buffer full of inodes. In these buffers,
* the only data which should be recovered is that which corresponds * the only data which should be recovered is that which corresponds
...@@ -2289,6 +2319,14 @@ xlog_recover_do_inode_trans( ...@@ -2289,6 +2319,14 @@ xlog_recover_do_inode_trans(
imap.im_blkno = 0; imap.im_blkno = 0;
xfs_imap(log->l_mp, 0, ino, &imap, 0); xfs_imap(log->l_mp, 0, ino, &imap, 0);
} }
/*
* Inode buffers can be freed, look out for it,
* and do not replay the inode.
*/
if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0))
return 0;
bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len, bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
XFS_BUF_LOCK); XFS_BUF_LOCK);
if (XFS_BUF_ISERROR(bp)) { if (XFS_BUF_ISERROR(bp)) {
......
...@@ -416,6 +416,7 @@ typedef struct xfs_mount { ...@@ -416,6 +416,7 @@ typedef struct xfs_mount {
#define XFS_MOUNT_32BITINOOPT 0x00008000 /* saved mount option state */ #define XFS_MOUNT_32BITINOOPT 0x00008000 /* saved mount option state */
#define XFS_MOUNT_NOUUID 0x00010000 /* ignore uuid during mount */ #define XFS_MOUNT_NOUUID 0x00010000 /* ignore uuid during mount */
#define XFS_MOUNT_NOLOGFLUSH 0x00020000 #define XFS_MOUNT_NOLOGFLUSH 0x00020000
#define XFS_MOUNT_IDELETE 0x00040000 /* delete empty inode clusters*/
/* /*
* Default minimum read and write sizes. * Default minimum read and write sizes.
......
...@@ -365,7 +365,6 @@ xfs_trans_mod_sb( ...@@ -365,7 +365,6 @@ xfs_trans_mod_sb(
switch (field) { switch (field) {
case XFS_TRANS_SB_ICOUNT: case XFS_TRANS_SB_ICOUNT:
ASSERT(delta > 0);
tp->t_icount_delta += delta; tp->t_icount_delta += delta;
break; break;
case XFS_TRANS_SB_IFREE: case XFS_TRANS_SB_IFREE:
......
...@@ -703,6 +703,8 @@ typedef struct xfs_trans { ...@@ -703,6 +703,8 @@ typedef struct xfs_trans {
* the agi hash list and counters: sector size * the agi hash list and counters: sector size
* the inode btree entry: block size * the inode btree entry: block size
* the on disk inode before ours in the agi hash list: inode cluster size * the on disk inode before ours in the agi hash list: inode cluster size
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
*/ */
#define XFS_CALC_IFREE_LOG_RES(mp) \ #define XFS_CALC_IFREE_LOG_RES(mp) \
((mp)->m_sb.sb_inodesize + \ ((mp)->m_sb.sb_inodesize + \
...@@ -710,7 +712,10 @@ typedef struct xfs_trans { ...@@ -710,7 +712,10 @@ typedef struct xfs_trans {
(mp)->m_sb.sb_sectsize + \ (mp)->m_sb.sb_sectsize + \
XFS_FSB_TO_B((mp), 1) + \ XFS_FSB_TO_B((mp), 1) + \
MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \ MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
(128 * 5)) (128 * 5) + \
(128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree) #define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
...@@ -918,6 +923,7 @@ typedef struct xfs_trans { ...@@ -918,6 +923,7 @@ typedef struct xfs_trans {
#define XFS_DEFAULT_LOG_COUNT 1 #define XFS_DEFAULT_LOG_COUNT 1
#define XFS_DEFAULT_PERM_LOG_COUNT 2 #define XFS_DEFAULT_PERM_LOG_COUNT 2
#define XFS_ITRUNCATE_LOG_COUNT 2 #define XFS_ITRUNCATE_LOG_COUNT 2
#define XFS_INACTIVE_LOG_COUNT 2
#define XFS_CREATE_LOG_COUNT 2 #define XFS_CREATE_LOG_COUNT 2
#define XFS_MKDIR_LOG_COUNT 3 #define XFS_MKDIR_LOG_COUNT 3
#define XFS_SYMLINK_LOG_COUNT 3 #define XFS_SYMLINK_LOG_COUNT 3
...@@ -991,6 +997,8 @@ void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *); ...@@ -991,6 +997,8 @@ void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bhold_until_committed(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bhold_until_committed(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *); void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
......
...@@ -931,6 +931,35 @@ xfs_trans_inode_buf( ...@@ -931,6 +931,35 @@ xfs_trans_inode_buf(
bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF; bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF;
} }
/*
 * Mark a buffer that is about to be staled as one that held inodes.
 *
 * Such a buffer is handled specially when it is unpinned: the inodes
 * attached to it should be removed from the AIL.  Recovery treats it
 * specially too, because the buffer may have been reused and replay
 * of the inodes it contained has to be prevented.
 *
 * The buffer must be busy, owned by transaction tp, and already have
 * a buf log item with a non-zero reference count.
 */
void
xfs_trans_stale_inode_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;
	void			(*stale_cb)(xfs_buf_t *, xfs_log_item_t *);

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	/* Flag the item and point its callback at xfs_buf_iodone. */
	stale_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone;
	bip->bli_flags |= XFS_BLI_STALE_INODE;
	bip->bli_item.li_cb = stale_cb;
}
/* /*
* Mark the buffer as being one which contains newly allocated * Mark the buffer as being one which contains newly allocated
...@@ -954,7 +983,6 @@ xfs_trans_inode_alloc_buf( ...@@ -954,7 +983,6 @@ xfs_trans_inode_alloc_buf(
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
} }
......
...@@ -298,6 +298,8 @@ xfs_start_flags( ...@@ -298,6 +298,8 @@ xfs_start_flags(
mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
} }
if (ap->flags & XFSMNT_IDELETE)
mp->m_flags |= XFS_MOUNT_IDELETE;
/* /*
* no recovery flag requires a read-only mount * no recovery flag requires a read-only mount
...@@ -1597,6 +1599,7 @@ xfs_vget( ...@@ -1597,6 +1599,7 @@ xfs_vget(
#define MNTOPT_NOLOGFLUSH "nologflush" /* don't hard flush on log writes */ #define MNTOPT_NOLOGFLUSH "nologflush" /* don't hard flush on log writes */
#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */ #define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ #define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
int int
...@@ -1611,6 +1614,8 @@ xfs_parseargs( ...@@ -1611,6 +1614,8 @@ xfs_parseargs(
int dsunit, dswidth, vol_dsunit, vol_dswidth; int dsunit, dswidth, vol_dsunit, vol_dswidth;
int iosize; int iosize;
args->flags |= XFSMNT_IDELETE; /* default to on */
if (!options) if (!options)
return 0; return 0;
...@@ -1715,6 +1720,8 @@ xfs_parseargs( ...@@ -1715,6 +1720,8 @@ xfs_parseargs(
args->flags |= XFSMNT_NOUUID; args->flags |= XFSMNT_NOUUID;
} else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) { } else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) {
args->flags |= XFSMNT_NOLOGFLUSH; args->flags |= XFSMNT_NOLOGFLUSH;
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
args->flags &= ~XFSMNT_IDELETE;
} else if (!strcmp(this_char, "osyncisdsync")) { } else if (!strcmp(this_char, "osyncisdsync")) {
/* no-op, this is now the default */ /* no-op, this is now the default */
printk("XFS: osyncisdsync is now the default, option is deprecated.\n"); printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
......
...@@ -1595,8 +1595,7 @@ xfs_inactive_symlink_local( ...@@ -1595,8 +1595,7 @@ xfs_inactive_symlink_local(
STATIC int STATIC int
xfs_inactive_attrs( xfs_inactive_attrs(
xfs_inode_t *ip, xfs_inode_t *ip,
xfs_trans_t **tpp, xfs_trans_t **tpp)
int *commitflags)
{ {
xfs_trans_t *tp; xfs_trans_t *tp;
int error; int error;
...@@ -1606,9 +1605,8 @@ xfs_inactive_attrs( ...@@ -1606,9 +1605,8 @@ xfs_inactive_attrs(
tp = *tpp; tp = *tpp;
mp = ip->i_mount; mp = ip->i_mount;
ASSERT(ip->i_d.di_forkoff != 0); ASSERT(ip->i_d.di_forkoff != 0);
xfs_trans_commit(tp, *commitflags, NULL); xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
*commitflags = 0;
error = xfs_attr_inactive(ip); error = xfs_attr_inactive(ip);
if (error) { if (error) {
...@@ -1620,8 +1618,8 @@ xfs_inactive_attrs( ...@@ -1620,8 +1618,8 @@ xfs_inactive_attrs(
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
error = xfs_trans_reserve(tp, 0, error = xfs_trans_reserve(tp, 0,
XFS_IFREE_LOG_RES(mp), XFS_IFREE_LOG_RES(mp),
0, 0, 0, XFS_TRANS_PERM_LOG_RES,
XFS_DEFAULT_LOG_COUNT); XFS_INACTIVE_LOG_COUNT);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp, 0);
...@@ -1694,10 +1692,12 @@ xfs_inactive( ...@@ -1694,10 +1692,12 @@ xfs_inactive(
{ {
xfs_inode_t *ip; xfs_inode_t *ip;
vnode_t *vp; vnode_t *vp;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
int committed;
xfs_trans_t *tp; xfs_trans_t *tp;
xfs_mount_t *mp; xfs_mount_t *mp;
int error; int error;
int commit_flags;
int truncate; int truncate;
vp = BHV_TO_VNODE(bdp); vp = BHV_TO_VNODE(bdp);
...@@ -1795,10 +1795,10 @@ xfs_inactive( ...@@ -1795,10 +1795,10 @@ xfs_inactive(
*/ */
error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0)); (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
if (error) { if (error) {
xfs_trans_cancel(tp, commit_flags | XFS_TRANS_ABORT); xfs_trans_cancel(tp,
XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return (VN_INACTIVE_CACHE); return (VN_INACTIVE_CACHE);
} }
...@@ -1819,13 +1819,11 @@ xfs_inactive( ...@@ -1819,13 +1819,11 @@ xfs_inactive(
xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip); xfs_trans_ihold(tp, ip);
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
} else { } else {
error = xfs_trans_reserve(tp, 0, error = xfs_trans_reserve(tp, 0,
XFS_IFREE_LOG_RES(mp), XFS_IFREE_LOG_RES(mp),
0, 0, 0, XFS_TRANS_PERM_LOG_RES,
XFS_DEFAULT_LOG_COUNT); XFS_INACTIVE_LOG_COUNT);
if (error) { if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp)); ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp, 0);
...@@ -1835,7 +1833,6 @@ xfs_inactive( ...@@ -1835,7 +1833,6 @@ xfs_inactive(
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip); xfs_trans_ihold(tp, ip);
commit_flags = 0;
} }
/* /*
...@@ -1846,7 +1843,7 @@ xfs_inactive( ...@@ -1846,7 +1843,7 @@ xfs_inactive(
* because we can't use it for xfs_attr_inactive(). * because we can't use it for xfs_attr_inactive().
*/ */
if (ip->i_d.di_anextents > 0) { if (ip->i_d.di_anextents > 0) {
error = xfs_inactive_attrs(ip, &tp, &commit_flags); error = xfs_inactive_attrs(ip, &tp);
/* /*
* If we got an error, the transaction is already * If we got an error, the transaction is already
* cancelled, and the inode is unlocked. Just get out. * cancelled, and the inode is unlocked. Just get out.
...@@ -1860,7 +1857,8 @@ xfs_inactive( ...@@ -1860,7 +1857,8 @@ xfs_inactive(
/* /*
* Free the inode. * Free the inode.
*/ */
error = xfs_ifree(tp, ip); XFS_BMAP_INIT(&free_list, &first_block);
error = xfs_ifree(tp, ip, &free_list);
if (error) { if (error) {
/* /*
* If we fail to free the inode, shut down. The cancel * If we fail to free the inode, shut down. The cancel
...@@ -1873,7 +1871,7 @@ xfs_inactive( ...@@ -1873,7 +1871,7 @@ xfs_inactive(
error, mp->m_fsname); error, mp->m_fsname);
xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR); xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
} }
xfs_trans_cancel(tp, commit_flags | XFS_TRANS_ABORT); xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
} else { } else {
/* /*
* Credit the quota account(s). The inode is gone. * Credit the quota account(s). The inode is gone.
...@@ -1884,7 +1882,9 @@ xfs_inactive( ...@@ -1884,7 +1882,9 @@ xfs_inactive(
* Just ignore errors at this point. There is * Just ignore errors at this point. There is
* nothing we can do except to try to keep going. * nothing we can do except to try to keep going.
*/ */
(void) xfs_trans_commit(tp, commit_flags, NULL); (void) xfs_bmap_finish(&tp, &free_list, first_block,
&committed);
(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
} }
/* /*
* Release the dquots held by inode, if any. * Release the dquots held by inode, if any.
......
...@@ -2643,6 +2643,7 @@ xfs_buf_item_print(xfs_buf_log_item_t *blip, int summary) ...@@ -2643,6 +2643,7 @@ xfs_buf_item_print(xfs_buf_log_item_t *blip, int summary)
"stale", /* 0x4 */ "stale", /* 0x4 */
"logged", /* 0x8 */ "logged", /* 0x8 */
"ialloc", /* 0x10 */ "ialloc", /* 0x10 */
"inode_stale", /* 0x20 */
0 0
}; };
static char *blf_flags[] = { static char *blf_flags[] = {
...@@ -4811,6 +4812,7 @@ xfsidbg_xnode(xfs_inode_t *ip) ...@@ -4811,6 +4812,7 @@ xfsidbg_xnode(xfs_inode_t *ip)
"uiosize", /* XFS_IUIOSZ */ "uiosize", /* XFS_IUIOSZ */
"quiesce", /* XFS_IQUIESCE */ "quiesce", /* XFS_IQUIESCE */
"reclaim", /* XFS_IRECLAIM */ "reclaim", /* XFS_IRECLAIM */
"stale", /* XFS_ISTALE */
NULL NULL
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment