Commit 372cc85e authored by Dave Chinner, committed by Ben Myers

xfs: support discontiguous buffers in the xfs_buf_log_item

Add support for discontiguous buffers to the buffer log item by logging each
segment of a discontiguous buffer in separate buffer format structures. This means log
recovery will recover all the changes on a per segment basis without
requiring any knowledge of the fact that it was logged from a
compound buffer.

To do this, we need to be able to determine what buffer segment any
given offset into the compound buffer sits over. This enables us to
translate the dirty bitmap into the number of separate buffer format
structures required.

We also need to be able to determine the number of bitmap elements
that a given buffer segment has, as this determines the size of the
buffer format structure. Hence we need to be able to determine
both the start offset into the buffer and the length of a given
segment to be able to calculate this.

With this information, we can preallocate, build and format the
correct log vector array for each segment in a compound buffer to
appear exactly the same as individually logged buffers in the log.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent de2a4f59
...@@ -153,33 +153,25 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp); ...@@ -153,33 +153,25 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
* If the XFS_BLI_STALE flag has been set, then log nothing. * If the XFS_BLI_STALE flag has been set, then log nothing.
*/ */
STATIC uint STATIC uint
xfs_buf_item_size( xfs_buf_item_size_segment(
struct xfs_log_item *lip) struct xfs_buf_log_item *bip,
struct xfs_buf_log_format *blfp)
{ {
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf; struct xfs_buf *bp = bip->bli_buf;
uint nvecs; uint nvecs;
int next_bit; int next_bit;
int last_bit; int last_bit;
ASSERT(atomic_read(&bip->bli_refcount) > 0); last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
if (bip->bli_flags & XFS_BLI_STALE) { if (last_bit == -1)
return 0;
/* /*
* The buffer is stale, so all we need to log * initial count for a dirty buffer is 2 vectors - the format structure
* is the buf log format structure with the * and the first dirty region.
* cancel flag in it.
*/ */
trace_xfs_buf_item_size_stale(bip); nvecs = 2;
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
return 1;
}
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
nvecs = 1;
last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
bip->bli_format.blf_map_size, 0);
ASSERT(last_bit != -1);
nvecs++;
while (last_bit != -1) { while (last_bit != -1) {
/* /*
* This takes the bit number to start looking from and * This takes the bit number to start looking from and
...@@ -187,8 +179,7 @@ xfs_buf_item_size( ...@@ -187,8 +179,7 @@ xfs_buf_item_size(
* if there are no more bits set or the start bit is * if there are no more bits set or the start bit is
* beyond the end of the bitmap. * beyond the end of the bitmap.
*/ */
next_bit = xfs_next_bit(bip->bli_format.blf_data_map, next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
bip->bli_format.blf_map_size,
last_bit + 1); last_bit + 1);
/* /*
* If we run out of bits, leave the loop, * If we run out of bits, leave the loop,
...@@ -196,7 +187,7 @@ xfs_buf_item_size( ...@@ -196,7 +187,7 @@ xfs_buf_item_size(
* else keep scanning the current set of bits. * else keep scanning the current set of bits.
*/ */
if (next_bit == -1) { if (next_bit == -1) {
last_bit = -1; break;
} else if (next_bit != last_bit + 1) { } else if (next_bit != last_bit + 1) {
last_bit = next_bit; last_bit = next_bit;
nvecs++; nvecs++;
...@@ -210,22 +201,73 @@ xfs_buf_item_size( ...@@ -210,22 +201,73 @@ xfs_buf_item_size(
} }
} }
trace_xfs_buf_item_size(bip);
return nvecs; return nvecs;
} }
/* /*
* This is called to fill in the vector of log iovecs for the * This returns the number of log iovecs needed to log the given buf log item.
* given log buf item. It fills the first entry with a buf log *
* format structure, and the rest point to contiguous chunks * It calculates this as 1 iovec for the buf log format structure and 1 for each
* within the buffer. * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
* in a single iovec.
*
* Discontiguous buffers need a format structure per region that is being
* logged. This makes the changes in the buffer appear to log recovery as though
* they came from separate buffers, just like would occur if multiple buffers
* were used instead of a single discontiguous buffer. This enables
* discontiguous buffers to be in-memory constructs, completely transparent to
* what ends up on disk.
*
* If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
* format structures.
*/ */
STATIC void STATIC uint
xfs_buf_item_format( xfs_buf_item_size(
struct xfs_log_item *lip, struct xfs_log_item *lip)
struct xfs_log_iovec *vecp)
{ {
struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf_log_item *bip = BUF_ITEM(lip);
uint nvecs;
int i;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
if (bip->bli_flags & XFS_BLI_STALE) {
/*
* The buffer is stale, so all we need to log
* is the buf log format structure with the
* cancel flag in it.
*/
trace_xfs_buf_item_size_stale(bip);
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
return bip->bli_format_count;
}
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
/*
* the vector count is based on the number of buffer vectors we have
* dirty bits in. This will only be greater than one when we have a
* compound buffer with more than one segment dirty. Hence for compound
* buffers we need to track which segment the dirty bits correspond to,
* and when we move from one segment to the next increment the vector
* count for the extra buf log format structure that will need to be
* written.
*/
nvecs = 0;
for (i = 0; i < bip->bli_format_count; i++) {
nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
}
trace_xfs_buf_item_size(bip);
return nvecs;
}
static struct xfs_log_iovec *
xfs_buf_item_format_segment(
struct xfs_buf_log_item *bip,
struct xfs_log_iovec *vecp,
uint offset,
struct xfs_buf_log_format *blfp)
{
struct xfs_buf *bp = bip->bli_buf; struct xfs_buf *bp = bip->bli_buf;
uint base_size; uint base_size;
uint nvecs; uint nvecs;
...@@ -235,9 +277,8 @@ xfs_buf_item_format( ...@@ -235,9 +277,8 @@ xfs_buf_item_format(
uint nbits; uint nbits;
uint buffer_offset; uint buffer_offset;
ASSERT(atomic_read(&bip->bli_refcount) > 0); /* copy the flags across from the base format item */
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || blfp->blf_flags = bip->bli_format.blf_flags;
(bip->bli_flags & XFS_BLI_STALE));
/* /*
* Base size is the actual size of the ondisk structure - it reflects * Base size is the actual size of the ondisk structure - it reflects
...@@ -245,28 +286,13 @@ xfs_buf_item_format( ...@@ -245,28 +286,13 @@ xfs_buf_item_format(
* memory structure. * memory structure.
*/ */
base_size = offsetof(struct xfs_buf_log_format, blf_data_map) + base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
(bip->bli_format.blf_map_size * (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
sizeof(bip->bli_format.blf_data_map[0])); vecp->i_addr = blfp;
vecp->i_addr = &bip->bli_format;
vecp->i_len = base_size; vecp->i_len = base_size;
vecp->i_type = XLOG_REG_TYPE_BFORMAT; vecp->i_type = XLOG_REG_TYPE_BFORMAT;
vecp++; vecp++;
nvecs = 1; nvecs = 1;
/*
* If it is an inode buffer, transfer the in-memory state to the
* format flags and clear the in-memory state. We do not transfer
* this state if the inode buffer allocation has not yet been committed
* to the log as setting the XFS_BLI_INODE_BUF flag will prevent
* correct replay of the inode allocation.
*/
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
xfs_log_item_in_current_chkpt(lip)))
bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
}
if (bip->bli_flags & XFS_BLI_STALE) { if (bip->bli_flags & XFS_BLI_STALE) {
/* /*
* The buffer is stale, so all we need to log * The buffer is stale, so all we need to log
...@@ -274,16 +300,15 @@ xfs_buf_item_format( ...@@ -274,16 +300,15 @@ xfs_buf_item_format(
* cancel flag in it. * cancel flag in it.
*/ */
trace_xfs_buf_item_format_stale(bip); trace_xfs_buf_item_format_stale(bip);
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
bip->bli_format.blf_size = nvecs; blfp->blf_size = nvecs;
return; return vecp;
} }
/* /*
* Fill in an iovec for each set of contiguous chunks. * Fill in an iovec for each set of contiguous chunks.
*/ */
first_bit = xfs_next_bit(bip->bli_format.blf_data_map, first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
bip->bli_format.blf_map_size, 0);
ASSERT(first_bit != -1); ASSERT(first_bit != -1);
last_bit = first_bit; last_bit = first_bit;
nbits = 1; nbits = 1;
...@@ -294,8 +319,7 @@ xfs_buf_item_format( ...@@ -294,8 +319,7 @@ xfs_buf_item_format(
* if there are no more bits set or the start bit is * if there are no more bits set or the start bit is
* beyond the end of the bitmap. * beyond the end of the bitmap.
*/ */
next_bit = xfs_next_bit(bip->bli_format.blf_data_map, next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
bip->bli_format.blf_map_size,
(uint)last_bit + 1); (uint)last_bit + 1);
/* /*
* If we run out of bits fill in the last iovec and get * If we run out of bits fill in the last iovec and get
...@@ -307,14 +331,14 @@ xfs_buf_item_format( ...@@ -307,14 +331,14 @@ xfs_buf_item_format(
* keep counting and scanning. * keep counting and scanning.
*/ */
if (next_bit == -1) { if (next_bit == -1) {
buffer_offset = first_bit * XFS_BLF_CHUNK; buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK; vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK; vecp->i_type = XLOG_REG_TYPE_BCHUNK;
nvecs++; nvecs++;
break; break;
} else if (next_bit != last_bit + 1) { } else if (next_bit != last_bit + 1) {
buffer_offset = first_bit * XFS_BLF_CHUNK; buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK; vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK; vecp->i_type = XLOG_REG_TYPE_BCHUNK;
...@@ -323,14 +347,17 @@ xfs_buf_item_format( ...@@ -323,14 +347,17 @@ xfs_buf_item_format(
first_bit = next_bit; first_bit = next_bit;
last_bit = next_bit; last_bit = next_bit;
nbits = 1; nbits = 1;
} else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) != } else if (xfs_buf_offset(bp, offset +
(xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) + (next_bit << XFS_BLF_SHIFT)) !=
(xfs_buf_offset(bp, offset +
(last_bit << XFS_BLF_SHIFT)) +
XFS_BLF_CHUNK)) { XFS_BLF_CHUNK)) {
buffer_offset = first_bit * XFS_BLF_CHUNK; buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset); vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK; vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK; vecp->i_type = XLOG_REG_TYPE_BCHUNK;
/* You would think we need to bump the nvecs here too, but we do not /*
* You would think we need to bump the nvecs here too, but we do not
* this number is used by recovery, and it gets confused by the boundary * this number is used by recovery, and it gets confused by the boundary
* split here * split here
* nvecs++; * nvecs++;
...@@ -345,6 +372,48 @@ xfs_buf_item_format( ...@@ -345,6 +372,48 @@ xfs_buf_item_format(
} }
} }
bip->bli_format.blf_size = nvecs; bip->bli_format.blf_size = nvecs;
return vecp;
}
/*
* This is called to fill in the vector of log iovecs for the
* given log buf item. It fills the first entry with a buf log
* format structure, and the rest point to contiguous chunks
* within the buffer.
*/
STATIC void
xfs_buf_item_format(
struct xfs_log_item *lip,
struct xfs_log_iovec *vecp)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
uint offset = 0;
int i;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_STALE));
/*
* If it is an inode buffer, transfer the in-memory state to the
* format flags and clear the in-memory state. We do not transfer
* this state if the inode buffer allocation has not yet been committed
* to the log as setting the XFS_BLI_INODE_BUF flag will prevent
* correct replay of the inode allocation.
*/
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
xfs_log_item_in_current_chkpt(lip)))
bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
}
for (i = 0; i < bip->bli_format_count; i++) {
vecp = xfs_buf_item_format_segment(bip, vecp, offset,
&bip->bli_formats[i]);
offset += bp->b_maps[i].bm_len;
}
/* /*
* Check to make sure everything is consistent. * Check to make sure everything is consistent.
...@@ -620,6 +689,35 @@ static const struct xfs_item_ops xfs_buf_item_ops = { ...@@ -620,6 +689,35 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_committing = xfs_buf_item_committing .iop_committing = xfs_buf_item_committing
}; };
/*
 * Set up the array of buf log format headers for a buf log item.
 *
 * Records @count in bli_format_count. A single-segment item points
 * bli_formats at the header embedded in the item; otherwise a zeroed
 * array of @count headers is allocated.
 *
 * Returns 0 on success, or ENOMEM if the array allocation returned NULL.
 */
STATIC int
xfs_buf_item_get_format(
	struct xfs_buf_log_item	*bip,
	int			count)
{
	/* The item must not already have a format array attached. */
	ASSERT(bip->bli_formats == NULL);
	bip->bli_format_count = count;

	if (count != 1) {
		bip->bli_formats = kmem_zalloc(
				count * sizeof(struct xfs_buf_log_format),
				KM_SLEEP);
		return bip->bli_formats ? 0 : ENOMEM;
	}

	/* Common case: reuse the embedded header, no allocation needed. */
	bip->bli_formats = &bip->bli_format;
	return 0;
}
/*
 * Release the format header array of a buf log item.
 *
 * Nothing is done when bli_formats refers to the header embedded in the
 * item, as that is not a separate allocation. Otherwise the array is
 * freed and the pointer cleared.
 */
STATIC void
xfs_buf_item_free_format(
	struct xfs_buf_log_item	*bip)
{
	if (bip->bli_formats == &bip->bli_format)
		return;

	kmem_free(bip->bli_formats);
	bip->bli_formats = NULL;
}
/* /*
* Allocate a new buf log item to go with the given buffer. * Allocate a new buf log item to go with the given buffer.
...@@ -637,6 +735,8 @@ xfs_buf_item_init( ...@@ -637,6 +735,8 @@ xfs_buf_item_init(
xfs_buf_log_item_t *bip; xfs_buf_log_item_t *bip;
int chunks; int chunks;
int map_size; int map_size;
int error;
int i;
/* /*
* Check to see if there is already a buf log item for * Check to see if there is already a buf log item for
...@@ -648,25 +748,33 @@ xfs_buf_item_init( ...@@ -648,25 +748,33 @@ xfs_buf_item_init(
if (lip != NULL && lip->li_type == XFS_LI_BUF) if (lip != NULL && lip->li_type == XFS_LI_BUF)
return; return;
/* bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
* chunks is the number of XFS_BLF_CHUNK size pieces
* the buffer can be divided into. Make sure not to
* truncate any pieces. map_size is the size of the
* bitmap needed to describe the chunks of the buffer.
*/
chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >>
XFS_BLF_SHIFT);
map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
KM_SLEEP);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
bip->bli_buf = bp; bip->bli_buf = bp;
xfs_buf_hold(bp); xfs_buf_hold(bp);
bip->bli_format.blf_type = XFS_LI_BUF;
bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); /*
bip->bli_format.blf_len = (ushort)bp->b_length; * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
bip->bli_format.blf_map_size = map_size; * can be divided into. Make sure not to truncate any pieces.
* map_size is the size of the bitmap needed to describe the
* chunks of the buffer.
*
* Discontiguous buffer support follows the layout of the underlying
* buffer. This makes the implementation as simple as possible.
*/
error = xfs_buf_item_get_format(bip, bp->b_map_count);
ASSERT(error == 0);
for (i = 0; i < bip->bli_format_count; i++) {
chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
XFS_BLF_CHUNK);
map_size = DIV_ROUND_UP(chunks, NBWORD);
bip->bli_formats[i].blf_type = XFS_LI_BUF;
bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
bip->bli_formats[i].blf_map_size = map_size;
}
#ifdef XFS_TRANS_DEBUG #ifdef XFS_TRANS_DEBUG
/* /*
...@@ -697,10 +805,11 @@ xfs_buf_item_init( ...@@ -697,10 +805,11 @@ xfs_buf_item_init(
* item's bitmap. * item's bitmap.
*/ */
void void
xfs_buf_item_log( xfs_buf_item_log_segment(
xfs_buf_log_item_t *bip, struct xfs_buf_log_item *bip,
uint first, uint first,
uint last) uint last,
uint *map)
{ {
uint first_bit; uint first_bit;
uint last_bit; uint last_bit;
...@@ -712,12 +821,6 @@ xfs_buf_item_log( ...@@ -712,12 +821,6 @@ xfs_buf_item_log(
uint end_bit; uint end_bit;
uint mask; uint mask;
/*
* Mark the item as having some dirty data for
* quick reference in xfs_buf_item_dirty.
*/
bip->bli_flags |= XFS_BLI_DIRTY;
/* /*
* Convert byte offsets to bit numbers. * Convert byte offsets to bit numbers.
*/ */
...@@ -734,7 +837,7 @@ xfs_buf_item_log( ...@@ -734,7 +837,7 @@ xfs_buf_item_log(
* to set a bit in. * to set a bit in.
*/ */
word_num = first_bit >> BIT_TO_WORD_SHIFT; word_num = first_bit >> BIT_TO_WORD_SHIFT;
wordp = &(bip->bli_format.blf_data_map[word_num]); wordp = &map[word_num];
/* /*
* Calculate the starting bit in the first word. * Calculate the starting bit in the first word.
...@@ -781,6 +884,51 @@ xfs_buf_item_log( ...@@ -781,6 +884,51 @@ xfs_buf_item_log(
xfs_buf_item_log_debug(bip, first, last); xfs_buf_item_log_debug(bip, first, last);
} }
/*
 * Mark bytes first through last inclusive as dirty in the buf
 * item's bitmap.
 *
 * @first and @last are byte offsets into the whole (possibly
 * discontiguous) buffer. Each buffer segment has its own format
 * structure and dirty bitmap, so the range is split at segment
 * boundaries and each piece is marked in the bitmap of the segment
 * that contains it.
 */
void
xfs_buf_item_log(
	xfs_buf_log_item_t	*bip,
	uint			first,
	uint			last)
{
	int			i;
	uint			start;	/* byte offset of segment start */
	uint			end;	/* last byte (inclusive) to log */
	uint			seg_bytes;
	struct xfs_buf		*bp = bip->bli_buf;

	/*
	 * Mark the item as having some dirty data for
	 * quick reference in xfs_buf_item_dirty.
	 */
	bip->bli_flags |= XFS_BLI_DIRTY;

	/*
	 * walk each buffer segment and mark them dirty appropriately.
	 */
	start = 0;
	for (i = 0; i < bip->bli_format_count; i++) {
		if (start > last)
			break;

		/*
		 * bm_len is converted to bytes with BBTOB() everywhere it
		 * is combined with the byte offsets first/last/start.
		 */
		seg_bytes = BBTOB(bp->b_maps[i].bm_len);

		/* last byte of this segment; the range is inclusive */
		end = start + seg_bytes - 1;

		/* skip to the segment that contains the first byte to log */
		if (first > end) {
			start += seg_bytes;
			continue;
		}

		/* trim the range to this segment */
		if (first < start)
			first = start;
		if (end > last)
			end = last;

		/*
		 * Each segment's bitmap only describes that segment, so
		 * convert the buffer offsets into segment relative offsets
		 * (the first byte of each segment is byte 0 of its map).
		 *
		 * NOTE(review): xfs_buf_item_log_segment() forwards these
		 * offsets to xfs_buf_item_log_debug(); confirm that helper
		 * copes with segment relative offsets under XFS_TRANS_DEBUG.
		 */
		xfs_buf_item_log_segment(bip, first - start, end - start,
					 &bip->bli_formats[i].blf_data_map[0]);

		start += seg_bytes;
	}
}
/* /*
* Return 1 if the buffer has some data that has been logged (at any * Return 1 if the buffer has some data that has been logged (at any
...@@ -802,6 +950,7 @@ xfs_buf_item_free( ...@@ -802,6 +950,7 @@ xfs_buf_item_free(
kmem_free(bip->bli_logged); kmem_free(bip->bli_logged);
#endif /* XFS_TRANS_DEBUG */ #endif /* XFS_TRANS_DEBUG */
xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip); kmem_zone_free(xfs_buf_item_zone, bip);
} }
......
...@@ -102,6 +102,8 @@ typedef struct xfs_buf_log_item { ...@@ -102,6 +102,8 @@ typedef struct xfs_buf_log_item {
char *bli_orig; /* original buffer copy */ char *bli_orig; /* original buffer copy */
char *bli_logged; /* bytes logged (bitmap) */ char *bli_logged; /* bytes logged (bitmap) */
#endif #endif
int bli_format_count; /* count of headers */
struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
struct xfs_buf_log_format bli_format; /* embedded in-log header */ struct xfs_buf_log_format bli_format; /* embedded in-log header */
} xfs_buf_log_item_t; } xfs_buf_log_item_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment