Commit cb7f0903 authored by Andreas Gruenbacher, committed by Bob Peterson

gfs2: Improve non-recursive delete algorithm

In rare cases, the current non-recursive delete algorithm doesn't
deallocate empty intermediary indirect blocks.  This should have very
little practical effect, but deallocating all blocks correctly should
still be preferable as it is cleaner and easier to validate.

The fix consists of using the first block to deallocate to compute the
start marker of the truncate point instead of the last block that needs
to be kept.  With that change, computing which indirect blocks are still
needed becomes relatively easy.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
parent c3ce5aa9
...@@ -1078,7 +1078,7 @@ static int trunc_start(struct inode *inode, u64 newsize) ...@@ -1078,7 +1078,7 @@ static int trunc_start(struct inode *inode, u64 newsize)
* @mp: current metapath fully populated with buffers * @mp: current metapath fully populated with buffers
* @btotal: place to keep count of total blocks freed * @btotal: place to keep count of total blocks freed
* @hgt: height we're processing * @hgt: height we're processing
* @first: true if this is the first call to this function for this height * @keep_start: preserve the first meta pointer
* *
* We sweep a metadata buffer (provided by the metapath) for blocks we need to * We sweep a metadata buffer (provided by the metapath) for blocks we need to
* free, and free them all. However, we do it one rgrp at a time. If this * free, and free them all. However, we do it one rgrp at a time. If this
...@@ -1094,7 +1094,7 @@ static int trunc_start(struct inode *inode, u64 newsize) ...@@ -1094,7 +1094,7 @@ static int trunc_start(struct inode *inode, u64 newsize)
*/ */
static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
const struct metapath *mp, u32 *btotal, int hgt, const struct metapath *mp, u32 *btotal, int hgt,
bool preserve1) bool keep_start)
{ {
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd; struct gfs2_rgrpd *rgd;
...@@ -1119,7 +1119,7 @@ static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh, ...@@ -1119,7 +1119,7 @@ static int sweep_bh_for_rgrps(struct gfs2_inode *ip, struct gfs2_holder *rd_gh,
top = metapointer(hgt, mp); /* first ptr from metapath */ top = metapointer(hgt, mp); /* first ptr from metapath */
/* If we're keeping some data at the truncation point, we've got to /* If we're keeping some data at the truncation point, we've got to
preserve the metadata tree by adding 1 to the starting metapath. */ preserve the metadata tree by adding 1 to the starting metapath. */
if (preserve1) if (keep_start)
top++; top++;
bottom = (__be64 *)(bh->b_data + bh->b_size); bottom = (__be64 *)(bh->b_data + bh->b_size);
...@@ -1286,9 +1286,9 @@ enum dealloc_states { ...@@ -1286,9 +1286,9 @@ enum dealloc_states {
DEALLOC_DONE = 3, /* process complete */ DEALLOC_DONE = 3, /* process complete */
}; };
static bool mp_eq_to_hgt(struct metapath *mp, __u16 *nbof, unsigned int h) static bool mp_eq_to_hgt(struct metapath *mp, __u16 *list, unsigned int h)
{ {
if (memcmp(mp->mp_list, nbof, h * sizeof(mp->mp_list[0]))) if (memcmp(mp->mp_list, list, h * sizeof(mp->mp_list[0])))
return false; return false;
return true; return true;
} }
...@@ -1310,24 +1310,35 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) ...@@ -1310,24 +1310,35 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
struct metapath mp; struct metapath mp;
struct buffer_head *dibh, *bh; struct buffer_head *dibh, *bh;
struct gfs2_holder rd_gh; struct gfs2_holder rd_gh;
u64 lblock; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift;
__u16 nbof[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */ u64 lblock = (newsize + (1 << bsize_shift) - 1) >> bsize_shift;
__u16 start_list[GFS2_MAX_META_HEIGHT]; /* new beginning of truncation */
unsigned int start_aligned;
unsigned int strip_h = ip->i_height - 1; unsigned int strip_h = ip->i_height - 1;
u32 btotal = 0; u32 btotal = 0;
int ret, state; int ret, state;
int mp_h; /* metapath buffers are read in to this height */ int mp_h; /* metapath buffers are read in to this height */
u64 prev_bnr = 0; u64 prev_bnr = 0;
bool preserve1; /* need to preserve the first meta pointer? */ bool keep_start; /* need to preserve the first meta pointer? */
if (!newsize)
lblock = 0;
else
lblock = (newsize - 1) >> sdp->sd_sb.sb_bsize_shift;
memset(&mp, 0, sizeof(mp)); memset(&mp, 0, sizeof(mp));
find_metapath(sdp, lblock, &mp, ip->i_height); find_metapath(sdp, lblock, &mp, ip->i_height);
memcpy(&nbof, &mp.mp_list, sizeof(nbof)); memcpy(start_list, mp.mp_list, sizeof(start_list));
/*
* Set start_aligned to the metadata height up to which the truncate
* point is aligned to the metadata tree (i.e., the truncate point is a
* multiple of the granularity at the height above). This determines
* at which heights an additional meta pointer needs to be preserved:
* an additional meta pointer is needed at a given height if
* height < start_aligned.
*/
for (mp_h = ip->i_height - 1; mp_h > 0; mp_h--) {
if (start_list[mp_h])
break;
}
start_aligned = mp_h;
ret = gfs2_meta_inode_buffer(ip, &dibh); ret = gfs2_meta_inode_buffer(ip, &dibh);
if (ret) if (ret)
...@@ -1363,10 +1374,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) ...@@ -1363,10 +1374,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
/* Truncate a full metapath at the given strip height. /* Truncate a full metapath at the given strip height.
* Note that strip_h == mp_h in order to be in this state. */ * Note that strip_h == mp_h in order to be in this state. */
case DEALLOC_MP_FULL: case DEALLOC_MP_FULL:
/* If we're truncating to a non-zero size and the mp is
at the beginning of file for the strip height, we
need to preserve the first metadata pointer. */
preserve1 = (newsize && mp_eq_to_hgt(&mp, nbof, mp_h));
bh = mp.mp_bh[mp_h]; bh = mp.mp_bh[mp_h];
gfs2_assert_withdraw(sdp, bh); gfs2_assert_withdraw(sdp, bh);
if (gfs2_assert_withdraw(sdp, if (gfs2_assert_withdraw(sdp,
...@@ -1378,8 +1385,12 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) ...@@ -1378,8 +1385,12 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
prev_bnr, ip->i_height, strip_h, mp_h); prev_bnr, ip->i_height, strip_h, mp_h);
} }
prev_bnr = bh->b_blocknr; prev_bnr = bh->b_blocknr;
keep_start = mp_h < start_aligned &&
mp_eq_to_hgt(&mp, start_list, mp_h);
ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal, ret = sweep_bh_for_rgrps(ip, &rd_gh, &mp, &btotal,
mp_h, preserve1); mp_h, keep_start);
/* If we hit an error or just swept dinode buffer, /* If we hit an error or just swept dinode buffer,
just exit. */ just exit. */
if (ret || !mp_h) { if (ret || !mp_h) {
...@@ -1403,7 +1414,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize) ...@@ -1403,7 +1414,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 newsize)
stripping the previous level of metadata. */ stripping the previous level of metadata. */
if (mp_h == 0) { if (mp_h == 0) {
strip_h--; strip_h--;
memcpy(&mp.mp_list, &nbof, sizeof(nbof)); memcpy(mp.mp_list, start_list, sizeof(start_list));
mp_h = strip_h; mp_h = strip_h;
state = DEALLOC_FILL_MP; state = DEALLOC_FILL_MP;
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment