Commit c2177e4d authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Improved extent merging

Previously, checksummed extents could only be merged when the checksum
covered only the currently live data.

xfstest generic/064 creates a test file, uses finsert calls to split the
extent, then uses collapse calls to see whether the pieces get merged
back. But without any reads to trigger the narrow_crcs path, each of the
split extents will still carry a checksum covering the entire original
extent.
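
The pattern the test exercises can be sketched with plain fallocate(2)
calls. This is an illustrative reproduction, not generic/064 itself: the
path, sizes, and offsets are made up, and error handling is omitted.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <unistd.h>

int main(void)
{
	char buf[65536] = { 0 };
	int fd = open("/mnt/testfile", O_RDWR | O_CREAT, 0644);

	/* Write one contiguous extent's worth of data; no reads afterwards,
	 * so narrow_crcs never runs. */
	pwrite(fd, buf, sizeof(buf), 0);
	fsync(fd);

	/* finsert: shift the tail up to open a hole mid-file, splitting
	 * the extent in two. */
	fallocate(fd, FALLOC_FL_INSERT_RANGE, 16384, 4096);

	/* collapse: remove the hole again; the two halves become logically
	 * contiguous and are candidates for merging. */
	fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 16384, 4096);

	close(fd);
	return 0;
}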

This patch improves the extent merge path so that if either of the
extents we're attempting to merge has a checksum that covers the entire
merged extent, we just use that checksum.
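
In terms of the unpacked crc fields, the merge path now distinguishes
three cases: the left extent's checksum already covers the merged data,
the right extent's does, or the two checksums must genuinely be combined.
Below is a minimal standalone model of that decision, under simplified
assumptions — struct crc_model and classify() are hypothetical names
standing in for struct bch_extent_crc_unpacked and the in-kernel logic.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified stand-in for struct bch_extent_crc_unpacked. */
struct crc_model {
	uint32_t offset;		/* start of live data in the checksummed region */
	uint32_t live_size;		/* sectors of currently live data */
	uint32_t uncompressed_size;	/* sectors the checksum covers */
};

static const char *classify(struct crc_model l, struct crc_model r)
{
	/* Left checksum spans both halves of the merged extent: */
	if (l.offset + l.live_size + r.live_size <= l.uncompressed_size)
		return "use left extent's crc";
	/* Right checksum spans both halves: */
	if (l.live_size <= r.offset)
		return "use right extent's crc";
	/* Otherwise fall back to combining checksums (bch2_checksum_merge()). */
	return "merge the two checksums";
}

int main(void)
{
	/*
	 * generic/064-style scenario: a 128-sector extent split in half,
	 * each half still carrying the checksum of all 128 sectors.
	 */
	struct crc_model left  = { .offset = 0,  .live_size = 64, .uncompressed_size = 128 };
	struct crc_model right = { .offset = 64, .live_size = 64, .uncompressed_size = 128 };

	/* 0 + 64 + 64 <= 128, so the left extent's checksum covers the merge. */
	printf("%s\n", classify(left, right));
	return 0;
}

In the use-right case the real code also rebases crc_r.offset down by
crc_l.live_size before repacking, since the left extent's live data now
occupies the front of the merged extent.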
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 5db95e50
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -233,102 +233,121 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
 {
 	struct bkey_s_extent l = bkey_s_to_extent(_l);
 	struct bkey_s_c_extent r = bkey_s_c_to_extent(_r);
-	union bch_extent_entry *en_l = l.v->start;
-	const union bch_extent_entry *en_r = r.v->start;
-	struct bch_extent_crc_unpacked crc_l, crc_r;
+	union bch_extent_entry *en_l;
+	const union bch_extent_entry *en_r;
+	struct extent_ptr_decoded lp, rp;
+	bool use_right_ptr;
+	struct bch_dev *ca;
 
 	if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k))
 		return false;
 
-	crc_l = bch2_extent_crc_unpack(l.k, NULL);
-
 	extent_for_each_entry(l, en_l) {
 		en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
 
 		if (extent_entry_type(en_l) != extent_entry_type(en_r))
 			return false;
+	}
 
-		switch (extent_entry_type(en_l)) {
-		case BCH_EXTENT_ENTRY_ptr: {
-			const struct bch_extent_ptr *lp = &en_l->ptr;
-			const struct bch_extent_ptr *rp = &en_r->ptr;
-			struct bch_dev *ca;
-
-			if (lp->offset + crc_l.compressed_size != rp->offset ||
-			    lp->dev != rp->dev ||
-			    lp->gen != rp->gen)
-				return false;
-
-			/* We don't allow extents to straddle buckets: */
-			ca = bch_dev_bkey_exists(c, lp->dev);
-
-			if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
-				return false;
-
-			break;
-		}
+	en_l = l.v->start;
+	en_r = r.v->start;
+	lp.crc = bch2_extent_crc_unpack(l.k, NULL);
+	rp.crc = bch2_extent_crc_unpack(r.k, NULL);
+
+	while (__bkey_ptr_next_decode(l.k, extent_entry_last(l), lp, en_l) &&
+	       __bkey_ptr_next_decode(r.k, extent_entry_last(r), rp, en_r)) {
+		if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size !=
+		    rp.ptr.offset + rp.crc.offset ||
+		    lp.ptr.dev != rp.ptr.dev ||
+		    lp.ptr.gen != rp.ptr.gen ||
+		    lp.has_ec != rp.has_ec)
+			return false;
 
-		case BCH_EXTENT_ENTRY_stripe_ptr:
-			if (en_l->stripe_ptr.block != en_r->stripe_ptr.block ||
-			    en_l->stripe_ptr.idx != en_r->stripe_ptr.idx)
-				return false;
-			break;
-		case BCH_EXTENT_ENTRY_crc32:
-		case BCH_EXTENT_ENTRY_crc64:
-		case BCH_EXTENT_ENTRY_crc128:
-			crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
-			crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+		/* Extents may not straddle buckets: */
+		ca = bch_dev_bkey_exists(c, lp.ptr.dev);
+		if (PTR_BUCKET_NR(ca, &lp.ptr) != PTR_BUCKET_NR(ca, &rp.ptr))
+			return false;
 
-			if (crc_l.csum_type != crc_r.csum_type ||
-			    crc_l.compression_type != crc_r.compression_type ||
-			    crc_l.nonce != crc_r.nonce)
-				return false;
+		if (lp.has_ec != rp.has_ec ||
+		    (lp.has_ec &&
+		     (lp.ec.block != rp.ec.block ||
+		      lp.ec.redundancy != rp.ec.redundancy ||
+		      lp.ec.idx != rp.ec.idx)))
+			return false;
 
-			if (crc_l.offset + crc_l.live_size != crc_l.compressed_size ||
-			    crc_r.offset)
-				return false;
+		if (lp.crc.compression_type != rp.crc.compression_type ||
+		    lp.crc.nonce != rp.crc.nonce)
+			return false;
 
-			if (!bch2_checksum_mergeable(crc_l.csum_type))
+		if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
+		    lp.crc.uncompressed_size) {
+			/* can use left extent's crc entry */
+		} else if (lp.crc.live_size <= rp.crc.offset) {
+			/* can use right extent's crc entry */
+		} else {
+			/* check if checksums can be merged: */
+			if (lp.crc.csum_type != rp.crc.csum_type ||
+			    lp.crc.nonce != rp.crc.nonce ||
+			    crc_is_compressed(lp.crc) ||
+			    !bch2_checksum_mergeable(lp.crc.csum_type))
 				return false;
 
-			if (crc_is_compressed(crc_l))
+			if (lp.crc.offset + lp.crc.live_size != lp.crc.compressed_size ||
+			    rp.crc.offset)
 				return false;
 
-			if (crc_l.csum_type &&
-			    crc_l.uncompressed_size +
-			    crc_r.uncompressed_size > c->sb.encoded_extent_max)
+			if (lp.crc.csum_type &&
+			    lp.crc.uncompressed_size +
+			    rp.crc.uncompressed_size > c->sb.encoded_extent_max)
 				return false;
 
-			if (crc_l.uncompressed_size + crc_r.uncompressed_size >
+			if (lp.crc.uncompressed_size + rp.crc.uncompressed_size >
 			    bch2_crc_field_size_max[extent_entry_type(en_l)])
 				return false;
+		}
 
-			break;
-		default:
-			return false;
-		}
+		en_l = extent_entry_next(en_l);
+		en_r = extent_entry_next(en_r);
 	}
 
+	use_right_ptr = false;
 	extent_for_each_entry(l, en_l) {
 		struct bch_extent_crc_unpacked crc_l, crc_r;
 
 		en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
 
+		if (extent_entry_type(en_l) == BCH_EXTENT_ENTRY_ptr &&
+		    use_right_ptr)
+			en_l->ptr = en_r->ptr;
+
 		if (!extent_entry_is_crc(en_l))
 			continue;
 
+		use_right_ptr = false;
+
 		crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
 		crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
 
-		crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
-						 crc_l.csum,
-						 crc_r.csum,
-						 crc_r.uncompressed_size << 9);
-
-		crc_l.uncompressed_size += crc_r.uncompressed_size;
-		crc_l.compressed_size += crc_r.compressed_size;
-
-		bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
-				     extent_entry_type(en_l));
+		if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
+		    crc_l.uncompressed_size) {
+			/* can use left extent's crc entry */
+		} else if (crc_l.live_size <= crc_r.offset) {
+			/* can use right extent's crc entry */
+			crc_r.offset -= crc_l.live_size;
+			bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
+					     extent_entry_type(en_l));
+			use_right_ptr = true;
+		} else {
+			crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
+							 crc_l.csum,
+							 crc_r.csum,
+							 crc_r.uncompressed_size << 9);
+
+			crc_l.uncompressed_size += crc_r.uncompressed_size;
+			crc_l.compressed_size += crc_r.compressed_size;
+			bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
+					     extent_entry_type(en_l));
+		}
 	}
 
 	bch2_key_resize(l.k, l.k->size + r.k->size);
......