Commit 18d758a2 authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba

btrfs: replace btrfs_io_context::raid_map with a fixed u64 value

In btrfs_io_context structure, we have a pointer raid_map, which
indicates the logical bytenr for each stripe.

But considering we always call sort_parity_stripes(), the result
raid_map[] is always sorted, thus raid_map[0] is always the logical
bytenr of the full stripe.

So why we waste the space and time (for sorting) for raid_map?

This patch will replace btrfs_io_context::raid_map with a single u64
number, full_stripe_start, by:

- Replace btrfs_io_context::raid_map with full_stripe_start

- Replace call sites using raid_map[0] to use full_stripe_start

- Replace call sites using raid_map[i] to compare with nr_data_stripes.

The benefits are:

- Less memory wasted on raid_map
  It's sizeof(u64) * num_stripes vs sizeof(u64).
  It'll always save at least one u64, and the benefit grows larger with
  num_stripes.

- No more weird alloc_btrfs_io_context() behavior
  As there is only one fixed size + one variable length array.
Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 1faf3885
......@@ -202,7 +202,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
*/
static int rbio_bucket(struct btrfs_raid_bio *rbio)
{
u64 num = rbio->bioc->raid_map[0];
u64 num = rbio->bioc->full_stripe_logical;
/*
* we shift down quite a bit. We're using byte
......@@ -567,7 +567,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
test_bit(RBIO_CACHE_BIT, &cur->flags))
return 0;
if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical)
return 0;
/* we can't merge with different operations */
......@@ -661,7 +661,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
spin_lock(&h->lock);
list_for_each_entry(cur, &h->hash_list, hash_list) {
if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical)
continue;
spin_lock(&cur->bio_list_lock);
......@@ -1113,7 +1113,7 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
struct bio_vec bvec;
struct bvec_iter iter;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->raid_map[0];
rbio->bioc->full_stripe_logical;
bio_for_each_segment(bvec, bio, iter) {
u32 bvec_offset;
......@@ -1337,7 +1337,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->raid_map[0];
rbio->bioc->full_stripe_logical;
int total_nr_sector = offset >> fs_info->sectorsize_bits;
ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors);
......@@ -1614,7 +1614,7 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
{
const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
const u64 full_stripe_start = rbio->bioc->raid_map[0];
const u64 full_stripe_start = rbio->bioc->full_stripe_logical;
const u32 orig_len = orig_bio->bi_iter.bi_size;
const u32 sectorsize = fs_info->sectorsize;
u64 cur_logical;
......@@ -1801,9 +1801,8 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
* here due to a crc mismatch and we can't give them the
* data they want.
*/
if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
if (rbio->bioc->raid_map[faila] ==
RAID5_P_STRIPE)
if (failb == rbio->real_stripes - 1) {
if (faila == rbio->real_stripes - 2)
/*
* Only P and Q are corrupted.
* We only care about data stripes recovery,
......@@ -1817,7 +1816,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
goto pstripe;
}
if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
if (failb == rbio->real_stripes - 2) {
raid6_datap_recov(rbio->real_stripes, sectorsize,
faila, pointers);
} else {
......@@ -2080,8 +2079,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
struct btrfs_root *csum_root = btrfs_csum_root(fs_info,
rbio->bioc->raid_map[0]);
const u64 start = rbio->bioc->raid_map[0];
rbio->bioc->full_stripe_logical);
const u64 start = rbio->bioc->full_stripe_logical;
const u32 len = (rbio->nr_data * rbio->stripe_nsectors) <<
fs_info->sectorsize_bits;
int ret;
......@@ -2129,7 +2128,7 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
*/
btrfs_warn_rl(fs_info,
"sub-stripe write for full stripe %llu is not safe, failed to get csum: %d",
rbio->bioc->raid_map[0], ret);
rbio->bioc->full_stripe_logical, ret);
no_csum:
kfree(rbio->csum_buf);
bitmap_free(rbio->csum_bitmap);
......@@ -2385,10 +2384,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
int stripe_offset;
int index;
ASSERT(logical >= rbio->bioc->raid_map[0]);
ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] +
ASSERT(logical >= rbio->bioc->full_stripe_logical);
ASSERT(logical + sectorsize <= rbio->bioc->full_stripe_logical +
BTRFS_STRIPE_LEN * rbio->nr_data);
stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
stripe_offset = (int)(logical - rbio->bioc->full_stripe_logical);
index = stripe_offset / sectorsize;
rbio->bio_sectors[index].page = page;
rbio->bio_sectors[index].pgoff = pgoff;
......
......@@ -1430,7 +1430,7 @@ static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
}
static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
u64 *raid_map,
u64 full_stripe_logical,
int nstripes, int mirror,
int *stripe_index,
u64 *stripe_offset)
......@@ -1438,19 +1438,22 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
int i;
if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ?
nstripes - 1 : nstripes - 2;
/* RAID5/6 */
for (i = 0; i < nstripes; i++) {
if (raid_map[i] == RAID6_Q_STRIPE ||
raid_map[i] == RAID5_P_STRIPE)
continue;
for (i = 0; i < nr_data_stripes; i++) {
const u64 data_stripe_start = full_stripe_logical +
(i * BTRFS_STRIPE_LEN);
if (logical >= raid_map[i] &&
logical < raid_map[i] + BTRFS_STRIPE_LEN)
if (logical >= data_stripe_start &&
logical < data_stripe_start + BTRFS_STRIPE_LEN)
break;
}
*stripe_index = i;
*stripe_offset = logical - raid_map[i];
*stripe_offset = (logical - full_stripe_logical) &
BTRFS_STRIPE_LEN_MASK;
} else {
/* The other RAID type */
*stripe_index = mirror;
......@@ -1538,7 +1541,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
scrub_stripe_index_and_offset(logical,
bioc->map_type,
bioc->raid_map,
bioc->full_stripe_logical,
bioc->num_stripes -
bioc->replace_nr_stripes,
mirror_index,
......@@ -2398,7 +2401,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&length, &bioc);
if (ret || !bioc || !bioc->raid_map)
if (ret || !bioc)
goto bioc_out;
if (WARN_ON(!sctx->is_dev_replace ||
......@@ -3007,7 +3010,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
&length, &bioc);
if (ret || !bioc || !bioc->raid_map)
if (ret || !bioc)
goto bioc_out;
bio = bio_alloc(NULL, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS);
......
......@@ -5894,25 +5894,6 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
return preferred_mirror;
}
/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes)
{
int i;
int again = 1;
while (again) {
again = 0;
for (i = 0; i < num_stripes - 1; i++) {
/* Swap if parity is on a smaller index */
if (bioc->raid_map[i] > bioc->raid_map[i + 1]) {
swap(bioc->stripes[i], bioc->stripes[i + 1]);
swap(bioc->raid_map[i], bioc->raid_map[i + 1]);
again = 1;
}
}
}
}
static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
u16 total_stripes)
{
......@@ -5922,12 +5903,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
/* The size of btrfs_io_context */
sizeof(struct btrfs_io_context) +
/* Plus the variable array for the stripes */
sizeof(struct btrfs_io_stripe) * (total_stripes) +
/*
* Plus the raid_map, which includes both the tgt dev
* and the stripes.
*/
sizeof(u64) * (total_stripes),
sizeof(struct btrfs_io_stripe) * (total_stripes),
GFP_NOFS);
if (!bioc)
......@@ -5936,8 +5912,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
refcount_set(&bioc->refs, 1);
bioc->fs_info = fs_info;
bioc->raid_map = (u64 *)(bioc->stripes + total_stripes);
bioc->replace_stripe_src = -1;
bioc->full_stripe_logical = (u64)-1;
return bioc;
}
......@@ -6541,33 +6517,39 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
bioc->map_type = map->type;
/*
* For RAID56 full map, we need to make sure the stripes[] follows the
* rule that data stripes are all ordered, then followed with P and Q
* (if we have).
*
* It's still mostly the same as other profiles, just with extra rotation.
*/
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
(need_full_stripe(op) || mirror_num > 1)) {
/*
* For RAID56 @stripe_nr is already the number of full stripes
* before us, which is also the rotation value (needs to modulo
* with num_stripes).
*
* In this case, we just add @stripe_nr with @i, then do the
* modulo, to reduce one modulo call.
*/
bioc->full_stripe_logical = em->start +
((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT);
for (i = 0; i < num_stripes; i++)
set_io_stripe(&bioc->stripes[i], map,
(i + stripe_nr) % num_stripes,
stripe_offset, stripe_nr);
} else {
/*
* For all other non-RAID56 profiles, just copy the target
* stripe into the bioc.
*/
for (i = 0; i < num_stripes; i++) {
set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset,
stripe_nr);
set_io_stripe(&bioc->stripes[i], map, stripe_index,
stripe_offset, stripe_nr);
stripe_index++;
}
/* Build raid_map */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
(need_full_stripe(op) || mirror_num > 1)) {
u64 tmp;
unsigned rot;
/* Work out the disk rotation on this stripe-set */
rot = stripe_nr % num_stripes;
/* Fill in the logical address of each stripe */
tmp = stripe_nr * data_stripes;
for (i = 0; i < data_stripes; i++)
bioc->raid_map[(i + rot) % num_stripes] =
em->start + ((tmp + i) << BTRFS_STRIPE_LEN_SHIFT);
bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE;
if (map->type & BTRFS_BLOCK_GROUP_RAID6)
bioc->raid_map[(i + rot + 1) % num_stripes] =
RAID6_Q_STRIPE;
sort_parity_stripes(bioc, num_stripes);
}
if (need_full_stripe(op))
......
......@@ -460,11 +460,22 @@ struct btrfs_io_context {
u16 replace_nr_stripes;
s16 replace_stripe_src;
/*
* logical block numbers for the start of each stripe
* The last one or two are p/q. These are sorted,
* so raid_map[0] is the start of our full stripe
* Logical bytenr of the full stripe start, only for RAID56 cases.
*
* When this value is set to other than (u64)-1, the stripes[] should
* follow this pattern:
*
* (real_stripes = num_stripes - replace_nr_stripes)
* (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1))
*
* stripes[0]: The first data stripe
* stripes[1]: The second data stripe
* ...
* stripes[data_stripes - 1]: The last data stripe
* stripes[data_stripes]: The P stripe
* stripes[data_stripes + 1]: The Q stripe (only for RAID6).
*/
u64 *raid_map;
u64 full_stripe_logical;
struct btrfs_io_stripe stripes[];
};
......
......@@ -2422,7 +2422,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio,
),
TP_fast_assign_btrfs(rbio->bioc->fs_info,
__entry->full_stripe = rbio->bioc->raid_map[0];
__entry->full_stripe = rbio->bioc->full_stripe_logical;
__entry->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
__entry->len = bio->bi_iter.bi_size;
__entry->opf = bio_op(bio);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment