Commit 35a067b4, authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Change when we allow overwrites

Originally, we'd check for -ENOSPC when getting a disk reservation
whenever the new extent took up more space on disk than the old extent.

Erasure coding screwed this up, because with erasure coding writes are
initially replicated, and then in the background the extra replicas are
dropped when the stripe is created. This means that with erasure coding
enabled, writes will always take up more space on disk than the data
they're overwriting - but, according to POSIX, overwrites aren't
supposed to return ENOSPC.

So, in this patch we fudge things: if the new extent has more replicas
than the _effective_ replicas of the old extent, or if the old extent is
compressed and the new one isn't, we check for ENOSPC when getting the
disk reservation - otherwise, we don't.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 3187aa8d
...@@ -664,7 +664,7 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k) ...@@ -664,7 +664,7 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k)
} }
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
unsigned nr_replicas) unsigned nr_replicas, bool compressed)
{ {
struct btree_trans trans; struct btree_trans trans;
struct btree_iter *iter; struct btree_iter *iter;
...@@ -682,7 +682,8 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, ...@@ -682,7 +682,8 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0) if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break; break;
if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) { if (nr_replicas > bch2_bkey_replicas(c, k) ||
(!compressed && bch2_bkey_sectors_compressed(k))) {
ret = false; ret = false;
break; break;
} }
...@@ -692,6 +693,33 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, ...@@ -692,6 +693,33 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
return ret; return ret;
} }
/*
 * bch2_bkey_replicas - count the effective replicas of the data @k points to
 * @c:	filesystem the key belongs to; its stripes[0] table is consulted for
 *	erasure coded pointers
 * @k:	key whose pointers are examined
 *
 * Cached pointers are ignored. Every other pointer counts as one replica;
 * a pointer that belongs to an erasure coded stripe additionally counts the
 * stripe's nr_redundant. If the stripe cannot be looked up we warn and
 * count just the pointer itself.
 *
 * Returns the summed replica count over all non-cached pointers.
 */
unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	unsigned nr = 0;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct stripe *s;

		if (p.ptr.cached)
			continue;

		/* The pointer itself is always worth one replica. */
		nr++;

		if (!p.has_ec)
			continue;

		/* Erasure coded: the stripe's redundancy counts as well. */
		s = genradix_ptr(&c->stripes[0], p.ec.idx);
		WARN_ON(!s);
		if (s)
			nr += s->nr_redundant;
	}

	return nr;
}
static unsigned bch2_extent_ptr_durability(struct bch_fs *c, static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
struct extent_ptr_decoded p) struct extent_ptr_decoded p)
{ {
......
...@@ -538,7 +538,9 @@ unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c); ...@@ -538,7 +538,9 @@ unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c); unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
bool bch2_bkey_is_incompressible(struct bkey_s_c); bool bch2_bkey_is_incompressible(struct bkey_s_c);
unsigned bch2_bkey_sectors_compressed(struct bkey_s_c); unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned); bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned, bool);
unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s, void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
......
...@@ -1886,7 +1886,9 @@ static long bch2_dio_write_loop(struct dio_write *dio) ...@@ -1886,7 +1886,9 @@ static long bch2_dio_write_loop(struct dio_write *dio)
dio->op.opts.data_replicas, 0); dio->op.opts.data_replicas, 0);
if (unlikely(ret) && if (unlikely(ret) &&
!bch2_check_range_allocated(c, dio->op.pos, !bch2_check_range_allocated(c, dio->op.pos,
bio_sectors(bio), dio->op.opts.data_replicas)) bio_sectors(bio),
dio->op.opts.data_replicas,
dio->op.opts.compression != 0))
goto err; goto err;
task_io_account_write(bio->bi_iter.bi_size); task_io_account_write(bio->bi_iter.bi_size);
......
...@@ -193,18 +193,23 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio, ...@@ -193,18 +193,23 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
/* Extent update path: */ /* Extent update path: */
static int sum_sector_overwrites(struct btree_trans *trans, int bch2_sum_sector_overwrites(struct btree_trans *trans,
struct btree_iter *extent_iter, struct btree_iter *extent_iter,
struct bkey_i *new, struct bkey_i *new,
bool *maybe_extending, bool *maybe_extending,
s64 *i_sectors_delta, bool *should_check_enospc,
s64 *disk_sectors_delta) s64 *i_sectors_delta,
s64 *disk_sectors_delta)
{ {
struct bch_fs *c = trans->c;
struct btree_iter *iter; struct btree_iter *iter;
struct bkey_s_c old; struct bkey_s_c old;
unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new));
bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
int ret = 0; int ret = 0;
*maybe_extending = true; *maybe_extending = true;
*should_check_enospc = false;
*i_sectors_delta = 0; *i_sectors_delta = 0;
*disk_sectors_delta = 0; *disk_sectors_delta = 0;
...@@ -223,6 +228,11 @@ static int sum_sector_overwrites(struct btree_trans *trans, ...@@ -223,6 +228,11 @@ static int sum_sector_overwrites(struct btree_trans *trans,
(int) (bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)) - (int) (bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)) -
bch2_bkey_nr_ptrs_fully_allocated(old)); bch2_bkey_nr_ptrs_fully_allocated(old));
if (!*should_check_enospc &&
(new_replicas > bch2_bkey_replicas(c, old) ||
(!new_compressed && bch2_bkey_sectors_compressed(old))))
*should_check_enospc = true;
if (bkey_cmp(old.k->p, new->k.p) >= 0) { if (bkey_cmp(old.k->p, new->k.p) >= 0) {
/* /*
* Check if there's already data above where we're * Check if there's already data above where we're
...@@ -260,7 +270,7 @@ int bch2_extent_update(struct btree_trans *trans, ...@@ -260,7 +270,7 @@ int bch2_extent_update(struct btree_trans *trans,
{ {
/* this must live until after bch2_trans_commit(): */ /* this must live until after bch2_trans_commit(): */
struct bkey_inode_buf inode_p; struct bkey_inode_buf inode_p;
bool extending = false; bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0; s64 i_sectors_delta = 0, disk_sectors_delta = 0;
int ret; int ret;
...@@ -268,8 +278,9 @@ int bch2_extent_update(struct btree_trans *trans, ...@@ -268,8 +278,9 @@ int bch2_extent_update(struct btree_trans *trans,
if (ret) if (ret)
return ret; return ret;
ret = sum_sector_overwrites(trans, iter, k, ret = bch2_sum_sector_overwrites(trans, iter, k,
&extending, &extending,
&should_check_enospc,
&i_sectors_delta, &i_sectors_delta,
&disk_sectors_delta); &disk_sectors_delta);
if (ret) if (ret)
...@@ -279,7 +290,8 @@ int bch2_extent_update(struct btree_trans *trans, ...@@ -279,7 +290,8 @@ int bch2_extent_update(struct btree_trans *trans,
disk_sectors_delta > (s64) disk_res->sectors) { disk_sectors_delta > (s64) disk_res->sectors) {
ret = bch2_disk_reservation_add(trans->c, disk_res, ret = bch2_disk_reservation_add(trans->c, disk_res,
disk_sectors_delta - disk_res->sectors, disk_sectors_delta - disk_res->sectors,
0); !should_check_enospc
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret) if (ret)
return ret; return ret;
} }
......
...@@ -64,6 +64,8 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op) ...@@ -64,6 +64,8 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
: op->c->wq; : op->c->wq;
} }
int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
struct bkey_i *, bool *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, struct btree_iter *, int bch2_extent_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, struct disk_reservation *, struct bkey_i *, struct disk_reservation *,
u64 *, u64, s64 *); u64 *, u64, s64 *);
......
...@@ -76,17 +76,15 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ...@@ -76,17 +76,15 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
const union bch_extent_entry *entry; const union bch_extent_entry *entry;
struct extent_ptr_decoded p; struct extent_ptr_decoded p;
bool did_work = false; bool did_work = false;
int nr; bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
bch2_trans_reset(&trans, 0); bch2_trans_reset(&trans, 0);
k = bch2_btree_iter_peek_slot(iter); k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k); ret = bkey_err(k);
if (ret) { if (ret)
if (ret == -EINTR) goto err;
continue;
break;
}
new = bkey_i_to_extent(bch2_keylist_front(keys)); new = bkey_i_to_extent(bch2_keylist_front(keys));
...@@ -143,23 +141,21 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ...@@ -143,23 +141,21 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
op->opts.background_target, op->opts.background_target,
op->opts.data_replicas); op->opts.data_replicas);
/* ret = bch2_sum_sector_overwrites(&trans, iter, insert,
* If we're not fully overwriting @k, and it's compressed, we &extending,
* need a reservation for all the pointers in @insert &should_check_enospc,
*/ &i_sectors_delta,
nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) - &disk_sectors_delta);
m->nr_ptrs_reserved; if (ret)
goto err;
if (insert->k.size < k.k->size && if (disk_sectors_delta > (s64) op->res.sectors) {
bch2_bkey_sectors_compressed(k) &&
nr > 0) {
ret = bch2_disk_reservation_add(c, &op->res, ret = bch2_disk_reservation_add(c, &op->res,
keylist_sectors(keys) * nr, 0); disk_sectors_delta - op->res.sectors,
!should_check_enospc
? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret) if (ret)
goto out; goto out;
m->nr_ptrs_reserved += nr;
goto next;
} }
bch2_trans_update(&trans, iter, insert, 0); bch2_trans_update(&trans, iter, insert, 0);
...@@ -168,6 +164,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op) ...@@ -168,6 +164,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
op_journal_seq(op), op_journal_seq(op),
BTREE_INSERT_NOFAIL| BTREE_INSERT_NOFAIL|
m->data_opts.btree_insert_flags); m->data_opts.btree_insert_flags);
err:
if (!ret) if (!ret)
atomic_long_inc(&c->extent_migrate_done); atomic_long_inc(&c->extent_migrate_done);
if (ret == -EINTR) if (ret == -EINTR)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment