Commit 0ce2dbbe, authored and committed by Kent Overstreet

bcachefs: ja->discard_idx, ja->dirty_idx

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent fcbf3e50
...@@ -760,6 +760,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, ...@@ -760,6 +760,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
while (ja->nr < nr) { while (ja->nr < nr) {
struct open_bucket *ob = NULL; struct open_bucket *ob = NULL;
unsigned pos;
long bucket; long bucket;
if (new_fs) { if (new_fs) {
...@@ -786,20 +787,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr, ...@@ -786,20 +787,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
preempt_disable(); preempt_disable();
} }
__array_insert_item(ja->buckets, ja->nr, ja->last_idx); pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
__array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx); __array_insert_item(ja->buckets, ja->nr, pos);
__array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx); __array_insert_item(ja->bucket_seq, ja->nr, pos);
__array_insert_item(journal_buckets->buckets, ja->nr, pos);
ja->nr++;
ja->buckets[ja->last_idx] = bucket; ja->buckets[pos] = bucket;
ja->bucket_seq[ja->last_idx] = 0; ja->bucket_seq[pos] = 0;
journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket); journal_buckets->buckets[pos] = cpu_to_le64(bucket);
if (ja->last_idx < ja->nr) { if (pos <= ja->discard_idx)
if (ja->cur_idx >= ja->last_idx) ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
ja->cur_idx++; if (pos <= ja->dirty_idx_ondisk)
ja->last_idx++; ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
} if (pos <= ja->dirty_idx)
ja->nr++; ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL, bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size, ca->mi.bucket_size,
...@@ -1042,6 +1047,7 @@ int bch2_fs_journal_init(struct journal *j) ...@@ -1042,6 +1047,7 @@ int bch2_fs_journal_init(struct journal *j)
mutex_init(&j->blacklist_lock); mutex_init(&j->blacklist_lock);
INIT_LIST_HEAD(&j->seq_blacklist); INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock); mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock);
lockdep_init_map(&j->res_map, "journal res", &res_key, 0); lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
...@@ -1138,13 +1144,17 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf) ...@@ -1138,13 +1144,17 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
"dev %u:\n" "dev %u:\n"
"\tnr\t\t%u\n" "\tnr\t\t%u\n"
"\tavailable\t%u:%u\n" "\tavailable\t%u:%u\n"
"\tcur_idx\t\t%u (seq %llu)\n" "\tdiscard_idx\t\t%u\n"
"\tlast_idx\t%u (seq %llu)\n", "\tdirty_idx_ondisk\t%u (seq %llu)\n"
"\tdirty_idx\t\t%u (seq %llu)\n"
"\tcur_idx\t\t%u (seq %llu)\n",
iter, ja->nr, iter, ja->nr,
bch2_journal_dev_buckets_available(j, ja), bch2_journal_dev_buckets_available(j, ja),
ja->sectors_free, ja->sectors_free,
ja->cur_idx, ja->bucket_seq[ja->cur_idx], ja->discard_idx,
ja->last_idx, ja->bucket_seq[ja->last_idx]); ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
} }
spin_unlock(&j->lock); spin_unlock(&j->lock);
......
...@@ -625,11 +625,12 @@ static void bch2_journal_read_device(struct closure *cl) ...@@ -625,11 +625,12 @@ static void bch2_journal_read_device(struct closure *cl)
ja->sectors_free = 0; ja->sectors_free = 0;
/* /*
* Set last_idx to indicate the entire journal is full and needs to be * Set dirty_idx to indicate the entire journal is full and needs to be
* reclaimed - journal reclaim will immediately reclaim whatever isn't * reclaimed - journal reclaim will immediately reclaim whatever isn't
* pinned when it first runs: * pinned when it first runs:
*/ */
ja->last_idx = (ja->cur_idx + 1) % ja->nr; ja->discard_idx = ja->dirty_idx_ondisk =
ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
out: out:
kvpfree(buf.data, buf.size); kvpfree(buf.data, buf.size);
percpu_ref_put(&ca->io_ref); percpu_ref_put(&ca->io_ref);
...@@ -1069,12 +1070,13 @@ static void journal_write_done(struct closure *cl) ...@@ -1069,12 +1070,13 @@ static void journal_write_done(struct closure *cl)
goto err; goto err;
spin_lock(&j->lock); spin_lock(&j->lock);
j->seq_ondisk = seq;
j->last_seq_ondisk = last_seq;
if (seq >= j->pin.front) if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs; journal_seq_pin(j, seq)->devs = devs;
j->seq_ondisk = seq;
j->last_seq_ondisk = last_seq;
bch2_journal_space_available(j);
/* /*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets: * more buckets:
......
...@@ -14,22 +14,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j, ...@@ -14,22 +14,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned next = (ja->cur_idx + 1) % ja->nr; unsigned next = (ja->cur_idx + 1) % ja->nr;
unsigned available = (ja->last_idx + ja->nr - next) % ja->nr; unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
/* /*
* Allocator startup needs some journal space before we can do journal * Allocator startup needs some journal space before we can do journal
* replay: * replay:
*/ */
if (available && if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) --available;
available--;
/* /*
* Don't use the last bucket unless writing the new last_seq * Don't use the last bucket unless writing the new last_seq
* will make another bucket available: * will make another bucket available:
*/ */
if (available && if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
--available; --available;
return available; return available;
...@@ -55,12 +53,34 @@ void bch2_journal_space_available(struct journal *j) ...@@ -55,12 +53,34 @@ void bch2_journal_space_available(struct journal *j)
for_each_member_device_rcu(ca, c, i, for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) { &c->rw_devs[BCH_DATA_JOURNAL]) {
struct journal_device *ja = &ca->journal; struct journal_device *ja = &ca->journal;
unsigned buckets_this_device, sectors_this_device;
if (!ja->nr) if (!ja->nr)
continue; continue;
while (ja->dirty_idx != ja->cur_idx &&
ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
while (ja->dirty_idx_ondisk != ja->dirty_idx &&
ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
nr_online++; nr_online++;
}
if (nr_online < c->opts.metadata_replicas_required) {
ret = -EROFS;
sectors_next_entry = 0;
goto out;
}
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
struct journal_device *ja = &ca->journal;
unsigned buckets_this_device, sectors_this_device;
if (!ja->nr)
continue;
buckets_this_device = bch2_journal_dev_buckets_available(j, ja); buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
sectors_this_device = ja->sectors_free; sectors_this_device = ja->sectors_free;
...@@ -100,20 +120,17 @@ void bch2_journal_space_available(struct journal *j) ...@@ -100,20 +120,17 @@ void bch2_journal_space_available(struct journal *j)
nr_devs++; nr_devs++;
} }
rcu_read_unlock();
if (nr_online < c->opts.metadata_replicas_required) { if (!sectors_next_entry ||
ret = -EROFS; nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
sectors_next_entry = 0;
} else if (!sectors_next_entry ||
nr_devs < min_t(unsigned, nr_online,
c->opts.metadata_replicas)) {
ret = -ENOSPC; ret = -ENOSPC;
sectors_next_entry = 0; sectors_next_entry = 0;
} else if (!fifo_free(&j->pin)) { } else if (!fifo_free(&j->pin)) {
ret = -ENOSPC; ret = -ENOSPC;
sectors_next_entry = 0; sectors_next_entry = 0;
} }
out:
rcu_read_unlock();
j->cur_entry_sectors = sectors_next_entry; j->cur_entry_sectors = sectors_next_entry;
j->cur_entry_error = ret; j->cur_entry_error = ret;
...@@ -129,25 +146,23 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja) ...@@ -129,25 +146,23 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
bool ret; bool ret;
spin_lock(&j->lock); spin_lock(&j->lock);
ret = ja->nr && ret = ja->discard_idx != ja->dirty_idx_ondisk;
ja->last_idx != ja->cur_idx &&
ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
spin_unlock(&j->lock); spin_unlock(&j->lock);
return ret; return ret;
} }
/* /*
* Advance ja->last_idx as long as it points to buckets that are no longer * Advance ja->discard_idx as long as it points to buckets that are no longer
* dirty, issuing discards if necessary: * dirty, issuing discards if necessary:
*/ */
static void journal_do_discards(struct journal *j) static void bch2_journal_do_discards(struct journal *j)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca; struct bch_dev *ca;
unsigned iter; unsigned iter;
mutex_lock(&j->reclaim_lock); mutex_lock(&j->discard_lock);
for_each_rw_member(ca, c, iter) { for_each_rw_member(ca, c, iter) {
struct journal_device *ja = &ca->journal; struct journal_device *ja = &ca->journal;
...@@ -157,18 +172,18 @@ static void journal_do_discards(struct journal *j) ...@@ -157,18 +172,18 @@ static void journal_do_discards(struct journal *j)
bdev_max_discard_sectors(ca->disk_sb.bdev)) bdev_max_discard_sectors(ca->disk_sb.bdev))
blkdev_issue_discard(ca->disk_sb.bdev, blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca, bucket_to_sector(ca,
ja->buckets[ja->last_idx]), ja->buckets[ja->discard_idx]),
ca->mi.bucket_size, GFP_NOIO); ca->mi.bucket_size, GFP_NOIO);
spin_lock(&j->lock); spin_lock(&j->lock);
ja->last_idx = (ja->last_idx + 1) % ja->nr; ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
bch2_journal_space_available(j); bch2_journal_space_available(j);
spin_unlock(&j->lock); spin_unlock(&j->lock);
} }
} }
mutex_unlock(&j->reclaim_lock); mutex_unlock(&j->discard_lock);
} }
/* /*
...@@ -399,7 +414,7 @@ void bch2_journal_reclaim_work(struct work_struct *work) ...@@ -399,7 +414,7 @@ void bch2_journal_reclaim_work(struct work_struct *work)
unsigned iter, bucket_to_flush, min_nr = 0; unsigned iter, bucket_to_flush, min_nr = 0;
u64 seq_to_flush = 0; u64 seq_to_flush = 0;
journal_do_discards(j); bch2_journal_do_discards(j);
mutex_lock(&j->reclaim_lock); mutex_lock(&j->reclaim_lock);
spin_lock(&j->lock); spin_lock(&j->lock);
......
...@@ -193,9 +193,6 @@ struct journal { ...@@ -193,9 +193,6 @@ struct journal {
struct journal_entry_pin_list *data; struct journal_entry_pin_list *data;
} pin; } pin;
struct journal_entry_pin *flush_in_progress;
wait_queue_head_t pin_flush_wait;
u64 replay_journal_seq; u64 replay_journal_seq;
struct mutex blacklist_lock; struct mutex blacklist_lock;
...@@ -206,10 +203,13 @@ struct journal { ...@@ -206,10 +203,13 @@ struct journal {
spinlock_t err_lock; spinlock_t err_lock;
struct delayed_work reclaim_work; struct delayed_work reclaim_work;
struct mutex reclaim_lock;
unsigned long last_flushed; unsigned long last_flushed;
struct journal_entry_pin *flush_in_progress;
wait_queue_head_t pin_flush_wait;
/* protects advancing ja->last_idx: */ /* protects advancing ja->discard_idx: */
struct mutex reclaim_lock; struct mutex discard_lock;
unsigned write_delay_ms; unsigned write_delay_ms;
unsigned reclaim_delay_ms; unsigned reclaim_delay_ms;
...@@ -240,17 +240,15 @@ struct journal_device { ...@@ -240,17 +240,15 @@ struct journal_device {
unsigned sectors_free; unsigned sectors_free;
/* Journal bucket we're currently writing to */
unsigned cur_idx;
/* Last journal bucket that still contains an open journal entry */
/* /*
* j->lock and j->reclaim_lock must both be held to modify, j->lock * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
* sufficient to read:
*/ */
unsigned last_idx; unsigned discard_idx; /* Next bucket to discard */
unsigned dirty_idx_ondisk;
unsigned dirty_idx;
unsigned cur_idx; /* Journal bucket we're currently writing to */
unsigned nr; unsigned nr;
u64 *buckets; u64 *buckets;
/* Bio for journal reads/writes to this device */ /* Bio for journal reads/writes to this device */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment