Commit 0ce2dbbe, authored and committed by Kent Overstreet

bcachefs: ja->discard_idx, ja->dirty_idx

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent fcbf3e50
......@@ -760,6 +760,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
while (ja->nr < nr) {
struct open_bucket *ob = NULL;
unsigned pos;
long bucket;
if (new_fs) {
......@@ -786,20 +787,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
preempt_disable();
}
__array_insert_item(ja->buckets, ja->nr, ja->last_idx);
__array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);
__array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx);
pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
__array_insert_item(ja->buckets, ja->nr, pos);
__array_insert_item(ja->bucket_seq, ja->nr, pos);
__array_insert_item(journal_buckets->buckets, ja->nr, pos);
ja->nr++;
ja->buckets[ja->last_idx] = bucket;
ja->bucket_seq[ja->last_idx] = 0;
journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
ja->buckets[pos] = bucket;
ja->bucket_seq[pos] = 0;
journal_buckets->buckets[pos] = cpu_to_le64(bucket);
if (ja->last_idx < ja->nr) {
if (ja->cur_idx >= ja->last_idx)
ja->cur_idx++;
ja->last_idx++;
}
ja->nr++;
if (pos <= ja->discard_idx)
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
if (pos <= ja->dirty_idx_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
if (pos <= ja->dirty_idx)
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
if (pos <= ja->cur_idx)
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size,
......@@ -1042,6 +1047,7 @@ int bch2_fs_journal_init(struct journal *j)
mutex_init(&j->blacklist_lock);
INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock);
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
......@@ -1138,13 +1144,17 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
"dev %u:\n"
"\tnr\t\t%u\n"
"\tavailable\t%u:%u\n"
"\tcur_idx\t\t%u (seq %llu)\n"
"\tlast_idx\t%u (seq %llu)\n",
"\tdiscard_idx\t\t%u\n"
"\tdirty_idx_ondisk\t%u (seq %llu)\n"
"\tdirty_idx\t\t%u (seq %llu)\n"
"\tcur_idx\t\t%u (seq %llu)\n",
iter, ja->nr,
bch2_journal_dev_buckets_available(j, ja),
ja->sectors_free,
ja->cur_idx, ja->bucket_seq[ja->cur_idx],
ja->last_idx, ja->bucket_seq[ja->last_idx]);
ja->discard_idx,
ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
}
spin_unlock(&j->lock);
......
......@@ -625,11 +625,12 @@ static void bch2_journal_read_device(struct closure *cl)
ja->sectors_free = 0;
/*
* Set last_idx to indicate the entire journal is full and needs to be
* Set dirty_idx to indicate the entire journal is full and needs to be
* reclaimed - journal reclaim will immediately reclaim whatever isn't
* pinned when it first runs:
*/
ja->last_idx = (ja->cur_idx + 1) % ja->nr;
ja->discard_idx = ja->dirty_idx_ondisk =
ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
out:
kvpfree(buf.data, buf.size);
percpu_ref_put(&ca->io_ref);
......@@ -1069,12 +1070,13 @@ static void journal_write_done(struct closure *cl)
goto err;
spin_lock(&j->lock);
j->seq_ondisk = seq;
j->last_seq_ondisk = last_seq;
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs;
j->seq_ondisk = seq;
j->last_seq_ondisk = last_seq;
bch2_journal_space_available(j);
/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets:
......
......@@ -14,22 +14,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned next = (ja->cur_idx + 1) % ja->nr;
unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
/*
* Allocator startup needs some journal space before we can do journal
* replay:
*/
if (available &&
test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
available--;
if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
--available;
/*
* Don't use the last bucket unless writing the new last_seq
* will make another bucket available:
*/
if (available &&
journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
--available;
return available;
......@@ -55,12 +53,34 @@ void bch2_journal_space_available(struct journal *j)
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
struct journal_device *ja = &ca->journal;
unsigned buckets_this_device, sectors_this_device;
if (!ja->nr)
continue;
while (ja->dirty_idx != ja->cur_idx &&
ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
while (ja->dirty_idx_ondisk != ja->dirty_idx &&
ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
nr_online++;
}
if (nr_online < c->opts.metadata_replicas_required) {
ret = -EROFS;
sectors_next_entry = 0;
goto out;
}
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
struct journal_device *ja = &ca->journal;
unsigned buckets_this_device, sectors_this_device;
if (!ja->nr)
continue;
buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
sectors_this_device = ja->sectors_free;
......@@ -100,20 +120,17 @@ void bch2_journal_space_available(struct journal *j)
nr_devs++;
}
rcu_read_unlock();
if (nr_online < c->opts.metadata_replicas_required) {
ret = -EROFS;
sectors_next_entry = 0;
} else if (!sectors_next_entry ||
nr_devs < min_t(unsigned, nr_online,
c->opts.metadata_replicas)) {
if (!sectors_next_entry ||
nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
ret = -ENOSPC;
sectors_next_entry = 0;
} else if (!fifo_free(&j->pin)) {
ret = -ENOSPC;
sectors_next_entry = 0;
}
out:
rcu_read_unlock();
j->cur_entry_sectors = sectors_next_entry;
j->cur_entry_error = ret;
......@@ -129,25 +146,23 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
bool ret;
spin_lock(&j->lock);
ret = ja->nr &&
ja->last_idx != ja->cur_idx &&
ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
ret = ja->discard_idx != ja->dirty_idx_ondisk;
spin_unlock(&j->lock);
return ret;
}
/*
* Advance ja->last_idx as long as it points to buckets that are no longer
* Advance ja->discard_idx as long as it points to buckets that are no longer
* dirty, issuing discards if necessary:
*/
static void journal_do_discards(struct journal *j)
static void bch2_journal_do_discards(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned iter;
mutex_lock(&j->reclaim_lock);
mutex_lock(&j->discard_lock);
for_each_rw_member(ca, c, iter) {
struct journal_device *ja = &ca->journal;
......@@ -157,18 +172,18 @@ static void journal_do_discards(struct journal *j)
bdev_max_discard_sectors(ca->disk_sb.bdev))
blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca,
ja->buckets[ja->last_idx]),
ja->buckets[ja->discard_idx]),
ca->mi.bucket_size, GFP_NOIO);
spin_lock(&j->lock);
ja->last_idx = (ja->last_idx + 1) % ja->nr;
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
bch2_journal_space_available(j);
spin_unlock(&j->lock);
}
}
mutex_unlock(&j->reclaim_lock);
mutex_unlock(&j->discard_lock);
}
/*
......@@ -399,7 +414,7 @@ void bch2_journal_reclaim_work(struct work_struct *work)
unsigned iter, bucket_to_flush, min_nr = 0;
u64 seq_to_flush = 0;
journal_do_discards(j);
bch2_journal_do_discards(j);
mutex_lock(&j->reclaim_lock);
spin_lock(&j->lock);
......
......@@ -193,9 +193,6 @@ struct journal {
struct journal_entry_pin_list *data;
} pin;
struct journal_entry_pin *flush_in_progress;
wait_queue_head_t pin_flush_wait;
u64 replay_journal_seq;
struct mutex blacklist_lock;
......@@ -206,10 +203,13 @@ struct journal {
spinlock_t err_lock;
struct delayed_work reclaim_work;
struct mutex reclaim_lock;
unsigned long last_flushed;
struct journal_entry_pin *flush_in_progress;
wait_queue_head_t pin_flush_wait;
/* protects advancing ja->last_idx: */
struct mutex reclaim_lock;
/* protects advancing ja->discard_idx: */
struct mutex discard_lock;
unsigned write_delay_ms;
unsigned reclaim_delay_ms;
......@@ -240,17 +240,15 @@ struct journal_device {
unsigned sectors_free;
/* Journal bucket we're currently writing to */
unsigned cur_idx;
/* Last journal bucket that still contains an open journal entry */
/*
* j->lock and j->reclaim_lock must both be held to modify, j->lock
* sufficient to read:
* discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
*/
unsigned last_idx;
unsigned discard_idx; /* Next bucket to discard */
unsigned dirty_idx_ondisk;
unsigned dirty_idx;
unsigned cur_idx; /* Journal bucket we're currently writing to */
unsigned nr;
u64 *buckets;
/* Bio for journal reads/writes to this device */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment