Commit b20ba1bc authored by Javier González, committed by Jens Axboe

lightnvm: pblk: redesign GC algorithm

Until now, in order to get enough read parallelism, we have recycled
several lines at the same time. This approach has proven not to work
well when reaching capacity, since we end up mixing valid data from all
lines and therefore fail to maintain a sustainable free/recycled line
ratio.

The new design relies on a two-level workqueue mechanism. In the first
level, we read the metadata for a number of lines based on the GC list
they reside on (this is governed by the number of valid sectors in each
line). In the second level, we recycle a single line at a time. Here, we
issue reads in parallel, while a single GC write thread places data in
the write buffer. This design allows us to (i) move data from only one
line at a time, thus maintaining a sane free/recycled line ratio, and
(ii) keep the GC writer busy with recycled data.
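
To make the mechanism concrete, here is a minimal, self-contained sketch of the
two-level pattern described above, written against only the stock kernel
workqueue and semaphore APIs. All demo_* names are hypothetical stand-ins, not
the driver's; the real structures, line refcounting, and error paths are in the
diff that follows.

    #include <linux/kernel.h>
    #include <linux/semaphore.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    /* Hypothetical stand-in for struct pblk_gc: one ordered queue prepares a
     * single line at a time, a wider queue issues the parallel reads.
     */
    struct demo_gc {
        struct workqueue_struct *prepare_wq;
        struct workqueue_struct *read_wq;
        struct semaphore sem;           /* bounds in-flight read jobs */
    };

    struct demo_job {
        struct work_struct ws;
        struct demo_gc *gc;
    };

    /* Level 2: read one chunk of valid sectors, hand it to the GC writer. */
    static void demo_line_read(struct work_struct *work)
    {
        struct demo_job *job = container_of(work, struct demo_job, ws);

        up(&job->gc->sem);      /* free a slot for the next read job */
        /* ...read valid sectors, queue them for the single GC writer... */
        kfree(job);
    }

    /* Level 1: read a victim line's metadata, then fan out read jobs. */
    static void demo_line_prepare(struct work_struct *work)
    {
        struct demo_job *job = container_of(work, struct demo_job, ws);
        struct demo_job *rd;

        /* ...read emeta and collect the line's valid LBAs here... */

        rd = kmalloc(sizeof(*rd), GFP_KERNEL);
        if (rd) {
            rd->gc = job->gc;
            down(&rd->gc->sem); /* throttle the fan-out */
            INIT_WORK(&rd->ws, demo_line_read);
            queue_work(rd->gc->read_wq, &rd->ws);
        }
        kfree(job);
    }

    /* Setup: an ordered queue for prepare, a wide one for reads. A real
     * init would also tear down on partial failure.
     */
    static int demo_gc_init(struct demo_gc *gc)
    {
        gc->prepare_wq = alloc_workqueue("demo-prepare", WQ_UNBOUND, 1);
        gc->read_wq = alloc_workqueue("demo-read", WQ_UNBOUND, 8);
        if (!gc->prepare_wq || !gc->read_wq)
            return -ENOMEM;
        sema_init(&gc->sem, 8);
        return 0;
    }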
Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <matias@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 476118c9
@@ -302,12 +302,12 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
             line->gc_group = PBLK_LINEGC_FULL;
             move_list = &l_mg->gc_full_list;
         }
-    } else if (vsc < lm->mid_thrs) {
+    } else if (vsc < lm->high_thrs) {
         if (line->gc_group != PBLK_LINEGC_HIGH) {
             line->gc_group = PBLK_LINEGC_HIGH;
             move_list = &l_mg->gc_high_list;
         }
-    } else if (vsc < lm->high_thrs) {
+    } else if (vsc < lm->mid_thrs) {
         if (line->gc_group != PBLK_LINEGC_MID) {
             line->gc_group = PBLK_LINEGC_MID;
             move_list = &l_mg->gc_mid_list;
@@ -1199,6 +1199,7 @@ struct pblk_line *pblk_line_get(struct pblk *pblk)
     if (pblk_line_prepare(pblk, line)) {
         pr_err("pblk: failed to prepare line %d\n", line->id);
         list_add(&line->list, &l_mg->free_list);
+        l_mg->nr_free_lines++;
         return NULL;
     }
@@ -1465,6 +1466,8 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
     spin_unlock(&line->lock);
     spin_unlock(&l_mg->gc_lock);
+
+    pblk_gc_should_kick(pblk);
 }

 void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
...
@@ -21,7 +21,6 @@
 static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
 {
     kfree(gc_rq->data);
-    kfree(gc_rq->lba_list);
     kfree(gc_rq);
 }
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
         return 1;
     }

-    list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
-        list_move_tail(&gc_rq->list, &w_list);
-        gc->w_entries--;
-    }
+    list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+    gc->w_entries = 0;
     spin_unlock(&gc->w_lock);

     list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
                 gc_rq->nr_secs, gc_rq->secs_to_gc,
                 gc_rq->line, PBLK_IOTYPE_GC);

+        kref_put(&gc_rq->line->ref, pblk_line_put);
         list_del(&gc_rq->list);
-        kref_put(&gc_rq->line->ref, pblk_line_put);
         pblk_gc_free_gc_rq(gc_rq);
     }
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
  * Responsible for managing all memory related to a gc request. Also in case of
  * failure
  */
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
-                   u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 {
     struct nvm_tgt_dev *dev = pblk->dev;
     struct nvm_geo *geo = &dev->geo;
     struct pblk_gc *gc = &pblk->gc;
-    struct pblk_gc_rq *gc_rq;
+    struct pblk_line *line = gc_rq->line;
     void *data;
     unsigned int secs_to_gc;
-    int ret = NVM_IO_OK;
+    int ret = 0;

-    data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+    data = kmalloc(gc_rq->nr_secs * geo->sec_size, GFP_KERNEL);
     if (!data) {
-        ret = NVM_IO_ERR;
-        goto free_lba_list;
+        ret = -ENOMEM;
+        goto out;
     }

     /* Read from GC victim block */
-    if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+    if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
                             &secs_to_gc, line)) {
-        ret = NVM_IO_ERR;
+        ret = -EFAULT;
         goto free_data;
     }

     if (!secs_to_gc)
-        goto free_data;
-
-    gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
-    if (!gc_rq) {
-        ret = NVM_IO_ERR;
-        goto free_data;
-    }
+        goto free_rq;

-    gc_rq->line = line;
     gc_rq->data = data;
-    gc_rq->lba_list = lba_list;
-    gc_rq->nr_secs = nr_secs;
     gc_rq->secs_to_gc = secs_to_gc;

-    kref_get(&line->ref);
-
 retry:
     spin_lock(&gc->w_lock);
-    if (gc->w_entries > 256) {
+    if (gc->w_entries >= PBLK_GC_W_QD) {
         spin_unlock(&gc->w_lock);
-        usleep_range(256, 1024);
+        pblk_gc_writer_kick(&pblk->gc);
+        usleep_range(128, 256);
         goto retry;
     }
     gc->w_entries++;
@@ -120,13 +105,14 @@ static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
     pblk_gc_writer_kick(&pblk->gc);

-    return NVM_IO_OK;
+    return 0;

+free_rq:
+    kfree(gc_rq);
 free_data:
     kfree(data);
-free_lba_list:
-    kfree(lba_list);
+out:
+    kref_put(&line->ref, pblk_line_put);
     return ret;
 }
@@ -149,21 +135,53 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 }

 static void pblk_gc_line_ws(struct work_struct *work)
+{
+    struct pblk_line_ws *line_rq_ws = container_of(work,
+                        struct pblk_line_ws, ws);
+    struct pblk *pblk = line_rq_ws->pblk;
+    struct pblk_gc *gc = &pblk->gc;
+    struct pblk_line *line = line_rq_ws->line;
+    struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+    up(&gc->gc_sem);
+
+    if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+        pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+                        line->id, *line->vsc,
+                        gc_rq->nr_secs);
+    }
+
+    mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
 {
     struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
                                     ws);
     struct pblk *pblk = line_ws->pblk;
-    struct pblk_line_mgmt *l_mg = &pblk->l_mg;
     struct pblk_line *line = line_ws->line;
+    struct pblk_line_mgmt *l_mg = &pblk->l_mg;
     struct pblk_line_meta *lm = &pblk->lm;
-    struct line_emeta *emeta_buf = line_ws->priv;
+    struct pblk_gc *gc = &pblk->gc;
+    struct line_emeta *emeta_buf;
+    struct pblk_line_ws *line_rq_ws;
+    struct pblk_gc_rq *gc_rq;
     __le64 *lba_list;
-    u64 *gc_list;
-    int sec_left;
-    int nr_ppas, bit;
-    int put_line = 1;
+    int sec_left, nr_secs, bit;
+    int ret;

-    pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+    emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+                                GFP_KERNEL);
+    if (!emeta_buf) {
+        pr_err("pblk: cannot use GC emeta\n");
+        return;
+    }
+
+    ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+    if (ret) {
+        pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+        goto fail_free_emeta;
+    }

     /* If this read fails, it means that emeta is corrupted. For now, leave
      * the line untouched. TODO: Implement a recovery routine that scans and
@@ -172,119 +190,124 @@ static void pblk_gc_line_ws(struct work_struct *work)
     lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
     if (!lba_list) {
         pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
-        goto out;
+        goto fail_free_emeta;
     }

-    spin_lock(&line->lock);
-    sec_left = le32_to_cpu(*line->vsc);
-    if (!sec_left) {
-        /* Lines are erased before being used (l_mg->data_/log_next) */
-        spin_unlock(&line->lock);
-        goto out;
-    }
-    spin_unlock(&line->lock);
-
+    sec_left = pblk_line_vsc(line);
     if (sec_left < 0) {
         pr_err("pblk: corrupted GC line (%d)\n", line->id);
-        put_line = 0;
-        pblk_put_line_back(pblk, line);
-        goto out;
+        goto fail_free_emeta;
     }

     bit = -1;
 next_rq:
-    gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
-    if (!gc_list) {
-        put_line = 0;
-        pblk_put_line_back(pblk, line);
-        goto out;
-    }
+    gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+    if (!gc_rq)
+        goto fail_free_emeta;

-    nr_ppas = 0;
+    nr_secs = 0;
     do {
         bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
                                 bit + 1);
         if (bit > line->emeta_ssec)
             break;

-        gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
-    } while (nr_ppas < pblk->max_write_pgs);
+        gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+    } while (nr_secs < pblk->max_write_pgs);

-    if (unlikely(!nr_ppas)) {
-        kfree(gc_list);
+    if (unlikely(!nr_secs)) {
+        kfree(gc_rq);
         goto out;
     }

-    if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
-        pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
-                        line->id, *line->vsc,
-                        nr_ppas, nr_ppas);
-        put_line = 0;
-        pblk_put_line_back(pblk, line);
-        goto out;
-    }
+    gc_rq->nr_secs = nr_secs;
+    gc_rq->line = line;
+
+    line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+    if (!line_rq_ws)
+        goto fail_free_gc_rq;
+
+    line_rq_ws->pblk = pblk;
+    line_rq_ws->line = line;
+    line_rq_ws->priv = gc_rq;
+
+    down(&gc->gc_sem);
+    kref_get(&line->ref);
+
+    INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+    queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);

-    sec_left -= nr_ppas;
+    sec_left -= nr_secs;
     if (sec_left > 0)
         goto next_rq;

 out:
     pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
     mempool_free(line_ws, pblk->line_ws_pool);
-    atomic_dec(&pblk->gc.inflight_gc);

-    if (put_line)
-        kref_put(&line->ref, pblk_line_put);
+    kref_put(&line->ref, pblk_line_put);
+    atomic_dec(&gc->inflight_gc);
+
+    return;
+
+fail_free_gc_rq:
+    kfree(gc_rq);
+fail_free_emeta:
+    pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+    pblk_put_line_back(pblk, line);
+    kref_put(&line->ref, pblk_line_put);
+    mempool_free(line_ws, pblk->line_ws_pool);
+    atomic_dec(&gc->inflight_gc);
+
+    pr_err("pblk: Failed to GC line %d\n", line->id);
 }

 static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
 {
-    struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-    struct pblk_line_meta *lm = &pblk->lm;
-    struct line_emeta *emeta_buf;
+    struct pblk_gc *gc = &pblk->gc;
     struct pblk_line_ws *line_ws;
-    int ret;

-    line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
-    emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
-                                GFP_KERNEL);
-    if (!emeta_buf) {
-        pr_err("pblk: cannot use GC emeta\n");
-        goto fail_free_ws;
-    }
+    pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

-    ret = pblk_line_read_emeta(pblk, line, emeta_buf);
-    if (ret) {
-        pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
-        goto fail_free_emeta;
-    }
+    line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+    if (!line_ws)
+        return -ENOMEM;

     line_ws->pblk = pblk;
     line_ws->line = line;
-    line_ws->priv = emeta_buf;

-    INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
-    queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+    INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+    queue_work(gc->gc_reader_wq, &line_ws->ws);

     return 0;
-
-fail_free_emeta:
-    pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
-fail_free_ws:
-    mempool_free(line_ws, pblk->line_ws_pool);
-    pblk_put_line_back(pblk, line);
-
-    return 1;
 }

-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static int pblk_gc_read(struct pblk *pblk)
 {
-    struct pblk_line *line, *tline;
+    struct pblk_gc *gc = &pblk->gc;
+    struct pblk_line *line;

-    list_for_each_entry_safe(line, tline, gc_list, list) {
-        if (pblk_gc_line(pblk, line))
-            pr_err("pblk: failed to GC line %d\n", line->id);
-        list_del(&line->list);
+    spin_lock(&gc->r_lock);
+    if (list_empty(&gc->r_list)) {
+        spin_unlock(&gc->r_lock);
+        return 1;
     }
+
+    line = list_first_entry(&gc->r_list, struct pblk_line, list);
+    list_del(&line->list);
+    spin_unlock(&gc->r_lock);
+
+    pblk_gc_kick(pblk);
+
+    if (pblk_gc_line(pblk, line))
+        pr_err("pblk: failed to GC line %d\n", line->id);
+
+    return 0;
+}
+
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
+{
+    wake_up_process(gc->gc_reader_ts);
 }

 static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
@@ -301,6 +324,17 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
     return victim;
 }

+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+    unsigned int nr_blocks_free, nr_blocks_need;
+
+    nr_blocks_need = pblk_rl_high_thrs(rl);
+    nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+    /* This is not critical, no need to take lock here */
+    return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
+}
+
 /*
  * Lines with no valid sectors will be returned to the free list immediately. If
  * GC is activated - either because the free block count is under the determined
@@ -311,71 +345,83 @@ static void pblk_gc_run(struct pblk *pblk)
 {
     struct pblk_line_mgmt *l_mg = &pblk->l_mg;
     struct pblk_gc *gc = &pblk->gc;
-    struct pblk_line *line, *tline;
-    unsigned int nr_blocks_free, nr_blocks_need;
+    struct pblk_line *line;
     struct list_head *group_list;
-    int run_gc, gc_group = 0;
-    int prev_gc = 0;
-    int inflight_gc = atomic_read(&gc->inflight_gc);
-    LIST_HEAD(gc_list);
+    bool run_gc;
+    int inflight_gc, gc_group = 0, prev_group = 0;

-    spin_lock(&l_mg->gc_lock);
-    list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
+    do {
+        spin_lock(&l_mg->gc_lock);
+        if (list_empty(&l_mg->gc_full_list)) {
+            spin_unlock(&l_mg->gc_lock);
+            break;
+        }
+
+        line = list_first_entry(&l_mg->gc_full_list,
+                            struct pblk_line, list);
+
         spin_lock(&line->lock);
         WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
         line->state = PBLK_LINESTATE_GC;
         spin_unlock(&line->lock);

         list_del(&line->list);
+        spin_unlock(&l_mg->gc_lock);
+
         kref_put(&line->ref, pblk_line_put);
-    }
-    spin_unlock(&l_mg->gc_lock);
+    } while (1);

-    nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
-    nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
-    run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+    run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+    if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+        return;

 next_gc_group:
     group_list = l_mg->gc_lists[gc_group++];
-    spin_lock(&l_mg->gc_lock);
-    while (run_gc && !list_empty(group_list)) {
-        /* No need to queue up more GC lines than we can handle */
-        if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+    do {
+        spin_lock(&l_mg->gc_lock);
+        if (list_empty(group_list)) {
             spin_unlock(&l_mg->gc_lock);
-            pblk_gc_lines(pblk, &gc_list);
-            return;
+            break;
         }

         line = pblk_gc_get_victim_line(pblk, group_list);
-        nr_blocks_free += atomic_read(&line->blk_in_line);

         spin_lock(&line->lock);
         WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
         line->state = PBLK_LINESTATE_GC;
-        list_move_tail(&line->list, &gc_list);
-        atomic_inc(&gc->inflight_gc);
-        inflight_gc++;
         spin_unlock(&line->lock);

-        prev_gc = 1;
-        run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
-    }
-    spin_unlock(&l_mg->gc_lock);
+        list_del(&line->list);
+        spin_unlock(&l_mg->gc_lock);
+
+        spin_lock(&gc->r_lock);
+        list_add_tail(&line->list, &gc->r_list);
+        spin_unlock(&gc->r_lock);
+
+        inflight_gc = atomic_inc_return(&gc->inflight_gc);
+        pblk_gc_reader_kick(gc);

-    pblk_gc_lines(pblk, &gc_list);
+        prev_group = 1;

-    if (!prev_gc && pblk->rl.rb_state > gc_group &&
-                    gc_group < PBLK_NR_GC_LISTS)
+        /* No need to queue up more GC lines than we can handle */
+        run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+        if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+            break;
+    } while (1);
+
+    if (!prev_group && pblk->rl.rb_state > gc_group &&
+                    gc_group < PBLK_GC_NR_LISTS)
         goto next_gc_group;
 }

-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
 {
     struct pblk_gc *gc = &pblk->gc;

     wake_up_process(gc->gc_ts);
     pblk_gc_writer_kick(gc);
+    pblk_gc_reader_kick(gc);
     mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 }
@@ -413,42 +459,34 @@ static int pblk_gc_writer_ts(void *data)
     return 0;
 }

+static int pblk_gc_reader_ts(void *data)
+{
+    struct pblk *pblk = data;
+
+    while (!kthread_should_stop()) {
+        if (!pblk_gc_read(pblk))
+            continue;
+        set_current_state(TASK_INTERRUPTIBLE);
+        io_schedule();
+    }
+
+    return 0;
+}
+
 static void pblk_gc_start(struct pblk *pblk)
 {
     pblk->gc.gc_active = 1;
     pr_debug("pblk: gc start\n");
 }

-int pblk_gc_status(struct pblk *pblk)
-{
-    struct pblk_gc *gc = &pblk->gc;
-    int ret;
-
-    spin_lock(&gc->lock);
-    ret = gc->gc_active;
-    spin_unlock(&gc->lock);
-
-    return ret;
-}
-
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
 {
     struct pblk_gc *gc = &pblk->gc;

-    lockdep_assert_held(&gc->lock);
-
     if (gc->gc_enabled && !gc->gc_active)
         pblk_gc_start(pblk);
-}

-void pblk_gc_should_start(struct pblk *pblk)
-{
-    struct pblk_gc *gc = &pblk->gc;
-
-    spin_lock(&gc->lock);
-    __pblk_gc_should_start(pblk);
-    spin_unlock(&gc->lock);
+    pblk_gc_kick(pblk);
 }

 /*
@@ -457,10 +495,7 @@ void pblk_gc_should_start(struct pblk *pblk)
  */
 static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
 {
-    spin_lock(&pblk->gc.lock);
     pblk->gc.gc_active = 0;
-    spin_unlock(&pblk->gc.lock);
-
     pr_debug("pblk: gc stop\n");
 }
@@ -483,20 +518,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
     spin_unlock(&gc->lock);
 }

-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
 {
     struct pblk_gc *gc = &pblk->gc;
-    int rsv = 0;
+
+    if (force < 0 || force > 1)
+        return -EINVAL;

     spin_lock(&gc->lock);
-    if (force) {
-        gc->gc_enabled = 1;
-        rsv = 64;
-    }
-    pblk_rl_set_gc_rsc(&pblk->rl, rsv);
     gc->gc_forced = force;
-    __pblk_gc_should_start(pblk);
+
+    if (force)
+        gc->gc_enabled = 1;
+    else
+        gc->gc_enabled = 0;
     spin_unlock(&gc->lock);
+
+    pblk_gc_should_start(pblk);
+
+    return 0;
 }

 int pblk_gc_init(struct pblk *pblk)
@@ -518,30 +558,58 @@ int pblk_gc_init(struct pblk *pblk)
         goto fail_free_main_kthread;
     }

+    gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+                            "pblk-gc-reader-ts");
+    if (IS_ERR(gc->gc_reader_ts)) {
+        pr_err("pblk: could not allocate GC reader kthread\n");
+        ret = PTR_ERR(gc->gc_reader_ts);
+        goto fail_free_writer_kthread;
+    }
+
     setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
     mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

     gc->gc_active = 0;
     gc->gc_forced = 0;
     gc->gc_enabled = 1;
-    gc->gc_jobs_active = 8;
     gc->w_entries = 0;
     atomic_set(&gc->inflight_gc, 0);

-    gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
-            WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+    /* Workqueue that reads valid sectors from a line and submit them to the
+     * GC writer to be recycled.
+     */
+    gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+            WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+    if (!gc->gc_line_reader_wq) {
+        pr_err("pblk: could not allocate GC line reader workqueue\n");
+        ret = -ENOMEM;
+        goto fail_free_reader_kthread;
+    }
+
+    /* Workqueue that prepare lines for GC */
+    gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+                    WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
     if (!gc->gc_reader_wq) {
         pr_err("pblk: could not allocate GC reader workqueue\n");
         ret = -ENOMEM;
-        goto fail_free_writer_kthread;
+        goto fail_free_reader_line_wq;
     }

     spin_lock_init(&gc->lock);
     spin_lock_init(&gc->w_lock);
+    spin_lock_init(&gc->r_lock);
+    sema_init(&gc->gc_sem, 128);
     INIT_LIST_HEAD(&gc->w_list);
+    INIT_LIST_HEAD(&gc->r_list);

     return 0;

+fail_free_reader_line_wq:
+    destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+    kthread_stop(gc->gc_reader_ts);
 fail_free_writer_kthread:
     kthread_stop(gc->gc_writer_ts);
 fail_free_main_kthread:
@@ -555,6 +623,7 @@ void pblk_gc_exit(struct pblk *pblk)
     struct pblk_gc *gc = &pblk->gc;

     flush_workqueue(gc->gc_reader_wq);
+    flush_workqueue(gc->gc_line_reader_wq);

     del_timer(&gc->gc_timer);
     pblk_gc_stop(pblk, 1);
@@ -562,9 +631,15 @@ void pblk_gc_exit(struct pblk *pblk)
     if (gc->gc_ts)
         kthread_stop(gc->gc_ts);

-    if (pblk->gc.gc_reader_wq)
-        destroy_workqueue(pblk->gc.gc_reader_wq);
+    if (gc->gc_reader_wq)
+        destroy_workqueue(gc->gc_reader_wq);
+
+    if (gc->gc_line_reader_wq)
+        destroy_workqueue(gc->gc_line_reader_wq);

     if (gc->gc_writer_ts)
         kthread_stop(gc->gc_writer_ts);
+
+    if (gc->gc_reader_ts)
+        kthread_stop(gc->gc_reader_ts);
 }
@@ -199,12 +199,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
     struct pblk_line *line;
     struct pblk_rb_entry *entry;
     struct pblk_w_ctx *w_ctx;
+    unsigned int user_io = 0, gc_io = 0;
     unsigned int i;
+    int flags;

     for (i = 0; i < to_update; i++) {
         entry = &rb->entries[*l2p_upd];
         w_ctx = &entry->w_ctx;

+        flags = READ_ONCE(entry->w_ctx.flags);
+        if (flags & PBLK_IOTYPE_USER)
+            user_io++;
+        else if (flags & PBLK_IOTYPE_GC)
+            gc_io++;
+        else
+            WARN(1, "pblk: unknown IO type\n");
+
         pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
                             entry->cacheline);
@@ -214,6 +224,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
         *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
     }

+    pblk_rl_out(&pblk->rl, user_io, gc_io);
+
     return 0;
 }
@@ -531,7 +543,6 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
     struct pblk_rb_entry *entry;
     struct page *page;
     unsigned int pad = 0, to_read = nr_entries;
-    unsigned int user_io = 0, gc_io = 0;
     unsigned int i;
     int flags;
@@ -555,13 +566,6 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
         if (!(flags & PBLK_WRITTEN_DATA))
             goto try;

-        if (flags & PBLK_IOTYPE_USER)
-            user_io++;
-        else if (flags & PBLK_IOTYPE_GC)
-            gc_io++;
-        else
-            WARN(1, "pblk: unknown IO type\n");
-
         page = virt_to_page(entry->data);
         if (!page) {
             pr_err("pblk: could not allocate write bio page\n");
@@ -613,7 +617,6 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
         }
     }

-    pblk_rl_out(&pblk->rl, user_io, gc_io);
 #ifdef CONFIG_NVM_DEBUG
     atomic_long_add(pad, &((struct pblk *)
             (container_of(rb, struct pblk, rwb)))->padded_writes);
...
@@ -27,7 +27,7 @@ int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
 {
     int rb_user_cnt = atomic_read(&rl->rb_user_cnt);

-    return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+    return (!(rb_user_cnt >= rl->rb_user_max));
 }

 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +37,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
     /* If there is no user I/O let GC take over space on the write buffer */
     rb_user_active = READ_ONCE(rl->rb_user_active);
-    return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+    return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
 }

 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +77,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
     unsigned long free_blocks = pblk_rl_nr_free_blks(rl);

     if (free_blocks >= rl->high) {
-        rl->rb_user_max = max - rl->rb_gc_rsv;
-        rl->rb_gc_max = rl->rb_gc_rsv;
+        rl->rb_user_max = max;
+        rl->rb_gc_max = 0;
         rl->rb_state = PBLK_RL_HIGH;
     } else if (free_blocks < rl->high) {
         int shift = rl->high_pw - rl->rb_windows_pw;
         int user_windows = free_blocks >> shift;
         int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
-        int gc_max;

         rl->rb_user_max = user_max;
-        gc_max = max - rl->rb_user_max;
-        rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
+        rl->rb_gc_max = max - user_max;

-        if (free_blocks > rl->low)
-            rl->rb_state = PBLK_RL_MID;
-        else
-            rl->rb_state = PBLK_RL_LOW;
+        if (free_blocks <= rl->rsv_blocks) {
+            rl->rb_user_max = 0;
+            rl->rb_gc_max = max;
+        }
+
+        /* In the worst case, we will need to GC lines in the low list
+         * (high valid sector count). If there are lines to GC on high
+         * or mid lists, these will be prioritized
+         */
+        rl->rb_state = PBLK_RL_LOW;
     }

     return rl->rb_state;
 }

-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
-    rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 {
     struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +121,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
 {
-    struct pblk *pblk = container_of(rl, struct pblk, rl);
     int blk_in_line = atomic_read(&line->blk_in_line);
-    int ret;

     atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+    struct pblk_rl *rl = &pblk->rl;
+    int ret;

     /* Rates will not change that often - no need to lock update */
     ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +139,16 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
         pblk_gc_should_stop(pblk);
 }

-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
 {
     return rl->high;
 }

+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+    return rl->low;
+}
+
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
 {
     return rl->rb_user_max;
@@ -161,15 +169,23 @@ void pblk_rl_free(struct pblk_rl *rl)
 void pblk_rl_init(struct pblk_rl *rl, int budget)
 {
+    struct pblk *pblk = container_of(rl, struct pblk, rl);
+    struct pblk_line_meta *lm = &pblk->lm;
+    int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
     unsigned int rb_windows;

     rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
-    rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
     rl->high_pw = get_count_order(rl->high);

+    rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+    if (rl->low < min_blocks)
+        rl->low = min_blocks;
+
+    rl->rsv_blocks = min_blocks;
+
     /* This will always be a power-of-2 */
     rb_windows = budget / PBLK_MAX_REQ_ADDRS;
-    rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+    rl->rb_windows_pw = get_count_order(rb_windows);

     /* To start with, all buffer is available to user I/O writers */
     rl->rb_budget = budget;
@@ -180,5 +196,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
     atomic_set(&rl->rb_gc_cnt, 0);

     setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
     rl->rb_user_active = 0;
+    rl->rb_gc_active = 0;
 }
...
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
 static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
 {
-    struct nvm_tgt_dev *dev = pblk->dev;
-    struct nvm_geo *geo = &dev->geo;
     int free_blocks, total_blocks;
     int rb_user_max, rb_user_cnt;
-    int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+    int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;

     free_blocks = atomic_read(&pblk->rl.free_blocks);
     rb_user_max = pblk->rl.rb_user_max;
     rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
     rb_gc_max = pblk->rl.rb_gc_max;
-    rb_gc_rsv = pblk->rl.rb_gc_rsv;
     rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
     rb_budget = pblk->rl.rb_budget;
     rb_state = pblk->rl.rb_state;

-    total_blocks = geo->blks_per_lun * geo->nr_luns;
+    total_blocks = pblk->rl.total_blocks;

     return snprintf(page, PAGE_SIZE,
-        "u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+        "u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
                 rb_user_cnt,
                 rb_user_max,
                 rb_gc_cnt,
                 rb_gc_max,
-                rb_gc_rsv,
                 rb_state,
                 rb_budget,
                 pblk->rl.low,
@@ -237,7 +233,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
     spin_unlock(&l_mg->free_lock);

     if (nr_free_lines != free_line_cnt)
-        pr_err("pblk: corrupted free line list\n");
+        pr_err("pblk: corrupted free line list:%d/%d\n",
+                        nr_free_lines, free_line_cnt);

     sz = snprintf(page, PAGE_SIZE - sz,
         "line: nluns:%d, nblks:%d, nsecs:%d\n",
@@ -319,32 +316,11 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 }
 #endif

-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
-                     size_t len)
-{
-    struct pblk_gc *gc = &pblk->gc;
-    size_t c_len;
-    int value;
-
-    c_len = strcspn(page, "\n");
-    if (c_len >= len)
-        return -EINVAL;
-
-    if (kstrtouint(page, 0, &value))
-        return -EINVAL;
-
-    spin_lock(&gc->lock);
-    pblk_rl_set_gc_rsc(&pblk->rl, value);
-    spin_unlock(&gc->lock);
-
-    return len;
-}
-
 static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
                    size_t len)
 {
     size_t c_len;
-    int force;
+    int ret, force;

     c_len = strcspn(page, "\n");
     if (c_len >= len)
@@ -353,10 +329,7 @@ static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
     if (kstrtouint(page, 0, &force))
         return -EINVAL;

-    if (force < 0 || force > 1)
-        return -EINVAL;
-
-    pblk_gc_sysfs_force(pblk, force);
+    ret = pblk_gc_sysfs_force(pblk, force);

     return len;
 }
@@ -434,11 +407,6 @@ static struct attribute sys_max_sec_per_write = {
     .mode = 0644,
 };

-static struct attribute sys_gc_rl_max = {
-    .name = "gc_rl_max",
-    .mode = 0200,
-};
-
 #ifdef CONFIG_NVM_DEBUG
 static struct attribute sys_stats_debug_attr = {
     .name = "stats",
@@ -453,7 +421,6 @@ static struct attribute *pblk_attrs[] = {
     &sys_gc_state,
     &sys_gc_force,
     &sys_max_sec_per_write,
-    &sys_gc_rl_max,
     &sys_rb_attr,
     &sys_stats_ppaf_attr,
     &sys_lines_attr,
@@ -499,9 +466,7 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
 {
     struct pblk *pblk = container_of(kobj, struct pblk, kobj);

-    if (strcmp(attr->name, "gc_rl_max") == 0)
-        return pblk_sysfs_rate_store(pblk, buf, len);
-    else if (strcmp(attr->name, "gc_force") == 0)
+    if (strcmp(attr->name, "gc_force") == 0)
         return pblk_sysfs_gc_force(pblk, buf, len);
     else if (strcmp(attr->name, "max_sec_per_write") == 0)
         return pblk_sysfs_set_sec_per_write(pblk, buf, len);
...
@@ -72,11 +72,15 @@ enum {
     PBLK_BLK_ST_CLOSED = 0x2,
 };

+struct pblk_sec_meta {
+    u64 reserved;
+    __le64 lba;
+};
+
 /* The number of GC lists and the rate-limiter states go together. This way the
  * rate-limiter can dictate how much GC is needed based on resource utilization.
  */
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3

 enum {
     PBLK_RL_HIGH = 1,
@@ -84,11 +88,6 @@ enum {
     PBLK_RL_LOW = 3,
 };

-struct pblk_sec_meta {
-    u64 reserved;
-    __le64 lba;
-};
-
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)

 /* write buffer completion context */
@@ -195,29 +194,39 @@ struct pblk_lun {
 struct pblk_gc_rq {
     struct pblk_line *line;
     void *data;
-    u64 *lba_list;
+    u64 lba_list[PBLK_MAX_REQ_ADDRS];
     int nr_secs;
     int secs_to_gc;
     struct list_head list;
 };

 struct pblk_gc {
+    /* These states are not protected by a lock since (i) they are in the
+     * fast path, and (ii) they are not critical.
+     */
     int gc_active;
     int gc_enabled;
     int gc_forced;
-    int gc_jobs_active;
-    atomic_t inflight_gc;

     struct task_struct *gc_ts;
     struct task_struct *gc_writer_ts;
+    struct task_struct *gc_reader_ts;
+
+    struct workqueue_struct *gc_line_reader_wq;
     struct workqueue_struct *gc_reader_wq;
+
     struct timer_list gc_timer;

+    struct semaphore gc_sem;
+    atomic_t inflight_gc;
     int w_entries;
+
     struct list_head w_list;
+    struct list_head r_list;

     spinlock_t lock;
     spinlock_t w_lock;
+    spinlock_t r_lock;
 };

 struct pblk_rl {
@@ -229,10 +238,8 @@ struct pblk_rl {
                  */
     unsigned int high_pw;   /* High rounded up as a power of 2 */

-#define PBLK_USER_HIGH_THRS 2   /* Begin write limit at 50 percent
-                 * available blks
-                 */
-#define PBLK_USER_LOW_THRS 20   /* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8   /* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10   /* Aggressive GC at 10% available blocks */

     int rb_windows_pw;  /* Number of rate windows in the write buffer
                  * given as a power-of-2. This guarantees that
@@ -250,7 +257,11 @@ struct pblk_rl {
     int rb_state;       /* Rate-limiter current state */
     atomic_t rb_gc_cnt; /* GC I/O buffer counter */

+    int rsv_blocks;     /* Reserved blocks for GC */
+
     int rb_user_active;
+    int rb_gc_active;
+
     struct timer_list u_timer;

     unsigned long long nr_secs;
@@ -428,7 +439,7 @@ struct pblk_line_mgmt {
     struct list_head bad_list;  /* Full lines bad */

     /* GC lists - use gc_lock */
-    struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+    struct list_head *gc_lists[PBLK_GC_NR_LISTS];
     struct list_head gc_high_list;  /* Full lines ready to GC, high isc */
     struct list_head gc_mid_list;   /* Full lines ready to GC, mid isc */
     struct list_head gc_low_list;   /* Full lines ready to GC, low isc */
@@ -768,30 +779,34 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 /*
  * pblk gc
  */
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8   /* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 1024       /* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4          /* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1      /* Reserved lines for GC */

 int pblk_gc_init(struct pblk *pblk);
 void pblk_gc_exit(struct pblk *pblk);
 void pblk_gc_should_start(struct pblk *pblk);
 void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
                   int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);

 /*
  * pblk rate limiter
  */
 void pblk_rl_init(struct pblk_rl *rl, int budget);
 void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
 unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
@@ -837,6 +852,17 @@ static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
     return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
 }

+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+    int vsc;
+
+    spin_lock(&line->lock);
+    vsc = le32_to_cpu(*line->vsc);
+    spin_unlock(&line->lock);
+
+    return vsc;
+}
+
 #define NVM_MEM_PAGE_WRITE (8)

 static inline int pblk_pad_distance(struct pblk *pblk)
...