Commit 6e25cb01, authored by Omar Sandoval, committed by Jens Axboe

kyber: implement improved heuristics

Kyber's current heuristics have a few flaws:

- It's based on the mean latency, but p99 latency tends to be more
  meaningful to anyone who cares about latency. The mean can also be
  skewed by rare outliers that the scheduler can't do anything about.
- The statistics calculations are purely time-based with a short window.
  This works for steady, high load, but is more sensitive to outliers
  with bursty workloads.
- It only considers the latency once an I/O has been submitted to the
  device, but the user cares about the time spent in the kernel, as
  well.

These are shortcomings of the generic blk-stat code which doesn't quite
fit the ideal use case for Kyber. So, this replaces the statistics with
a histogram used to calculate percentiles of total latency and I/O
latency, which we then use to adjust depths in a slightly more
intelligent manner:

- Sync and async writes are now the same domain.
- Discards are a separate domain.
- Domain queue depths are scaled by the ratio of the p99 total latency
  to the target latency (e.g., if the p99 latency is double the target
  latency, we will double the queue depth; if the p99 latency is half of
  the target latency, we can halve the queue depth).
- We use the I/O latency to determine whether we should scale queue
  depths down: we will only scale down if any domain's I/O latency
  exceeds the target latency, which is an indicator of congestion in the
  device.

These new heuristics are just as scalable as the heuristics they
replace.
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent fa2a1f60
...@@ -29,13 +29,16 @@ ...@@ -29,13 +29,16 @@
#include "blk-mq-debugfs.h" #include "blk-mq-debugfs.h"
#include "blk-mq-sched.h" #include "blk-mq-sched.h"
#include "blk-mq-tag.h" #include "blk-mq-tag.h"
#include "blk-stat.h"
/* Scheduling domains. */ /*
* Scheduling domains: the device is divided into multiple domains based on the
* request type.
*/
enum { enum {
KYBER_READ, KYBER_READ,
KYBER_SYNC_WRITE, KYBER_WRITE,
KYBER_OTHER, /* Async writes, discard, etc. */ KYBER_DISCARD,
KYBER_OTHER,
KYBER_NUM_DOMAINS, KYBER_NUM_DOMAINS,
}; };
...@@ -49,25 +52,82 @@ enum { ...@@ -49,25 +52,82 @@ enum {
}; };
/* /*
* Initial device-wide depths for each scheduling domain. * Maximum device-wide depth for each scheduling domain.
* *
* Even for fast devices with lots of tags like NVMe, you can saturate * Even for fast devices with lots of tags like NVMe, you can saturate the
* the device with only a fraction of the maximum possible queue depth. * device with only a fraction of the maximum possible queue depth. So, we cap
* So, we cap these to a reasonable value. * these to a reasonable value.
*/ */
static const unsigned int kyber_depth[] = { static const unsigned int kyber_depth[] = {
[KYBER_READ] = 256, [KYBER_READ] = 256,
[KYBER_SYNC_WRITE] = 128, [KYBER_WRITE] = 128,
[KYBER_OTHER] = 64, [KYBER_DISCARD] = 64,
[KYBER_OTHER] = 16,
}; };
/* /*
* Scheduling domain batch sizes. We favor reads. * Default latency targets for each scheduling domain.
*/
/*
 * Default latency target, in nanoseconds, for each scheduling domain that has
 * one (KYBER_OTHER has no entry).
 *
 * The multipliers are spelled with a ULL suffix so that each product is
 * computed in 64-bit arithmetic. NSEC_PER_SEC is a long constant; on 32-bit
 * builds 5 * NSEC_PER_SEC would overflow before being widened to u64.
 */
static const u64 kyber_latency_targets[] = {
	[KYBER_READ] = 2ULL * NSEC_PER_MSEC,
	[KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
	[KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
};
/*
* Batch size (number of requests we'll dispatch in a row) for each scheduling
* domain.
*/ */
static const unsigned int kyber_batch_size[] = { static const unsigned int kyber_batch_size[] = {
[KYBER_READ] = 16, [KYBER_READ] = 16,
[KYBER_SYNC_WRITE] = 8, [KYBER_WRITE] = 8,
[KYBER_OTHER] = 8, [KYBER_DISCARD] = 1,
[KYBER_OTHER] = 1,
};
/*
* Request latencies are recorded in a histogram with buckets defined relative
* to the target latency:
*
* <= 1/4 * target latency
* <= 1/2 * target latency
* <= 3/4 * target latency
* <= target latency
* <= 1 1/4 * target latency
* <= 1 1/2 * target latency
* <= 1 3/4 * target latency
* > 1 3/4 * target latency
*/
enum {
/*
* The width of the latency histogram buckets is
* 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
*/
KYBER_LATENCY_SHIFT = 2,
/*
* The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
* thus, "good".
*/
KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
};
/*
* We measure both the total latency and the I/O latency (i.e., latency after
* submitting to the device).
*/
/* Latency types; used as the second index of the latency histogram arrays. */
enum {
/* Completion time minus rq->start_time_ns. */
KYBER_TOTAL_LATENCY,
/* Completion time minus rq->io_start_time_ns. */
KYBER_IO_LATENCY,
};
/*
* Per-cpu latency histograms: total latency and I/O latency for each scheduling
* domain except for KYBER_OTHER.
*/
struct kyber_cpu_latency {
atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
}; };
/* /*
...@@ -84,14 +144,9 @@ struct kyber_ctx_queue { ...@@ -84,14 +144,9 @@ struct kyber_ctx_queue {
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
struct kyber_queue_data { struct kyber_queue_data {
struct request_queue *q;
struct blk_stat_callback *cb;
/* /*
* The device is divided into multiple scheduling domains based on the * Each scheduling domain has a limited number of in-flight requests
* request type. Each domain has a fixed number of in-flight requests of * device-wide, limited by these tokens.
* that type device-wide, limited by these tokens.
*/ */
struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS]; struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
...@@ -101,8 +156,19 @@ struct kyber_queue_data { ...@@ -101,8 +156,19 @@ struct kyber_queue_data {
*/ */
unsigned int async_depth; unsigned int async_depth;
struct kyber_cpu_latency __percpu *cpu_latency;
/* Timer for stats aggregation and adjusting domain tokens. */
struct timer_list timer;
unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
unsigned long latency_timeout[KYBER_OTHER];
int domain_p99[KYBER_OTHER];
/* Target latencies in nanoseconds. */ /* Target latencies in nanoseconds. */
u64 read_lat_nsec, write_lat_nsec; u64 latency_targets[KYBER_OTHER];
}; };
struct kyber_hctx_data { struct kyber_hctx_data {
...@@ -122,182 +188,165 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, ...@@ -122,182 +188,165 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
static unsigned int kyber_sched_domain(unsigned int op) static unsigned int kyber_sched_domain(unsigned int op)
{ {
if ((op & REQ_OP_MASK) == REQ_OP_READ) switch (op & REQ_OP_MASK) {
case REQ_OP_READ:
return KYBER_READ; return KYBER_READ;
else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op)) case REQ_OP_WRITE:
return KYBER_SYNC_WRITE; return KYBER_WRITE;
else case REQ_OP_DISCARD:
return KYBER_DISCARD;
default:
return KYBER_OTHER; return KYBER_OTHER;
}
} }
enum { static void flush_latency_buckets(struct kyber_queue_data *kqd,
NONE = 0, struct kyber_cpu_latency *cpu_latency,
GOOD = 1, unsigned int sched_domain, unsigned int type)
GREAT = 2,
BAD = -1,
AWFUL = -2,
};
#define IS_GOOD(status) ((status) > 0)
#define IS_BAD(status) ((status) < 0)
static int kyber_lat_status(struct blk_stat_callback *cb,
unsigned int sched_domain, u64 target)
{ {
u64 latency; unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
if (!cb->stat[sched_domain].nr_samples) unsigned int bucket;
return NONE;
latency = cb->stat[sched_domain].mean; for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
if (latency >= 2 * target) buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
return AWFUL;
else if (latency > target)
return BAD;
else if (latency <= target / 2)
return GREAT;
else /* (latency <= target) */
return GOOD;
} }
/* /*
* Adjust the read or synchronous write depth given the status of reads and * Calculate the histogram bucket with the given percentile rank, or -1 if there
* writes. The goal is that the latencies of the two domains are fair (i.e., if * aren't enough samples yet.
* one is good, then the other is good).
*/ */
static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd, static int calculate_percentile(struct kyber_queue_data *kqd,
unsigned int sched_domain, int this_status, unsigned int sched_domain, unsigned int type,
int other_status) unsigned int percentile)
{ {
unsigned int orig_depth, depth; unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
unsigned int bucket, samples = 0, percentile_samples;
for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
samples += buckets[bucket];
if (!samples)
return -1;
/* /*
* If this domain had no samples, or reads and writes are both good or * We do the calculation once we have 500 samples or one second passes
* both bad, don't adjust the depth. * since the first sample was recorded, whichever comes first.
*/ */
if (this_status == NONE || if (!kqd->latency_timeout[sched_domain])
(IS_GOOD(this_status) && IS_GOOD(other_status)) || kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
(IS_BAD(this_status) && IS_BAD(other_status))) if (samples < 500 &&
return; time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
return -1;
orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth; }
kqd->latency_timeout[sched_domain] = 0;
if (other_status == NONE) { percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
depth++; for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
} else { if (buckets[bucket] >= percentile_samples)
switch (this_status) {
case GOOD:
if (other_status == AWFUL)
depth -= max(depth / 4, 1U);
else
depth -= max(depth / 8, 1U);
break;
case GREAT:
if (other_status == AWFUL)
depth /= 2;
else
depth -= max(depth / 4, 1U);
break; break;
case BAD: percentile_samples -= buckets[bucket];
depth++;
break;
case AWFUL:
if (other_status == GREAT)
depth += 2;
else
depth++;
break;
}
} }
memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));
return bucket;
}
static void kyber_resize_domain(struct kyber_queue_data *kqd,
unsigned int sched_domain, unsigned int depth)
{
depth = clamp(depth, 1U, kyber_depth[sched_domain]); depth = clamp(depth, 1U, kyber_depth[sched_domain]);
if (depth != orig_depth) if (depth != kqd->domain_tokens[sched_domain].sb.depth)
sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth); sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
} }
/* static void kyber_timer_fn(struct timer_list *t)
* Adjust the depth of other requests given the status of reads and synchronous {
* writes. As long as either domain is doing fine, we don't throttle, but if struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
* both domains are doing badly, we throttle heavily. unsigned int sched_domain;
*/ int cpu;
static void kyber_adjust_other_depth(struct kyber_queue_data *kqd, bool bad = false;
int read_status, int write_status,
bool have_samples) /* Sum all of the per-cpu latency histograms. */
{ for_each_online_cpu(cpu) {
unsigned int orig_depth, depth; struct kyber_cpu_latency *cpu_latency;
int status;
cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth; for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
flush_latency_buckets(kqd, cpu_latency, sched_domain,
if (read_status == NONE && write_status == NONE) { KYBER_TOTAL_LATENCY);
depth += 2; flush_latency_buckets(kqd, cpu_latency, sched_domain,
} else if (have_samples) { KYBER_IO_LATENCY);
if (read_status == NONE)
status = write_status;
else if (write_status == NONE)
status = read_status;
else
status = max(read_status, write_status);
switch (status) {
case GREAT:
depth += 2;
break;
case GOOD:
depth++;
break;
case BAD:
depth -= max(depth / 4, 1U);
break;
case AWFUL:
depth /= 2;
break;
} }
} }
depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]); /*
if (depth != orig_depth) * Check if any domains have a high I/O latency, which might indicate
sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth); * congestion in the device. Note that we use the p90; we don't want to
} * be too sensitive to outliers here.
*/
/* for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
* Apply heuristics for limiting queue depths based on gathered latency int p90;
* statistics.
*/
static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
{
struct kyber_queue_data *kqd = cb->data;
int read_status, write_status;
read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status); p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status); 90);
kyber_adjust_other_depth(kqd, read_status, write_status, if (p90 >= KYBER_GOOD_BUCKETS)
cb->stat[KYBER_OTHER].nr_samples != 0); bad = true;
}
/* /*
* Continue monitoring latencies if we aren't hitting the targets or * Adjust the scheduling domain depths. If we determined that there was
* we're still throttling other requests. * congestion, we throttle all domains with good latencies. Either way,
* we ease up on throttling domains with bad latencies.
*/ */
if (!blk_stat_is_active(kqd->cb) && for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
((IS_BAD(read_status) || IS_BAD(write_status) || unsigned int orig_depth, depth;
kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER]))) int p99;
blk_stat_activate_msecs(kqd->cb, 100);
p99 = calculate_percentile(kqd, sched_domain,
KYBER_TOTAL_LATENCY, 99);
/*
* This is kind of subtle: different domains will not
* necessarily have enough samples to calculate the latency
* percentiles during the same window, so we have to remember
* the p99 for the next time we observe congestion; once we do,
* we don't want to throttle again until we get more data, so we
* reset it to -1.
*/
if (bad) {
if (p99 < 0)
p99 = kqd->domain_p99[sched_domain];
kqd->domain_p99[sched_domain] = -1;
} else if (p99 >= 0) {
kqd->domain_p99[sched_domain] = p99;
}
if (p99 < 0)
continue;
/*
* If this domain has bad latency, throttle less. Otherwise,
* throttle more iff we determined that there is congestion.
*
* The new depth is scaled linearly with the p99 latency vs the
* latency target. E.g., if the p99 is 3/4 of the target, then
* we throttle down to 3/4 of the current depth, and if the p99
* is 2x the target, then we double the depth.
*/
if (bad || p99 >= KYBER_GOOD_BUCKETS) {
orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
kyber_resize_domain(kqd, sched_domain, depth);
}
}
} }
static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd) static unsigned int kyber_sched_tags_shift(struct request_queue *q)
{ {
/* /*
* All of the hardware queues have the same depth, so we can just grab * All of the hardware queues have the same depth, so we can just grab
* the shift of the first one. * the shift of the first one.
*/ */
return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift; return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
}
static int kyber_bucket_fn(const struct request *rq)
{
return kyber_sched_domain(rq->cmd_flags);
} }
static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
...@@ -307,16 +356,17 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) ...@@ -307,16 +356,17 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
int ret = -ENOMEM; int ret = -ENOMEM;
int i; int i;
kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node); kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
if (!kqd) if (!kqd)
goto err; goto err;
kqd->q = q;
kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn, kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
KYBER_NUM_DOMAINS, kqd); GFP_KERNEL | __GFP_ZERO);
if (!kqd->cb) if (!kqd->cpu_latency)
goto err_kqd; goto err_kqd;
timer_setup(&kqd->timer, kyber_timer_fn, 0);
for (i = 0; i < KYBER_NUM_DOMAINS; i++) { for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
WARN_ON(!kyber_depth[i]); WARN_ON(!kyber_depth[i]);
WARN_ON(!kyber_batch_size[i]); WARN_ON(!kyber_batch_size[i]);
...@@ -326,20 +376,22 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q) ...@@ -326,20 +376,22 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
if (ret) { if (ret) {
while (--i >= 0) while (--i >= 0)
sbitmap_queue_free(&kqd->domain_tokens[i]); sbitmap_queue_free(&kqd->domain_tokens[i]);
goto err_cb; goto err_buckets;
} }
} }
shift = kyber_sched_tags_shift(kqd); for (i = 0; i < KYBER_OTHER; i++) {
kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U; kqd->domain_p99[i] = -1;
kqd->latency_targets[i] = kyber_latency_targets[i];
}
kqd->read_lat_nsec = 2000000ULL; shift = kyber_sched_tags_shift(q);
kqd->write_lat_nsec = 10000000ULL; kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
return kqd; return kqd;
err_cb: err_buckets:
blk_stat_free_callback(kqd->cb); free_percpu(kqd->cpu_latency);
err_kqd: err_kqd:
kfree(kqd); kfree(kqd);
err: err:
...@@ -361,25 +413,24 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e) ...@@ -361,25 +413,24 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
return PTR_ERR(kqd); return PTR_ERR(kqd);
} }
blk_stat_enable_accounting(q);
eq->elevator_data = kqd; eq->elevator_data = kqd;
q->elevator = eq; q->elevator = eq;
blk_stat_add_callback(q, kqd->cb);
return 0; return 0;
} }
static void kyber_exit_sched(struct elevator_queue *e) static void kyber_exit_sched(struct elevator_queue *e)
{ {
struct kyber_queue_data *kqd = e->elevator_data; struct kyber_queue_data *kqd = e->elevator_data;
struct request_queue *q = kqd->q;
int i; int i;
blk_stat_remove_callback(q, kqd->cb); del_timer_sync(&kqd->timer);
for (i = 0; i < KYBER_NUM_DOMAINS; i++) for (i = 0; i < KYBER_NUM_DOMAINS; i++)
sbitmap_queue_free(&kqd->domain_tokens[i]); sbitmap_queue_free(&kqd->domain_tokens[i]);
blk_stat_free_callback(kqd->cb); free_percpu(kqd->cpu_latency);
kfree(kqd); kfree(kqd);
} }
...@@ -547,40 +598,44 @@ static void kyber_finish_request(struct request *rq) ...@@ -547,40 +598,44 @@ static void kyber_finish_request(struct request *rq)
rq_clear_domain_token(kqd, rq); rq_clear_domain_token(kqd, rq);
} }
static void kyber_completed_request(struct request *rq, u64 now) static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
unsigned int sched_domain, unsigned int type,
u64 target, u64 latency)
{ {
struct request_queue *q = rq->q; unsigned int bucket;
struct kyber_queue_data *kqd = q->elevator->elevator_data; u64 divisor;
unsigned int sched_domain;
u64 latency, target;
/* if (latency > 0) {
* Check if this request met our latency goal. If not, quickly gather divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
* some statistics and start throttling. bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
*/ KYBER_LATENCY_BUCKETS - 1);
sched_domain = kyber_sched_domain(rq->cmd_flags); } else {
switch (sched_domain) { bucket = 0;
case KYBER_READ:
target = kqd->read_lat_nsec;
break;
case KYBER_SYNC_WRITE:
target = kqd->write_lat_nsec;
break;
default:
return;
} }
/* If we are already monitoring latencies, don't check again. */ atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
if (blk_stat_is_active(kqd->cb)) }
return;
if (now < rq->io_start_time_ns) static void kyber_completed_request(struct request *rq, u64 now)
{
struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
struct kyber_cpu_latency *cpu_latency;
unsigned int sched_domain;
u64 target;
sched_domain = kyber_sched_domain(rq->cmd_flags);
if (sched_domain == KYBER_OTHER)
return; return;
latency = now - rq->io_start_time_ns; cpu_latency = get_cpu_ptr(kqd->cpu_latency);
target = kqd->latency_targets[sched_domain];
add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
target, now - rq->start_time_ns);
add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
now - rq->io_start_time_ns);
put_cpu_ptr(kqd->cpu_latency);
if (latency > target) timer_reduce(&kqd->timer, jiffies + HZ / 10);
blk_stat_activate_msecs(kqd->cb, 10);
} }
struct flush_kcq_data { struct flush_kcq_data {
...@@ -778,17 +833,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx) ...@@ -778,17 +833,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
return false; return false;
} }
#define KYBER_LAT_SHOW_STORE(op) \ #define KYBER_LAT_SHOW_STORE(domain, name) \
static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \ static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \
char *page) \ char *page) \
{ \ { \
struct kyber_queue_data *kqd = e->elevator_data; \ struct kyber_queue_data *kqd = e->elevator_data; \
\ \
return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \ return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
} \ } \
\ \
static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \
const char *page, size_t count) \ const char *page, size_t count) \
{ \ { \
struct kyber_queue_data *kqd = e->elevator_data; \ struct kyber_queue_data *kqd = e->elevator_data; \
unsigned long long nsec; \ unsigned long long nsec; \
...@@ -798,12 +853,12 @@ static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \ ...@@ -798,12 +853,12 @@ static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
if (ret) \ if (ret) \
return ret; \ return ret; \
\ \
kqd->op##_lat_nsec = nsec; \ kqd->latency_targets[domain] = nsec; \
\ \
return count; \ return count; \
} }
KYBER_LAT_SHOW_STORE(read); KYBER_LAT_SHOW_STORE(KYBER_READ, read);
KYBER_LAT_SHOW_STORE(write); KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
#undef KYBER_LAT_SHOW_STORE #undef KYBER_LAT_SHOW_STORE
#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store) #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
...@@ -870,7 +925,8 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \ ...@@ -870,7 +925,8 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \
return 0; \ return 0; \
} }
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other) KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
#undef KYBER_DEBUGFS_DOMAIN_ATTRS #undef KYBER_DEBUGFS_DOMAIN_ATTRS
...@@ -892,8 +948,11 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m) ...@@ -892,8 +948,11 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m)
case KYBER_READ: case KYBER_READ:
seq_puts(m, "READ\n"); seq_puts(m, "READ\n");
break; break;
case KYBER_SYNC_WRITE: case KYBER_WRITE:
seq_puts(m, "SYNC_WRITE\n"); seq_puts(m, "WRITE\n");
break;
case KYBER_DISCARD:
seq_puts(m, "DISCARD\n");
break; break;
case KYBER_OTHER: case KYBER_OTHER:
seq_puts(m, "OTHER\n"); seq_puts(m, "OTHER\n");
...@@ -918,7 +977,8 @@ static int kyber_batching_show(void *data, struct seq_file *m) ...@@ -918,7 +977,8 @@ static int kyber_batching_show(void *data, struct seq_file *m)
{#name "_tokens", 0400, kyber_##name##_tokens_show} {#name "_tokens", 0400, kyber_##name##_tokens_show}
static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
KYBER_QUEUE_DOMAIN_ATTRS(read), KYBER_QUEUE_DOMAIN_ATTRS(read),
KYBER_QUEUE_DOMAIN_ATTRS(sync_write), KYBER_QUEUE_DOMAIN_ATTRS(write),
KYBER_QUEUE_DOMAIN_ATTRS(discard),
KYBER_QUEUE_DOMAIN_ATTRS(other), KYBER_QUEUE_DOMAIN_ATTRS(other),
{"async_depth", 0400, kyber_async_depth_show}, {"async_depth", 0400, kyber_async_depth_show},
{}, {},
...@@ -930,7 +990,8 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = { ...@@ -930,7 +990,8 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
{#name "_waiting", 0400, kyber_##name##_waiting_show} {#name "_waiting", 0400, kyber_##name##_waiting_show}
static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = { static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
KYBER_HCTX_DOMAIN_ATTRS(read), KYBER_HCTX_DOMAIN_ATTRS(read),
KYBER_HCTX_DOMAIN_ATTRS(sync_write), KYBER_HCTX_DOMAIN_ATTRS(write),
KYBER_HCTX_DOMAIN_ATTRS(discard),
KYBER_HCTX_DOMAIN_ATTRS(other), KYBER_HCTX_DOMAIN_ATTRS(other),
{"cur_domain", 0400, kyber_cur_domain_show}, {"cur_domain", 0400, kyber_cur_domain_show},
{"batching", 0400, kyber_batching_show}, {"batching", 0400, kyber_batching_show},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment