Commit 065655c8 authored by Tejun Heo, committed by Jens Axboe

blk-iocost: decouple vrate adjustment from surplus transfers

Budget donations are inaccurate and could take multiple periods to converge.
To prevent triggering vrate adjustments while surplus transfers were
catching up, vrate adjustment was suppressed if donations were increasing,
which was indicated by non-zero nr_surpluses.

This entangling won't be necessary with the scheduled rewrite of donation
mechanism which will make it precise and immediate. Let's decouple the two
in preparation.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 8692d2db
...@@ -1508,7 +1508,7 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1508,7 +1508,7 @@ static void ioc_timer_fn(struct timer_list *timer)
struct ioc_gq *iocg, *tiocg; struct ioc_gq *iocg, *tiocg;
struct ioc_now now; struct ioc_now now;
LIST_HEAD(surpluses); LIST_HEAD(surpluses);
int nr_surpluses = 0, nr_shortages = 0, nr_lagging = 0; int nr_shortages = 0, nr_lagging = 0;
u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM];
u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
u32 missed_ppm[2], rq_wait_pct; u32 missed_ppm[2], rq_wait_pct;
...@@ -1640,10 +1640,8 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1640,10 +1640,8 @@ static void ioc_timer_fn(struct timer_list *timer)
atomic64_add(delta, &iocg->vtime); atomic64_add(delta, &iocg->vtime);
atomic64_add(delta, &iocg->done_vtime); atomic64_add(delta, &iocg->done_vtime);
/* if usage is sufficiently low, maybe it can donate */ /* if usage is sufficiently low, maybe it can donate */
if (surplus_adjusted_hweight_inuse(usage, hw_inuse)) { if (surplus_adjusted_hweight_inuse(usage, hw_inuse))
list_add(&iocg->surplus_list, &surpluses); list_add(&iocg->surplus_list, &surpluses);
nr_surpluses++;
}
} else if (hw_inuse < hw_active) { } else if (hw_inuse < hw_active) {
u32 new_hwi, new_inuse; u32 new_hwi, new_inuse;
...@@ -1673,7 +1671,7 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1673,7 +1671,7 @@ static void ioc_timer_fn(struct timer_list *timer)
} }
} }
if (!nr_shortages || !nr_surpluses) if (!nr_shortages || list_empty(&surpluses))
goto skip_surplus_transfers; goto skip_surplus_transfers;
/* there are both shortages and surpluses, transfer surpluses */ /* there are both shortages and surpluses, transfer surpluses */
...@@ -1738,11 +1736,9 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1738,11 +1736,9 @@ static void ioc_timer_fn(struct timer_list *timer)
/* /*
* If there are IOs spanning multiple periods, wait * If there are IOs spanning multiple periods, wait
* them out before pushing the device harder. If * them out before pushing the device harder.
* there are surpluses, let redistribution work it
* out first.
*/ */
if (!nr_lagging && !nr_surpluses) if (!nr_lagging)
ioc->busy_level--; ioc->busy_level--;
} else { } else {
/* /*
...@@ -1796,15 +1792,14 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1796,15 +1792,14 @@ static void ioc_timer_fn(struct timer_list *timer)
} }
trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct,
nr_lagging, nr_shortages, nr_lagging, nr_shortages);
nr_surpluses);
atomic64_set(&ioc->vtime_rate, vrate); atomic64_set(&ioc->vtime_rate, vrate);
ioc_refresh_margins(ioc); ioc_refresh_margins(ioc);
} else if (ioc->busy_level != prev_busy_level || nr_lagging) { } else if (ioc->busy_level != prev_busy_level || nr_lagging) {
trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
missed_ppm, rq_wait_pct, nr_lagging, missed_ppm, rq_wait_pct, nr_lagging,
nr_shortages, nr_surpluses); nr_shortages);
} }
ioc_refresh_params(ioc, false); ioc_refresh_params(ioc, false);
......
...@@ -128,11 +128,9 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, ...@@ -128,11 +128,9 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset,
TRACE_EVENT(iocost_ioc_vrate_adj, TRACE_EVENT(iocost_ioc_vrate_adj,
TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm, TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm,
u32 rq_wait_pct, int nr_lagging, int nr_shortages, u32 rq_wait_pct, int nr_lagging, int nr_shortages),
int nr_surpluses),
TP_ARGS(ioc, new_vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages, TP_ARGS(ioc, new_vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages),
nr_surpluses),
TP_STRUCT__entry ( TP_STRUCT__entry (
__string(devname, ioc_name(ioc)) __string(devname, ioc_name(ioc))
...@@ -144,7 +142,6 @@ TRACE_EVENT(iocost_ioc_vrate_adj, ...@@ -144,7 +142,6 @@ TRACE_EVENT(iocost_ioc_vrate_adj,
__field(u32, rq_wait_pct) __field(u32, rq_wait_pct)
__field(int, nr_lagging) __field(int, nr_lagging)
__field(int, nr_shortages) __field(int, nr_shortages)
__field(int, nr_surpluses)
), ),
TP_fast_assign( TP_fast_assign(
...@@ -157,15 +154,13 @@ TRACE_EVENT(iocost_ioc_vrate_adj, ...@@ -157,15 +154,13 @@ TRACE_EVENT(iocost_ioc_vrate_adj,
__entry->rq_wait_pct = rq_wait_pct; __entry->rq_wait_pct = rq_wait_pct;
__entry->nr_lagging = nr_lagging; __entry->nr_lagging = nr_lagging;
__entry->nr_shortages = nr_shortages; __entry->nr_shortages = nr_shortages;
__entry->nr_surpluses = nr_surpluses;
), ),
TP_printk("[%s] vrate=%llu->%llu busy=%d missed_ppm=%u:%u rq_wait_pct=%u lagging=%d shortages=%d surpluses=%d", TP_printk("[%s] vrate=%llu->%llu busy=%d missed_ppm=%u:%u rq_wait_pct=%u lagging=%d shortages=%d",
__get_str(devname), __entry->old_vrate, __entry->new_vrate, __get_str(devname), __entry->old_vrate, __entry->new_vrate,
__entry->busy_level, __entry->busy_level,
__entry->read_missed_ppm, __entry->write_missed_ppm, __entry->read_missed_ppm, __entry->write_missed_ppm,
__entry->rq_wait_pct, __entry->nr_lagging, __entry->nr_shortages, __entry->rq_wait_pct, __entry->nr_lagging, __entry->nr_shortages
__entry->nr_surpluses
) )
); );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment