Commit 2b8bd423 authored by Konstantin Khlebnikov, committed by Jens Axboe

block/diskstats: more accurate approximation of io_ticks for slow disks

Currently io_ticks is approximated by adding one at each start and end of
a request if the jiffies counter has changed. This works perfectly for requests
shorter than a jiffy, or when at least one request starts or ends in each jiffy.

If the disk executes just one request at a time and each request is longer than
two jiffies, then only the first and last jiffies will be accounted.

The fix is simple: at the end of a request, add to io_ticks the number of
jiffies that have passed since the last update rather than just one jiffy.
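
As an illustration (not part of the patch), here is a minimal userspace sketch
of both accounting schemes for a stream of back-to-back 12 ms requests. It
assumes HZ=1000, so one jiffy equals one millisecond, and it roughly reproduces
the "%util" numbers shown in the iostat output below:

/*
 * Illustration only: simulate the old scheme (+1 jiffy whenever the
 * stamp changes) and the new scheme (at request end, add all jiffies
 * since the last update) for back-to-back 12 ms requests over 1 second.
 * Assumes HZ=1000 so that one jiffy == 1 ms.
 */
#include <stdio.h>
#include <stdbool.h>

static unsigned long stamp_old, stamp_new;
static unsigned long ticks_old, ticks_new;

static void account_old(unsigned long now)
{
	if (now != stamp_old) {		/* old: +1 jiffy at start and end */
		ticks_old += 1;
		stamp_old = now;
	}
}

static void account_new(unsigned long now, bool end)
{
	if (now != stamp_new) {		/* new: at end, add jiffies since last update */
		ticks_new += end ? now - stamp_new : 1;
		stamp_new = now;
	}
}

int main(void)
{
	const unsigned long req_jiffies = 12;	/* ~12 ms per random 4k read */

	for (unsigned long t = 0; t + req_jiffies <= 1000; t += req_jiffies) {
		account_old(t);				/* request start */
		account_new(t, false);
		account_old(t + req_jiffies);		/* request end */
		account_new(t + req_jiffies, true);
	}
	printf("old io_ticks: %lu ms/s (~%lu%% util)\n", ticks_old, ticks_old / 10);
	printf("new io_ticks: %lu ms/s (~%lu%% util)\n", ticks_new, ticks_new / 10);
	return 0;
}

Under these assumptions the sketch reports roughly 83 ms/s (~8% util) for the
old scheme and 996 ms/s (~99% util) for the new one.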

Example: a common HDD completes random 4k read requests in around 12 ms.

fio --name=test --filename=/dev/sdb --rw=randread --direct=1 --runtime=30 &
iostat -x 10 sdb

Note the change in iostat's "%util" from 8,43% to 99,99% before/after the patch:

Before:

Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await r_await w_await  svctm  %util
sdb               0,00     0,00   82,60    0,00   330,40     0,00     8,00     0,96   12,09   12,09    0,00   1,02   8,43

After:

Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await r_await w_await  svctm  %util
sdb               0,00     0,00   82,50    0,00   330,00     0,00     8,00     1,00   12,10   12,10    0,00  12,12  99,99

Now io_ticks does not lose time between the start and end of requests, but
for queue depth > 1 some I/O time between adjacent starts might still be lost.

For load estimation, "%util" is not as useful as the average queue length,
but it clearly shows how often the disk queue is completely empty.
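
For reference (again not part of the patch), a rough sketch of how a tool can
derive "%util" from Field 10 of /proc/diskstats: sample io_ticks twice and
divide the delta by the elapsed wall-clock time. The parsing below is
deliberately minimal and only illustrative:

/*
 * Illustration only: compute %util for a device from Field 10
 * (milliseconds spent doing I/O) of /proc/diskstats, sampled 10 s apart.
 * Assumes the classic field layout: major minor name + counters.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static unsigned long long read_io_ticks(const char *dev)
{
	char name[64], line[256];
	unsigned int major, minor;
	unsigned long long f[10], ticks = 0;
	FILE *fp = fopen("/proc/diskstats", "r");

	if (!fp)
		return 0;
	while (fgets(line, sizeof(line), fp)) {
		if (sscanf(line, "%u %u %63s %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
			   &major, &minor, name, &f[0], &f[1], &f[2], &f[3],
			   &f[4], &f[5], &f[6], &f[7], &f[8], &f[9]) >= 13 &&
		    strcmp(name, dev) == 0) {
			ticks = f[9];	/* Field 10: ms spent doing I/O */
			break;
		}
	}
	fclose(fp);
	return ticks;
}

int main(void)
{
	unsigned long long a = read_io_ticks("sdb");
	sleep(10);
	unsigned long long b = read_io_ticks("sdb");

	printf("%%util: %.2f\n", (b - a) / (10 * 1000.0) * 100.0);
	return 0;
}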

Fixes: 5b18b5a7 ("block: delete part_round_stats and switch to less precise counting")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 387048bf
@@ -100,7 +100,7 @@ Field 10 -- # of milliseconds spent doing I/Os (unsigned int)
     Since 5.0 this field counts jiffies when at least one request was
     started or completed. If request runs more than 2 jiffies then some
-    I/O time will not be accounted unless there are other requests.
+    I/O time might be not accounted in case of concurrent requests.
 
 Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int)
     This field is incremented at each I/O start, I/O completion, I/O
@@ -143,6 +143,9 @@ are summed (possibly overflowing the unsigned long variable they are
 summed to) and the result given to the user. There is no convenient
 user interface for accessing the per-CPU counters themselves.
 
+Since 4.19 request times are measured with nanoseconds precision and
+truncated to milliseconds before showing in this interface.
+
 Disks vs Partitions
 -------------------
...
@@ -1768,14 +1768,14 @@ void bio_check_pages_dirty(struct bio *bio)
 	schedule_work(&bio_dirty_work);
 }
 
-void update_io_ticks(struct hd_struct *part, unsigned long now)
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
 {
 	unsigned long stamp;
 again:
 	stamp = READ_ONCE(part->stamp);
 	if (unlikely(stamp != now)) {
 		if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
-			__part_stat_add(part, io_ticks, 1);
+			__part_stat_add(part, io_ticks, end ? now - stamp : 1);
 		}
 	}
 	if (part->partno) {
@@ -1791,7 +1791,7 @@ void generic_start_io_acct(struct request_queue *q, int op,
 	part_stat_lock();
 
-	update_io_ticks(part, jiffies);
+	update_io_ticks(part, jiffies, false);
 	part_stat_inc(part, ios[sgrp]);
 	part_stat_add(part, sectors[sgrp], sectors);
 	part_inc_in_flight(q, part, op_is_write(op));
@@ -1809,7 +1809,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
 	part_stat_lock();
 
-	update_io_ticks(part, now);
+	update_io_ticks(part, now, true);
 	part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
 	part_stat_add(part, time_in_queue, duration);
 	part_dec_in_flight(q, part, op_is_write(req_op));
...
@@ -1337,7 +1337,7 @@ void blk_account_io_done(struct request *req, u64 now)
 		part_stat_lock();
 		part = req->part;
 
-		update_io_ticks(part, jiffies);
+		update_io_ticks(part, jiffies, true);
 		part_stat_inc(part, ios[sgrp]);
 		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
 		part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
@@ -1379,7 +1379,7 @@ void blk_account_io_start(struct request *rq, bool new_io)
 		rq->part = part;
 	}
 
-	update_io_ticks(part, jiffies);
+	update_io_ticks(part, jiffies, false);
 
 	part_stat_unlock();
 }
...
@@ -422,7 +422,7 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
 void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
 			int rw);
-void update_io_ticks(struct hd_struct *part, unsigned long now);
+void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
 
 /* block/genhd.c */
 extern void device_add_disk(struct device *parent, struct gendisk *disk,
...