Commit ee6c2796 authored by Kirill Smelkov's avatar Kirill Smelkov

go/neo/t/neotest: Disk information & benchmarks

Add a bench-disk command to neotest that performs random-read disk
benchmarks via ioping. Example output:

	(venv) (8) neotest@rio:~/8/src/lab.nexedi.com/kirr/neo/go/neo/t$ ./neotest bench-disk
	node:   rio
	cluster:

	*** disk: random direct (no kernel cache) 4K-read latency

	--- . (ext4 /dev/sda1) ioping statistics ---
	29.1 k requests completed in 2.95 s, 113.6 MiB read, 9.85 k iops, 38.5 MiB/s
	generated 29.1 k requests in 3.00 s, 113.6 MiB, 9.69 k iops, 37.9 MiB/s
	min/avg/max/mdev = 43.2 us / 101.5 us / 250.0 us / 7.48 us
	Benchmarkdisk/randread/direct/4K-min 1 43.2 us/op
	Benchmarkdisk/randread/direct/4K-avg 1 101.5 us/op
	< 59.2 us       458     |
	< 63.0 us       0       |
	< 66.7 us       0       |
	< 70.5 us       0       |
	< 74.2 us       1       |
	< 78.0 us       1       |
	< 81.7 us       0       |
	< 85.5 us       1       |
	< 89.2 us       0       |
	< 93.0 us       0       |
	< 96.7 us       0       |
	< 100.5 us      333     |
	< 104.2 us      27793   | ***********************************************
	< 108.0 us      259     |
	< 111.7 us      21      |
	< 115.5 us      8       |
	< 119.2 us      18      |
	< 123.0 us      1       |
	< 126.7 us      0       |
	< 130.5 us      7       |
	< 134.2 us      59      |
	<       +∞      18      |
	# POLL·186 C1·291360 C1E·290802 C3·31 C6·1218

	...

	*** disk: random cached 4K-read latency

	--- . (ext4 /dev/sda1) ioping statistics ---
	3.15 M requests completed in 2.82 s, 12.0 GiB read, 1.12 M iops, 4.26 GiB/s
	generated 3.15 M requests in 3.00 s, 12.0 GiB, 1.05 M iops, 4.00 GiB/s
	min/avg/max/mdev = 465 ns / 896 ns / 37.4 us / 183 ns
	Benchmarkdisk/randread/pagecache/4K-min 1 465 ns/op
	Benchmarkdisk/randread/pagecache/4K-avg 1 896 ns/op
	< 839 ns        771375  | ************
	< 872 ns        609361  | *********
	< 905 ns        660635  | **********
	< 938 ns        505305  | ********
	< 971 ns        189182  | ***
	< 1.00 us       93655   | *
	< 1.04 us       70811   | *
	< 1.07 us       57650   |
	< 1.10 us       51587   |
	< 1.14 us       44648   |
	< 1.17 us       40868   |
	< 1.20 us       27301   |
	< 1.24 us       12503   |
	< 1.27 us       5580    |
	< 1.30 us       2517    |
	< 1.34 us       1404    |
	< 1.37 us       698     |
	< 1.40 us       378     |
	< 1.44 us       208     |
	< 1.47 us       119     |
	< 1.50 us       50      |
	<       +∞      978     |
	# POLL·1 C1·57 C1E·11 C3·2 C6·257

The benchmarks are done so that their output conforms to the Go benchmarking
format. This way it is possible to process the output with benchstat to
summarize / compare results. For the above run the summary is:

	name                            time/op
	disk/randread/direct/4K-min     39.0µs ±12%
	disk/randread/direct/4K-avg      117µs ± 3%
	disk/randread/pagecache/4K-min   461ns ± 3%
	disk/randread/pagecache/4K-avg   880ns ± 0%

While on the topic of disks, teach info/info-local to show related
information about the node's disk. Example output line for rio:

	disk/sda: Samsung SSD 840   rev BB0Q 931.5G

Please see

	http://navytux.spb.ru/~kirr/neo.html#the-need-for-faster-storage
	http://navytux.spb.ru/~kirr/neo.html#appendix-i-ssd-latency

for some discussion about SSD performance.

Some draft history related to this patch:

	lab.nexedi.com/kirr/neo/commit/d35a2fdf	X neotest/info-local: Fix disk display in presence of bind-mounts
	lab.nexedi.com/kirr/neo/commit/44529dbf	X neotest/bench-disk: Deduplicate code; change 1M -> 2M
	lab.nexedi.com/kirr/neo/commit/8bac3dba	X neotest/bench-disk: Also benchmark randomly reading 1M blocks
	lab.nexedi.com/kirr/neo/commit/e795c6ed	X neotest: Fix disk display in case of DM
	lab.nexedi.com/kirr/neo/commit/352cd100	X neotest: Fix disk display in case of MD
	lab.nexedi.com/kirr/neo/commit/cd2cd093	X bench_disk: Add std bench format
	lab.nexedi.com/kirr/neo/commit/9f86eb40	X bench += ioping
parent a60c472c
/log
/var
/tcpu /tcpu
/tcpu_go /tcpu_go
/ioping.tmp
...@@ -66,6 +66,9 @@ GOPATH=${GOPATH%:} ...@@ -66,6 +66,9 @@ GOPATH=${GOPATH%:}
# python # python
. $X/venv/bin/activate . $X/venv/bin/activate
# ioping
export PATH=$X/ioping:$PATH
# XXX for mysqld # XXX for mysqld
export PATH=$PATH:/sbin:/usr/sbin export PATH=$PATH:/sbin:/usr/sbin
EOF EOF
...@@ -85,6 +88,11 @@ EOF ...@@ -85,6 +88,11 @@ EOF
go get -v golang.org/x/perf/cmd/benchstat # to summarize/diff benchmark results go get -v golang.org/x/perf/cmd/benchstat # to summarize/diff benchmark results
#go get -v github.com/aclements/perflock/cmd/perflock # handy to fix CPU frequency/etc #go get -v github.com/aclements/perflock/cmd/perflock # handy to fix CPU frequency/etc
git clone -o kirr -b x/hist https://lab.nexedi.com/kirr/ioping.git
pushd ioping
make -j`nproc`
popd
echo ok >deployed echo ok >deployed
echo "# deployed ok" echo "# deployed ok"
} }
...@@ -161,6 +169,12 @@ init_net() { ...@@ -161,6 +169,12 @@ init_net() {
myaddr="${myaddr4v[0]}" myaddr="${myaddr4v[0]}"
} }
# init_fs - do initial disk allocations
#
# Creates log/ and var/ under the current directory (it is ok if they already
# exist) and leaves their absolute paths in $log and $var.
init_fs() {
	# NOTE $log / $var are quoted on use: the current directory may contain
	# whitespace, and unquoted it would be split into several mkdir arguments.
	log=$(pwd)/log;	mkdir -p "$log"
	var=$(pwd)/var;	mkdir -p "$var"
}
# if we are abnormally terminating # if we are abnormally terminating
install_trap() { install_trap() {
trap 'set +e trap 'set +e
...@@ -321,6 +335,49 @@ system_info() { ...@@ -321,6 +335,49 @@ system_info() {
echo "WARNING: cpu: (up to that might be adding to networked and IPC request-reply latency)" echo "WARNING: cpu: (up to that might be adding to networked and IPC request-reply latency)"
} }
# disk under .
# TODO show all disks and just mark which is current
#
# NOTE expansions are quoted so that a mountpoint / device path with whitespace
# reaches findmnt / realpath / basename as a single argument.
mntpt=$(stat -c '%m' .)				# mountpoint of current filesystem
blkdev=$(findmnt -n -o source -v -u "$mntpt")	# mountpoint -> device
blkdev=$(realpath "$blkdev")			# /dev/mapper/vg0-root -> /dev/dm-0
blkdev1=$(basename "$blkdev")			# /dev/sda -> sda
# showblk1 <device>
#
# Print one "disk/<name>: <model> rev <revision> <size>" line for <device>
# (e.g. /dev/sda); model, revision and size are queried from lsblk.
showblk1() {
	# keep the variables local: the code that calls showblk1 has its own
	# $blkdev / $blkdev1 which must not be overwritten from here.
	local blkdev=$1
	local blkdev1
	blkdev1=$(basename "$blkdev")	# /dev/sda -> sda
	# XXX lsblk: tmpfs: not a block device
	printf "disk/%s: %s\n" "$blkdev1" \
		"$(lsblk -dn -o MODEL "$blkdev") rev $(lsblk -dn -o REV,SIZE "$blkdev")"
}
# blk_disk_name <name> [<sysfs-dir>] - print name of the whole disk behind block device <name>
#
# A partition is mapped to its disk (sda3 -> sda, nvme0n1p2 -> nvme0n1); a
# device that is not a partition (sda, nvme0n1, md0, dm-0, ...) is printed
# unchanged.
#
# Partitions are recognized by the `partition` attribute they have in sysfs,
# and the disk name is taken from the directory the partition entry lives in.
# This avoids guessing from the name: plainly stripping trailing digits turns
# nvme0n1p2 into nvme0n1p and md0 into md.
# Only when <name> is not present in sysfs at all is the old "strip trailing
# digits" rule used.
#
# <sysfs-dir> defaults to /sys/class/block; it is there mainly for testing.
blk_disk_name() {
	local name=$1 sysblk=${2:-/sys/class/block}
	if [ -e "$sysblk/$name/partition" ]; then
		# .../block/sda/sda3 -> sda
		basename "$(dirname "$(realpath "$sysblk/$name")")"
	elif [ -e "$sysblk/$name" ]; then
		printf '%s\n' "$name"
	else
		printf '%s\n' "$name" | sed -e 's/[0-9]*$//'	# sda3 -> sda
	fi
}

# showblk_stacked <name> <descr> - show md/dm device <name> and the disks it is built on
#
# Prints "disk/<name>:\t(<descr>) -> <slaves>" followed by one showblk1 line
# for the disk of every slave.
showblk_stacked() {
	local name=$1 descr=$2
	local slavev s
	slavev=$(ls -x "/sys/class/block/$name/slaves")
	printf "disk/%s:\t%s\n" "$name" "($descr) -> $slavev"
	# $slavev is intentionally unquoted: it is a whitespace-separated list
	for s in $slavev; do
		showblk1 "/dev/$(blk_disk_name "$s")"
	done
}

case "$blkdev1" in
md*)
	# software raid
	showblk_stacked "$blkdev1" "$(cat "/sys/class/block/$blkdev1/md/level")"
	;;
dm-*)
	# device mapper
	showblk_stacked "$blkdev1" "$(cat "/sys/class/block/$blkdev1/dm/name")"
	;;
*)
	# /dev/sda3 -> /dev/sda: keep the directory part of $blkdev, replace only
	# the device name
	blkdev_main="${blkdev%"$blkdev1"}$(blk_disk_name "$blkdev1")"
	showblk1 "$blkdev_main"
	;;
esac
printf "%-20s" "sw/python:"; proginfo python --version 2>&1 # https://bugs.python.org/issue18338 printf "%-20s" "sw/python:"; proginfo python --version 2>&1 # https://bugs.python.org/issue18338
printf "%-20s" "sw/go:"; proginfo go version printf "%-20s" "sw/go:"; proginfo go version
printf "%-20s" "sw/sqlite:"; proginfo python -c \ printf "%-20s" "sw/sqlite:"; proginfo python -c \
...@@ -419,8 +476,49 @@ bench_cpu() { ...@@ -419,8 +476,49 @@ bench_cpu() {
done done
} }
# bench_disk - benchmark direct (uncached) and cached random reads
#
# For every block size in $sizev ioping is run (via nrun) on the current
# directory for $benchtime: first with -D ("direct", no kernel cache), and
# then, after ioping.tmp was read several times to get it into pagecache, with
# -C ("cached").
#
# ioping output is passed through unchanged, with Benchmark<topic>-min and
# Benchmark<topic>-avg lines added after the min/avg/max/mdev summary, so that
# the whole output can be processed as std benchmark output.
bench_disk() {
	# helper variables are private to the benchmark
	local sizev benchtime size i

	echo -ne "node:\t";	xhostname
	echo "cluster:"

	# ioping2bench <topic> - converts timings from ioping to std benchmark
	ioping2bench() {
		# min/avg/max/mdev = 102.2 us / 138.6 us / 403.3 us / 12.2 us
		local val='\([0-9.]\+\) \([^ ]\+\)'	# <number> <unit>; 2 groups per value
		# \1 \2 = min, \3 \4 = avg; max and mdev are matched but not used
		sed -u -e \
			"s|^min/avg/max/mdev = $val / $val / $val / $val\$|&\nBenchmark$1-min 1 \\1 \\2/op\nBenchmark$1-avg 1 \\3 \\4/op|"
	}

	sizev="4K"	# 2M (XXX it is not linear, but we save benchmarking time for now)
	benchtime=3s

	# $sizev is intentionally unquoted: it is a whitespace-separated list
	for size in $sizev; do
		echo -e "\n*** disk: random direct (no kernel cache) $size-read latency"
		nrun ioping -D -i 0ms -s "$size" -S 1024M -w "$benchtime" -q -k . |\
			ioping2bench "disk/randread/direct/$size"
	done

	# warmup so kernel puts the file into pagecache
	for i in 1 2 3; do
		cat ioping.tmp >/dev/null
	done

	for size in $sizev; do
		echo -e "\n*** disk: random cached $size-read latency"
		nrun ioping -C -i 0ms -s "$size" -S 1024M -w "$benchtime" -q -k . |\
			ioping2bench "disk/randread/pagecache/$size"
	done
}
# command: benchmark local disk
#
# Handler of the `bench-disk` command: takes no arguments and simply delegates
# to bench_disk.
cmd_bench-disk() { bench_disk; }
# command: benchmark local cpu # command: benchmark local cpu
cmd_bench-cpu() { cmd_bench-cpu() {
bench_cpu bench_cpu
...@@ -463,6 +561,7 @@ The commands are: ...@@ -463,6 +561,7 @@ The commands are:
bench-cpu benchmark local cpu bench-cpu benchmark local cpu
bench-disk benchmark local disk
deploy deploy NEO & needed software for tests to remote host deploy deploy NEO & needed software for tests to remote host
...@@ -481,6 +580,7 @@ EOF ...@@ -481,6 +580,7 @@ EOF
# #
# build: needs to rebuild NEO stuff # build: needs to rebuild NEO stuff
# net: needs init_net # net: needs init_net
# fs: needs init_fs
case "$1" in case "$1" in
test) f=( );; test) f=( );;
test-local) f=(build );; test-local) f=(build );;
...@@ -488,6 +588,7 @@ test-go) f=(build );; ...@@ -488,6 +588,7 @@ test-go) f=(build );;
test-py) f=( );; test-py) f=( );;
bench-cpu) f=(build );; bench-cpu) f=(build );;
bench-disk) f=( fs );;
info) f=( );; info) f=( );;
info-local) f=( net );; info-local) f=( net );;
...@@ -523,6 +624,11 @@ for flag in ${f[*]}; do ...@@ -523,6 +624,11 @@ for flag in ${f[*]}; do
init_net init_net
;; ;;
fs)
# setup fs environment
init_fs
;;
*) *)
die "internal-error: command $1: invalid flag: $flag" die "internal-error: command $1: invalid flag: $flag"
;; ;;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment