Commit 78baea83 authored by Meng Zhuo's avatar Meng Zhuo Committed by Cherry Zhang

internal/bytealg: add assembly implementation of Compare/CompareString on mips64x

name                         old time/op    new time/op    delta
BytesCompare/1                 28.9ns ± 4%    22.8ns ± 0%    -21.23%  (p=0.000 n=9+10)
BytesCompare/2                 34.6ns ± 0%    23.5ns ± 0%    -32.01%  (p=0.000 n=8+10)
BytesCompare/4                 54.6ns ± 0%    41.4ns ± 0%    -24.18%  (p=0.001 n=8+9)
BytesCompare/8                 73.9ns ± 0%    49.7ns ± 0%    -32.75%  (p=0.002 n=8+10)
BytesCompare/16                 113ns ± 0%      23ns ± 0%    -79.56%  (p=0.000 n=9+10)
BytesCompare/32                 190ns ± 0%      26ns ± 0%    -86.53%  (p=0.000 n=10+10)
BytesCompare/64                 345ns ± 0%      44ns ± 0%    -87.19%  (p=0.000 n=10+10)
BytesCompare/128                654ns ± 0%      52ns ± 0%    -91.97%  (p=0.000 n=9+8)
BytesCompare/256               1.27µs ± 0%    0.08µs ± 1%    -94.10%  (p=0.000 n=8+10)
BytesCompare/512               2.51µs ± 0%    0.12µs ± 0%    -95.26%  (p=0.000 n=9+9)
BytesCompare/1024              4.99µs ± 0%    0.21µs ± 1%    -95.84%  (p=0.000 n=8+10)
BytesCompare/2048              9.94µs ± 0%    0.38µs ± 0%    -96.13%  (p=0.000 n=8+10)
CompareBytesEqual               105ns ± 0%      64ns ± 0%    -39.05%  (p=0.000 n=10+9)
CompareBytesToNil              34.8ns ± 1%    39.5ns ± 3%    +13.48%  (p=0.000 n=10+10)
CompareBytesEmpty              33.6ns ± 3%    36.6ns ± 0%     +8.77%  (p=0.000 n=10+10)
CompareBytesIdentical          29.7ns ± 0%    36.6ns ± 0%    +23.23%  (p=0.000 n=10+10)
CompareBytesSameLength         69.1ns ± 0%    51.1ns ± 0%    -26.05%  (p=0.000 n=10+10)
CompareBytesDifferentLength    69.8ns ± 0%    51.1ns ± 0%    -26.79%  (p=0.000 n=10+10)
CompareBytesBigUnaligned       5.15ms ± 0%    2.18ms ± 0%    -57.62%  (p=0.000 n=9+9)
CompareBytesBig                5.28ms ± 0%    0.28ms ± 0%    -94.64%  (p=0.000 n=8+10)
CompareBytesBigIdentical       29.7ns ± 0%    36.8ns ± 0%    +23.91%  (p=0.000 n=8+8)

name                         old speed      new speed      delta
CompareBytesBigUnaligned      204MB/s ± 0%   480MB/s ± 0%   +135.94%  (p=0.000 n=9+9)
CompareBytesBig               198MB/s ± 0%  3703MB/s ± 0%  +1765.85%  (p=0.000 n=8+10)
CompareBytesBigIdentical     35.3TB/s ± 0%  28.5TB/s ± 0%    -19.31%  (p=0.000 n=8+8)

Change-Id: I112d9de2324986fd65ed237a86b11856a1c0f4a7
Reviewed-on: https://go-review.googlesource.com/c/go/+/196837Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 44e752c3
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm // +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm,!mips64,!mips64le
package bytealg package bytealg
......
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build mips64 mips64le
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT,$0-56
MOVV a_base+0(FP), R3
MOVV b_base+24(FP), R4
MOVV a_len+8(FP), R1
MOVV b_len+32(FP), R2
MOVV $ret+48(FP), R9
JMP cmpbody<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
MOVV a_base+0(FP), R3
MOVV b_base+16(FP), R4
MOVV a_len+8(FP), R1
MOVV b_len+24(FP), R2
MOVV $ret+32(FP), R9
JMP cmpbody<>(SB)
// On entry:
// R1 length of a
// R2 length of b
// R3 points to the start of a
// R4 points to the start of b
// R9 points to the return value (-1/0/1)
//
// On exit:
// R6, R7, R8, R10, R11, R13, R14 clobbered
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
BEQ R3, R4, samebytes // same start of a and b
SGTU R1, R2, R7
BNE R0, R7, r2_lt_r1
MOVV R1, R10
JMP entry
r2_lt_r1:
MOVV R2, R10 // R10 is min(R1, R2)
entry:
ADDV R3, R10, R8 // R3 start of a, R8 end of a
BEQ R3, R8, samebytes // length is 0
SRLV $4, R10 // R10 is number of chunks
BEQ R0, R10, byte_loop
// make sure both a and b are aligned.
OR R3, R4, R11
AND $7, R11
BNE R0, R11, byte_loop
chunk16_loop:
BEQ R0, R10, byte_loop
MOVV (R3), R6
MOVV (R4), R7
BNE R6, R7, byte_cmp
MOVV 8(R3), R13
MOVV 8(R4), R14
ADDV $16, R3
ADDV $16, R4
SUBVU $1, R10
BEQ R13, R14, chunk16_loop
MOVV R13, R6
MOVV R14, R7
JMP byte_cmp
byte_loop:
BEQ R3, R8, samebytes
MOVBU (R3), R6
ADDVU $1, R3
MOVBU (R4), R7
ADDVU $1, R4
BEQ R6, R7, byte_loop
byte_cmp:
SGTU R6, R7, R8 // R8 = 1 if (R6 > R7)
BNE R0, R8, ret
MOVV $-1, R8
JMP ret
samebytes:
SGTU R1, R2, R6
SGTU R2, R1, R7
SUBV R7, R6, R8
ret:
MOVV R8, (R9)
RET
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm // +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle mips64 mips64le wasm
package bytealg package bytealg
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment