Commit a1b1ba7d authored by Meng Zhuo's avatar Meng Zhuo Committed by Cherry Zhang

internal/bytealg: (re)adding mips64x compare implementation

The original CL of mips64x compare function has been reverted due to wrong
implement for little endian.
Original CL: https://go-review.googlesource.com/c/go/+/196837

name                         old time/op    new time/op    delta
BytesCompare/1                 28.9ns ± 4%    22.1ns ± 0%    -23.60%  (p=0.000 n=9+8)
BytesCompare/2                 34.6ns ± 0%    23.1ns ± 0%    -33.25%  (p=0.000 n=8+10)
BytesCompare/4                 54.6ns ± 0%    40.8ns ± 0%    -25.27%  (p=0.000 n=8+8)
BytesCompare/8                 73.9ns ± 0%    49.1ns ± 0%    -33.56%  (p=0.000 n=8+8)
BytesCompare/16                 113ns ± 0%      24ns ± 0%    -79.20%  (p=0.000 n=9+9)
BytesCompare/32                 190ns ± 0%      26ns ± 0%    -86.53%  (p=0.000 n=10+10)
BytesCompare/64                 345ns ± 0%      44ns ± 0%    -87.19%  (p=0.000 n=10+8)
BytesCompare/128                654ns ± 0%      52ns ± 0%    -91.97%  (p=0.000 n=9+8)
BytesCompare/256               1.27µs ± 0%    0.07µs ± 0%    -94.14%  (p=0.001 n=8+9)
BytesCompare/512               2.51µs ± 0%    0.12µs ± 0%    -95.26%  (p=0.000 n=9+10)
BytesCompare/1024              4.99µs ± 0%    0.21µs ± 0%    -95.85%  (p=0.000 n=8+10)
BytesCompare/2048              9.94µs ± 0%    0.38µs ± 0%    -96.14%  (p=0.000 n=8+8)
CompareBytesEqual               105ns ± 0%      64ns ± 0%    -39.43%  (p=0.000 n=10+9)
CompareBytesToNil              34.8ns ± 1%    38.6ns ± 3%    +11.01%  (p=0.000 n=10+10)
CompareBytesEmpty              33.6ns ± 3%    36.6ns ± 0%     +8.77%  (p=0.000 n=10+8)
CompareBytesIdentical          29.7ns ± 0%    40.5ns ± 1%    +36.45%  (p=0.000 n=10+8)
CompareBytesSameLength         69.1ns ± 0%    51.8ns ± 0%    -25.04%  (p=0.000 n=10+9)
CompareBytesDifferentLength    69.8ns ± 0%    52.5ns ± 0%    -24.79%  (p=0.000 n=10+8)
CompareBytesBigUnaligned       5.15ms ± 0%    2.19ms ± 0%    -57.59%  (p=0.000 n=9+9)
CompareBytesBig                5.28ms ± 0%    0.28ms ± 0%    -94.64%  (p=0.000 n=8+8)
CompareBytesBigIdentical       29.7ns ± 0%    36.9ns ± 2%    +24.11%  (p=0.000 n=8+10)

name                         old speed      new speed      delta
CompareBytesBigUnaligned      204MB/s ± 0%   480MB/s ± 0%   +135.77%  (p=0.000 n=9+9)
CompareBytesBig               198MB/s ± 0%  3704MB/s ± 0%  +1765.97%  (p=0.000 n=8+8)
CompareBytesBigIdentical     35.3TB/s ± 0%  28.4TB/s ± 2%    -19.44%  (p=0.000 n=8+10)

Fixes #34549

Change-Id: I2ef29f13cdd4229745ac2d018bb53c76f2ff1209
Reviewed-on: https://go-review.googlesource.com/c/go/+/197557Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 2bf7a925
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm
// +build !386,!amd64,!amd64p32,!s390x,!arm,!arm64,!ppc64,!ppc64le,!mips,!mipsle,!wasm,!mips64,!mips64le
package bytealg
......
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build mips64 mips64le
#include "go_asm.h"
#include "textflag.h"
TEXT ·Compare(SB),NOSPLIT,$0-56
MOVV a_base+0(FP), R3
MOVV b_base+24(FP), R4
MOVV a_len+8(FP), R1
MOVV b_len+32(FP), R2
MOVV $ret+48(FP), R9
JMP cmpbody<>(SB)
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
MOVV a_base+0(FP), R3
MOVV b_base+16(FP), R4
MOVV a_len+8(FP), R1
MOVV b_len+24(FP), R2
MOVV $ret+32(FP), R9
JMP cmpbody<>(SB)
// On entry:
// R1 length of a
// R2 length of b
// R3 points to the start of a
// R4 points to the start of b
// R9 points to the return value (-1/0/1)
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
BEQ R3, R4, samebytes // same start of a and b
SGTU R1, R2, R7
BNE R0, R7, r2_lt_r1
MOVV R1, R10
JMP entry
r2_lt_r1:
MOVV R2, R10 // R10 is min(R1, R2)
entry:
ADDV R3, R10, R8 // R3 start of a, R8 end of a
BEQ R3, R8, samebytes // length is 0
SRLV $4, R10 // R10 is number of chunks
BEQ R0, R10, byte_loop
// make sure both a and b are aligned.
OR R3, R4, R11
AND $7, R11
BNE R0, R11, byte_loop
chunk16_loop:
BEQ R0, R10, byte_loop
MOVV (R3), R6
MOVV (R4), R7
BNE R6, R7, byte_loop
MOVV 8(R3), R13
MOVV 8(R4), R14
ADDV $16, R3
ADDV $16, R4
SUBVU $1, R10
BEQ R13, R14, chunk16_loop
SUBV $8, R3
SUBV $8, R4
byte_loop:
BEQ R3, R8, samebytes
MOVBU (R3), R6
ADDVU $1, R3
MOVBU (R4), R7
ADDVU $1, R4
BEQ R6, R7, byte_loop
byte_cmp:
SGTU R6, R7, R8 // R8 = 1 if (R6 > R7)
BNE R0, R8, ret
MOVV $-1, R8
JMP ret
samebytes:
SGTU R1, R2, R6
SGTU R2, R1, R7
SUBV R7, R6, R8
ret:
MOVV R8, (R9)
RET
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm
// +build 386 amd64 amd64p32 s390x arm arm64 ppc64 ppc64le mips mipsle wasm mips64 mips64le
package bytealg
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment