Commit fc7a7259 authored by Ben Shi, committed by Cherry Zhang

cmd/internal/obj/arm64: add LDPW/LDPSW/STPW to arm64 assembler

1. STPW stores the lower 32-bit words of a pair of registers to memory.
2. LDPW loads two 32-bit words from memory, zero extends them to 64-bit,
and then copies to a pair of registers.
3. LDPSW does the same as LDPW, except that it sign-extends the loaded
words instead of zero-extending them.

This CL implements those 3 instructions and adds test cases.

Change-Id: Ied9834d8240240d23ce00e086b4ea456e1611f1a
Reviewed-on: https://go-review.googlesource.com/99956
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent aaeaad68
...@@ -458,18 +458,86 @@ again: ...@@ -458,18 +458,86 @@ again:
CALL foo(SB) CALL foo(SB)
// LDP/STP // LDP/STP
LDP (R0), (R1, R2) LDP (R0), (R1, R2) // 010840a9
LDP 8(R0), (R1, R2) LDP 8(R0), (R1, R2) // 018840a9
LDP.W 8(R0), (R1, R2) LDP -8(R0), (R1, R2) // 01887fa9
LDP.P 8(R0), (R1, R2) LDP 11(R0), (R1, R2) // 1b2c0091610b40a9
LDP 1024(R0), (R1, R2) // 1b001091610b40a9
LDP.W 8(R0), (R1, R2) // 0188c0a9
LDP.P 8(R0), (R1, R2) // 0188c0a8
LDP (RSP), (R1, R2) // e10b40a9
LDP 8(RSP), (R1, R2) // e18b40a9
LDP -8(RSP), (R1, R2) // e18b7fa9
LDP 11(RSP), (R1, R2) // fb2f0091610b40a9
LDP 1024(RSP), (R1, R2) // fb031091610b40a9
LDP.W 8(RSP), (R1, R2) // e18bc0a9
LDP.P 8(RSP), (R1, R2) // e18bc0a8
LDP x(SB), (R1, R2) LDP x(SB), (R1, R2)
LDP x+8(SB), (R1, R2) LDP x+8(SB), (R1, R2)
STP (R3, R4), (R5) LDPW (R0), (R1, R2) // 01084029
STP (R3, R4), 8(R5) LDPW 4(R0), (R1, R2) // 01884029
STP.W (R3, R4), 8(R5) LDPW -4(R0), (R1, R2) // 01887f29
STP.P (R3, R4), 8(R5) LDPW.W 4(R0), (R1, R2) // 0188c029
LDPW.P 4(R0), (R1, R2) // 0188c028
LDPW 11(R0), (R1, R2) // 1b2c0091610b4029
LDPW 1024(R0), (R1, R2) // 1b001091610b4029
LDPW (RSP), (R1, R2) // e10b4029
LDPW 4(RSP), (R1, R2) // e18b4029
LDPW -4(RSP), (R1, R2) // e18b7f29
LDPW.W 4(RSP), (R1, R2) // e18bc029
LDPW.P 4(RSP), (R1, R2) // e18bc028
LDPW 11(RSP), (R1, R2) // fb2f0091610b4029
LDPW 1024(RSP), (R1, R2) // fb031091610b4029
LDPW x(SB), (R1, R2)
LDPW x+8(SB), (R1, R2)
LDPSW (R0), (R1, R2) // 01084069
LDPSW 4(R0), (R1, R2) // 01884069
LDPSW -4(R0), (R1, R2) // 01887f69
LDPSW.W 4(R0), (R1, R2) // 0188c069
LDPSW.P 4(R0), (R1, R2) // 0188c068
LDPSW 11(R0), (R1, R2) // 1b2c0091610b4069
LDPSW 1024(R0), (R1, R2) // 1b001091610b4069
LDPSW (RSP), (R1, R2) // e10b4069
LDPSW 4(RSP), (R1, R2) // e18b4069
LDPSW -4(RSP), (R1, R2) // e18b7f69
LDPSW.W 4(RSP), (R1, R2) // e18bc069
LDPSW.P 4(RSP), (R1, R2) // e18bc068
LDPSW 11(RSP), (R1, R2) // fb2f0091610b4069
LDPSW 1024(RSP), (R1, R2) // fb031091610b4069
LDPSW x(SB), (R1, R2)
LDPSW x+8(SB), (R1, R2)
STP (R3, R4), (R5) // a31000a9
STP (R3, R4), 8(R5) // a39000a9
STP.W (R3, R4), 8(R5) // a39080a9
STP.P (R3, R4), 8(R5) // a39080a8
STP (R3, R4), -8(R5) // a3903fa9
STP (R3, R4), 11(R0) // 1b2c0091631300a9
STP (R3, R4), 1024(R0) // 1b001091631300a9
STP (R3, R4), (RSP) // e31300a9
STP (R3, R4), 8(RSP) // e39300a9
STP.W (R3, R4), 8(RSP) // e39380a9
STP.P (R3, R4), 8(RSP) // e39380a8
STP (R3, R4), -8(RSP) // e3933fa9
STP (R3, R4), 11(RSP) // fb2f0091631300a9
STP (R3, R4), 1024(RSP) // fb031091631300a9
STP (R3, R4), x(SB) STP (R3, R4), x(SB)
STP (R3, R4), x+8(SB) STP (R3, R4), x+8(SB)
STPW (R3, R4), (R5) // a3100029
STPW (R3, R4), 4(R5) // a3900029
STPW.W (R3, R4), 4(R5) // a3908029
STPW.P (R3, R4), 4(R5) // a3908028
STPW (R3, R4), -4(R5) // a3903f29
STPW (R3, R4), 11(R0) // 1b2c009163130029
STPW (R3, R4), 1024(R0) // 1b00109163130029
STPW (R3, R4), (RSP) // e3130029
STPW (R3, R4), 4(RSP) // e3930029
STPW.W (R3, R4), 4(RSP) // e3938029
STPW.P (R3, R4), 4(RSP) // e3938028
STPW (R3, R4), -4(RSP) // e3933f29
STPW (R3, R4), 11(RSP) // fb2f009163130029
STPW (R3, R4), 1024(RSP) // fb03109163130029
STPW (R3, R4), x(SB)
STPW (R3, R4), x+8(SB)
// END // END
// //
......
...@@ -425,6 +425,7 @@ const ( ...@@ -425,6 +425,7 @@ const (
C_NPAUTO // -512 <= x < 0, 0 mod 8 C_NPAUTO // -512 <= x < 0, 0 mod 8
C_NSAUTO // -256 <= x < 0 C_NSAUTO // -256 <= x < 0
C_PSAUTO_8 // 0 to 255, 0 mod 8 C_PSAUTO_8 // 0 to 255, 0 mod 8
C_PSAUTO_4 // 0 to 255, 0 mod 4
C_PSAUTO // 0 to 255 C_PSAUTO // 0 to 255
C_PPAUTO // 0 to 504, 0 mod 8 C_PPAUTO // 0 to 504, 0 mod 8
C_UAUTO4K_8 // 0 to 4095, 0 mod 8 C_UAUTO4K_8 // 0 to 4095, 0 mod 8
...@@ -450,6 +451,7 @@ const ( ...@@ -450,6 +451,7 @@ const (
C_NPOREG // must mirror NPAUTO, etc C_NPOREG // must mirror NPAUTO, etc
C_NSOREG C_NSOREG
C_PSOREG_8 C_PSOREG_8
C_PSOREG_4
C_PSOREG C_PSOREG
C_PPOREG C_PPOREG
C_UOREG4K_8 C_UOREG4K_8
...@@ -594,6 +596,8 @@ const ( ...@@ -594,6 +596,8 @@ const (
ALDAXRH ALDAXRH
ALDAXRW ALDAXRW
ALDP ALDP
ALDPW
ALDPSW
ALDXR ALDXR
ALDXRB ALDXRB
ALDXRH ALDXRH
...@@ -686,6 +690,7 @@ const ( ...@@ -686,6 +690,7 @@ const (
ASTLXRH ASTLXRH
ASTLXRW ASTLXRW
ASTP ASTP
ASTPW
ASUB ASUB
ASUBS ASUBS
ASUBSW ASUBSW
...@@ -899,4 +904,4 @@ const ( ...@@ -899,4 +904,4 @@ const (
ARNG_H ARNG_H
ARNG_S ARNG_S
ARNG_D ARNG_D
) )
\ No newline at end of file
...@@ -107,6 +107,8 @@ var Anames = []string{ ...@@ -107,6 +107,8 @@ var Anames = []string{
"LDAXRH", "LDAXRH",
"LDAXRW", "LDAXRW",
"LDP", "LDP",
"LDPW",
"LDPSW",
"LDXR", "LDXR",
"LDXRB", "LDXRB",
"LDXRH", "LDXRH",
...@@ -199,6 +201,7 @@ var Anames = []string{ ...@@ -199,6 +201,7 @@ var Anames = []string{
"STLXRH", "STLXRH",
"STLXRW", "STLXRW",
"STP", "STP",
"STPW",
"SUB", "SUB",
"SUBS", "SUBS",
"SUBSW", "SUBSW",
......
...@@ -39,6 +39,7 @@ var cnames7 = []string{ ...@@ -39,6 +39,7 @@ var cnames7 = []string{
"NPAUTO", "NPAUTO",
"NSAUTO", "NSAUTO",
"PSAUTO_8", "PSAUTO_8",
"PSAUTO_4",
"PSAUTO", "PSAUTO",
"PPAUTO", "PPAUTO",
"UAUTO4K_8", "UAUTO4K_8",
...@@ -62,6 +63,7 @@ var cnames7 = []string{ ...@@ -62,6 +63,7 @@ var cnames7 = []string{
"NPOREG", "NPOREG",
"NSOREG", "NSOREG",
"PSOREG_8", "PSOREG_8",
"PSOREG_4",
"PSOREG", "PSOREG",
"PPOREG", "PPOREG",
"UOREG4K_8", "UOREG4K_8",
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment