Commit adbfdfe3 authored by David du Colombier's avatar David du Colombier

cmd/compile: don't use MOVOstore for move on plan9/amd64

The SSA compiler currently generates MOVOstore instructions
to optimize 16 bytes moves on AMD64 architecture.

However, we can't use the MOVOstore instruction on Plan 9,
because floating point operations are not allowed in the
note handler.

We rely on the useSSE flag to disable the use of the
MOVOstore instruction on Plan 9 and replace it by two
MOVQstore instructions.

Fixes #21625

Change-Id: Idfefcceadccafe1752b059b5fe113ce566c0e71c
Reviewed-on: https://go-review.googlesource.com/59171
Run-TryBot: David du Colombier <0intro@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIlya Tocar <ilya.tocar@intel.com>
parent c875cea7
...@@ -261,6 +261,11 @@ var allAsmTests = []*asmTests{ ...@@ -261,6 +261,11 @@ var allAsmTests = []*asmTests{
os: "linux", os: "linux",
tests: linuxPPC64LETests, tests: linuxPPC64LETests,
}, },
{
arch: "amd64",
os: "plan9",
tests: plan9AMD64Tests,
},
} }
var linuxAMD64Tests = []*asmTest{ var linuxAMD64Tests = []*asmTest{
...@@ -2126,6 +2131,34 @@ var linuxPPC64LETests = []*asmTest{ ...@@ -2126,6 +2131,34 @@ var linuxPPC64LETests = []*asmTest{
}, },
} }
var plan9AMD64Tests = []*asmTest{
// We should make sure that the compiler doesn't generate floating point
// instructions for non-float operations on Plan 9, because floating point
// operations are not allowed in the note handler.
// Array zeroing.
{
`
func $() [16]byte {
var a [16]byte
return a
}
`,
[]string{"\tMOVQ\t\\$0, \"\""},
[]string{},
},
// Array copy.
{
`
func $(a [16]byte) (b [16]byte) {
b = a
return
}
`,
[]string{"\tMOVQ\t\"\"\\.a\\+[0-9]+\\(SP\\), (AX|CX)", "\tMOVQ\t(AX|CX), \"\"\\.b\\+[0-9]+\\(SP\\)"},
[]string{},
},
}
// TestLineNumber checks to make sure the generated assembly has line numbers // TestLineNumber checks to make sure the generated assembly has line numbers
// see issue #16214 // see issue #16214
func TestLineNumber(t *testing.T) { func TestLineNumber(t *testing.T) {
......
...@@ -319,7 +319,10 @@ ...@@ -319,7 +319,10 @@
(Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem) (Move [2] dst src mem) -> (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem) (Move [4] dst src mem) -> (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem) (Move [8] dst src mem) -> (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) -> (MOVOstore dst (MOVOload src mem) mem) (Move [16] dst src mem) && config.useSSE -> (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE ->
(MOVQstore [8] dst (MOVQload [8] src mem)
(MOVQstore dst (MOVQload src mem) mem))
(Move [3] dst src mem) -> (Move [3] dst src mem) ->
(MOVBstore [2] dst (MOVBload [2] src mem) (MOVBstore [2] dst (MOVBload [2] src mem)
(MOVWstore dst (MOVWload src mem) mem)) (MOVWstore dst (MOVWload src mem) mem))
...@@ -344,11 +347,18 @@ ...@@ -344,11 +347,18 @@
(OffPtr <src.Type> src [s%16]) (OffPtr <src.Type> src [s%16])
(MOVQstore dst (MOVQload src mem) mem)) (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem) (Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 > 8 -> && s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE ->
(Move [s-s%16] (Move [s-s%16]
(OffPtr <dst.Type> dst [s%16]) (OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16]) (OffPtr <src.Type> src [s%16])
(MOVOstore dst (MOVOload src mem) mem)) (MOVOstore dst (MOVOload src mem) mem))
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE ->
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(MOVQstore [8] dst (MOVQload [8] src mem)
(MOVQstore dst (MOVQload src mem) mem)))
// Medium copying uses a duff device. // Medium copying uses a duff device.
(Move [s] dst src mem) (Move [s] dst src mem)
......
...@@ -41824,6 +41824,8 @@ func rewriteValueAMD64_OpMod8u_0(v *Value) bool { ...@@ -41824,6 +41824,8 @@ func rewriteValueAMD64_OpMod8u_0(v *Value) bool {
func rewriteValueAMD64_OpMove_0(v *Value) bool { func rewriteValueAMD64_OpMove_0(v *Value) bool {
b := v.Block b := v.Block
_ = b _ = b
config := b.Func.Config
_ = config
typ := &b.Func.Config.Types typ := &b.Func.Config.Types
_ = typ _ = typ
// match: (Move [0] _ _ mem) // match: (Move [0] _ _ mem)
...@@ -41921,7 +41923,7 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool { ...@@ -41921,7 +41923,7 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
return true return true
} }
// match: (Move [16] dst src mem) // match: (Move [16] dst src mem)
// cond: // cond: config.useSSE
// result: (MOVOstore dst (MOVOload src mem) mem) // result: (MOVOstore dst (MOVOload src mem) mem)
for { for {
if v.AuxInt != 16 { if v.AuxInt != 16 {
...@@ -41931,6 +41933,9 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool { ...@@ -41931,6 +41933,9 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
dst := v.Args[0] dst := v.Args[0]
src := v.Args[1] src := v.Args[1]
mem := v.Args[2] mem := v.Args[2]
if !(config.useSSE) {
break
}
v.reset(OpAMD64MOVOstore) v.reset(OpAMD64MOVOstore)
v.AddArg(dst) v.AddArg(dst)
v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128) v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
...@@ -41940,6 +41945,38 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool { ...@@ -41940,6 +41945,38 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
v.AddArg(mem) v.AddArg(mem)
return true return true
} }
// match: (Move [16] dst src mem)
// cond: !config.useSSE
// result: (MOVQstore [8] dst (MOVQload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
for {
if v.AuxInt != 16 {
break
}
_ = v.Args[2]
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
if !(!config.useSSE) {
break
}
v.reset(OpAMD64MOVQstore)
v.AuxInt = 8
v.AddArg(dst)
v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
v0.AuxInt = 8
v0.AddArg(src)
v0.AddArg(mem)
v.AddArg(v0)
v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
v1.AddArg(dst)
v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
v2.AddArg(src)
v2.AddArg(mem)
v1.AddArg(v2)
v1.AddArg(mem)
v.AddArg(v1)
return true
}
// match: (Move [3] dst src mem) // match: (Move [3] dst src mem)
// cond: // cond:
// result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem)) // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
...@@ -42027,6 +42064,15 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool { ...@@ -42027,6 +42064,15 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
v.AddArg(v1) v.AddArg(v1)
return true return true
} }
return false
}
func rewriteValueAMD64_OpMove_10(v *Value) bool {
b := v.Block
_ = b
config := b.Func.Config
_ = config
typ := &b.Func.Config.Types
_ = typ
// match: (Move [7] dst src mem) // match: (Move [7] dst src mem)
// cond: // cond:
// result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem)) // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem))
...@@ -42056,15 +42102,6 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool { ...@@ -42056,15 +42102,6 @@ func rewriteValueAMD64_OpMove_0(v *Value) bool {
v.AddArg(v1) v.AddArg(v1)
return true return true
} }
return false
}
func rewriteValueAMD64_OpMove_10(v *Value) bool {
b := v.Block
_ = b
config := b.Func.Config
_ = config
typ := &b.Func.Config.Types
_ = typ
// match: (Move [s] dst src mem) // match: (Move [s] dst src mem)
// cond: s > 8 && s < 16 // cond: s > 8 && s < 16
// result: (MOVQstore [s-8] dst (MOVQload [s-8] src mem) (MOVQstore dst (MOVQload src mem) mem)) // result: (MOVQstore [s-8] dst (MOVQload [s-8] src mem) (MOVQstore dst (MOVQload src mem) mem))
...@@ -42128,7 +42165,7 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool { ...@@ -42128,7 +42165,7 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool {
return true return true
} }
// match: (Move [s] dst src mem) // match: (Move [s] dst src mem)
// cond: s > 16 && s%16 != 0 && s%16 > 8 // cond: s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE
// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVOstore dst (MOVOload src mem) mem)) // result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVOstore dst (MOVOload src mem) mem))
for { for {
s := v.AuxInt s := v.AuxInt
...@@ -42136,7 +42173,7 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool { ...@@ -42136,7 +42173,7 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool {
dst := v.Args[0] dst := v.Args[0]
src := v.Args[1] src := v.Args[1]
mem := v.Args[2] mem := v.Args[2]
if !(s > 16 && s%16 != 0 && s%16 > 8) { if !(s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE) {
break break
} }
v.reset(OpMove) v.reset(OpMove)
...@@ -42160,6 +42197,47 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool { ...@@ -42160,6 +42197,47 @@ func rewriteValueAMD64_OpMove_10(v *Value) bool {
return true return true
} }
// match: (Move [s] dst src mem) // match: (Move [s] dst src mem)
// cond: s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE
// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVQstore [8] dst (MOVQload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)))
for {
s := v.AuxInt
_ = v.Args[2]
dst := v.Args[0]
src := v.Args[1]
mem := v.Args[2]
if !(s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE) {
break
}
v.reset(OpMove)
v.AuxInt = s - s%16
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = s % 16
v0.AddArg(dst)
v.AddArg(v0)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = s % 16
v1.AddArg(src)
v.AddArg(v1)
v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
v2.AuxInt = 8
v2.AddArg(dst)
v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
v3.AuxInt = 8
v3.AddArg(src)
v3.AddArg(mem)
v2.AddArg(v3)
v4 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
v4.AddArg(dst)
v5 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
v5.AddArg(src)
v5.AddArg(mem)
v4.AddArg(v5)
v4.AddArg(mem)
v2.AddArg(v4)
v.AddArg(v2)
return true
}
// match: (Move [s] dst src mem)
// cond: s >= 32 && s <= 16*64 && s%16 == 0 && !config.noDuffDevice // cond: s >= 32 && s <= 16*64 && s%16 == 0 && !config.noDuffDevice
// result: (DUFFCOPY [14*(64-s/16)] dst src mem) // result: (DUFFCOPY [14*(64-s/16)] dst src mem)
for { for {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment