Commit 1c50fcf8 authored by Richard Musiol, committed by Richard Musiol

cmd/compile: add 32 bit float registers/variables on wasm

Before this change, wasm only used float variables with a size of 64 bit
and applied rounding to 32 bit precision where necessary. This change
adds proper 32 bit float variables.

Reduces the size of pkg/js_wasm by 254 bytes.

Change-Id: Ieabe846a8cb283d66def3cdf11e2523b3b31f345
Reviewed-on: https://go-review.googlesource.com/c/go/+/195117Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent d6c2f1e9
...@@ -25,6 +25,8 @@ type Config struct { ...@@ -25,6 +25,8 @@ type Config struct {
registers []Register // machine registers registers []Register // machine registers
gpRegMask regMask // general purpose integer register mask gpRegMask regMask // general purpose integer register mask
fpRegMask regMask // floating point register mask fpRegMask regMask // floating point register mask
fp32RegMask regMask // floating point register mask
fp64RegMask regMask // floating point register mask
specialRegMask regMask // special register mask specialRegMask regMask // special register mask
GCRegMap []*Register // garbage collector register map, by GC register index GCRegMap []*Register // garbage collector register map, by GC register index
FPReg int8 // register number of frame pointer, -1 if not used FPReg int8 // register number of frame pointer, -1 if not used
...@@ -324,6 +326,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config ...@@ -324,6 +326,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config
c.registers = registersWasm[:] c.registers = registersWasm[:]
c.gpRegMask = gpRegMaskWasm c.gpRegMask = gpRegMaskWasm
c.fpRegMask = fpRegMaskWasm c.fpRegMask = fpRegMaskWasm
c.fp32RegMask = fp32RegMaskWasm
c.fp64RegMask = fp64RegMaskWasm
c.FPReg = framepointerRegWasm c.FPReg = framepointerRegWasm
c.LinkReg = linkRegWasm c.LinkReg = linkRegWasm
c.hasGReg = true c.hasGReg = true
......
...@@ -4,13 +4,13 @@ ...@@ -4,13 +4,13 @@
// Lowering arithmetic // Lowering arithmetic
(Add(64|32|16|8|Ptr) x y) -> (I64Add x y) (Add(64|32|16|8|Ptr) x y) -> (I64Add x y)
(Add(64|32)F x y) -> (F64Add x y) (Add(64|32)F x y) -> (F(64|32)Add x y)
(Sub(64|32|16|8|Ptr) x y) -> (I64Sub x y) (Sub(64|32|16|8|Ptr) x y) -> (I64Sub x y)
(Sub(64|32)F x y) -> (F64Sub x y) (Sub(64|32)F x y) -> (F(64|32)Sub x y)
(Mul(64|32|16|8) x y) -> (I64Mul x y) (Mul(64|32|16|8) x y) -> (I64Mul x y)
(Mul(64|32)F x y) -> (F64Mul x y) (Mul(64|32)F x y) -> (F(64|32)Mul x y)
(Div64 x y) -> (I64DivS x y) (Div64 x y) -> (I64DivS x y)
(Div64u x y) -> (I64DivU x y) (Div64u x y) -> (I64DivU x y)
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
(Div16u x y) -> (I64DivU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Div16u x y) -> (I64DivU (ZeroExt16to64 x) (ZeroExt16to64 y))
(Div8 x y) -> (I64DivS (SignExt8to64 x) (SignExt8to64 y)) (Div8 x y) -> (I64DivS (SignExt8to64 x) (SignExt8to64 y))
(Div8u x y) -> (I64DivU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Div8u x y) -> (I64DivU (ZeroExt8to64 x) (ZeroExt8to64 y))
(Div(64|32)F x y) -> (F64Div x y) (Div(64|32)F x y) -> (F(64|32)Div x y)
(Mod64 x y) -> (I64RemS x y) (Mod64 x y) -> (I64RemS x y)
(Mod64u x y) -> (I64RemU x y) (Mod64u x y) -> (I64RemU x y)
...@@ -38,8 +38,7 @@ ...@@ -38,8 +38,7 @@
(Xor(64|32|16|8) x y) -> (I64Xor x y) (Xor(64|32|16|8) x y) -> (I64Xor x y)
(Neg(64|32|16|8) x) -> (I64Sub (I64Const [0]) x) (Neg(64|32|16|8) x) -> (I64Sub (I64Const [0]) x)
(Neg32F x) -> (F64Neg x) (Neg(64|32)F x) -> (F(64|32)Neg x)
(Neg64F x) -> (F64Neg x)
(Com(64|32|16|8) x) -> (I64Xor x (I64Const [-1])) (Com(64|32|16|8) x) -> (I64Xor x (I64Const [-1]))
...@@ -75,28 +74,24 @@ ...@@ -75,28 +74,24 @@
(Trunc16to8 x) -> x (Trunc16to8 x) -> x
// Lowering float <-> int // Lowering float <-> int
(Cvt32to32F x) -> (LoweredRound32F (F64ConvertI64S (SignExt32to64 x))) (Cvt32to(64|32)F x) -> (F(64|32)ConvertI64S (SignExt32to64 x))
(Cvt32to64F x) -> (F64ConvertI64S (SignExt32to64 x)) (Cvt64to(64|32)F x) -> (F(64|32)ConvertI64S x)
(Cvt64to32F x) -> (LoweredRound32F (F64ConvertI64S x)) (Cvt32Uto(64|32)F x) -> (F(64|32)ConvertI64U (ZeroExt32to64 x))
(Cvt64to64F x) -> (F64ConvertI64S x) (Cvt64Uto(64|32)F x) -> (F(64|32)ConvertI64U x)
(Cvt32Uto32F x) -> (LoweredRound32F (F64ConvertI64U (ZeroExt32to64 x)))
(Cvt32Uto64F x) -> (F64ConvertI64U (ZeroExt32to64 x)) (Cvt32Fto32 x) -> (I64TruncSatF32S x)
(Cvt64Uto32F x) -> (LoweredRound32F (F64ConvertI64U x)) (Cvt32Fto64 x) -> (I64TruncSatF32S x)
(Cvt64Uto64F x) -> (F64ConvertI64U x)
(Cvt32Fto32 x) -> (I64TruncSatF64S x)
(Cvt32Fto64 x) -> (I64TruncSatF64S x)
(Cvt64Fto32 x) -> (I64TruncSatF64S x) (Cvt64Fto32 x) -> (I64TruncSatF64S x)
(Cvt64Fto64 x) -> (I64TruncSatF64S x) (Cvt64Fto64 x) -> (I64TruncSatF64S x)
(Cvt32Fto32U x) -> (I64TruncSatF64U x) (Cvt32Fto32U x) -> (I64TruncSatF32U x)
(Cvt32Fto64U x) -> (I64TruncSatF64U x) (Cvt32Fto64U x) -> (I64TruncSatF32U x)
(Cvt64Fto32U x) -> (I64TruncSatF64U x) (Cvt64Fto32U x) -> (I64TruncSatF64U x)
(Cvt64Fto64U x) -> (I64TruncSatF64U x) (Cvt64Fto64U x) -> (I64TruncSatF64U x)
(Cvt32Fto64F x) -> x (Cvt32Fto64F x) -> (F64PromoteF32 x)
(Cvt64Fto32F x) -> (LoweredRound32F x) (Cvt64Fto32F x) -> (F32DemoteF64 x)
(Round32F x) -> (LoweredRound32F x) (Round32F x) -> x
(Round64F x) -> x (Round64F x) -> x
// Lowering shifts // Lowering shifts
...@@ -165,8 +160,7 @@ ...@@ -165,8 +160,7 @@
(Less32U x y) -> (I64LtU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Less32U x y) -> (I64LtU (ZeroExt32to64 x) (ZeroExt32to64 y))
(Less16U x y) -> (I64LtU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Less16U x y) -> (I64LtU (ZeroExt16to64 x) (ZeroExt16to64 y))
(Less8U x y) -> (I64LtU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Less8U x y) -> (I64LtU (ZeroExt8to64 x) (ZeroExt8to64 y))
(Less64F x y) -> (F64Lt x y) (Less(64|32)F x y) -> (F(64|32)Lt x y)
(Less32F x y) -> (F64Lt (LoweredRound32F x) (LoweredRound32F y))
(Leq64 x y) -> (I64LeS x y) (Leq64 x y) -> (I64LeS x y)
(Leq32 x y) -> (I64LeS (SignExt32to64 x) (SignExt32to64 y)) (Leq32 x y) -> (I64LeS (SignExt32to64 x) (SignExt32to64 y))
...@@ -176,8 +170,7 @@ ...@@ -176,8 +170,7 @@
(Leq32U x y) -> (I64LeU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Leq32U x y) -> (I64LeU (ZeroExt32to64 x) (ZeroExt32to64 y))
(Leq16U x y) -> (I64LeU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Leq16U x y) -> (I64LeU (ZeroExt16to64 x) (ZeroExt16to64 y))
(Leq8U x y) -> (I64LeU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Leq8U x y) -> (I64LeU (ZeroExt8to64 x) (ZeroExt8to64 y))
(Leq64F x y) -> (F64Le x y) (Leq(64|32)F x y) -> (F(64|32)Le x y)
(Leq32F x y) -> (F64Le (LoweredRound32F x) (LoweredRound32F y))
(Greater64 x y) -> (I64GtS x y) (Greater64 x y) -> (I64GtS x y)
(Greater32 x y) -> (I64GtS (SignExt32to64 x) (SignExt32to64 y)) (Greater32 x y) -> (I64GtS (SignExt32to64 x) (SignExt32to64 y))
...@@ -187,8 +180,7 @@ ...@@ -187,8 +180,7 @@
(Greater32U x y) -> (I64GtU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Greater32U x y) -> (I64GtU (ZeroExt32to64 x) (ZeroExt32to64 y))
(Greater16U x y) -> (I64GtU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Greater16U x y) -> (I64GtU (ZeroExt16to64 x) (ZeroExt16to64 y))
(Greater8U x y) -> (I64GtU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Greater8U x y) -> (I64GtU (ZeroExt8to64 x) (ZeroExt8to64 y))
(Greater64F x y) -> (F64Gt x y) (Greater(64|32)F x y) -> (F(64|32)Gt x y)
(Greater32F x y) -> (F64Gt (LoweredRound32F x) (LoweredRound32F y))
(Geq64 x y) -> (I64GeS x y) (Geq64 x y) -> (I64GeS x y)
(Geq32 x y) -> (I64GeS (SignExt32to64 x) (SignExt32to64 y)) (Geq32 x y) -> (I64GeS (SignExt32to64 x) (SignExt32to64 y))
...@@ -198,8 +190,7 @@ ...@@ -198,8 +190,7 @@
(Geq32U x y) -> (I64GeU (ZeroExt32to64 x) (ZeroExt32to64 y)) (Geq32U x y) -> (I64GeU (ZeroExt32to64 x) (ZeroExt32to64 y))
(Geq16U x y) -> (I64GeU (ZeroExt16to64 x) (ZeroExt16to64 y)) (Geq16U x y) -> (I64GeU (ZeroExt16to64 x) (ZeroExt16to64 y))
(Geq8U x y) -> (I64GeU (ZeroExt8to64 x) (ZeroExt8to64 y)) (Geq8U x y) -> (I64GeU (ZeroExt8to64 x) (ZeroExt8to64 y))
(Geq64F x y) -> (F64Ge x y) (Geq(64|32)F x y) -> (F(64|32)Ge x y)
(Geq32F x y) -> (F64Ge (LoweredRound32F x) (LoweredRound32F y))
(Eq64 x y) -> (I64Eq x y) (Eq64 x y) -> (I64Eq x y)
(Eq32 x y) -> (I64Eq (ZeroExt32to64 x) (ZeroExt32to64 y)) (Eq32 x y) -> (I64Eq (ZeroExt32to64 x) (ZeroExt32to64 y))
...@@ -207,8 +198,7 @@ ...@@ -207,8 +198,7 @@
(Eq8 x y) -> (I64Eq (ZeroExt8to64 x) (ZeroExt8to64 y)) (Eq8 x y) -> (I64Eq (ZeroExt8to64 x) (ZeroExt8to64 y))
(EqB x y) -> (I64Eq x y) (EqB x y) -> (I64Eq x y)
(EqPtr x y) -> (I64Eq x y) (EqPtr x y) -> (I64Eq x y)
(Eq64F x y) -> (F64Eq x y) (Eq(64|32)F x y) -> (F(64|32)Eq x y)
(Eq32F x y) -> (F64Eq (LoweredRound32F x) (LoweredRound32F y))
(Neq64 x y) -> (I64Ne x y) (Neq64 x y) -> (I64Ne x y)
(Neq32 x y) -> (I64Ne (ZeroExt32to64 x) (ZeroExt32to64 y)) (Neq32 x y) -> (I64Ne (ZeroExt32to64 x) (ZeroExt32to64 y))
...@@ -216,8 +206,7 @@ ...@@ -216,8 +206,7 @@
(Neq8 x y) -> (I64Ne (ZeroExt8to64 x) (ZeroExt8to64 y)) (Neq8 x y) -> (I64Ne (ZeroExt8to64 x) (ZeroExt8to64 y))
(NeqB x y) -> (I64Ne x y) (NeqB x y) -> (I64Ne x y)
(NeqPtr x y) -> (I64Ne x y) (NeqPtr x y) -> (I64Ne x y)
(Neq64F x y) -> (F64Ne x y) (Neq(64|32)F x y) -> (F(64|32)Ne x y)
(Neq32F x y) -> (F64Ne (LoweredRound32F x) (LoweredRound32F y))
// Lowering loads // Lowering loads
(Load <t> ptr mem) && is32BitFloat(t) -> (F32Load ptr mem) (Load <t> ptr mem) && is32BitFloat(t) -> (F32Load ptr mem)
...@@ -327,7 +316,7 @@ ...@@ -327,7 +316,7 @@
// Lowering constants // Lowering constants
(Const(64|32|16|8) [val]) -> (I64Const [val]) (Const(64|32|16|8) [val]) -> (I64Const [val])
(Const(64|32)F [val]) -> (F64Const [val]) (Const(64|32)F [val]) -> (F(64|32)Const [val])
(ConstNil) -> (I64Const [0]) (ConstNil) -> (I64Const [0])
(ConstBool [b]) -> (I64Const [b]) (ConstBool [b]) -> (I64Const [b])
......
This diff is collapsed.
...@@ -34,6 +34,8 @@ type arch struct { ...@@ -34,6 +34,8 @@ type arch struct {
regnames []string regnames []string
gpregmask regMask gpregmask regMask
fpregmask regMask fpregmask regMask
fp32regmask regMask
fp64regmask regMask
specialregmask regMask specialregmask regMask
framepointerreg int8 framepointerreg int8
linkreg int8 linkreg int8
...@@ -400,6 +402,12 @@ func genOp() { ...@@ -400,6 +402,12 @@ func genOp() {
fmt.Fprintln(w, "}") fmt.Fprintln(w, "}")
fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask) fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask)
fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask) fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
if a.fp32regmask != 0 {
fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask)
}
if a.fp64regmask != 0 {
fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask)
}
fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask) fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask)
fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg) fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg) fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg)
......
This diff is collapsed.
...@@ -792,7 +792,13 @@ func (s *regAllocState) compatRegs(t *types.Type) regMask { ...@@ -792,7 +792,13 @@ func (s *regAllocState) compatRegs(t *types.Type) regMask {
return 0 return 0
} }
if t.IsFloat() || t == types.TypeInt128 { if t.IsFloat() || t == types.TypeInt128 {
m = s.f.Config.fpRegMask if t.Etype == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
m = s.f.Config.fp32RegMask
} else if t.Etype == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
m = s.f.Config.fp64RegMask
} else {
m = s.f.Config.fpRegMask
}
} else { } else {
m = s.f.Config.gpRegMask m = s.f.Config.gpRegMask
} }
...@@ -2220,13 +2226,8 @@ func (e *edgeState) erase(loc Location) { ...@@ -2220,13 +2226,8 @@ func (e *edgeState) erase(loc Location) {
// findRegFor finds a register we can use to make a temp copy of type typ. // findRegFor finds a register we can use to make a temp copy of type typ.
func (e *edgeState) findRegFor(typ *types.Type) Location { func (e *edgeState) findRegFor(typ *types.Type) Location {
// Which registers are possibilities. // Which registers are possibilities.
var m regMask
types := &e.s.f.Config.Types types := &e.s.f.Config.Types
if typ.IsFloat() { m := e.s.compatRegs(typ)
m = e.s.compatRegs(types.Float64)
} else {
m = e.s.compatRegs(types.Int64)
}
// Pick a register. In priority order: // Pick a register. In priority order:
// 1) an unused register // 1) an unused register
......
This diff is collapsed.
...@@ -176,18 +176,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -176,18 +176,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpWasmI64Store8, ssa.OpWasmI64Store16, ssa.OpWasmI64Store32, ssa.OpWasmI64Store, ssa.OpWasmF32Store, ssa.OpWasmF64Store: case ssa.OpWasmI64Store8, ssa.OpWasmI64Store16, ssa.OpWasmI64Store32, ssa.OpWasmI64Store, ssa.OpWasmF32Store, ssa.OpWasmF64Store:
getValue32(s, v.Args[0]) getValue32(s, v.Args[0])
getValue64(s, v.Args[1]) getValue64(s, v.Args[1])
if v.Op == ssa.OpWasmF32Store {
s.Prog(wasm.AF32DemoteF64)
}
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.To = obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt} p.To = obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt}
case ssa.OpStoreReg: case ssa.OpStoreReg:
getReg(s, wasm.REG_SP) getReg(s, wasm.REG_SP)
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
if v.Type.Etype == types.TFLOAT32 {
s.Prog(wasm.AF32DemoteF64)
}
p := s.Prog(storeOp(v.Type)) p := s.Prog(storeOp(v.Type))
gc.AddrAuto(&p.To, v) gc.AddrAuto(&p.To, v)
...@@ -246,11 +240,6 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -246,11 +240,6 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
panic("wasm: bad LoweredAddr") panic("wasm: bad LoweredAddr")
} }
case ssa.OpWasmLoweredRound32F:
getValue64(s, v.Args[0])
s.Prog(wasm.AF32DemoteF64)
s.Prog(wasm.AF64PromoteF32)
case ssa.OpWasmLoweredConvert: case ssa.OpWasmLoweredConvert:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
...@@ -268,6 +257,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -268,6 +257,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
case ssa.OpWasmI64Const: case ssa.OpWasmI64Const:
i64Const(s, v.AuxInt) i64Const(s, v.AuxInt)
case ssa.OpWasmF32Const:
f32Const(s, v.AuxFloat())
case ssa.OpWasmF64Const: case ssa.OpWasmF64Const:
f64Const(s, v.AuxFloat()) f64Const(s, v.AuxFloat())
...@@ -275,9 +267,6 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -275,9 +267,6 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
getValue32(s, v.Args[0]) getValue32(s, v.Args[0])
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt} p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt}
if v.Op == ssa.OpWasmF32Load {
s.Prog(wasm.AF64PromoteF32)
}
case ssa.OpWasmI64Eqz: case ssa.OpWasmI64Eqz:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
...@@ -286,7 +275,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -286,7 +275,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
s.Prog(wasm.AI64ExtendI32U) s.Prog(wasm.AI64ExtendI32U)
} }
case ssa.OpWasmI64Eq, ssa.OpWasmI64Ne, ssa.OpWasmI64LtS, ssa.OpWasmI64LtU, ssa.OpWasmI64GtS, ssa.OpWasmI64GtU, ssa.OpWasmI64LeS, ssa.OpWasmI64LeU, ssa.OpWasmI64GeS, ssa.OpWasmI64GeU, ssa.OpWasmF64Eq, ssa.OpWasmF64Ne, ssa.OpWasmF64Lt, ssa.OpWasmF64Gt, ssa.OpWasmF64Le, ssa.OpWasmF64Ge: case ssa.OpWasmI64Eq, ssa.OpWasmI64Ne, ssa.OpWasmI64LtS, ssa.OpWasmI64LtU, ssa.OpWasmI64GtS, ssa.OpWasmI64GtU, ssa.OpWasmI64LeS, ssa.OpWasmI64LeU, ssa.OpWasmI64GeS, ssa.OpWasmI64GeU,
ssa.OpWasmF32Eq, ssa.OpWasmF32Ne, ssa.OpWasmF32Lt, ssa.OpWasmF32Gt, ssa.OpWasmF32Le, ssa.OpWasmF32Ge,
ssa.OpWasmF64Eq, ssa.OpWasmF64Ne, ssa.OpWasmF64Lt, ssa.OpWasmF64Gt, ssa.OpWasmF64Le, ssa.OpWasmF64Ge:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
getValue64(s, v.Args[1]) getValue64(s, v.Args[1])
s.Prog(v.Op.Asm()) s.Prog(v.Op.Asm())
...@@ -294,7 +285,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -294,7 +285,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
s.Prog(wasm.AI64ExtendI32U) s.Prog(wasm.AI64ExtendI32U)
} }
case ssa.OpWasmI64Add, ssa.OpWasmI64Sub, ssa.OpWasmI64Mul, ssa.OpWasmI64DivU, ssa.OpWasmI64RemS, ssa.OpWasmI64RemU, ssa.OpWasmI64And, ssa.OpWasmI64Or, ssa.OpWasmI64Xor, ssa.OpWasmI64Shl, ssa.OpWasmI64ShrS, ssa.OpWasmI64ShrU, ssa.OpWasmF64Add, ssa.OpWasmF64Sub, ssa.OpWasmF64Mul, ssa.OpWasmF64Div, ssa.OpWasmF64Copysign, ssa.OpWasmI64Rotl: case ssa.OpWasmI64Add, ssa.OpWasmI64Sub, ssa.OpWasmI64Mul, ssa.OpWasmI64DivU, ssa.OpWasmI64RemS, ssa.OpWasmI64RemU, ssa.OpWasmI64And, ssa.OpWasmI64Or, ssa.OpWasmI64Xor, ssa.OpWasmI64Shl, ssa.OpWasmI64ShrS, ssa.OpWasmI64ShrU, ssa.OpWasmI64Rotl,
ssa.OpWasmF32Add, ssa.OpWasmF32Sub, ssa.OpWasmF32Mul, ssa.OpWasmF32Div, ssa.OpWasmF32Copysign,
ssa.OpWasmF64Add, ssa.OpWasmF64Sub, ssa.OpWasmF64Mul, ssa.OpWasmF64Div, ssa.OpWasmF64Copysign:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
getValue64(s, v.Args[1]) getValue64(s, v.Args[1])
s.Prog(v.Op.Asm()) s.Prog(v.Op.Asm())
...@@ -316,37 +309,50 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -316,37 +309,50 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
} }
s.Prog(wasm.AI64DivS) s.Prog(wasm.AI64DivS)
case ssa.OpWasmI64TruncSatF64S: case ssa.OpWasmI64TruncSatF32S, ssa.OpWasmI64TruncSatF64S:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
if objabi.GOWASM.SatConv { if objabi.GOWASM.SatConv {
s.Prog(v.Op.Asm()) s.Prog(v.Op.Asm())
} else { } else {
if v.Op == ssa.OpWasmI64TruncSatF32S {
s.Prog(wasm.AF64PromoteF32)
}
p := s.Prog(wasm.ACall) p := s.Prog(wasm.ACall)
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: gc.WasmTruncS} p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: gc.WasmTruncS}
} }
case ssa.OpWasmI64TruncSatF64U: case ssa.OpWasmI64TruncSatF32U, ssa.OpWasmI64TruncSatF64U:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
if objabi.GOWASM.SatConv { if objabi.GOWASM.SatConv {
s.Prog(v.Op.Asm()) s.Prog(v.Op.Asm())
} else { } else {
if v.Op == ssa.OpWasmI64TruncSatF32U {
s.Prog(wasm.AF64PromoteF32)
}
p := s.Prog(wasm.ACall) p := s.Prog(wasm.ACall)
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: gc.WasmTruncU} p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: gc.WasmTruncU}
} }
case case ssa.OpWasmF32DemoteF64:
ssa.OpWasmF64Neg, ssa.OpWasmF64ConvertI64S, ssa.OpWasmF64ConvertI64U, getValue64(s, v.Args[0])
s.Prog(v.Op.Asm())
case ssa.OpWasmF64PromoteF32:
getValue64(s, v.Args[0])
s.Prog(v.Op.Asm())
case ssa.OpWasmF32ConvertI64S, ssa.OpWasmF32ConvertI64U,
ssa.OpWasmF64ConvertI64S, ssa.OpWasmF64ConvertI64U,
ssa.OpWasmI64Extend8S, ssa.OpWasmI64Extend16S, ssa.OpWasmI64Extend32S, ssa.OpWasmI64Extend8S, ssa.OpWasmI64Extend16S, ssa.OpWasmI64Extend32S,
ssa.OpWasmF64Sqrt, ssa.OpWasmF64Trunc, ssa.OpWasmF64Ceil, ssa.OpWasmF64Floor, ssa.OpWasmF64Nearest, ssa.OpWasmF64Abs, ssa.OpWasmI64Ctz, ssa.OpWasmI64Clz, ssa.OpWasmI64Popcnt: ssa.OpWasmF32Neg, ssa.OpWasmF32Sqrt, ssa.OpWasmF32Trunc, ssa.OpWasmF32Ceil, ssa.OpWasmF32Floor, ssa.OpWasmF32Nearest, ssa.OpWasmF32Abs,
ssa.OpWasmF64Neg, ssa.OpWasmF64Sqrt, ssa.OpWasmF64Trunc, ssa.OpWasmF64Ceil, ssa.OpWasmF64Floor, ssa.OpWasmF64Nearest, ssa.OpWasmF64Abs,
ssa.OpWasmI64Ctz, ssa.OpWasmI64Clz, ssa.OpWasmI64Popcnt:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
s.Prog(v.Op.Asm()) s.Prog(v.Op.Asm())
case ssa.OpLoadReg: case ssa.OpLoadReg:
p := s.Prog(loadOp(v.Type)) p := s.Prog(loadOp(v.Type))
gc.AddrAuto(&p.From, v.Args[0]) gc.AddrAuto(&p.From, v.Args[0])
if v.Type.Etype == types.TFLOAT32 {
s.Prog(wasm.AF64PromoteF32)
}
case ssa.OpCopy: case ssa.OpCopy:
getValue64(s, v.Args[0]) getValue64(s, v.Args[0])
...@@ -359,7 +365,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) { ...@@ -359,7 +365,9 @@ func ssaGenValueOnStack(s *gc.SSAGenState, v *ssa.Value, extend bool) {
func isCmp(v *ssa.Value) bool { func isCmp(v *ssa.Value) bool {
switch v.Op { switch v.Op {
case ssa.OpWasmI64Eqz, ssa.OpWasmI64Eq, ssa.OpWasmI64Ne, ssa.OpWasmI64LtS, ssa.OpWasmI64LtU, ssa.OpWasmI64GtS, ssa.OpWasmI64GtU, ssa.OpWasmI64LeS, ssa.OpWasmI64LeU, ssa.OpWasmI64GeS, ssa.OpWasmI64GeU, ssa.OpWasmF64Eq, ssa.OpWasmF64Ne, ssa.OpWasmF64Lt, ssa.OpWasmF64Gt, ssa.OpWasmF64Le, ssa.OpWasmF64Ge: case ssa.OpWasmI64Eqz, ssa.OpWasmI64Eq, ssa.OpWasmI64Ne, ssa.OpWasmI64LtS, ssa.OpWasmI64LtU, ssa.OpWasmI64GtS, ssa.OpWasmI64GtU, ssa.OpWasmI64LeS, ssa.OpWasmI64LeU, ssa.OpWasmI64GeS, ssa.OpWasmI64GeU,
ssa.OpWasmF32Eq, ssa.OpWasmF32Ne, ssa.OpWasmF32Lt, ssa.OpWasmF32Gt, ssa.OpWasmF32Le, ssa.OpWasmF32Ge,
ssa.OpWasmF64Eq, ssa.OpWasmF64Ne, ssa.OpWasmF64Lt, ssa.OpWasmF64Gt, ssa.OpWasmF64Le, ssa.OpWasmF64Ge:
return true return true
default: default:
return false return false
...@@ -407,6 +415,11 @@ func i64Const(s *gc.SSAGenState, val int64) { ...@@ -407,6 +415,11 @@ func i64Const(s *gc.SSAGenState, val int64) {
p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: val} p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: val}
} }
func f32Const(s *gc.SSAGenState, val float64) {
p := s.Prog(wasm.AF32Const)
p.From = obj.Addr{Type: obj.TYPE_FCONST, Val: val}
}
func f64Const(s *gc.SSAGenState, val float64) { func f64Const(s *gc.SSAGenState, val float64) {
p := s.Prog(wasm.AF64Const) p := s.Prog(wasm.AF64Const)
p.From = obj.Addr{Type: obj.TYPE_FCONST, Val: val} p.From = obj.Addr{Type: obj.TYPE_FCONST, Val: val}
......
...@@ -266,7 +266,7 @@ const ( ...@@ -266,7 +266,7 @@ const (
REG_RET3 REG_RET3
REG_PAUSE REG_PAUSE
// locals // i32 locals
REG_R0 REG_R0
REG_R1 REG_R1
REG_R2 REG_R2
...@@ -283,6 +283,8 @@ const ( ...@@ -283,6 +283,8 @@ const (
REG_R13 REG_R13
REG_R14 REG_R14
REG_R15 REG_R15
// f32 locals
REG_F0 REG_F0
REG_F1 REG_F1
REG_F2 REG_F2
...@@ -300,6 +302,24 @@ const ( ...@@ -300,6 +302,24 @@ const (
REG_F14 REG_F14
REG_F15 REG_F15
// f64 locals
REG_F16
REG_F17
REG_F18
REG_F19
REG_F20
REG_F21
REG_F22
REG_F23
REG_F24
REG_F25
REG_F26
REG_F27
REG_F28
REG_F29
REG_F30
REG_F31
REG_PC_B // also first parameter, i32 REG_PC_B // also first parameter, i32
MAXREG MAXREG
......
...@@ -59,6 +59,23 @@ var Register = map[string]int16{ ...@@ -59,6 +59,23 @@ var Register = map[string]int16{
"F14": REG_F14, "F14": REG_F14,
"F15": REG_F15, "F15": REG_F15,
"F16": REG_F16,
"F17": REG_F17,
"F18": REG_F18,
"F19": REG_F19,
"F20": REG_F20,
"F21": REG_F21,
"F22": REG_F22,
"F23": REG_F23,
"F24": REG_F24,
"F25": REG_F25,
"F26": REG_F26,
"F27": REG_F27,
"F28": REG_F28,
"F29": REG_F29,
"F30": REG_F30,
"F31": REG_F31,
"PC_B": REG_PC_B, "PC_B": REG_PC_B,
} }
...@@ -841,7 +858,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { ...@@ -841,7 +858,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
} }
regs := []int16{REG_SP} regs := []int16{REG_SP}
for reg := int16(REG_R0); reg <= REG_F15; reg++ { for reg := int16(REG_R0); reg <= REG_F31; reg++ {
if regUsed[reg-MINREG] { if regUsed[reg-MINREG] {
regs = append(regs, reg) regs = append(regs, reg)
} }
...@@ -1022,6 +1039,11 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) { ...@@ -1022,6 +1039,11 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
} }
writeSleb128(w, p.From.Offset) writeSleb128(w, p.From.Offset)
case AF32Const:
b := make([]byte, 4)
binary.LittleEndian.PutUint32(b, math.Float32bits(float32(p.From.Val.(float64))))
w.Write(b)
case AF64Const: case AF64Const:
b := make([]byte, 8) b := make([]byte, 8)
binary.LittleEndian.PutUint64(b, math.Float64bits(p.From.Val.(float64))) binary.LittleEndian.PutUint64(b, math.Float64bits(p.From.Val.(float64)))
...@@ -1106,6 +1128,8 @@ func regType(reg int16) valueType { ...@@ -1106,6 +1128,8 @@ func regType(reg int16) valueType {
case reg >= REG_R0 && reg <= REG_R15: case reg >= REG_R0 && reg <= REG_R15:
return i64 return i64
case reg >= REG_F0 && reg <= REG_F15: case reg >= REG_F0 && reg <= REG_F15:
return f32
case reg >= REG_F16 && reg <= REG_F31:
return f64 return f64
default: default:
panic("invalid register") panic("invalid register")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment