cmd/internal/obj/arm, runtime: delete old ARM softfloat code

CL 106735 changed to the new softfloat support on GOARM=5. ARM assembly code that uses FP instructions not guarded on GOARM, if any, will break. The easiest way to fix is probably to use Go implementation on GOARM=5, like MOVB runtime·goarm(SB), R11 CMP $5, R11 BEQ arm5 ... FP instructions ... RET arm5: CALL or JMP to Go implementation Change-Id: I52fc76fac9c854ebe7c6c856c365fba35d3f560a Reviewed-on: https://go-review.googlesource.com/107475 Run-TryBot: Cherry Zhang <cherryyz@google.com> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>

cmd/internal/obj/arm, runtime: delete old ARM softfloat code
CL 106735 changed to the new softfloat support on GOARM=5. ARM assembly code that uses FP instructions not guarded on GOARM, if any, will break. The easiest way to fix is probably to use Go implementation on GOARM=5, like MOVB runtime·goarm(SB), R11 CMP $5, R11 BEQ arm5 ... FP instructions ... RET arm5: CALL or JMP to Go implementation Change-Id: I52fc76fac9c854ebe7c6c856c365fba35d3f560a Reviewed-on: https://go-review.googlesource.com/107475 Run-TryBot: Cherry Zhang <cherryyz@google.com> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org>
f83e4212 · Cherry Zhang · 58b18cfd · f83e4212 · f83e4212 · f83e4212
Commit f83e4212 authored Apr 13, 2018 by Cherry Zhang
10 changed files
--- a/src/cmd/asm/internal/asm/operand_test.go
+++ b/src/cmd/asm/internal/asm/operand_test.go
@@ -306,7 +306,7 @@ var armOperandTests = []operandTest{
 	{"g", "g"},
 	{"gosave<>(SB)", "gosave<>(SB)"},
 	{"retlo+12(FP)", "retlo+12(FP)"},
-	{"runtime·_sfloat2(SB)", "runtime._sfloat2(SB)"},
+	{"runtime·gogo(SB)", "runtime.gogo(SB)"},
 	{"·AddUint32(SB)", "\"\".AddUint32(SB)"},
 	{"(R1, R3)", "(R1, R3)"},
 	{"[R0,R1,g,R15", ""}, // Issue 11764 - asm hung parsing ']' missing register lists.

--- a/src/cmd/internal/obj/arm/obj5.go
+++ b/src/cmd/internal/obj/arm/obj5.go
@@ -255,8 +255,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 	c := ctxt5{ctxt: ctxt, cursym: cursym, newprog: newprog}
-	c.softfloat()
 	p := c.cursym.Func.Text
 	autoffset := int32(p.To.Offset)
 	if autoffset == -4 {
@@ -649,87 +647,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 	}
 }
-func isfloatreg(a *obj.Addr) bool {
-	return a.Type == obj.TYPE_REG && REG_F0 <= a.Reg && a.Reg <= REG_F15
-}
-func (c *ctxt5) softfloat() {
-	if objabi.GOARM > 5 {
-		return
-	}
-	symsfloat := c.ctxt.Lookup("runtime._sfloat")
-	wasfloat := 0
-	for p := c.cursym.Func.Text; p != nil; p = p.Link {
-		if p.Pcond != nil {
-			p.Pcond.Mark |= LABEL
-		}
-	}
-	var next *obj.Prog
-	for p := c.cursym.Func.Text; p != nil; p = p.Link {
-		switch p.As {
-		case AMOVW:
-			if isfloatreg(&p.To) || isfloatreg(&p.From) {
-				goto soft
-			}
-			goto notsoft
-		case AMOVWD,
-			AMOVWF,
-			AMOVDW,
-			AMOVFW,
-			AMOVFD,
-			AMOVDF,
-			AMOVF,
-			AMOVD,
-			ACMPF,
-			ACMPD,
-			AADDF,
-			AADDD,
-			ASUBF,
-			ASUBD,
-			AMULF,
-			AMULD,
-			ADIVF,
-			ADIVD,
-			ASQRTF,
-			ASQRTD,
-			AABSF,
-			AABSD,
-			ANEGF,
-			ANEGD:
-			goto soft
-		default:
-			goto notsoft
-		}
-	soft:
-		if wasfloat == 0 || (p.Mark&LABEL != 0) {
-			next = c.newprog()
-			*next = *p
-			// BL runtime·_sfloat(SB)
-			*p = obj.Prog{}
-			p.Ctxt = c.ctxt
-			p.Link = next
-			p.As = ABL
-			p.To.Type = obj.TYPE_BRANCH
-			p.To.Sym = symsfloat
-			p.Pos = next.Pos
-			p = next
-			wasfloat = 1
-		}
-		continue
-	notsoft:
-		wasfloat = 0
-	}
-}
 func (c *ctxt5) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
 	// MOVW g_stackguard(g), R1
 	p = obj.Appendp(p, c.newprog)

--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -21,7 +21,6 @@ var F32to64 = f32to64
 var Fcmp64 = fcmp64
 var Fintto64 = fintto64
 var F64toint = f64toint
-var Sqrt = sqrt
 var Entersyscall = entersyscall
 var Exitsyscall = exitsyscall

--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -432,15 +432,6 @@ func gopanic(e interface{}) {
 		throw("panic on system stack")
 	}
-	// m.softfloat is set during software floating point.
-	// It increments m.locks to avoid preemption.
-	// We moved the memory loads out, so there shouldn't be
-	// any reason for it to panic anymore.
-	if gp.m.softfloat != 0 {
-		gp.m.locks--
-		gp.m.softfloat = 0
-		throw("panic during softfloat")
-	}
 	if gp.m.mallocing != 0 {
 		print("panic: ")
 		printany(e)
@@ -787,7 +778,7 @@ func canpanic(gp *g) bool {
 	if gp == nil || gp != _m_.curg {
 		return false
 	}
-	if _m_.locks-_m_.softfloat != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 {
+	if _m_.locks != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.preemptoff != "" || _m_.dying != 0 {
 		return false
 	}
 	status := readgstatus(gp)

--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -421,7 +421,6 @@ type m struct {
 	throwing      int32
 	preemptoff    string // if != "", keep curg running on this m
 	locks         int32
-	softfloat     int32
 	dying         int32
 	profilehz     int32
 	helpgc        int32
@@ -445,9 +444,6 @@ type m struct {
 	mcache        *mcache
 	lockedg       guintptr
 	createstack   [32]uintptr    // stack that created this thread.
-	freglo        [16]uint32     // d[i] lsb and f[i]
-	freghi        [16]uint32     // d[i] msb and f[i+16]
-	fflag         uint32         // floating point compare flags
 	lockedExt     uint32         // tracking for external LockOSThread
 	lockedInt     uint32         // tracking for internal lockOSThread
 	nextwaitm     muintptr       // next m waiting for lock

--- a/src/runtime/softfloat_arm.go
+++ b/src/runtime/softfloat_arm.go
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-// Software floating point interpretation of ARM 7500 FP instructions.
-// The interpretation is not bit compatible with the 7500.
-// It uses true little-endian doubles, while the 7500 used mixed-endian.
-package runtime
-import "unsafe"
-const (
-	_CPSR    = 14
-	_FLAGS_N = 1 << 31
-	_FLAGS_Z = 1 << 30
-	_FLAGS_C = 1 << 29
-	_FLAGS_V = 1 << 28
-)
-var fptrace = 0
-func fabort() {
-	throw("unsupported floating point instruction")
-}
-func fputf(reg uint32, val uint32) {
-	_g_ := getg()
-	_g_.m.freglo[reg] = val
-}
-func fputd(reg uint32, val uint64) {
-	_g_ := getg()
-	_g_.m.freglo[reg] = uint32(val)
-	_g_.m.freghi[reg] = uint32(val >> 32)
-}
-func fgetd(reg uint32) uint64 {
-	_g_ := getg()
-	return uint64(_g_.m.freglo[reg]) | uint64(_g_.m.freghi[reg])<<32
-}
-func fprintregs() {
-	_g_ := getg()
-	for i := range _g_.m.freglo {
-		print("\tf", i, ":\t", hex(_g_.m.freghi[i]), " ", hex(_g_.m.freglo[i]), "\n")
-	}
-}
-func fstatus(nan bool, cmp int32) uint32 {
-	if nan {
-		return _FLAGS_C | _FLAGS_V
-	}
-	if cmp == 0 {
-		return _FLAGS_Z | _FLAGS_C
-	}
-	if cmp < 0 {
-		return _FLAGS_N
-	}
-	return _FLAGS_C
-}
-// conditions array record the required CPSR cond field for the
-// first 5 pairs of conditional execution opcodes
-// higher 4 bits are must set, lower 4 bits are must clear
-var conditions = [10 / 2]uint32{
-	0 / 2: _FLAGS_Z>>24 | 0, // 0: EQ (Z set), 1: NE (Z clear)
-	2 / 2: _FLAGS_C>>24 | 0, // 2: CS/HS (C set), 3: CC/LO (C clear)
-	4 / 2: _FLAGS_N>>24 | 0, // 4: MI (N set), 5: PL (N clear)
-	6 / 2: _FLAGS_V>>24 | 0, // 6: VS (V set), 7: VC (V clear)
-	8 / 2: _FLAGS_C>>24 |
-		_FLAGS_Z>>28,
-}
-const _FAULT = 0x80000000 // impossible PC offset
-// returns number of words that the fp instruction
-// is occupying, 0 if next instruction isn't float.
-func stepflt(pc *uint32, regs *[15]uint32) uint32 {
-	var i, opc, regd, regm, regn, cpsr uint32
-	// m is locked in vlop_arm.s, so g.m cannot change during this function call,
-	// so caching it in a local variable is safe.
-	m := getg().m
-	i = *pc
-	if fptrace > 0 {
-		print("stepflt ", pc, " ", hex(i), " (cpsr ", hex(regs[_CPSR]>>28), ")\n")
-	}
-	opc = i >> 28
-	if opc == 14 { // common case first
-		goto execute
-	}
-	cpsr = regs[_CPSR] >> 28
-	switch opc {
-	case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9:
-		if cpsr&(conditions[opc/2]>>4) == conditions[opc/2]>>4 &&
-			cpsr&(conditions[opc/2]&0xf) == 0 {
-			if opc&1 != 0 {
-				return 1
-			}
-		} else {
-			if opc&1 == 0 {
-				return 1
-			}
-		}
-	case 10, 11: // GE (N == V), LT (N != V)
-		if cpsr&(_FLAGS_N>>28) == cpsr&(_FLAGS_V>>28) {
-			if opc&1 != 0 {
-				return 1
-			}
-		} else {
-			if opc&1 == 0 {
-				return 1
-			}
-		}
-	case 12, 13: // GT (N == V and Z == 0), LE (N != V or Z == 1)
-		if cpsr&(_FLAGS_N>>28) == cpsr&(_FLAGS_V>>28) &&
-			cpsr&(_FLAGS_Z>>28) == 0 {
-			if opc&1 != 0 {
-				return 1
-			}
-		} else {
-			if opc&1 == 0 {
-				return 1
-			}
-		}
-	case 14: // AL
-		// ok
-	case 15: // shouldn't happen
-		return 0
-	}
-	if fptrace > 0 {
-		print("conditional ", hex(opc), " (cpsr ", hex(cpsr), ") pass\n")
-	}
-	i = 0xe<<28 | i&(1<<28-1)
-execute:
-	// special cases
-	if i&0xfffff000 == 0xe59fb000 {
-		// load r11 from pc-relative address.
-		// might be part of a floating point move
-		// (or might not, but no harm in simulating
-		// one instruction too many).
-		addr := (*[1]uint32)(add(unsafe.Pointer(pc), uintptr(i&0xfff+8)))
-		regs[11] = addr[0]
-		if fptrace > 0 {
-			print("*** cpu R[11] = *(", addr, ") ", hex(regs[11]), "\n")
-		}
-		return 1
-	}
-	if i == 0xe08fb00b {
-		// add pc to r11
-		// might be part of a PIC floating point move
-		// (or might not, but again no harm done).
-		regs[11] += uint32(uintptr(unsafe.Pointer(pc))) + 8
-		if fptrace > 0 {
-			print("*** cpu R[11] += pc ", hex(regs[11]), "\n")
-		}
-		return 1
-	}
-	if i&0xfffffff0 == 0xe08bb000 {
-		r := i & 0xf
-		// add r to r11.
-		// might be part of a large offset address calculation
-		// (or might not, but again no harm done).
-		regs[11] += regs[r]
-		if fptrace > 0 {
-			print("*** cpu R[11] += R[", r, "] ", hex(regs[11]), "\n")
-		}
-		return 1
-	}
-	if i == 0xeef1fa10 {
-		regs[_CPSR] = regs[_CPSR]&0x0fffffff | m.fflag
-		if fptrace > 0 {
-			print("*** fpsr R[CPSR] = F[CPSR] ", hex(regs[_CPSR]), "\n")
-		}
-		return 1
-	}
-	if i&0xff000000 == 0xea000000 {
-		// unconditional branch
-		// can happen in the middle of floating point
-		// if the linker decides it is time to lay down
-		// a sequence of instruction stream constants.
-		delta := int32(i&0xffffff) << 8 >> 8 // sign extend
-		if fptrace > 0 {
-			print("*** cpu PC += ", hex((delta+2)*4), "\n")
-		}
-		return uint32(delta + 2)
-	}
-	// load/store regn is cpureg, regm is 8bit offset
-	regd = i >> 12 & 0xf
-	regn = i >> 16 & 0xf
-	regm = i & 0xff << 2 // PLUS or MINUS ??
-	switch i & 0xfff00f00 {
-	case 0xed900a00: // single load
-		uaddr := uintptr(regs[regn] + regm)
-		if uaddr < 4096 {
-			if fptrace > 0 {
-				print("*** load @", hex(uaddr), " => fault\n")
-			}
-			return _FAULT
-		}
-		addr := (*[1]uint32)(unsafe.Pointer(uaddr))
-		m.freglo[regd] = addr[0]
-		if fptrace > 0 {
-			print("*** load F[", regd, "] = ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xed900b00: // double load
-		uaddr := uintptr(regs[regn] + regm)
-		if uaddr < 4096 {
-			if fptrace > 0 {
-				print("*** double load @", hex(uaddr), " => fault\n")
-			}
-			return _FAULT
-		}
-		addr := (*[2]uint32)(unsafe.Pointer(uaddr))
-		m.freglo[regd] = addr[0]
-		m.freghi[regd] = addr[1]
-		if fptrace > 0 {
-			print("*** load D[", regd, "] = ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xed800a00: // single store
-		uaddr := uintptr(regs[regn] + regm)
-		if uaddr < 4096 {
-			if fptrace > 0 {
-				print("*** store @", hex(uaddr), " => fault\n")
-			}
-			return _FAULT
-		}
-		addr := (*[1]uint32)(unsafe.Pointer(uaddr))
-		addr[0] = m.freglo[regd]
-		if fptrace > 0 {
-			print("*** *(", addr, ") = ", hex(addr[0]), "\n")
-		}
-		return 1
-	case 0xed800b00: // double store
-		uaddr := uintptr(regs[regn] + regm)
-		if uaddr < 4096 {
-			if fptrace > 0 {
-				print("*** double store @", hex(uaddr), " => fault\n")
-			}
-			return _FAULT
-		}
-		addr := (*[2]uint32)(unsafe.Pointer(uaddr))
-		addr[0] = m.freglo[regd]
-		addr[1] = m.freghi[regd]
-		if fptrace > 0 {
-			print("*** *(", addr, ") = ", hex(addr[1]), "-", hex(addr[0]), "\n")
-		}
-		return 1
-	}
-	// regd, regm, regn are 4bit variables
-	regm = i >> 0 & 0xf
-	switch i & 0xfff00ff0 {
-	case 0xf3000110: // veor
-		m.freglo[regd] = m.freglo[regm] ^ m.freglo[regn]
-		m.freghi[regd] = m.freghi[regm] ^ m.freghi[regn]
-		if fptrace > 0 {
-			print("*** veor D[", regd, "] = ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb00b00: // D[regd] = const(regn,regm)
-		regn = regn<<4 | regm
-		regm = 0x40000000
-		if regn&0x80 != 0 {
-			regm |= 0x80000000
-		}
-		if regn&0x40 != 0 {
-			regm ^= 0x7fc00000
-		}
-		regm |= regn & 0x3f << 16
-		m.freglo[regd] = 0
-		m.freghi[regd] = regm
-		if fptrace > 0 {
-			print("*** immed D[", regd, "] = ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb00a00: // F[regd] = const(regn,regm)
-		regn = regn<<4 | regm
-		regm = 0x40000000
-		if regn&0x80 != 0 {
-			regm |= 0x80000000
-		}
-		if regn&0x40 != 0 {
-			regm ^= 0x7e000000
-		}
-		regm |= regn & 0x3f << 19
-		m.freglo[regd] = regm
-		if fptrace > 0 {
-			print("*** immed D[", regd, "] = ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee300b00: // D[regd] = D[regn]+D[regm]
-		fputd(regd, fadd64(fgetd(regn), fgetd(regm)))
-		if fptrace > 0 {
-			print("*** add D[", regd, "] = D[", regn, "]+D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee300a00: // F[regd] = F[regn]+F[regm]
-		m.freglo[regd] = f64to32(fadd64(f32to64(m.freglo[regn]), f32to64(m.freglo[regm])))
-		if fptrace > 0 {
-			print("*** add F[", regd, "] = F[", regn, "]+F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee300b40: // D[regd] = D[regn]-D[regm]
-		fputd(regd, fsub64(fgetd(regn), fgetd(regm)))
-		if fptrace > 0 {
-			print("*** sub D[", regd, "] = D[", regn, "]-D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee300a40: // F[regd] = F[regn]-F[regm]
-		m.freglo[regd] = f64to32(fsub64(f32to64(m.freglo[regn]), f32to64(m.freglo[regm])))
-		if fptrace > 0 {
-			print("*** sub F[", regd, "] = F[", regn, "]-F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee200b00: // D[regd] = D[regn]*D[regm]
-		fputd(regd, fmul64(fgetd(regn), fgetd(regm)))
-		if fptrace > 0 {
-			print("*** mul D[", regd, "] = D[", regn, "]*D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee200a00: // F[regd] = F[regn]*F[regm]
-		m.freglo[regd] = f64to32(fmul64(f32to64(m.freglo[regn]), f32to64(m.freglo[regm])))
-		if fptrace > 0 {
-			print("*** mul F[", regd, "] = F[", regn, "]*F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee800b00: // D[regd] = D[regn]/D[regm]
-		fputd(regd, fdiv64(fgetd(regn), fgetd(regm)))
-		if fptrace > 0 {
-			print("*** div D[", regd, "] = D[", regn, "]/D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee800a00: // F[regd] = F[regn]/F[regm]
-		m.freglo[regd] = f64to32(fdiv64(f32to64(m.freglo[regn]), f32to64(m.freglo[regm])))
-		if fptrace > 0 {
-			print("*** div F[", regd, "] = F[", regn, "]/F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xee000b10: // S[regn] = R[regd] (MOVW) (regm ignored)
-		m.freglo[regn] = regs[regd]
-		if fptrace > 0 {
-			print("*** cpy S[", regn, "] = R[", regd, "] ", hex(m.freglo[regn]), "\n")
-		}
-		return 1
-	case 0xee100b10: // R[regd] = S[regn] (MOVW) (regm ignored)
-		regs[regd] = m.freglo[regn]
-		if fptrace > 0 {
-			print("*** cpy R[", regd, "] = S[", regn, "] ", hex(regs[regd]), "\n")
-		}
-		return 1
-	}
-	// regd, regm are 4bit variables
-	switch i & 0xffff0ff0 {
-	case 0xeeb00a40: // F[regd] = F[regm] (MOVF)
-		m.freglo[regd] = m.freglo[regm]
-		if fptrace > 0 {
-			print("*** F[", regd, "] = F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb00b40: // D[regd] = D[regm] (MOVD)
-		m.freglo[regd] = m.freglo[regm]
-		m.freghi[regd] = m.freghi[regm]
-		if fptrace > 0 {
-			print("*** D[", regd, "] = D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb10bc0: // D[regd] = sqrt D[regm]
-		fputd(regd, sqrt(fgetd(regm)))
-		if fptrace > 0 {
-			print("*** D[", regd, "] = sqrt D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb00bc0: // D[regd] = abs D[regm]
-		m.freglo[regd] = m.freglo[regm]
-		m.freghi[regd] = m.freghi[regm] & (1<<31 - 1)
-		if fptrace > 0 {
-			print("*** D[", regd, "] = abs D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb00ac0: // F[regd] = abs F[regm]
-		m.freglo[regd] = m.freglo[regm] & (1<<31 - 1)
-		if fptrace > 0 {
-			print("*** F[", regd, "] = abs F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb10b40: // D[regd] = neg D[regm]
-		m.freglo[regd] = m.freglo[regm]
-		m.freghi[regd] = m.freghi[regm] ^ 1<<31
-		if fptrace > 0 {
-			print("*** D[", regd, "] = neg D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb10a40: // F[regd] = neg F[regm]
-		m.freglo[regd] = m.freglo[regm] ^ 1<<31
-		if fptrace > 0 {
-			print("*** F[", regd, "] = neg F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb40bc0: // D[regd] :: D[regm] (CMPD)
-		cmp, nan := fcmp64(fgetd(regd), fgetd(regm))
-		m.fflag = fstatus(nan, cmp)
-		if fptrace > 0 {
-			print("*** cmp D[", regd, "]::D[", regm, "] ", hex(m.fflag), "\n")
-		}
-		return 1
-	case 0xeeb40ac0: // F[regd] :: F[regm] (CMPF)
-		cmp, nan := fcmp64(f32to64(m.freglo[regd]), f32to64(m.freglo[regm]))
-		m.fflag = fstatus(nan, cmp)
-		if fptrace > 0 {
-			print("*** cmp F[", regd, "]::F[", regm, "] ", hex(m.fflag), "\n")
-		}
-		return 1
-	case 0xeeb50bc0: // D[regd] :: 0 (CMPD)
-		cmp, nan := fcmp64(fgetd(regd), 0)
-		m.fflag = fstatus(nan, cmp)
-		if fptrace > 0 {
-			print("*** cmp D[", regd, "]::0 ", hex(m.fflag), "\n")
-		}
-		return 1
-	case 0xeeb50ac0: // F[regd] :: 0 (CMPF)
-		cmp, nan := fcmp64(f32to64(m.freglo[regd]), 0)
-		m.fflag = fstatus(nan, cmp)
-		if fptrace > 0 {
-			print("*** cmp F[", regd, "]::0 ", hex(m.fflag), "\n")
-		}
-		return 1
-	case 0xeeb70ac0: // D[regd] = F[regm] (MOVFD)
-		fputd(regd, f32to64(m.freglo[regm]))
-		if fptrace > 0 {
-			print("*** f2d D[", regd, "]=F[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb70bc0: // F[regd] = D[regm] (MOVDF)
-		m.freglo[regd] = f64to32(fgetd(regm))
-		if fptrace > 0 {
-			print("*** d2f F[", regd, "]=D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeebd0ac0: // S[regd] = F[regm] (MOVFW)
-		sval, ok := f64toint(f32to64(m.freglo[regm]))
-		if !ok || int64(int32(sval)) != sval {
-			sval = 0
-		}
-		m.freglo[regd] = uint32(sval)
-		if fptrace > 0 {
-			print("*** fix S[", regd, "]=F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeebc0ac0: // S[regd] = F[regm] (MOVFW.U)
-		sval, ok := f64toint(f32to64(m.freglo[regm]))
-		if !ok || int64(uint32(sval)) != sval {
-			sval = 0
-		}
-		m.freglo[regd] = uint32(sval)
-		if fptrace > 0 {
-			print("*** fix unsigned S[", regd, "]=F[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeebd0bc0: // S[regd] = D[regm] (MOVDW)
-		sval, ok := f64toint(fgetd(regm))
-		if !ok || int64(int32(sval)) != sval {
-			sval = 0
-		}
-		m.freglo[regd] = uint32(sval)
-		if fptrace > 0 {
-			print("*** fix S[", regd, "]=D[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeebc0bc0: // S[regd] = D[regm] (MOVDW.U)
-		sval, ok := f64toint(fgetd(regm))
-		if !ok || int64(uint32(sval)) != sval {
-			sval = 0
-		}
-		m.freglo[regd] = uint32(sval)
-		if fptrace > 0 {
-			print("*** fix unsigned S[", regd, "]=D[", regm, "] ", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb80ac0: // D[regd] = S[regm] (MOVWF)
-		cmp := int32(m.freglo[regm])
-		if cmp < 0 {
-			fputf(regd, f64to32(fintto64(-int64(cmp))))
-			m.freglo[regd] ^= 0x80000000
-		} else {
-			fputf(regd, f64to32(fintto64(int64(cmp))))
-		}
-		if fptrace > 0 {
-			print("*** float D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb80a40: // D[regd] = S[regm] (MOVWF.U)
-		fputf(regd, f64to32(fintto64(int64(m.freglo[regm]))))
-		if fptrace > 0 {
-			print("*** float unsigned D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb80bc0: // D[regd] = S[regm] (MOVWD)
-		cmp := int32(m.freglo[regm])
-		if cmp < 0 {
-			fputd(regd, fintto64(-int64(cmp)))
-			m.freghi[regd] ^= 0x80000000
-		} else {
-			fputd(regd, fintto64(int64(cmp)))
-		}
-		if fptrace > 0 {
-			print("*** float D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	case 0xeeb80b40: // D[regd] = S[regm] (MOVWD.U)
-		fputd(regd, fintto64(int64(m.freglo[regm])))
-		if fptrace > 0 {
-			print("*** float unsigned D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
-		}
-		return 1
-	}
-	if i&0xff000000 == 0xee000000 || i&0xff000000 == 0xed000000 {
-		print("stepflt ", pc, " ", hex(i), "\n")
-		fabort()
-	}
-	return 0
-}
-//go:nosplit
-func _sfloat2(pc uint32, regs [15]uint32) (newpc uint32) {
-	systemstack(func() {
-		newpc = sfloat2(pc, &regs)
-	})
-	return
-}
-func _sfloatpanic()
-func sfloat2(pc uint32, regs *[15]uint32) uint32 {
-	first := true
-	for {
-		skip := stepflt((*uint32)(unsafe.Pointer(uintptr(pc))), regs)
-		if skip == 0 {
-			break
-		}
-		first = false
-		if skip == _FAULT {
-			// Encountered bad address in store/load.
-			// Record signal information and return to assembly
-			// trampoline that fakes the call.
-			const SIGSEGV = 11
-			curg := getg().m.curg
-			curg.sig = SIGSEGV
-			curg.sigcode0 = 0
-			curg.sigcode1 = 0
-			curg.sigpc = uintptr(pc)
-			pc = uint32(funcPC(_sfloatpanic))
-			break
-		}
-		pc += 4 * skip
-	}
-	if first {
-		print("sfloat2 ", pc, " ", hex(*(*uint32)(unsafe.Pointer(uintptr(pc)))), "\n")
-		fabort() // not ok to fail first instruction
-	}
-	return pc
-}
-// Stubs to pacify vet. Not safe to call from Go.
-// Calls to these functions are inserted by the compiler or assembler.
-func _sfloat()
-func udiv()
-func _div()
-func _divu()
-func _mod()
-func _modu()
--- a/src/runtime/sqrt.go
+++ b/src/runtime/sqrt.go
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-// Copy of math/sqrt.go, here for use by ARM softfloat.
-// Modified to not use any floating point arithmetic so
-// that we don't clobber any floating-point registers
-// while emulating the sqrt instruction.
-package runtime
-// The original C code and the long comment below are
-// from FreeBSD's /usr/src/lib/msun/src/e_sqrt.c and
-// came with this notice. The go code is a simplified
-// version of the original C.
-//
-// ====================================================
-// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
-//
-// Developed at SunPro, a Sun Microsystems, Inc. business.
-// Permission to use, copy, modify, and distribute this
-// software is freely granted, provided that this notice
-// is preserved.
-// ====================================================
-//
-// __ieee754_sqrt(x)
-// Return correctly rounded sqrt.
-//           -----------------------------------------
-//           | Use the hardware sqrt if you have one |
-//           -----------------------------------------
-// Method:
-//   Bit by bit method using integer arithmetic. (Slow, but portable)
-//   1. Normalization
-//      Scale x to y in [1,4) with even powers of 2:
-//      find an integer k such that  1 <= (y=x*2**(2k)) < 4, then
-//              sqrt(x) = 2**k * sqrt(y)
-//   2. Bit by bit computation
-//      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
-//           i                                                   0
-//                                     i+1         2
-//          s  = 2*q , and      y  =  2   * ( y - q  ).          (1)
-//           i      i            i                 i
-//
-//      To compute q    from q , one checks whether
-//                  i+1       i
-//
-//                            -(i+1) 2
-//                      (q + 2      )  <= y.                     (2)
-//                        i
-//                                                            -(i+1)
-//      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
-//                             i+1   i             i+1   i
-//
-//      With some algebraic manipulation, it is not difficult to see
-//      that (2) is equivalent to
-//                             -(i+1)
-//                      s  +  2       <= y                       (3)
-//                       i                i
-//
-//      The advantage of (3) is that s  and y  can be computed by
-//                                    i      i
-//      the following recurrence formula:
-//          if (3) is false
-//
-//          s     =  s  ,       y    = y   ;                     (4)
-//           i+1      i          i+1    i
-//
-//      otherwise,
-//                         -i                      -(i+1)
-//          s     =  s  + 2  ,  y    = y  -  s  - 2              (5)
-//           i+1      i          i+1    i     i
-//
-//      One may easily use induction to prove (4) and (5).
-//      Note. Since the left hand side of (3) contain only i+2 bits,
-//            it does not necessary to do a full (53-bit) comparison
-//            in (3).
-//   3. Final rounding
-//      After generating the 53 bits result, we compute one more bit.
-//      Together with the remainder, we can decide whether the
-//      result is exact, bigger than 1/2ulp, or less than 1/2ulp
-//      (it will never equal to 1/2ulp).
-//      The rounding mode can be detected by checking whether
-//      huge + tiny is equal to huge, and whether huge - tiny is
-//      equal to huge for some floating point number "huge" and "tiny".
-//
-//
-// Notes:  Rounding mode detection omitted.
-const (
-	float64Mask  = 0x7FF
-	float64Shift = 64 - 11 - 1
-	float64Bias  = 1023
-	float64NaN   = 0x7FF8000000000001
-	float64Inf   = 0x7FF0000000000000
-	maxFloat64   = 1.797693134862315708145274237317043567981e+308 // 2**1023 * (2**53 - 1) / 2**52
-)
-// isnanu returns whether ix represents a NaN floating point number.
-func isnanu(ix uint64) bool {
-	exp := (ix >> float64Shift) & float64Mask
-	sig := ix << (64 - float64Shift) >> (64 - float64Shift)
-	return exp == float64Mask && sig != 0
-}
-func sqrt(ix uint64) uint64 {
-	// special cases
-	switch {
-	case ix == 0 || ix == 1<<63: // x == 0
-		return ix
-	case isnanu(ix): // x != x
-		return ix
-	case ix&(1<<63) != 0: // x < 0
-		return float64NaN
-	case ix == float64Inf: // x > MaxFloat
-		return ix
-	}
-	// normalize x
-	exp := int((ix >> float64Shift) & float64Mask)
-	if exp == 0 { // subnormal x
-		for ix&(1<<float64Shift) == 0 {
-			ix <<= 1
-			exp--
-		}
-		exp++
-	}
-	exp -= float64Bias // unbias exponent
-	ix &^= float64Mask << float64Shift
-	ix |= 1 << float64Shift
-	if exp&1 == 1 { // odd exp, double x to make it even
-		ix <<= 1
-	}
-	exp >>= 1 // exp = exp/2, exponent of square root
-	// generate sqrt(x) bit by bit
-	ix <<= 1
-	var q, s uint64                      // q = sqrt(x)
-	r := uint64(1 << (float64Shift + 1)) // r = moving bit from MSB to LSB
-	for r != 0 {
-		t := s + r
-		if t <= ix {
-			s = t + r
-			ix -= t
-			q += r
-		}
-		ix <<= 1
-		r >>= 1
-	}
-	// final rounding
-	if ix != 0 { // remainder, result not exact
-		q += q & 1 // round according to extra bit
-	}
-	ix = q>>1 + uint64(exp-1+float64Bias)<<float64Shift // significand + biased exponent
-	return ix
-}
--- a/src/runtime/sqrt_test.go
+++ b/src/runtime/sqrt_test.go
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-// A copy of Sqrt tests from the math package to test the
-// purely integer arithmetic implementation in sqrt.go.
-package runtime_test
-import (
-	"math"
-	"runtime"
-	"testing"
-)
-func SqrtRT(x float64) float64 {
-	return math.Float64frombits(runtime.Sqrt(math.Float64bits(x)))
-}
-func TestSqrt(t *testing.T) {
-	for i := 0; i < len(vf); i++ {
-		a := math.Abs(vf[i])
-		if f := SqrtRT(a); sqrt[i] != f {
-			t.Errorf("Sqrt(%g) = %g, want %g", a, f, sqrt[i])
-		}
-	}
-	for i := 0; i < len(vfsqrtSC); i++ {
-		if f := SqrtRT(vfsqrtSC[i]); !alike(sqrtSC[i], f) {
-			t.Errorf("Sqrt(%g) = %g, want %g", vfsqrtSC[i], f, sqrtSC[i])
-		}
-	}
-}
-func alike(a, b float64) bool {
-	switch {
-	case math.IsNaN(a) && math.IsNaN(b):
-		return true
-	case a == b:
-		return math.Signbit(a) == math.Signbit(b)
-	}
-	return false
-}
-var vf = []float64{
-	4.9790119248836735e+00,
-	7.7388724745781045e+00,
-	-2.7688005719200159e-01,
-	-5.0106036182710749e+00,
-	9.6362937071984173e+00,
-	2.9263772392439646e+00,
-	5.2290834314593066e+00,
-	2.7279399104360102e+00,
-	1.8253080916808550e+00,
-	-8.6859247685756013e+00,
-}
-var sqrt = []float64{
-	2.2313699659365484748756904e+00,
-	2.7818829009464263511285458e+00,
-	5.2619393496314796848143251e-01,
-	2.2384377628763938724244104e+00,
-	3.1042380236055381099288487e+00,
-	1.7106657298385224403917771e+00,
-	2.286718922705479046148059e+00,
-	1.6516476350711159636222979e+00,
-	1.3510396336454586262419247e+00,
-	2.9471892997524949215723329e+00,
-}
-var vfsqrtSC = []float64{
-	math.Inf(-1),
-	-math.Pi,
-	math.Copysign(0, -1),
-	0,
-	math.Inf(1),
-	math.NaN(),
-	math.Float64frombits(2),
-}
-var sqrtSC = []float64{
-	math.NaN(),
-	math.NaN(),
-	math.Copysign(0, -1),
-	0,
-	math.Inf(1),
-	math.NaN(),
-	3.1434555694052576e-162,
-}
--- a/src/runtime/stubs_arm.go
+++ b/src/runtime/stubs_arm.go
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package runtime
+// Stubs to pacify vet. Not safe to call from Go.
+// Calls to these functions are inserted by the compiler or assembler.
+func udiv()
+func _div()
+func _divu()
+func _mod()
+func _modu()
--- a/src/runtime/vlop_arm.s
+++ b/src/runtime/vlop_arm.s
@@ -28,68 +28,6 @@
 #include "funcdata.h"
 #include "textflag.h"
-// trampoline for _sfloat2. passes LR as arg0 and
-// saves registers R0-R13 and CPSR on the stack. R0-R12 and CPSR flags can
-// be changed by _sfloat2.
-TEXT runtime·_sfloat(SB), NOSPLIT, $68-0 // 4 arg + 14*4 saved regs + cpsr + return value
-	MOVW	R14, 4(R13)
-	MOVW	R0, 8(R13)
-	MOVW	$12(R13), R0
-	MOVM.IA.W	[R1-R12], (R0)
-	MOVW	$72(R13), R1 // correct for frame size
-	MOVW	R1, 60(R13)
-	WORD	$0xe10f1000 // mrs r1, cpsr
-	MOVW	R1, 64(R13)
-	// Disable preemption of this goroutine during _sfloat2 by
-	// m->locks++ and m->locks-- around the call.
-	// Rescheduling this goroutine may cause the loss of the
-	// contents of the software floating point registers in 
-	// m->freghi, m->freglo, m->fflag, if the goroutine is moved
-	// to a different m or another goroutine runs on this m.
-	// Rescheduling at ordinary function calls is okay because
-	// all registers are caller save, but _sfloat2 and the things
-	// that it runs are simulating the execution of individual
-	// program instructions, and those instructions do not expect
-	// the floating point registers to be lost.
-	// An alternative would be to move the software floating point
-	// registers into G, but they do not need to be kept at the 
-	// usual places a goroutine reschedules (at function calls),
-	// so it would be a waste of 132 bytes per G.
-	MOVW	g_m(g), R8
-	MOVW	m_locks(R8), R1
-	ADD	$1, R1
-	MOVW	R1, m_locks(R8)
-	MOVW	$1, R1
-	MOVW	R1, m_softfloat(R8)
-	BL	runtime·_sfloat2(SB)
-	MOVW	68(R13), R0
-	MOVW	g_m(g), R8
-	MOVW	m_locks(R8), R1
-	SUB	$1, R1
-	MOVW	R1, m_locks(R8)
-	MOVW	$0, R1
-	MOVW	R1, m_softfloat(R8)
-	MOVW	R0, 0(R13)
-	MOVW	64(R13), R1
-	WORD	$0xe128f001	// msr cpsr_f, r1
-	MOVW	$12(R13), R0
-	// Restore R1-R12, R0.
-	MOVM.IA.W	(R0), [R1-R12]
-	MOVW	8(R13), R0
-	RET
-// trampoline for _sfloat2 panic.
-// _sfloat2 instructs _sfloat to return here.
-// We need to push a fake saved LR onto the stack,
-// load the signal fault address into LR, and jump
-// to the real sigpanic.
-// This simulates what sighandler does for a memory fault.
-TEXT runtime·_sfloatpanic(SB),NOSPLIT|NOFRAME,$0
-	MOVW	$0, R0
-	MOVW.W	R0, -4(R13)
-	MOVW	g_sigpc(g), LR
-	B	runtime·sigpanic(SB)
 // func runtime·udiv(n, d uint32) (q, r uint32)
 // compiler knowns the register usage of this function
 // Reference: