Commit be64a19d authored by Dan Scales

cmd/compile, cmd/link, runtime: make defers low-cost through inline code and extra funcdata

Generate inline code at defer time to save the args of defer calls to unique
(autotmp) stack slots, and generate inline code at exit time to check which defer
calls were made and make the associated function/method/interface calls. We
remember that a particular defer statement was reached by storing in the deferBits
variable (always stored on the stack). At exit time, we check the bits of the
deferBits variable to determine which defer function calls to make (in reverse
order). These low-cost defers are only used for functions where no defers
appear in loops. In addition, we don't do these low-cost defers if there are too
many defer statements or too many exits in a function (to limit code increase).

When a function uses open-coded defers, we produce extra
FUNCDATA_OpenCodedDeferInfo information that specifies the number of defers, and
for each defer, the stack slots where the closure and associated args have been
stored. The funcdata also includes the location of the deferBits variable.
Therefore, for panics, we can use this funcdata to determine exactly which defers
are active, and call the appropriate functions/methods/closures with the correct
arguments for each active defer.

In order to unwind the stack correctly after a recover(), we need to add an extra
code segment to functions with open-coded defers that simply calls deferreturn()
and returns. This segment is not reachable by the normal function, but is returned
to by the runtime during recovery. We set the liveness information of this
deferreturn() to be the same as the liveness at the first function call during the
last defer exit code (so all return values and all stack slots needed by the defer
calls will be live).

I needed to increase the stackguard constant from 880 to 896, because of a small
amount of new code in deferreturn().

The -N flag disables open-coded defers. '-d defer' prints out the kind of defer
being used at each defer statement (heap-allocated, stack-allocated, or
open-coded).

Cost of defer statement  [ go test -run NONE -bench BenchmarkDefer$ runtime ]
  With normal (stack-allocated) defers only:         35.4  ns/op
  With open-coded defers:                             5.6  ns/op
  Cost of function call alone (remove defer keyword): 4.4  ns/op

Text size increase (including funcdata) for go binary without/with open-coded defers:  0.09%

The average size increase (including funcdata) for only the functions that use
open-coded defers is 1.1%.

A panic followed by a recover became noticeably slower, since panic
processing now requires a scan of the stack for open-coded defer frames. This scan
is required even if no frames are using open-coded defers:

Cost of panic and recover [ go test -run NONE -bench BenchmarkPanicRecover runtime ]
  Without open-coded defers:        62.0 ns/op
  With open-coded defers:           255  ns/op

A CGO Go-to-C-to-Go benchmark got noticeably faster because of open-coded defers:

CGO Go-to-C-to-Go benchmark [cd misc/cgo/test; go test -run NONE -bench BenchmarkCGoCallback ]
  Without open-coded defers:        443 ns/op
  With open-coded defers:           347 ns/op

Updates #14939 (defer performance)
Updates #34481 (design doc)

Change-Id: I63b1a60d1ebf28126f55ee9fd7ecffe9cb23d1ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/202340
Reviewed-by: Austin Clements <austin@google.com>
parent dc77dc2b
......@@ -880,7 +880,9 @@ func (e *Escape) augmentParamHole(k EscHole, where *Node) EscHole {
// non-transient location to avoid arguments from being
// transiently allocated.
if where.Op == ODEFER && e.loopDepth == 1 {
where.Esc = EscNever // force stack allocation of defer record (see ssa.go)
// force stack allocation of defer record, unless open-coded
// defers are used (see ssa.go)
where.Esc = EscNever
return e.later(k)
}
......
......@@ -53,6 +53,7 @@ var (
Debug_typecheckinl int
Debug_gendwarfinl int
Debug_softfloat int
Debug_defer int
)
// Debug arguments.
......@@ -83,6 +84,7 @@ var debugtab = []struct {
{"typecheckinl", "eager typechecking of inline function bodies", &Debug_typecheckinl},
{"dwarfinl", "print information about DWARF inlined function creation", &Debug_gendwarfinl},
{"softfloat", "force compiler to emit soft-float code", &Debug_softfloat},
{"defer", "print information about defer compilation", &Debug_defer},
}
const debugHelpHeader = `usage: -d arg[,arg]* and arg is <key>[=<value>]
......
......@@ -294,6 +294,9 @@ func addGCLocals() {
}
ggloblsym(x, int32(len(x.P)), attr)
}
if x := s.Func.OpenCodedDeferInfo; x != nil {
ggloblsym(x, int32(len(x.P)), obj.RODATA|obj.DUPOK)
}
}
}
......
......@@ -863,7 +863,16 @@ func (lv *Liveness) solve() {
newliveout.vars.Set(pos)
}
case ssa.BlockExit:
// panic exit - nothing to do
if lv.fn.Func.HasDefer() && !lv.fn.Func.OpenCodedDeferDisallowed() {
// All stack slots storing args for open-coded
// defers are live at panic exit (since they
// will be used in running defers)
for i, n := range lv.vars {
if n.Name.OpenDeferSlot() {
newliveout.vars.Set(int32(i))
}
}
}
default:
// A variable is live on output from this block
// if it is live on input to some successor.
......
......@@ -317,6 +317,7 @@ func deferstruct(stksize int64) *types.Type {
makefield("siz", types.Types[TUINT32]),
makefield("started", types.Types[TBOOL]),
makefield("heap", types.Types[TBOOL]),
makefield("openDefer", types.Types[TBOOL]),
makefield("sp", types.Types[TUINTPTR]),
makefield("pc", types.Types[TUINTPTR]),
// Note: the types here don't really matter. Defer structures
......@@ -325,6 +326,9 @@ func deferstruct(stksize int64) *types.Type {
makefield("fn", types.Types[TUINTPTR]),
makefield("_panic", types.Types[TUINTPTR]),
makefield("link", types.Types[TUINTPTR]),
makefield("framepc", types.Types[TUINTPTR]),
makefield("varp", types.Types[TUINTPTR]),
makefield("fd", types.Types[TUINTPTR]),
makefield("args", argtype),
}
......
......@@ -20,7 +20,7 @@ func TestSizeof(t *testing.T) {
_32bit uintptr // size on 32bit platforms
_64bit uintptr // size on 64bit platforms
}{
{Func{}, 116, 208},
{Func{}, 124, 224},
{Name{}, 32, 56},
{Param{}, 24, 48},
{Node{}, 76, 128},
......
......@@ -29,6 +29,10 @@ var ssaDumpStdout bool // whether to dump to stdout
var ssaDumpCFG string // generate CFGs for these phases
const ssaDumpFile = "ssa.html"
// The max number of defers in a function using open-coded defers. We enforce this
// limit because the deferBits bitmask is currently a single byte (to minimize code size).
const maxOpenDefers = 8
// ssaDumpInlined holds all inlined functions when ssaDump contains a function name.
var ssaDumpInlined []*Node
......@@ -167,6 +171,111 @@ func initssaconfig() {
SigPanic = sysfunc("sigpanic")
}
// getParam looks up the Field describing parameter i of the call node n (a
// function, method, or interface call). For a method call (OCALLMETH) the
// receiver counts as parameter 0 and the declared parameters follow it; for
// every other kind of call, including interface calls, only the declared
// parameters are indexed and the receiver is not included.
func getParam(n *Node, i int) *types.Field {
	sig := n.Left.Type
	switch {
	case n.Op != OCALLMETH:
		return sig.Params().Field(i)
	case i == 0:
		return sig.Recv()
	default:
		// Skip over the receiver slot to reach the declared parameter.
		return sig.Params().Field(i - 1)
	}
}
// dvarint writes v as a varint to the funcdata in symbol x, starting at offset
// off, and returns the new offset. Each byte carries seven bits of v, least
// significant group first, and every byte except the last has its high bit
// set. It panics if v is negative or larger than 1e9.
func dvarint(x *obj.LSym, off int, v int64) int {
	if v < 0 || v > 1e9 {
		panic(fmt.Sprintf("dvarint: bad offset for funcdata - %v", v))
	}
	// Emit a continuation byte for as long as more than seven bits remain.
	// The range check above keeps v below 1<<30, so this loop writes at most
	// four continuation bytes before the final byte.
	for ; v >= 1<<7; v >>= 7 {
		off = duint8(x, off, uint8(v&127|128))
	}
	return duint8(x, off, uint8(v))
}
// emitOpenDeferInfo builds the FUNCDATA symbol that describes the open-coded
// defers of the current function and attaches it to the function's LSym as
// OpenCodedDeferInfo. Panic processing uses this funcdata to find out which
// defers are active and to run them.
//
// Every value is written as a varint (values are almost always below 128, but
// stack offsets could potentially be up to 2Gbyte). Every stack "location"
// (offset) is recorded as the number of bytes below varp (pointer to the top
// of the local variables) at which the variable starts. The layout is:
//
//   - Max total argument size among all the defers
//   - Offset of the deferBits variable
//   - Number of defers in the function
//   - For each defer call, in reverse order of appearance in the function:
//       - Total argument size of the call
//       - Offset of the closure value to call
//       - Number of arguments (an interface receiver or method receiver
//         counts as the first arg)
//       - For each argument:
//           - Offset of the stored defer argument in this function's frame
//           - Size of the argument
//           - Offset at which the argument should be placed in the args
//             frame when making the call
func (s *state) emitOpenDeferInfo() {
	sym := Ctxt.Lookup(s.curfn.Func.lsym.Name + ".opendefer")
	s.curfn.Func.lsym.Func.OpenCodedDeferInfo = sym

	// The largest argument frame is the first value in the funcdata, so it
	// has to be known before anything is written.
	var maxargsize int64
	for _, d := range s.openDefers {
		if w := d.n.Left.Type.ArgWidth(); w > maxargsize {
			maxargsize = w
		}
	}

	off := dvarint(sym, 0, maxargsize)
	off = dvarint(sym, off, -s.deferBitsTemp.Xoffset)
	off = dvarint(sym, off, int64(len(s.openDefers)))

	// Walk the defers last-to-first so the records appear in the order in
	// which the runtime will run them.
	for i := len(s.openDefers) - 1; i >= 0; i-- {
		d := s.openDefers[i]
		hasIfaceRcvr := d.rcvrNode != nil

		off = dvarint(sym, off, d.n.Left.Type.ArgWidth())
		off = dvarint(sym, off, -d.closureNode.Xoffset)

		// An interface receiver is reported as an extra leading argument.
		// (A method receiver is already the first entry of d.argNodes.)
		nargs := len(d.argNodes)
		if hasIfaceRcvr {
			nargs++
		}
		off = dvarint(sym, off, int64(nargs))

		if hasIfaceRcvr {
			// The receiver is pointer-sized and goes at offset 0 of the
			// args frame.
			off = dvarint(sym, off, -d.rcvrNode.Xoffset)
			off = dvarint(sym, off, s.config.PtrSize)
			off = dvarint(sym, off, 0)
		}
		for j, argNode := range d.argNodes {
			param := getParam(d.n, j)
			off = dvarint(sym, off, -argNode.Xoffset)
			off = dvarint(sym, off, param.Type.Size())
			off = dvarint(sym, off, param.Offset)
		}
	}
}
// buildssa builds an SSA function for fn.
// worker indicates which of the backend workers is doing the processing.
func buildssa(fn *Node, worker int) *ssa.Func {
......@@ -229,11 +338,55 @@ func buildssa(fn *Node, worker int) *ssa.Func {
s.labeledNodes = map[*Node]*ssaLabel{}
s.fwdVars = map[*Node]*ssa.Value{}
s.startmem = s.entryNewValue0(ssa.OpInitMem, types.TypeMem)
s.hasOpenDefers = Debug['N'] == 0 && s.hasdefer && !s.curfn.Func.OpenCodedDeferDisallowed()
if s.hasOpenDefers && (Ctxt.Flag_shared || Ctxt.Flag_dynlink) && thearch.LinkArch.Name == "386" {
// Don't support open-coded defers for 386 ONLY when using shared
// libraries, because there is extra code (added by rewriteToUseGot())
// preceding the deferreturn/ret code that is generated by gencallret()
// that we don't track correctly.
s.hasOpenDefers = false
}
if s.hasOpenDefers && s.curfn.Func.Exit.Len() > 0 {
// Skip doing open defers if there is any extra exit code (likely
// copying heap-allocated return values or race detection), since
// we will not generate that code in the case of the extra
// deferreturn/ret segment.
s.hasOpenDefers = false
}
if s.hasOpenDefers &&
s.curfn.Func.numReturns*s.curfn.Func.numDefers > 15 {
// Since we are generating defer calls at every exit for
// open-coded defers, skip doing open-coded defers if there are
// too many returns (especially if there are multiple defers).
// Open-coded defers are most important for improving performance
// for smaller functions (which don't have many returns).
s.hasOpenDefers = false
}
s.sp = s.entryNewValue0(ssa.OpSP, types.Types[TUINTPTR]) // TODO: use generic pointer type (unsafe.Pointer?) instead
s.sb = s.entryNewValue0(ssa.OpSB, types.Types[TUINTPTR])
s.startBlock(s.f.Entry)
s.vars[&memVar] = s.startmem
if s.hasOpenDefers {
// Create the deferBits variable and stack slot. deferBits is a
// bitmask showing which of the open-coded defers in this function
// have been activated.
deferBitsTemp := tempAt(src.NoXPos, s.curfn, types.Types[TUINT8])
s.deferBitsTemp = deferBitsTemp
// For this value, AuxInt is initialized to zero by default
startDeferBits := s.entryNewValue0(ssa.OpConst8, types.Types[TUINT8])
s.vars[&deferBitsVar] = startDeferBits
s.deferBitsAddr = s.addr(deferBitsTemp, false)
s.store(types.Types[TUINT8], s.deferBitsAddr, startDeferBits)
// Make sure that the deferBits stack slot is kept alive (for use
// by panics) and stores to deferBits are not eliminated, even if
// all checking code on deferBits in the function exit can be
// eliminated, because the defer statements were all
// unconditional.
s.vars[&memVar] = s.newValue1Apos(ssa.OpVarLive, types.TypeMem, deferBitsTemp, s.mem(), false)
}
// Generate addresses of local declarations
s.decladdrs = map[*Node]*ssa.Value{}
......@@ -289,6 +442,11 @@ func buildssa(fn *Node, worker int) *ssa.Func {
// Main call to ssa package to compile function
ssa.Compile(s.f)
if s.hasOpenDefers {
s.emitOpenDeferInfo()
}
return s.f
}
......@@ -377,6 +535,29 @@ func (s *state) updateUnsetPredPos(b *ssa.Block) {
}
}
// openDeferInfo holds the information recorded about one open-coded defer: the
// deferred call itself and the stack temporaries (argtmps) in which its
// function value, interface receiver, and arguments were saved at the point of
// the defer statement.
type openDeferInfo struct {
// The node for the function call of the defer. openDeferRecord is passed
// the Left of the ODEFER node (the call), and stores that call node here.
n *Node
// If the defer call must be made through a function value (a closure call
// or an interface call), the address of the argtmp where that value is
// stored. It is left nil for calls to static functions and for method
// calls; the defer exit code makes a static call when this is nil.
closure *ssa.Value
// The node representing the argtmp where the closure is stored - used for
// function, method, or interface call, to store a closure that panic
// processing can use for this defer. Its stack offset is written to the
// open-coded defer funcdata.
closureNode *Node
// If defer call is interface call, the address of the argtmp where the
// receiver is stored
rcvr *ssa.Value
// The node representing the argtmp where the receiver is stored
rcvrNode *Node
// The addresses of the argtmps where the evaluated arguments of the defer
// function call are stored.
argVals []*ssa.Value
// The nodes representing the argtmps where the args of the defer are stored
// (same order as argVals).
argNodes []*Node
}
type state struct {
// configuration (arch) information
config *ssa.Config
......@@ -418,6 +599,9 @@ type state struct {
startmem *ssa.Value
sp *ssa.Value
sb *ssa.Value
// value representing address of where deferBits autotmp is stored
deferBitsAddr *ssa.Value
deferBitsTemp *Node
// line number stack. The current line number is top of stack
line []src.XPos
......@@ -434,6 +618,19 @@ type state struct {
cgoUnsafeArgs bool
hasdefer bool // whether the function contains a defer statement
softFloat bool
hasOpenDefers bool // whether we are doing open-coded defers
// If doing open-coded defers, list of info about the defer calls in
// scanning order. Hence, at exit we should run these defers in reverse
// order of this list
openDefers []*openDeferInfo
// For open-coded defers, this is the beginning and end blocks of the last
// defer exit code that we have generated so far. We use these to share
// code between exits if the shareDeferExits option (disabled by default)
// is on.
lastDeferExit *ssa.Block // Entry block of last defer exit code we generated
lastDeferFinalBlock *ssa.Block // Final block of last defer exit code we generated
lastDeferCount int // Number of defers encountered at that point
}
type funcLine struct {
......@@ -477,6 +674,7 @@ var (
capVar = Node{Op: ONAME, Sym: &types.Sym{Name: "cap"}}
typVar = Node{Op: ONAME, Sym: &types.Sym{Name: "typ"}}
okVar = Node{Op: ONAME, Sym: &types.Sym{Name: "ok"}}
deferBitsVar = Node{Op: ONAME, Sym: &types.Sym{Name: "deferBits"}}
)
// startBlock sets the current block we're generating code in to b.
......@@ -867,11 +1065,26 @@ func (s *state) stmt(n *Node) {
}
}
case ODEFER:
if Debug_defer > 0 {
var defertype string
if s.hasOpenDefers {
defertype = "open-coded"
} else if n.Esc == EscNever {
defertype = "stack-allocated"
} else {
defertype = "heap-allocated"
}
Warnl(n.Pos, "%s defer", defertype)
}
if s.hasOpenDefers {
s.openDeferRecord(n.Left)
} else {
d := callDefer
if n.Esc == EscNever {
d = callDeferStack
}
s.call(n.Left, d)
}
case OGO:
s.call(n.Left, callGo)
......@@ -1288,13 +1501,29 @@ func (s *state) stmt(n *Node) {
}
}
// If true, share as many open-coded defer exits as possible (with the downside of
// worse line-number information)
const shareDeferExits = false
// exit processes any code that needs to be generated just before returning.
// It returns a BlockRet block that ends the control flow. Its control value
// will be set to the final memory state.
func (s *state) exit() *ssa.Block {
if s.hasdefer {
if s.hasOpenDefers {
if shareDeferExits && s.lastDeferExit != nil && len(s.openDefers) == s.lastDeferCount {
if s.curBlock.Kind != ssa.BlockPlain {
panic("Block for an exit should be BlockPlain")
}
s.curBlock.AddEdgeTo(s.lastDeferExit)
s.endBlock()
return s.lastDeferFinalBlock
}
s.openDeferExit()
} else {
s.rtcall(Deferreturn, true, nil)
}
}
// Run exit code. Typically, this code copies heap-allocated PPARAMOUT
// variables back to the stack.
......@@ -1316,6 +1545,9 @@ func (s *state) exit() *ssa.Block {
b := s.endBlock()
b.Kind = ssa.BlockRet
b.SetControl(m)
if s.hasdefer && s.hasOpenDefers {
s.lastDeferFinalBlock = b
}
return b
}
......@@ -3841,6 +4073,230 @@ func (s *state) intrinsicArgs(n *Node) []*ssa.Value {
return args
}
// openDeferRecord adds code to evaluate and store the args for an open-coded defer
// call, and records info about the defer, so we can generate proper code on the
// exit paths. n is the sub-node of the defer node that is the actual function
// call. We will also record funcdata information on where the args are stored
// (as well as the deferBits variable), and this will enable us to run the proper
// defer calls during panics.
//
// The new openDeferInfo is appended to s.openDefers, and the bit for its index
// in that list is set in deferBits (both the SSA variable and the stack slot).
func (s *state) openDeferRecord(n *Node) {
// Do any needed expression evaluation for the args (including the
// receiver, if any). This may be evaluating something like 'autotmp_3 =
// once.mutex'. Such a statement will create a mapping in s.vars[] from
// the autotmp name to the evaluated SSA arg value, but won't do any
// stores to the stack.
s.stmtList(n.List)
var args []*ssa.Value
var argNodes []*Node
opendefer := &openDeferInfo{
n: n,
}
fn := n.Left
if n.Op == OCALLFUNC {
// We must always store the function value in a stack slot for the
// runtime panic code to use. But in the defer exit code, we will
// call the function directly if it is a static function.
closureVal := s.expr(fn)
closure := s.openDeferSave(fn, fn.Type, closureVal)
opendefer.closureNode = closure.Aux.(*Node)
// Only remember the closure address for the exit code when fn is not
// a named static function; a nil opendefer.closure selects the static
// call path at exit.
if !(fn.Op == ONAME && fn.Class() == PFUNC) {
opendefer.closure = closure
}
} else if n.Op == OCALLMETH {
if fn.Op != ODOTMETH {
Fatalf("OCALLMETH: n.Left not an ODOTMETH: %v", fn)
}
closureVal := s.getMethodClosure(fn)
// We must always store the function value in a stack slot for the
// runtime panic code to use. But in the defer exit code, we will
// call the method directly.
closure := s.openDeferSave(fn, fn.Type, closureVal)
opendefer.closureNode = closure.Aux.(*Node)
} else {
// Any remaining call kind is treated as an interface call.
if fn.Op != ODOTINTER {
Fatalf("OCALLINTER: n.Left not an ODOTINTER: %v", fn.Op)
}
closure, rcvr := s.getClosureAndRcvr(fn)
opendefer.closure = s.openDeferSave(fn, closure.Type, closure)
// Important to get the receiver type correct, so it is recognized
// as a pointer for GC purposes.
opendefer.rcvr = s.openDeferSave(nil, fn.Type.Recv().Type, rcvr)
opendefer.closureNode = opendefer.closure.Aux.(*Node)
opendefer.rcvrNode = opendefer.rcvr.Aux.(*Node)
}
// Evaluate each argument and save it in its own stack slot, keeping both
// the slot address (for the exit code) and the slot node (for funcdata).
for _, argn := range n.Rlist.Slice() {
v := s.openDeferSave(argn, argn.Type, s.expr(argn))
args = append(args, v)
argNodes = append(argNodes, v.Aux.(*Node))
}
opendefer.argVals = args
opendefer.argNodes = argNodes
// The position of this defer in s.openDefers is also its bit number in
// deferBits.
index := len(s.openDefers)
s.openDefers = append(s.openDefers, opendefer)
// Update deferBits only after evaluation and storage to stack of
// args/receiver/interface is successful.
bitvalue := s.constInt8(types.Types[TUINT8], 1<<uint(index))
newDeferBits := s.newValue2(ssa.OpOr8, types.Types[TUINT8], s.variable(&deferBitsVar, types.Types[TUINT8]), bitvalue)
s.vars[&deferBitsVar] = newDeferBits
s.store(types.Types[TUINT8], s.deferBitsAddr, newDeferBits)
}
// openDeferSave generates SSA nodes to store a value val (with type t) for an
// open-coded defer on the stack at an explicit autotmp location, so it can be
// reloaded and used for the appropriate call on exit. n is the associated node,
// which is only needed if the associated type is non-SSAable. It returns an SSA
// value representing a pointer to the stack location.
//
// The new autotmp is marked as an open-defer slot, and is marked as needing
// zeroing if t contains pointers. The returned value is an OpLocalAddr whose
// Aux is the autotmp node.
func (s *state) openDeferSave(n *Node, t *types.Type, val *ssa.Value) *ssa.Value {
argTemp := tempAt(val.Pos.WithNotStmt(), s.curfn, t)
argTemp.Name.SetOpenDeferSlot(true)
var addrArgTemp *ssa.Value
// Use OpVarLive to make sure stack slots for the args, etc. are not
// removed by dead-store elimination
if s.curBlock.ID != s.f.Entry.ID {
// Force the argtmp storing this defer function/receiver/arg to be
// declared in the entry block, so that it will be live for the
// defer exit code (which will actually access it only if the
// associated defer call has been activated).
s.defvars[s.f.Entry.ID][&memVar] = s.entryNewValue1A(ssa.OpVarDef, types.TypeMem, argTemp, s.defvars[s.f.Entry.ID][&memVar])
s.defvars[s.f.Entry.ID][&memVar] = s.entryNewValue1A(ssa.OpVarLive, types.TypeMem, argTemp, s.defvars[s.f.Entry.ID][&memVar])
addrArgTemp = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(argTemp.Type), argTemp, s.sp, s.defvars[s.f.Entry.ID][&memVar])
} else {
// Special case if we're still in the entry block. We can't use
// the above code, since s.defvars[s.f.Entry.ID] isn't defined
// until we end the entry block with s.endBlock().
s.vars[&memVar] = s.newValue1Apos(ssa.OpVarDef, types.TypeMem, argTemp, s.mem(), false)
s.vars[&memVar] = s.newValue1Apos(ssa.OpVarLive, types.TypeMem, argTemp, s.mem(), false)
addrArgTemp = s.newValue2Apos(ssa.OpLocalAddr, types.NewPtr(argTemp.Type), argTemp, s.sp, s.mem(), false)
}
if types.Haspointers(t) {
// Since we may use this argTemp during exit depending on the
// deferBits, we must define it unconditionally on entry.
// Therefore, we must make sure it is zeroed out in the entry
// block if it contains pointers, else GC may wrongly follow an
// uninitialized pointer value.
argTemp.Name.SetNeedzero(true)
}
if !canSSAType(t) {
// A non-SSAable value is copied memory-to-memory from the address of
// n rather than stored from val. n is dereferenced here, so callers
// must pass a non-nil n for non-SSAable types, and only ONAME nodes
// are accepted.
if n.Op != ONAME {
panic(fmt.Sprintf("Non-SSAable value should be a named location: %v", n))
}
a := s.addr(n, false)
s.move(t, addrArgTemp, a)
return addrArgTemp
}
// We are storing to the stack, hence we can avoid the full checks in
// storeType() (no write barrier) and do a simple store().
s.store(t, addrArgTemp, val)
return addrArgTemp
}
// openDeferExit generates SSA for processing all the open coded defers at exit.
// The code involves loading deferBits, and checking each of the bits to see if
// the corresponding defer statement was executed. For each bit that is turned
// on, the associated defer call is made.
//
// It also records the entry block of the generated exit code and the number of
// defers seen so far in s.lastDeferExit and s.lastDeferCount, and records the
// call of the first defer that is run (the last one in s.openDefers) in
// s.f.LastDeferExit.
func (s *state) openDeferExit() {
// End the current block and continue in a fresh block that starts the
// defer exit code.
deferExit := s.f.NewBlock(ssa.BlockPlain)
s.endBlock().AddEdgeTo(deferExit)
s.startBlock(deferExit)
s.lastDeferExit = deferExit
s.lastDeferCount = len(s.openDefers)
zeroval := s.constInt8(types.Types[TUINT8], 0)
// Test for and run defers in reverse order
for i := len(s.openDefers) - 1; i >= 0; i-- {
r := s.openDefers[i]
// bCond holds the code that runs this defer; bEnd is where control
// continues afterwards (and where the test for the next defer goes).
bCond := s.f.NewBlock(ssa.BlockPlain)
bEnd := s.f.NewBlock(ssa.BlockPlain)
deferBits := s.variable(&deferBitsVar, types.Types[TUINT8])
// Generate code to check if the bit associated with the current
// defer is set.
bitval := s.constInt8(types.Types[TUINT8], 1<<uint(i))
andval := s.newValue2(ssa.OpAnd8, types.Types[TUINT8], deferBits, bitval)
eqVal := s.newValue2(ssa.OpEq8, types.Types[TBOOL], andval, zeroval)
// eqVal is true when the bit is clear. The first edge added goes to
// bEnd (skipping the call) and the second to bCond; presumably the
// first successor of a BlockIf is the one taken when the control value
// is true - confirm against the ssa package.
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(eqVal)
b.AddEdgeTo(bEnd)
b.AddEdgeTo(bCond)
bCond.AddEdgeTo(bEnd)
s.startBlock(bCond)
// Clear this bit in deferBits and force store back to stack, so
// we will not try to re-run this defer call if this defer call panics.
nbitval := s.newValue1(ssa.OpCom8, types.Types[TUINT8], bitval)
maskedval := s.newValue2(ssa.OpAnd8, types.Types[TUINT8], deferBits, nbitval)
s.store(types.Types[TUINT8], s.deferBitsAddr, maskedval)
// Use this value for following tests, so we keep previous
// bits cleared.
s.vars[&deferBitsVar] = maskedval
// Generate code to call the function call of the defer, using the
// closure/receiver/args that were stored in argtmps at the point
// of the defer statement.
// Outgoing arguments are written at SP-relative offsets starting at
// argStart.
argStart := Ctxt.FixedFrameSize()
fn := r.n.Left
stksize := fn.Type.ArgWidth()
if r.rcvr != nil {
// rcvr in case of OCALLINTER
// The receiver is reloaded from its argtmp and stored at the very
// start of the outgoing args area.
v := s.load(r.rcvr.Type.Elem(), r.rcvr)
addr := s.constOffPtrSP(s.f.Config.Types.UintptrPtr, argStart)
s.store(types.Types[TUINTPTR], addr, v)
}
// Copy each saved argument from its argtmp to the offset of the
// corresponding parameter in the outgoing args area.
for j, argAddrVal := range r.argVals {
f := getParam(r.n, j)
pt := types.NewPtr(f.Type)
addr := s.constOffPtrSP(pt, argStart+f.Offset)
if !canSSAType(f.Type) {
s.move(f.Type, addr, argAddrVal)
} else {
argVal := s.load(f.Type, argAddrVal)
s.storeType(f.Type, addr, argVal, 0, false)
}
}
var call *ssa.Value
if r.closure != nil {
// Call through the saved function value: reload it, nil-check it
// where maybeNilCheckClosure requires that, and load the code
// pointer from it.
v := s.load(r.closure.Type.Elem(), r.closure)
s.maybeNilCheckClosure(v, callDefer)
codeptr := s.rawLoad(types.Types[TUINTPTR], v)
call = s.newValue3(ssa.OpClosureCall, types.TypeMem, codeptr, v, s.mem())
} else {
// Do a static call if the original call was a static function or method
call = s.newValue1A(ssa.OpStaticCall, types.TypeMem, fn.Sym.Linksym(), s.mem())
}
call.AuxInt = stksize
s.vars[&memVar] = call
// Make sure that the stack slots with pointers are kept live
// through the call (which is a pre-emption point). Also, we will
// use the first call of the last defer exit to compute liveness
// for the deferreturn, so we want all stack slots to be live.
if r.closureNode != nil {
s.vars[&memVar] = s.newValue1Apos(ssa.OpVarLive, types.TypeMem, r.closureNode, s.mem(), false)
}
if r.rcvrNode != nil {
if types.Haspointers(r.rcvrNode.Type) {
s.vars[&memVar] = s.newValue1Apos(ssa.OpVarLive, types.TypeMem, r.rcvrNode, s.mem(), false)
}
}
for _, argNode := range r.argNodes {
if types.Haspointers(argNode.Type) {
s.vars[&memVar] = s.newValue1Apos(ssa.OpVarLive, types.TypeMem, argNode, s.mem(), false)
}
}
if i == len(s.openDefers)-1 {
// Record the call of the first defer. This will be used
// to set liveness info for the deferreturn (which is also
// used for any location that causes a runtime panic)
s.f.LastDeferExit = call
}
// Finish the call block and continue generating code in bEnd.
s.endBlock()
s.startBlock(bEnd)
}
}
// Calls the function n using the specified call type.
// Returns the address of the return value (or nil if none).
func (s *state) call(n *Node, k callKind) *ssa.Value {
......@@ -3856,11 +4312,10 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
break
}
closure = s.expr(fn)
if k != callDefer && k != callDeferStack && (thearch.LinkArch.Family == sys.Wasm || objabi.GOOS == "aix" && k != callGo) {
// Deferred nil function needs to panic when the function is invoked, not the point of defer statement.
// On AIX, the closure needs to be verified as fn can be nil, except if it's a call go. This needs to be handled by the runtime to have the "go of nil func value" error.
// TODO(neelance): On other architectures this should be eliminated by the optimization steps
s.nilCheck(closure)
if k != callDefer && k != callDeferStack {
// Deferred nil function needs to panic when the function is invoked,
// not the point of defer statement.
s.maybeNilCheckClosure(closure, k)
}
case OCALLMETH:
if fn.Op != ODOTMETH {
......@@ -3870,35 +4325,20 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
sym = fn.Sym
break
}
// Make a name n2 for the function.
// fn.Sym might be sync.(*Mutex).Unlock.
// Make a PFUNC node out of that, then evaluate it.
// We get back an SSA value representing &sync.(*Mutex).Unlock·f.
// We can then pass that to defer or go.
n2 := newnamel(fn.Pos, fn.Sym)
n2.Name.Curfn = s.curfn
n2.SetClass(PFUNC)
// n2.Sym already existed, so it's already marked as a function.
n2.Pos = fn.Pos
n2.Type = types.Types[TUINT8] // dummy type for a static closure. Could use runtime.funcval if we had it.
closure = s.expr(n2)
closure = s.getMethodClosure(fn)
// Note: receiver is already present in n.Rlist, so we don't
// want to set it here.
case OCALLINTER:
if fn.Op != ODOTINTER {
s.Fatalf("OCALLINTER: n.Left not an ODOTINTER: %v", fn.Op)
}
i := s.expr(fn.Left)
itab := s.newValue1(ssa.OpITab, types.Types[TUINTPTR], i)
s.nilCheck(itab)
itabidx := fn.Xoffset + 2*int64(Widthptr) + 8 // offset of fun field in runtime.itab
itab = s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.UintptrPtr, itabidx, itab)
var iclosure *ssa.Value
iclosure, rcvr = s.getClosureAndRcvr(fn)
if k == callNormal {
codeptr = s.load(types.Types[TUINTPTR], itab)
codeptr = s.load(types.Types[TUINTPTR], iclosure)
} else {
closure = itab
closure = iclosure
}
rcvr = s.newValue1(ssa.OpIData, types.Types[TUINTPTR], i)
}
dowidth(fn.Type)
stksize := fn.Type.ArgWidth() // includes receiver, args, and results
......@@ -3924,18 +4364,22 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
s.constInt32(types.Types[TUINT32], int32(stksize)))
// 1: started, set in deferprocStack
// 2: heap, set in deferprocStack
// 3: sp, set in deferprocStack
// 4: pc, set in deferprocStack
// 5: fn
// 3: openDefer
// 4: sp, set in deferprocStack
// 5: pc, set in deferprocStack
// 6: fn
s.store(closure.Type,
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(5), addr),
s.newValue1I(ssa.OpOffPtr, closure.Type.PtrTo(), t.FieldOff(6), addr),
closure)
// 6: panic, set in deferprocStack
// 7: link, set in deferprocStack
// 7: panic, set in deferprocStack
// 8: link, set in deferprocStack
// 9: framepc
// 10: varp
// 11: fd
// Then, store all the arguments of the defer call.
ft := fn.Type
off := t.FieldOff(8)
off := t.FieldOff(12)
args := n.Rlist.Slice()
// Set receiver (for interface calls). Always a pointer.
......@@ -4050,6 +4494,44 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
return s.constOffPtrSP(types.NewPtr(fp.Type), fp.Offset+Ctxt.FixedFrameSize())
}
// maybeNilCheckClosure emits a nil check of closure in the
// architecture-dependent situations that need one: always on Wasm, and on AIX
// for every call kind except callGo. In all other cases it emits nothing.
func (s *state) maybeNilCheckClosure(closure *ssa.Value, k callKind) {
	onWasm := thearch.LinkArch.Family == sys.Wasm
	// On AIX, the closure needs to be verified as fn can be nil, except if
	// it's a call go. This needs to be handled by the runtime to have the
	// "go of nil func value" error.
	onAIXNonGo := objabi.GOOS == "aix" && k != callGo
	if !onWasm && !onAIXNonGo {
		return
	}
	// TODO(neelance): On other architectures this should be eliminated by the optimization steps
	s.nilCheck(closure)
}
// getMethodClosure returns a value representing the closure for the method
// call selected by fn.
func (s *state) getMethodClosure(fn *Node) *ssa.Value {
	// fn.Sym names the method itself (it might be sync.(*Mutex).Unlock).
	// Wrapping that symbol in a fresh PFUNC name node and evaluating the node
	// gives back an SSA value representing &sync.(*Mutex).Unlock·f, which can
	// then be passed to defer or go.
	method := newnamel(fn.Pos, fn.Sym)
	method.Pos = fn.Pos
	// The symbol already existed, so it's already marked as a function.
	method.SetClass(PFUNC)
	method.Name.Curfn = s.curfn
	// Dummy type for a static closure. Could use runtime.funcval if we had it.
	method.Type = types.Types[TUINT8]
	return s.expr(method)
}
// getClosureAndRcvr evaluates the interface operand of the interface method
// selector fn and returns, in order, the value for the closure to call (a
// pointer into the itab) and the value for the receiver (the interface's data
// word). A nil check of the itab is emitted before it is used.
func (s *state) getClosureAndRcvr(fn *Node) (*ssa.Value, *ssa.Value) {
	iface := s.expr(fn.Left)

	tab := s.newValue1(ssa.OpITab, types.Types[TUINTPTR], iface)
	s.nilCheck(tab)

	// Offset of this method's entry within the fun field of runtime.itab.
	funOff := fn.Xoffset + 2*int64(Widthptr) + 8
	fun := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.UintptrPtr, funOff, tab)

	data := s.newValue1(ssa.OpIData, types.Types[TUINTPTR], iface)
	return fun, data
}
// etypesign returns the signed-ness of e, for integer/pointer etypes.
// -1 means signed, +1 means unsigned, 0 means non-integer/non-pointer.
func etypesign(e types.EType) int8 {
......@@ -5223,6 +5705,16 @@ func (s *state) addNamedValue(n *Node, v *ssa.Value) {
s.f.NamedValues[loc] = append(values, v)
}
// gencallret appends a CALL to the external symbol sym followed by a RET to
// pp, and returns the RET instruction. Nothing branches to the generated
// pair, so it is disconnected from the rest of the function's code.
func gencallret(pp *Progs, sym *obj.LSym) *obj.Prog {
	call := pp.Prog(obj.ACALL)
	call.To.Type = obj.TYPE_MEM
	call.To.Name = obj.NAME_EXTERN
	call.To.Sym = sym

	return pp.Prog(obj.ARET)
}
// Branch is an unresolved branch.
type Branch struct {
P *obj.Prog // branch instruction
......@@ -5258,6 +5750,11 @@ type SSAGenState struct {
// wasm: The number of values on the WebAssembly stack. This is only used as a safeguard.
OnWasmStackSkipped int
// Liveness index for the first function call in the final defer exit code
// path that we generated. All defer functions and args should be live at
// this point. This will be used to set the liveness for the deferreturn.
lastDeferLiveness LivenessIndex
}
// Prog appends a new Prog.
......@@ -5385,6 +5882,17 @@ func genssa(f *ssa.Func, pp *Progs) {
s.livenessMap = liveness(e, f, pp)
emitStackObjects(e, pp)
openDeferInfo := e.curfn.Func.lsym.Func.OpenCodedDeferInfo
if openDeferInfo != nil {
// This function uses open-coded defers -- write out the funcdata
// info that we computed at the end of genssa.
p := pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_OpenCodedDeferInfo)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = openDeferInfo
}
// Remember where each block starts.
s.bstart = make([]*obj.Prog, f.NumBlocks())
s.pp = pp
......@@ -5449,6 +5957,12 @@ func genssa(f *ssa.Func, pp *Progs) {
// Attach this safe point to the next
// instruction.
s.pp.nextLive = s.livenessMap.Get(v)
// Remember the liveness index of the first defer call of
// the last defer exit
if v.Block.Func.LastDeferExit != nil && v == v.Block.Func.LastDeferExit {
s.lastDeferLiveness = s.pp.nextLive
}
switch v.Op {
case ssa.OpInitMem:
// memory arg needs no code
......@@ -5532,6 +6046,13 @@ func genssa(f *ssa.Func, pp *Progs) {
// nop (which will never execute) after the call.
thearch.Ginsnop(pp)
}
if openDeferInfo != nil {
// When doing open-coded defers, generate a disconnected call to
// deferreturn and a return. This will be used during panic
// recovery to unwind the stack and return back to the runtime.
s.pp.nextLive = s.lastDeferLiveness
gencallret(pp, Deferreturn)
}
if inlMarks != nil {
// We have some inline marks. Try to find other instructions we're
......
......@@ -295,6 +295,7 @@ const (
nameAddrtaken // address taken, even if not moved to heap
nameInlFormal // OPAUTO created by inliner, derived from callee formal
nameInlLocal // OPAUTO created by inliner, derived from callee local
nameOpenDeferSlot // if temporary var storing info for open-coded defers
)
func (n *Name) Captured() bool { return n.flags&nameCaptured != 0 }
......@@ -310,6 +311,7 @@ func (n *Name) Assigned() bool { return n.flags&nameAssigned != 0 }
func (n *Name) Addrtaken() bool { return n.flags&nameAddrtaken != 0 }
func (n *Name) InlFormal() bool { return n.flags&nameInlFormal != 0 }
func (n *Name) InlLocal() bool { return n.flags&nameInlLocal != 0 }
// OpenDeferSlot reports whether n is a temporary variable storing info for open-coded defers.
func (n *Name) OpenDeferSlot() bool { return n.flags&nameOpenDeferSlot != 0 }
func (n *Name) SetCaptured(b bool) { n.flags.set(nameCaptured, b) }
func (n *Name) SetReadonly(b bool) { n.flags.set(nameReadonly, b) }
......@@ -324,6 +326,7 @@ func (n *Name) SetAssigned(b bool) { n.flags.set(nameAssigned, b) }
func (n *Name) SetAddrtaken(b bool) { n.flags.set(nameAddrtaken, b) }
func (n *Name) SetInlFormal(b bool) { n.flags.set(nameInlFormal, b) }
func (n *Name) SetInlLocal(b bool) { n.flags.set(nameInlLocal, b) }
// SetOpenDeferSlot sets or clears the nameOpenDeferSlot flag on n according to b.
func (n *Name) SetOpenDeferSlot(b bool) { n.flags.set(nameOpenDeferSlot, b) }
type Param struct {
Ntype *Node
......@@ -492,6 +495,8 @@ type Func struct {
Pragma syntax.Pragma // go:xxx function annotations
flags bitset16
numDefers int // number of defer calls in the function
numReturns int // number of explicit returns in the function
// nwbrCalls records the LSyms of functions called by this
// function for go:nowritebarrierrec analysis. Only filled in
......@@ -532,6 +537,7 @@ const (
funcInlinabilityChecked // inliner has already determined whether the function is inlinable
funcExportInline // include inline body in export data
funcInstrumentBody // add race/msan instrumentation during SSA construction
funcOpenCodedDeferDisallowed // can't do open-coded defers
)
func (f *Func) Dupok() bool { return f.flags&funcDupok != 0 }
......@@ -544,6 +550,7 @@ func (f *Func) NilCheckDisabled() bool { return f.flags&funcNilCheckDisabled
func (f *Func) InlinabilityChecked() bool { return f.flags&funcInlinabilityChecked != 0 }
func (f *Func) ExportInline() bool { return f.flags&funcExportInline != 0 }
func (f *Func) InstrumentBody() bool { return f.flags&funcInstrumentBody != 0 }
// OpenCodedDeferDisallowed reports whether f has been marked as unable to use open-coded defers.
func (f *Func) OpenCodedDeferDisallowed() bool { return f.flags&funcOpenCodedDeferDisallowed != 0 }
func (f *Func) SetDupok(b bool) { f.flags.set(funcDupok, b) }
func (f *Func) SetWrapper(b bool) { f.flags.set(funcWrapper, b) }
......@@ -555,6 +562,7 @@ func (f *Func) SetNilCheckDisabled(b bool) { f.flags.set(funcNilCheckDisabled
func (f *Func) SetInlinabilityChecked(b bool) { f.flags.set(funcInlinabilityChecked, b) }
func (f *Func) SetExportInline(b bool) { f.flags.set(funcExportInline, b) }
func (f *Func) SetInstrumentBody(b bool) { f.flags.set(funcInstrumentBody, b) }
// SetOpenCodedDeferDisallowed sets or clears the funcOpenCodedDeferDisallowed flag on f according to b.
func (f *Func) SetOpenCodedDeferDisallowed(b bool) { f.flags.set(funcOpenCodedDeferDisallowed, b) }
func (f *Func) setWBPos(pos src.XPos) {
if Debug_wb != 0 {
......
......@@ -214,6 +214,18 @@ func walkstmt(n *Node) *Node {
case ODEFER:
Curfn.Func.SetHasDefer(true)
Curfn.Func.numDefers++
if Curfn.Func.numDefers > maxOpenDefers {
// Don't allow open-coded defers if there are more than
// 8 defers in the function, since we use a single
// byte to record active defers.
Curfn.Func.SetOpenCodedDeferDisallowed(true)
}
if n.Esc != EscNever {
// If n.Esc is not EscNever, then this defer occurs in a loop,
// so open-coded defers cannot be used in this function.
Curfn.Func.SetOpenCodedDeferDisallowed(true)
}
fallthrough
case OGO:
switch n.Left.Op {
......@@ -255,6 +267,7 @@ func walkstmt(n *Node) *Node {
walkstmtlist(n.Rlist.Slice())
case ORETURN:
Curfn.Func.numReturns++
if n.List.Len() == 0 {
break
}
......
......@@ -170,6 +170,11 @@ func elimDeadAutosGeneric(f *Func) {
return
case OpVarLive:
// Don't delete the auto if it needs to be kept alive.
// We depend on this check to keep the autotmp stack slots
// for open-coded defers from being removed (since they
// may not be used by the inline code, but will be used by
// panic processing).
n, ok := v.Aux.(GCNode)
if !ok || n.StorageClass() != ClassAuto {
return
......
......@@ -32,6 +32,14 @@ type Func struct {
Type *types.Type // type signature of the function.
Blocks []*Block // unordered set of all basic blocks (note: not indexable by ID)
Entry *Block // the entry basic block
// If we are using open-coded defers, this is the first call to a deferred
// function in the final defer exit sequence that we generated. This call
// should be after all defer statements, and will have all args, etc. of
// all defer calls as live. The liveness info of this call will be used
// for the deferreturn/ret segment generated for functions with open-coded
// defers.
LastDeferExit *Value
bid idAlloc // block ID allocator
vid idAlloc // value ID allocator
......
......@@ -409,6 +409,7 @@ type FuncInfo struct {
GCLocals *LSym
GCRegs *LSym
StackObjects *LSym
OpenCodedDeferInfo *LSym
}
type InlMark struct {
......
......@@ -419,6 +419,9 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
// to a PLT, so make sure the GOT pointer is loaded into BX.
// RegTo2 is set on the replacement call insn to stop it being
// processed when it is in turn passed to progedit.
//
// We disable open-coded defers in buildssa() on 386 ONLY with shared
// libraries because of this extra code added before deferreturn calls.
if ctxt.Arch.Family == sys.AMD64 || (p.To.Sym != nil && p.To.Sym.Local()) || p.RegTo2 != 0 {
return
}
......
......@@ -20,6 +20,7 @@ const (
FUNCDATA_RegPointerMaps = 2
FUNCDATA_StackObjects = 3
FUNCDATA_InlTree = 4
FUNCDATA_OpenCodedDeferInfo = 5
// ArgsSizeUnknown is set in Func.argsize to mark all functions
// whose argument size is unknown (C vararg functions, and
......
......@@ -85,6 +85,12 @@ func GetFuncID(name, file string) FuncID {
return FuncID_panicwrap
case "runtime.handleAsyncEvent":
return FuncID_handleAsyncEvent
case "runtime.deferreturn":
// Don't show in the call stack (used when invoking defer functions)
return FuncID_wrapper
case "runtime.runOpenDeferFrame":
// Don't show in the call stack (used when invoking defer functions)
return FuncID_wrapper
}
if file == "<autogenerated>" {
return FuncID_wrapper
......
......@@ -18,7 +18,7 @@ const (
)
// Initialize StackGuard and StackLimit according to target system.
var StackGuard = 880*stackGuardMultiplier() + StackSystem
var StackGuard = 896*stackGuardMultiplier() + StackSystem
var StackLimit = StackGuard - StackSystem - StackSmall
// stackGuardMultiplier returns a multiplier to apply to the default
......
......@@ -11,6 +11,7 @@ import (
"cmd/internal/sys"
"cmd/link/internal/sym"
"encoding/binary"
"fmt"
"log"
"os"
"path/filepath"
......@@ -255,13 +256,23 @@ func (ctxt *Link) pclntab() {
}
if r.Type.IsDirectJump() && r.Sym != nil && r.Sym.Name == "runtime.deferreturn" {
if ctxt.Arch.Family == sys.Wasm {
deferreturn = lastWasmAddr
deferreturn = lastWasmAddr - 1
} else {
// Note: the relocation target is in the call instruction, but
// is not necessarily the whole instruction (for instance, on
// x86 the relocation applies to bytes [1:5] of the 5 byte call
// instruction).
deferreturn = uint32(r.Off)
switch ctxt.Arch.Family {
case sys.AMD64, sys.I386:
deferreturn--
case sys.PPC64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64, sys.RISCV64:
// no change
case sys.S390X:
deferreturn -= 2
default:
panic(fmt.Sprint("Unhandled architecture:", ctxt.Arch.Family))
}
}
break // only need one
}
......
......@@ -498,7 +498,8 @@ func (ctxt *Link) symtab() {
case strings.HasPrefix(s.Name, "gcargs."),
strings.HasPrefix(s.Name, "gclocals."),
strings.HasPrefix(s.Name, "gclocals·"),
strings.HasPrefix(s.Name, "inltree."):
strings.HasPrefix(s.Name, "inltree."),
strings.HasSuffix(s.Name, ".opendefer"):
s.Type = sym.SGOFUNC
s.Attr |= sym.AttrNotInSymbolTable
s.Outer = symgofunc
......
......@@ -188,3 +188,32 @@ func TestCallersDivZeroPanic(t *testing.T) {
t.Fatal("did not see divide-by-sizer panic")
}
}
// TestCallersDeferNilFuncPanic defers a call of a nil func value and checks,
// from a recovering deferred closure, that (1) the panic happens at function
// exit rather than at the defer statement, and (2) the leading frames reported
// by runtime.Callers match the expected list.
func TestCallersDeferNilFuncPanic(t *testing.T) {
// Make sure we don't have any extra frames on the stack. We cut off the check
// at runtime.sigpanic, because non-open-coded defers (which may be used in
// non-opt or race checker mode) include an extra 'jmpdefer' frame (which is
// where the nil pointer deref happens). We could consider hiding jmpdefer in
// tracebacks.
state := 1
want := []string{"runtime.Callers", "runtime_test.TestCallersDeferNilFuncPanic.func1",
"runtime.gopanic", "runtime.panicmem", "runtime.sigpanic"}
// This closure is deferred first, so it runs after the nil deferred call
// below has panicked; it recovers and inspects the call stack.
defer func() {
if r := recover(); r == nil {
t.Fatal("did not panic")
}
pcs := make([]uintptr, 20)
pcs = pcs[:runtime.Callers(0, pcs)]
testCallersEqual(t, pcs, want)
if state == 1 {
t.Fatal("nil defer func panicked at defer time rather than function exit time")
}
}()
// f is never assigned, so the deferred call below is a call of a nil func value.
var f func()
defer f()
// Use the value of 'state' to make sure nil defer func f causes panic at
// function exit, rather than at the defer statement.
state = 2
}
......@@ -15,11 +15,11 @@ import (
// unconditional panic (hence no return from the function)
func TestUnconditionalPanic(t *testing.T) {
defer func() {
if recover() == nil {
if recover() != "testUnconditional" {
t.Fatal("expected unconditional panic")
}
}()
panic("panic should be recovered")
panic("testUnconditional")
}
var glob int = 3
......@@ -30,7 +30,7 @@ func TestOpenAndNonOpenDefers(t *testing.T) {
for {
// Non-open defer because in a loop
defer func(n int) {
if recover() == nil {
if recover() != "testNonOpenDefer" {
t.Fatal("expected testNonOpen panic")
}
}(3)
......@@ -45,7 +45,7 @@ func TestOpenAndNonOpenDefers(t *testing.T) {
//go:noinline
func testOpen(t *testing.T, arg int) {
defer func(n int) {
if recover() == nil {
if recover() != "testOpenDefer" {
t.Fatal("expected testOpen panic")
}
}(4)
......@@ -61,7 +61,7 @@ func TestNonOpenAndOpenDefers(t *testing.T) {
for {
// Non-open defer because in a loop
defer func(n int) {
if recover() == nil {
if recover() != "testNonOpenDefer" {
t.Fatal("expected testNonOpen panic")
}
}(3)
......@@ -80,7 +80,7 @@ func TestConditionalDefers(t *testing.T) {
list = make([]int, 0, 10)
defer func() {
if recover() == nil {
if recover() != "testConditional" {
t.Fatal("expected panic")
}
want := []int{4, 2, 1}
......@@ -106,7 +106,7 @@ func testConditionalDefers(n int) {
defer doappend(4)
}
}
panic("test")
panic("testConditional")
}
// Test that there is no compile-time or run-time error if an open-coded defer
......@@ -174,3 +174,52 @@ func TestRecoverMatching(t *testing.T) {
}()
panic("panic1")
}
// nonSSAable is a 128-byte array type. TestNonSSAableArgs passes a value of
// this type as a defer argument to exercise, per its name, an argument that
// is not SSA-able.
type nonSSAable [128]byte
// bigStruct is a struct of six int64 fields. TestNonSSAableArgs passes a
// value of this type as a defer argument.
type bigStruct struct {
x, y, z, w, p, q int64
}
// mknonSSAable increments globint1, so callers can count how many times it
// ran, and returns a nonSSAable whose element 4 is 5 and whose other
// elements are zero.
func mknonSSAable() nonSSAable {
	globint1++
	var arr nonSSAable
	arr[4] = 5
	return arr
}
var globint1, globint2 int
// sideeffect increments globint2 and returns n unchanged, so a caller can
// count how many times an expression containing the call was evaluated.
//
//go:noinline
func sideeffect(n int64) int64 {
globint2++
return n
}
// Test that nonSSAable arguments to defer are handled correctly and only evaluated once.
//
// Two defers take large by-value arguments (a nonSSAable array and a
// bigStruct) whose construction has a counted side effect. The checking
// closure is deferred first, so it runs last and can verify both the
// evaluation counts and the values the other two deferred calls saw.
func TestNonSSAableArgs(t *testing.T) {
	globint1 = 0
	globint2 = 0
	var save1 byte
	var save2 int64

	// Runs after the two defers below have stored into save1 and save2.
	defer func() {
		if globint1 != 1 {
			t.Fatalf("globint1: wanted: 1, got %v", globint1)
		}
		if save1 != 5 {
			t.Fatalf("save1: wanted: 5, got %v", save1)
		}
		if globint2 != 1 {
			t.Fatalf("globint2: wanted: 1, got %v", globint2)
		}
		if save2 != 2 {
			t.Fatalf("save2: wanted: 2, got %v", save2)
		}
	}()

	// mknonSSAable bumps globint1 each time it is called.
	defer func(n nonSSAable) {
		save1 = n[4]
	}(mknonSSAable())

	// sideeffect bumps globint2 each time it is called.
	defer func(b bigStruct) {
		save2 = b.y
	}(bigStruct{1, 2, 3, 4, 5, sideeffect(6)})
}
......@@ -17,6 +17,7 @@
#define FUNCDATA_RegPointerMaps 2
#define FUNCDATA_StackObjects 3
#define FUNCDATA_InlTree 4
#define FUNCDATA_OpenCodedDeferInfo 5 /* info for func with open-coded defers */
// Pseudo-assembly statements.
......
......@@ -10,6 +10,19 @@ import (
"unsafe"
)
// We have two different ways of doing defers. The older way involves creating a
// defer record at the time that a defer statement is executing and adding it to a
// defer chain. This chain is inspected by the deferreturn call at all function
// exits in order to run the appropriate defer calls. A cheaper way (which we call
// open-coded defers) is used for functions in which no defer statements occur in
// loops. In that case, we simply store the defer function/arg information into
// specific stack slots at the point of each defer statement, as well as setting a
// bit in a bitmask. At each function exit, we add inline code to directly make
// the appropriate defer calls based on the bitmask and fn/arg information stored
// on the stack. During panic/Goexit processing, the appropriate defer calls are
// made using extra funcdata info that indicates the exact stack slots that
// contain the bitmask and defer fn/args.
// Check to make sure we can really generate a panic. If the panic
// was generated from the runtime, or from inside malloc, then convert
// to a throw of msg.
......@@ -263,19 +276,24 @@ func deferprocStack(d *_defer) {
// are initialized here.
d.started = false
d.heap = false
d.openDefer = false
d.sp = getcallersp()
d.pc = getcallerpc()
d.framepc = 0
d.varp = 0
// The lines below implement:
// d.panic = nil
// d.fd = nil
// d.link = gp._defer
// gp._defer = d
// But without write barriers. The first two are writes to
// But without write barriers. The first three are writes to
// the stack so they don't need a write barrier, and furthermore
// are to uninitialized memory, so they must not use a write barrier.
// The third write does not require a write barrier because we
// The fourth write does not require a write barrier because we
// explicitly mark all the defer structures, so we don't need to
// keep track of pointers to them with a write barrier.
*(*uintptr)(unsafe.Pointer(&d._panic)) = 0
*(*uintptr)(unsafe.Pointer(&d.fd)) = 0
*(*uintptr)(unsafe.Pointer(&d.link)) = uintptr(unsafe.Pointer(gp._defer))
*(*uintptr)(unsafe.Pointer(&gp._defer)) = uintptr(unsafe.Pointer(d))
......@@ -463,8 +481,12 @@ func freedefer(d *_defer) {
// started causing a nosplit stack overflow via typedmemmove.
d.siz = 0
d.started = false
d.openDefer = false
d.sp = 0
d.pc = 0
d.framepc = 0
d.varp = 0
d.fd = nil
// d._panic and d.fn must be nil already.
// If not, we would have called freedeferpanic or freedeferfn above,
// both of which throw.
......@@ -493,9 +515,11 @@ func freedeferfn() {
// to have been called by the caller of deferreturn at the point
// just before deferreturn was called. The effect is that deferreturn
// is called again and again until there are no more deferred functions.
// Cannot split the stack because we reuse the caller's frame to
// call the deferred function.
//
// Declared as nosplit, because the function should not be preempted once we start
// modifying the caller's frame in order to reuse the frame to call the deferred
// function.
//
// The single argument isn't actually used - it just has its address
// taken so it can be matched against pending defers.
//go:nosplit
......@@ -509,6 +533,15 @@ func deferreturn(arg0 uintptr) {
if d.sp != sp {
return
}
if d.openDefer {
done := runOpenDeferFrame(gp, d)
if !done {
throw("unfinished open-coded defers in deferreturn")
}
gp._defer = d.link
freedefer(d)
return
}
// Moving arguments around.
//
......@@ -544,6 +577,8 @@ func Goexit() {
// This code is similar to gopanic, see that implementation
// for detailed comments.
gp := getg()
addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
for {
d := gp._defer
if d == nil {
......@@ -554,13 +589,26 @@ func Goexit() {
d._panic.aborted = true
d._panic = nil
}
if !d.openDefer {
d.fn = nil
gp._defer = d.link
freedefer(d)
continue
}
}
d.started = true
if d.openDefer {
done := runOpenDeferFrame(gp, d)
if !done {
// We should always run all defers in the frame,
// since there is no panic associated with this
// defer that can be recovered.
throw("unfinished open-coded defers in Goexit")
}
addOneOpenDeferFrame(gp, 0, nil)
} else {
reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
}
if gp._defer != d {
throw("bad defer entry in Goexit")
}
......@@ -607,6 +655,177 @@ func printpanics(p *_panic) {
print("\n")
}
// addOneOpenDeferFrame scans the stack for the first frame (if any) with
// open-coded defers and if it finds one, adds a single record to the defer chain
// for that frame. If sp is non-nil, it starts the stack scan from the frame
// specified by sp. If sp is nil, it uses the sp from the current defer record
// (which has just been finished). Hence, it continues the stack scan from the
// frame of the defer that just finished. It skips any frame that already has an
// open-coded _defer record, which would have been created from a previous
// (unrecovered) panic.
//
// Note: All entries of the defer chain (including this new open-coded entry) have
// their pointers (including sp) adjusted properly if the stack moves while
// running deferred functions. Also, it is safe to pass in the sp arg (which is
// the direct result of calling getcallersp()), because all pointer variables
// (including arguments) are adjusted as needed during stack copies.
func addOneOpenDeferFrame(gp *g, pc uintptr, sp unsafe.Pointer) {
var prevDefer *_defer
if sp == nil {
// Continuing a scan: restart from the pc/sp saved in the defer
// record at the head of the chain. The pc argument is ignored.
prevDefer = gp._defer
pc = prevDefer.framepc
sp = unsafe.Pointer(prevDefer.sp)
}
systemstack(func() {
// The callback returns true to keep scanning and false to stop.
gentraceback(pc, uintptr(sp), 0, gp, 0, nil, 0x7fffffff,
func(frame *stkframe, unused unsafe.Pointer) bool {
if prevDefer != nil && prevDefer.sp == frame.sp {
// Skip the frame for the previous defer that
// we just finished (and was used to set
// where we restarted the stack scan)
return true
}
f := frame.fn
fd := funcdata(f, _FUNCDATA_OpenCodedDeferInfo)
if fd == nil {
// No open-coded defer funcdata for this frame; keep scanning.
return true
}
// Insert the open defer record in the
// chain, in order sorted by sp.
d := gp._defer
var prev *_defer
for d != nil {
dsp := d.sp
if frame.sp < dsp {
break
}
if frame.sp == dsp {
// A record for this frame already exists. It must be an
// open-coded one; leave it in place and keep scanning.
if !d.openDefer {
throw("duplicated defer entry")
}
return true
}
prev = d
d = d.link
}
if frame.fn.deferreturn == 0 {
throw("missing deferreturn")
}
// The first varint of the funcdata is the maximum argument size;
// it is used to size the new defer record.
maxargsize, _ := readvarintUnsafe(fd)
d1 := newdefer(int32(maxargsize))
d1.openDefer = true
d1._panic = nil
// These are the pc/sp to set after we've
// run a defer in this frame that did a
// recover. We return to a special
// deferreturn that runs any remaining
// defers and then returns from the
// function.
d1.pc = frame.fn.entry + uintptr(frame.fn.deferreturn)
d1.varp = frame.varp
d1.fd = fd
// Save the SP/PC associated with current frame,
// so we can continue stack trace later if needed.
d1.framepc = frame.pc
d1.sp = frame.sp
// Link d1 in between prev and d (or at the head if prev is nil).
d1.link = d
if prev == nil {
gp._defer = d1
} else {
prev.link = d1
}
// Stop stack scanning after adding one open defer record
return false
},
nil, 0)
})
}
// readvarintUnsafe decodes the varint-encoded uint32 that starts at fd and
// returns the decoded value together with a pointer to the first byte after
// the encoding. Each byte contributes its low seven bits, least significant
// group first; a byte with the high bit clear ends the encoding. It panics
// with "Bad varint" once five continuation bytes have been consumed.
//
// There is a similar function runtime.readvarint, which takes a slice of bytes,
// rather than an unsafe pointer. These functions are duplicated, because one of
// the two use cases for the functions would get slower if the functions were
// combined.
func readvarintUnsafe(fd unsafe.Pointer) (uint32, unsafe.Pointer) {
	val, shift := uint32(0), 0
	for {
		cur := *(*uint8)(fd)
		fd = add(fd, unsafe.Sizeof(cur))
		if cur&0x80 == 0 {
			// Final byte: no continuation bit.
			return val + uint32(cur)<<shift, fd
		}
		val += uint32(cur&0x7f) << shift
		shift += 7
		if shift > 28 {
			panic("Bad varint")
		}
	}
}
// runOpenDeferFrame runs the active open-coded defers in the frame specified by
// d. It normally processes all active defers in the frame, but stops immediately
// if a defer does a successful recover. It returns true if there are no
// remaining defers to run in the frame.
//
// The funcdata at d.fd is read as a sequence of varints: maxargsize,
// deferBitsOffset, nDefers, and then for each defer: argWidth, closureOffset,
// nArgs, followed by nArgs triples of (argOffset, argLen, argCallOffset).
// Stack-slot offsets are subtracted from d.varp to locate the slot.
func runOpenDeferFrame(gp *g, d *_defer) bool {
done := true
fd := d.fd
// Skip the maxargsize
_, fd = readvarintUnsafe(fd)
deferBitsOffset, fd := readvarintUnsafe(fd)
nDefers, fd := readvarintUnsafe(fd)
// Load the frame's current deferBits byte; bit i set means defer i is active.
deferBits := *(*uint8)(unsafe.Pointer(d.varp - uintptr(deferBitsOffset)))
// The per-defer records are consumed in funcdata order while i counts
// down from nDefers-1 to 0.
for i := int(nDefers) - 1; i >= 0; i-- {
// read the funcdata info for this defer
var argWidth, closureOffset, nArgs uint32
argWidth, fd = readvarintUnsafe(fd)
closureOffset, fd = readvarintUnsafe(fd)
nArgs, fd = readvarintUnsafe(fd)
if deferBits&(1<<i) == 0 {
// Defer i is not active: step over its arg triples so fd is
// positioned at the next defer's record.
for j := uint32(0); j < nArgs; j++ {
_, fd = readvarintUnsafe(fd)
_, fd = readvarintUnsafe(fd)
_, fd = readvarintUnsafe(fd)
}
continue
}
closure := *(**funcval)(unsafe.Pointer(d.varp - uintptr(closureOffset)))
d.fn = closure
deferArgs := deferArgs(d)
// If there is an interface receiver or method receiver, it is
// described/included as the first arg.
for j := uint32(0); j < nArgs; j++ {
var argOffset, argLen, argCallOffset uint32
argOffset, fd = readvarintUnsafe(fd)
argLen, fd = readvarintUnsafe(fd)
argCallOffset, fd = readvarintUnsafe(fd)
// Copy argLen bytes from the frame's stack slot to offset
// argCallOffset within the defer record's argument area.
memmove(unsafe.Pointer(uintptr(deferArgs)+uintptr(argCallOffset)),
unsafe.Pointer(d.varp-uintptr(argOffset)),
uintptr(argLen))
}
// Clear this defer's bit and write the updated mask back to the frame
// before making the call, so the stored mask no longer lists this defer
// as active.
deferBits = deferBits &^ (1 << i)
*(*uint8)(unsafe.Pointer(d.varp - uintptr(deferBitsOffset))) = deferBits
if d._panic != nil {
d._panic.argp = unsafe.Pointer(getargp(0))
}
reflectcall(nil, unsafe.Pointer(closure), deferArgs, argWidth, argWidth)
d.fn = nil
// These args are just a copy, so can be cleared immediately
memclrNoHeapPointers(deferArgs, uintptr(argWidth))
if d._panic != nil && d._panic.recovered {
// The defer recovered the panic: stop here. The frame is done only
// if no active bits remain.
done = deferBits == 0
break
}
}
return done
}
// The implementation of the predeclared function panic.
func gopanic(e interface{}) {
gp := getg()
......@@ -646,6 +865,10 @@ func gopanic(e interface{}) {
atomic.Xadd(&runningPanicDefers, 1)
// By calculating getcallerpc/getcallersp here, we avoid scanning the
// gopanic frame (stack scanning is slow...)
addOneOpenDeferFrame(gp, getcallerpc(), unsafe.Pointer(getcallersp()))
for {
d := gp._defer
if d == nil {
......@@ -659,11 +882,17 @@ func gopanic(e interface{}) {
d._panic.aborted = true
}
d._panic = nil
if !d.openDefer {
// For open-coded defers, we need to process the
// defer again, in case there are any other defers
// to call in the frame (not including the defer
// call that caused the panic).
d.fn = nil
gp._defer = d.link
freedefer(d)
continue
}
}
// Mark defer as started, but keep on list, so that traceback
// can find and update the defer's argument frame if stack growth
......@@ -675,8 +904,16 @@ func gopanic(e interface{}) {
// will find d in the list and will mark d._panic (this panic) aborted.
d._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
done := true
if d.openDefer {
done = runOpenDeferFrame(gp, d)
if done && !d._panic.recovered {
addOneOpenDeferFrame(gp, 0, nil)
}
} else {
p.argp = unsafe.Pointer(getargp(0))
reflectcall(nil, unsafe.Pointer(d.fn), deferArgs(d), uint32(d.siz), uint32(d.siz))
}
p.argp = nil
// reflectcall did not panic. Remove d.
......@@ -684,18 +921,52 @@ func gopanic(e interface{}) {
throw("bad defer entry in panic")
}
d._panic = nil
d.fn = nil
gp._defer = d.link
// trigger shrinkage to test stack copy. See stack_test.go:TestStackPanic
//GC()
pc := d.pc
sp := unsafe.Pointer(d.sp) // must be pointer so it gets adjusted during stack copy
if done {
d.fn = nil
gp._defer = d.link
freedefer(d)
}
if p.recovered {
atomic.Xadd(&runningPanicDefers, -1)
if done {
// Remove any remaining non-started, open-coded defer
// entry after a recover (there's at most one, if we just
// ran a non-open-coded defer), since the entry will
// become out-dated and the defer will be executed
// normally.
d := gp._defer
var prev *_defer
for d != nil {
if d.openDefer {
if d.started {
// This defer is started but we
// are in the middle of a
// defer-panic-recover inside of
// it, so don't remove it or any
// further defer entries
break
}
if prev == nil {
gp._defer = d.link
} else {
prev.link = d.link
}
freedefer(d)
break
} else {
prev = d
d = d.link
}
}
}
gp._panic = p.link
// Aborted panics are marked but remain on the g.panic list.
// Remove them from the list.
......
......@@ -720,7 +720,7 @@ type _func struct {
nameoff int32 // function name
args int32 // in/out args size
deferreturn uint32 // offset of a deferreturn block from entry, if any.
deferreturn uint32 // offset of start of a deferreturn call instruction from entry, if any.
pcsp int32
pcfile int32
......@@ -793,7 +793,7 @@ func extendRandom(r []byte, n int) {
}
// A _defer holds an entry on the list of deferred calls.
// If you add a field here, add code to clear it in freedefer.
// If you add a field here, add code to clear it in freedefer and deferprocStack.
// This struct must match the code in cmd/compile/internal/gc/reflect.go:deferstruct
// and cmd/compile/internal/gc/ssa.go:(*state).call.
// Some defers will be allocated on the stack and some on the heap.
......@@ -804,11 +804,27 @@ type _defer struct {
siz int32 // includes both arguments and results
started bool
heap bool
// openDefer indicates that this _defer is for a frame with open-coded
// defers. We have only one defer record for the entire frame (which may
// currently have 0, 1, or more defers active).
openDefer bool
sp uintptr // sp at time of defer
pc uintptr
pc uintptr // pc at time of defer
fn *funcval
_panic *_panic // panic that is running defer
link *_defer
// If openDefer is true, the fields below record values about the stack
// frame and associated function that has the open-coded defer(s). sp
// above will be the sp for the frame, and pc will be address of the
// deferreturn call in the function.
fd unsafe.Pointer // funcdata for the function associated with the frame
varp uintptr // value of varp for the stack frame
// framepc is the current pc associated with the stack frame. Together,
// with sp above (which is the sp associated with the stack frame),
// framepc/sp can be used as pc/sp pair to continue a stack trace via
// gentraceback().
framepc uintptr
}
// A _panic holds information about an active panic.
......
......@@ -91,7 +91,7 @@ const (
// The stack guard is a pointer this many bytes above the
// bottom of the stack.
_StackGuard = 880*sys.StackGuardMultiplier + _StackSystem
_StackGuard = 896*sys.StackGuardMultiplier + _StackSystem
// After a stack split check the SP is allowed to be this
// many bytes below the stack guard. This saves an instruction
......@@ -736,6 +736,8 @@ func adjustdefers(gp *g, adjinfo *adjustinfo) {
adjustpointer(adjinfo, unsafe.Pointer(&d.sp))
adjustpointer(adjinfo, unsafe.Pointer(&d._panic))
adjustpointer(adjinfo, unsafe.Pointer(&d.link))
adjustpointer(adjinfo, unsafe.Pointer(&d.varp))
adjustpointer(adjinfo, unsafe.Pointer(&d.fd))
}
// Adjust defer argument blocks the same way we adjust active stack frames.
......
......@@ -221,6 +221,7 @@ const (
_FUNCDATA_RegPointerMaps = 2
_FUNCDATA_StackObjects = 3
_FUNCDATA_InlTree = 4
_FUNCDATA_OpenCodedDeferInfo = 5
_ArgsSizeUnknown = -0x80000000
)
......
// errorcheck -0 -l -d=defer
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// check that open-coded defers are used in expected situations
package main
import "fmt"
var glob = 3
// f1 has a single defer that follows a completed for loop (it is not inside
// the loop); the compiler is expected to report it as open-coded.
func f1() {
for i := 0; i < 10; i++ {
fmt.Println("loop")
}
defer func() { // ERROR "open-coded defer"
fmt.Println("defer")
}()
}
// f2 has a defer inside a for loop followed by a defer outside the loop.
// The in-loop defer is expected to be heap-allocated and the later one
// stack-allocated; neither is expected to be open-coded.
func f2() {
for {
defer func() { // ERROR "heap-allocated defer"
fmt.Println("defer1")
}()
if glob > 2 {
break
}
}
defer func() { // ERROR "stack-allocated defer"
fmt.Println("defer2")
}()
}
// f3 is f2 with the order reversed: a defer outside any loop comes first
// (expected stack-allocated), followed by a defer inside a for loop
// (expected heap-allocated).
func f3() {
defer func() { // ERROR "stack-allocated defer"
fmt.Println("defer2")
}()
for {
defer func() { // ERROR "heap-allocated defer"
fmt.Println("defer1")
}()
if glob > 2 {
break
}
}
}
// f4 places its defer before the label of a backward goto loop; the defer
// is expected to be open-coded.
func f4() {
defer func() { // ERROR "open-coded defer"
fmt.Println("defer")
}()
label:
fmt.Println("goto loop")
if glob > 2 {
goto label
}
}
// f5 places its defer between a label and the backward goto that jumps to
// it, i.e. inside the goto loop; the defer is expected to be heap-allocated.
func f5() {
label:
fmt.Println("goto loop")
defer func() { // ERROR "heap-allocated defer"
fmt.Println("defer")
}()
if glob > 2 {
goto label
}
}
// f6 places its defer after a backward goto loop has finished; the defer is
// nevertheless expected to be heap-allocated, for the reason given in the
// comment inside the body.
func f6() {
label:
fmt.Println("goto loop")
if glob > 2 {
goto label
}
// The current analysis doesn't end a backward goto loop, so this defer is
// considered to be inside a loop
defer func() { // ERROR "heap-allocated defer"
fmt.Println("defer")
}()
}
......@@ -367,16 +367,19 @@ func f24() {
m2[[2]string{"x", "y"}] = nil
}
// defer should not cause spurious ambiguously live variables
// Non-open-coded defers should not cause autotmps. (Open-coded defers do create extra autotmps).
func f25(b bool) {
for i := 0; i < 2; i++ {
// Put in loop to make sure defer is not open-coded
defer g25()
}
if b {
return
}
var x string
x = g14()
printstring(x)
return
}
func g25()
......@@ -417,7 +420,8 @@ func f27defer(b bool) {
defer call27(func() { x++ }) // ERROR "stack object .autotmp_[0-9]+ struct \{"
}
defer call27(func() { x++ }) // ERROR "stack object .autotmp_[0-9]+ struct \{"
printnl()
printnl() // ERROR "live at call to printnl: .autotmp_[0-9]+ .autotmp_[0-9]+"
return // ERROR "live at call to call27: .autotmp_[0-9]+"
}
// and newproc (go) escapes to the heap
......@@ -687,12 +691,12 @@ type R struct{ *T } // ERRORAUTO "live at entry to \(\*R\)\.Foo: \.this ptr" "li
// In particular, at printint r must be live.
func f41(p, q *int) (r *int) { // ERROR "live at entry to f41: p q$"
r = p
defer func() { // ERROR "live at call to deferprocStack: q r$" "live at call to deferreturn: r$"
defer func() {
recover()
}()
printint(0) // ERROR "live at call to printint: q r$"
printint(0) // ERROR "live at call to printint: q r .autotmp_[0-9]+$"
r = q
return // ERROR "live at call to deferreturn: r$"
return // ERROR "live at call to f41.func1: r .autotmp_[0-9]+$"
}
func f42() {
......
......@@ -309,17 +309,17 @@ TestCases:
name := m[1]
size, _ := strconv.Atoi(m[2])
// The limit was originally 128 but is now 752 (880-128).
// The limit was originally 128 but is now 768 (896-128).
// Instead of rewriting the test cases above, adjust
// the first stack frame to use up the extra bytes.
if i == 0 {
size += (880 - 128) - 128
size += (896 - 128) - 128
// Noopt builds have a larger stackguard.
// See ../src/cmd/dist/buildruntime.go:stackGuardMultiplier
// This increase is included in objabi.StackGuard
for _, s := range strings.Split(os.Getenv("GO_GCFLAGS"), " ") {
if s == "-N" {
size += 880
size += 896
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment