Commit 46b75578 authored by Jeremy Faller's avatar Jeremy Faller

cmd/compile: walk progs to generate debug_lines data

Walking the progs is simpler than using the is_stmt symbol shenanigans.
This is a reinstatement of CL 196661, which was rolled back due to tests
failing. Unlike that original CL, this change should output the same
debug_lines data the original approach wrote.

The stats for JUST this CLC, note teh small speedup in compilation, and
the lack of difference in binary size.

name                      old time/op       new time/op       delta
Template                        229ms ± 4%        218ms ± 1%  -4.95%  (p=0.000 n=10+8)
Unicode                        92.6ms ± 9%       88.6ms ±13%    ~     (p=0.089 n=10+10)
GoTypes                         850ms ± 2%        831ms ± 4%  -2.23%  (p=0.009 n=10+10)
Compiler                        3.99s ± 1%        3.93s ± 1%  -1.29%  (p=0.000 n=10+9)
SSA                             13.7s ± 1%        13.7s ± 1%    ~     (p=0.912 n=10+10)
Flate                           140ms ± 3%        138ms ± 3%  -1.90%  (p=0.009 n=10+10)
GoParser                        172ms ± 2%        169ms ± 4%    ~     (p=0.095 n=9+10)
Reflect                         530ms ± 3%        516ms ± 5%    ~     (p=0.052 n=10+10)
Tar                             202ms ± 1%        196ms ± 3%  -2.83%  (p=0.002 n=9+10)
XML                             280ms ± 3%        270ms ± 4%  -3.48%  (p=0.009 n=10+10)
LinkCompiler                    927ms ± 2%        907ms ± 4%    ~     (p=0.052 n=10+10)
ExternalLinkCompiler            1.97s ± 2%        1.97s ± 3%    ~     (p=0.853 n=10+10)
LinkWithoutDebugCompiler        549ms ± 3%        543ms ± 5%    ~     (p=0.481 n=10+10)
StdCmd                          12.0s ± 1%        12.0s ± 1%    ~     (p=0.905 n=9+10)

name                      old user-time/op  new user-time/op  delta
Template                        372ms ±18%        344ms ±11%    ~     (p=0.190 n=10+10)
Unicode                         264ms ±23%        241ms ±43%    ~     (p=0.315 n=8+10)
GoTypes                         1.56s ±22%        1.68s ± 5%    ~     (p=0.237 n=10+8)
Compiler                        7.41s ± 2%        7.31s ± 3%    ~     (p=0.123 n=10+10)
SSA                             24.5s ± 2%        24.7s ± 1%    ~     (p=0.133 n=10+9)
Flate                           199ms ± 6%        188ms ±28%    ~     (p=0.353 n=10+10)
GoParser                        243ms ±11%        240ms ± 6%    ~     (p=0.968 n=10+9)
Reflect                         929ms ±21%        862ms ±35%    ~     (p=0.190 n=10+10)
Tar                             284ms ± 9%        296ms ±17%    ~     (p=0.497 n=9+10)
XML                             386ms ±21%        398ms ±28%    ~     (p=1.000 n=9+10)
LinkCompiler                    1.13s ± 9%        1.12s ± 8%    ~     (p=0.546 n=9+9)
ExternalLinkCompiler            2.37s ±15%        2.30s ± 9%    ~     (p=0.549 n=10+9)
LinkWithoutDebugCompiler        646ms ±10%        642ms ±13%    ~     (p=0.853 n=10+10)

name                      old alloc/op      new alloc/op      delta
Template                       36.5MB ± 0%       36.5MB ± 0%  -0.11%  (p=0.000 n=10+9)
Unicode                        28.5MB ± 0%       28.5MB ± 0%    ~     (p=0.190 n=10+10)
GoTypes                         121MB ± 0%        121MB ± 0%  -0.10%  (p=0.000 n=9+10)
Compiler                        549MB ± 0%        549MB ± 0%  -0.10%  (p=0.000 n=9+10)
SSA                            1.92GB ± 0%       1.92GB ± 0%  -0.13%  (p=0.000 n=10+10)
Flate                          23.0MB ± 0%       23.0MB ± 0%  -0.07%  (p=0.000 n=10+10)
GoParser                       27.9MB ± 0%       27.9MB ± 0%  -0.09%  (p=0.000 n=10+10)
Reflect                        77.9MB ± 0%       77.8MB ± 0%  -0.13%  (p=0.000 n=9+10)
Tar                            34.5MB ± 0%       34.4MB ± 0%  -0.09%  (p=0.000 n=10+10)
XML                            44.3MB ± 0%       44.3MB ± 0%  -0.08%  (p=0.000 n=10+10)
LinkCompiler                    229MB ± 0%        225MB ± 0%  -1.74%  (p=0.000 n=10+10)
ExternalLinkCompiler            233MB ± 0%        242MB ± 0%  +3.81%  (p=0.000 n=10+10)
LinkWithoutDebugCompiler        156MB ± 0%        152MB ± 0%  -2.29%  (p=0.000 n=10+9)

name                      old allocs/op     new allocs/op     delta
Template                         373k ± 0%         373k ± 0%  -0.21%  (p=0.000 n=10+10)
Unicode                          340k ± 0%         340k ± 0%  -0.04%  (p=0.000 n=10+10)
GoTypes                         1.33M ± 0%        1.33M ± 0%  -0.20%  (p=0.000 n=10+9)
Compiler                        5.39M ± 0%        5.38M ± 0%  -0.16%  (p=0.000 n=10+10)
SSA                             18.3M ± 0%        18.2M ± 0%  -0.15%  (p=0.000 n=10+10)
Flate                            235k ± 0%         234k ± 0%  -0.23%  (p=0.000 n=10+7)
GoParser                         309k ± 0%         308k ± 0%  -0.20%  (p=0.000 n=10+10)
Reflect                          970k ± 0%         968k ± 0%  -0.30%  (p=0.000 n=10+10)
Tar                              347k ± 0%         347k ± 0%  -0.22%  (p=0.000 n=10+10)
XML                              425k ± 0%         424k ± 0%  -0.16%  (p=0.000 n=10+10)
LinkCompiler                     602k ± 0%         601k ± 0%  -0.03%  (p=0.000 n=9+10)
ExternalLinkCompiler            1.65M ± 0%        1.65M ± 0%  -0.02%  (p=0.000 n=10+10)
LinkWithoutDebugCompiler         220k ± 0%         220k ± 0%  -0.03%  (p=0.016 n=10+9)

name                      old object-bytes  new object-bytes  delta
Template                        553kB ± 0%        553kB ± 0%  -0.01%  (p=0.000 n=10+10)
Unicode                         215kB ± 0%        215kB ± 0%    ~     (all equal)
GoTypes                        2.02MB ± 0%       2.02MB ± 0%  -0.00%  (p=0.000 n=10+10)
Compiler                       7.98MB ± 0%       7.98MB ± 0%  -0.01%  (p=0.000 n=10+10)
SSA                            27.1MB ± 0%       27.1MB ± 0%  -0.00%  (p=0.000 n=10+10)
Flate                           340kB ± 0%        340kB ± 0%  -0.01%  (p=0.000 n=10+10)
GoParser                        434kB ± 0%        434kB ± 0%  -0.00%  (p=0.000 n=10+10)
Reflect                        1.34MB ± 0%       1.34MB ± 0%  -0.01%  (p=0.000 n=10+10)
Tar                             479kB ± 0%        479kB ± 0%  -0.00%  (p=0.000 n=10+10)
XML                             618kB ± 0%        618kB ± 0%  -0.01%  (p=0.000 n=10+10)

name                      old export-bytes  new export-bytes  delta
Template                       20.4kB ± 0%       20.4kB ± 0%    ~     (all equal)
Unicode                        8.21kB ± 0%       8.21kB ± 0%    ~     (all equal)
GoTypes                        36.6kB ± 0%       36.6kB ± 0%    ~     (all equal)
Compiler                        116kB ± 0%        116kB ± 0%  +0.00%  (p=0.000 n=10+10)
SSA                             141kB ± 0%        141kB ± 0%  +0.00%  (p=0.000 n=10+10)
Flate                          5.10kB ± 0%       5.10kB ± 0%    ~     (all equal)
GoParser                       8.92kB ± 0%       8.92kB ± 0%    ~     (all equal)
Reflect                        11.8kB ± 0%       11.8kB ± 0%    ~     (all equal)
Tar                            10.9kB ± 0%       10.9kB ± 0%    ~     (all equal)
XML                            17.4kB ± 0%       17.4kB ± 0%    ~     (all equal)

name                      old text-bytes    new text-bytes    delta
HelloSize                       742kB ± 0%        742kB ± 0%    ~     (all equal)
CmdGoSize                      10.6MB ± 0%       10.6MB ± 0%    ~     (all equal)

name                      old data-bytes    new data-bytes    delta
HelloSize                      10.7kB ± 0%       10.7kB ± 0%    ~     (all equal)
CmdGoSize                       312kB ± 0%        312kB ± 0%    ~     (all equal)

name                      old bss-bytes     new bss-bytes     delta
HelloSize                       122kB ± 0%        122kB ± 0%    ~     (all equal)
CmdGoSize                       146kB ± 0%        146kB ± 0%    ~     (all equal)

name                      old exe-bytes     new exe-bytes     delta
HelloSize                      1.10MB ± 0%       1.10MB ± 0%    ~     (all equal)
CmdGoSize                      14.9MB ± 0%       14.9MB ± 0%  -0.03%  (p=0.000 n=10+10)

Change-Id: Ie078a42b29353b96654fa1f0f47d600b5a53762d
Reviewed-on: https://go-review.googlesource.com/c/go/+/200017Reviewed-by: default avatarJeremy Faller <jeremy@golang.org>
Reviewed-by: default avatarDavid Chase <drchase@google.com>
Run-TryBot: Jeremy Faller <jeremy@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent fb9af841
...@@ -8,6 +8,7 @@ package obj ...@@ -8,6 +8,7 @@ package obj
import ( import (
"cmd/internal/dwarf" "cmd/internal/dwarf"
"cmd/internal/src"
"fmt" "fmt"
) )
...@@ -48,78 +49,45 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { ...@@ -48,78 +49,45 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
// generating the line table. See below. // generating the line table. See below.
// TODO: Once delve can support multiple DW_LNS_end_statements, we don't have // TODO: Once delve can support multiple DW_LNS_end_statements, we don't have
// to do this. // to do this.
is_stmt := uint8(1) stmt := true
line := int64(1)
pc := s.Func.Text.Pc pc := s.Func.Text.Pc
line := 1 name := ""
file := 1 prologue, wrotePrologue := false, false
// Walk the progs, generating the DWARF table.
// The linker will insert the DW_LNE_set_address once determined; therefore, for p := s.Func.Text; p != nil; p = p.Link {
// it's omitted here. prologue = prologue || (p.Pos.Xlogue() == src.PosPrologueEnd)
// If we're not at a real instruction, keep looping!
// Generate the actual line information. if p.Pos.Line() == 0 || (p.Link != nil && p.Link.Pc == p.Pc) {
// We use the pcline and pcfile to generate this section, and it's suboptimal. continue
// Likely better would be to generate this dirrectly from the progs and not
// parse those tables.
// TODO: Generate from the progs if it's faster.
pcfile := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
pcline := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
pcstmt := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
pcfile.Init(s.Func.Pcln.Pcfile.P)
pcline.Init(s.Func.Pcln.Pcline.P)
var pctostmtData Pcdata
funcpctab(ctxt, &pctostmtData, s, "pctostmt", pctostmt, nil)
pcstmt.Init(pctostmtData.P)
var thispc uint32
for !pcfile.Done && !pcline.Done {
// Only changed if it advanced
if int32(file) != pcfile.Value {
dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
dwarf.Uleb128put(dctxt, lines, fileNums[pcfile.Value])
file = int(pcfile.Value)
} }
newStmt := p.Pos.IsStmt() != src.PosNotStmt
newName, newLine := linkgetlineFromPos(ctxt, p.Pos)
// Only changed if it advanced // Output debug info.
if is_stmt != uint8(pcstmt.Value) { wrote := false
new_stmt := uint8(pcstmt.Value) if name != newName {
switch new_stmt &^ 1 { newFile := ctxt.PosTable.FileIndex(newName) + 1 // 1 indexing for the table.
case PrologueEnd: dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end)) dwarf.Uleb128put(dctxt, lines, int64(newFile))
case EpilogueBegin: name = newName
// TODO if there is a use for this, add it. wrote = true
// Don't forget to increase OPCODE_BASE by 1 and add entry for standard_opcode_lengths[11]
panic("unsupported EpilogueBegin")
}
new_stmt &= 1
if is_stmt != new_stmt {
is_stmt = new_stmt
dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
}
} }
if prologue && !wrotePrologue {
// putpcldelta makes a row in the DWARF matrix, always, even if line is unchanged. dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end))
putpclcdelta(ctxt, dctxt, lines, uint64(s.Func.Text.Pc+int64(thispc)-pc), int64(pcline.Value)-int64(line)) wrotePrologue = true
wrote = true
pc = s.Func.Text.Pc + int64(thispc)
line = int(pcline.Value)
// Take the minimum step forward for the three iterators
thispc = pcfile.NextPC
if pcline.NextPC < thispc {
thispc = pcline.NextPC
} }
if !pcstmt.Done && pcstmt.NextPC < thispc { if stmt != newStmt {
thispc = pcstmt.NextPC dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
stmt = newStmt
wrote = true
} }
if pcfile.NextPC == thispc { if line != int64(newLine) || wrote {
pcfile.Next() pcdelta := p.Pc - pc
} putpclcdelta(ctxt, dctxt, lines, uint64(pcdelta), int64(newLine)-line)
if !pcstmt.Done && pcstmt.NextPC == thispc { line, pc = int64(newLine), p.Pc
pcstmt.Next()
}
if pcline.NextPC == thispc {
pcline.Next()
} }
} }
...@@ -129,16 +97,16 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) { ...@@ -129,16 +97,16 @@ func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
// file = 1 // file = 1
// line = 1 // line = 1
// column = 0 // column = 0
// is_stmt = set in header, we assume true // stmt = set in header, we assume true
// basic_block = false // basic_block = false
// Careful readers of the DWARF specification will note that we don't reset // Careful readers of the DWARF specification will note that we don't reset
// the address of the state machine -- but this will happen at the beginning // the address of the state machine -- but this will happen at the beginning
// of the NEXT block of opcodes. (See the SetAddress call above.) // of the NEXT block of opcodes.
dctxt.AddUint8(lines, dwarf.DW_LNS_set_file) dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
dwarf.Uleb128put(dctxt, lines, 1) dwarf.Uleb128put(dctxt, lines, 1)
dctxt.AddUint8(lines, dwarf.DW_LNS_advance_line) dctxt.AddUint8(lines, dwarf.DW_LNS_advance_line)
dwarf.Sleb128put(dctxt, lines, int64(1-line)) dwarf.Sleb128put(dctxt, lines, int64(1-line))
if is_stmt != 1 { if !stmt {
dctxt.AddUint8(lines, dwarf.DW_LNS_negate_stmt) dctxt.AddUint8(lines, dwarf.DW_LNS_negate_stmt)
} }
dctxt.AddUint8(lines, dwarf.DW_LNS_copy) dctxt.AddUint8(lines, dwarf.DW_LNS_copy)
......
...@@ -5,16 +5,10 @@ ...@@ -5,16 +5,10 @@
package obj package obj
import ( import (
"cmd/internal/src"
"encoding/binary" "encoding/binary"
"log" "log"
) )
const (
PrologueEnd = 2 + iota // overload "is_stmt" to include prologue_end
EpilogueBegin // overload "is_stmt" to include epilogue_end
)
// funcpctab writes to dst a pc-value table mapping the code in func to the values // funcpctab writes to dst a pc-value table mapping the code in func to the values
// returned by valfunc parameterized by arg. The invocation of valfunc to update the // returned by valfunc parameterized by arg. The invocation of valfunc to update the
// current value is, for each p, // current value is, for each p,
...@@ -249,34 +243,6 @@ func pctospadj(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg in ...@@ -249,34 +243,6 @@ func pctospadj(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg in
return oldval + p.Spadj return oldval + p.Spadj
} }
// pctostmt returns either,
// if phase==0, then whether the current instruction is a step-target (Dwarf is_stmt)
// bit-or'd with whether the current statement is a prologue end or epilogue begin
// else (phase == 1), zero.
//
func pctostmt(ctxt *Link, sym *LSym, oldval int32, p *Prog, phase int32, arg interface{}) int32 {
if phase == 1 {
return 0 // Ignored; also different from initial value of -1, if that ever matters.
}
s := p.Pos.IsStmt()
l := p.Pos.Xlogue()
var is_stmt int32
// PrologueEnd, at least, is passed to the next instruction
switch l {
case src.PosPrologueEnd:
is_stmt = PrologueEnd
case src.PosEpilogueBegin:
is_stmt = EpilogueBegin
}
if s != src.PosNotStmt {
is_stmt |= 1 // either PosDefaultStmt from asm, or PosIsStmt from go
}
return is_stmt
}
// pctopcdata computes the pcdata value in effect at p. // pctopcdata computes the pcdata value in effect at p.
// A PCDATA instruction sets the value in effect at future // A PCDATA instruction sets the value in effect at future
// non-PCDATA instructions. // non-PCDATA instructions.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment