Commit a35c85c0 authored by Michael Hudson-Doyle

cmd/internal/obj, runtime: implement IE model TLS on ppc64le

This requires changing the tls access code to match the patterns documented in
the ABI documentation or the system linker will "optimize" it into ridiculousness.

With this change, -buildmode=pie works, although as it is tested in testshared,
the tests are not run yet.

Change-Id: I1efa6687af0a5b8db3385b10f6542a49056b2eb3
Reviewed-on: https://go-review.googlesource.com/15971
Reviewed-by: Russ Cox <rsc@golang.org>
parent bd329d47
...@@ -480,6 +480,19 @@ const ( ...@@ -480,6 +480,19 @@ const (
// instruction word. // instruction word.
R_POWER_TLS_LE R_POWER_TLS_LE
// R_POWER_TLS_IE is used to implement the "initial exec" model for tls access. It
// relocates a D-form, DS-form instruction sequence like R_ADDRPOWER_DS. It
// inserts the offset of the GOT slot for the thread-local symbol from the TOC (the
// GOT slot is filled by the dynamic linker with the offset of the thread-local
// symbol from the thread pointer (R13)).
R_POWER_TLS_IE
// R_POWER_TLS marks an X-form instruction such as "MOVD 0(R13)(R31*1), g" as
// accessing a particular thread-local symbol. It does not affect code generation
// but is used by the system linker when relaxing "initial exec" model code to
// "local exec" model code.
R_POWER_TLS
// R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second // R_ADDRPOWER_DS is similar to R_ADDRPOWER above, but assumes the second
// instruction is a "DS-form" instruction, which has an immediate field occupying // instruction is a "DS-form" instruction, which has an immediate field occupying
// bits [15:2] of the instruction word. Bits [15:2] of the address of the // bits [15:2] of the instruction word. Bits [15:2] of the address of the
......
...@@ -222,6 +222,7 @@ const ( ...@@ -222,6 +222,7 @@ const (
C_GOK C_GOK
C_ADDR C_ADDR
C_TLS_LE C_TLS_LE
C_TLS_IE
C_TEXTSIZE C_TEXTSIZE
C_NCLASS /* must be the last */ C_NCLASS /* must be the last */
......
...@@ -36,6 +36,7 @@ var cnames9 = []string{ ...@@ -36,6 +36,7 @@ var cnames9 = []string{
"GOK", "GOK",
"ADDR", "ADDR",
"TLS_LE", "TLS_LE",
"TLS_IE",
"TEXTSIZE", "TEXTSIZE",
"NCLASS", "NCLASS",
} }
...@@ -246,6 +246,7 @@ var optab = []Optab{ ...@@ -246,6 +246,7 @@ var optab = []Optab{
{AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, 76, 12, 0}, {AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, 76, 12, 0},
{AMOVD, C_TLS_LE, C_NONE, C_NONE, C_REG, 79, 4, 0}, {AMOVD, C_TLS_LE, C_NONE, C_NONE, C_REG, 79, 4, 0},
{AMOVD, C_TLS_IE, C_NONE, C_NONE, C_REG, 80, 8, 0},
/* load constant */ /* load constant */
{AMOVD, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB}, {AMOVD, C_SECON, C_NONE, C_NONE, C_REG, 3, 4, REGSB},
...@@ -587,8 +588,12 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int { ...@@ -587,8 +588,12 @@ func aclass(ctxt *obj.Link, a *obj.Addr) int {
ctxt.Instoffset = a.Offset ctxt.Instoffset = a.Offset
if a.Sym != nil { // use relocation if a.Sym != nil { // use relocation
if a.Sym.Type == obj.STLSBSS { if a.Sym.Type == obj.STLSBSS {
if ctxt.Flag_shared != 0 {
return C_TLS_IE
} else {
return C_TLS_LE return C_TLS_LE
} }
}
return C_ADDR return C_ADDR
} }
return C_LEXT return C_LEXT
...@@ -1652,6 +1657,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ...@@ -1652,6 +1657,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
if v != 0 { if v != 0 {
ctxt.Diag("illegal indexed instruction\n%v", p) ctxt.Diag("illegal indexed instruction\n%v", p)
} }
if ctxt.Flag_shared != 0 && r == REG_R13 {
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 4
// This (and the matching part in the load case
// below) are the only places in the ppc64 toolchain
// that know the name of the tls variable. Possibly
// we could add some assembly syntax so that the name
// of the variable does not have to be assumed.
rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0)
rel.Type = obj.R_POWER_TLS
}
o1 = AOP_RRR(uint32(opstorex(ctxt, int(p.As))), uint32(p.From.Reg), uint32(p.To.Index), uint32(r)) o1 = AOP_RRR(uint32(opstorex(ctxt, int(p.As))), uint32(p.From.Reg), uint32(p.To.Index), uint32(r))
} else { } else {
if int32(int16(v)) != v { if int32(int16(v)) != v {
...@@ -1671,6 +1688,13 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ...@@ -1671,6 +1688,13 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
if v != 0 { if v != 0 {
ctxt.Diag("illegal indexed instruction\n%v", p) ctxt.Diag("illegal indexed instruction\n%v", p)
} }
if ctxt.Flag_shared != 0 && r == REG_R13 {
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 4
rel.Sym = obj.Linklookup(ctxt, "runtime.tls_g", 0)
rel.Type = obj.R_POWER_TLS
}
o1 = AOP_RRR(uint32(oploadx(ctxt, int(p.As))), uint32(p.To.Reg), uint32(p.From.Index), uint32(r)) o1 = AOP_RRR(uint32(oploadx(ctxt, int(p.As))), uint32(p.To.Reg), uint32(p.From.Index), uint32(r))
} else { } else {
if int32(int16(v)) != v { if int32(int16(v)) != v {
...@@ -2467,6 +2491,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ...@@ -2467,6 +2491,18 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
rel.Sym = p.From.Sym rel.Sym = p.From.Sym
rel.Type = obj.R_POWER_TLS_LE rel.Type = obj.R_POWER_TLS_LE
case 80:
if p.From.Offset != 0 {
ctxt.Diag("invalid offset against tls var %v", p)
}
o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
o2 = AOP_IRR(uint32(opload(ctxt, AMOVD)), uint32(p.To.Reg), uint32(p.To.Reg), 0)
rel := obj.Addrel(ctxt.Cursym)
rel.Off = int32(ctxt.Pc)
rel.Siz = 8
rel.Sym = p.From.Sym
rel.Type = obj.R_POWER_TLS_IE
} }
out[0] = o1 out[0] = o1
......
...@@ -568,6 +568,7 @@ const ( ...@@ -568,6 +568,7 @@ const (
R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO R_PPC64_ADDR16_LO = R_PPC_ADDR16_LO
R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA R_PPC64_ADDR16_HA = R_PPC_ADDR16_HA
R_PPC64_REL24 = R_PPC_REL24 R_PPC64_REL24 = R_PPC_REL24
R_PPC64_GOT16_HA = R_PPC_GOT16_HA
R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT R_PPC64_JMP_SLOT = R_PPC_JMP_SLOT
R_PPC64_TPREL16 = R_PPC_TPREL16 R_PPC64_TPREL16 = R_PPC_TPREL16
R_PPC64_ADDR64 = 38 R_PPC64_ADDR64 = 38
...@@ -576,8 +577,12 @@ const ( ...@@ -576,8 +577,12 @@ const (
R_PPC64_TOC16_HI = 49 R_PPC64_TOC16_HI = 49
R_PPC64_TOC16_HA = 50 R_PPC64_TOC16_HA = 50
R_PPC64_ADDR16_LO_DS = 57 R_PPC64_ADDR16_LO_DS = 57
R_PPC64_GOT16_LO_DS = 59
R_PPC64_TOC16_DS = 63 R_PPC64_TOC16_DS = 63
R_PPC64_TOC16_LO_DS = 64 R_PPC64_TOC16_LO_DS = 64
R_PPC64_TLS = 67
R_PPC64_GOT_TPREL16_LO_DS = 88
R_PPC64_GOT_TPREL16_HA = 90
R_PPC64_REL16_LO = 250 R_PPC64_REL16_LO = 250
R_PPC64_REL16_HI = 251 R_PPC64_REL16_HI = 251
R_PPC64_REL16_HA = 252 R_PPC64_REL16_HA = 252
......
...@@ -310,9 +310,18 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int { ...@@ -310,9 +310,18 @@ func elfreloc1(r *ld.Reloc, sectoff int64) int {
return -1 return -1
} }
case obj.R_POWER_TLS:
ld.Thearch.Vput(ld.R_PPC64_TLS | uint64(elfsym)<<32)
case obj.R_POWER_TLS_LE: case obj.R_POWER_TLS_LE:
ld.Thearch.Vput(ld.R_PPC64_TPREL16 | uint64(elfsym)<<32) ld.Thearch.Vput(ld.R_PPC64_TPREL16 | uint64(elfsym)<<32)
case obj.R_POWER_TLS_IE:
ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_HA | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd))
ld.Thearch.Vput(uint64(sectoff + 4))
ld.Thearch.Vput(ld.R_PPC64_GOT_TPREL16_LO_DS | uint64(elfsym)<<32)
case obj.R_ADDRPOWER: case obj.R_ADDRPOWER:
ld.Thearch.Vput(ld.R_PPC64_ADDR16_HA | uint64(elfsym)<<32) ld.Thearch.Vput(ld.R_PPC64_ADDR16_HA | uint64(elfsym)<<32)
ld.Thearch.Vput(uint64(r.Xadd)) ld.Thearch.Vput(uint64(r.Xadd))
...@@ -444,7 +453,7 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int { ...@@ -444,7 +453,7 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
default: default:
return -1 return -1
case obj.R_POWER_TLS_LE: case obj.R_POWER_TLS, obj.R_POWER_TLS_LE, obj.R_POWER_TLS_IE:
r.Done = 0 r.Done = 0
// check Outer is nil, Type is TLSBSS? // check Outer is nil, Type is TLSBSS?
r.Xadd = r.Add r.Xadd = r.Add
......
...@@ -27,8 +27,7 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0 ...@@ -27,8 +27,7 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
CMP R31, $0 CMP R31, $0
BEQ nocgo BEQ nocgo
MOVD runtime·tls_g(SB), R31 MOVD runtime·tls_g(SB), R31
ADD R13, R31 MOVD g, 0(R13)(R31*1)
MOVD g, 0(R31)
nocgo: nocgo:
RET RET
...@@ -44,8 +43,7 @@ nocgo: ...@@ -44,8 +43,7 @@ nocgo:
// NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31. // NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31.
TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0 TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
MOVD runtime·tls_g(SB), R31 MOVD runtime·tls_g(SB), R31
ADD R13, R31 MOVD 0(R13)(R31*1), g
MOVD 0(R31), g
RET RET
GLOBL runtime·tls_g+0(SB), TLSBSS, $8 GLOBL runtime·tls_g+0(SB), TLSBSS, $8
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment