Commit 6d8a147b authored by Russ Cox

runtime: use 1-bit pointer bitmaps in type representation

The type information in reflect.Type and the GC programs is now
1 bit per word, down from 2 bits.

The in-memory unrolled type bitmap representation is now
1 bit per word, down from 4 bits.

The conversion from the unrolled (now 1-bit) bitmap to the
heap bitmap (still 4-bit) is not optimized. A followup CL will
work on that, after the heap bitmap has been converted to 2-bit.

The typeDead optimization, in which a special value denotes
that there are no more pointers anywhere in the object, is lost
in this CL. A followup CL will bring it back in the final form of
heapBitsSetType.

Change-Id: If61e67950c16a293b0b516a6fd9a1c755b6d5549
Reviewed-on: https://go-review.googlesource.com/9702
Reviewed-by: Austin Clements <austin@google.com>
parent 7d9e16ab
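
Throughout the diff, the recurring change is the indexing arithmetic: word i of a type is now described by bit i%8 of byte i/8 of its mask, rather than by a 2-bit field inside a nibble. A minimal self-contained sketch of that encoding, assuming nothing beyond the arithmetic visible in the diff (setPtrBit and ptrBit are illustrative names, not functions from this CL):

```go
package main

import "fmt"

// setPtrBit marks word `word` of the object as a pointer.
func setPtrBit(mask []byte, word uint) {
	mask[word/8] |= 1 << (word % 8)
}

// ptrBit reports word `word`'s bit: 1 = pointer, 0 = scalar.
func ptrBit(mask []byte, word uint) byte {
	return mask[word/8] >> (word % 8) & 1
}

func main() {
	// A hypothetical 3-word object laid out as pointer, scalar, pointer.
	mask := make([]byte, 1)
	setPtrBit(mask, 0)
	setPtrBit(mask, 2)
	fmt.Printf("%08b\n", mask[0]) // 00000101
	for w := uint(0); w < 3; w++ {
		fmt.Println(w, ptrBit(mask, w)) // 0 1, 1 0, 2 1
	}
}
```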
......@@ -1430,11 +1430,7 @@ func usegcprog(t *Type) bool {
// Calculate size of the unrolled GC mask.
nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
size := nptr
if size%2 != 0 {
size *= 2 // repeated
}
size = size * obj.GcBits / 8 // 4 bits per word
size := (nptr + 7) / 8
// Decide whether to use unrolled GC mask or GC program.
// We could use a more elaborate condition, but this seems to work well in practice.
......@@ -1445,7 +1441,7 @@ func usegcprog(t *Type) bool {
return size > int64(2*Widthptr)
}
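
The size computation above shrinks accordingly. As a worked example of the removed and added formulas: a hypothetical 3-word type needed 3 bytes of mask before this CL (the odd word count forces the mask to be doubled so the runtime can tile it over arrays) and needs 1 byte after:

```go
package main

import "fmt"

func main() {
	nptr := int64(3) // words in a hypothetical 3-word type

	// Old: 4 bits per word, mask doubled when the word count is odd
	// to simplify tiling over arrays in the runtime.
	oldSize := nptr
	if oldSize%2 != 0 {
		oldSize *= 2
	}
	oldSize = oldSize * 4 / 8 // obj.GcBits == 4

	// New: 1 bit per word, rounded up to whole bytes.
	newSize := (nptr + 7) / 8

	fmt.Println(oldSize, newSize) // 3 1
}
```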
// Generates sparse GC bitmask (4 bits per word).
// Generates GC bitmask (1 bit per word).
func gengcmask(t *Type, gcmask []byte) {
for i := int64(0); i < 16; i++ {
gcmask[i] = 0
......@@ -1454,40 +1450,14 @@ func gengcmask(t *Type, gcmask []byte) {
return
}
// Generate compact mask as stacks use.
xoffset := int64(0)
vec := bvalloc(2 * int32(Widthptr) * 8)
xoffset := int64(0)
onebitwalktype1(t, &xoffset, vec)
// Unfold the mask for the GC bitmap format:
// 4 bits per word, 2 high bits encode pointer info.
pos := gcmask
nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
half := false
// If number of words is odd, repeat the mask.
// This makes simpler handling of arrays in runtime.
var i int64
var bits uint8
for j := int64(0); j <= (nptr % 2); j++ {
for i = 0; i < nptr; i++ {
// convert 0=scalar / 1=pointer to GC bit encoding
if bvget(vec, int32(i)) == 0 {
bits = obj.BitsScalar
} else {
bits = obj.BitsPointer
}
bits <<= 2
if half {
bits <<= 4
}
pos[0] |= byte(bits)
half = !half
if !half {
pos = pos[1:]
}
for i := int64(0); i < nptr; i++ {
if bvget(vec, int32(i)) == 1 {
gcmask[i/8] |= 1 << (uint(i) % 8)
}
}
}
......@@ -1496,7 +1466,7 @@ func gengcmask(t *Type, gcmask []byte) {
type ProgGen struct {
s *Sym
datasize int32
data [256 / obj.PointersPerByte]uint8
data [256 / 8]uint8
ot int64
}
......@@ -1504,7 +1474,7 @@ func proggeninit(g *ProgGen, s *Sym) {
g.s = s
g.datasize = 0
g.ot = 0
g.data = [256 / obj.PointersPerByte]uint8{}
g.data = [256 / 8]uint8{}
}
func proggenemit(g *ProgGen, v uint8) {
......@@ -1518,16 +1488,16 @@ func proggendataflush(g *ProgGen) {
}
proggenemit(g, obj.InsData)
proggenemit(g, uint8(g.datasize))
s := (g.datasize + obj.PointersPerByte - 1) / obj.PointersPerByte
s := (g.datasize + 7) / 8
for i := int32(0); i < s; i++ {
proggenemit(g, g.data[i])
}
g.datasize = 0
g.data = [256 / obj.PointersPerByte]uint8{}
g.data = [256 / 8]uint8{}
}
func proggendata(g *ProgGen, d uint8) {
g.data[g.datasize/obj.PointersPerByte] |= d << uint((g.datasize%obj.PointersPerByte)*obj.BitsPerPointer)
g.data[g.datasize/8] |= d << uint(g.datasize%8)
g.datasize++
if g.datasize == 255 {
proggendataflush(g)
......@@ -1538,7 +1508,7 @@ func proggendata(g *ProgGen, d uint8) {
func proggenskip(g *ProgGen, off int64, v int64) {
for i := off; i < off+v; i++ {
if (i % int64(Widthptr)) == 0 {
proggendata(g, obj.BitsScalar)
proggendata(g, 0)
}
}
}
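
proggendata now packs one bit per word into the data buffer, and proggendataflush emits only ceil(datasize/8) bytes after the InsData opcode. A standalone sketch of that packing (an illustration re-derived from the diff, not the compiler's actual ProgGen type), using the string and slice layouts gengcprog1 emits below:

```go
package main

import "fmt"

func main() {
	var data [256 / 8]uint8
	datasize := int32(0)

	// emit mirrors the new proggendata indexing: bit n%8 of byte n/8.
	emit := func(d uint8) {
		data[datasize/8] |= d << uint(datasize%8)
		datasize++
	}

	// string layout: data pointer, then length word.
	emit(1)
	emit(0)
	// slice layout: data pointer, length, capacity.
	emit(1)
	emit(0)
	emit(0)

	// On flush, only ceil(datasize/8) bytes follow the InsData opcode.
	s := (datasize + 7) / 8
	fmt.Printf("%d entries -> %d byte(s): %08b\n", datasize, s, data[0])
	// 5 entries -> 1 byte(s): 00000101
}
```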
......@@ -1566,12 +1536,7 @@ func proggenfini(g *ProgGen) int64 {
// Generates GC program for large types.
func gengcprog(t *Type, pgc0 **Sym, pgc1 **Sym) {
nptr := (t.Width + int64(Widthptr) - 1) / int64(Widthptr)
size := nptr
if size%2 != 0 {
size *= 2 // repeated twice
}
size = size * obj.PointersPerByte / 8 // 4 bits per word
size++ // unroll flag in the beginning, used by runtime (see runtime.markallocated)
size := nptr + 1 // unroll flag in the beginning, used by runtime (see runtime.markallocated)
// emit space in BSS for unrolled program
*pgc0 = nil
......@@ -1623,26 +1588,25 @@ func gengcprog1(g *ProgGen, t *Type, xoffset *int64) {
TFUNC,
TCHAN,
TMAP:
proggendata(g, obj.BitsPointer)
proggendata(g, 1)
*xoffset += t.Width
case TSTRING:
proggendata(g, obj.BitsPointer)
proggendata(g, obj.BitsScalar)
proggendata(g, 1)
proggendata(g, 0)
*xoffset += t.Width
// Assuming IfacePointerOnly=1.
case TINTER:
proggendata(g, obj.BitsPointer)
proggendata(g, obj.BitsPointer)
proggendata(g, 1)
proggendata(g, 1)
*xoffset += t.Width
case TARRAY:
if Isslice(t) {
proggendata(g, obj.BitsPointer)
proggendata(g, obj.BitsScalar)
proggendata(g, obj.BitsScalar)
proggendata(g, 1)
proggendata(g, 0)
proggendata(g, 0)
} else {
t1 := t.Type
if t1.Width == 0 {
......@@ -1656,7 +1620,7 @@ func gengcprog1(g *ProgGen, t *Type, xoffset *int64) {
n := t.Width
n -= -*xoffset & (int64(Widthptr) - 1) // skip to next ptr boundary
proggenarray(g, (n+int64(Widthptr)-1)/int64(Widthptr))
proggendata(g, obj.BitsScalar)
proggendata(g, 0)
proggenarrayend(g)
*xoffset -= (n+int64(Widthptr)-1)/int64(Widthptr)*int64(Widthptr) - t.Width
} else {
......
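
In summary, the 1-bit sequences gengcprog1 emits for the basic layouts in this hunk, read off the diff above (the per-field interpretations in the comments are assumptions about the standard runtime layouts, not text from this CL):

```go
package scratch

// 1 = pointer word, 0 = scalar word.
var layoutBits = map[string][]byte{
	"pointer, chan, func, map": {1},
	"string":                   {1, 0},    // data pointer, length
	"interface":                {1, 1},    // both words scanned as pointers
	"slice":                    {1, 0, 0}, // data pointer, length, capacity
}
```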
......@@ -1032,7 +1032,7 @@ func maxalign(s *LSym, type_ int) int32 {
type ProgGen struct {
s *LSym
datasize int32
data [256 / obj.PointersPerByte]uint8
data [256 / 8]uint8
pos int64
}
......@@ -1040,7 +1040,7 @@ func proggeninit(g *ProgGen, s *LSym) {
g.s = s
g.datasize = 0
g.pos = 0
g.data = [256 / obj.PointersPerByte]uint8{}
g.data = [256 / 8]uint8{}
}
func proggenemit(g *ProgGen, v uint8) {
......@@ -1054,16 +1054,16 @@ func proggendataflush(g *ProgGen) {
}
proggenemit(g, obj.InsData)
proggenemit(g, uint8(g.datasize))
s := (g.datasize + obj.PointersPerByte - 1) / obj.PointersPerByte
s := (g.datasize + 7) / 8
for i := int32(0); i < s; i++ {
proggenemit(g, g.data[i])
}
g.datasize = 0
g.data = [256 / obj.PointersPerByte]uint8{}
g.data = [256 / 8]uint8{}
}
func proggendata(g *ProgGen, d uint8) {
g.data[g.datasize/obj.PointersPerByte] |= d << uint((g.datasize%obj.PointersPerByte)*obj.BitsPerPointer)
g.data[g.datasize/8] |= d << uint(g.datasize%8)
g.datasize++
if g.datasize == 255 {
proggendataflush(g)
......@@ -1074,7 +1074,7 @@ func proggendata(g *ProgGen, d uint8) {
func proggenskip(g *ProgGen, off int64, v int64) {
for i := off; i < off+v; i++ {
if (i % int64(Thearch.Ptrsize)) == 0 {
proggendata(g, obj.BitsScalar)
proggendata(g, 0)
}
}
}
......@@ -1119,35 +1119,18 @@ func proggenaddsym(g *ProgGen, s *LSym) {
// Leave debugging the SDATA issue for the Go rewrite.
if s.Gotype == nil && s.Size >= int64(Thearch.Ptrsize) && s.Name[0] != '.' {
// conservative scan
Diag("missing Go type information for global symbol: %s size %d", s.Name, int(s.Size))
return
}
if (s.Size%int64(Thearch.Ptrsize) != 0) || (g.pos%int64(Thearch.Ptrsize) != 0) {
Diag("proggenaddsym: unaligned conservative symbol %s: size=%d pos=%d", s.Name, s.Size, g.pos)
}
size := (s.Size + int64(Thearch.Ptrsize) - 1) / int64(Thearch.Ptrsize) * int64(Thearch.Ptrsize)
if size < int64(32*Thearch.Ptrsize) {
// Emit small symbols as data.
for i := int64(0); i < size/int64(Thearch.Ptrsize); i++ {
proggendata(g, obj.BitsPointer)
}
} else {
// Emit large symbols as array.
proggenarray(g, size/int64(Thearch.Ptrsize))
proggendata(g, obj.BitsPointer)
proggenarrayend(g)
}
g.pos = s.Value + size
} else if s.Gotype == nil || decodetype_noptr(s.Gotype) != 0 || s.Size < int64(Thearch.Ptrsize) || s.Name[0] == '.' {
if s.Gotype == nil || decodetype_noptr(s.Gotype) != 0 || s.Size < int64(Thearch.Ptrsize) || s.Name[0] == '.' {
// no scan
if s.Size < int64(32*Thearch.Ptrsize) {
// Emit small symbols as data.
// This case also handles unaligned and tiny symbols, so tread carefully.
for i := s.Value; i < s.Value+s.Size; i++ {
if (i % int64(Thearch.Ptrsize)) == 0 {
proggendata(g, obj.BitsScalar)
proggendata(g, 0)
}
}
} else {
......@@ -1156,7 +1139,7 @@ func proggenaddsym(g *ProgGen, s *LSym) {
Diag("proggenaddsym: unaligned noscan symbol %s: size=%d pos=%d", s.Name, s.Size, g.pos)
}
proggenarray(g, s.Size/int64(Thearch.Ptrsize))
proggendata(g, obj.BitsScalar)
proggendata(g, 0)
proggenarrayend(g)
}
......@@ -1183,7 +1166,8 @@ func proggenaddsym(g *ProgGen, s *LSym) {
Diag("proggenaddsym: unaligned gcmask symbol %s: size=%d pos=%d", s.Name, s.Size, g.pos)
}
for i := int64(0); i < size; i += int64(Thearch.Ptrsize) {
proggendata(g, uint8((mask[i/int64(Thearch.Ptrsize)/2]>>uint64((i/int64(Thearch.Ptrsize)%2)*4+2))&obj.BitsMask))
word := uint(i / int64(Thearch.Ptrsize))
proggendata(g, (mask[word/8]>>(word%8))&1)
}
g.pos = s.Value + size
}
......
......@@ -21,16 +21,6 @@ package obj
// Used by cmd/gc.
const (
GcBits = 4
BitsPerPointer = 2
BitsDead = 0
BitsScalar = 1
BitsPointer = 2
BitsMask = 3
PointersPerByte = 8 / BitsPerPointer
)
const (
InsData = 1 + iota
InsArray
......
......@@ -4388,7 +4388,7 @@ func TestCallGC(t *testing.T) {
type funcLayoutTest struct {
rcvr, t Type
size, argsize, retOffset uintptr
stack []byte
stack []byte // pointer bitmap: 1 is pointer, 0 is scalar (or uninitialized)
gc []byte
}
......@@ -4399,7 +4399,7 @@ func init() {
var naclExtra []byte
if runtime.GOARCH == "amd64p32" {
argAlign = 2 * PtrSize
naclExtra = append(naclExtra, BitsScalar)
naclExtra = append(naclExtra, 0)
}
roundup := func(x uintptr, a uintptr) uintptr {
return (x + a - 1) / a * a
......@@ -4412,17 +4412,17 @@ func init() {
6 * PtrSize,
4 * PtrSize,
4 * PtrSize,
[]byte{BitsPointer, BitsScalar, BitsPointer},
[]byte{BitsPointer, BitsScalar, BitsPointer, BitsScalar, BitsPointer, BitsScalar},
[]byte{1, 0, 1},
[]byte{1, 0, 1, 0, 1, 0},
})
var r, s []byte
if PtrSize == 4 {
r = []byte{BitsScalar, BitsScalar, BitsScalar, BitsPointer}
s = append([]byte{BitsScalar, BitsScalar, BitsScalar, BitsPointer, BitsScalar}, naclExtra...)
r = []byte{0, 0, 0, 1}
s = append([]byte{0, 0, 0, 1, 0}, naclExtra...)
} else {
r = []byte{BitsScalar, BitsScalar, BitsPointer}
s = []byte{BitsScalar, BitsScalar, BitsPointer, BitsScalar}
r = []byte{0, 0, 1}
s = []byte{0, 0, 1, 0}
}
funcLayoutTests = append(funcLayoutTests,
funcLayoutTest{
......@@ -4442,8 +4442,8 @@ func init() {
4 * PtrSize,
4 * PtrSize,
4 * PtrSize,
[]byte{BitsPointer, BitsScalar, BitsPointer, BitsPointer},
[]byte{BitsPointer, BitsScalar, BitsPointer, BitsPointer},
[]byte{1, 0, 1, 1},
[]byte{1, 0, 1, 1},
})
type S struct {
......@@ -4457,8 +4457,8 @@ func init() {
4 * PtrSize,
4 * PtrSize,
4 * PtrSize,
[]byte{BitsScalar, BitsScalar, BitsPointer, BitsPointer},
[]byte{BitsScalar, BitsScalar, BitsPointer, BitsPointer},
[]byte{0, 0, 1, 1},
[]byte{0, 0, 1, 1},
})
funcLayoutTests = append(funcLayoutTests,
......@@ -4468,8 +4468,8 @@ func init() {
roundup(3*PtrSize, argAlign),
3 * PtrSize,
roundup(3*PtrSize, argAlign),
[]byte{BitsPointer, BitsScalar, BitsPointer},
append([]byte{BitsPointer, BitsScalar, BitsPointer}, naclExtra...),
[]byte{1, 0, 1},
append([]byte{1, 0, 1}, naclExtra...),
})
funcLayoutTests = append(funcLayoutTests,
......@@ -4480,7 +4480,7 @@ func init() {
PtrSize,
roundup(PtrSize, argAlign),
[]byte{},
append([]byte{BitsScalar}, naclExtra...),
append([]byte{0}, naclExtra...),
})
funcLayoutTests = append(funcLayoutTests,
......@@ -4491,7 +4491,7 @@ func init() {
0,
0,
[]byte{},
[]byte{BitsScalar},
[]byte{0},
})
funcLayoutTests = append(funcLayoutTests,
......@@ -4501,8 +4501,8 @@ func init() {
2 * PtrSize,
2 * PtrSize,
2 * PtrSize,
[]byte{BitsPointer},
[]byte{BitsPointer, BitsScalar},
[]byte{1},
[]byte{1, 0},
// Note: this one is tricky, as the receiver is not a pointer. But we
// pass the receiver by reference to the autogenerated pointer-receiver
// version of the function.
......
......@@ -18,8 +18,6 @@ func IsRO(v Value) bool {
var CallGC = &callGC
const PtrSize = ptrSize
const BitsPointer = bitsPointer
const BitsScalar = bitsScalar
func FuncLayout(t Type, rcvr Type) (frametype Type, argSize, retOffset uintptr, stack []byte, gc []byte, ptrs bool) {
var ft *rtype
......@@ -38,7 +36,7 @@ func FuncLayout(t Type, rcvr Type) (frametype Type, argSize, retOffset uintptr,
}
gcdata := (*[1000]byte)(ft.gc[0])
for i := uintptr(0); i < ft.size/ptrSize; i++ {
gc = append(gc, gcdata[i/2]>>(i%2*4+2)&3)
gc = append(gc, gcdata[i/8]>>(i%8)&1)
}
ptrs = ft.kind&kindNoPointers == 0
return
......
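
The test helper above shows the extraction change in miniature: the old expression pulled a 2-bit kind out of bits 2-3 of each nibble, the new one pulls a single bit. A worked comparison with made-up mask bytes:

```go
package main

import "fmt"

func main() {
	// Old format: 4 bits per word, kind in bits 2-3 of each nibble.
	// 0x84 encodes word 0 as BitsScalar (1), word 1 as BitsPointer (2).
	oldByte := byte(0x84)
	for i := uintptr(0); i < 2; i++ {
		fmt.Println((oldByte >> (i%2*4 + 2)) & 3) // 1, then 2
	}

	// New format: 1 bit per word. 0x02 encodes word 0 as scalar,
	// word 1 as pointer.
	newByte := byte(0x02)
	for i := uintptr(0); i < 2; i++ {
		fmt.Println((newByte >> (i % 8)) & 1) // 0, then 1
	}
}
```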
......@@ -1701,14 +1701,14 @@ func (gc *gcProg) appendProg(t *rtype) {
default:
panic("reflect: non-pointer type marked as having pointers")
case Ptr, UnsafePointer, Chan, Func, Map:
gc.appendWord(bitsPointer)
gc.appendWord(1)
case Slice:
gc.appendWord(bitsPointer)
gc.appendWord(bitsScalar)
gc.appendWord(bitsScalar)
gc.appendWord(1)
gc.appendWord(0)
gc.appendWord(0)
case String:
gc.appendWord(bitsPointer)
gc.appendWord(bitsScalar)
gc.appendWord(1)
gc.appendWord(0)
case Array:
c := t.Len()
e := t.Elem().common()
......@@ -1716,8 +1716,8 @@ func (gc *gcProg) appendProg(t *rtype) {
gc.appendProg(e)
}
case Interface:
gc.appendWord(bitsPointer)
gc.appendWord(bitsPointer)
gc.appendWord(1)
gc.appendWord(1)
case Struct:
oldsize := gc.size
c := t.NumField()
......@@ -1737,13 +1737,12 @@ func (gc *gcProg) appendWord(v byte) {
panic("reflect: unaligned GC program")
}
nptr := gc.size / ptrsize
for uintptr(len(gc.gc)) < nptr/2+1 {
gc.gc = append(gc.gc, 0x44) // BitsScalar
for uintptr(len(gc.gc)) <= nptr/8 {
gc.gc = append(gc.gc, 0)
}
gc.gc[nptr/2] &= ^(3 << ((nptr%2)*4 + 2))
gc.gc[nptr/2] |= v << ((nptr%2)*4 + 2)
gc.gc[nptr/8] |= v << (nptr % 8)
gc.size += ptrsize
if v == bitsPointer {
if v == 1 {
gc.hasPtr = true
}
}
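
A side note on the padding value: the removed fill byte 0x44 was not arbitrary. Each nibble of the old format carried one word's kind in bits 2-3, so BitsScalar (1) shifted left by 2 gives 0x4, and two scalar words per byte give 0x44; the 1-bit format can pad with plain zero bytes:

```go
package scratch

const bitsScalar = 1 // old 2-bit encoding for a scalar word

// Two scalar nibbles per byte: 0x04 | 0x40 == 0x44.
const oldScalarPad = bitsScalar<<2 | bitsScalar<<2<<4

const newScalarPad = 0 // 1-bit format: a scalar word is just a 0 bit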
......@@ -1758,33 +1757,20 @@ func (gc *gcProg) finalize() (unsafe.Pointer, bool) {
ptrsize := unsafe.Sizeof(uintptr(0))
gc.align(ptrsize)
nptr := gc.size / ptrsize
for uintptr(len(gc.gc)) < nptr/2+1 {
gc.gc = append(gc.gc, 0x44) // BitsScalar
}
// If number of words is odd, repeat the mask twice.
// Compiler does the same.
if nptr%2 != 0 {
for i := uintptr(0); i < nptr; i++ {
gc.appendWord(extractGCWord(gc.gc, i))
}
for uintptr(len(gc.gc)) <= nptr/8 {
gc.gc = append(gc.gc, 0)
}
return unsafe.Pointer(&gc.gc[0]), gc.hasPtr
}
func extractGCWord(gc []byte, i uintptr) byte {
return (gc[i/2] >> ((i%2)*4 + 2)) & 3
return gc[i/8] >> (i % 8) & 1
}
func (gc *gcProg) align(a uintptr) {
gc.size = align(gc.size, a)
}
// These constants must stay in sync with ../runtime/mbitmap.go.
const (
bitsScalar = 1
bitsPointer = 2
)
// Make sure these routines stay in sync with ../../runtime/hashmap.go!
// These types exist only for GC, so we only fill out GC relevant info.
// Currently, that's just size and the GC program. We also fill in string
......@@ -1814,7 +1800,7 @@ func bucketOf(ktyp, etyp *rtype) *rtype {
var gc gcProg
// topbits
for i := 0; i < int(bucketSize*unsafe.Sizeof(uint8(0))/ptrsize); i++ {
gc.append(bitsScalar)
gc.append(0)
}
// keys
for i := 0; i < bucketSize; i++ {
......@@ -1825,10 +1811,10 @@ func bucketOf(ktyp, etyp *rtype) *rtype {
gc.appendProg(etyp)
}
// overflow
gc.append(bitsPointer)
gc.append(1)
ptrdata := gc.size
if runtime.GOARCH == "amd64p32" {
gc.append(bitsScalar)
gc.append(0)
}
b := new(rtype)
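
For concreteness, here is how the bucketOf calls above would assemble the 1-bit mask for a map[string]int bucket, under the assumptions that bucketSize is 8 and ptrsize is 8 (64-bit, non-amd64p32). This is an illustration of the append sequence, not runtime code:

```go
package main

import "fmt"

func main() {
	var bits []byte
	bits = append(bits, 0) // topbits: 8 uint8s = 1 scalar word
	for i := 0; i < 8; i++ {
		bits = append(bits, 1, 0) // each string key: data pointer, length
	}
	for i := 0; i < 8; i++ {
		bits = append(bits, 0) // each int value: scalar
	}
	bits = append(bits, 1)       // overflow bucket pointer
	fmt.Println(len(bits), bits) // 26 words described
}
```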
......@@ -2058,16 +2044,16 @@ func funcLayout(t *rtype, rcvr *rtype) (frametype *rtype, argSize, retOffset uin
// space no matter how big they actually are.
if ifaceIndir(rcvr) {
// we pass a pointer to the receiver.
gc.append(bitsPointer)
stack.append2(bitsPointer)
gc.append(1)
stack.append2(1)
} else if rcvr.pointers() {
// rcvr is a one-word pointer object. Its gc program
// is just what we need here.
gc.append(bitsPointer)
stack.append2(bitsPointer)
gc.append(1)
stack.append2(1)
} else {
gc.append(bitsScalar)
stack.append2(bitsScalar)
gc.append(0)
stack.append2(0)
}
offset += ptrSize
}
......@@ -2154,17 +2140,17 @@ func addTypeBits(bv *bitVector, offset *uintptr, t *rtype) {
case Chan, Func, Map, Ptr, Slice, String, UnsafePointer:
// 1 pointer at start of representation
for bv.n < 2*uint32(*offset/uintptr(ptrSize)) {
bv.append2(bitsScalar)
bv.append2(0)
}
bv.append2(bitsPointer)
bv.append2(1)
case Interface:
// 2 pointers
for bv.n < 2*uint32(*offset/uintptr(ptrSize)) {
bv.append2(bitsScalar)
bv.append2(0)
}
bv.append2(bitsPointer)
bv.append2(bitsPointer)
bv.append2(1)
bv.append2(1)
case Array:
// repeat inner type
......
......@@ -76,15 +76,8 @@ func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) {
}
func GCMask(x interface{}) (ret []byte) {
e := (*eface)(unsafe.Pointer(&x))
s := (*slice)(unsafe.Pointer(&ret))
systemstack(func() {
var len uintptr
var a *byte
getgcmask(e.data, e._type, &a, &len)
s.array = unsafe.Pointer(a)
s.len = int(len)
s.cap = s.len
ret = getgcmask(x)
})
return
}
......
......@@ -28,13 +28,13 @@ func TestGCInfo(t *testing.T) {
verifyGCInfo(t, "data eface", &dataEface, infoEface)
verifyGCInfo(t, "data iface", &dataIface, infoIface)
verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), infoScalarPtr)
verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), infoPtrScalar)
verifyGCInfo(t, "stack BigStruct", new(BigStruct), infoBigStruct())
verifyGCInfo(t, "stack string", new(string), infoString)
verifyGCInfo(t, "stack slice", new([]string), infoSlice)
verifyGCInfo(t, "stack eface", new(interface{}), infoEface)
verifyGCInfo(t, "stack iface", new(Iface), infoIface)
verifyGCInfo(t, "stack ScalarPtr", new(ScalarPtr), nonStackInfo(infoScalarPtr))
verifyGCInfo(t, "stack PtrScalar", new(PtrScalar), nonStackInfo(infoPtrScalar))
verifyGCInfo(t, "stack BigStruct", new(BigStruct), nonStackInfo(infoBigStruct()))
verifyGCInfo(t, "stack string", new(string), nonStackInfo(infoString))
verifyGCInfo(t, "stack slice", new([]string), nonStackInfo(infoSlice))
verifyGCInfo(t, "stack eface", new(interface{}), nonStackInfo(infoEface))
verifyGCInfo(t, "stack iface", new(Iface), nonStackInfo(infoIface))
for i := 0; i < 10; i++ {
verifyGCInfo(t, "heap ScalarPtr", escape(new(ScalarPtr)), infoScalarPtr)
......
......@@ -223,29 +223,25 @@ func typedmemmove(typ *_type, dst, src unsafe.Pointer) {
}
systemstack(func() {
mask := typeBitmapInHeapBitmapFormat(typ)
dst := dst // make local copies
src := src
nptr := typ.size / ptrSize
for i := uintptr(0); i < nptr; i += 2 {
bits := mask[i/2]
if (bits>>2)&typeMask == typePointer {
writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
} else {
*(*uintptr)(dst) = *(*uintptr)(src)
}
// TODO(rsc): The noescape calls should be unnecessary.
dst = add(noescape(dst), ptrSize)
src = add(noescape(src), ptrSize)
if i+1 == nptr {
break
}
bits >>= 4
if (bits>>2)&typeMask == typePointer {
writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
} else {
*(*uintptr)(dst) = *(*uintptr)(src)
i := uintptr(0)
Copy:
for _, bits := range ptrBitmapForType(typ) {
for j := 0; j < 8; j++ {
if bits&1 != 0 {
writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
} else {
*(*uintptr)(dst) = *(*uintptr)(src)
}
if i++; i >= nptr {
break Copy
}
dst = add(dst, ptrSize)
src = add(src, ptrSize)
bits >>= 1
}
dst = add(noescape(dst), ptrSize)
src = add(noescape(src), ptrSize)
}
})
}
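
The rewritten loop above is the pattern the rest of this file now follows: range over the mask bytes, peel off one bit per word, and exit with a labeled break once nptr words have been handled. A standalone re-derivation of just the bit walk (printing instead of copying memory, which the real code does via writebarrierptr):

```go
package main

import "fmt"

func main() {
	mask := []byte{0x25, 0x01} // words 0, 2, 5, 8 are pointers
	nptr := uintptr(10)        // a hypothetical 10-word object

	i := uintptr(0)
Walk:
	for _, bits := range mask {
		for j := 0; j < 8; j++ {
			if bits&1 != 0 {
				fmt.Println("word", i, "pointer copy (write barrier)")
			} else {
				fmt.Println("word", i, "scalar copy")
			}
			if i++; i >= nptr {
				break Walk
			}
			bits >>= 1
		}
	}
}
```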
......@@ -274,18 +270,25 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size
off += frag
}
mask := typeBitmapInHeapBitmapFormat(typ)
mask := ptrBitmapForType(typ)
nptr := (off + size) / ptrSize
for i := uintptr(off / ptrSize); i < nptr; i++ {
bits := mask[i/2] >> ((i & 1) << 2)
if (bits>>2)&typeMask == typePointer {
writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
} else {
*(*uintptr)(dst) = *(*uintptr)(src)
i := uintptr(off / ptrSize)
Copy:
for {
bits := mask[i/8] >> (i % 8)
for j := i % 8; j < 8; j++ {
if bits&1 != 0 {
writebarrierptr((*uintptr)(dst), *(*uintptr)(src))
} else {
*(*uintptr)(dst) = *(*uintptr)(src)
}
if i++; i >= nptr {
break Copy
}
dst = add(dst, ptrSize)
src = add(src, ptrSize)
bits >>= 1
}
// TODO(rsc): The noescape calls should be unnecessary.
dst = add(noescape(dst), ptrSize)
src = add(noescape(src), ptrSize)
}
size &= ptrSize - 1
if size > 0 {
......@@ -307,18 +310,25 @@ func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uin
}
systemstack(func() {
mask := typeBitmapInHeapBitmapFormat(typ)
mask := ptrBitmapForType(typ)
// retoffset is known to be pointer-aligned (at least).
// TODO(rsc): The noescape call should be unnecessary.
dst := add(noescape(frame), retoffset)
nptr := framesize / ptrSize
for i := uintptr(retoffset / ptrSize); i < nptr; i++ {
bits := mask[i/2] >> ((i & 1) << 2)
if (bits>>2)&typeMask == typePointer {
writebarrierptr_nostore((*uintptr)(dst), *(*uintptr)(dst))
i := uintptr(retoffset / ptrSize)
Copy:
for {
bits := mask[i/8] >> (i % 8)
for j := i % 8; j < 8; j++ {
if bits&1 != 0 {
writebarrierptr_nostore((*uintptr)(dst), *(*uintptr)(dst))
}
if i++; i >= nptr {
break Copy
}
dst = add(dst, ptrSize)
bits >>= 1
}
// TODO(rsc): The noescape call should be unnecessary.
dst = add(noescape(dst), ptrSize)
}
})
}
......