llgo/internal/runtime/mbitmap.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmap
//
// The heap bitmap comprises 1 bit for each pointer-sized word in the heap,
// recording whether a pointer is stored in that word or not. This bitmap
// is stored in the heapArena metadata backing each heap arena.
// That is, if ha is the heapArena for the arena starting at "start",
// then ha.bitmap[0] holds the 64 bits for the 64 words "start"
// through start+63*ptrSize, ha.bitmap[1] holds the entries for
// start+64*ptrSize through start+127*ptrSize, and so on.
// Bits correspond to words in little-endian order. ha.bitmap[0]&1 represents
// the word at "start", ha.bitmap[0]>>1&1 represents the word at start+8, etc.
// (For 32-bit platforms, s/64/32/.)
//
// We also keep a noMorePtrs bitmap which allows us to stop scanning
// the heap bitmap early in certain situations. If ha.noMorePtrs[i]>>j&1
// is 1, then the object containing the last word described by ha.bitmap[8*i+j]
// has no more pointers beyond those described by ha.bitmap[8*i+j].
// If ha.noMorePtrs[i]>>j&1 is set, the entries in ha.bitmap[8*i+j+1] and
// beyond must all be zero until the start of the next object.
//
// The bitmap for noscan spans is set to all zero at span allocation time.
//
// The bitmap for unallocated objects in scannable spans is not maintained
// (can be junk).
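//
// For illustration, the lookup described above can be sketched as follows
// (a hedged example reusing this file's names, assuming a 64-bit platform;
// not itself part of the runtime):
//
//	word := addr / ptrSize % heapArenaWords // word index within the arena
//	idx := word / 64                        // index into ha.bitmap
//	off := word % 64                        // bit offset in ha.bitmap[idx]
//	isPtr := ha.bitmap[idx]>>off&1 != 0     // pointer/scalar bit for addr
//	noMore := ha.noMorePtrs[idx/8]>>(idx%8)&1 != 0 // may stop scanning early
//
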
package runtime
/*
import (
"internal/goarch"
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
// addb returns the byte pointer p+n.
//
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
// Note: wrote out full expression instead of calling add(p, n)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}
// subtractb returns the byte pointer p-n.
//
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
// Note: wrote out full expression instead of calling add(p, -n)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}
// add1 returns the byte pointer p+1.
//
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
// Note: wrote out full expression instead of calling addb(p, 1)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}
// subtract1 returns the byte pointer p-1.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nowritebarrier
//go:nosplit
func subtract1(p *byte) *byte {
// Note: wrote out full expression instead of calling subtractb(p, 1)
// to reduce the number of temporaries generated by the
// compiler for this trivial expression during inlining.
return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}
// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
bytep *uint8
mask uint8
index uintptr
}
//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
bytep, mask := s.allocBits.bitp(allocBitIndex)
return markBits{bytep, mask, allocBitIndex}
}
// refillAllocCache takes the 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64-bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte)))
aCache := uint64(0)
aCache |= uint64(bytes[0])
aCache |= uint64(bytes[1]) << (1 * 8)
aCache |= uint64(bytes[2]) << (2 * 8)
aCache |= uint64(bytes[3]) << (3 * 8)
aCache |= uint64(bytes[4]) << (4 * 8)
aCache |= uint64(bytes[5]) << (5 * 8)
aCache |= uint64(bytes[6]) << (6 * 8)
aCache |= uint64(bytes[7]) << (7 * 8)
s.allocCache = ^aCache
}
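// For example (a sketch, not runtime API), if the low byte of s.allocBits is
// 0x07 (the first three objects allocated) and the rest is zero:
//
//	cache := ^uint64(0x07)             // negated alloc bits: ...11111000
//	free := sys.TrailingZeros64(cache) // == 3, the first free object index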
// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
sfreeindex := s.freeindex
snelems := s.nelems
if sfreeindex == snelems {
return sfreeindex
}
if sfreeindex > snelems {
throw("s.freeindex > s.nelems")
}
aCache := s.allocCache
bitIndex := sys.TrailingZeros64(aCache)
for bitIndex == 64 {
// Move index to start of next cached bits.
sfreeindex = (sfreeindex + 64) &^ (64 - 1)
if sfreeindex >= snelems {
s.freeindex = snelems
return snelems
}
whichByte := sfreeindex / 8
// Refill s.allocCache with the next 64 alloc bits.
s.refillAllocCache(whichByte)
aCache = s.allocCache
bitIndex = sys.TrailingZeros64(aCache)
// nothing available in cached bits
// grab the next 8 bytes and try again.
}
result := sfreeindex + uintptr(bitIndex)
if result >= snelems {
s.freeindex = snelems
return snelems
}
s.allocCache >>= uint(bitIndex + 1)
sfreeindex = result + 1
if sfreeindex%64 == 0 && sfreeindex != snelems {
// We just incremented s.freeindex so it isn't 0.
// As each 1 in s.allocCache was encountered and used for allocation
// it was shifted away. At this point s.allocCache contains all 0s.
// Refill s.allocCache so that it corresponds
// to the bits at s.allocBits starting at s.freeindex.
whichByte := sfreeindex / 8
s.refillAllocCache(whichByte)
}
s.freeindex = sfreeindex
return result
}
// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
if index < s.freeIndexForScan {
return false
}
bytep, mask := s.allocBits.bitp(index)
return *bytep&mask == 0
}
// divideByElemSize returns n/s.elemsize.
// n must be within [0, s.npages*_PageSize),
// or may be exactly s.npages*_PageSize
// if s.elemsize is from sizeclasses.go.
//
// nosplit, because it is called by objIndex, which is nosplit
//
//go:nosplit
func (s *mspan) divideByElemSize(n uintptr) uintptr {
const doubleCheck = false
// See explanation in mksizeclasses.go's computeDivMagic.
q := uintptr((uint64(n) * uint64(s.divMul)) >> 32)
if doubleCheck && q != n/s.elemsize {
println(n, "/", s.elemsize, "should be", n/s.elemsize, "but got", q)
throw("bad magic division")
}
return q
}
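// A hand-worked example of the magic division above (a sketch; real divMul
// values come from mksizeclasses.go): for elemsize 48, divMul is
// ceil(2^32/48) = 89478486, and for n = 96, (96*89478486)>>32 == 2 == 96/48.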
// nosplit, because it is called by other nosplit code like findObject
//
//go:nosplit
func (s *mspan) objIndex(p uintptr) uintptr {
return s.divideByElemSize(p - s.base())
}
func markBitsForAddr(p uintptr) markBits {
s := spanOf(p)
objIndex := s.objIndex(p)
return s.markBitsForIndex(objIndex)
}
func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
bytep, mask := s.gcmarkBits.bitp(objIndex)
return markBits{bytep, mask, objIndex}
}
func (s *mspan) markBitsForBase() markBits {
return markBits{&s.gcmarkBits.x, uint8(1), 0}
}
// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
return *m.bytep&m.mask != 0
}
// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomic.Or8(m.bytep, m.mask)
}
// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
*m.bytep |= m.mask
}
// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
// Might be racing with other updates, so use atomic update always.
// We used to be clever here and use a non-atomic update in certain
// cases, but it's not worth the risk.
atomic.And8(m.bytep, ^m.mask)
}
// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
mbits = markBitsForAddr(base)
if mbits.mask != 1 {
throw("markBitsForSpan: unaligned start")
}
return mbits
}
// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
if m.mask == 1<<7 {
m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
m.mask = 1
} else {
m.mask = m.mask << 1
}
m.index++
}
// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
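// On 64-bit platforms ^uintptr(0)>>63 is 1, so the expression below evaluates
// to 0xdeaddeaddeaddead; on 32-bit the shift count is 0 and it stays 0xdeaddead.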
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))
// badPointer reports the bad pointer p and throws a "found bad pointer in Go heap" error.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
// Typically this indicates an incorrect use
// of unsafe or cgo to store a bad pointer in
// the Go heap. It may also indicate a runtime
// bug.
//
// TODO(austin): We could be more aggressive
// and detect pointers to unallocated objects
// in allocated spans.
printlock()
print("runtime: pointer ", hex(p))
if s != nil {
state := s.state.get()
if state != mSpanInUse {
print(" to unallocated span")
} else {
print(" to unused region of span")
}
print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state)
}
print("\n")
if refBase != 0 {
print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
gcDumpObject("object", refBase, refOff)
}
getg().m.traceback = 2
throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}
// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
s = spanOf(p)
// If s is nil, the virtual address has never been part of the heap.
// This pointer may be to some mmap'd region, so we allow it.
if s == nil {
if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
// Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
// as they are the only platform where compiler's clobberdead mode is
// implemented. On these platforms clobberdeadPtr cannot be a valid address.
badPointer(s, p, refBase, refOff)
}
return
}
// If p is a bad pointer, it may not be in s's bounds.
//
// Check s.state to synchronize with span initialization
// before checking other fields. See also spanOfHeap.
if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
// Pointers into stacks are also ok, the runtime manages these explicitly.
if state == mSpanManual {
return
}
// The following ensures that we are rigorous about what data
// structures hold valid pointers.
if debug.invalidptr != 0 {
badPointer(s, p, refBase, refOff)
}
return
}
objIndex = s.objIndex(p)
base = s.base() + objIndex*s.elemsize
return
}
// reflect_verifyNotInHeapPtr reports whether converting the not-in-heap pointer into an unsafe.Pointer is ok.
//
//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
func reflect_verifyNotInHeapPtr(p uintptr) bool {
// Conversion to a pointer is ok as long as findObject above does not call badPointer.
// Since we're already promised that p doesn't point into the heap, just disallow heap
// pointers and the special clobbered pointer.
return spanOf(p) == nil && p != clobberdeadPtr
}
const ptrBits = 8 * goarch.PtrSize
// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
// heapBits will report on pointers in the range [addr,addr+size).
// The low bit of mask contains the pointerness of the word at addr
// (assuming valid>0).
addr, size uintptr
// The next few pointer bits representing words starting at addr.
// Those bits already returned by next() are zeroed.
mask uintptr
// Number of bits in mask that are valid. mask is always less than 1<<valid.
valid uintptr
}
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must ensure [addr,addr+size) is in an allocated span.
// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func heapBitsForAddr(addr, size uintptr) heapBits {
// Find arena
ai := arenaIndex(addr)
ha := mheap_.arenas[ai.l1()][ai.l2()]
// Word index in arena.
word := addr / goarch.PtrSize % heapArenaWords
// Word index and bit offset in bitmap array.
idx := word / ptrBits
off := word % ptrBits
// Grab relevant bits of bitmap.
mask := ha.bitmap[idx] >> off
valid := ptrBits - off
// Process depending on where the object ends.
nptr := size / goarch.PtrSize
if nptr < valid {
// Bits for this object end before the end of this bitmap word.
// Squash bits for the following objects.
mask &= 1<<(nptr&(ptrBits-1)) - 1
valid = nptr
} else if nptr == valid {
// Bits for this object end at exactly the end of this bitmap word.
// All good.
} else {
// Bits for this object extend into the next bitmap word. See if there
// may be any pointers recorded there.
if uintptr(ha.noMorePtrs[idx/8])>>(idx%8)&1 != 0 {
// No more pointers in this object after this bitmap word.
// Update size so we know not to look there.
size = valid * goarch.PtrSize
}
}
return heapBits{addr: addr, size: size, mask: mask, valid: valid}
}
// next returns the (absolute) address of the next known pointer and
// a heapBits iterator representing any remaining pointers.
// If there are no more pointers, returns address 0.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//
//go:nosplit
func (h heapBits) next() (heapBits, uintptr) {
for {
if h.mask != 0 {
var i int
if goarch.PtrSize == 8 {
i = sys.TrailingZeros64(uint64(h.mask))
} else {
i = sys.TrailingZeros32(uint32(h.mask))
}
h.mask ^= uintptr(1) << (i & (ptrBits - 1))
return h, h.addr + uintptr(i)*goarch.PtrSize
}
// Skip words that we've already processed.
h.addr += h.valid * goarch.PtrSize
h.size -= h.valid * goarch.PtrSize
if h.size == 0 {
return h, 0 // no more pointers
}
// Grab more bits and try again.
h = heapBitsForAddr(h.addr, h.size)
}
}
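// A typical scan loop over an object's pointer words (a sketch mirroring the
// use in getgcmask below):
//
//	h := heapBitsForAddr(base, size)
//	for {
//		var addr uintptr
//		if h, addr = h.next(); addr == 0 {
//			break
//		}
//		// the word at addr holds a pointer
//	}
//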
// nextFast is like next, but can return 0 even when there are more pointers
// to be found. Callers should call next if nextFast returns 0 as its second
// return value.
//
//	if h, addr = h.nextFast(); addr == 0 {
//		if h, addr = h.next(); addr == 0 {
//			... no more pointers ...
//		}
//	}
//	... process pointer at addr ...
//
// nextFast is designed to be inlineable.
//
//go:nosplit
func (h heapBits) nextFast() (heapBits, uintptr) {
// TESTQ/JEQ
if h.mask == 0 {
return h, 0
}
// BSFQ
var i int
if goarch.PtrSize == 8 {
i = sys.TrailingZeros64(uint64(h.mask))
} else {
i = sys.TrailingZeros32(uint32(h.mask))
}
// BTCQ
h.mask ^= uintptr(1) << (i & (ptrBits - 1))
// LEAQ (XX)(XX*8)
return h, h.addr + uintptr(i)*goarch.PtrSize
}
*/
// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
// barrier.
//
// Callers should call bulkBarrierPreWrite immediately before
// calling memmove(dst, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
// The caller is also responsible for cgo pointer checks if this
// may be writing Go pointers into non-Go memory.
//
// The pointer bitmap is not maintained for allocations containing
// no pointers at all; any caller of bulkBarrierPreWrite must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.PtrBytes.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
func bulkBarrierPreWrite(dst, src, size uintptr) {
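	// llgo: stubbed out as a no-op; the upstream heap-bitmap implementation
	// is preserved in the commented-out blocks above and below.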
}
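// A typical call site follows the protocol above (a hedged sketch of the
// pattern, not a specific runtime caller; typ, dst, src, and size are
// assumed locals):
//
//	if writeBarrier.needed && typ.PtrBytes != 0 {
//		bulkBarrierPreWrite(uintptr(dst), uintptr(src), size)
//	}
//	memmove(dst, src, size)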
/*
// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
// does not execute write barriers for [dst, dst+size).
//
// In addition to the requirements of bulkBarrierPreWrite
// callers need to ensure [dst, dst+size) is zeroed.
//
// This is used for special cases where e.g. dst was just
// created and zeroed with malloc.
//
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
if (dst|src|size)&(goarch.PtrSize-1) != 0 {
throw("bulkBarrierPreWrite: unaligned arguments")
}
if !writeBarrier.needed {
return
}
buf := &getg().m.p.ptr().wbBuf
h := heapBitsForAddr(dst, size)
for {
var addr uintptr
if h, addr = h.next(); addr == 0 {
break
}
srcx := (*uintptr)(unsafe.Pointer(addr - dst + src))
p := buf.get1()
p[0] = *srcx
}
}
// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
word := maskOffset / goarch.PtrSize
bits = addb(bits, word/8)
mask := uint8(1) << (word % 8)
buf := &getg().m.p.ptr().wbBuf
for i := uintptr(0); i < size; i += goarch.PtrSize {
if mask == 0 {
bits = addb(bits, 1)
if *bits == 0 {
// Skip 8 words.
i += 7 * goarch.PtrSize
continue
}
mask = 1
}
if *bits&mask != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if src == 0 {
p := buf.get1()
p[0] = *dstx
} else {
srcx := (*uintptr)(unsafe.Pointer(src + i))
p := buf.get2()
p[0] = *dstx
p[1] = *srcx
}
}
mask <<= 1
}
}
// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe the barriers and the memmove as one atomic action.
//
// Callers must perform cgo checks if goexperiment.CgoCheck2.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
if typ == nil {
throw("runtime: typeBitsBulkBarrier without type")
}
if typ.Size_ != size {
println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " of size ", typ.Size_, " but memory size", size)
throw("runtime: invalid typeBitsBulkBarrier")
}
if typ.Kind_&kindGCProg != 0 {
println("runtime: typeBitsBulkBarrier with type ", toRType(typ).string(), " with GC prog")
throw("runtime: invalid typeBitsBulkBarrier")
}
if !writeBarrier.needed {
return
}
ptrmask := typ.GCData
buf := &getg().m.p.ptr().wbBuf
var bits uint32
for i := uintptr(0); i < typ.PtrBytes; i += goarch.PtrSize {
if i&(goarch.PtrSize*8-1) == 0 {
bits = uint32(*ptrmask)
ptrmask = addb(ptrmask, 1)
} else {
bits = bits >> 1
}
if bits&1 != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
p := buf.get2()
p[0] = *dstx
p[1] = *srcx
}
}
}
// initHeapBits initializes the heap bitmap for a span.
// If this is a span of single pointer allocations, it initializes all
// words to pointer. If forceClear is true, it clears all bits instead.
func (s *mspan) initHeapBits(forceClear bool) {
if forceClear || s.spanclass.noscan() {
// Set all the pointer bits to zero. We do this once
// when the span is allocated so we don't have to do it
// for each object allocation.
base := s.base()
size := s.npages * pageSize
h := writeHeapBitsForAddr(base)
h.flush(base, size)
return
}
isPtrs := goarch.PtrSize == 8 && s.elemsize == goarch.PtrSize
if !isPtrs {
return // nothing to do
}
h := writeHeapBitsForAddr(s.base())
size := s.npages * pageSize
nptrs := size / goarch.PtrSize
for i := uintptr(0); i < nptrs; i += ptrBits {
h = h.write(^uintptr(0), ptrBits)
}
h.flush(s.base(), size)
}
// countAlloc returns the number of objects allocated in span s by
// scanning the allocation bitmap.
func (s *mspan) countAlloc() int {
count := 0
bytes := divRoundUp(s.nelems, 8)
// Iterate over each 8-byte chunk and count allocations
// with an intrinsic. Note that newMarkBits guarantees that
// gcmarkBits will be 8-byte aligned, so we don't have to
// worry about edge cases: irrelevant bits will simply be zero.
for i := uintptr(0); i < bytes; i += 8 {
// Extract 64 bits from the byte pointer and count the 1 bits with OnesCount64.
// Note that the unsafe cast here doesn't preserve endianness,
// but that's OK. We only care about how many bits are 1, not
// about the order we discover them in.
mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
count += sys.OnesCount64(mrkBits)
}
return count
}
type writeHeapBits struct {
addr uintptr // address that the low bit of mask represents the pointer state of.
mask uintptr // some pointer bits starting at the address addr.
valid uintptr // number of bits in buf that are valid (including low)
low uintptr // number of low-order bits to not overwrite
}
func writeHeapBitsForAddr(addr uintptr) (h writeHeapBits) {
// We start writing bits maybe in the middle of a heap bitmap word.
// Remember how many bits into the word we started, so we can be sure
// not to overwrite the previous bits.
h.low = addr / goarch.PtrSize % ptrBits
// round down to heap word that starts the bitmap word.
h.addr = addr - h.low*goarch.PtrSize
// We don't have any bits yet.
h.mask = 0
h.valid = h.low
return
}
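// The intended usage is a chain of writes followed by a single flush, as in
// initHeapBits above and heapBitsSetType below (a sketch):
//
//	h := writeHeapBitsForAddr(x)
//	h = h.write(bits, n) // append n bits: 1=pointer, 0=scalar
//	h = h.pad(k)         // append k bytes of scalar padding
//	h.flush(x, size)     // write out, zero-filling through x+size
//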
// write appends the pointerness of the next valid words,
// taken from the low valid bits of bits. 1=pointer, 0=scalar.
func (h writeHeapBits) write(bits, valid uintptr) writeHeapBits {
if h.valid+valid <= ptrBits {
// Fast path - just accumulate the bits.
h.mask |= bits << h.valid
h.valid += valid
return h
}
// Too many bits to fit in this word. Write the current word
// out and move on to the next word.
data := h.mask | bits<<h.valid // mask for this word
h.mask = bits >> (ptrBits - h.valid) // leftover for next word
h.valid += valid - ptrBits // have h.valid+valid bits, writing ptrBits of them
// Flush mask to the memory bitmap.
// TODO: figure out how to cache arena lookup.
ai := arenaIndex(h.addr)
ha := mheap_.arenas[ai.l1()][ai.l2()]
idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
m := uintptr(1)<<h.low - 1
ha.bitmap[idx] = ha.bitmap[idx]&m | data
// Note: no synchronization required for this write because
// the allocator has exclusive access to the page, and the bitmap
// entries are all for a single page. Also, visibility of these
// writes is guaranteed by the publication barrier in mallocgc.
// Clear noMorePtrs bit, since we're going to be writing bits
// into the following word.
ha.noMorePtrs[idx/8] &^= uint8(1) << (idx % 8)
// Note: same as above
// Move to next word of bitmap.
h.addr += ptrBits * goarch.PtrSize
h.low = 0
return h
}
// pad adds padding of size bytes (all scalar bits).
func (h writeHeapBits) pad(size uintptr) writeHeapBits {
if size == 0 {
return h
}
words := size / goarch.PtrSize
for words > ptrBits {
h = h.write(0, ptrBits)
words -= ptrBits
}
return h.write(0, words)
}
// flush writes out the bits that have been accumulated, adding zeros as
// needed to cover the full object [addr, addr+size).
func (h writeHeapBits) flush(addr, size uintptr) {
// zeros counts the number of bits needed to represent the object minus the
// number of bits we've already written. This is the number of 0 bits
// that need to be added.
zeros := (addr+size-h.addr)/goarch.PtrSize - h.valid
// Add zero bits up to the bitmap word boundary
if zeros > 0 {
z := ptrBits - h.valid
if z > zeros {
z = zeros
}
h.valid += z
zeros -= z
}
// Find word in bitmap that we're going to write.
ai := arenaIndex(h.addr)
ha := mheap_.arenas[ai.l1()][ai.l2()]
idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
// Write remaining bits.
if h.valid != h.low {
m := uintptr(1)<<h.low - 1 // don't clear existing bits below "low"
m |= ^(uintptr(1)<<h.valid - 1) // don't clear existing bits above "valid"
ha.bitmap[idx] = ha.bitmap[idx]&m | h.mask
}
if zeros == 0 {
return
}
// Record in the noMorePtrs map that there won't be any more 1 bits,
// so readers can stop early.
ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)
// Advance to next bitmap word.
h.addr += ptrBits * goarch.PtrSize
// Continue on writing zeros for the rest of the object.
// For standard use of the ptr bits this is not required, as
// the bits are read from the beginning of the object. Some uses,
// like noscan spans, oblets, bulk write barriers, and cgocheck, might
// start mid-object, so these writes are still required.
for {
// Write zero bits.
ai := arenaIndex(h.addr)
ha := mheap_.arenas[ai.l1()][ai.l2()]
idx := h.addr / (ptrBits * goarch.PtrSize) % heapArenaBitmapWords
if zeros < ptrBits {
ha.bitmap[idx] &^= uintptr(1)<<zeros - 1
break
} else if zeros == ptrBits {
ha.bitmap[idx] = 0
break
} else {
ha.bitmap[idx] = 0
zeros -= ptrBits
}
ha.noMorePtrs[idx/8] |= uint8(1) << (idx % 8)
h.addr += ptrBits * goarch.PtrSize
}
}
// readUintptr reads the bytes starting at the aligned pointer p into a
// uintptr, treating them as little-endian.
func readUintptr(p *byte) uintptr {
x := *(*uintptr)(unsafe.Pointer(p))
if goarch.BigEndian {
if goarch.PtrSize == 8 {
return uintptr(sys.Bswap64(uint64(x)))
}
return uintptr(sys.Bswap32(uint32(x)))
}
return x
}
// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.Size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
//
// There can only be one allocation from a given span active at a time,
// and the bitmap for a span always falls on word boundaries,
// so there are no write-write races for access to the heap bitmap.
// Hence, heapBitsSetType can access the bitmap without atomics.
//
// There can be read-write races between heapBitsSetType and things
// that read the heap bitmap like scanobject. However, since
// heapBitsSetType is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// bits that belong to neighboring objects. Also, on weakly-ordered
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
const doubleCheck = false // slow but helpful; enable to test modifications to this code
if doubleCheck && dataSize%typ.Size_ != 0 {
throw("heapBitsSetType: dataSize not a multiple of typ.Size")
}
if goarch.PtrSize == 8 && size == goarch.PtrSize {
// It's one word and it has pointers, it must be a pointer.
// Since all allocated one-word objects are pointers
// (non-pointers are aggregated into tinySize allocations),
// (*mspan).initHeapBits sets the pointer bits for us.
// Nothing to do here.
if doubleCheck {
h, addr := heapBitsForAddr(x, size).next()
if addr != x {
throw("heapBitsSetType: pointer bit missing")
}
_, addr = h.next()
if addr != 0 {
throw("heapBitsSetType: second pointer bit found")
}
}
return
}
h := writeHeapBitsForAddr(x)
// Handle GC program.
if typ.Kind_&kindGCProg != 0 {
// Expand the gc program into the storage we're going to use for the actual object.
obj := (*uint8)(unsafe.Pointer(x))
n := runGCProg(addb(typ.GCData, 4), obj)
// Use the expanded program to set the heap bits.
for i := uintptr(0); true; i += typ.Size_ {
// Copy expanded program to heap bitmap.
p := obj
j := n
for j > 8 {
h = h.write(uintptr(*p), 8)
p = add1(p)
j -= 8
}
h = h.write(uintptr(*p), j)
if i+typ.Size_ == dataSize {
break // no padding after last element
}
// Pad with zeros to the start of the next element.
h = h.pad(typ.Size_ - n*goarch.PtrSize)
}
h.flush(x, size)
// Erase the expanded GC program.
memclrNoHeapPointers(unsafe.Pointer(obj), (n+7)/8)
return
}
// Note about sizes:
//
// typ.Size is the number of words in the object,
// and typ.PtrBytes is the number of words in the prefix
// of the object that contains pointers. That is, the final
// typ.Size - typ.PtrBytes words contain no pointers.
// This allows optimization of a common pattern where
// an object has a small header followed by a large scalar
// buffer. If we know the pointers are over, we don't have
// to scan the buffer's heap bitmap at all.
// The 1-bit ptrmasks are sized to contain only bits for
// the typ.PtrBytes prefix, zero padded out to a full byte
// of bitmap. If there is more room in the allocated object,
// that space is pointerless. The noMorePtrs bitmap will prevent
// scanning large pointerless tails of an object.
//
// Replicated copies are not as nice: if there is an array of
// objects with scalar tails, all but the last tail does have to
// be initialized, because there is no way to say "skip forward".
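//
// For example (a hand-worked case), a type with Size_ = 3 words and
// PtrBytes = 1 word (pointer, scalar, scalar) allocated as a 4-element
// array needs the bit sequence 1,0,0 written four times (12 bits, low bit
// first), followed by zeros for any tail of the allocation.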
ptrs := typ.PtrBytes / goarch.PtrSize
if typ.Size_ == dataSize { // Single element
if ptrs <= ptrBits { // Single small element
m := readUintptr(typ.GCData)
h = h.write(m, ptrs)
} else { // Single large element
p := typ.GCData
for {
h = h.write(readUintptr(p), ptrBits)
p = addb(p, ptrBits/8)
ptrs -= ptrBits
if ptrs <= ptrBits {
break
}
}
m := readUintptr(p)
h = h.write(m, ptrs)
}
} else { // Repeated element
words := typ.Size_ / goarch.PtrSize // total words, including scalar tail
if words <= ptrBits { // Repeated small element
n := dataSize / typ.Size_
m := readUintptr(typ.GCData)
// Make larger unit to repeat
for words <= ptrBits/2 {
if n&1 != 0 {
h = h.write(m, words)
}
n /= 2
m |= m << words
ptrs += words
words *= 2
if n == 1 {
break
}
}
for n > 1 {
h = h.write(m, words)
n--
}
h = h.write(m, ptrs)
} else { // Repeated large element
for i := uintptr(0); true; i += typ.Size_ {
p := typ.GCData
j := ptrs
for j > ptrBits {
h = h.write(readUintptr(p), ptrBits)
p = addb(p, ptrBits/8)
j -= ptrBits
}
m := readUintptr(p)
h = h.write(m, j)
if i+typ.Size_ == dataSize {
break // don't need the trailing nonptr bits on the last element.
}
// Pad with zeros to the start of the next element.
h = h.pad(typ.Size_ - typ.PtrBytes)
}
}
}
h.flush(x, size)
if doubleCheck {
h := heapBitsForAddr(x, size)
for i := uintptr(0); i < size; i += goarch.PtrSize {
// Compute the pointer bit we want at offset i.
want := false
if i < dataSize {
off := i % typ.Size_
if off < typ.PtrBytes {
j := off / goarch.PtrSize
want = *addb(typ.GCData, j/8)>>(j%8)&1 != 0
}
}
if want {
var addr uintptr
h, addr = h.next()
if addr != x+i {
throw("heapBitsSetType: pointer entry not correct")
}
}
}
if _, addr := h.next(); addr != 0 {
throw("heapBitsSetType: extra pointer")
}
}
}
var debugPtrmask struct {
lock mutex
data *byte
}
// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/goarch.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
n := (size/goarch.PtrSize + 7) / 8
x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
x[len(x)-1] = 0xa1 // overflow check sentinel
n = runGCProg(prog, &x[0])
if x[len(x)-1] != 0xa1 {
throw("progToPointerMask: overflow")
}
return bitvector{int32(n), &x[0]}
}
// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
// 00000000: stop
// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
// 10000000 n c: repeat the previous n bits c times; n, c are varints
// 1nnnnnnn c: repeat the previous n bits c times; c is a varint
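//
// For example (a hand-worked sketch), the program
//
//	02 01 82 03 00
//
// emits the 2 literal bits 01 (a pointer word, then a scalar word) and then
// repeats those 2 bits 3 more times, yielding the byte 0x55: bits 0, 2, 4,
// and 6 are set, i.e. every other word, starting with the first, is a
// pointer. The trailing 00 is the stop instruction.
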
// runGCProg returns the number of 1-bit entries written to memory.
func runGCProg(prog, dst *byte) uintptr {
dstStart := dst
// Bits waiting to be written to memory.
var bits uintptr
var nbits uintptr
p := prog
Run:
for {
// Flush accumulated full bytes.
// The rest of the loop assumes that nbits <= 7.
for ; nbits >= 8; nbits -= 8 {
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
// Process one instruction.
inst := uintptr(*p)
p = add1(p)
n := inst & 0x7F
if inst&0x80 == 0 {
// Literal bits; n == 0 means end of program.
if n == 0 {
// Program is over.
break Run
}
nbyte := n / 8
for i := uintptr(0); i < nbyte; i++ {
bits |= uintptr(*p) << nbits
p = add1(p)
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
if n %= 8; n > 0 {
bits |= uintptr(*p) << nbits
p = add1(p)
nbits += n
}
continue Run
}
// Repeat. If n == 0, it is encoded in a varint in the next bytes.
if n == 0 {
for off := uint(0); ; off += 7 {
x := uintptr(*p)
p = add1(p)
n |= (x & 0x7F) << off
if x&0x80 == 0 {
break
}
}
}
// Count is encoded in a varint in the next bytes.
c := uintptr(0)
for off := uint(0); ; off += 7 {
x := uintptr(*p)
p = add1(p)
c |= (x & 0x7F) << off
if x&0x80 == 0 {
break
}
}
c *= n // now total number of bits to copy
// If the number of bits being repeated is small, load them
// into a register and use that register for the entire loop
// instead of repeatedly reading from memory.
// Handling fewer than 8 bits here makes the general loop simpler.
// The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add
// the pattern to a bit buffer holding at most 7 bits (a partial byte)
// it will not overflow.
src := dst
const maxBits = goarch.PtrSize*8 - 7
if n <= maxBits {
// Start with bits in output buffer.
pattern := bits
npattern := nbits
// If we need more bits, fetch them from memory.
src = subtract1(src)
for npattern < n {
pattern <<= 8
pattern |= uintptr(*src)
src = subtract1(src)
npattern += 8
}
// We started with the whole bit output buffer,
// and then we loaded bits from whole bytes.
// Either way, we might now have too many instead of too few.
// Discard the extra.
if npattern > n {
pattern >>= npattern - n
npattern = n
}
// Replicate pattern to at most maxBits.
if npattern == 1 {
// One bit being repeated.
// If the bit is 1, make the pattern all 1s.
// If the bit is 0, the pattern is already all 0s,
// but we can claim that the number of bits
// in the word is equal to the number we need (c),
// because right shift of bits will zero fill.
if pattern == 1 {
pattern = 1<<maxBits - 1
npattern = maxBits
} else {
npattern = c
}
} else {
b := pattern
nb := npattern
if nb+nb <= maxBits {
// Double pattern until the whole uintptr is filled.
for nb <= goarch.PtrSize*8 {
b |= b << nb
nb += nb
}
// Trim away incomplete copy of original pattern in high bits.
// TODO(rsc): Replace with table lookup or loop on systems without divide?
nb = maxBits / npattern * npattern
b &= 1<<nb - 1
pattern = b
npattern = nb
}
}
// Add pattern to bit buffer and flush bit buffer, c/npattern times.
// Since pattern contains >8 bits, there will be full bytes to flush
// on each iteration.
for ; c >= npattern; c -= npattern {
bits |= pattern << nbits
nbits += npattern
for nbits >= 8 {
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
nbits -= 8
}
}
// Add final fragment to bit buffer.
if c > 0 {
pattern &= 1<<c - 1
bits |= pattern << nbits
nbits += c
}
continue Run
}
// Repeat; n too large to fit in a register.
// Since nbits <= 7, we know the first few bytes of repeated data
// are already written to memory.
off := n - nbits // n > nbits because n > maxBits and nbits <= 7
// Leading src fragment.
src = subtractb(src, (off+7)/8)
if frag := off & 7; frag != 0 {
bits |= uintptr(*src) >> (8 - frag) << nbits
src = add1(src)
nbits += frag
c -= frag
}
// Main loop: load one byte, write another.
// The bits are rotating through the bit buffer.
for i := c / 8; i > 0; i-- {
bits |= uintptr(*src) << nbits
src = add1(src)
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
// Final src fragment.
if c %= 8; c > 0 {
bits |= (uintptr(*src) & (1<<c - 1)) << nbits
nbits += c
}
}
// Write any final bits out, using full-byte writes, even for the final byte.
totalBits := (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
nbits += -nbits & 7
for ; nbits > 0; nbits -= 8 {
*dst = uint8(bits)
dst = add1(dst)
bits >>= 8
}
return totalBits
}
// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
// Each word of ptrdata needs one bit in the bitmap.
bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
// Compute the number of pages needed for bitmapBytes.
pages := divRoundUp(bitmapBytes, pageSize)
s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
runGCProg(addb(prog, 4), (*byte)(unsafe.Pointer(s.startAddr)))
return s
}
func dematerializeGCProg(s *mspan) {
mheap_.freeManual(s, spanAllocPtrScalarBits)
}
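// dumpGCProg prints a human-readable listing of the GC program at p,
// for debugging.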
func dumpGCProg(p *byte) {
nptr := 0
for {
x := *p
p = add1(p)
if x == 0 {
print("\t", nptr, " end\n")
break
}
if x&0x80 == 0 {
print("\t", nptr, " lit ", x, ":")
n := int(x+7) / 8
for i := 0; i < n; i++ {
print(" ", hex(*p))
p = add1(p)
}
print("\n")
nptr += int(x)
} else {
nbit := int(x &^ 0x80)
if nbit == 0 {
for nb := uint(0); ; nb += 7 {
x := *p
p = add1(p)
nbit |= int(x&0x7f) << nb
if x&0x80 == 0 {
break
}
}
}
count := 0
for nb := uint(0); ; nb += 7 {
x := *p
p = add1(p)
count |= int(x&0x7f) << nb
if x&0x80 == 0 {
break
}
}
print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
nptr += nbit * count
}
}
}
// Testing.
// reflect_gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
return getgcmask(x)
}
// getgcmask returns the GC type info for the pointer stored in ep, for testing.
// If ep points to the stack, only static live information will be returned
// (i.e. not for objects which are only dynamically live stack objects).
func getgcmask(ep any) (mask []byte) {
e := *efaceOf(&ep)
p := e.data
t := e._type
// data or bss
for _, datap := range activeModules() {
// data
if datap.data <= uintptr(p) && uintptr(p) < datap.edata {
bitmap := datap.gcdatamask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
mask = make([]byte, n/goarch.PtrSize)
for i := uintptr(0); i < n; i += goarch.PtrSize {
off := (uintptr(p) + i - datap.data) / goarch.PtrSize
mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
}
return
}
// bss
if datap.bss <= uintptr(p) && uintptr(p) < datap.ebss {
bitmap := datap.gcbssmask.bytedata
n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
mask = make([]byte, n/goarch.PtrSize)
for i := uintptr(0); i < n; i += goarch.PtrSize {
off := (uintptr(p) + i - datap.bss) / goarch.PtrSize
mask[i/goarch.PtrSize] = (*addb(bitmap, off/8) >> (off % 8)) & 1
}
return
}
}
// heap
if base, s, _ := findObject(uintptr(p), 0, 0); base != 0 {
if s.spanclass.noscan() {
return nil
}
n := s.elemsize
hbits := heapBitsForAddr(base, n)
mask = make([]byte, n/goarch.PtrSize)
for {
var addr uintptr
if hbits, addr = hbits.next(); addr == 0 {
break
}
mask[(addr-base)/goarch.PtrSize] = 1
}
// Callers expect this mask to end at the last pointer.
for len(mask) > 0 && mask[len(mask)-1] == 0 {
mask = mask[:len(mask)-1]
}
return
}
// stack
if gp := getg(); gp.m.curg.stack.lo <= uintptr(p) && uintptr(p) < gp.m.curg.stack.hi {
found := false
var u unwinder
for u.initAt(gp.m.curg.sched.pc, gp.m.curg.sched.sp, 0, gp.m.curg, 0); u.valid(); u.next() {
if u.frame.sp <= uintptr(p) && uintptr(p) < u.frame.varp {
found = true
break
}
}
if found {
locals, _, _ := u.frame.getStackMap(nil, false)
if locals.n == 0 {
return
}
size := uintptr(locals.n) * goarch.PtrSize
n := (*ptrtype)(unsafe.Pointer(t)).Elem.Size_
mask = make([]byte, n/goarch.PtrSize)
for i := uintptr(0); i < n; i += goarch.PtrSize {
off := (uintptr(p) + i - u.frame.varp + size) / goarch.PtrSize
mask[i/goarch.PtrSize] = locals.ptrbit(off)
}
}
return
}
// otherwise, not something the GC knows about.
// possibly read-only data, like malloc(0).
// must not have pointers
return
}
*/