Source file: mbitmap.go
Package: runtime
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmap
//
// The heap bitmap comprises 2 bits for each pointer-sized word in the heap,
// stored in the heapArena metadata backing each heap arena.
// That is, if ha is the heapArena for the arena starting at start,
// then ha.bitmap[0] holds the 2-bit entries for the four words start
// through start+3*ptrSize, ha.bitmap[1] holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit is a pointer/scalar bit, just
// like in the stack/data bitmaps described above. The upper bit
// indicates scan/dead: a "1" value ("scan") indicates that there may
// be pointers in later words of the allocation, and a "0" value
// ("dead") indicates there are no more pointers in the allocation. If
// the upper bit is 0, the lower bit must also be 0, and this
// indicates scanning can ignore the rest of the allocation.
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 high (scan) bits and the bottom half contains 4 low
// (pointer) bits. This form allows a copy from the 1-bit to the 4-bit form to
// keep the pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap
// bitmap means scalar/dead. This property must be preserved when
// modifying the encoding.
//
// The bitmap for noscan spans is not maintained. Code must ensure
// that an object is scannable before consulting its bitmap by
// checking either the noscan bit in the span or by consulting its
// type's information.
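// An illustrative note (not part of the original file): assuming the
// bitPointer, bitScan, and heapBitsShift constants defined below, the 2-bit
// entry for word i (0 <= i < 4) of the four heap words covered by a bitmap
// byte b can be read as
//
//	pointer := b>>(i*heapBitsShift)&bitPointer != 0 // low nibble: pointer/scalar
//	scan := b>>(i*heapBitsShift)&bitScan != 0       // high nibble: scan/dead
//
// which is the same arithmetic that the heapBits.bits, isPointer, and
// morePointers methods below perform one word at a time.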
package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	bitPointer = 1 << 0
	bitScan    = 1 << 4

	heapBitsShift      = 1     // shift offset between successive bitPointer or bitScan entries
	wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte

	// all scan/pointer bits in a byte
	bitScanAll    = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)

// addb returns the byte pointer p+n.
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	bitp  *uint8
	shift uint32
	arena uint32 // Index of heap arena containing bitp
	last  *uint8 // Last byte of arena's bitmap
}

// Make the compiler check that heapBits.arena is large enough to hold
// the maximum arena frame number.
var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1}

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	bytep, mask := s.allocBits.bitp(allocBitIndex)
	return markBits{bytep, mask, allocBitIndex}
}
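// The following is an illustrative sketch, not part of the original file:
// it shows the byte/mask arithmetic that markBits relies on. A 1-bit-per-object
// bitmap stores the bit for object index n in byte n/8 at bit position n%8,
// which is what gcBits.bitp computes for allocBits and gcmarkBits above.
func exampleBitpArithmetic(n uintptr) (byteIndex uintptr, mask uint8) {
	byteIndex = n / 8          // which byte of the bitmap holds the bit
	mask = uint8(1) << (n % 8) // single-bit mask selecting it within that byte
	return
}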
// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
	bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte)))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.Ctz64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.Ctz64(aCache)
		// nothing available in cached bits
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= uint(bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
	if index < s.freeindex {
		return false
	}
	bytep, mask := s.allocBits.bitp(index)
	return *bytep&mask == 0
}

func (s *mspan) objIndex(p uintptr) uintptr {
	byteOffset := p - s.base()
	if byteOffset == 0 {
		return 0
	}
	if s.baseMask != 0 {
		// s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift
		return byteOffset >> s.divShift
	}
	return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
}

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	bytep, mask := s.gcmarkBits.bitp(objIndex)
	return markBits{bytep, mask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{(*uint8)(s.gcmarkBits), uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}
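// Illustrative sketch (not part of the original file): the core of the
// allocCache search used by nextFreeIndex above. The cache holds the
// complement of 64 alloc bits, so a set bit means "free" and Ctz64 finds the
// first one. The caller refills the cache when this reports no free slot.
func exampleNextFreeFromCache(cache uint64, freeindex uintptr) (uintptr, bool) {
	bitIndex := sys.Ctz64(cache) // index of the lowest set (free) bit
	if bitIndex == 64 {
		return 0, false // no free slot in these 64 bits
	}
	return freeindex + uintptr(bitIndex), true
}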
// heapBitsForAddr returns the heapBits for the address addr.
// The caller must ensure addr is in an allocated span.
// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) (h heapBits) {
	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
	arena := arenaIndex(addr)
	ha := mheap_.arenas[arena.l1()][arena.l2()]
	// The compiler uses a load for nil checking ha, but in this
	// case we'll almost never hit that cache line again, so it
	// makes more sense to do a value check.
	if ha == nil {
		// addr is not in the heap. Return nil heapBits, which
		// we expect to crash in the caller.
		return
	}
	h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes]
	h.shift = uint32((addr / sys.PtrSize) & 3)
	h.arena = uint32(arena)
	h.last = &ha.bitmap[len(ha.bitmap)-1]
	return
}

// badPointer throws bad pointer in heap panic.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
	// Typically this indicates an incorrect use
	// of unsafe or cgo to store a bad pointer in
	// the Go heap. It may also indicate a runtime
	// bug.
	//
	// TODO(austin): We could be more aggressive
	// and detect pointers to unallocated objects
	// in allocated spans.
	printlock()
	print("runtime: pointer ", hex(p))
	state := s.state.get()
	if state != mSpanInUse {
		print(" to unallocated span")
	} else {
		print(" to unused region of span")
	}
	print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state, "\n")
	if refBase != 0 {
		print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
		gcDumpObject("object", refBase, refOff)
	}
	getg().m.traceback = 2
	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}
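// Illustrative sketch (not part of the original file): the arithmetic used by
// heapBitsForAddr above, without the arena lookup. Each bitmap byte covers
// four pointer-sized words, so an address maps to a byte offset within the
// arena's bitmap and a 2-bit-entry shift within that byte.
func exampleHeapBitsPosition(addr uintptr) (byteOffset uintptr, shift uint32) {
	byteOffset = (addr / (sys.PtrSize * 4)) % heapArenaBitmapBytes // which bitmap byte
	shift = uint32((addr / sys.PtrSize) & 3)                       // which of its 4 entries
	return
}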
// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
	s = spanOf(p)
	// If s is nil, the virtual address has never been part of the heap.
	// This pointer may be to some mmap'd region, so we allow it.
	if s == nil {
		return
	}
	// If p is a bad pointer, it may not be in s's bounds.
	//
	// Check s.state to synchronize with span initialization
	// before checking other fields. See also spanOfHeap.
	if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
		// Pointers into stacks are also ok, the runtime manages these explicitly.
		if state == mSpanManual {
			return
		}
		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			badPointer(s, p, refBase, refOff)
		}
		return
	}
	// If this span holds objects of a power of 2 size, just mask off the bits to
	// the interior of the object. Otherwise use the size to get the base.
	if s.baseMask != 0 {
		// optimize for power of 2 sized objects.
		base = s.base()
		base = base + (p-base)&uintptr(s.baseMask)
		objIndex = (base - s.base()) >> s.divShift
		// base = p & s.baseMask is faster for small spans,
		// but doesn't work for large spans.
		// Overall, it's faster to use the more general computation above.
	} else {
		base = s.base()
		if p-base >= s.elemsize {
			// n := (p - base) / s.elemsize, using division by multiplication
			objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2
			base += objIndex * s.elemsize
		}
	}
	return
}

// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		h.shift += heapBitsShift
	} else if h.bitp != h.last {
		h.bitp, h.shift = add1(h.bitp), 0
	} else {
		// Move to the next arena.
		return h.nextArena()
	}
	return h
}

// nextArena advances h to the beginning of the next heap arena.
//
// This is a slow-path helper to next. gc's inliner knows that
// heapBits.next can be inlined even though it calls this. This is
// marked noinline so it doesn't get inlined into next and cause next
// to be too big to inline.
//
//go:nosplit
//go:noinline
func (h heapBits) nextArena() heapBits {
	h.arena++
	ai := arenaIdx(h.arena)
	l2 := mheap_.arenas[ai.l1()]
	if l2 == nil {
		// We just passed the end of the object, which
		// was also the end of the heap. Poison h. It
		// should never be dereferenced at this point.
		return heapBits{}
	}
	ha := l2[ai.l2()]
	if ha == nil {
		return heapBits{}
	}
	h.bitp, h.shift = &ha.bitmap[0], 0
	h.last = &ha.bitmap[len(ha.bitmap)-1]
	return h
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
//go:nosplit
func (h heapBits) forward(n uintptr) heapBits {
	n += uintptr(h.shift) / heapBitsShift
	nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
	h.shift = uint32(n%4) * heapBitsShift
	if nbitp <= uintptr(unsafe.Pointer(h.last)) {
		h.bitp = (*uint8)(unsafe.Pointer(nbitp))
		return h
	}

	// We're in a new heap arena.
	past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
	h.arena += 1 + uint32(past/heapArenaBitmapBytes)
	ai := arenaIdx(h.arena)
	if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
		a := l2[ai.l2()]
		h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
		h.last = &a.bitmap[len(a.bitmap)-1]
	} else {
		h.bitp, h.last = nil, nil
	}
	return h
}
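// Illustrative sketch (not part of the original file): how an interior pointer
// is resolved to its enclosing object using findObject above. A zero base
// means p does not point into the heap.
func exampleResolveObject(p uintptr) (base, size uintptr, ok bool) {
	base, span, _ := findObject(p, 0, 0)
	if base == 0 {
		return 0, 0, false
	}
	return base, span.elemsize, true
}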
// forwardOrBoundary is like forward, but stops at boundaries between
// contiguous sections of the bitmap. It returns the number of words
// advanced over, which will be <= n.
func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
	maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
	if n > maxn {
		n = maxn
	}
	return h.forward(n), n
}

// bits returns the heap bits for the current word.
// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) bits() uint32 {
	// The (shift & 31) eliminates a test and conditional branch
	// from the generated code.
	return uint32(*h.bitp) >> (h.shift & 31)
}

// morePointers reports whether this word or any remaining word in this object
// may contain a pointer.
// h must not describe the second word of the object.
func (h heapBits) morePointers() bool {
	return h.bits()&bitScan != 0
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return h.bits()&bitPointer != 0
}
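// Illustrative sketch (not part of the original file): the typical read-side
// loop over an object's heap bitmap, in the style of the barrier loops below.
// base must be the base address of a heap object of the given size. It visits
// the offset of every pointer word and stops early once the bitmap reports no
// more pointers, skipping the morePointers check on the first two words as
// the morePointers documentation requires.
func exampleVisitPointerWords(base, size uintptr, visit func(offset uintptr)) {
	h := heapBitsForAddr(base)
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if i >= 2*sys.PtrSize && !h.morePointers() {
			break // scan bit clear: no pointers in the rest of the object
		}
		if h.isPointer() {
			visit(i)
		}
		h = h.next()
	}
}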
// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
// barrier.
//
// Callers should call bulkBarrierPreWrite immediately before
// calling memmove(dst, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
// The caller is also responsible for cgo pointer checks if this
// may be writing Go pointers into non-Go memory.
//
// The pointer bitmap is not maintained for allocations containing
// no pointers at all; any caller of bulkBarrierPreWrite must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.ptrdata.
//
// Callers must perform cgo checks if writeBarrier.cgo.
//
//go:nosplit
func bulkBarrierPreWrite(dst, src, size uintptr) {
	if (dst|src|size)&(sys.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if s := spanOf(dst); s == nil {
		// If dst is a global, use the data or BSS bitmaps to
		// execute write barriers.
		for _, datap := range activeModules() {
			if datap.data <= dst && dst < datap.edata {
				bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for _, datap := range activeModules() {
			if datap.bss <= dst && dst < datap.ebss {
				bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		return
	} else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
		// dst was heap memory at some point, but isn't now.
		// It can't be a global. It must be either our stack,
		// or in the case of direct channel sends, it could be
		// another stack. Either way, no need for barriers.
		// This will also catch if dst is in a freed span,
		// though that should never happen.
		return
	}

	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst)
	if src == 0 {
		for i := uintptr(0); i < size; i += sys.PtrSize {
			if h.isPointer() {
				dstx := (*uintptr)(unsafe.Pointer(dst + i))
				if !buf.putFast(*dstx, 0) {
					wbBufFlush(nil, 0)
				}
			}
			h = h.next()
		}
	} else {
		for i := uintptr(0); i < size; i += sys.PtrSize {
			if h.isPointer() {
				dstx := (*uintptr)(unsafe.Pointer(dst + i))
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				if !buf.putFast(*dstx, *srcx) {
					wbBufFlush(nil, 0)
				}
			}
			h = h.next()
		}
	}
}

// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
// does not execute write barriers for [dst, dst+size).
//
// In addition to the requirements of bulkBarrierPreWrite
// callers need to ensure [dst, dst+size) is zeroed.
//
// This is used for special cases where e.g. dst was just
// created and zeroed with malloc.
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
	if (dst|src|size)&(sys.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst)
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if h.isPointer() {
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			if !buf.putFast(0, *srcx) {
				wbBufFlush(nil, 0)
			}
		}
		h = h.next()
	}
}

// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / sys.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	buf := &getg().m.p.ptr().wbBuf
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * sys.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			if src == 0 {
				if !buf.putFast(*dstx, 0) {
					wbBufFlush(nil, 0)
				}
			} else {
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				if !buf.putFast(*dstx, *srcx) {
					wbBufFlush(nil, 0)
				}
			}
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe them as an atomic action.
//
// Callers must perform cgo checks if writeBarrier.cgo.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.gcdata
	buf := &getg().m.p.ptr().wbBuf
	var bits uint32
	for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
		if i&(sys.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits >>= 1
		}
		if bits&1 != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			if !buf.putFast(*dstx, *srcx) {
				wbBufFlush(nil, 0)
			}
		}
	}
}
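// Illustrative sketch (not part of the original file): the call pattern that
// bulkBarrierPreWrite above is designed for, roughly what a typed copy into an
// in-heap destination does. It assumes dst and src cover memory of the same
// pointer-bearing type, that size is pointer-aligned, and that the caller has
// already checked the destination allocation actually contains pointers. The
// real runtime marks such helpers //go:nosplit so the barrier and the copy
// cannot be separated by a preemption point.
func exampleBulkCopy(dst, src unsafe.Pointer, size uintptr) {
	// Queue the old/new pointer pairs first, then perform the actual copy.
	bulkBarrierPreWrite(uintptr(dst), uintptr(src), size)
	memmove(dst, src, size)
}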
// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
// If this is a span of pointer-sized objects, it initializes all
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
	// Clear bits corresponding to objects.
	nw := (s.npages << _PageShift) / sys.PtrSize
	if nw%wordsPerBitmapByte != 0 {
		throw("initSpan: unaligned length")
	}
	if h.shift != 0 {
		throw("initSpan: unaligned base")
	}
	isPtrs := sys.PtrSize == 8 && s.elemsize == sys.PtrSize
	for nw > 0 {
		hNext, anw := h.forwardOrBoundary(nw)
		nbyte := anw / wordsPerBitmapByte
		if isPtrs {
			bitp := h.bitp
			for i := uintptr(0); i < nbyte; i++ {
				*bitp = bitPointerAll | bitScanAll
				bitp = add1(bitp)
			}
		} else {
			memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte)
		}
		h = hNext
		nw -= anw
	}
}

// countAlloc returns the number of objects allocated in span s by
// scanning the allocation bitmap.
func (s *mspan) countAlloc() int {
	count := 0
	bytes := divRoundUp(s.nelems, 8)
	// Iterate over each 8-byte chunk and count allocations
	// with an intrinsic. Note that newMarkBits guarantees that
	// gcmarkBits will be 8-byte aligned, so we don't have to
	// worry about edge cases, irrelevant bits will simply be zero.
	for i := uintptr(0); i < bytes; i += 8 {
		// Extract 64 bits from the byte pointer and get a OnesCount.
		// Note that the unsafe cast here doesn't preserve endianness,
		// but that's OK. We only care about how many bits are 1, not
		// about the order we discover them in.
		mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
		count += sys.OnesCount64(mrkBits)
	}
	return count
}

// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
//
// There can only be one allocation from a given span active at a time,
// and the bitmap for a span always falls on byte boundaries,
// so there are no write-write races for access to the heap bitmap.
// Hence, heapBitsSetType can access the bitmap without atomics.
//
// There can be read-write races between heapBitsSetType and things
// that read the heap bitmap like scanobject. However, since
// heapBitsSetType is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// bits that belong to neighboring objects.
Also, on weakly-ordered// machines, callers must execute a store/store (publication) barrier// between calling this function and making the object reachable.func (, , uintptr, *_type) {const = false // slow but helpful; enable to test modifications to this codeconst (= bitPointer | bitScan // 00010001= bitPointer | bitScan | <<heapBitsShift // 00110011= bitPointer | bitScan | <<heapBitsShift // 01110111)// dataSize is always size rounded up to the next malloc size class,// except in the case of allocating a defer block, in which case// size is sizeof(_defer{}) (at least 6 words) and dataSize may be// arbitrarily larger.//// The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore// assume that dataSize == size without checking it explicitly.if sys.PtrSize == 8 && == sys.PtrSize {// It's one word and it has pointers, it must be a pointer.// Since all allocated one-word objects are pointers// (non-pointers are aggregated into tinySize allocations),// initSpan sets the pointer bits for us. Nothing to do here.if {:= heapBitsForAddr()if !.isPointer() {throw("heapBitsSetType: pointer bit missing")}if !.morePointers() {throw("heapBitsSetType: scan bit missing")}}return}:= heapBitsForAddr():= .gcdata // start of 1-bit pointer mask (or GC program, handled below)// 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits.// Therefore, these objects share a heap bitmap byte with the objects next to them.// These are called out as a special case primarily so the code below can assume all// objects are at least 4 words long and that their bitmaps start either at the beginning// of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).if == 2*sys.PtrSize {if .size == sys.PtrSize {// We're allocating a block big enough to hold two pointers.// On 64-bit, that means the actual object must be two pointers,// or else we'd have used the one-pointer-sized block.// On 32-bit, however, this is the 8-byte block, the smallest one.// So it could be that we're allocating one pointer and this was// just the smallest block available. Distinguish by checking dataSize.// (In general the number of instances of typ being allocated is// dataSize/typ.size.)if sys.PtrSize == 4 && == sys.PtrSize {// 1 pointer object. 
On 32-bit machines clear the bit for the// unused second word.*.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << .shift*.bitp |= (bitPointer | bitScan) << .shift} else {// 2-element array of pointer.*.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << .shift}return}// Otherwise typ.size must be 2*sys.PtrSize,// and typ.kind&kindGCProg == 0.if {if .size != 2*sys.PtrSize || .kind&kindGCProg != 0 {print("runtime: heapBitsSetType size=", , " but typ.size=", .size, " gcprog=", .kind&kindGCProg != 0, "\n")throw("heapBitsSetType")}}:= uint32(*):= & 3|= bitScanAll & ((bitScan << (.ptrdata / sys.PtrSize)) - 1)// Clear the bits for this object so we can set the// appropriate ones.*.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << .shift*.bitp |= uint8( << .shift)return} else if == 3*sys.PtrSize {:= uint8(*)if {if == 0 {println("runtime: invalid type ", .string())throw("heapBitsSetType: called with non-pointer type")}if sys.PtrSize != 8 {throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit")}if .kind&kindGCProg != 0 {throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class")}if .size == 2*sys.PtrSize {print("runtime: heapBitsSetType size=", , " but typ.size=", .size, "\n")throw("heapBitsSetType: inconsistent object sizes")}}if .size == sys.PtrSize {// The type contains a pointer otherwise heapBitsSetType wouldn't have been called.// Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.if && *.gcdata != 1 {print("runtime: heapBitsSetType size=", , " typ.size=", .size, "but *typ.gcdata", *.gcdata, "\n")throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class")}// 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111.= 7}:= & 7// Set bitScan bits for all pointers.|= << wordsPerBitmapByte// First bitScan bit is always set since the type contains pointers.|= bitScan// Second bitScan bit needs to also be set if the third bitScan bit is set.|= & (bitScan << (2 * heapBitsShift)) >> 1// For h.shift > 1 heap bits cross a byte boundary and need to be written part// to h.bitp and part to the next h.bitp.switch .shift {case 0:*.bitp &^= << 0*.bitp |= << 0case 1:*.bitp &^= << 1*.bitp |= << 1case 2:*.bitp &^= << 2*.bitp |= ( & ) << 2// Two words written to the first byte.// Advance two words to get to the next byte.= .next().next()*.bitp &^=*.bitp |= ( >> 2) &case 3:*.bitp &^= << 3*.bitp |= ( & ) << 3// One word written to the first byte.// Advance one word to get to the next byte.= .next()*.bitp &^=*.bitp |= ( >> 1) &}return}// Copy from 1-bit ptrmask into 2-bit bitmap.// The basic approach is to use a single uintptr as a bit buffer,// alternating between reloading the buffer and writing bitmap bytes.// In general, one load can supply two bitmap byte writes.// This is a lot of lines of code, but it compiles into relatively few// machine instructions.:= falseif arenaIndex(+-1) != arenaIdx(.arena) || ( && fastrand()%2 == 0) {// This object spans heap arenas, so the bitmap may be// discontiguous. 
Unroll it into the object instead// and then copy it out.//// In doubleCheck mode, we randomly do this anyway to// stress test the bitmap copying path.= true.bitp = (*uint8)(unsafe.Pointer()).last = nil}var (// Ptrmask input.*byte // last ptrmask byte readuintptr // ptrmask bits already loadeduintptr // number of bits in b at next read*byte // final ptrmask byte to read (then repeat)uintptr // number of valid bits in *endpuintptr // alternate source of bits// Heap bitmap output.uintptr // words processeduintptr // number of words to process*byte // next heap bitmap byte to writeuintptr // bits being prepared for *hbitp)= .bitp// Handle GC program. Delayed until this part of the code// so that we can use the same double-checking mechanism// as the 1-bit case. Nothing above could have encountered// GC programs: the cases were all too small.if .kind&kindGCProg != 0 {heapBitsSetTypeGCProg(, .ptrdata, .size, , , addb(.gcdata, 4))if {// Double-check the heap bits written by GC program// by running the GC program to create a 1-bit pointer mask// and then jumping to the double-check code below.// This doesn't catch bugs shared between the 1-bit and 4-bit// GC program execution, but it does catch mistakes specific// to just one of those and bugs in heapBitsSetTypeGCProg's// implementation of arrays.lock(&debugPtrmask.lock)if debugPtrmask.data == nil {debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))}= debugPtrmask.datarunGCProg(addb(.gcdata, 4), nil, , 1)}goto}// Note about sizes://// typ.size is the number of words in the object,// and typ.ptrdata is the number of words in the prefix// of the object that contains pointers. That is, the final// typ.size - typ.ptrdata words contain no pointers.// This allows optimization of a common pattern where// an object has a small header followed by a large scalar// buffer. If we know the pointers are over, we don't have// to scan the buffer's heap bitmap at all.// The 1-bit ptrmasks are sized to contain only bits for// the typ.ptrdata prefix, zero padded out to a full byte// of bitmap. This code sets nw (below) so that heap bitmap// bits are only written for the typ.ptrdata prefix; if there is// more room in the allocated object, the next heap bitmap// entry is a 00, indicating that there are no more pointers// to scan. So only the ptrmask for the ptrdata bytes is needed.//// Replicated copies are not as nice: if there is an array of// objects with scalar tails, all but the last tail does have to// be initialized, because there is no way to say "skip forward".// However, because of the possibility of a repeated type with// size not a multiple of 4 pointers (one heap bitmap byte),// the code already must handle the last ptrmask byte specially// by treating it as containing only the bits for endnb pointers,// where endnb <= 4. 
We represent large scalar tails that must// be expanded in the replication by setting endnb larger than 4.// This will have the effect of reading many bits out of b,// but once the real bits are shifted out, b will supply as many// zero bits as we try to read, which is exactly what we need.=if .size < {// Filling in bits for an array of typ.// Set up for repetition of ptrmask during main loop.// Note that ptrmask describes only a prefix ofconst = sys.PtrSize*8 - 7if .ptrdata/sys.PtrSize <= {// Entire ptrmask fits in uintptr with room for a byte fragment.// Load into pbits and never read from ptrmask again.// This is especially important when the ptrmask has// fewer than 8 bits in it; otherwise the reload in the middle// of the Phase 2 loop would itself need to loop to gather// at least 8 bits.// Accumulate ptrmask into b.// ptrmask is sized to describe only typ.ptrdata, but we record// it as describing typ.size bytes, since all the high bits are zero.= .ptrdata / sys.PtrSizefor := uintptr(0); < ; += 8 {|= uintptr(*) <<= add1()}= .size / sys.PtrSize// Replicate ptrmask to fill entire pbits uintptr.// Doubling and truncating is fewer steps than// iterating by nb each time. (nb could be 1.)// Since we loaded typ.ptrdata/sys.PtrSize bits// but are pretending to have typ.size/sys.PtrSize,// there might be no replication necessary/possible.==if + <= {for <= sys.PtrSize*8 {|= <<+=}// Truncate to a multiple of original ptrmask.// Because nb+nb <= maxBits, nb fits in a byte.// Byte division is cheaper than uintptr division.= uintptr(/byte()) *&= 1<< - 1==}// Clear p and endp as sentinel for using pbits.// Checked during Phase 2 loop.= nil= nil} else {// Ptrmask is larger. Read it multiple times.:= (.ptrdata/sys.PtrSize+7)/8 - 1= addb(, )= .size/sys.PtrSize - *8}}if != nil {= uintptr(*)= add1()= 8}if .size == {// Single entry: can stop once we reach the non-pointer data.= .ptrdata / sys.PtrSize} else {// Repeated instances of typ in an array.// Have to process first N-1 entries in full, but can stop// once we reach the non-pointer data in the final entry.= ((/.size-1)*.size + .ptrdata) / sys.PtrSize}if == 0 {// No pointers! Caller was supposed to check.println("runtime: invalid type ", .string())throw("heapBitsSetType: called with non-pointer type")return}// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).// The leading byte is special because it contains the bits for word 1,// which does not have the scan bit set.// The leading half-byte is special because it's a half a byte,// so we have to be careful with the bits already there.switch {default:throw("heapBitsSetType: unexpected shift")case .shift == 0:// Ptrmask and heap bitmap are aligned.//// This is a fast path for small objects.//// The first byte we write out covers the first four// words of the object. 
The scan/dead bit on the first// word must be set to scan since there are pointers// somewhere in the object.// In all following words, we set the scan/dead// appropriately to indicate that the object continues// to the next 2-bit entry in the bitmap.//// We set four bits at a time here, but if the object// is fewer than four words, phase 3 will clear// unnecessary bits.= & bitPointerAll|= bitScanAllif += 4; >= {goto}* = uint8()= add1()>>= 4-= 4case .shift == 2:// Ptrmask and heap bitmap are misaligned.//// On 32 bit architectures only the 6-word object that corresponds// to a 24 bytes size class can start with h.shift of 2 here since// all other non 16 byte aligned size classes have been handled by// special code paths at the beginning of heapBitsSetType on 32 bit.//// Many size classes are only 16 byte aligned. On 64 bit architectures// this results in a heap bitmap position starting with a h.shift of 2.//// The bits for the first two words are in a byte shared// with another object, so we must be careful with the bits// already there.//// We took care of 1-word, 2-word, and 3-word objects above,// so this is at least a 6-word object.= ( & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)|= bitScan << (2 * heapBitsShift)if > 1 {|= bitScan << (3 * heapBitsShift)}>>= 2-= 2* &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift))* |= uint8()= add1()if += 2; >= {// We know that there is more data, because we handled 2-word and 3-word objects above.// This must be at least a 6-word object. If we're out of pointer words,// mark no scan in next bitmap byte and finish.= 0+= 4goto}}// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.// The loop computes the bits for that last write but does not execute the write;// it leaves the bits in hb for processing by phase 3.// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to// use in the first half of the loop right now, and then we only adjust nb explicitly// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.-= 4for {// Emit bitmap byte.// b has at least nb+4 bits, with one exception:// if w+4 >= nw, then b has only nw-w bits,// but we'll stop at the break and then truncate// appropriately in Phase 3.= & bitPointerAll|= bitScanAllif += 4; >= {break}* = uint8()= add1()>>= 4// Load more bits. 
b has nb right now.if != {// Fast path: keep reading from ptrmask.// nb unmodified: we just loaded 8 bits,// and the next iteration will consume 8 bits,// leaving us with the same nb the next time we're here.if < 8 {|= uintptr(*) <<= add1()} else {// Reduce the number of bits in b.// This is important if we skipped// over a scalar tail, since nb could// be larger than the bit width of b.-= 8}} else if == nil {// Almost as fast path: track bit count and refill from pbits.// For short repetitions.if < 8 {|= <<+=}-= 8 // for next iteration} else {// Slow path: reached end of ptrmask.// Process final partial byte and rewind to start.|= uintptr(*) <<+=if < 8 {|= uintptr(*) <<= add1()} else {-= 8=}}// Emit bitmap byte.= & bitPointerAll|= bitScanAllif += 4; >= {break}* = uint8()= add1()>>= 4}:// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.if > {// Counting the 4 entries in hb not yet written to memory,// there are more entries than possible pointer slots.// Discard the excess entries (can't be more than 3).:= uintptr(1)<<(4-(-)) - 1&= | <<4 // apply mask to both pointer bits and scan bits}// Change nw from counting possibly-pointer words to total words in allocation.= / sys.PtrSize// Write whole bitmap bytes.// The first is hb, the rest are zero.if <= {* = uint8()= add1()= 0 // for possible final half-byte belowfor += 4; <= ; += 4 {* = 0= add1()}}// Write final partial bitmap byte if any.// We know w > nw, or else we'd still be in the loop above.// It can be bigger only due to the 4 entries in hb that it counts.// If w == nw+4 then there's nothing left to do: we wrote all nw entries// and can discard the 4 sitting in hb.// But if w == nw+2, we need to write first two in hb.// The byte is shared with the next object, so be careful with// existing bits.if == +2 {* = *&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8()}:// Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.if {// TODO: We could probably make this faster by// handling [x+dataSize, x+size) specially.:= heapBitsForAddr()// cnw is the number of heap words, or bit pairs// remaining (like nw above).:= / sys.PtrSize:= (*uint8)(unsafe.Pointer())// We know the first and last byte of the bitmap are// not the same, but it's still possible for small// objects span arenas, so it may share bitmap bytes// with neighboring objects.//// Handle the first byte specially if it's shared. See// Phase 1 for why this is the only special case we need.if {if !(.shift == 0 || .shift == 2) {print("x=", , " size=", , " cnw=", .shift, "\n")throw("bad start shift")}}if .shift == 2 {*.bitp = *.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *= .next().next()-= 2= addb(, 1)}// We're now byte aligned. 
Copy out to per-arena// bitmaps until the last byte (which may again be// partial).for >= 4 {// This loop processes four words at a time,// so round cnw down accordingly., := .forwardOrBoundary( / 4 * 4)// n is the number of bitmap bytes to copy.:= / 4memmove(unsafe.Pointer(.bitp), unsafe.Pointer(), )-=== addb(, )}if && .shift != 0 {print("cnw=", , " h.shift=", .shift, "\n")throw("bad shift after block copy")}// Handle the last byte if it's shared.if == 2 {*.bitp = *.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *= addb(, 1)= .next().next()}if {if uintptr(unsafe.Pointer()) > + {throw("copy exceeded object size")}if !( == 0 || == 2) {print("x=", , " size=", , " cnw=", , "\n")throw("bad number of remaining words")}// Set up hbitp so doubleCheck code below can check it.= .bitp}// Zero the object where we wrote the bitmap.memclrNoHeapPointers(unsafe.Pointer(), uintptr(unsafe.Pointer())-)}// Double check the whole bitmap.if {// x+size may not point to the heap, so back up one// word and then advance it the way we do above.:= heapBitsForAddr( + - sys.PtrSize)if {// In out-of-place copying, we just advance// using next.= .next()} else {// Don't use next because that may advance to// the next arena and the in-place logic// doesn't do that..shift += heapBitsShiftif .shift == 4*heapBitsShift {.bitp, .shift = add1(.bitp), 0}}if .kind&kindGCProg == 0 && ( != .bitp || ( == +2) != (.shift == 2)) {println("ended at wrong bitmap byte for", .string(), "x", /.size)print("typ.size=", .size, " typ.ptrdata=", .ptrdata, " dataSize=", , " size=", , "\n")print("w=", , " nw=", , " b=", hex(), " nb=", , " hb=", hex(), "\n"):= heapBitsForAddr()print("initial bits h0.bitp=", .bitp, " h0.shift=", .shift, "\n")print("ended at hbitp=", , " but next starts at bitp=", .bitp, " shift=", .shift, "\n")throw("bad heapBitsSetType")}// Double-check that bits to be written were written correctly.// Does not check that other bits were not written, unfortunately.:= heapBitsForAddr():= .ptrdata / sys.PtrSize:= .size / sys.PtrSize:= / .size:= ((-1)*.size + .ptrdata) / sys.PtrSizefor := uintptr(0); < /sys.PtrSize; ++ {:= %var , uint8= (*.bitp >> .shift) & (bitPointer | bitScan)if >= {if .kind&kindGCProg != 0 && < (+3)/4*4 {// heapBitsSetTypeGCProg always fills// in full nibbles of bitScan.= bitScan}} else {if < && (*addb(, /8)>>(%8))&1 != 0 {|= bitPointer}|= bitScan}if != {println("mismatch writing bits for", .string(), "x", /.size)print("typ.size=", .size, " typ.ptrdata=", .ptrdata, " dataSize=", , " size=", , "\n")print("kindGCProg=", .kind&kindGCProg != 0, " outOfPlace=", , "\n")print("w=", , " nw=", , " b=", hex(), " nb=", , " hb=", hex(), "\n"):= heapBitsForAddr()print("initial bits h0.bitp=", .bitp, " h0.shift=", .shift, "\n")print("current bits h.bitp=", .bitp, " h.shift=", .shift, " *h.bitp=", hex(*.bitp), "\n")print("ptrmask=", , " p=", , " endp=", , " endnb=", , " pbits=", hex(), " b=", hex(), " nb=", , "\n")println("at word", , "offset", *sys.PtrSize, "have", hex(), "want", hex())if .kind&kindGCProg != 0 {println("GC program:")dumpGCProg(addb(.gcdata, 4))}throw("bad heapBitsSetType")}= .next()}if == debugPtrmask.data {unlock(&debugPtrmask.lock)}}}var debugPtrmask struct {lock mutexdata *byte}// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.// progSize is the size of the memory described by the program.// elemSize is the size of the element that the GC program describes (a prefix of).// dataSize is the total size of the intended data, a multiple of elemSize.// allocSize is the total 
size of the allocated memory.//// GC programs are only used for large allocations.// heapBitsSetType requires that allocSize is a multiple of 4 words,// so that the relevant bitmap bytes are not shared with surrounding// objects.func ( heapBits, , , , uintptr, *byte) {if sys.PtrSize == 8 && %(4*sys.PtrSize) != 0 {// Alignment will be wrong.throw("heapBitsSetTypeGCProg: small allocation")}var uintptrif == {= runGCProg(, nil, .bitp, 2)if *sys.PtrSize != {println("runtime: heapBitsSetTypeGCProg: total bits", , "but progSize", )throw("heapBitsSetTypeGCProg: unexpected bit count")}} else {:= /// Piece together program trailer to run after prog that does:// literal(0)// repeat(1, elemSize-progSize-1) // zeros to fill element size// repeat(elemSize, count-1) // repeat that element for count// This zero-pads the data remaining in the first element and then// repeats that first element to fill the array.var [40]byte // 3 varints (max 10 each) + some bytes:= 0if := /sys.PtrSize - /sys.PtrSize; > 0 {// literal(0)[] = 0x01++[] = 0++if > 1 {// repeat(1, n-1)[] = 0x81++--for ; >= 0x80; >>= 7 {[] = byte( | 0x80)++}[] = byte()++}}// repeat(elemSize/ptrSize, count-1)[] = 0x80++:= / sys.PtrSizefor ; >= 0x80; >>= 7 {[] = byte( | 0x80)++}[] = byte()++= - 1for ; >= 0x80; >>= 7 {[] = byte( | 0x80)++}[] = byte()++[] = 0++runGCProg(, &[0], .bitp, 2)// Even though we filled in the full array just now,// record that we only filled in up to the ptrdata of the// last element. This will cause the code below to// memclr the dead section of the final array element,// so that scanobject can stop early in the final element.= (*(-1) + ) / sys.PtrSize}:= unsafe.Pointer(addb(.bitp, (+3)/4)):= unsafe.Pointer(addb(.bitp, /sys.PtrSize/wordsPerBitmapByte))memclrNoHeapPointers(, uintptr()-uintptr())}// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.// size the size of the region described by prog, in bytes.// The resulting bitvector will have no more than size/sys.PtrSize bits.func ( *byte, uintptr) bitvector {:= (/sys.PtrSize + 7) / 8:= (*[1 << 30]byte)(persistentalloc(+1, 1, &memstats.buckhash_sys))[:+1][len()-1] = 0xa1 // overflow check sentinel= runGCProg(, nil, &[0], 1)if [len()-1] != 0xa1 {throw("progToPointerMask: overflow")}return bitvector{int32(), &[0]}}// Packed GC pointer bitmaps, aka GC programs.//// For large types containing arrays, the type information has a// natural repetition that can be encoded to save space in the// binary and in the memory representation of the type information.//// The encoding is a simple Lempel-Ziv style bytecode machine// with the following instructions://// 00000000: stop// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes// 10000000 n c: repeat the previous n bits c times; n, c are varints// 1nnnnnnn c: repeat the previous n bits c times; c is a varint// runGCProg executes the GC program prog, and then trailer if non-nil,// writing to dst with entries of the given size.// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.// If size == 2, dst is the 2-bit heap bitmap, and writes move backward// starting at dst (because the heap bitmap does). 
In this case, the caller guarantees// that only whole bytes in dst need to be written.//// runGCProg returns the number of 1- or 2-bit entries written to memory.func (, , *byte, int) uintptr {:=// Bits waiting to be written to memory.var uintptrvar uintptr:=:for {// Flush accumulated full bytes.// The rest of the loop assumes that nbits <= 7.for ; >= 8; -= 8 {if == 1 {* = uint8()= add1()>>= 8} else {:= &bitPointerAll | bitScanAll* = uint8()= add1()>>= 4= &bitPointerAll | bitScanAll* = uint8()= add1()>>= 4}}// Process one instruction.:= uintptr(*)= add1():= & 0x7Fif &0x80 == 0 {// Literal bits; n == 0 means end of program.if == 0 {// Program is over; continue in trailer if present.if != nil {== nilcontinue}break}:= / 8for := uintptr(0); < ; ++ {|= uintptr(*) <<= add1()if == 1 {* = uint8()= add1()>>= 8} else {:= &0xf | bitScanAll* = uint8()= add1()>>= 4= &0xf | bitScanAll* = uint8()= add1()>>= 4}}if %= 8; > 0 {|= uintptr(*) <<= add1()+=}continue}// Repeat. If n == 0, it is encoded in a varint in the next bytes.if == 0 {for := uint(0); ; += 7 {:= uintptr(*)= add1()|= ( & 0x7F) <<if &0x80 == 0 {break}}}// Count is encoded in a varint in the next bytes.:= uintptr(0)for := uint(0); ; += 7 {:= uintptr(*)= add1()|= ( & 0x7F) <<if &0x80 == 0 {break}}*= // now total number of bits to copy// If the number of bits being repeated is small, load them// into a register and use that register for the entire loop// instead of repeatedly reading from memory.// Handling fewer than 8 bits here makes the general loop simpler.// The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add// the pattern to a bit buffer holding at most 7 bits (a partial byte)// it will not overflow.:=const = sys.PtrSize*8 - 7if <= {// Start with bits in output buffer.:=:=// If we need more bits, fetch them from memory.if == 1 {= subtract1()for < {<<= 8|= uintptr(*)= subtract1()+= 8}} else {= subtract1()for < {<<= 4|= uintptr(*) & 0xf= subtract1()+= 4}}// We started with the whole bit output buffer,// and then we loaded bits from whole bytes.// Either way, we might now have too many instead of too few.// Discard the extra.if > {>>= -=}// Replicate pattern to at most maxBits.if == 1 {// One bit being repeated.// If the bit is 1, make the pattern all 1s.// If the bit is 0, the pattern is already all 0s,// but we can claim that the number of bits// in the word is equal to the number we need (c),// because right shift of bits will zero fill.if == 1 {= 1<< - 1=} else {=}} else {:=:=if + <= {// Double pattern until the whole uintptr is filled.for <= sys.PtrSize*8 {|= <<+=}// Trim away incomplete copy of original pattern in high bits.// TODO(rsc): Replace with table lookup or loop on systems without divide?= / *&= 1<< - 1==}}// Add pattern to bit buffer and flush bit buffer, c/npattern times.// Since pattern contains >8 bits, there will be full bytes to flush// on each iteration.for ; >= ; -= {|= <<+=if == 1 {for >= 8 {* = uint8()= add1()>>= 8-= 8}} else {for >= 4 {* = uint8(&0xf | bitScanAll)= add1()>>= 4-= 4}}}// Add final fragment to bit buffer.if > 0 {&= 1<< - 1|= <<+=}continue}// Repeat; n too large to fit in a register.// Since nbits <= 7, we know the first few bytes of repeated data// are already written to memory.:= - // n > nbits because n > maxBits and nbits <= 7if == 1 {// Leading src fragment.= subtractb(, (+7)/8)if := & 7; != 0 {|= uintptr(*) >> (8 - ) <<= add1()+=-=}// Main loop: load one byte, write another.// The bits are rotating through the bit buffer.for := / 8; > 0; -- {|= uintptr(*) <<= add1()* = uint8()= add1()>>= 
8}// Final src fragment.if %= 8; > 0 {|= (uintptr(*) & (1<< - 1)) <<+=}} else {// Leading src fragment.= subtractb(, (+3)/4)if := & 3; != 0 {|= (uintptr(*) & 0xf) >> (4 - ) <<= add1()+=-=}// Main loop: load one byte, write another.// The bits are rotating through the bit buffer.for := / 4; > 0; -- {|= (uintptr(*) & 0xf) <<= add1()* = uint8(&0xf | bitScanAll)= add1()>>= 4}// Final src fragment.if %= 4; > 0 {|= (uintptr(*) & (1<< - 1)) <<+=}}}// Write any final bits out, using full-byte writes, even for the final byte.var uintptrif == 1 {= (uintptr(unsafe.Pointer())-uintptr(unsafe.Pointer()))*8 ++= - & 7for ; > 0; -= 8 {* = uint8()= add1()>>= 8}} else {= (uintptr(unsafe.Pointer())-uintptr(unsafe.Pointer()))*4 ++= - & 3for ; > 0; -= 4 {:= &0xf | bitScanAll* = uint8()= add1()>>= 4}}return}// materializeGCProg allocates space for the (1-bit) pointer bitmask// for an object of size ptrdata. Then it fills that space with the// pointer bitmask specified by the program prog.// The bitmask starts at s.startAddr.// The result must be deallocated with dematerializeGCProg.func ( uintptr, *byte) *mspan {// Each word of ptrdata needs one bit in the bitmap.:= divRoundUp(, 8*sys.PtrSize)// Compute the number of pages needed for bitmapBytes.:= divRoundUp(, pageSize):= mheap_.allocManual(, spanAllocPtrScalarBits)runGCProg(addb(, 4), nil, (*byte)(unsafe.Pointer(.startAddr)), 1)return}func ( *mspan) {mheap_.freeManual(, spanAllocPtrScalarBits)}func ( *byte) {:= 0for {:= *= add1()if == 0 {print("\t", , " end\n")break}if &0x80 == 0 {print("\t", , " lit ", , ":"):= int(+7) / 8for := 0; < ; ++ {print(" ", hex(*))= add1()}print("\n")+= int()} else {:= int( &^ 0x80)if == 0 {for := uint(0); ; += 7 {:= *= add1()|= int(&0x7f) <<if &0x80 == 0 {break}}}:= 0for := uint(0); ; += 7 {:= *= add1()|= int(&0x7f) <<if &0x80 == 0 {break}}print("\t", , " repeat ", , " × ", , "\n")+= *}}}// Testing.func ( *stkframe, unsafe.Pointer) bool {:= (*stkframe)()if .sp <= .sp && .sp < .varp {* = *return false}return true}// gcbits returns the GC type info for x, for testing.// The result is the bitmap entries (0 or 1), one entry per byte.//go:linkname reflect_gcbits reflect.gcbitsfunc ( interface{}) []byte {:= getgcmask():= (*ptrtype)(unsafe.Pointer(efaceOf(&)._type)).elem:= .ptrdata / sys.PtrSizefor uintptr(len()) > && [len()-1] == 0 {= [:len()-1]}return}// Returns GC type info for the pointer stored in ep for testing.// If ep points to the stack, only static live information will be returned// (i.e. 
not for objects which are only dynamically live stack objects).func ( interface{}) ( []byte) {:= *efaceOf(&):= .data:= ._type// data or bssfor , := range activeModules() {// dataif .data <= uintptr() && uintptr() < .edata {:= .gcdatamask.bytedata:= (*ptrtype)(unsafe.Pointer()).elem.size= make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize {:= (uintptr() + - .data) / sys.PtrSize[/sys.PtrSize] = (*addb(, /8) >> ( % 8)) & 1}return}// bssif .bss <= uintptr() && uintptr() < .ebss {:= .gcbssmask.bytedata:= (*ptrtype)(unsafe.Pointer()).elem.size= make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize {:= (uintptr() + - .bss) / sys.PtrSize[/sys.PtrSize] = (*addb(, /8) >> ( % 8)) & 1}return}}// heapif , , := findObject(uintptr(), 0, 0); != 0 {:= heapBitsForAddr():= .elemsize= make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize {if .isPointer() {[/sys.PtrSize] = 1}if !.morePointers() {= [:/sys.PtrSize]break}= .next()}return}// stackif := getg(); .m.curg.stack.lo <= uintptr() && uintptr() < .m.curg.stack.hi {var stkframe.sp = uintptr():= getg()gentraceback(.m.curg.sched.pc, .m.curg.sched.sp, 0, .m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&)), 0)if .fn.valid() {, , := getStackMap(&, nil, false)if .n == 0 {return}:= uintptr(.n) * sys.PtrSize:= (*ptrtype)(unsafe.Pointer()).elem.size= make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize {:= (uintptr() + - .varp + ) / sys.PtrSize[/sys.PtrSize] = .ptrbit()}}return}// otherwise, not something the GC knows about.// possibly read-only data, like malloc(0).// must not have pointersreturn}