关于go:内存分配与GC

Go 使用值传递
协程栈记录了协程的执行现场
协程栈在堆上，由 GC 回收
编译原理相关

局部变量太大
栈帧回收后，需要继续使用的变量
不是所有变量都能放在协程栈上

指针逃逸

函数返回了对象的指针（函数外仍能够访问，此时该变量不再是局部变量）
// a returns the address of its local variable v. Because the returned
// pointer stays usable after a's stack frame is gone, v cannot be kept in
// that frame; this is the "pointer escape" case described in this section.
func a() *int {
	v := 0
	return &v
}
// main calls a and keeps the pointer it returns, showing that the variable
// created inside a is still reachable from outside a.
func main() {
	i := a()
	_ = i // Go rejects unused local variables; this keeps the example compiling.
}

空接口逃逸

// b passes a local int to fmt.Println, whose parameters have type
// interface{}. Handing v to an empty-interface parameter is the
// "empty interface escape" case described in this section.
func b() {
	v := 0
	// Functions that take interface{} parameters often use reflection on them.
	fmt.Println(v)
}
func main(){i := a()
}

大变量逃逸

变量过大会导致栈空间不足；在 64 位机器上，一般超过 64KB 的变量会逃逸

Go 栈的初始空间为 2KB
在函数调用前判断栈空间（morestack）
必要时对栈进行扩容
早期使用分段栈，后期使用连续栈
当空间不足时扩容，变为原来的 2 倍
当空间使用率不足 1/4 时缩容，变为原来的 1/2

/*
 * support for morestack
 */

// Called during function prolog when more stack is needed.
//
// The traceback routines see morestack on a g0 as being
// the top of a stack (for example, morestack calling newstack
// calling the scheduler calling newm calling gc), so we must
// record an argument size. For that purpose, it has no arguments.
//
// Steps, in order:
//   1. Abort if the current g is m->g0 or m->gsignal.
//   2. Save the caller-of-f context into m->morebuf.
//   3. Save f's own context into g->sched.
//   4. Switch to m->g0 and its stack, then call newstack.
TEXT runtime·morestack(SB),NOSPLIT,$0-0
    // Cannot grow scheduler stack (m->g0).
    get_tls(CX)
    MOVQ    g(CX), BX
    MOVQ    g_m(BX), BX
    MOVQ    m_g0(BX), SI
    CMPQ    g(CX), SI
    // Current g is not g0: jump over the two CALLs below.
    JNE    3(PC)
    CALL    runtime·badmorestackg0(SB)
    CALL    runtime·abort(SB)

    // Cannot grow signal stack (m->gsignal).
    MOVQ    m_gsignal(BX), SI
    CMPQ    g(CX), SI
    // Current g is not gsignal: jump over the two CALLs below.
    JNE    3(PC)
    CALL    runtime·badmorestackgsignal(SB)
    CALL    runtime·abort(SB)

    // Called from f.
    // Set m->morebuf to f's caller.
    NOP    SP    // tell vet SP changed - stop checking offsets
    MOVQ    8(SP), AX    // f's caller's PC
    MOVQ    AX, (m_morebuf+gobuf_pc)(BX)
    LEAQ    16(SP), AX    // f's caller's SP
    MOVQ    AX, (m_morebuf+gobuf_sp)(BX)
    // Reload the current g and record it in morebuf as well.
    get_tls(CX)
    MOVQ    g(CX), SI
    MOVQ    SI, (m_morebuf+gobuf_g)(BX)

    // Set g->sched to context in f.
    MOVQ    0(SP), AX // f's PC
    MOVQ    AX, (g_sched+gobuf_pc)(SI)
    LEAQ    8(SP), AX // f's SP
    MOVQ    AX, (g_sched+gobuf_sp)(SI)
    MOVQ    BP, (g_sched+gobuf_bp)(SI)
    // NOTE(review): DX is stored as gobuf_ctxt; presumably it holds f's
    // closure context at this point - confirm against the Go ABI docs.
    MOVQ    DX, (g_sched+gobuf_ctxt)(SI)

    // Call newstack on m->g0's stack.
    // Make g0 the current g in TLS, then load SP from g0's saved sched.sp.
    MOVQ    m_g0(BX), BX
    MOVQ    BX, g(CX)
    MOVQ    (g_sched+gobuf_sp)(BX), SP
    CALL    runtime·newstack(SB)
    CALL    runtime·abort(SB)    // crash if newstack returns
    RET

Go 每次申请的虚拟内存单元（heapArena）为 64MB
在 64 位平台（Windows 除外）上最多有 2^22 个虚拟内存单元（48 位地址空间 / 64MB）
所有的 heapArena 组成了 mheap（Go 堆内存）

// A heapArena stores metadata for a heap arena. heapArenas are stored
// outside of the Go heap and accessed via the mheap_.arenas index.
//
//go:notinheap
// Bookkeeping information for one requested arena.
type heapArena struct {
    // bitmap stores the pointer/scalar bitmap for the words in
    // this arena. See mbitmap.go for a description. Use the
    // heapBits type to access this.
    bitmap [heapArenaBitmapBytes]byte

    // spans maps from virtual address page ID within this arena to *mspan.
    // For allocated spans, their pages map to the span itself.
    // For free spans, only the lowest and highest pages map to the span itself.
    // Internal pages map to an arbitrary span.
    // For pages that have never been allocated, spans entries are nil.
    //
    // Modifications are protected by mheap.lock. Reads can be
    // performed without locking, but ONLY from indexes that are
    // known to contain in-use or stack spans. This means there
    // must not be a safe-point between establishing that an
    // address is live and looking it up in the spans array.
    // Records the mspans that live in this arena.
    spans [pagesPerArena]*mspan

    // pageInUse is a bitmap that indicates which spans are in
    // state mSpanInUse. This bitmap is indexed by page number,
    // but only the bit corresponding to the first page in each
    // span is used.
    //
    // Reads and writes are atomic.
    pageInUse [pagesPerArena / 8]uint8

    // pageMarks is a bitmap that indicates which spans have any
    // marked objects on them. Like pageInUse, only the bit
    // corresponding to the first page in each span is used.
    //
    // Writes are done atomically during marking. Reads are
    // non-atomic and lock-free since they only occur during
    // sweeping (and hence never race with writes).
    //
    // This is used to quickly find whole spans that can be freed.
    //
    // TODO(austin): It would be nice if this was uint64 for
    // faster scanning, but we don't have 64-bit atomic bit
    // operations.
    pageMarks [pagesPerArena / 8]uint8

    // pageSpecials is a bitmap that indicates which spans have
    // specials (finalizers or other). Like pageInUse, only the bit
    // corresponding to the first page in each span is used.
    //
    // Writes are done atomically whenever a special is added to
    // a span and whenever the last special is removed from a span.
    // Reads are done atomically to find spans containing specials
    // during marking.
    pageSpecials [pagesPerArena / 8]uint8

    // checkmarks stores the debug.gccheckmark state. It is only
    // used if debug.gccheckmark > 0.
    checkmarks *checkmarksMap

    // zeroedBase marks the first byte of the first page in this
    // arena which hasn't been used yet and is therefore already
    // zero. zeroedBase is relative to the arena base.
    // Increases monotonically until it hits heapArenaBytes.
    //
    // This field is sufficient to determine if an allocation
    // needs to be zeroed because the page allocator follows an
    // address-ordered first-fit policy.
    //
    // Read atomically and written with an atomic CAS.
    zeroedBase uintptr
}

// mheap holds the global heap state shown below: the page allocator, the
// list of all spans, sweep and reclaim bookkeeping, the arena index, the
// per-span-class central lists, and the fixed-size allocators used for
// runtime metadata.
type mheap struct {
    // lock must only be acquired on the system stack, otherwise a g
    // could self-deadlock if its stack grows with the lock held.
    lock  mutex
    pages pageAlloc // page allocation data structure

    sweepgen uint32 // sweep generation, see comment in mspan; written during STW

    // allspans is a slice of all mspans ever created. Each mspan
    // appears exactly once.
    //
    // The memory for allspans is manually managed and can be
    // reallocated and move as the heap grows.
    //
    // In general, allspans is protected by mheap_.lock, which
    // prevents concurrent access as well as freeing the backing
    // store. Accesses during STW might not hold the lock, but
    // must ensure that allocation cannot happen around the
    // access (since that may free the backing store).
    allspans []*mspan // all spans out there

    // _ uint32 // align uint64 fields on 32-bit for atomics

    // Proportional sweep
    //
    // These parameters represent a linear function from gcController.heapLive
    // to page sweep count. The proportional sweep system works to
    // stay in the black by keeping the current page sweep count
    // above this line at the current gcController.heapLive.
    //
    // The line has slope sweepPagesPerByte and passes through a
    // basis point at (sweepHeapLiveBasis, pagesSweptBasis). At
    // any given time, the system is at (gcController.heapLive,
    // pagesSwept) in this space.
    //
    // It is important that the line pass through a point we
    // control rather than simply starting at a 0,0 origin
    // because that lets us adjust sweep pacing at any time while
    // accounting for current progress. If we could only adjust
    // the slope, it would create a discontinuity in debt if any
    // progress has already been made.
    pagesInUse         atomic.Uint64 // pages of spans in stats mSpanInUse
    pagesSwept         atomic.Uint64 // pages swept this cycle
    pagesSweptBasis    atomic.Uint64 // pagesSwept to use as the origin of the sweep ratio
    sweepHeapLiveBasis uint64        // value of gcController.heapLive to use as the origin of sweep ratio; written with lock, read without
    sweepPagesPerByte  float64       // proportional sweep ratio; written with lock, read without
    // TODO(austin): pagesInUse should be a uintptr, but the 386
    // compiler can't 8-byte align fields.

    // scavengeGoal is the amount of total retained heap memory (measured by
    // heapRetained) that the runtime will try to maintain by returning memory
    // to the OS.
    //
    // Accessed atomically.
    scavengeGoal uint64

    // Page reclaimer state

    // reclaimIndex is the page index in allArenas of next page to
    // reclaim. Specifically, it refers to page (i %
    // pagesPerArena) of arena allArenas[i / pagesPerArena].
    //
    // If this is >= 1<<63, the page reclaimer is done scanning
    // the page marks.
    reclaimIndex atomic.Uint64

    // reclaimCredit is spare credit for extra pages swept. Since
    // the page reclaimer works in large chunks, it may reclaim
    // more than requested. Any spare pages released go to this
    // credit pool.
    reclaimCredit atomic.Uintptr

    // arenas is the heap arena map. It points to the metadata for
    // the heap for every arena frame of the entire usable virtual
    // address space.
    //
    // Use arenaIndex to compute indexes into this array.
    //
    // For regions of the address space that are not backed by the
    // Go heap, the arena map contains nil.
    //
    // Modifications are protected by mheap_.lock. Reads can be
    // performed without locking; however, a given entry can
    // transition from nil to non-nil at any time when the lock
    // isn't held. (Entries never transitions back to nil.)
    //
    // In general, this is a two-level mapping consisting of an L1
    // map and possibly many L2 maps. This saves space when there
    // are a huge number of arena frames. However, on many
    // platforms (even 64-bit), arenaL1Bits is 0, making this
    // effectively a single-level map. In this case, arenas[0]
    // will never be nil.
    // The heap is made up of all the heapArenas indexed here.
    arenas [1 << arenaL1Bits]*[1 << arenaL2Bits]*heapArena

    // heapArenaAlloc is pre-reserved space for allocating heapArena
    // objects. This is only used on 32-bit, where we pre-reserve
    // this space to avoid interleaving it with the heap itself.
    heapArenaAlloc linearAlloc

    // arenaHints is a list of addresses at which to attempt to
    // add more heap arenas. This is initially populated with a
    // set of general hint addresses, and grown with the bounds of
    // actual heap arena ranges.
    arenaHints *arenaHint

    // arena is a pre-reserved space for allocating heap arenas
    // (the actual arenas). This is only used on 32-bit.
    arena linearAlloc

    // allArenas is the arenaIndex of every mapped arena. This can
    // be used to iterate through the address space.
    //
    // Access is protected by mheap_.lock. However, since this is
    // append-only and old backing arrays are never freed, it is
    // safe to acquire mheap_.lock, copy the slice header, and
    // then release mheap_.lock.
    allArenas []arenaIdx

    // sweepArenas is a snapshot of allArenas taken at the
    // beginning of the sweep cycle. This can be read safely by
    // simply blocking GC (by disabling preemption).
    sweepArenas []arenaIdx

    // markArenas is a snapshot of allArenas taken at the beginning
    // of the mark cycle. Because allArenas is append-only, neither
    // this slice nor its contents will change during the mark, so
    // it can be read safely.
    markArenas []arenaIdx

    // curArena is the arena that the heap is currently growing
    // into. This should always be physPageSize-aligned.
    curArena struct {base, end uintptr}

    _ uint32 // ensure 64-bit alignment of central

    // central free lists for small size classes.
    // the padding makes sure that the mcentrals are
    // spaced CacheLinePadSize bytes apart, so that each mcentral.lock
    // gets its own cache line.
    // central is indexed by spanClass.
    // 136 entries in total (numSpanClasses is not defined in this excerpt;
    // 136 is the count given by the accompanying notes - confirm).
    central [numSpanClasses]struct {
        mcentral mcentral
        pad      [cpu.CacheLinePadSize - unsafe.Sizeof(mcentral{})%cpu.CacheLinePadSize]byte
    }

    spanalloc             fixalloc // allocator for span*
    cachealloc            fixalloc // allocator for mcache*
    specialfinalizeralloc fixalloc // allocator for specialfinalizer*
    specialprofilealloc   fixalloc // allocator for specialprofile*
    specialReachableAlloc fixalloc // allocator for specialReachable
    speciallock           mutex    // lock for special record allocators.
    arenaHintAlloc        fixalloc // allocator for arenaHints

    unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
}

线性分配
空闲链表分配

线性分配与空闲链表分配都会产生碎片

分级分配

根据隔离适应策略，使用内存时的最小单位为 mspan
每个 mspan 由 N 个相同大小的小格子组成
共有 67 种 mspan

// class  bytes/obj  bytes/span  objects  tail waste  max waste  min align
//     1          8        8192     1024           0     87.50%          8
//     2         16        8192      512           0     43.75%         16
//     3         24        8192      341           8     29.24%          8
//     4         32        8192      256           0     21.88%         32
//     5         48        8192      170          32     31.52%         16
//     6         64        8192      128           0     23.44%         64
//     7         80        8192      102          32     19.07%         16
//     8         96        8192       85          32     15.95%         32
//     9        112        8192       73          16     13.56%         16
//    10        128        8192       64           0     11.72%        128
//    11        144        8192       56         128     11.82%         16
//    12        160        8192       51          32      9.73%         32
//    13        176        8192       46          96      9.59%         16
//    14        192        8192       42         128      9.25%         64
//    15        208        8192       39          80      8.12%         16
//    16        224        8192       36         128      8.15%         32
//    17        240        8192       34          32      6.62%         16
//    18        256        8192       32           0      5.86%        256
//    19        288        8192       28         128     12.16%         32
//    20        320        8192       25         192     11.80%         64
//    21        352        8192       23          96      9.88%         32
//    22        384        8192       21         128      9.51%        128
//    23        416        8192       19         288     10.71%         32
//    24        448        8192       18         128      8.37%         64
//    25        480        8192       17          32      6.82%         32
//    26        512        8192       16           0      6.05%        512
//    27        576        8192       14         128     12.33%         64
//    28        640        8192       12         512     15.48%        128
//    29        704        8192       11         448     13.93%         64
//    30        768        8192       10         512     13.94%        256
//    31        896        8192        9         128     15.52%        128
//    32       1024        8192        8           0     12.40%       1024
//    33       1152        8192        7         128     12.41%        128
//    34       1280        8192        6         512     15.55%        256
//    35       1408       16384       11         896     14.00%        128
//    36       1536        8192        5         512     14.00%        512
//    37       1792       16384        9         256     15.57%        256
//    38       2048        8192        4           0     12.45%       2048
//    39       2304       16384        7         256     12.46%        256
//    40       2688        8192        3         128     15.59%        128
//    41       3072       24576        8           0     12.47%       1024
//    42       3200       16384        5         384      6.22%        128
//    43       3456       24576        7         384      8.83%        128
//    44       4096        8192        2           0     15.60%       4096
//    45       4864       24576        5         256     16.65%        256
//    46       5376       16384        3         256     10.92%        256
//    47       6144       24576        4           0     12.48%       2048
//    48       6528       32768        5         128      6.23%        128
//    49       6784       40960        6         256      4.36%        128
//    50       6912       49152        7         768      3.37%        256
//    51       8192        8192        1           0     15.61%       8192
//    52       9472       57344        6         512     14.28%        256
//    53       9728       49152        5         512      3.64%        512
//    54      10240       40960        4           0      4.99%       2048
//    55      10880       32768        3         128      6.24%        128
//    56      12288       24576        2           0     11.45%       4096
//    57      13568       40960        3         256      9.99%        256
//    58      14336       57344        4           0      5.35%       2048
//    59      16384       16384        1           0     12.49%       8192
//    60      18432       73728        4           0     11.11%       2048
//    61      19072       57344        3         128      3.57%        128
//    62      20480       40960        2           0      6.87%       4096
//    63      21760       65536        3         256      6.25%        256
//    64      24576       24576        1           0     11.45%       8192
//    65      27264       81920        3         128     10.00%        128
//    66      28672       57344        2           0      4.91%       4096
//    67      32768       32768        1           0     12.50%       8192

// alignment  bits  min obj size
//         8     3             8
//        16     4            32
//        32     5           256
//        64     6           512
//       128     7           768
//      4096    12         28672
//      8192    13         32768

// mspan describes one span: npages pages starting at startAddr, divided
// into nelems object slots of elemsize bytes each. It also carries the
// allocation and GC mark bitmaps and the sweep state for those slots.
//
//go:notinheap
type mspan struct {
    next *mspan     // next span in list, or nil if none
    prev *mspan     // previous span in list, or nil if none
    list *mSpanList // For debugging. TODO: Remove.

    startAddr uintptr // address of first byte of span aka s.base()
    npages    uintptr // number of pages in span

    manualFreeList gclinkptr // list of free objects in mSpanManual spans

    // freeindex is the slot index between 0 and nelems at which to begin scanning
    // for the next free object in this span.
    // Each allocation scans allocBits starting at freeindex until it encounters a 0
    // indicating a free object. freeindex is then adjusted so that subsequent scans begin
    // just past the newly discovered free object.
    //
    // If freeindex == nelem, this span has no free objects.
    //
    // allocBits is a bitmap of objects in this span.
    // If n >= freeindex and allocBits[n/8] & (1<<(n%8)) is 0
    // then object n is free;
    // otherwise, object n is allocated. Bits starting at nelem are
    // undefined and should never be referenced.
    //
    // Object n starts at address n*elemsize + (start << pageShift).
    freeindex uintptr
    // TODO: Look up nelems from sizeclass and remove this field if it
    // helps performance.
    nelems uintptr // number of object in the span.

    // Cache of the allocBits at freeindex. allocCache is shifted
    // such that the lowest bit corresponds to the bit freeindex.
    // allocCache holds the complement of allocBits, thus allowing
    // ctz (count trailing zero) to use it directly.
    // allocCache may contain bits beyond s.nelems; the caller must ignore
    // these.
    allocCache uint64

    // allocBits and gcmarkBits hold pointers to a span's mark and
    // allocation bits. The pointers are 8 byte aligned.
    // There are three arenas where this data is held.
    // free: Dirty arenas that are no longer accessed
    //       and can be reused.
    // next: Holds information to be used in the next GC cycle.
    // current: Information being used during this GC cycle.
    // previous: Information being used during the last GC cycle.
    // A new GC cycle starts with the call to finishsweep_m.
    // finishsweep_m moves the previous arena to the free arena,
    // the current arena to the previous arena, and
    // the next arena to the current arena.
    // The next arena is populated as the spans request
    // memory to hold gcmarkBits for the next GC cycle as well
    // as allocBits for newly allocated spans.
    //
    // The pointer arithmetic is done "by hand" instead of using
    // arrays to avoid bounds checks along critical performance
    // paths.
    // The sweep will free the old allocBits and set allocBits to the
    // gcmarkBits. The gcmarkBits are replaced with a fresh zeroed
    // out memory.
    allocBits  *gcBits
    gcmarkBits *gcBits

    // sweep generation:
    // if sweepgen == h->sweepgen - 2, the span needs sweeping
    // if sweepgen == h->sweepgen - 1, the span is currently being swept
    // if sweepgen == h->sweepgen, the span is swept and ready to use
    // if sweepgen == h->sweepgen + 1, the span was cached before sweep began and is still cached, and needs sweeping
    // if sweepgen == h->sweepgen + 3, the span was swept and then cached and is still cached
    // h->sweepgen is incremented by 2 after every GC

    sweepgen    uint32
    divMul      uint32        // for divide by elemsize
    allocCount  uint16        // number of allocated objects
    spanclass   spanClass     // size class and noscan (uint8)
    state       mSpanStateBox // mSpanInUse etc; accessed atomically (get/set methods)
    needzero    uint8         // needs to be zeroed before allocation
    elemsize    uintptr       // computed from sizeclass or from npages
    limit       uintptr       // end of data in span
    speciallock mutex         // guards specials list
    specials    *special      // linked list of special records sorted by offset.
}

每个 heapArena 中的 mspan 都不确定

mcentral 共 136 个

68 个不需要 GC 扫描，68 个需要 GC 扫描

// mcentral holds the spans of a single span class (see the spanclass field).
// Central list of free objects of a given size.
//
//go:notinheap
// Directory of the mspans that share the same span class.
type mcentral struct {
    spanclass spanClass

    // partial and full contain two mspan sets: one of swept in-use
    // spans, and one of unswept in-use spans. These two trade
    // roles on each GC cycle. The unswept set is drained either by
    // allocation or by the background sweeper in every GC cycle,
    // so only two roles are necessary.
    //
    // sweepgen is increased by 2 on each GC cycle, so the swept
    // spans are in partial[sweepgen/2%2] and the unswept spans are in
    // partial[1-sweepgen/2%2]. Sweeping pops spans from the
    // unswept set and pushes spans that are still in-use on the
    // swept set. Likewise, allocating an in-use span pushes it
    // on the swept set.
    //
    // Some parts of the sweeper can sweep arbitrary spans, and hence
    // can't remove them from the unswept set, but will add the span
    // to the appropriate swept list. As a result, the parts of the
    // sweeper and mcentral that do consume from the unswept list may
    // encounter swept spans, and these should be ignored.
    partial [2]spanSet // list of spans with a free object
    full    [2]spanSet // list of spans with no free objects
}

每个 P 有一个 mcache

// Per-thread (in Go, per-P) cache for small objects.
// This includes a small object cache and local allocation stats.
// No locking needed because it is per-thread (per-P).
//
// mcaches are allocated from non-GC'd memory, so any heap pointers
// must be specially handled.
//
//go:notinheap
type mcache struct {
    // The following members are accessed on every malloc,
    // so they are grouped here for better caching.
    nextSample uintptr // trigger heap sample after allocating this many bytes
    scanAlloc  uintptr // bytes of scannable heap allocated

    // Allocator cache for tiny objects w/o pointers.
    // See "Tiny allocator" comment in malloc.go.

    // tiny points to the beginning of the current tiny block, or
    // nil if there is no current tiny block.
    //
    // tiny is a heap pointer. Since mcache is in non-GC'd memory,
    // we handle it by clearing it in releaseAll during mark
    // termination.
    //
    // tinyoffset is the offset into the current tiny block at which
    // the next tiny allocation is placed; mallocgc aligns it and
    // then advances it by the allocated size.
    //
    // tinyAllocs is the number of tiny allocations performed
    // by the P that owns this mcache.
    tiny       uintptr
    tinyoffset uintptr
    tinyAllocs uintptr

    // The rest is not accessed on every malloc.

    alloc [numSpanClasses]*mspan // spans to allocate from, indexed by spanClass

    // NOTE(review): presumably per-order free lists of cached stack
    // memory - confirm against the stack allocator.
    stackcache [_NumStackOrders]stackfreelist

    // flushGen indicates the sweepgen during which this mcache
    // was last flushed. If flushGen != mheap_.sweepgen, the spans
    // in this mcache are stale and need to be flushed so they
    // can be swept. This is done in acquirep.
    flushGen uint32
}


// p (excerpt): only the mcache field is shown here; the remaining fields
// are elided with "...".
type p struct {
     ...
    // The local mcache owned by this P.
    mcache      *mcache
    ...
}

Tiny 微对象（0，16B）无指针
Small 小对象 [16B,32KB]
Large 大对象 (32KB, 正无穷大)

微小对象分配至普通 mspan，大对象分配到 0 级 mspan（量身定做的 mspan）

从 mcache 拿到 2 级 mspan
将多个微对象合并成一个 16Byte 存入

// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
// 小对象是从 per-P 缓存的闲暇列表中调配的。// 大对象 (> 32 kB) 间接从堆中调配。func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
    if gcphase == _GCmarktermination {throw("mallocgc called with gcphase == _GCmarktermination")
    }

    if size == 0 {return unsafe.Pointer(&zerobase)
    }
    userSize := size
    if asanenabled {// Refer to ASAN runtime library, the malloc() function allocates extra memory,
        // the redzone, around the user requested memory region. And the redzones are marked
        // as unaddressable. We perform the same operations in Go to detect the overflows or
        // underflows.
        size += computeRZlog(size)
    }

    if debug.malloc {
        if debug.sbrk != 0 {align := uintptr(16)
            if typ != nil {// TODO(austin): This should be just
                //   align = uintptr(typ.align)
                // but that's only 4 on 32-bit platforms,
                // even if there's a uint64 field in typ (see #599).
                // This causes 64-bit atomic accesses to panic.
                // Hence, we use stricter alignment that matches
                // the normal allocator better.
                if size&7 == 0 {align = 8} else if size&3 == 0 {align = 4} else if size&1 == 0 {align = 2} else {align = 1}
            }
            return persistentalloc(size, align, &memstats.other_sys)
        }

        if inittrace.active && inittrace.id == getg().goid {
            // Init functions are executed sequentially in a single goroutine.
            inittrace.allocs += 1
        }
    }

    // assistG is the G to charge for this allocation, or nil if
    // GC is not currently active.
    var assistG *g
    if gcBlackenEnabled != 0 {
        // Charge the current user G for this allocation.
        assistG = getg()
        if assistG.m.curg != nil {assistG = assistG.m.curg}
        // Charge the allocation against the G. We'll account
        // for internal fragmentation at the end of mallocgc.
        assistG.gcAssistBytes -= int64(size)

        if assistG.gcAssistBytes < 0 {
            // This G is in debt. Assist the GC to correct
            // this before allocating. This must happen
            // before disabling preemption.
            gcAssistAlloc(assistG)
        }
    }

    // Set mp.mallocing to keep from being preempted by GC.
    mp := acquirem()
    if mp.mallocing != 0 {throw("malloc deadlock")
    }
    if mp.gsignal == getg() {throw("malloc during signal")
    }
    mp.mallocing = 1

    shouldhelpgc := false
    dataSize := userSize
    c := getMCache(mp)
    if c == nil {throw("mallocgc called without a P or outside bootstrapping")
    }
    var span *mspan
    var x unsafe.Pointer
    noscan := typ == nil || typ.ptrdata == 0
    // In some cases block zeroing can profitably (for latency reduction purposes)
    // be delayed till preemption is possible; delayedZeroing tracks that state.
    delayedZeroing := false
    // <= 32KB
    if size <= maxSmallSize {
        // < 16B
        if noscan && size < maxTinySize {
            // Tiny allocator.
            //
            // Tiny allocator combines several tiny allocation requests
            // into a single memory block. The resulting memory block
            // is freed when all subobjects are unreachable. The subobjects
            // must be noscan (don't have pointers), this ensures that
            // the amount of potentially wasted memory is bounded.
            //
            // Size of the memory block used for combining (maxTinySize) is tunable.
            // Current setting is 16 bytes, which relates to 2x worst case memory
            // wastage (when all but one subobjects are unreachable).
            // 8 bytes would result in no wastage at all, but provides less
            // opportunities for combining.
            // 32 bytes provides more opportunities for combining,
            // but can lead to 4x worst case wastage.
            // The best case winning is 8x regardless of block size.
            //
            // Objects obtained from tiny allocator must not be freed explicitly.
            // So when an object will be freed explicitly, we ensure that
            // its size >= maxTinySize.
            //
            // SetFinalizer has a special case for objects potentially coming
            // from tiny allocator, it such case it allows to set finalizers
            // for an inner byte of a memory block.
            //
            // The main targets of tiny allocator are small strings and
            // standalone escaping variables. On a json benchmark
            // the allocator reduces number of allocations by ~12% and
            // reduces heap size by ~20%.
            off := c.tinyoffset
            // Align tiny pointer for required (conservative) alignment.
            if size&7 == 0 {off = alignUp(off, 8)
            } else if goarch.PtrSize == 4 && size == 12 {
                // Conservatively align 12-byte objects to 8 bytes on 32-bit
                // systems so that objects whose first field is a 64-bit
                // value is aligned to 8 bytes and does not cause a fault on
                // atomic access. See issue 37262.
                // TODO(mknyszek): Remove this workaround if/when issue 36606
                // is resolved.
                off = alignUp(off, 8)
            } else if size&3 == 0 {off = alignUp(off, 4)
            } else if size&1 == 0 {off = alignUp(off, 2)
            }
             //  该 object 适宜现有的 tiny block。if off+size <= maxTinySize && c.tiny != 0 {
                // The object fits into existing tiny block.
                x = unsafe.Pointer(c.tiny + off)
                c.tinyoffset = off + size
                c.tinyAllocs++
                mp.mallocing = 0
                releasem(mp)
                return x
            }
            // Allocate a new maxTinySize block.
             // 调配一个新的 maxTinySize block。span = c.alloc[tinySpanClass]
            v := nextFreeFast(span)
            if v == 0 {v, span, shouldhelpgc = c.nextFree(tinySpanClass)
            }
            x = unsafe.Pointer(v)
            (*[2]uint64)(x)[0] = 0
            (*[2]uint64)(x)[1] = 0
            // See if we need to replace the existing tiny block with the new one
            // based on amount of remaining free space.
            if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
                // Note: disabled when race detector is on, see comment near end of this function.
                c.tiny = uintptr(x)
                c.tinyoffset = size
            }
            size = maxTinySize
        } else {
            var sizeclass uint8
             // 查表找实用的 span 
            if size <= smallSizeMax-8 {sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
            } else {sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
            }
            size = uintptr(class_to_size[sizeclass])
            spc := makeSpanClass(sizeclass, noscan)
            span = c.alloc[spc]
            // 找到
            v := nextFreeFast(span)
            if v == 0 {
                // 本地 mcache 中的 span 已满：向全局 mcentral 申请新的 span 替换它
                v, span, shouldhelpgc = c.nextFree(spc)
            }
            x = unsafe.Pointer(v)
            if needzero && span.needzero != 0 {memclrNoHeapPointers(unsafe.Pointer(v), size)
            }
        }
    } else {
        shouldhelpgc = true
        // For large allocations, keep track of zeroed state so that
        // bulk zeroing can be happen later in a preemptible context.
        // 定制 0 级 span
         span = c.allocLarge(size, noscan)
        span.freeindex = 1
        span.allocCount = 1
        size = span.elemsize
        x = unsafe.Pointer(span.base())
        if needzero && span.needzero != 0 {
            if noscan {delayedZeroing = true} else {memclrNoHeapPointers(x, size)
                // We've in theory cleared almost the whole span here,
                // and could take the extra step of actually clearing
                // the whole thing. However, don't. Any GC bits for the
                // uncleared parts will be zero, and it's just going to
                // be needzero = 1 once freed anyway.
            }
        }
    }

    var scanSize uintptr
    if !noscan {heapBitsSetType(uintptr(x), size, dataSize, typ)
        if dataSize > typ.size {
            // Array allocation. If there are any
            // pointers, GC has to scan to the last
            // element.
            if typ.ptrdata != 0 {scanSize = dataSize - typ.size + typ.ptrdata}
        } else {scanSize = typ.ptrdata}
        c.scanAlloc += scanSize
    }

    // Ensure that the stores above that initialize x to
    // type-safe memory and set the heap bits occur before
    // the caller can make x observable to the garbage
    // collector. Otherwise, on weakly ordered machines,
    // the garbage collector could follow a pointer to x,
    // but see uninitialized memory or stale heap bits.
    publicationBarrier()

    // Allocate black during GC.
    // All slots hold nil so no scanning is needed.
    // This may be racing with GC so do it atomically if there can be
    // a race marking the bit.
    if gcphase != _GCoff {gcmarknewobject(span, uintptr(x), size, scanSize)
    }

    if raceenabled {racemalloc(x, size)
    }

    if msanenabled {msanmalloc(x, size)
    }

    if asanenabled {
        // We should only read/write the memory with the size asked by the user.
        // The rest of the allocated memory should be poisoned, so that we can report
        // errors when accessing poisoned memory.
        // The allocated memory is larger than required userSize, it will also include
        // redzone and some other padding bytes.
        rzBeg := unsafe.Add(x, userSize)
        asanpoison(rzBeg, size-userSize)
        asanunpoison(x, userSize)
    }

    if rate := MemProfileRate; rate > 0 {
        // Note cache c only valid while m acquired; see #47302
        if rate != 1 && size < c.nextSample {c.nextSample -= size} else {profilealloc(mp, x, size)
        }
    }
    mp.mallocing = 0
    releasem(mp)

    // Pointerfree data can be zeroed late in a context where preemption can occur.
    // x will keep the memory alive.
    if delayedZeroing {
        if !noscan {throw("delayed zeroing on data that may contain pointers")
        }
        memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
    }

    if debug.malloc {
        if debug.allocfreetrace != 0 {tracealloc(x, size, typ)
        }

        if inittrace.active && inittrace.id == getg().goid {
            // Init functions are executed sequentially in a single goroutine.
            inittrace.bytes += uint64(size)
        }
    }

    if assistG != nil {
        // Account for internal fragmentation in the assist
        // debt now that we know it.
        assistG.gcAssistBytes -= int64(size - dataSize)
    }

    if shouldhelpgc {if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {gcStart(t)
        }
    }

    if raceenabled && noscan && dataSize < maxTinySize {
        // Pad tinysize allocations so they are aligned with the end
        // of the tinyalloc region. This ensures that any arithmetic
        // that goes off the top end of the object will be detectable
        // by checkptr (issue 38872).
        // Note that we disable tinyalloc when raceenabled for this to work.
        // TODO: This padding is only performed when the race detector
        // is enabled. It would be nice to enable it if any package
        // was compiled with checkptr, but there's no easy way to
        // detect that (especially at compile time).
        // TODO: enable this padding for all allocations, not just
        // tinyalloc ones. It's tricky because of pointer maps.
        // Maybe just all noscan objects?
        x = add(x, size-dataSize)
    }

    return x
}

// allocLarge obtains a dedicated span from the heap for a single large
// object of size bytes and returns it. noscan selects the noscan variant
// of span class 0. It throws "out of memory" if rounding the size up to
// whole pages would overflow or if the heap cannot supply the span.
func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
    // Reject sizes that would wrap around once rounded up to a page.
    if size+_PageSize < size {
        throw("out of memory")
    }

    // Convert the byte size to a page count, rounding up.
    pageCount := size >> _PageShift
    if size&_PageMask != 0 {
        pageCount++
    }

    // Pay down sweep debt before taking pages. The heap allocation
    // below sweeps pageCount pages itself, so the debt only has to be
    // paid down to that many pages here.
    deductSweepCredit(pageCount*_PageSize, pageCount)

    class := makeSpanClass(0, noscan)
    span := mheap_.alloc(pageCount, class)
    if span == nil {
        throw("out of memory")
    }

    // Record the allocation in the consistent heap statistics.
    heapStats := memstats.heapStats.acquire()
    atomic.Xadd64(&heapStats.largeAlloc, int64(pageCount*pageSize))
    atomic.Xadd64(&heapStats.largeAllocCount, 1)
    memstats.heapStats.release()

    // Account for the new span in heapLive.
    gcController.update(int64(span.npages*pageSize), 0)

    // Push the span onto the mcentral full-swept list so the background
    // sweeper can see it, then finish initializing it.
    mheap_.central[class].mcentral.fullSwept(mheap_.sweepgen).push(span)
    span.limit = span.base() + size
    heapBitsForAddr(span.base()).initSpan(span)
    return span
}

// nextFree hands out the next free object slot for span class spc.
//
// The slot normally comes from the span currently cached in c. When that
// span is exhausted, the cache is refilled with a span that has free
// space and the slot is taken from the new span. In that case
// shouldhelpgc is reported as true: this was a heavy weight allocation,
// and the caller must decide whether a new GC cycle needs to be started
// or, if the GC is active, whether this goroutine needs to assist it.
//
// Must run in a non-preemptible context since otherwise the owner of
// c could change.
func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, shouldhelpgc bool) {
    s = c.alloc[spc]
    idx := s.nextFreeIndex()
    if idx == s.nelems {
        // The cached span is full; its allocation count must agree
        // before it is swapped out.
        if uintptr(s.allocCount) != s.nelems {
            println("runtime: s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
            throw("s.allocCount != s.nelems && freeIndex == s.nelems")
        }
        c.refill(spc)
        shouldhelpgc = true
        s = c.alloc[spc]
        idx = s.nextFreeIndex()
    }

    if idx >= s.nelems {
        throw("freeIndex is not valid")
    }

    // Slot address: index times element size, offset from the span base.
    v = gclinkptr(idx*s.elemsize + s.base())
    s.allocCount++
    if uintptr(s.allocCount) > s.nelems {
        println("s.allocCount=", s.allocCount, "s.nelems=", s.nelems)
        throw("s.allocCount > s.nelems")
    }
    return v, s, shouldhelpgc
}

// refill replaces the span cached in c for span class spc with a span
// obtained from the central lists that has at least one free object.
// The span currently cached for spc must be full; otherwise refill throws.
//
// Must run in a non-preemptible context since otherwise the owner of
// c could change.
func (c *mcache) refill(spc spanClass) {
    // The span being replaced must have no free slots left.
    old := c.alloc[spc]
    if uintptr(old.allocCount) != old.nelems {
        throw("refill of span with free space remaining")
    }

    // Unless the slot held emptymspan, hand the old span back to the
    // central lists and mark it as no longer cached.
    if old != &emptymspan {
        if old.sweepgen != mheap_.sweepgen+3 {
            throw("bad sweepgen in refill")
        }
        mheap_.central[spc].mcentral.uncacheSpan(old)
    }

    // Fetch the replacement span from the central lists.
    fresh := mheap_.central[spc].mcentral.cacheSpan()
    if fresh == nil {
        throw("out of memory")
    }
    if uintptr(fresh.allocCount) == fresh.nelems {
        throw("span has no free space")
    }

    // Mark the span as cached, which also keeps it from being swept
    // asynchronously in the next sweep phase.
    fresh.sweepgen = mheap_.sweepgen + 3

    // Count every remaining free slot as allocated up front; this is
    // corrected if the span is later uncached.
    heapStats := memstats.heapStats.acquire()
    freeSlots := int64(fresh.nelems) - int64(fresh.allocCount)
    atomic.Xadd64(&heapStats.smallAllocCount[spc.sizeclass()], freeSlots)

    // The tiny span class also flushes the cache's tiny allocation count.
    if spc == tinySpanClass {
        atomic.Xadd64(&heapStats.tinyAllocCount, int64(c.tinyAllocs))
        c.tinyAllocs = 0
    }
    memstats.heapStats.release()

    // Update heapLive under the same "everything gets allocated"
    // assumption, and flush scanAlloc in the same call.
    usedBytes := uintptr(fresh.allocCount) * fresh.elemsize
    gcController.update(int64(fresh.npages*pageSize)-int64(usedBytes), int64(c.scanAlloc))
    c.scanAlloc = 0

    c.alloc[spc] = fresh
}

// cacheSpan allocates a span to use in an mcache.
//
// Sources are tried in this order: the partial swept list, the partial
// unswept list (sweeping what it acquires), the full unswept list
// (sweeping and keeping a span only if sweeping freed a slot), and
// finally a fresh span from c.grow. It returns nil only when c.grow
// returns nil. On success the returned span has at least one free
// object and its allocCache is positioned at s.freeindex.
func (c *mcentral) cacheSpan() *mspan {
    // Deduct credit for this span allocation and sweep if necessary.
    spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize
    deductSweepCredit(spanBytes, 0)

    // traceDone records whether traceGCSweepDone has already been
    // emitted, so that it is called once whichever path reaches havespan.
    traceDone := false
    if trace.enabled {traceGCSweepStart()
    }

    // If we sweep spanBudget spans without finding any free
    // space, just allocate a fresh span. This limits the amount
    // of time we can spend trying to find free space and
    // amortizes the cost of small object sweeping over the
    // benefit of having a full free span to allocate from. By
    // setting this to 100, we limit the space overhead to 1%.
    //
    // TODO(austin,mknyszek): This still has bad worst-case
    // throughput. For example, this could find just one free slot
    // on the 100th swept span. That limits allocation latency, but
    // still has very poor throughput. We could instead keep a
    // running free-to-used budget and switch to fresh span
    // allocation if the budget runs low.
    //
    // The budget is shared: both loops below decrement the same counter.
    spanBudget := 100

    var s *mspan
    var sl sweepLocker

    // Try partial swept spans first.
    sg := mheap_.sweepgen
    if s = c.partialSwept(sg).pop(); s != nil {goto havespan}

    sl = sweep.active.begin()
    if sl.valid {
        // Now try partial unswept spans.
        for ; spanBudget >= 0; spanBudget-- {s = c.partialUnswept(sg).pop()
            if s == nil {break}
            // The result of tryAcquire is bound to a new s scoped to this
            // if statement; the outer s (the *mspan popped above) is the
            // one used after havespan.
            if s, ok := sl.tryAcquire(s); ok {
                // We got ownership of the span, so let's sweep it and use it.
                s.sweep(true)
                sweep.active.end(sl)
                goto havespan
            }
            // We failed to get ownership of the span, which means it's being or
            // has been swept by an asynchronous sweeper that just couldn't remove it
            // from the unswept list. That sweeper took ownership of the span and
            // responsibility for either freeing it to the heap or putting it on the
            // right swept list. Either way, we should just ignore it (and it's unsafe
            // for us to do anything else).
        }
        // Now try full unswept spans, sweeping them and putting them into the
        // right list if we fail to get a span.
        for ; spanBudget >= 0; spanBudget-- {s = c.fullUnswept(sg).pop()
            if s == nil {break}
            // As above, the inner s shadows the outer *mspan for the
            // duration of this if statement.
            if s, ok := sl.tryAcquire(s); ok {
                // We got ownership of the span, so let's sweep it.
                s.sweep(true)
                // Check if there's any free space.
                freeIndex := s.nextFreeIndex()
                if freeIndex != s.nelems {
                    s.freeindex = freeIndex
                    sweep.active.end(sl)
                    goto havespan
                }
                // Add it to the swept list, because sweeping didn't give us any free space.
                c.fullSwept(sg).push(s.mspan)
            }
            // See comment for partial unswept spans.
        }
        sweep.active.end(sl)
    }
    if trace.enabled {traceGCSweepDone()
        traceDone = true
    }

    // We failed to get a span from the mcentral so get one from mheap.
    s = c.grow()
    if s == nil {return nil}

    // At this point s is a span that should have free slots.
havespan:
    // Paths that jumped here directly have not emitted the sweep-done
    // trace event yet.
    if trace.enabled && !traceDone {traceGCSweepDone()
    }
    n := int(s.nelems) - int(s.allocCount)
    if n == 0 || s.freeindex == s.nelems || uintptr(s.allocCount) == s.nelems {throw("span has no free objects")
    }
    // Round freeindex down to a multiple of 64 and convert that bit
    // index to a byte index for refillAllocCache.
    freeByteBase := s.freeindex &^ (64 - 1)
    whichByte := freeByteBase / 8
    // Init alloc bits cache.
    s.refillAllocCache(whichByte)

    // Adjust the allocCache so that s.freeindex corresponds to the low bit in
    // s.allocCache.
    s.allocCache >>= s.freeindex % 64

    return s
}

// grow takes a new, empty span from the heap and initializes it for the
// size class served by c. It returns nil when the heap returns no span.
func (c *mcentral) grow() *mspan {
    sizeclass := c.spanclass.sizeclass()
    pageCount := uintptr(class_to_allocnpages[sizeclass])
    elemSize := uintptr(class_to_size[sizeclass])

    span := mheap_.alloc(pageCount, c.spanclass)
    if span == nil {
        return nil
    }

    // Number of objects that fit in the span, i.e.
    // (pageCount << _PageShift) / elemSize, computed with the span's
    // multiply-and-shift division helper.
    objCount := span.divideByElemSize(pageCount << _PageShift)
    span.limit = span.base() + elemSize*objCount
    heapBitsForAddr(span.base()).initSpan(span)
    return span
}

// allocLarge obtains a dedicated span from the heap for a single large
// object of size bytes and returns it. noscan selects the noscan variant
// of span class 0. It throws "out of memory" if rounding the size up to
// whole pages would overflow or if the heap cannot supply the span.
func (c *mcache) allocLarge(size uintptr, noscan bool) *mspan {
    // Reject sizes that would wrap around once rounded up to a page.
    if size+_PageSize < size {
        throw("out of memory")
    }

    // Convert the byte size to a page count, rounding up.
    pageCount := size >> _PageShift
    if size&_PageMask != 0 {
        pageCount++
    }

    // Pay down sweep debt before taking pages. The heap allocation
    // below sweeps pageCount pages itself, so the debt only has to be
    // paid down to that many pages here.
    deductSweepCredit(pageCount*_PageSize, pageCount)

    class := makeSpanClass(0, noscan)
    span := mheap_.alloc(pageCount, class)
    if span == nil {
        throw("out of memory")
    }

    // Record the allocation in the consistent heap statistics.
    heapStats := memstats.heapStats.acquire()
    atomic.Xadd64(&heapStats.largeAlloc, int64(pageCount*pageSize))
    atomic.Xadd64(&heapStats.largeAllocCount, 1)
    memstats.heapStats.release()

    // Account for the new span in heapLive.
    gcController.update(int64(span.npages*pageSize), 0)

    // Push the span onto the mcentral full-swept list so the background
    // sweeper can see it, then finish initializing it.
    mheap_.central[class].mcentral.fullSwept(mheap_.sweepgen).push(span)
    span.limit = span.base() + size
    heapBitsForAddr(span.base()).initSpan(span)
    return span
}

// alloc obtains a new span of npages pages from the GC'd heap.
//
// spanclass gives the size class and scannability of the span.
//
// The span is fully initialized on return. Its needzero field indicates
// whether the span has been zeroed; note that it may not have been.
func (h *mheap) alloc(npages uintptr, spanclass spanClass) *mspan {
    // Heap-locking work must not run on the G stack: it might trigger
    // stack growth, and the stack growth code itself needs to be able
    // to allocate heap.
    var span *mspan
    systemstack(func() {
        // Keep heap growth in check: while sweeping is unfinished,
        // sweep and reclaim at least npages pages before allocating.
        if !isSweepDone() {
            h.reclaim(npages)
        }
        span = h.allocSpan(npages, spanAllocHeap, spanclass)
    })
    return span
}

// allocSpan allocates an mspan which owns npages worth of memory.
//
// If typ.manual() == false, allocSpan allocates a heap span of class spanclass
// and updates heap accounting. If typ.manual() == true, allocSpan allocates a
// manually-managed span (spanclass is ignored), and the caller is
// responsible for any accounting related to its use of the span. Either
// way, allocSpan updates the memstats counters and the consistent
// heapStats entry matching typ for the bytes in the newly allocated span.
// NOTE(review): earlier wording said the bytes are added to *sysStat, but
// this signature has no sysStat parameter.
//
// The returned span is fully initialized. allocSpan returns nil if the
// heap had to grow and h.grow failed.
//
// h.lock must not be held.
//
// allocSpan must be called on the system stack both because it acquires
// the heap lock and because it must block GC transitions.
//
//go:systemstack
func (h *mheap) allocSpan(npages uintptr, typ spanAllocType, spanclass spanClass) (s *mspan) {
    // Function-global state.
    gp := getg()
    base, scav := uintptr(0), uintptr(0)
    growth := uintptr(0)

    // On some platforms we need to provide physical page aligned stack
    // allocations. Where the page size is less than the physical page
    // size, we already manage to do this by default.
    needPhysPageAlign := physPageAlignedStacks && typ == spanAllocStack && pageSize < physPageSize

    // If the allocation is small enough, try the page cache!
    // The page cache does not support aligned allocations, so we cannot use
    // it if we need to provide a physical page aligned stack allocation.
    pp := gp.m.p.ptr()
    if !needPhysPageAlign && pp != nil && npages < pageCachePages/4 {
        c := &pp.pcache

        // If the cache is empty, refill it.
        if c.empty() {lock(&h.lock)
            *c = h.pages.allocToCache()
            unlock(&h.lock)
        }

        // Try to allocate from the cache.
        base, scav = c.alloc(npages)
        if base != 0 {s = h.tryAllocMSpan()
            // Fast path: both the pages and the mspan were obtained
            // without taking the heap lock for this allocation.
            if s != nil {goto HaveSpan}
            // We have a base but no mspan, so we need
            // to lock the heap.
        }
    }

    // For one reason or another, we couldn't get the
    // whole job done without the heap lock.
    lock(&h.lock)

    if needPhysPageAlign {
        // Overallocate by a physical page to allow for later alignment.
        npages += physPageSize / pageSize
    }

    if base == 0 {
        // Try to acquire a base address.
        base, scav = h.pages.alloc(npages)
        if base == 0 {
            // The page allocator has no room: grow the heap and retry.
            var ok bool
            growth, ok = h.grow(npages)
            if !ok {unlock(&h.lock)
                return nil
            }
            base, scav = h.pages.alloc(npages)
            if base == 0 {throw("grew heap, but no adequate free space found")
            }
        }
    }
    if s == nil {
        // We failed to get an mspan earlier, so grab
        // one now that we have the heap lock.
        s = h.allocMSpanLocked()}

    if needPhysPageAlign {
        allocBase, allocPages := base, npages
        base = alignUp(allocBase, physPageSize)
        // Undo the over-allocation added above so npages is the
        // caller's request again.
        npages -= physPageSize / pageSize

        // Return memory around the aligned allocation.
        spaceBefore := base - allocBase
        if spaceBefore > 0 {h.pages.free(allocBase, spaceBefore/pageSize, false)
        }
        // Whatever part of the extra pages is not in front of the
        // aligned base lies after the allocation.
        spaceAfter := (allocPages-npages)*pageSize - spaceBefore
        if spaceAfter > 0 {h.pages.free(base+npages*pageSize, spaceAfter/pageSize, false)
        }
    }

    unlock(&h.lock)

    if growth > 0 {
        // We just caused a heap growth, so scavenge down what will soon be used.
        // By scavenging inline we deal with the failure to allocate out of
        // memory fragments by scavenging the memory fragments that are least
        // likely to be re-used.
        scavengeGoal := atomic.Load64(&h.scavengeGoal)
        if retained := heapRetained(); retained+uint64(growth) > scavengeGoal {
            // The scavenging algorithm requires the heap lock to be dropped so it
            // can acquire it only sparingly. This is a potentially expensive operation
            // so it frees up other goroutines to allocate in the meanwhile. In fact,
            // they can make use of the growth we just created.
            //
            // Scavenge at most growth bytes, and no more than the amount
            // by which the goal is exceeded.
            todo := growth
            if overage := uintptr(retained + uint64(growth) - scavengeGoal); todo > overage {todo = overage}
            h.pages.scavenge(todo)
        }
    }

HaveSpan:
    // At this point, both s != nil and base != 0, and the heap
    // lock is no longer held. Initialize the span.
    s.init(base, npages)
    if h.allocNeedsZero(base, npages) {s.needzero = 1}
    nbytes := npages * pageSize
    if typ.manual() {
        s.manualFreeList = 0
        s.nelems = 0
        s.limit = s.base() + s.npages*pageSize
        s.state.set(mSpanManual)
    } else {
        // We must set span properties before the span is published anywhere
        // since we're not holding the heap lock.
        s.spanclass = spanclass
        if sizeclass := spanclass.sizeclass(); sizeclass == 0 {
            // Size class 0: the whole span holds a single object.
            s.elemsize = nbytes
            s.nelems = 1
            s.divMul = 0
        } else {s.elemsize = uintptr(class_to_size[sizeclass])
            s.nelems = nbytes / s.elemsize
            s.divMul = class_to_divmagic[sizeclass]
        }

        // Initialize mark and allocation structures.
        s.freeindex = 0
        s.allocCache = ^uint64(0) // all 1s indicating all free.
        s.gcmarkBits = newMarkBits(s.nelems)
        s.allocBits = newAllocBits(s.nelems)

        // It's safe to access h.sweepgen without the heap lock because it's
        // only ever updated with the world stopped and we run on the
        // systemstack which blocks a STW transition.
        atomic.Store(&s.sweepgen, h.sweepgen)

        // Now that the span is filled in, set its state. This
        // is a publication barrier for the other fields in
        // the span. While valid pointers into this span
        // should never be visible until the span is returned,
        // if the garbage collector finds an invalid pointer,
        // access to the span may race with initialization of
        // the span. We resolve this race by atomically
        // setting the state after the span is fully
        // initialized, and atomically checking the state in
        // any situation where a pointer is suspect.
        s.state.set(mSpanInUse)
    }

    // Commit and account for any scavenged memory that the span now owns.
    if scav != 0 {
        // sysUsed all the pages that are actually available
        // in the span since some of them might be scavenged.
        sysUsed(unsafe.Pointer(base), nbytes)
        atomic.Xadd64(&memstats.heap_released, -int64(scav))
    }
    // Update stats.
    if typ == spanAllocHeap {atomic.Xadd64(&memstats.heap_inuse, int64(nbytes))
    }
    if typ.manual() {
        // Manually managed memory doesn't count toward heap_sys.
        memstats.heap_sys.add(-int64(nbytes))
    }
    // Update consistent stats.
    stats := memstats.heapStats.acquire()
    atomic.Xaddint64(&stats.committed, int64(scav))
    atomic.Xaddint64(&stats.released, -int64(scav))
    // Charge the bytes to the counter that matches the allocation type.
    switch typ {
    case spanAllocHeap:
        atomic.Xaddint64(&stats.inHeap, int64(nbytes))
    case spanAllocStack:
        atomic.Xaddint64(&stats.inStacks, int64(nbytes))
    case spanAllocPtrScalarBits:
        atomic.Xaddint64(&stats.inPtrScalarBits, int64(nbytes))
    case spanAllocWorkBuf:
        atomic.Xaddint64(&stats.inWorkBufs, int64(nbytes))
    }
    memstats.heapStats.release()

    // Publish the span in various locations.

    // This is safe to call without the lock held because the slots
    // related to this span will only ever be read or modified by
    // this thread until pointers into the span are published (and
    // we execute a publication barrier at the end of this function
    // before that happens) or pageInUse is updated.
    h.setSpans(s.base(), npages, s)

    if !typ.manual() {
        // Mark in-use span in arena page bitmap.
        //
        // This publishes the span to the page sweeper, so
        // it's imperative that the span be completely initialized
        // prior to this line.
        arena, pageIdx, pageMask := pageIndexOf(s.base())
        atomic.Or8(&arena.pageInUse[pageIdx], pageMask)

        // Update related page sweeper stats.
        h.pagesInUse.Add(int64(npages))
    }

    // Make sure the newly allocated span will be observed
    // by the GC before pointers into the span are published.
    publicationBarrier()

    return s
}

// grow tries to add at least npage pages of memory to the heap.
//
// It returns the number of bytes the heap grew by and whether the growth
// succeeded. On failure it prints an out-of-memory diagnostic and
// returns (0, false).
//
// h.lock must be held.
func (h *mheap) grow(npage uintptr) (uintptr, bool) {
    assertLockHeld(&h.lock)

    // We must grow the heap in whole palloc chunks.
    // We call sysMap below but note that because we
    // round up to pallocChunkPages which is on the order
    // of MiB (generally >= to the huge page size) we
    // won't be calling it too much.
    ask := alignUp(npage, pallocChunkPages) * pageSize

    totalGrowth := uintptr(0)
    // This may overflow because ask could be very large
    // and is otherwise unrelated to h.curArena.base.
    end := h.curArena.base + ask
    nBase := alignUp(end, physPageSize)
    if nBase > h.curArena.end || /* overflow */ end < h.curArena.base {
        // Not enough room in the current arena. Allocate more
        // arena space. This may not be contiguous with the
        // current arena, so we have to request the full ask.
        av, asize := h.sysAlloc(ask)
        if av == nil {
            // print, unlike println, puts nothing between its operands,
            // so the spaces around the numbers must be in the literals.
            print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
            return 0, false
        }

        if uintptr(av) == h.curArena.end {
            // The new space is contiguous with the old
            // space, so just extend the current space.
            h.curArena.end = uintptr(av) + asize
        } else {
            // The new space is discontiguous. Track what
            // remains of the current space and switch to
            // the new space. This should be rare.
            if size := h.curArena.end - h.curArena.base; size != 0 {
                // Transition this space from Reserved to Prepared and mark it
                // as released since we'll be able to start using it after updating
                // the page allocator and releasing the lock at any time.
                sysMap(unsafe.Pointer(h.curArena.base), size, &memstats.heap_sys)
                // Update stats.
                atomic.Xadd64(&memstats.heap_released, int64(size))
                stats := memstats.heapStats.acquire()
                atomic.Xaddint64(&stats.released, int64(size))
                memstats.heapStats.release()
                // Update the page allocator's structures to make this
                // space ready for allocation.
                h.pages.grow(h.curArena.base, size)
                totalGrowth += size
            }
            // Switch to the new space.
            h.curArena.base = uintptr(av)
            h.curArena.end = uintptr(av) + asize
        }

        // Recalculate nBase.
        // We know this won't overflow, because sysAlloc returned
        // a valid region starting at h.curArena.base which is at
        // least ask bytes in size.
        nBase = alignUp(h.curArena.base+ask, physPageSize)
    }

    // Grow into the current arena.
    v := h.curArena.base
    h.curArena.base = nBase

    // Transition the space we're going to use from Reserved to Prepared.
    sysMap(unsafe.Pointer(v), nBase-v, &memstats.heap_sys)

    // The memory just allocated counts as both released
    // and idle, even though it's not yet backed by spans.
    //
    // The allocation is always aligned to the heap arena
    // size which is always > physPageSize, so its safe to
    // just add directly to heap_released.
    atomic.Xadd64(&memstats.heap_released, int64(nBase-v))
    stats := memstats.heapStats.acquire()
    atomic.Xaddint64(&stats.released, int64(nBase-v))
    memstats.heapStats.release()

    // Update the page allocator's structures to make this
    // space ready for allocation.
    h.pages.grow(v, nBase-v)
    totalGrowth += nBase - v
    return totalGrowth, true
}

GC 使用三色标记-清除法

root

被栈上的指针引用
被全局变量指针引用
被寄存器中的指针引用

被 root 间接引用的对象也不可回收

黑色：有引用，已经扫描完成
灰色：有引用，正在扫描
白色：未扫描 / 垃圾

Yuasa 删除屏障：并发标记时，对指针释放的白色对象置灰（可以杜绝在 GC 标记过程中，被释放指针所指向的对象被错误清理回收）

Dijkstra 插入（写）屏障：并发标记时，对指针新指向的白色对象置灰（可以杜绝在 GC 标记过程中，被新插入指针所指向的对象被错误清理回收）

Go 使用混合屏障

go tool pprof
go tool trace
go build -gcflags="-m"
GODEBUG="gctrace=1"

关于go:内存分配与GC

内存分配与 GC

逃逸分析

触发逃逸的情景

栈扩容

heapArena

分配

内存管理单元 mspan

中心索引 mcentral

协程缓存 mcache

分配堆内存

对象分级

微对象分配

GC

GC 回收对象

root

三色标记法

Yuasa 删除屏障

Dijkstra 写屏障

混合屏障

GC 分析工具