前序
上文讲到 rt0_go 中调用的 runtime·schedinit(SB),本文接着看它之后的 runtime·newproc 调用。
TEXT runtime·rt0_go<ABIInternal>(SB),NOSPLIT,$0
	// Earlier part omitted; see part (1) of this "golang scheduling study - scheduling flow" series.

	// create a new goroutine to start program
	MOVQ	$runtime·mainPC(SB), AX		// entry, i.e. $runtime·main
	PUSHQ	AX				// pushed first: second argument of newproc (fn)
	PUSHQ	$0				// pushed last: first argument of newproc (arg size)
	CALL	runtime·newproc(SB)		// i.e. runtime·newproc($0, $runtime·mainPC(SB))
	POPQ	AX				// pop the two pushed arguments again
	POPQ	AX

	// start this M
	CALL	runtime·mstart(SB)

	CALL	runtime·abort(SB)	// mstart should never return
	RET

	// Prevent dead-code elimination of debugCallV1, which is
	// intended to be called by debuggers.
	MOVQ	$runtime·debugCallV1<ABIInternal>(SB), AX
	RET
newproc
创建一个新的 g 来运行带有 siz 字节参数的 fn,并把它放到 g.m.p 的待运行队列中。
在编写程序时,使用 go func() {} 来创建一个 goroutine(g),这条语句会被编译器翻译成对函数 newproc() 的调用。
// Create a new g running fn with siz bytes of arguments.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
//
// The stack layout of this call is unusual: it assumes that the
// arguments to pass to fn are on the stack sequentially immediately
// after &fn. Hence, they are logically part of newproc's argument
// frame, even though they don't appear in its signature (and can't
// because their types differ between call sites).
//
// This must be nosplit because this stack layout means there are
// untyped arguments in newproc's argument frame. Stack copies won't
// be able to adjust them and stack splits won't be able to copy them.
//
//go:nosplit
func newproc(siz int32, fn *funcval) {
	// argp is one pointer-size past &fn, i.e. where fn's arguments begin
	// (the slot holding a=1 in the article's funCaller example).
	argp := add(unsafe.Pointer(&fn), sys.PtrSize)
	gp := getg()
	pc := getcallerpc() // PC of the caller (funCaller in the article's example)
	systemstack(func() { // run the closure on the g0 stack
		// Create the new g; newproc1 is analyzed in its own section of this article.
		newg := newproc1(fn, argp, siz, gp, pc)

		_p_ := getg().m.p.ptr()
		// Put newg into _p_'s runnext slot.
		// Per the article: when the third argument of runqput is true, g goes
		// into runnext and the previous runnext is moved to runq; otherwise g
		// goes into runq. If runq is full, g goes to sched.runq (which needs
		// the lock).
		// Reference: https://blog.csdn.net/diaosssss/article/details/93066804
		runqput(_p_, newg, true)

		if mainStarted {
			wakep() // see the wakep section of this article
		}
	})
}
假如是调用
func funCaller() { go funcA(a=1, b=2) }
newproc 的调用栈,caller SP 表示 funCaller 的 SP
位置 | 值 |
---|---|
(24)caller SP | b=2 |
(16)caller SP | a=1 |
(8)caller SP | *funcA |
(0)caller SP | siz=16, sizeof(a)+sizeof(b)(不包含 *funcA:参数从 &fn 之后开始,newproc1 从 argp 处拷贝 narg 字节) |
(-8)caller SP | caller PC |
newproc1
// Create a new g in state _Grunnable, starting at fn, with narg bytes
// of arguments starting at argp. callerpc is the address of the go
// statement that created this. The caller is responsible for adding
// the new g to the scheduler.
//
// This must run on the system stack because it's the continuation of
// newproc, which cannot split the stack.
//
//go:systemstack
func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) *g {
	_g_ := getg()

	if fn == nil {
		_g_.m.throwing = -1 // do not dump full stacks
		throw("go of nil func value")
	}
	acquirem() // disable preemption because it can be holding p in a local var (article note: loc++)
	// Round the argument size up to a multiple of 8 bytes.
	siz := narg
	siz = (siz + 7) &^ 7

	// We could allocate a larger initial stack if necessary.
	// Not worth it: this is almost always an error.
	// 4*sizeof(uintreg): extra space added below
	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
	// The arguments must fit in what remains of the initial stack.
	if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
		throw("newproc: function arguments too large for new goroutine")
	}

	_p_ := _g_.m.p.ptr()
	newg := gfget(_p_) // try to reuse a g from the free-g cache; see the gfget section
	if newg == nil {
		// No cached g available: allocate a new one with a _StackMin-sized
		// stack (the article states this is 2 KB).
		newg = malg(_StackMin)
		casgstatus(newg, _Gidle, _Gdead) // move the g's status from _Gidle to _Gdead
		// Add newg to allg (per the article, so that the GC does not reclaim it).
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		throw("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		throw("newproc1: new g is not Gdead")
	}

	totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
	totalSize += -totalSize & (sys.SpAlign - 1)                  // align to spAlign
	// Compute the new goroutine's initial stack pointer: the top of its
	// stack minus the space reserved for the arguments and extra words.
	sp := newg.stack.hi - totalSize
	spArg := sp
	if usesLR {
		// caller's LR
		*(*uintptr)(unsafe.Pointer(sp)) = 0
		prepGoExitFrame(sp)
		spArg += sys.MinFrameSize
	}
	if narg > 0 { // there are arguments to pass
		// Copy the arguments onto the new goroutine's stack.
		memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
		// This is a stack-to-stack copy. If write barriers
		// are enabled and the source stack is grey (the
		// destination is always black), then perform a
		// barrier copy. We do this *after* the memmove
		// because the destination stack may have garbage on
		// it.
		if writeBarrier.needed && !_g_.m.curg.gcscandone {
			f := findfunc(fn.fn)
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
			// (Article note: some GC-related work is omitted here.)
			if stkmap.nbit > 0 {
				// We're in the prologue, so it's always stack map index 0.
				bv := stackmapdata(stkmap, 0)
				bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
			}
		}
	}

	// Initialize the g's gobuf (newg.sched): clear it, then record sp, pc,
	// the g itself and the function to run.
	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp
	newg.stktopsp = sp
	newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
	newg.sched.g = guintptr(unsafe.Pointer(newg))
	gostartcallfn(&newg.sched, fn)
	newg.gopc = callerpc
	newg.ancestors = saveAncestors(callergp)
	newg.startpc = fn.fn
	if _g_.m.curg != nil {
		newg.labels = _g_.m.curg.labels
	}
	if isSystemGoroutine(newg, false) {
		atomic.Xadd(&sched.ngsys, +1)
	}
	casgstatus(newg, _Gdead, _Grunnable)

	// Assign a goroutine id from this P's cached range, refilling the
	// range from sched.goidgen when it is exhausted.
	if _p_.goidcache == _p_.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
		_p_.goidcache -= _GoidCacheBatch - 1
		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
	}
	newg.goid = int64(_p_.goidcache)
	_p_.goidcache++
	if raceenabled {
		newg.racectx = racegostart(callerpc)
	}
	if trace.enabled {
		traceGoCreate(newg, newg.startpc)
	}
	releasem(_g_.m) // pairs with the acquirem() call above

	return newg
}
gfget
从缓存中获取g
// Get from gfree list.
// If local list is empty, grab a batch from global list.
//
// Returns nil when no free g is available. A returned g whose stack had
// been released gets a fresh _FixedStack-sized stack before it is returned.
func gfget(_p_ *p) *g {
retry:
	// If _p_.gFree is empty and either sched.gFree.stack or
	// sched.gFree.noStack is non-empty, move up to 32 Gs over to this P.
	if _p_.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
		lock(&sched.gFree.lock)
		// Move a batch of free Gs to the P.
		for _p_.gFree.n < 32 {
			// Prefer Gs with stacks.
			gp := sched.gFree.stack.pop()
			if gp == nil {
				gp = sched.gFree.noStack.pop()
				if gp == nil {
					// Both global lists are empty.
					break
				}
			}
			sched.gFree.n--
			_p_.gFree.push(gp)
			_p_.gFree.n++
		}
		unlock(&sched.gFree.lock)
		// Re-check the condition; the local list may now be non-empty.
		goto retry
	}
	gp := _p_.gFree.pop()
	if gp == nil {
		return nil
	}
	_p_.gFree.n--
	if gp.stack.lo == 0 {
		// Stack was deallocated in gfput. Allocate a new one.
		systemstack(func() {
			gp.stack = stackalloc(_FixedStack)
		})
		gp.stackguard0 = gp.stack.lo + _StackGuard
	} else {
		// The g kept its stack: report the reused stack range to the race
		// detector / msan when those are enabled.
		if raceenabled {
			racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if msanenabled {
			msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
	}
	return gp
}
malg()
malg() 函数创建一个新的 g,包括为该 g 申请栈空间(支持程序分配栈的系统)。系统中的每个 g 都是由该函数创建而来的。
// Allocate a new g, with a stack big enough for stacksize bytes.
//
// When stacksize is negative no stack is allocated and the returned g
// has its stack fields left at their zero values.
func malg(stacksize int32) *g {
	newg := new(g)
	if stacksize >= 0 {
		// Add the _StackSystem overhead and round the total with round2.
		stacksize = round2(_StackSystem + stacksize)
		// The stack allocation itself is performed via systemstack.
		systemstack(func() {
			newg.stack = stackalloc(uint32(stacksize))
		})
		newg.stackguard0 = newg.stack.lo + _StackGuard
		newg.stackguard1 = ^uintptr(0)
		// Clear the bottom word of the stack. We record g
		// there on gsignal stack during VDSO on ARM and ARM64.
		*(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0
	}
	return newg
}
gfput
// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
//
// gp must be in state _Gdead; otherwise gfput throws.
func gfput(_p_ *p, gp *g) {
	if readgstatus(gp) != _Gdead {
		throw("gfput: bad status (not Gdead)")
	}

	stksize := gp.stack.hi - gp.stack.lo

	if stksize != _FixedStack {
		// non-standard stack size - free it.
		// Zeroing stack.lo marks the g as stackless; gfget checks for this
		// and allocates a new stack on reuse.
		stackfree(gp.stack)
		gp.stack.lo = 0
		gp.stack.hi = 0
		gp.stackguard0 = 0
	}

	_p_.gFree.push(gp)
	_p_.gFree.n++
	// Once the local list reaches 64 entries, move Gs to the global lists
	// until fewer than 32 remain locally.
	if _p_.gFree.n >= 64 {
		lock(&sched.gFree.lock)
		for _p_.gFree.n >= 32 {
			_p_.gFree.n--
			gp = _p_.gFree.pop()
			// Keep Gs with and without stacks on separate global lists.
			if gp.stack.lo == 0 {
				sched.gFree.noStack.push(gp)
			} else {
				sched.gFree.stack.push(gp)
			}
			sched.gFree.n++
		}
		unlock(&sched.gFree.lock)
	}
}
wakep
引用文章
[1] Go语言内幕(6):启动和内存分配初始化
https://studygolang.com/artic...