One of the keys to Go's efficient scheduling is preemption. The sysmon routine continuously checks how long the work on every P has been executing; when it finds a G that has run past its allotted time slice, it initiates preemption. This, too, is implemented in the retake function: the previous section described what retake does for system calls, and here we look at how it performs preemption.
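To make this concrete, here is a minimal sketch (not runtime code; fib is just a placeholder CPU-bound workload) run on a single P. Even though one goroutine never blocks voluntarily, sysmon keeps calling retake, the busy G is preempted roughly every time slice, and the main goroutine's prints still appear on schedule.

package main

import (
    "fmt"
    "runtime"
    "time"
)

// fib is a hypothetical CPU-bound workload; its recursive calls give the
// scheduler function prologues where the preemption check can fire.
func fib(n int) int {
    if n < 2 {
        return n
    }
    return fib(n-1) + fib(n-2)
}

func main() {
    runtime.GOMAXPROCS(1) // a single P makes the effect of preemption visible

    go func() {
        for {
            fib(30) // never yields voluntarily
        }
    }()

    start := time.Now()
    for i := 0; i < 5; i++ {
        time.Sleep(100 * time.Millisecond)
        // Without sysmon's preemption this line would never be reached
        // again once the busy goroutine got hold of the only P.
        fmt.Printf("main scheduled after %v\n", time.Since(start))
    }
}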
retake
retake() iterates over all Ps. If a P is in the running state and its current G has been executing continuously for a long time, that G gets preempted. retake() calls preemptone(), which sets the P's stackguard0 to stackPreempt (see Split Stacks for details on stackguard). As a result, the next time the G running on that P makes a function call, its stack-space check fails, which triggers morestack() (assembly code in asm_XXX.s) followed by a chain of function calls; the main call path is:
morestack() (assembly) -> newstack() -> gopreempt_m() -> goschedImpl() -> schedule()
http://ga0.github.io/golang/2...
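The flip side of this design is that the preemption request is only noticed at a function-call prologue. The sketch below (behavior depends on the Go version) shows the classic failure mode: a loop with no function calls never runs the stack check, so before Go 1.14 it could starve every other goroutine on its P; since Go 1.14 the runtime also sends a signal for asynchronous preemption, so on current versions the prints still appear.

package main

import (
    "fmt"
    "runtime"
    "time"
)

// Sketch of the limit of stack-check based preemption. The loop below
// never calls a function, so it never reaches a prologue where the
// stackguard0 == stackPreempt check could fire. On Go < 1.14 with
// GOMAXPROCS=1 the prints never appear; on Go >= 1.14 signal-based
// asynchronous preemption interrupts the loop and the prints show up.
func main() {
    runtime.GOMAXPROCS(1)

    go func() {
        x := 0
        for {
            x++ // no function calls, no stack growth check
        }
    }()

    // The first Sleep parks main and lets the tight loop start.
    for i := 0; i < 3; i++ {
        time.Sleep(100 * time.Millisecond)
        fmt.Println("main goroutine scheduled, iteration", i)
    }
}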
func retake(now int64) uint32 {
    n := 0
    // Prevent allp slice changes. This lock will be completely
    // uncontended unless we're already stopping the world.
    lock(&allpLock)
    // We can't use a range loop over allp because we may
    // temporarily drop the allpLock. Hence, we need to re-fetch
    // allp each time around the loop.
    for i := 0; i < len(allp); i++ {
        _p_ := allp[i]
        if _p_ == nil {
            // This can happen if procresize has grown
            // allp but not yet created new Ps.
            continue
        }
        pd := &_p_.sysmontick
        s := _p_.status
        sysretake := false
        if s == _Prunning || s == _Psyscall {
            // Preempt G if it's running for too long.
            t := int64(_p_.schedtick)
            if int64(pd.schedtick) != t {
                pd.schedtick = uint32(t)
                pd.schedwhen = now
            } else if pd.schedwhen+forcePreemptNS <= now {
                // The G has used up its time slice: preempt it.
                preemptone(_p_)
                // In case of syscall, preemptone() doesn't
                // work, because there is no M wired to P.
                sysretake = true
            }
        }
        // The P is in a system call.
        if s == _Psyscall {
            // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
            t := int64(_p_.syscalltick)
            if !sysretake && int64(pd.syscalltick) != t {
                pd.syscalltick = uint32(t)
                pd.syscallwhen = now
                continue
            }
            // On the one hand we don't want to retake Ps if there is no other work to do,
            // but on the other hand we want to retake them eventually
            // because they can prevent the sysmon thread from deep sleep.
            // No runnable work and the syscall has not yet blocked past the threshold: skip.
            if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
                continue
            }
            // Drop allpLock so we can take sched.lock.
            // From here on we reschedule a P that has been blocked in a syscall for too long.
            unlock(&allpLock)
            // Need to decrement number of idle locked M's
            // (pretending that one more is running) before the CAS.
            // Otherwise the M from which we retake can exit the syscall,
            // increment nmidle and report deadlock.
            incidlelocked(-1)
            if atomic.Cas(&_p_.status, s, _Pidle) {
                if trace.enabled {
                    traceGoSysBlock(_p_)
                    traceProcStop(_p_)
                }
                n++
                _p_.syscalltick++
                // Key call: hand the long-blocked P off so its work can be rescheduled.
                handoffp(_p_)
            }
            incidlelocked(1)
            lock(&allpLock)
        }
    }
    unlock(&allpLock)
    return uint32(n)
}
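Note the two thresholds at work above: forcePreemptNS (10ms in the runtime versions this code comes from) bounds how long a G may keep running before preemptone() marks it, while the hard-coded 10*1000*1000 ns (also 10ms) applies to the syscall case. A P stuck in _Psyscall is retaken after a single sysmon tick if it still has runnable Gs or there are no spinning/idle Ms to pick up work; only when its run queue is empty and other Ms are available is it left alone until the 10ms have passed.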
handoffp
When a system call blocks for too long, handoffp() is called:
func handoffp(_p_ *p) {
    // handoffp must start an M in any situation where
    // findrunnable would return a G to run on _p_.

    // if it has local work, start it straight away
    if !runqempty(_p_) || sched.runqsize != 0 {
        startm(_p_, false)
        return
    }
    // if it has GC work, start it straight away
    if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {
        startm(_p_, false)
        return
    }
    // no local work, check that there are no spinning/idle M's,
    // otherwise our help is not required
    if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
        startm(_p_, true)
        return
    }
    lock(&sched.lock)
    if sched.gcwaiting != 0 {
        _p_.status = _Pgcstop
        sched.stopwait--
        if sched.stopwait == 0 {
            notewakeup(&sched.stopnote)
        }
        unlock(&sched.lock)
        return
    }
    if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) {
        sched.safePointFn(_p_)
        sched.safePointWait--
        if sched.safePointWait == 0 {
            notewakeup(&sched.safePointNote)
        }
    }
    if sched.runqsize != 0 {
        unlock(&sched.lock)
        startm(_p_, false)
        return
    }
    // If this is the last running P and nobody is polling network,
    // need to wakeup another M to poll network.
    if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 {
        unlock(&sched.lock)
        startm(_p_, false)
        return
    }

    // The scheduler lock cannot be held when calling wakeNetPoller below
    // because wakeNetPoller may call wakep which may call startm.
    when := nobarrierWakeTime(_p_)
    pidleput(_p_)
    unlock(&sched.lock)

    if when != 0 {
        wakeNetPoller(when)
    }
}
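To see the syscall path end to end, here is a hedged, Linux-only sketch; it assumes syscall.Nanosleep enters the kernel through the regular blocking-syscall path rather than a non-blocking wrapper. One goroutine sits inside nanosleep(2) for far longer than sysmon's threshold, yet with GOMAXPROCS=1 the main goroutine keeps making progress because retake() flips the stuck P to _Pidle and handoffp() arranges for another M to take it over.

//go:build linux

package main

import (
    "fmt"
    "runtime"
    "syscall"
    "time"
)

// Hedged demo of syscall handoff (assumes Linux and that syscall.Nanosleep
// goes through the regular entersyscall path). The sleeping goroutine holds
// an M inside the kernel for ~200ms; sysmon notices the P stuck in _Psyscall,
// retake() moves it to _Pidle and handoffp() makes sure the rest of the
// program can still run even though GOMAXPROCS is 1.
func main() {
    runtime.GOMAXPROCS(1)

    go func() {
        ts := syscall.Timespec{Sec: 0, Nsec: 200 * 1000 * 1000}
        // Block inside the nanosleep(2) system call.
        syscall.Nanosleep(&ts, nil)
        fmt.Println("blocking syscall returned")
    }()

    // These prints keep appearing while the syscall is in flight,
    // which means the single P was not held hostage by the sleeping M.
    for i := 0; i < 4; i++ {
        time.Sleep(50 * time.Millisecond)
        fmt.Println("main still making progress", i)
    }
}

If handoffp() did not start or wake another M here, the only P would stay wired to the M sleeping in the kernel, and the main goroutine could not run again until the syscall returned.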