golang 调度高效的秘诀之一是它的抢占式调度。当任务函数执行的时间超过了一定的时间,
sysmon 方法会不断地检测所有 p 上任务的执行情况,当有超过预定执行时间的 g 时,会发起抢占。这一切也是在 retake 函数中实现的,上文描述了该函数在系统调用中的功能,这里讲下该函数如何执行抢占。
retake
retake() 函数会遍历所有的 P,如果一个 P 处于执行状态,且已经连续执行了较长时间,就会被抢占。retake() 调用 preemptone() 将该 P 上正在运行的 G 的 stackguard0 设为 stackPreempt(关于 stackguard 的详细内容,可以参考 Split Stacks),这将导致该 P 中正在执行的 G 进行下一次函数调用时,栈空间检查失败,进而触发 morestack()(汇编代码,位于 asm_XXX.s 中),然后进行一连串的函数调用,主要的调用过程如下:
morestack()(汇编代码)-> newstack() -> gopreempt_m() -> goschedImpl() -> schedule()
http://ga0.github.io/golang/2…
// retake makes one pass over every P in allp and does two things:
//   - for a P in _Prunning or _Psyscall whose schedtick has not changed since
//     the previous observation, once at least forcePreemptNS has passed since
//     the recorded schedwhen, it calls preemptone on that P;
//   - for a P in _Psyscall whose syscalltick has not changed since the
//     previous observation (or for which preemptone was just called), it
//     CASes the status to _Pidle and passes the P to handoffp.
//
// now is the current time; it is compared against, and stored into, the
// timestamps kept in each P's sysmontick. The return value is the number of
// Ps whose status was successfully switched from the observed status to
// _Pidle.
func retake(now int64) uint32 {
// n counts the Ps taken back from a syscall in this pass.
n := 0
// Prevent allp slice changes. This lock will be completely
// uncontended unless we're already stopping the world.
lock(&allpLock)
// We can't use a range loop over allp because we may
// temporarily drop the allpLock. Hence, we need to re-fetch
// allp each time around the loop.
for i := 0; i < len(allp); i++ {_p_ := allp[i]
if _p_ == nil {
// This can happen if procresize has grown
// allp but not yet created new Ps.
continue
}
// pd is the per-P record of what was seen on earlier passes
// (tick counters and the times at which they were recorded).
pd := &_p_.sysmontick
s := _p_.status
// sysretake is set when preemptone was called for this P; it forces the
// syscall branch below to skip its "first observation" shortcut.
sysretake := false
if s == _Prunning || s == _Psyscall {
// Preempt G if it's running for too long.
t := int64(_p_.schedtick)
// schedtick moved since the last pass: record the new value and restart
// the clock instead of preempting.
if int64(pd.schedtick) != t {pd.schedtick = uint32(t)
pd.schedwhen = now
} else if pd.schedwhen+forcePreemptNS <= now {
// Same schedtick for at least forcePreemptNS: request preemption.
preemptone(_p_)
// In case of syscall, preemptone() doesn't
// work, because there is no M wired to P.
sysretake = true
}
}
// The P is in a system call.
if s == _Psyscall {// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
t := int64(_p_.syscalltick)
// First time this syscalltick is seen (and no preemption was requested
// above): record it and leave the P alone until a later pass.
if !sysretake && int64(pd.syscalltick) != t {pd.syscalltick = uint32(t)
pd.syscallwhen = now
continue
}
// On the one hand we don't want to retake Ps if there is no other work to do,
// but on the other hand we want to retake them eventually
// because they can prevent the sysmon thread from deep sleep.
// Skip this P when its run queue is empty, there is at least one spinning M
// or idle P, and syscallwhen+10*1000*1000 is still in the future
// (10ms if now is in nanoseconds, as the name forcePreemptNS suggests).
if runqempty(_p_) && atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {continue}
// Drop allpLock so we can take sched.lock.
// From here on the P is being retaken from a long-blocked system call.
unlock(&allpLock)
// Need to decrement number of idle locked M's
// (pretending that one more is running) before the CAS.
// Otherwise the M from which we retake can exit the syscall,
// increment nmidle and report deadlock.
incidlelocked(-1)
// The CAS only succeeds if the status is still the value s read above.
if atomic.Cas(&_p_.status, s, _Pidle) {
if trace.enabled {traceGoSysBlock(_p_)
traceProcStop(_p_)
}
n++
_p_.syscalltick++
// Key call: hand the long-blocked P over to handoffp so it can be
// put back to use.
handoffp(_p_)
}
// Undo the temporary adjustment made before the CAS and re-acquire
// allpLock before continuing the loop over allp.
incidlelocked(1)
lock(&allpLock)
}
}
unlock(&allpLock)
return uint32(n)
}
handoffp
当系统调用时间过长的时候,会调用 handoffp() 方法:
// handoffp decides what to do with _p_; retake calls it right after switching
// a P from _Psyscall to _Pidle. Depending on the scheduler state it either
// starts an M for the P via startm, sets the P to _Pgcstop when
// sched.gcwaiting is non-zero, or puts the P on the idle list via pidleput.
func handoffp(_p_ *p) {
// handoffp must start an M in any situation where
// findrunnable would return a G to run on _p_.
// if it has local work, start it straight away
// (this first look at sched.runqsize is made without holding sched.lock)
if !runqempty(_p_) || sched.runqsize != 0 {startm(_p_, false)
return
}
// if it has GC work, start it straight away
if gcBlackenEnabled != 0 && gcMarkWorkAvailable(_p_) {startm(_p_, false)
return
}
// no local work, check that there are no spinning/idle M's,
// otherwise our help is not required
// The CAS moves nmspinning from 0 to 1 before startm is called; the true
// argument presumably marks the new M as spinning to match -- TODO confirm
// against startm.
if atomic.Load(&sched.nmspinning)+atomic.Load(&sched.npidle) == 0 && atomic.Cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
startm(_p_, true)
return
}
// The remaining checks read and update sched fields under sched.lock.
lock(&sched.lock)
// sched.gcwaiting is set: put this P in _Pgcstop, decrement stopwait, and
// wake the waiter on stopnote when the count reaches zero.
if sched.gcwaiting != 0 {
_p_.status = _Pgcstop
sched.stopwait--
if sched.stopwait == 0 {notewakeup(&sched.stopnote)
}
unlock(&sched.lock)
return
}
// A safe-point function is pending for this P: claim it with the CAS
// (1 -> 0), run it here, and wake the waiter on safePointNote when
// safePointWait reaches zero.
if _p_.runSafePointFn != 0 && atomic.Cas(&_p_.runSafePointFn, 1, 0) {sched.safePointFn(_p_)
sched.safePointWait--
if sched.safePointWait == 0 {notewakeup(&sched.safePointNote)
}
}
// Re-check the global run queue now that sched.lock is held; the lock is
// released before calling startm.
if sched.runqsize != 0 {unlock(&sched.lock)
startm(_p_, false)
return
}
// If this is the last running P and nobody is polling network,
// need to wakeup another M to poll network.
if sched.npidle == uint32(gomaxprocs-1) && atomic.Load64(&sched.lastpoll) != 0 {unlock(&sched.lock)
startm(_p_, false)
return
}
// The scheduler lock cannot be held when calling wakeNetPoller below
// because wakeNetPoller may call wakep which may call startm.
// NOTE(review): when is read before pidleput, presumably because _p_ should
// not be touched once it is on the idle list -- confirm.
when := nobarrierWakeTime(_p_)
pidleput(_p_)
unlock(&sched.lock)
if when != 0 {wakeNetPoller(when)
}
}