关于golang:golang调度学习调度流程-五-Syscall

syscall函数

Syscall函数的定义如下，传入4个参数，返回3个值。

func syscall(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno)

syscall函数的作用是传入系统调用的地址和参数，执行完成后返回。流程主要是：系统调用前执行entersyscall，设置g、p的状态，然后传入参数并执行系统调用，执行后写返回值，然后执行exitsyscall，再次设置g、p的状态。
entersyscall和exitsyscall在g的调用中细讲。

// func Syscall(trap int64, a1, a2, a3 uintptr) (r1, r2, err uintptr);
// Trap # in AX, args in DI SI DX R10 R8 R9, return in AX DX
// Note that this differs from "standard" ABI convention, which
// would pass 4th arg in CX, not R10.
// Four inputs are read from the frame: trap, a1, a2, a3.
TEXT ·Syscall(SB),NOSPLIT,$0-56
	// Call entersyscall before trapping into the kernel so the runtime can
	// record scheduling information and switch the states of the G and P.
	CALL	runtime·entersyscall(SB)
	// Load the arguments into registers.
	MOVQ	a1+8(FP), DI
	MOVQ	a2+16(FP), SI
	MOVQ	a3+24(FP), DX
	MOVQ	trap+0(FP), AX	// syscall entry
	SYSCALL
	// Unsigned compare: AX <= 0xfffffffffffff001 is treated as success.
	CMPQ	AX, $0xfffffffffffff001
	JLS	ok
	// Failure path: r1 = -1, r2 = 0, err = -AX.
	MOVQ	$-1, r1+32(FP)
	MOVQ	$0, r2+40(FP)
	NEGQ	AX
	MOVQ	AX, err+48(FP)
	// Call exitsyscall after the system call has returned.
	CALL	runtime·exitsyscall(SB)
	RET
ok:
	// Success path: r1 = AX, r2 = DX, err = 0.
	MOVQ	AX, r1+32(FP)
	MOVQ	DX, r2+40(FP)
	MOVQ	$0, err+48(FP)
	CALL	runtime·exitsyscall(SB)
	RET

// RawSyscall issues the system call directly: unlike Syscall above, it does
// not call runtime·entersyscall or runtime·exitsyscall.
TEXT	·RawSyscall(SB),NOSPLIT,$0-56
	MOVQ	a1+8(FP), DI
	MOVQ	a2+16(FP), SI
	MOVQ	a3+24(FP), DX
	MOVQ	trap+0(FP), AX	// syscall entry
	SYSCALL
	// Branch to ok1 when the carry flag is clear.
	JCC	ok1
	MOVQ	$-1, r1+32(FP)	// r1
	MOVQ	$0, r2+40(FP)	// r2
	MOVQ	AX, err+48(FP)	// errno
	RET
ok1:
	MOVQ	AX, r1+32(FP)	// r1
	MOVQ	DX, r2+40(FP)	// r2
	MOVQ	$0, err+48(FP)	// errno
	RET

显然Syscall比RawSyscall多调用了两个方法：entersyscall和exitsyscall。增加这两个函数的调用，让调度器有机会对即将进入系统调用的goroutine进行调整，方便调度。

entersyscall

// entersyscall is called when a goroutine is about to enter a system call.
// It forwards the caller's PC and SP to reentersyscall, which moves the G
// to _Gsyscall and the P to _Psyscall until the system call finishes.
// The P lets go of the M, but the M remembers the P in m.oldp so that it
// can be found again once the system call returns.
func entersyscall() {
	reentersyscall(getcallerpc(), getcallersp())
}

// Syscall tracing:
// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
// If the syscall does not block, that is it, we do not emit any other events.
// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
// when syscall returns we emit traceGoSysExit and when the goroutine starts running
// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
// and we wait for the increment before emitting traceGoSysExit.
// Note that the increment is done even if tracing is not enabled,
// because tracing can be enabled in the middle of syscall. We don't want the wait to hang.
//go:nosplit
func reentersyscall(pc, sp uintptr) {
	_g_ := getg()

	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	_g_.m.locks++

	// Entersyscall must not call any function that might split/grow the stack.
	// (See details in comment above.)
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	_g_.stackguard0 = stackPreempt
	_g_.throwsplit = true

	// Leave SP around for GC and traceback.
	save(pc, sp)
	_g_.syscallsp = sp
	_g_.syscallpc = pc
	// Move the G from _Grunning to _Gsyscall; it stays in that state until
	// the system call ends.
	casgstatus(_g_, _Grunning, _Gsyscall)
	// Sanity check: the saved syscall SP must lie within the G's stack bounds.
	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
			throw("entersyscall")
		})
	}

	if trace.enabled {
		systemstack(traceGoSysCall)
		// systemstack itself clobbers g.sched.{pc,sp} and we might
		// need them later when the G is genuinely blocked in a
		// syscall
		save(pc, sp)
	}

	// If sched.sysmonwait is set, run entersyscall_sysmon on the system stack,
	// then re-save pc/sp.
	if atomic.Load(&sched.sysmonwait) != 0 {
		systemstack(entersyscall_sysmon)
		save(pc, sp)
	}

	if _g_.m.p.ptr().runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
		save(pc, sp)
	}

	// Remember the P's current syscalltick in the M (see the tracing notes above).
	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
	_g_.sysblocktraced = true
	// Key step: the M is about to enter a system call, so the P and M are
	// unlinked (pp.m = 0, m.p = 0). The P is stored in m.oldp so that it can
	// still be found after the system call returns.
	pp := _g_.m.p.ptr()
	pp.m = 0
	_g_.m.oldp.set(pp)
	_g_.m.p = 0
	// The P's status becomes _Psyscall.
	atomic.Store(&pp.status, _Psyscall)
	if sched.gcwaiting != 0 {
		systemstack(entersyscall_gcwait)
		save(pc, sp)
	}

	_g_.m.locks--
}

该方法主要是在系统调用前做准备工作：

修改g的状态为_Gsyscall
检查sysmon线程是否在执行，如果在睡眠则需要唤醒
p放弃m，但是m依旧持有p的指针（保存在m.oldp中），结束调用后优先选择该p
修改p的状态为_Psyscall

做好这些准备工作便可以真正地执行系统调用了。当该线程m长时间阻塞在系统调用中时，一直在运行的sysmon线程会检测到该p的状态，并将其剥离，驱动其他的m（新建或获取）来调度执行该p上的任务。这主要是在retake方法中实现的，该方法还处理了goroutine的抢占调度，这里省略，后面介绍抢占调度时再介绍：

exitsyscall

当系统调用Syscall返回时，会调用exitsyscall方法恢复调度：

// exitsyscall is called after a system call returns. It first tries to get
// a P via exitsyscallfast (the fast path); if that fails it hands the G to
// exitsyscall0 via mcall.
//
//go:nosplit
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
	_g_ := getg()

	_g_.m.locks++ // see comment in entersyscall
	if getcallersp() > _g_.syscallsp {
		throw("exitsyscall: syscall frame is no longer valid")
	}

	_g_.waitsince = 0
	// Take the P remembered in m.oldp and clear the field.
	oldp := _g_.m.oldp.ptr()
	_g_.m.oldp = 0
	// Fast path: try to re-acquire a P.
	if exitsyscallfast(oldp) {
		if trace.enabled {
			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
				systemstack(traceGoStart)
			}
		}
		// There's a cpu for us, so we can run.
		_g_.m.p.ptr().syscalltick++
		// We need to cas the status and scan before resuming...
		casgstatus(_g_, _Gsyscall, _Grunning)

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		_g_.syscallsp = 0
		_g_.m.locks--
		if _g_.preempt {
			// restore the preemption request in case we've cleared it in newstack
			_g_.stackguard0 = stackPreempt
		} else {
			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
			_g_.stackguard0 = _g_.stack.lo + _StackGuard
		}
		_g_.throwsplit = false

		if sched.disable.user && !schedEnabled(_g_) {
			// Scheduling of this goroutine is disabled.
			Gosched()
		}

		return
	}

	_g_.sysexitticks = 0
	if trace.enabled {
		// Wait till traceGoSysBlock event is emitted.
		// This ensures consistency of the trace (the goroutine is started after it is blocked).
		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
			osyield()
		}
		// We can't trace syscall exit right now because we don't have a P.
		// Tracing code can invoke write barriers that cannot run without a P.
		// So instead we remember the syscall exit time and emit the event
		// in execute when we have a P.
		_g_.sysexitticks = cputicks()
	}

	_g_.m.locks--

	// Slow path: no P was obtained, so hand the current G to exitsyscall0,
	// which detaches it and reschedules this M.
	mcall(exitsyscall0)

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	_g_.syscallsp = 0
	_g_.m.p.ptr().syscalltick++
	_g_.throwsplit = false
}

exitsyscallfast

exitsyscall会通过exitsyscallfast尝试重新绑定p，优先选择之前m绑定的p（进入系统调用的时候，p只是单方面解绑了和m的关系，通过m依旧可以找到p）：

// exitsyscallfast tries to obtain a P for the current M without blocking.
// It prefers oldp (the P passed in by the caller) and otherwise tries the
// idle P list. It reports whether a P was obtained.
//
//go:nosplit
func exitsyscallfast(oldp *p) bool {
	_g_ := getg()

	// Freezetheworld sets stopwait but does not retake P's.
	// In that case give up immediately without acquiring a P.
	if sched.stopwait == freezeStopWait {
		return false
	}

	// Try to re-acquire the last P.
	// If oldp is still in _Psyscall, claim it with a CAS to _Pidle and
	// wire it to this M.
	if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
		// There's a cpu for us, so we can run.
		wirep(oldp)
		exitsyscallfast_reacquired()
		return true
	}

	// Otherwise take one from the idle P list.
	// Try to get any other idle P.
	if sched.pidle != 0 {
		var ok bool
		systemstack(func() {
			ok = exitsyscallfast_pidle()
			if ok && trace.enabled {
				if oldp != nil {
					// Wait till traceGoSysBlock event is emitted.
					// This ensures consistency of the trace (the goroutine is started after it is blocked).
					for oldp.syscalltick == _g_.m.syscalltick {
						osyield()
					}
				}
				traceGoSysExit(0)
			}
		})
		if ok {
			return true
		}
	}
	return false
}

exitsyscall0

// exitsyscall0 is the slow path of exitsyscall, invoked via mcall when no P
// could be obtained on the fast path. gp is the goroutine that is returning
// from the system call. Every path through this function ends in a call
// annotated as never returning.
func exitsyscall0(gp *g) {
	_g_ := getg()

	// Change gp's status from _Gsyscall to _Grunnable.
	casgstatus(gp, _Gsyscall, _Grunnable)
	dropg() // unbind gp from this M
	lock(&sched.lock)
	var _p_ *p
	// Try to get an idle P.
	if schedEnabled(_g_) {
		_p_ = pidleget()
	}
	if _p_ == nil {
		// No P obtained: put gp on the global run queue to wait for scheduling.
		globrunqput(gp)
	} else if atomic.Load(&sched.sysmonwait) != 0 {
		// sysmonwait is set: clear it and wake the waiter on sched.sysmonnote.
		atomic.Store(&sched.sysmonwait, 0)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
	// Got a P: acquire it and run gp.
	if _p_ != nil {
		acquirep(_p_)
		execute(gp, false) // Never returns.
	}
	// This M has a locked g: stop the M until it is scheduled again, then run gp.
	if _g_.m.lockedg != 0 {
		// Wait until another thread schedules gp and so m again.
		stoplockedm()
		execute(gp, false) // Never returns.
	}
	// No P could be associated: stop this M and, once woken, enter the scheduler.
	stopm()
	schedule() // Never returns.
}

总结

上述便是golang系统调用的整个流程，大致如下：

业务调用封装好的系统调用函数，编译器翻译到Syscall
执行entersyscall()方法，修改g、p的状态，p单方面解绑m，并检查、唤醒sysmon线程，由其监测系统调用。
当sysmon线程检测到系统调用阻塞时间过长的时候，调用retake，重新调度该p，让p上可执行的g得以执行，不浪费资源。
系统调用返回，进入exitsyscall方法，优先获取之前的p；如果该p已经被占有，则重新获取空闲的p，绑定，然后继续执行该g。当获取不到p的时候，调用exitsyscall0，解绑g，休眠，等待下次唤醒调度。