关于golang:golang调度学习调度流程-五-Syscall

Syscall 函数的定义如下，传入 4 个参数，返回 3 个参数。

func syscall(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno)

syscall 函数的作用是传入系统调用的地址和参数，执行完成后返回。流程主要是：系统调用前执行 entersyscall，设置 g、p 的状态，然后传入参数；执行后写返回值，然后执行 exitsyscall 设置 g、p 的状态。
entersyscall 和 exitsyscall 在 g 的调用中细讲。

// func Syscall(trap int64, a1, a2, a3 uintptr) (r1, r2, err uintptr);
// Trap # in AX, args in DI SI DX R10 R8 R9, return in AX DX
// Note that this differs from "standard" ABI convention, which
// would pass 4th arg in CX, not R10.

// Four 8-byte arguments (trap, a1, a2, a3) at 0..24(FP) and three 8-byte
// results (r1, r2, err) at 32..48(FP): frame size 0, 56 bytes of args+results.
TEXT ·Syscall(SB),NOSPLIT,$0-56
    // Notify the runtime that this goroutine is about to enter a system call.
    CALL    runtime·entersyscall(SB)
    // Load the three syscall arguments into the argument registers.
    MOVQ    a1+8(FP), DI
    MOVQ    a2+16(FP), SI
    MOVQ    a3+24(FP), DX
    MOVQ    trap+0(FP), AX  // syscall entry
    SYSCALL
    // Unsigned compare: AX <= 0xfffffffffffff001 is treated as success.
    CMPQ    AX, $0xfffffffffffff001
    JLS ok
    // Failure path: r1 = -1, r2 = 0, err = -AX.
    MOVQ    $-1, r1+32(FP)
    MOVQ    $0, r2+40(FP)
    NEGQ    AX
    MOVQ    AX, err+48(FP)
    // Notify the runtime that the system call has returned.
    CALL    runtime·exitsyscall(SB)
    RET
ok:
    // Success path: r1 = AX, r2 = DX, err = 0.
    MOVQ    AX, r1+32(FP)
    MOVQ    DX, r2+40(FP)
    MOVQ    $0, err+48(FP)
    CALL    runtime·exitsyscall(SB)
    RET 

// func RawSyscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr);
// Same register setup and result layout as ·Syscall, but without the
// runtime·entersyscall / runtime·exitsyscall calls around the SYSCALL
// instruction.
//
// Failure is detected the same way ·Syscall does it: an unsigned AX above
// 0xfffffffffffff001 is an error and err is set to -AX. The previous
// carry-flag test (JCC) with an un-negated AX did not match ·Syscall's
// error convention.
// NOTE(review): this assumes the same target OS as ·Syscall (AX-range error
// reporting); confirm before applying to a carry-flag platform.
TEXT    ·RawSyscall(SB),NOSPLIT,$0-56
    MOVQ    a1+8(FP), DI
    MOVQ    a2+16(FP), SI
    MOVQ    a3+24(FP), DX
    MOVQ    trap+0(FP), AX    // syscall entry
    SYSCALL
    CMPQ    AX, $0xfffffffffffff001
    JLS ok1
    // Failure path: r1 = -1, r2 = 0, err = -AX.
    MOVQ    $-1, r1+32(FP)    // r1
    MOVQ    $0, r2+40(FP)    // r2
    NEGQ    AX
    MOVQ    AX, err+48(FP)    // errno
    RET
ok1:
    // Success path: r1 = AX, r2 = DX, err = 0.
    MOVQ    AX, r1+32(FP)    // r1
    MOVQ    DX, r2+40(FP)    // r2
    MOVQ    $0, err+48(FP)    // errno
    RET

显然 Syscall 比 RawSyscall 多调用了两个方法：entersyscall 和 exitsyscall。增加这两个函数的调用，让调度器有机会对即将进入系统调用的 goroutine 进行调整，方便调度。

// entersyscall is called when a goroutine is about to make a system call.
//
// It passes the caller's PC and SP to reentersyscall, which moves the G to
// _Gsyscall for the duration of the call and detaches the P from the M.
// The M keeps a reference to that P in m.oldp, so it can look the P up again
// once the system call returns.
//
// NOTE(review): reentersyscall and exitsyscall carry //go:nosplit; confirm
// whether this wrapper is expected to carry the directive as well.
func entersyscall() {
	reentersyscall(getcallerpc(), getcallersp())
}
// reentersyscall records that the current goroutine is entering a system
// call: it saves pc/sp for the GC and traceback, moves the G to _Gsyscall,
// detaches the P from the M (remembering it in m.oldp) and marks the P as
// _Psyscall.
//
// pc and sp are the caller's program counter and stack pointer.
//
// Syscall tracing:
// At the start of a syscall we emit traceGoSysCall to capture the stack trace.
// If the syscall does not block, that is it, we do not emit any other events.
// If the syscall blocks (that is, P is retaken), retaker emits traceGoSysBlock;
// when syscall returns we emit traceGoSysExit and when the goroutine starts running
// (potentially instantly, if exitsyscallfast returns true) we emit traceGoStart.
// To ensure that traceGoSysExit is emitted strictly after traceGoSysBlock,
// we remember current value of syscalltick in m (_g_.m.syscalltick = _g_.m.p.ptr().syscalltick),
// whoever emits traceGoSysBlock increments p.syscalltick afterwards;
// and we wait for the increment before emitting traceGoSysExit.
// Note that the increment is done even if tracing is not enabled,
// because tracing can be enabled in the middle of syscall. We don't want the wait to hang.
//
//go:nosplit
func reentersyscall(pc, sp uintptr) {
	_g_ := getg()

	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	// Balanced by the locks-- at the end of this function.
	_g_.m.locks++

	// Entersyscall must not call any function that might split/grow the stack.
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	_g_.stackguard0 = stackPreempt
	_g_.throwsplit = true

	// Leave SP around for GC and traceback.
	save(pc, sp)
	_g_.syscallsp = sp
	_g_.syscallpc = pc
	// Move the G to _Gsyscall; exitsyscall moves it back to _Grunning
	// once the system call has returned.
	casgstatus(_g_, _Grunning, _Gsyscall)
	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent", hex(_g_.syscallsp), "[", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
			throw("entersyscall")
		})
	}

	if trace.enabled {
		systemstack(traceGoSysCall)
		// systemstack itself clobbers g.sched.{pc,sp} and we might
		// need them later when the G is genuinely blocked in a
		// syscall
		save(pc, sp)
	}

	if atomic.Load(&sched.sysmonwait) != 0 {
		systemstack(entersyscall_sysmon)
		save(pc, sp)
	}

	if _g_.m.p.ptr().runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
		save(pc, sp)
	}

	_g_.m.syscalltick = _g_.m.p.ptr().syscalltick
	_g_.sysblocktraced = true
	// Key step: the M is about to be stuck in a system call, so the P lets
	// go of it. The M still remembers the P through m.oldp, so exitsyscall
	// can find it again after the system call returns.
	pp := _g_.m.p.ptr()
	pp.m = 0
	_g_.m.oldp.set(pp)
	_g_.m.p = 0
	// The P is now in _Psyscall.
	atomic.Store(&pp.status, _Psyscall)
	if sched.gcwaiting != 0 {
		systemstack(entersyscall_gcwait)
		save(pc, sp)
	}

	_g_.m.locks--
}

该方法主要是为系统调用做准备工作：

修改 g 的状态为 _Gsyscall
检查 sysmon 线程是否在执行，若在睡眠则需要唤醒
p 放弃 m，但是 m 依旧持有 p 的指针，结束调用后优先选择该 p
修改 p 的状态为 _Psyscall

做好这些准备工作便可以真正地执行系统调用了。当该线程 m 长时间阻塞在系统调用的时候，一直在运行的 sysmon 线程会检测到该 p 的状态，并将其剥离，驱动其他的 m（新建或获取）来调度执行该 p 上的任务。这其中主要是在 retake 方法中实现的，该方法还处理了 goroutine 抢占调度，这里省略，后面介绍抢占调度时再介绍：

当系统调用返回时，会调用 exitsyscall 方法恢复调度：

// exitsyscall is called after a system call has returned. It tries to get a
// P for the current M so the goroutine can keep running.
//
// Fast path: exitsyscallfast obtains a P (preferably the one remembered in
// m.oldp); the G goes back to _Grunning and the function returns.
// Slow path: no P could be obtained; the G is handed to exitsyscall0 via
// mcall, and the code after mcall runs once the G is running again.
//
//go:nosplit
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
	_g_ := getg()

	_g_.m.locks++ // see comment in entersyscall
	if getcallersp() > _g_.syscallsp {
		throw("exitsyscall: syscall frame is no longer valid")
	}

	_g_.waitsince = 0
	oldp := _g_.m.oldp.ptr()
	_g_.m.oldp = 0
	// Try to get a P again.
	if exitsyscallfast(oldp) {
		if trace.enabled {
			if oldp != _g_.m.p.ptr() || _g_.m.syscalltick != _g_.m.p.ptr().syscalltick {
				systemstack(traceGoStart)
			}
		}
		// There's a cpu for us, so we can run.
		_g_.m.p.ptr().syscalltick++
		// We need to cas the status and scan before resuming...
		casgstatus(_g_, _Gsyscall, _Grunning)

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		_g_.syscallsp = 0
		_g_.m.locks--
		if _g_.preempt {
			// restore the preemption request in case we've cleared it in newstack
			_g_.stackguard0 = stackPreempt
		} else {
			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
			_g_.stackguard0 = _g_.stack.lo + _StackGuard
		}
		_g_.throwsplit = false

		if sched.disable.user && !schedEnabled(_g_) {
			// Scheduling of this goroutine is disabled.
			Gosched()
		}

		return
	}

	_g_.sysexitticks = 0
	if trace.enabled {
		// Wait till traceGoSysBlock event is emitted.
		// This ensures consistency of the trace (the goroutine is started after it is blocked).
		for oldp != nil && oldp.syscalltick == _g_.m.syscalltick {
			osyield()
		}
		// We can't trace syscall exit right now because we don't have a P.
		// Tracing code can invoke write barriers that cannot run without a P.
		// So instead we remember the syscall exit time and emit the event
		// in execute when we have a P.
		_g_.sysexitticks = cputicks()
	}

	_g_.m.locks--

	// No P could be obtained: detach the current G and reschedule this M.
	mcall(exitsyscall0)

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	_g_.syscallsp = 0
	_g_.m.p.ptr().syscalltick++
	_g_.throwsplit = false
}

exitsyscall 会尝试重新绑定 p，优先选择之前 m 绑定的 p（进入系统调用的时候，p 只是单方面解绑了和 m 的关系，通过 m 依旧可以找到 p）：


// exitsyscallfast tries to obtain a P for the current M without going
// through the scheduler.
//
// oldp is the P the M held before the system call (may be nil). The function
// first tries to take oldp back if it is still in _Psyscall, then falls back
// to exitsyscallfast_pidle when sched.pidle is non-zero.
//
// It reports whether a P was obtained.
//
//go:nosplit
func exitsyscallfast(oldp *p) bool {
	_g_ := getg()

	// Freezetheworld sets stopwait but does not retake P's.
	// In that case give up right away without a P.
	if sched.stopwait == freezeStopWait {
		return false
	}

	// Try to re-acquire the last P.
	// The CAS succeeds only if no one else has changed the P's status
	// away from _Psyscall in the meantime.
	if oldp != nil && oldp.status == _Psyscall && atomic.Cas(&oldp.status, _Psyscall, _Pidle) {
		// There's a cpu for us, so we can run.
		wirep(oldp)
		exitsyscallfast_reacquired()
		return true
	}

	// Otherwise, try to get any other idle P.
	if sched.pidle != 0 {
		var ok bool
		systemstack(func() {
			ok = exitsyscallfast_pidle()
			if ok && trace.enabled {
				if oldp != nil {
					// Wait till traceGoSysBlock event is emitted.
					// This ensures consistency of the trace (the goroutine is started after it is blocked).
					for oldp.syscalltick == _g_.m.syscalltick {
						osyield()
					}
				}
				traceGoSysExit(0)
			}
		})
		if ok {
			return true
		}
	}
	return false
}

func exitsyscall0(gp *g) {_g_ := getg()
        // 批改 g 状态为 _Grunable
    casgstatus(gp, _Gsyscall, _Grunnable)
    dropg()                  // 解绑
    lock(&sched.lock)
    var _p_ *p
    // 尝试获取 p
    if schedEnabled(_g_) {_p_ = pidleget()
    }
    if _p_ == nil {
            // 未获取到 p，g 进入全局队列期待调度
        globrunqput(gp)
    } else if atomic.Load(&sched.sysmonwait) != 0 {atomic.Store(&sched.sysmonwait, 0)
        notewakeup(&sched.sysmonnote)
    }
    unlock(&sched.lock)
    // 获取到 p，绑定，而后执行
    if _p_ != nil {acquirep(_p_)
        execute(gp, false) // Never returns.
    }
    //  // m 有绑定的 g，解绑 p 而后绑定的 g 来唤醒，执行
    if _g_.m.lockedg != 0 {
        // Wait until another thread schedules gp and so m again.
        stoplockedm()
        execute(gp, false) // Never returns.
    }
    // 关联 p 失败了，休眠，期待唤醒，在进行调度。stopm()
    schedule() // Never returns.}

上述便是 golang 系统调用的整个流程，大致如下：

业务调用封装好的系统调用函数，编译器翻译到 Syscall
执行 entersyscall() 方法，修改 g、p 的状态，p 单方面解绑 m，并检查唤醒 sysmon 线程，检测系统调用。
当 sysmon 线程检测到系统调用阻塞时间过长的时候，调用 retake，重新调度该 p，让 p 上可执行的 g 得以执行，不浪费资源
系统调用返回，进入 exitsyscall 方法，优先获取之前的 p，如果该 p 已经被占有，重新获取空闲的 p，绑定，然后继续执行该 g。当获取不到 p 的时候，调用 exitsyscall0，解绑 g，休眠，等待下次唤醒调度。

syscall 函数

entersyscall

exitsyscall

exitsyscallfast

exitsyscall0

总结