Python线程为什么搞个setDaemon

共计 4955 个字符，预计需要花费 13 分钟才能阅读完成。

使用 Python 都不会错过线程这个知识，但是每次谈到线程，大家都下意识说 GIL 全局锁，

但其实除了这个老生常谈的话题，还有很多有价值的东西可以探索的，譬如：setDaemon()。

我们会写这样的代码来启动多线程:

 import time
import threading
 
def test():
    while True:
        print threading.currentThread()
        time.sleep(1)
 
if __name__ == '__main__':
    # Build both workers first, then start them in creation order. Neither is
    # flagged as a daemon.
    workers = [threading.Thread(target=test) for _ in range(2)]
    for worker in workers:
        worker.start()

输出：

 ^C<Thread(Thread-2, started 123145414086656)>
<Thread(Thread-1, started 123145409880064)>
^C^C^C^C^C^C<Thread(Thread-2, started 123145414086656)>    # ctrl-c 多次都无法中断
 <Thread(Thread-1, started 123145409880064)>
^C<Thread(Thread-1, started 123145409880064)>
 <Thread(Thread-2, started 123145414086656)>
<Thread(Thread-1, started 123145409880064)>
 <Thread(Thread-2, started 123145414086656)>
<Thread(Thread-2, started 123145414086656)><Thread(Thread-1, started 123145409880064)>
...（两个线程竞相打印）

通过 Threading 我们可以很简单地实现并发的需求，但是同时也给我们带来了一个大难题: 怎么退出呢？

在上面的程序运行中，我已经尝试按了多次的 ctrl-c，都无法中断这程序工作的热情！最后是迫不得已用 kill 才结束。

那么怎样才能避免这种问题呢？或者说，怎样才能在主线程退出的时候，子线程也自动退出呢？

有过相似经验的老司机肯定就知道，setDaemon() 将线程搞成 守护线程 不就得了呗:

 import time
import threading
 
def test():
    while True:
        print threading.currentThread()
        time.sleep(1)
 
if __name__ == '__main__':
    # Create, flag as daemon, and start each worker in turn; the daemon flag is
    # set before start() is called.
    for _ in range(2):
        worker = threading.Thread(target=test)
        worker.setDaemon(True)
        worker.start()

输出：

 python2.7 1.py
<Thread(Thread-1, started daemon 123145439883264)>
<Thread(Thread-2, started daemon 123145444089856)>（直接退出了）

直接退出？理所当然，因为主线程已经执行完了，确实是已经结束了，正因为设置了守护线程，所以这时候子线程也一并退出了。

那么问题来了，我们以前学 C 语言的时候，好像不用 Daemon 也可以啊，比如这个：

 #include <stdio.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <unistd.h>
 
/*
 * Thread start routine: print the ID obtained from the SYS_gettid system call
 * once per second, forever.
 *
 * args: unused; present only to match the pthread start-routine signature.
 * Returns: never returns in practice; the trailing return keeps the non-void
 *          function well-formed.
 */
void *test(void *args)
{
    (void)args;

    while (1)
    {
        /* syscall() returns long, so format it with %ld rather than %d. */
        printf("ThreadID: %ld\n", (long)syscall(SYS_gettid));
        sleep(1);
    }

    return NULL;
}
 
/*
 * Start one worker thread running test(), stay alive for two seconds, then
 * return from main without joining the worker.
 *
 * Returns 0 on success, 1 if the worker thread could not be created.
 */
int main()
{
    pthread_t t1;
    int ret = pthread_create(&t1, NULL, test, NULL);
    if (ret != 0)
    {
        printf("Thread create failed\n");
        /* Without a worker there is nothing left to demonstrate. */
        return 1;
    }

    /* Keep main alive briefly so the worker has a chance to print. */
    sleep(2);
    printf("Main run..\n");

    /* Returning from main ends the whole process, worker included. */
    return 0;
}

输出：

 # gcc test_pthread.c -lpthread -o a && ./a
ThreadID: 31233
ThreadID: 31233
Main run..（毫不犹豫退出了）

既然 Python 也是用 C 写的，为什么 Python 多线程退出需要 setDaemon？？？

想要解决这个问题，我们怕不是要从主线程退出的一刻开始讲起，从前 ….

Python 解释器在结束的时候，会调用 wait_for_thread_shutdown 来做个例行清理：

 // python2.7/python/pythonrun.c
 
/*
 * Invoke the Python-level threading._shutdown() hook, if the threading
 * module is present in the interpreter's module table.
 *
 * When WITH_THREAD is not defined the function body is empty.
 */
static void
wait_for_thread_shutdown(void)
{
#ifdef WITH_THREAD
    PyObject *result;
    PyThreadState *tstate = PyThreadState_GET();
    /* Look "threading" up in the interpreter's module table; nothing is
       imported here. */
    PyObject *threading = PyMapping_GetItemString(tstate->interp->modules,
                                                  "threading");
    if (threading == NULL) {
        /* threading not imported */
        PyErr_Clear();
        return;
    }
    /* Call threading._shutdown() with no arguments. */
    result = PyObject_CallMethod(threading, "_shutdown", "");
    if (result == NULL)
        /* The call failed: report the error here instead of propagating it. */
        PyErr_WriteUnraisable(threading);
    else
        Py_DECREF(result);
    Py_DECREF(threading);
#endif
}

我们看到 #ifdef WITH_THREAD 就大概猜到：根据解释器编译时是否启用了线程支持，这个函数会执行不同的逻辑（未启用时函数体为空）。

很明显，我们上面的脚本，就是命中了这个线程逻辑：它会从解释器的模块表（即 sys.modules）里取出已经导入的 threading 模块（如果从未 import 过 threading，就直接返回），然后执行其中的 _shutdown 函数。

这个函数的内容，我们可以从 threading 模块看到：

 # /usr/lib/python2.7/threading.py
 
# Bound _exitfunc method of a newly created _MainThread instance.
# NOTE(review): shown ahead of the class for emphasis; as executable code this
# assignment has to come after the _MainThread definition.
_shutdown = _MainThread()._exitfunc
 
class _MainThread(Thread):
    """Thread object named "MainThread" that registers itself on creation."""
 
    def __init__(self):
        Thread.__init__(self, name="MainThread")
        # Set the started event directly on the instance.
        self._Thread__started.set()
        self._set_ident()
        # Register this object in _active under the ident of the calling thread.
        with _active_limbo_lock:
            _active[_get_ident()] = self
 
    def _set_daemon(self):
        # Always returns False.
        return False
 
    def _exitfunc(self):
        """Join live non-daemon threads one at a time until none is left."""
        self._Thread__stop()
        t = _pickSomeNonDaemonThread()
        if t:
            if __debug__:
                self._note("%s: waiting for other threads", self)
        # Each join() blocks until that thread finishes; then look for another.
        while t:
            t.join()
            t = _pickSomeNonDaemonThread()
        if __debug__:
            self._note("%s: exiting", self)
        self._Thread__delete()
 
def _pickSomeNonDaemonThread():
    """Return one live thread whose daemon flag is false, or None."""
    # enumerate here is this module's own function, not the builtin.
    for t in enumerate():
        # Threads with daemon set are skipped, so the caller never joins them.
        if not t.daemon and t.is_alive():
            return t
    return None

_shutdown 实际上也就是 _MainThread()._exitfunc 的内容，主要是从 enumerate() 返回的结果里，不断挑出仍然存活的非守护线程，逐个 join() 回收，直到一个都不剩

而 enumerate() 是什么？

这个平时我们也会使用，就是当前进程的所有 符合条件 的 Python 线程对象:

 >>> print threading.enumerate()
[<_MainThread(MainThread, started 140691994822400)>]

 # /usr/lib/python2.7/threading.py
 
def enumerate():
    """Return a list of all Thread objects currently alive.
 
    The list includes daemonic threads, dummy thread objects created by
    current_thread(), and the main thread. It excludes terminated threads and
    threads that have not yet been started.
 
    """
    # Read both registries while holding the lock and return them concatenated:
    # _active entries first, then _limbo entries.
    with _active_limbo_lock:
        return _active.values() + _limbo.values()

符合条件？？？符合什么条件？？不着急，容我娓娓道来：

在 Python 的线程模型里面，虽然有 GIL 的干涉，但是线程却是实实在在的原生线程

Python 只是多加一层封装: t_bootstrap，然后再在这层封装里面执行真正的处理函数。

在 threading 模块内，我们也能看到一层相似的封装：

 # /usr/lib/python2.7/threading.py
 
class Thread(_Verbose):
    def start(self):
        ... 省略
        # Record the thread in _limbo before the new thread is launched.
        with _active_limbo_lock:
            _limbo[self] = self             # key point
        try:
            _start_new_thread(self.__bootstrap, ())
        except Exception:
            # Launch failed: undo the _limbo registration and re-raise.
            with _active_limbo_lock:
                del _limbo[self]            # key point
            raise
        # Block until the started event is set.
        self.__started.wait()
        
    def __bootstrap(self):
        # This is the callable handed to _start_new_thread() in start().
        try:
            self.__bootstrap_inner()
        except:
            # Swallow the exception only when the thread is daemonic and the
            # module-level _sys is already None; otherwise re-raise.
            if self.__daemonic and _sys is None:
                return
            raise
         
    def __bootstrap_inner(self):
        try:
            ... 省略
            # Move the thread from _limbo to _active, keyed by its ident.
            with _active_limbo_lock:
                _active[self.__ident] = self # key point
                del _limbo[self]             # key point
            ... 省略

在上面的一连串代码中，_limbo 和 _active 的变化都已经标记了重点，我们可以得到下面的定义：

     _limbo : 就是调用了 start，但新线程还没真正跑起来（还没在 __bootstrap_inner 里登记进 _active）的对象
    _active: 活生生的线程对象

那么回到上文，当 _MainThread()._exitfunc 执行时，是会检查 _limbo + _active 里是否还有存活的非守护线程对象，

只要存在一个，就会调用 join(), 这个也就是堵塞的原因。

无限期堵塞不行，自作聪明帮用户强杀线程也不是办法，那么怎么做才会比较优雅呢？

那就是提供一个途径，让用户来设置随进程退出的标记，那就是 setDaemon：

 class Thread():
    ... 省略
    def setDaemon(self, daemonic):
        # Plain setter: stores the flag on the daemon attribute.
        self.daemon = daemonic
        
    ... 省略
  
# Already shown earlier; repeated here for convenience.
def _pickSomeNonDaemonThread():
    """Return one live thread whose daemon flag is false, or None."""
    for t in enumerate():
        # This is where the daemon attribute assigned by setDaemon() is read.
        if not t.daemon and t.is_alive():
            return t
    return None

只要把子线程全部 setDaemon(True)，那么主线程准备退出时，_pickSomeNonDaemonThread() 就挑不出需要 join() 的线程，也就不会再堵塞；进程一结束，这些子线程全都乖乖地由操作系统销毁回收。

之前一直很好奇，pthread 都没有 daemon 属性，为什么 Python 会有呢？

结果这玩意真的就只作用于 Python 层（手动笑脸）

区区一个 setDaemon 可以引出很多本质内容的探索机会，比如线程的创建过程，管理流程等。

这些都是很有意思的内容，我们应该大胆探索，不局限于使用~

欢迎各位大神指点交流, QQ 讨论群: 258498217
转载请注明来源: https://segmentfault.com/a/11…

前言

线程的使用与存在的问题

守护线程

突如其来的 daemon

顺藤摸瓜

从起源谈存活条件

setDaemon 用处

结语

Just My Socks（注册教程内含优惠码）

	import time
	import threading

	def test():
	while True:
	print threading.currentThread()
	time.sleep(1)

	if __name__ == '__main__':
	t1 = threading.Thread(target=test)
	t2 = threading.Thread(target=test)
	t1.start()
	t2.start()

	^C<Thread(Thread-2, started 123145414086656)>
	<Thread(Thread-1, started 123145409880064)>
	^C^C^C^C^C^C<Thread(Thread-2, started 123145414086656)> # ctrl-c 多次都无法中断
	<Thread(Thread-1, started 123145409880064)>
	^C<Thread(Thread-1, started 123145409880064)>
	<Thread(Thread-2, started 123145414086656)>
	<Thread(Thread-1, started 123145409880064)>
	<Thread(Thread-2, started 123145414086656)>
	<Thread(Thread-2, started 123145414086656)><Thread(Thread-1, started 123145409880064)>
	...（两个线程竞相打印）

	python2.7 1.py
	<Thread(Thread-1, started daemon 123145439883264)>
	<Thread(Thread-2, started daemon 123145444089856)>（直接退出了）

	#include <stdio.h>
	#include <sys/syscall.h>
	#include <pthread.h>

	/*
	 * Thread start routine: print the ID obtained from the SYS_gettid system
	 * call once per second, forever.
	 *
	 * args: unused; present only to match the pthread start-routine signature.
	 * Returns: never returns in practice; the trailing return keeps the
	 *          non-void function well-formed.
	 */
	void *test(void *args)
	{
	    (void)args;

	    while (1)
	    {
	        /* syscall() returns long, so format it with %ld rather than %d. */
	        printf("ThreadID: %ld\n", (long)syscall(SYS_gettid));
	        sleep(1);
	    }

	    return NULL;
	}

	int main()
	{
	pthread_t t1 ;
	int ret = pthread_create(&t1, NULL, test, NULL);
	if (ret != 0)
	{printf("Thread create failed\n");
	}

	// 避免直接退出
	sleep(2);
	printf("Main run..\n");
	}

	# gcc test_pthread.c -lpthread -o a && ./a
	ThreadID: 31233
	ThreadID: 31233
	Main run..（毫不犹豫退出了）

	// python2.7/python/pythonrun.c

	static void
	wait_for_thread_shutdown(void)
	{
	#ifdef WITH_THREAD
	PyObject *result;
	PyThreadState *tstate = PyThreadState_GET();
	PyObject *threading = PyMapping_GetItemString(tstate->interp->modules,
	"threading");
	if (threading == NULL) {
	/* threading not imported */
	PyErr_Clear();
	return;
	}
	result = PyObject_CallMethod(threading, "_shutdown", "");
	if (result == NULL)
	PyErr_WriteUnraisable(threading);
	else
	Py_DECREF(result);
	Py_DECREF(threading);
	#endif
	}

	# /usr/lib/python2.7/threading.py

	_shutdown = _MainThread()._exitfunc

	class _MainThread(Thread):

	def __init__(self):
	Thread.__init__(self, name="MainThread")
	self._Thread__started.set()
	self._set_ident()
	with _active_limbo_lock:
	_active[_get_ident()] = self

	def _set_daemon(self):
	return False

	def _exitfunc(self):
	self._Thread__stop()
	t = _pickSomeNonDaemonThread()
	if t:
	if __debug__:
	self._note("%s: waiting for other threads", self)
	while t:
	t.join()
	t = _pickSomeNonDaemonThread()
	if __debug__:
	self._note("%s: exiting", self)
	self._Thread__delete()

	def _pickSomeNonDaemonThread():
	for t in enumerate():
	if not t.daemon and t.is_alive():
	return t
	return None

	>>> print threading.enumerate()
	[<_MainThread(MainThread, started 140691994822400)>]

	# /usr/lib/python2.7/threading.py

	def enumerate():
	"""Return a list of all Thread objects currently alive.

	The list includes daemonic threads, dummy thread objects created by
	current_thread(), and the main thread. It excludes terminated threads and
	threads that have not yet been started.

	"""
	with _active_limbo_lock:
	return _active.values() + _limbo.values()

	# /usr/lib/python2.7/threading.py

	class Thread(_Verbose):
	def start(self):
	... 省略
	with _active_limbo_lock:
	_limbo[self] = self # 重点
	try:
	_start_new_thread(self.__bootstrap, ())
	except Exception:
	with _active_limbo_lock:
	del _limbo[self] # 重点
	raise
	self.__started.wait()

	def __bootstrap(self):
	try:
	self.__bootstrap_inner()
	except:
	if self.__daemonic and _sys is None:
	return
	raise

	def __bootstrap_inner(self):
	try:
	... 省略
	with _active_limbo_lock:
	_active[self.__ident] = self # 重点
	del _limbo[self] # 重点
	... 省略

	_limbo : 就是调用了 start，但新线程还没真正跑起来（还没在 __bootstrap_inner 里登记进 _active）的对象
	_active: 活生生的线程对象

	class Thread():
	... 省略
	def setDaemon(self, daemonic):
	self.daemon = daemonic

	... 省略

	# 其实上面也贴了，这里再贴一次
	def _pickSomeNonDaemonThread():
	for t in enumerate():
	if not t.daemon and t.is_alive():
	return t
	return None