关于redis:redis淘汰策略原理周期删除

基于 redis5.0

redis 周期删除过期数据的函数是activeExpireCycle，有两个中央调用这个函数：

beforeSleep：redis 在阻塞期待文件事件之前，即过程阻塞之前，会调用 beforeSleep 函数，清理过期数据的模式是ACTIVE_EXPIRE_CYCLE_FAST，疾速清理，不影响主流程。

 // server.c
/* This function gets called every time Redis is entering the
 * main loop of the event driven library, that is, before to sleep
 * for ready file descriptors. */
void beforeSleep(struct aeEventLoop *eventLoop) {
    ……
    /* Run a fast expire cycle (the called function will return
     * ASAP if a fast cycle is not needed). */
    if (server.active_expire_enabled && server.masterhost == NULL)
        activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
    ……
}

databasesCron：redis 周期工作，清理过期数据的模式是 ACTIVE_EXPIRE_CYCLE_SLOW，绝对ACTIVE_EXPIRE_CYCLE_FAST 执行工夫略微长点。

 // server.c
void databasesCron(void) {
    /* Expire keys by random sampling. Not required for slaves
     * as master will synthesize DELs for us. */
    if (server.active_expire_enabled) {if (server.masterhost == NULL) {activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
        } else {expireSlaveKeys();
        }
    }
    ……
}

以后节点为 master 时，server.masterhost == NULL，才执行 activeExpireCycle 函数。

 // server.h
int active_expire_effort;       /* 致力力度，1-10 取值范畴，默认 1，也就是遍历过期字典的力度，力度越大，遍历数量越多，然而性能损耗更多 */
double stat_expired_stale_perc; /* 已过期键占比近似值：以先前值占比 95%，本次调用运行值占比 5% 进行加权均匀 */    
 
// expire.c
#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which we do extra efforts. */  
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */  
 
void activeExpireCycle(int type) {
    effort = server.active_expire_effort-1, /* Rescale from 0 to 9. */
    config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
    config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE - effort;    
    long long start = ustime() /* server.c，单位 us */
    ……
    if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
        /* Don't start a fast cycle if the previous cycle did not exit
         * for time limit, unless the percentage of estimated stale keys is
         * too high. Also never repeat a fast cycle for the same period
         * as the fast cycle total duration itself. */
        if (!timelimit_exit &&
            server.stat_expired_stale_perc < config_cycle_acceptable_stale)
            return;
 
        if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
            return;
 
        last_fast_cycle = start;
    }
    ……
}

执行 FAST 模式有几个前提条件：

上一次工作不是因为超时而退出，且已过期键占比近似值 server.stat_expired_stale_perc 小于可容忍下限 config_cycle_acceptable_stale：也就是说过期数据没有那么多，SHOW 模式够用，不须要通过 FAST 来帮助。
间隔上一次 FAST 工夫，未超过指定的工夫距离，默认是2000us。

 // server.h
#define CRON_DBS_PER_CALL 16  /* 每次查看多少个数据库 */
#define CONFIG_DEFAULT_HZ        10             /* Time interrupt calls/sec. */
 
// server.c
server.hz = server.config_hz = CONFIG_DEFAULT_HZ;
   
// expire.c                                   
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */  
void activeExpireCycle(int type) {    
    ……   
    int dbs_per_call = CRON_DBS_PER_CALL;    
    config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC + 2 * effort,    
    config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,                       
    ……                                   
    /* We usually should test CRON_DBS_PER_CALL per iteration, with
     * two exceptions:
     *
     * 1) Don't test more DBs than we have.
     * 2) If last time we hit the time limit, we want to scan all DBs
     * in this iteration, as there is work to do in some DB and we don't want
     * expired keys to use memory for too much time. */
    if (dbs_per_call > server.dbnum || timelimit_exit)
        dbs_per_call = server.dbnum;
 
    /* We can use at max 'config_cycle_slow_time_perc' percentage of CPU
     * time per iteration. Since this function gets called with a frequency of
     * server.hz times per second, the following is the max amount of
     * microseconds we can spend in this function. */
    timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
    timelimit_exit = 0;
    if (timelimit <= 0) timelimit = 1;
 
    if (type == ACTIVE_EXPIRE_CYCLE_FAST)
        timelimit = config_cycle_fast_duration; /* in microseconds. */                                   
                                   
    ……  
}

一次扫描 dbs_per_call 个 DB，默认值为16：

如果超过 redis 的 DB 数量，则 dbs_per_call 改为 redis 的 DB 数量。
如果上一次是因为 timelimit_exit（超时）而完结，阐明过期数据很多，则须要扫描所有 DB，尽快清理掉所有过期数据，dbs_per_call改为 redis 的 DB 数量。

计算单次执行工夫 timelimit：

SLOW模式下，默认为25*1000000/10/100us。
FAST模式下，默认为1000us。

 // dict.h
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE     4
 
// expire.c                                   
void activeExpireCycle(int type) {    
    ……
    static unsigned int current_db = 0; /* Last DB tested. */
    ……   
    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
        /* Expired and checked in a single loop. */
        unsigned long expired, sampled;
 
        redisDb *db = server.db+(current_db % server.dbnum);
 
        /* Increment the DB now so we are sure if we run out of time
         * in the current DB we'll restart from the next. This allows to
         * distribute the time evenly across DBs. */
        current_db++;
 
        /* Continue to expire if at the end of the cycle there are still
         * a big percentage of keys to expire, compared to the number of keys
         * we scanned. The percentage, stored in config_cycle_acceptable_stale
         * is not fixed, but depends on the Redis configured "expire effort". */
        do {
            unsigned long num, slots;
            long long now, ttl_sum;
            int ttl_samples;
            iteration++;
 
            /* If there is nothing to expire try next DB ASAP. */
            if ((num = dictSize(db->expires)) == 0) {
                db->avg_ttl = 0;
                break;
            }
            slots = dictSlots(db->expires);
            now = mstime();
 
            /* When there are less than 1% filled slots, sampling the key
             * space is expensive, so stop here waiting for better times...
             * The dictionary will be resized asap. */
            if (slots > DICT_HT_INITIAL_SIZE &&
                (num*100/slots < 1)) break;
            ……                
            /* We can't block forever here even if there are many keys to
             * expire. So after a given amount of milliseconds return to the
             * caller waiting for the other active expire cycle. */
            if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
                elapsed = ustime()-start;
                if (elapsed > timelimit) {
                    timelimit_exit = 1;
                    server.stat_expired_time_cap_reached_count++;
                    break;
                }
            }
            /* We don't repeat the cycle for the current database if there are
             * an acceptable amount of stale keys (logically expired but yet
             * not reclaimed). */            
        } while (……); 
    }                                                                      
    ……
}

在未超时的前提下，循环 DB 清理数据：

动态变量 current_db 记录上一次清理的 DB 序号，每次都加 1。
如果以后 DB 没有 设置了过期工夫 的记录，则完结以后 DB 循环，清理下一个 DB。
如果存在过期数据，然而存储空间使用率有余 1%（过期数据量 /hash 桶数量 < 0.01），这样遍历数据无效命中率会很低，解决起来会浪费时间，前面的拜访会很快触发字典的缩容，缩容后再进行解决效率更高, 完结以后 DB 循环，清理下一个 DB。
每循环 16（0xf） 次查看下是否曾经超时，如果曾经超时，则设置超时标记timelimit_exit，即完结当前任务。

 // expire.c     
#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */
                              
void activeExpireCycle(int type) {    
    ……
    config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP + ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4 * effort,   
    ……    
    long total_sampled = 0;
    long total_expired = 0;
    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {    
    ……
        do {
            unsigned long num, slots;
            long long now, ttl_sum;
            int ttl_samples;
            iteration++;    
            ……    
            expired = 0;
            sampled = 0;
            ttl_sum = 0;
            ttl_samples = 0;    
            if (num > config_keys_per_loop)
                num = config_keys_per_loop;
 
            long max_buckets = num*20;
            long checked_buckets = 0;
            while (sampled < num && checked_buckets < max_buckets) {for (int table = 0; table < 2; table++) {if (table == 1 && !dictIsRehashing(db->expires)) break;
 
                    unsigned long idx = db->expires_cursor;
                    idx &= db->expires->ht[table].sizemask;
                    dictEntry *de = db->expires->ht[table].table[idx];
                    long long ttl;
 
                    /* Scan the current bucket of the current table. */
                    checked_buckets++;
                    while(de) {
                        /* Get the next entry now since this entry may get
                         * deleted. */
                        dictEntry *e = de;
                        de = de->next;
 
                        ttl = dictGetSignedIntegerVal(e)-now;
                        if (activeExpireCycleTryExpire(db,e,now)) expired++;
                        if (ttl > 0) {
                            /* We want the average TTL of keys yet
                             * not expired. */
                            ttl_sum += ttl;
                            ttl_samples++;
                        }
                        sampled++;
                    }
                }
                db->expires_cursor++;
            }            
            total_expired += expired;
            total_sampled += sampled; 
            /* Update the average TTL stats for this database. */
            if (ttl_samples) {
                long long avg_ttl = ttl_sum/ttl_samples;
 
                /* Do a simple running average with a few samples.
                 * We just use the current estimate with a weight of 2%
                 * and the previous estimate with a weight of 98%. */
                if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
                db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
            } 
        } while (sampled == 0 || (expired*100/sampled) > config_cycle_acceptable_stale);
    }                                                                        
    ……
}
 
int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {long long t = dictGetSignedIntegerVal(de);
    if (now > t) {sds key = dictGetKey(de);
        robj *keyobj = createStringObject(key,sdslen(key));
 
        propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
        if (server.lazyfree_lazy_expire)
            dbAsyncDelete(db,keyobj);
        else
            dbSyncDelete(db,keyobj);
        notifyKeyspaceEvent(NOTIFY_EXPIRED,
            "expired",keyobj,db->id);
        signalModifiedKey(NULL, db, keyobj);
        decrRefCount(keyobj);
        server.stat_expiredkeys++;
        return 1;
    } else {return 0;}
}

设定最大容许抽查的过期数据数量（num），抽查的数据最大不超过config_keys_per_loop(默认 20)。
设定最大容许抽查的 hash 桶数量（max_buckets），抽查的桶数量最大不超过config_keys_per_loop 的 20 倍(即默认 400)。

redis 的 hash 数据结构是 数组 + 链表 的形式，每遍历一格数组，抽查的 bucket 数量就加一；而只有在链表里有数据的状况下，每查看一个数据对象（dictEntry），才算入 num 计数中；
极其状况下：遍历了 max_buckets 个 bucket 都没有过期数据，或者一个 bucket 内就领有了 num 个过期数据。

当 hash 正在扩容时，redis 会复制出一个 hash 对象，所以是遍历两个 table for (int table = 0; table < 2; table++)，然而在遍历第二个 table 是会通过 dictIsRehashing 判断是否存在(即是否正在扩容)。
通过 activeExpireCycleTryExpire 清理过期数据。
依据清理后果，从新设置以后 DB 的均匀过期工夫数据（用于查看 / 统计 redis DB 状况）。

如果一轮抽样到的 key 中过期的比例是在可容忍的范畴config_cycle_acceptable_stale(默认 10%)，那么这个 db 就不用再抽样了，清理下一个 DB，否则持续清理以后 DB。

 void activeExpireCycle(int type) {    
    ……
    elapsed = ustime()-start;
    server.stat_expire_cycle_time_used += elapsed;
    latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
 
    /* Update our estimate of keys existing but yet to be expired.
     * Running average with this sample accounting for 5%. */
    double current_perc;
    if (total_sampled) {current_perc = (double)total_expired/total_sampled;
    } else
        current_perc = 0;
    server.stat_expired_stale_perc = (current_perc*0.05)+ (server.stat_expired_stale_perc*0.95);
}

从新计算 已过期键占比近似值：本轮占比 5%，历史值占比 95%。

参考

Redis 源码剖析之 key 过期
Redis 源码浏览 — Database
[[redis 源码走读] redis 过期策略](https://wenfh2020.com/2020/02…

关于redis:redis淘汰策略原理周期删除

入口

清理

是否容许 FAST 模式

工夫 & 数量限度

循环

清理

收尾

Just My Socks（注册教程内含优惠码）

	// server.c
	/* This function gets called every time Redis is entering the
	* main loop of the event driven library, that is, before to sleep
	* for ready file descriptors. */
	void beforeSleep(struct aeEventLoop *eventLoop) {
	……
	/* Run a fast expire cycle (the called function will return
	* ASAP if a fast cycle is not needed). */
	if (server.active_expire_enabled && server.masterhost == NULL)
	activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
	……
	}

	// server.c
	void databasesCron(void) {
	/* Expire keys by random sampling. Not required for slaves
	* as master will synthesize DELs for us. */
	if (server.active_expire_enabled) {if (server.masterhost == NULL) {activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
	} else {expireSlaveKeys();
	}
	}
	……
	}

	// server.h
	int active_expire_effort; /* 致力力度，1-10 取值范畴，默认 1，也就是遍历过期字典的力度，力度越大，遍历数量越多，然而性能损耗更多 */
	double stat_expired_stale_perc; /* 已过期键占比近似值：以先前值占比 95%，本次调用运行值占比 5% 进行加权均匀 */

	// expire.c
	#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which we do extra efforts. */
	#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */

	void activeExpireCycle(int type) {
	effort = server.active_expire_effort-1, /* Rescale from 0 to 9. */
	config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
	config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE - effort;
	long long start = ustime() /* server.c，单位 us */
	……
	if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
	/* Don't start a fast cycle if the previous cycle did not exit
	* for time limit, unless the percentage of estimated stale keys is
	* too high. Also never repeat a fast cycle for the same period
	* as the fast cycle total duration itself. */
	if (!timelimit_exit &&
	server.stat_expired_stale_perc < config_cycle_acceptable_stale)
	return;

	if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
	return;

	last_fast_cycle = start;
	}
	……
	}

	// server.h
	#define CRON_DBS_PER_CALL 16 /* 每次查看多少个数据库 */
	#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */

	// server.c
	server.hz = server.config_hz = CONFIG_DEFAULT_HZ;

	// expire.c
	#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
	#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */
	void activeExpireCycle(int type) {
	……
	int dbs_per_call = CRON_DBS_PER_CALL;
	config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC + 2 * effort,
	config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
	……
	/* We usually should test CRON_DBS_PER_CALL per iteration, with
	* two exceptions:
	*
	* 1) Don't test more DBs than we have.
	* 2) If last time we hit the time limit, we want to scan all DBs
	* in this iteration, as there is work to do in some DB and we don't want
	* expired keys to use memory for too much time. */
	if (dbs_per_call > server.dbnum \|\| timelimit_exit)
	dbs_per_call = server.dbnum;

	/* We can use at max 'config_cycle_slow_time_perc' percentage of CPU
	* time per iteration. Since this function gets called with a frequency of
	* server.hz times per second, the following is the max amount of
	* microseconds we can spend in this function. */
	timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
	timelimit_exit = 0;
	if (timelimit <= 0) timelimit = 1;

	if (type == ACTIVE_EXPIRE_CYCLE_FAST)
	timelimit = config_cycle_fast_duration; /* in microseconds. */

	……
	}

	// dict.h
	/* This is the initial size of every hash table */
	#define DICT_HT_INITIAL_SIZE 4

	// expire.c
	void activeExpireCycle(int type) {
	……
	static unsigned int current_db = 0; /* Last DB tested. */
	……
	for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
	/* Expired and checked in a single loop. */
	unsigned long expired, sampled;

	redisDb *db = server.db+(current_db % server.dbnum);

	/* Increment the DB now so we are sure if we run out of time
	* in the current DB we'll restart from the next. This allows to
	* distribute the time evenly across DBs. */
	current_db++;

	/* Continue to expire if at the end of the cycle there are still
	* a big percentage of keys to expire, compared to the number of keys
	* we scanned. The percentage, stored in config_cycle_acceptable_stale
	* is not fixed, but depends on the Redis configured "expire effort". */
	do {
	unsigned long num, slots;
	long long now, ttl_sum;
	int ttl_samples;
	iteration++;

	/* If there is nothing to expire try next DB ASAP. */
	if ((num = dictSize(db->expires)) == 0) {
	db->avg_ttl = 0;
	break;
	}
	slots = dictSlots(db->expires);
	now = mstime();

	/* When there are less than 1% filled slots, sampling the key
	* space is expensive, so stop here waiting for better times...
	* The dictionary will be resized asap. */
	if (slots > DICT_HT_INITIAL_SIZE &&
	(num*100/slots < 1)) break;
	……
	/* We can't block forever here even if there are many keys to
	* expire. So after a given amount of milliseconds return to the
	* caller waiting for the other active expire cycle. */
	if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
	elapsed = ustime()-start;
	if (elapsed > timelimit) {
	timelimit_exit = 1;
	server.stat_expired_time_cap_reached_count++;
	break;
	}
	}
	/* We don't repeat the cycle for the current database if there are
	* an acceptable amount of stale keys (logically expired but yet
	* not reclaimed). */
	} while (……);
	}
	……
	}

	// expire.c
	#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */

	void activeExpireCycle(int type) {
	……
	config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP + ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4 * effort,
	……
	long total_sampled = 0;
	long total_expired = 0;
	for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
	……
	do {
	unsigned long num, slots;
	long long now, ttl_sum;
	int ttl_samples;
	iteration++;
	……
	expired = 0;
	sampled = 0;
	ttl_sum = 0;
	ttl_samples = 0;
	if (num > config_keys_per_loop)
	num = config_keys_per_loop;

	long max_buckets = num*20;
	long checked_buckets = 0;
	while (sampled < num && checked_buckets < max_buckets) {for (int table = 0; table < 2; table++) {if (table == 1 && !dictIsRehashing(db->expires)) break;

	unsigned long idx = db->expires_cursor;
	idx &= db->expires->ht[table].sizemask;
	dictEntry *de = db->expires->ht[table].table[idx];
	long long ttl;

	/* Scan the current bucket of the current table. */
	checked_buckets++;
	while(de) {
	/* Get the next entry now since this entry may get
	* deleted. */
	dictEntry *e = de;
	de = de->next;

	ttl = dictGetSignedIntegerVal(e)-now;
	if (activeExpireCycleTryExpire(db,e,now)) expired++;
	if (ttl > 0) {
	/* We want the average TTL of keys yet
	* not expired. */
	ttl_sum += ttl;
	ttl_samples++;
	}
	sampled++;
	}
	}
	db->expires_cursor++;
	}
	total_expired += expired;
	total_sampled += sampled;
	/* Update the average TTL stats for this database. */
	if (ttl_samples) {
	long long avg_ttl = ttl_sum/ttl_samples;

	/* Do a simple running average with a few samples.
	* We just use the current estimate with a weight of 2%
	* and the previous estimate with a weight of 98%. */
	if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
	db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
	}
	} while (sampled == 0 \|\| (expired*100/sampled) > config_cycle_acceptable_stale);
	}
	……
	}

	int activeExpireCycleTryExpire(redisDb db, dictEntry de, long long now) {long long t = dictGetSignedIntegerVal(de);
	if (now > t) {sds key = dictGetKey(de);
	robj *keyobj = createStringObject(key,sdslen(key));

	propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
	if (server.lazyfree_lazy_expire)
	dbAsyncDelete(db,keyobj);
	else
	dbSyncDelete(db,keyobj);
	notifyKeyspaceEvent(NOTIFY_EXPIRED,
	"expired",keyobj,db->id);
	signalModifiedKey(NULL, db, keyobj);
	decrRefCount(keyobj);
	server.stat_expiredkeys++;
	return 1;
	} else {return 0;}
	}

关于redis:redis淘汰策略原理周期删除

入口

清理

是否容许 FAST 模式

工夫 & 数量限度

循环

清理

收尾

Just My Socks（注册教程 内含优惠码）

Just My Socks（注册教程内含优惠码）