Based on Redis 5.0.
Entry points
The function Redis uses to periodically delete expired data is activeExpireCycle. It is called from two places:
- beforeSleep: Redis calls beforeSleep right before it blocks waiting for file events, i.e. just before the process sleeps in the event loop. There it cleans expired keys in ACTIVE_EXPIRE_CYCLE_FAST mode: a quick pass that does not hold up the main flow.
```c
// server.c
/* This function gets called every time Redis is entering the
 * main loop of the event driven library, that is, before to sleep
 * for ready file descriptors. */
void beforeSleep(struct aeEventLoop *eventLoop) {
    ……
    /* Run a fast expire cycle (the called function will return
     * ASAP if a fast cycle is not needed). */
    if (server.active_expire_enabled && server.masterhost == NULL)
        activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
    ……
}
```
- databasesCron: Redis's periodic cron job. Here expired keys are cleaned in ACTIVE_EXPIRE_CYCLE_SLOW mode, which is allowed to run somewhat longer than ACTIVE_EXPIRE_CYCLE_FAST.
```c
// server.c
void databasesCron(void) {
    /* Expire keys by random sampling. Not required for slaves
     * as master will synthesize DELs for us. */
    if (server.active_expire_enabled) {
        if (server.masterhost == NULL) {
            activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
        } else {
            expireSlaveKeys();
        }
    }
    ……
}
```
activeExpireCycle only runs when the current node is a master, i.e. when server.masterhost == NULL.
Cleanup
When is FAST mode allowed
```c
// server.h
int active_expire_effort;        /* Expire effort, range 1-10, default 1: how hard the
                                  * expire dict is scanned. Higher values scan more keys
                                  * per cycle, at the cost of more CPU. */
double stat_expired_stale_perc;  /* Approximate percentage of already-expired keys:
                                  * a running average weighting the previous value at 95%
                                  * and the current cycle's value at 5%. */

// expire.c
#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which we do extra efforts. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */

void activeExpireCycle(int type) {
    effort = server.active_expire_effort-1, /* Rescale from 0 to 9. */
    config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
                                 ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
    config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE - effort;

    long long start = ustime(); /* ustime() is in server.c; unit: microseconds */
    ……
    if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
        /* Don't start a fast cycle if the previous cycle did not exit
         * for time limit, unless the percentage of estimated stale keys is
         * too high. Also never repeat a fast cycle for the same period
         * as the fast cycle total duration itself. */
        if (!timelimit_exit &&
            server.stat_expired_stale_perc < config_cycle_acceptable_stale)
            return;

        if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2)
            return;

        last_fast_cycle = start;
    }
    ……
}
```
A FAST cycle only actually runs if neither of the following early-return checks triggers (see the sketch after this list):
- The previous cycle did not exit because it hit its time limit, and the approximate stale-key percentage server.stat_expired_stale_perc is below the acceptable threshold config_cycle_acceptable_stale. In other words, there are not that many expired keys, the SLOW cycle is enough, and no FAST cycle is needed to help out.
- Less than the minimum interval has passed since the last FAST cycle started; the interval is twice the FAST duration, 2000 us by default.
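To make the effect of the effort knob concrete, here is a minimal standalone sketch (not Redis code; it just replays the arithmetic from the excerpt above) that prints the FAST-cycle duration and the acceptable stale percentage for the minimum and maximum effort settings:

```c
#include <stdio.h>

#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000  /* microseconds */

int main(void) {
    /* active_expire_effort ranges from 1 to 10; compare the two extremes. */
    int efforts[] = {1, 10};
    for (int i = 0; i < 2; i++) {
        int effort = efforts[i] - 1; /* rescale to 0..9, as activeExpireCycle does */
        long fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
                             ACTIVE_EXPIRE_CYCLE_FAST_DURATION / 4 * effort;
        int acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE - effort;
        printf("effort=%2d: fast cycle duration=%ld us, acceptable stale=%d%%\n",
               efforts[i], fast_duration, acceptable_stale);
    }
    return 0;
}
```

With the default effort of 1 the FAST cycle is capped at 1000 us and re-runs are spaced at least 2000 us apart; at the maximum effort of 10 the cap grows to 3250 us while the tolerated stale percentage drops to 1%.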
Time & count limits
```c
// server.h
#define CRON_DBS_PER_CALL 16 /* Number of databases checked per call. */
#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */

// server.c
server.hz = server.config_hz = CONFIG_DEFAULT_HZ;

// expire.c
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */

void activeExpireCycle(int type) {
    ……
    int dbs_per_call = CRON_DBS_PER_CALL;
    config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC + 2 * effort,
    config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION +
                                 ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort,
    ……
    /* We usually should test CRON_DBS_PER_CALL per iteration, with
     * two exceptions:
     *
     * 1) Don't test more DBs than we have.
     * 2) If last time we hit the time limit, we want to scan all DBs
     * in this iteration, as there is work to do in some DB and we don't want
     * expired keys to use memory for too much time. */
    if (dbs_per_call > server.dbnum || timelimit_exit)
        dbs_per_call = server.dbnum;

    /* We can use at max 'config_cycle_slow_time_perc' percentage of CPU
     * time per iteration. Since this function gets called with a frequency of
     * server.hz times per second, the following is the max amount of
     * microseconds we can spend in this function. */
    timelimit = config_cycle_slow_time_perc*1000000/server.hz/100;
    timelimit_exit = 0;
    if (timelimit <= 0) timelimit = 1;

    if (type == ACTIVE_EXPIRE_CYCLE_FAST)
        timelimit = config_cycle_fast_duration; /* in microseconds. */
    ……
}
```
Each run scans dbs_per_call DBs, 16 by default:
- If that is more than the number of DBs Redis has, dbs_per_call is lowered to the actual number of DBs.
- If the previous run ended because of timelimit_exit (it ran out of time), there is probably a lot of expired data, so this run scans all DBs to reclaim it as soon as possible: dbs_per_call is set to the number of DBs.
Then the time budget for this run, timelimit, is computed (a worked example follows below):
- In SLOW mode the default is 25*1000000/10/100 = 25000 us.
- In FAST mode the default is 1000 us.
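As a sanity check on the "max 25% of CPU" comment, here is a minimal sketch (assuming the default hz of 10) of how the SLOW time limit translates into CPU usage:

```c
#include <stdio.h>

int main(void) {
    int hz = 10;               /* default server.hz: the cron runs 10 times per second */
    int slow_time_perc = 25;   /* ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC */

    /* Same formula as activeExpireCycle: budget per call, in microseconds. */
    long timelimit = slow_time_perc * 1000000L / hz / 100;   /* 25000 us = 25 ms */
    long per_second = timelimit * hz;                        /* 250000 us per second */

    printf("budget per SLOW cycle: %ld us\n", timelimit);
    printf("worst case per second: %ld us, i.e. %ld%% of one CPU\n",
           per_second, per_second * 100 / 1000000L);
    return 0;
}
```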
The loop
```c
// dict.h
/* This is the initial size of every hash table */
#define DICT_HT_INITIAL_SIZE 4

// expire.c
void activeExpireCycle(int type) {
    ……
    static unsigned int current_db = 0; /* Last DB tested. */
    ……
    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
        /* Expired and checked in a single loop. */
        unsigned long expired, sampled;

        redisDb *db = server.db+(current_db % server.dbnum);

        /* Increment the DB now so we are sure if we run out of time
         * in the current DB we'll restart from the next. This allows to
         * distribute the time evenly across DBs. */
        current_db++;

        /* Continue to expire if at the end of the cycle there are still
         * a big percentage of keys to expire, compared to the number of keys
         * we scanned. The percentage, stored in config_cycle_acceptable_stale
         * is not fixed, but depends on the Redis configured "expire effort". */
        do {
            unsigned long num, slots;
            long long now, ttl_sum;
            int ttl_samples;
            iteration++;

            /* If there is nothing to expire try next DB ASAP. */
            if ((num = dictSize(db->expires)) == 0) {
                db->avg_ttl = 0;
                break;
            }
            slots = dictSlots(db->expires);
            now = mstime();

            /* When there are less than 1% filled slots, sampling the key
             * space is expensive, so stop here waiting for better times...
             * The dictionary will be resized asap. */
            if (slots > DICT_HT_INITIAL_SIZE &&
                (num*100/slots < 1)) break;
            ……
            /* We can't block forever here even if there are many keys to
             * expire. So after a given amount of milliseconds return to the
             * caller waiting for the other active expire cycle. */
            if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
                elapsed = ustime()-start;
                if (elapsed > timelimit) {
                    timelimit_exit = 1;
                    server.stat_expired_time_cap_reached_count++;
                    break;
                }
            }
            /* We don't repeat the cycle for the current database if there are
             * an acceptable amount of stale keys (logically expired but yet
             * not reclaimed). */
        } while (……);
    }
    ……
}
```
As long as the time limit has not been hit, the cycle loops over DBs and cleans them (the fill-ratio check is illustrated in the sketch after this list):
- The static variable current_db remembers which DB was handled last and is incremented each time.
- If the current DB has no keys with an expiration set, stop working on this DB and move on to the next one.
- If there are keys with expirations but the hash table is less than 1% full (keys with a TTL / hash buckets < 0.01), sampling would have a very low hit rate and waste time; upcoming accesses will soon trigger a shrink of the dict, after which sampling is far more efficient, so stop working on this DB and move on to the next one.
- Every 16 iterations (iteration & 0xf) the elapsed time is checked; if the time limit has been exceeded, the timelimit_exit flag is set and the current run ends.
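A minimal sketch (with made-up numbers) of the "less than 1% filled" guard; note that num*100/slots is integer division, so any fill ratio below 1% evaluates to 0 and the DB is skipped until the dict has been resized:

```c
#include <stdio.h>

#define DICT_HT_INITIAL_SIZE 4

int main(void) {
    unsigned long slots = 4096; /* hypothetical number of hash buckets in db->expires */
    unsigned long num   = 30;   /* hypothetical number of keys with a TTL */

    if (slots > DICT_HT_INITIAL_SIZE && (num * 100 / slots < 1))
        printf("fill ratio %lu/%lu is below 1%%: skip this DB for now\n", num, slots);
    else
        printf("fill ratio is high enough: sample this DB\n");
    return 0;
}
```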
Cleanup
```c
// expire.c
#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */

void activeExpireCycle(int type) {
    ……
    config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP +
                           ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4 * effort,
    ……
    long total_sampled = 0;
    long total_expired = 0;

    for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
        ……
        do {
            unsigned long num, slots;
            long long now, ttl_sum;
            int ttl_samples;
            iteration++;
            ……
            expired = 0;
            sampled = 0;
            ttl_sum = 0;
            ttl_samples = 0;

            if (num > config_keys_per_loop)
                num = config_keys_per_loop;

            long max_buckets = num*20;
            long checked_buckets = 0;
            while (sampled < num && checked_buckets < max_buckets) {
                for (int table = 0; table < 2; table++) {
                    if (table == 1 && !dictIsRehashing(db->expires)) break;

                    unsigned long idx = db->expires_cursor;
                    idx &= db->expires->ht[table].sizemask;
                    dictEntry *de = db->expires->ht[table].table[idx];
                    long long ttl;

                    /* Scan the current bucket of the current table. */
                    checked_buckets++;
                    while(de) {
                        /* Get the next entry now since this entry may get
                         * deleted. */
                        dictEntry *e = de;
                        de = de->next;

                        ttl = dictGetSignedIntegerVal(e)-now;
                        if (activeExpireCycleTryExpire(db,e,now)) expired++;
                        if (ttl > 0) {
                            /* We want the average TTL of keys yet
                             * not expired. */
                            ttl_sum += ttl;
                            ttl_samples++;
                        }
                        sampled++;
                    }
                }
                db->expires_cursor++;
            }
            total_expired += expired;
            total_sampled += sampled;

            /* Update the average TTL stats for this database. */
            if (ttl_samples) {
                long long avg_ttl = ttl_sum/ttl_samples;

                /* Do a simple running average with a few samples.
                 * We just use the current estimate with a weight of 2%
                 * and the previous estimate with a weight of 98%. */
                if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
                db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
            }
        } while (sampled == 0 ||
                 (expired*100/sampled) > config_cycle_acceptable_stale);
    }
    ……
}

int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
    long long t = dictGetSignedIntegerVal(de);
    if (now > t) {
        sds key = dictGetKey(de);
        robj *keyobj = createStringObject(key,sdslen(key));

        propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
        if (server.lazyfree_lazy_expire)
            dbAsyncDelete(db,keyobj);
        else
            dbSyncDelete(db,keyobj);
        notifyKeyspaceEvent(NOTIFY_EXPIRED,
            "expired",keyobj,db->id);
        signalModifiedKey(NULL, db, keyobj);
        decrRefCount(keyobj);
        server.stat_expiredkeys++;
        return 1;
    } else {
        return 0;
    }
}
```
- Cap the number of keys to sample (num): at most config_keys_per_loop keys (20 by default) are checked per iteration.
- Cap the number of hash buckets to inspect (max_buckets): at most 20 * config_keys_per_loop buckets (400 by default) are visited.

Redis's dict is an array of buckets, each holding a linked list. Every array slot visited increments the bucket count (checked_buckets); only the entries (dictEntry) found on a bucket's list count toward the key count (sampled). In the extreme cases, max_buckets buckets may be visited without finding any key, or a single bucket may hold all num keys.

- While the dict is rehashing (for example, growing), it temporarily has two hash tables, so both are scanned (for (int table = 0; table < 2; table++)); the second table is only scanned if dictIsRehashing confirms a rehash is actually in progress.
- Expired keys are removed with activeExpireCycleTryExpire.
- Based on the results, the DB's average TTL (db->avg_ttl) is updated; it is used for inspecting / monitoring the state of the Redis DB.

If the fraction of expired keys among those sampled in one round is within the acceptable range config_cycle_acceptable_stale (10% by default), this DB needs no further sampling and the cycle moves on to the next DB; otherwise it keeps sampling the current DB. The sketch below replays these caps and the continue condition.
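A minimal sketch (not Redis code, hypothetical numbers) of the per-iteration sampling caps at the default effort and of the condition that decides whether the same DB is sampled again:

```c
#include <stdio.h>

#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20
#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % */

int main(void) {
    int effort = 1 - 1; /* default active_expire_effort of 1, rescaled to 0 */

    /* Per-iteration caps, as computed in activeExpireCycle. */
    long keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP +
                         ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP / 4 * effort; /* 20 */
    long max_buckets = keys_per_loop * 20;                               /* 400 */
    printf("keys per loop: %ld, max buckets per loop: %ld\n",
           keys_per_loop, max_buckets);

    /* Hypothetical round: 20 keys sampled, 3 of them already expired (15%). */
    unsigned long sampled = 20, expired = 3;
    int acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE - effort;
    if (sampled == 0 || (expired * 100 / sampled) > acceptable_stale)
        printf("stale ratio above %d%%: sample this DB again\n", acceptable_stale);
    else
        printf("stale ratio acceptable: move on to the next DB\n");
    return 0;
}
```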
Wrap-up
```c
void activeExpireCycle(int type) {
    ……
    elapsed = ustime()-start;
    server.stat_expire_cycle_time_used += elapsed;
    latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);

    /* Update our estimate of keys existing but yet to be expired.
     * Running average with this sample accounting for 5%. */
    double current_perc;
    if (total_sampled) {
        current_perc = (double)total_expired/total_sampled;
    } else
        current_perc = 0;
    server.stat_expired_stale_perc = (current_perc*0.05)+
                                     (server.stat_expired_stale_perc*0.95);
}
```
Finally, the approximate stale-key percentage is recomputed as a running average: the current cycle is weighted at 5% and the previous value at 95%.
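A minimal sketch (hypothetical numbers) of this 5%/95% running average: the estimate drifts slowly toward the latest sample, so a single noisy cycle barely moves it.

```c
#include <stdio.h>

int main(void) {
    double stale_perc = 0.0;    /* previous value of server.stat_expired_stale_perc */
    double current_perc = 0.30; /* hypothetical: 30% of the sampled keys were expired */

    for (int cycle = 1; cycle <= 5; cycle++) {
        stale_perc = current_perc * 0.05 + stale_perc * 0.95;
        printf("after cycle %d: stat_expired_stale_perc = %.4f\n", cycle, stale_perc);
    }
    return 0;
}
```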