This article analyzes darknet's load_weights interface, which is responsible for loading model weights.
1. darknet data loading workflow
Earlier articles have already covered darknet's object-detection data loading workflow, including how the .data, .names, and .cfg files are loaded.
Next up, the load_weights interface handles loading the .weights model weights.
2. The load_weights interface
First, the call site:
load_weights(&net, weightfile);
Here net is an instance of the network struct, and weightfile is the path to the weights file. The implementation of load_weights:
/// parser.c
void load_weights(network *net, char *filename)
{
    load_weights_upto(net, filename, net->n);
}
It simply forwards to the load_weights_upto function:
/// parser.c
void load_weights_upto(network *net, char *filename, int cutoff)
{
#ifdef GPU
    if(net->gpu_index >= 0){
        cuda_set_device(net->gpu_index);   // select the GPU device
    }
#endif
    fprintf(stderr, "Loading weights from %s...", filename);
    fflush(stdout);   // force the output to be flushed immediately
    FILE *fp = fopen(filename, "rb");
    if(!fp) file_error(filename);

    int major;
    int minor;
    int revision;
    fread(&major, sizeof(int), 1, fp);     // load the header fields
    fread(&minor, sizeof(int), 1, fp);
    fread(&revision, sizeof(int), 1, fp);
    if ((major * 10 + minor) >= 2) {
        printf("\n seen 64");
        uint64_t iseen = 0;
        fread(&iseen, sizeof(uint64_t), 1, fp);
        *net->seen = iseen;
    }
    else {
        printf("\n seen 32");
        uint32_t iseen = 0;
        fread(&iseen, sizeof(uint32_t), 1, fp);
        *net->seen = iseen;
    }
    *net->cur_iteration = get_current_batch(*net);
    printf(", trained: %.0f K-images (%.0f Kilo-batches_64) \n", (float)(*net->seen / 1000), (float)(*net->seen / 64000));
    int transpose = (major > 1000) || (minor > 1000);
    int i;
    for(i = 0; i < net->n && i < cutoff; ++i){   // dispatch weight loading by layer type
        layer l = net->layers[i];
        if (l.dontload) continue;
        if(l.type == CONVOLUTIONAL && l.share_layer == NULL){
            load_convolutional_weights(l, fp);
        }
        if (l.type == SHORTCUT && l.nweights > 0) {
            load_shortcut_weights(l, fp);
        }
        if (l.type == IMPLICIT) {
            load_implicit_weights(l, fp);
        }
        if(l.type == CONNECTED){
            load_connected_weights(l, fp, transpose);
        }
        if(l.type == BATCHNORM){
            load_batchnorm_weights(l, fp);
        }
        if(l.type == CRNN){
            load_convolutional_weights(*(l.input_layer), fp);
            load_convolutional_weights(*(l.self_layer), fp);
            load_convolutional_weights(*(l.output_layer), fp);
        }
        if(l.type == RNN){
            load_connected_weights(*(l.input_layer), fp, transpose);
            load_connected_weights(*(l.self_layer), fp, transpose);
            load_connected_weights(*(l.output_layer), fp, transpose);
        }
        if(l.type == GRU){
            load_connected_weights(*(l.input_z_layer), fp, transpose);
            load_connected_weights(*(l.input_r_layer), fp, transpose);
            load_connected_weights(*(l.input_h_layer), fp, transpose);
            load_connected_weights(*(l.state_z_layer), fp, transpose);
            load_connected_weights(*(l.state_r_layer), fp, transpose);
            load_connected_weights(*(l.state_h_layer), fp, transpose);
        }
        if(l.type == LSTM){
            load_connected_weights(*(l.wf), fp, transpose);
            load_connected_weights(*(l.wi), fp, transpose);
            load_connected_weights(*(l.wg), fp, transpose);
            load_connected_weights(*(l.wo), fp, transpose);
            load_connected_weights(*(l.uf), fp, transpose);
            load_connected_weights(*(l.ui), fp, transpose);
            load_connected_weights(*(l.ug), fp, transpose);
            load_connected_weights(*(l.uo), fp, transpose);
        }
        if (l.type == CONV_LSTM) {
            if (l.peephole) {
                load_convolutional_weights(*(l.vf), fp);
                load_convolutional_weights(*(l.vi), fp);
                load_convolutional_weights(*(l.vo), fp);
            }
            load_convolutional_weights(*(l.wf), fp);
            if (!l.bottleneck) {
                load_convolutional_weights(*(l.wi), fp);
                load_convolutional_weights(*(l.wg), fp);
                load_convolutional_weights(*(l.wo), fp);
            }
            load_convolutional_weights(*(l.uf), fp);
            load_convolutional_weights(*(l.ui), fp);
            load_convolutional_weights(*(l.ug), fp);
            load_convolutional_weights(*(l.uo), fp);
        }
        if(l.type == LOCAL){
            int locations = l.out_w*l.out_h;
            int size = l.size*l.size*l.c*l.n*locations;
            fread(l.biases, sizeof(float), l.outputs, fp);
            fread(l.weights, sizeof(float), size, fp);
#ifdef GPU
            if(gpu_index >= 0){
                push_local_layer(l);
            }
#endif
        }
        if (feof(fp)) break;
    }
    fprintf(stderr, "Done! Loaded %d layers from weights-file \n", i);
    fclose(fp);
}
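One detail worth noting is the cutoff parameter: the loop stops at min(net->n, cutoff), so passing anything smaller than net->n loads only a prefix of the network. Below is a hedged usage sketch, not taken from darknet itself; the cfg/weights paths and the layer count 137 are illustrative, in the spirit of darknet's partial pre-training files such as yolov4.conv.137:

// Hedged sketch: load only the first 137 layers of a pretrained model,
// leaving the remaining layers at their random initialization for fine-tuning.
// The paths and the cutoff value are illustrative, not canonical.
network net = parse_network_cfg("cfg/yolov4.cfg");
load_weights_upto(&net, "yolov4.weights", 137);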
A few spots above are not obvious at first glance, for example this snippet:
int major;
int minor;
int revision;
fread(&major, sizeof(int), 1, fp);
fread(&minor, sizeof(int), 1, fp);
fread(&revision, sizeof(int), 1, fp);
This is best understood alongside the weight-saving interface: load_weights is the decoding counterpart of save_weights. Here is the beginning of save_weights_upto:
void save_weights_upto(network net, char *filename, int cutoff, int save_ema)
{
#ifdef GPU
    if(net.gpu_index >= 0){
        cuda_set_device(net.gpu_index);
    }
#endif
    fprintf(stderr, "Saving weights to %s\n", filename);
    FILE *fp = fopen(filename, "wb");
    if(!fp) file_error(filename);

    int major = MAJOR_VERSION;
    int minor = MINOR_VERSION;
    int revision = PATCH_VERSION;
    fwrite(&major, sizeof(int), 1, fp);    // write major first
    fwrite(&minor, sizeof(int), 1, fp);    // then minor
    fwrite(&revision, sizeof(int), 1, fp); // then revision
    (*net.seen) = get_current_iteration(net) * net.batch * net.subdivisions; // remove this line, when you will save to weights-file both: seen & cur_iteration
    fwrite(net.seen, sizeof(uint64_t), 1, fp); // finally write net.seen
    ......
}
From save_weights it is clear that a darknet weights file starts with a few header fields: major, minor, revision, and net.seen, after which the weight data of every layer is stored back to back. With that in mind, the decoding in load_weights is easy to follow. The macros behind the version fields:
/// version.h
#define MAJOR_VERSION 0
#define MINOR_VERSION 2
#define PATCH_VERSION 5
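Putting the two sides together, the header layout is: three ints (major, minor, revision) followed by the seen counter, stored as a uint64_t for format versions >= 0.2 and as a uint32_t before that. As a sanity check, here is a minimal standalone sketch, not darknet code, that dumps just this header; the file path is illustrative:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    FILE *fp = fopen("yolov4.weights", "rb");   // illustrative path
    if (!fp) { perror("fopen"); return 1; }

    int major, minor, revision;
    uint64_t seen = 0;
    fread(&major, sizeof(int), 1, fp);
    fread(&minor, sizeof(int), 1, fp);
    fread(&revision, sizeof(int), 1, fp);
    if (major * 10 + minor >= 2) {              // new format: 64-bit seen
        fread(&seen, sizeof(uint64_t), 1, fp);
    } else {                                    // old format: 32-bit seen
        uint32_t seen32 = 0;
        fread(&seen32, sizeof(uint32_t), 1, fp);
        seen = seen32;
    }
    printf("version %d.%d.%d, seen %llu images\n",
           major, minor, revision, (unsigned long long)seen);
    fclose(fp);
    return 0;
}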
Back to load_weights: once these header fields are read, the per-layer weights are loaded. Taking convolutional weights as an example, the logic splits into two cases:
(1) plain conv: use fread to load biases and then weights, each with its specific size;
(2) conv + BN fused: use fread to load biases, scales, rolling_mean, rolling_variance, and then weights, in that order.
The implementation:
/// parser.c
void load_convolutional_weights(layer l, FILE *fp)
{
    if(l.binary){
        //load_convolutional_weights_binary(l, fp);
        //return;
    }
    int num = l.nweights;
    int read_bytes;
    read_bytes = fread(l.biases, sizeof(float), l.n, fp);   // load biases
    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.biases - l.index = %d \n", l.index);
    //fread(l.weights, sizeof(float), num, fp); // as in connected layer
    if (l.batch_normalize && (!l.dontloadscales)){
        read_bytes = fread(l.scales, sizeof(float), l.n, fp);   // load scales
        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index);
        read_bytes = fread(l.rolling_mean, sizeof(float), l.n, fp);   // load rolling_mean
        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index);
        read_bytes = fread(l.rolling_variance, sizeof(float), l.n, fp);   // load rolling_variance
        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_variance - l.index = %d \n", l.index);
        if(0){
            int i;
            for(i = 0; i < l.n; ++i){
                printf("%g, ", l.rolling_mean[i]);
            }
            printf("\n");
            for(i = 0; i < l.n; ++i){
                printf("%g, ", l.rolling_variance[i]);
            }
            printf("\n");
        }
        if(0){
            fill_cpu(l.n, 0, l.rolling_mean, 1);
            fill_cpu(l.n, 0, l.rolling_variance, 1);
        }
    }
    read_bytes = fread(l.weights, sizeof(float), num, fp);   // load weights
    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
    //if(l.adam){
    //    fread(l.m, sizeof(float), num, fp);
    //    fread(l.v, sizeof(float), num, fp);
    //}
    //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);
    if (l.flipped) {
        transpose_matrix(l.weights, (l.c/l.groups)*l.size*l.size, l.n);
    }
    //if (l.binary) binarize_weights(l.weights, l.n, (l.c/l.groups)*l.size*l.size, l.weights);
#ifdef GPU
    if(gpu_index >= 0){
        push_convolutional_layer(l);
    }
#endif
}
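Given the read order above, the number of floats stored on disk for one convolutional layer is easy to tally. The helper below is a hypothetical illustration, not part of darknet; its parameters mirror the layer fields n (filters), c (input channels), groups, and size:

#include <stddef.h>

/* Hypothetical helper: how many floats one conv layer occupies on disk,
 * following the read order in load_convolutional_weights. */
size_t conv_weights_count(int n, int c, int groups, int size, int batch_normalize)
{
    size_t nweights = (size_t)(c / groups) * n * size * size;  // l.nweights
    size_t count = n;                  // biases
    if (batch_normalize)
        count += 3 * (size_t)n;        // scales + rolling_mean + rolling_variance
    return count + nweights;           // weights come last
}

For example, a 3x3 conv with 64 input channels, 128 filters, groups = 1, and BN stores 128 + 3*128 + 3*3*64*128 = 74,240 floats.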
A word on fread, which the framework's source relies on heavily for reading data. Its C signature:
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream)
Parameters:
- ptr: pointer to a block of memory at least size * nmemb bytes large;
- size: size in bytes of each element to read;
- nmemb: number of elements, each size bytes large;
- stream: pointer to a FILE object that identifies an input stream.
Return value: the number of elements successfully read, returned as a size_t. It should equal nmemb; if it differs, a read error occurred or end-of-file was reached.
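As the warnings in load_convolutional_weights show, a robust loader always compares fread's return value against nmemb. A small sketch of that pattern (the helper name is illustrative):

#include <stdio.h>

/* Illustrative helper: read n floats and report short reads,
 * distinguishing end-of-file from an I/O error. */
size_t read_floats(float *buf, size_t n, FILE *fp)
{
    size_t got = fread(buf, sizeof(float), n, fp);
    if (got < n) {
        if (feof(fp)) fprintf(stderr, "unexpected end of file\n");
        else          fprintf(stderr, "read error\n");
    }
    return got;   // number of complete elements actually read
}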
That wraps up the analysis of darknet's load_weights interface and the .weights file layout. Together with the earlier articles, this completes the walkthrough of the data-loading part of darknet object detection. I hope this sharing is of some help to your study.