欢迎关注我的公众号 [极智视界],回复001获取Google编程标准

O_o>_<   o_OO_o~_~o_O

  本文分析一下 darknet 的 load_weights 接口,这个接口主要做模型权重的加载。

1、darknet 数据加载流程

   之前的文章已经介绍了 darknet 目标检测的数据加载流程,并介绍了 .data、.names 和 .cfg 的加载实现。

   接下来这里的 load_weights 接口主要做 .weights 模型权重的加载。

2、load_weights 接口

   先来看一下接口调用:

load_weights(&net, weightfile);

   其中 net 为 network 结构体的实例,weightfile 为权重文件的路径,看一下 load_weights 的实现:

/// parser.cvoid load_weights(network *net, char *filename){    load_weights_upto(net, filename, net->n);}

   主要调用了 load_weights_upto 函数:

/// parser.c
/*
 * Read a weights file into `net`, visiting layers 0 .. min(net->n, cutoff) - 1.
 *
 * The file is read in this order:
 *   1. three ints: major, minor, revision;
 *   2. the "seen" counter: 64-bit when (major * 10 + minor) >= 2, else 32-bit;
 *   3. the per-layer data, in layer order, via the load_*_weights helpers
 *      (LOCAL layers are read inline).
 *
 * net      - network whose layers, seen and cur_iteration are filled in
 * filename - path of the weights file, opened in binary mode
 * cutoff   - upper bound on the layer index to load (exclusive)
 */
void load_weights_upto(network *net, char *filename, int cutoff){
#ifdef GPU
    if(net->gpu_index >= 0){
        cuda_set_device(net->gpu_index);                         // hand the network's gpu_index to cuda_set_device
    }
#endif
    fprintf(stderr, "Loading weights from %s...", filename);
    fflush(stdout);                                             // force pending output out; NOTE(review): the message above went to stderr, but stdout is what gets flushed
    FILE *fp = fopen(filename, "rb");
    if(!fp) file_error(filename);                               // presumably does not return - TODO confirm
    int major;
    int minor;
    int revision;
    // Load the header fields.
    // NOTE(review): the fread return values are ignored, so on a file shorter
    // than the header these variables are used uninitialized below.
    fread(&major, sizeof(int), 1, fp);
    fread(&minor, sizeof(int), 1, fp);
    fread(&revision, sizeof(int), 1, fp);
    if ((major * 10 + minor) >= 2) {
        // Newer format: "seen" is stored as 64 bits.
        printf("\n seen 64");
        uint64_t iseen = 0;
        fread(&iseen, sizeof(uint64_t), 1, fp);
        *net->seen = iseen;
    }
    else {
        // Older format: "seen" is stored as 32 bits.
        printf("\n seen 32");
        uint32_t iseen = 0;
        fread(&iseen, sizeof(uint32_t), 1, fp);
        *net->seen = iseen;
    }
    *net->cur_iteration = get_current_batch(*net);
    // The divisions are performed before the cast to float.
    printf(", trained: %.0f K-images (%.0f Kilo-batches_64) \n", (float)(*net->seen / 1000), (float)(*net->seen / 64000));
    // Passed through to load_connected_weights for every connected-style layer.
    int transpose = (major > 1000) || (minor > 1000);
    int i;
    for(i = 0; i < net->n && i < cutoff; ++i){                     // dispatch on the layer type to load each layer's weights
        layer l = net->layers[i];
        if (l.dontload) continue;
        if(l.type == CONVOLUTIONAL && l.share_layer == NULL){
            load_convolutional_weights(l, fp);
        }
        if (l.type == SHORTCUT && l.nweights > 0) {
            load_shortcut_weights(l, fp);
        }
        if (l.type == IMPLICIT) {
            load_implicit_weights(l, fp);
        }
        if(l.type == CONNECTED){
            load_connected_weights(l, fp, transpose);
        }
        if(l.type == BATCHNORM){
            load_batchnorm_weights(l, fp);
        }
        if(l.type == CRNN){
            load_convolutional_weights(*(l.input_layer), fp);
            load_convolutional_weights(*(l.self_layer), fp);
            load_convolutional_weights(*(l.output_layer), fp);
        }
        if(l.type == RNN){
            load_connected_weights(*(l.input_layer), fp, transpose);
            load_connected_weights(*(l.self_layer), fp, transpose);
            load_connected_weights(*(l.output_layer), fp, transpose);
        }
        if(l.type == GRU){
            load_connected_weights(*(l.input_z_layer), fp, transpose);
            load_connected_weights(*(l.input_r_layer), fp, transpose);
            load_connected_weights(*(l.input_h_layer), fp, transpose);
            load_connected_weights(*(l.state_z_layer), fp, transpose);
            load_connected_weights(*(l.state_r_layer), fp, transpose);
            load_connected_weights(*(l.state_h_layer), fp, transpose);
        }
        if(l.type == LSTM){
            load_connected_weights(*(l.wf), fp, transpose);
            load_connected_weights(*(l.wi), fp, transpose);
            load_connected_weights(*(l.wg), fp, transpose);
            load_connected_weights(*(l.wo), fp, transpose);
            load_connected_weights(*(l.uf), fp, transpose);
            load_connected_weights(*(l.ui), fp, transpose);
            load_connected_weights(*(l.ug), fp, transpose);
            load_connected_weights(*(l.uo), fp, transpose);
        }
        if (l.type == CONV_LSTM) {
            // The v* sub-layers are read only when peephole is set.
            if (l.peephole) {
                load_convolutional_weights(*(l.vf), fp);
                load_convolutional_weights(*(l.vi), fp);
                load_convolutional_weights(*(l.vo), fp);
            }
            load_convolutional_weights(*(l.wf), fp);
            // wi/wg/wo are skipped when bottleneck is set.
            if (!l.bottleneck) {
                load_convolutional_weights(*(l.wi), fp);
                load_convolutional_weights(*(l.wg), fp);
                load_convolutional_weights(*(l.wo), fp);
            }
            load_convolutional_weights(*(l.uf), fp);
            load_convolutional_weights(*(l.ui), fp);
            load_convolutional_weights(*(l.ug), fp);
            load_convolutional_weights(*(l.uo), fp);
        }
        if(l.type == LOCAL){
            // LOCAL layers are read inline: l.outputs biases, then `size` weights.
            int locations = l.out_w*l.out_h;
            int size = l.size*l.size*l.c*l.n*locations;
            fread(l.biases, sizeof(float), l.outputs, fp);
            fread(l.weights, sizeof(float), size, fp);
#ifdef GPU
            // NOTE(review): this tests the bare `gpu_index`, not net->gpu_index
            // as at the top of the function - confirm this is intended.
            if(gpu_index >= 0){
                push_local_layer(l);
            }
#endif
        }
        // Stop as soon as the end-of-file indicator is set on the stream.
        if (feof(fp)) break;
    }
    fprintf(stderr, "Done! Loaded %d layers from weights-file \n", i);
    fclose(fp);
}

   以上有几个点不容易看懂,如以下这段:

/* Excerpt from load_weights_upto: the three header ints are read first. */
int major;
int minor;
int revision;
fread(&major, sizeof(int), 1, fp);
fread(&minor, sizeof(int), 1, fp);
fread(&revision, sizeof(int), 1, fp);

   这个最好结合保存权重的接口一起来看,load_weights 是 save_weights 的解码过程,来看一下 save_weights_upto 的前面部分:

/*
 * Beginning of save_weights_upto: opens `filename` for binary writing and
 * writes the header (major, minor, revision, then net.seen as 64 bits).
 * The rest of the function is elided in this excerpt.
 */
void save_weights_upto(network net, char *filename, int cutoff, int save_ema){
#ifdef GPU
    if(net.gpu_index >= 0){
        cuda_set_device(net.gpu_index);
    }
#endif
    fprintf(stderr, "Saving weights to %s\n", filename);
    FILE *fp = fopen(filename, "wb");
    if(!fp) file_error(filename);
    int major = MAJOR_VERSION;
    int minor = MINOR_VERSION;
    int revision = PATCH_VERSION;
    fwrite(&major, sizeof(int), 1, fp);              // write major first
    fwrite(&minor, sizeof(int), 1, fp);              // then minor
    fwrite(&revision, sizeof(int), 1, fp);           // then revision
    (*net.seen) = get_current_iteration(net) * net.batch * net.subdivisions; // remove this line, when you will save to weights-file both: seen & cur_iteration
    fwrite(net.seen, sizeof(uint64_t), 1, fp);       // finally write net.seen
    // The remainder of the function is omitted in the excerpt:
    ......
}

  从上面的 save_weights 接口可以看出 darknet 的权重文件在最前面会先打上几个标记:major、minor、revision、net.seen,然后再连续存储各层的权重数据,这样就不难理解 load_weights 的时候做这个解码了,以下是这几个参数的宏定义:

/// version.h#define MAJOR_VERSION 0#define MINOR_VERSION 2#define PATCH_VERSION 5

   再回到 load_weights,在加载这些标记后是加载各层的权重,以卷积权重加载来说,里面的逻辑分两种:

   (1) 单 conv,使用 fread 根据特定大小依次加载 biases 和 weights;

  (2) conv + bn 融合,使用 fread 根据特定大小依次加载 biases、scales、rolling_mean、rolling_variance、weights。

   来看实现:

/// parser.cvoid load_convolutional_weights(layer l, FILE *fp){    if(l.binary){        //load_convolutional_weights_binary(l, fp);        //return;    }    int num = l.nweights;    int read_bytes;    read_bytes = fread(l.biases, sizeof(float), l.n, fp);               // load biases    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.biases - l.index = %d \n", l.index);    //fread(l.weights, sizeof(float), num, fp); // as in connected layer    if (l.batch_normalize && (!l.dontloadscales)){        read_bytes = fread(l.scales, sizeof(float), l.n, fp);          // load scales        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index);        read_bytes = fread(l.rolling_mean, sizeof(float), l.n, fp);    // load rolling_mean        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index);        read_bytes = fread(l.rolling_variance, sizeof(float), l.n, fp);   // load rolling_variance        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_variance - l.index = %d \n", l.index);        if(0){            int i;            for(i = 0; i < l.n; ++i){                printf("%g, ", l.rolling_mean[i]);            }            printf("\n");            for(i = 0; i < l.n; ++i){                printf("%g, ", l.rolling_variance[i]);            }            printf("\n");        }        if(0){            fill_cpu(l.n, 0, l.rolling_mean, 1);            fill_cpu(l.n, 0, l.rolling_variance, 1);        }    }    read_bytes = fread(l.weights, sizeof(float), num, fp);          // load weights    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! 
l.weights - l.index = %d \n", l.index);    //if(l.adam){    //    fread(l.m, sizeof(float), num, fp);    //    fread(l.v, sizeof(float), num, fp);    //}    //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);    if (l.flipped) {        transpose_matrix(l.weights, (l.c/l.groups)*l.size*l.size, l.n);    }    //if (l.binary) binarize_weights(l.weights, l.n, (l.c/l.groups)*l.size*l.size, l.weights);#ifdef GPU    if(gpu_index >= 0){        push_convolutional_layer(l);    }#endif}

   再说一下 fread,这个函数在框架源码中数据读取方面会用得比较多,来看一下这个 C 语言的函数:

size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream)

   参数说明:

  • ptr:指向带有最小尺寸 size * nmemb 字节的内存块的指针;
  • size:要读取的每个元素的大小,以字节为单位;
  • nmemb:元素的个数,每个元素的大小为 size 字节;
  • stream:指向 FILE 对象的指针,指定了一个输入流;

  返回值:成功读取的元素个数以 size_t 类型返回;正常情况下返回值与 nmemb 参数相等,若不相等,则可能发生了读错误或到达了文件尾。

  好了,以上分析了 darknet 的 load_weights 接口及 weights 数据结构,再结合之前的文章就已经集齐了 darknet 目标检测数据加载部分的解读,希望我的分享对你的学习能有一点帮助。


【公众号传送】
《【编程艺术】分析 darknet load_weights 接口》