OVS分类器四subtable

49次阅读

共计 6498 个字符,预计需要花费 17 分钟才能阅读完成。

简介

分类器会将规则根据掩码的情况分成子表,每一种掩码情况一个子表。相同掩码的规则放在同一个子表中。规则的匹配是在子表的基础上进行。

数据结构

/* Classifier internal definitions, subject to change at any time. */
/* 分类器内部定义的数据结构 */
/* A set of rules that all have the same fields wildcarded. */
struct cls_subtable {
    struct cmap_node cmap_node;    /* Within classifier's'subtables_map'. 连接到分类器的子表 hash 表中,根据掩码进行 hash */

    /* These fields are only used by writers. 这些域只能被写者改写 */
    int max_priority;              /* Max priority of any rule in subtable. 所有规则中可能存在的最大的优先级 */
    unsigned int max_count;        /* Count of max_priority rules. 最大优先级规则的个数 */

    /* Accessed by iterators. 使用迭代器遍历访问同一掩码的规则链表 */
    struct rculist rules_list;              /* Unordered. 无序的规则链表 */

    /* Identical, but lower priority rules are not inserted to any of the
     * following data structures. */

    /* These fields are accessed by readers who care about wildcarding. 这个值为 3,不包括最后一个段 */
    const uint8_t n_indices;                   /* How many indices to use. 存在掩码有效段个数 */
    // 这里 CLS_MAX_INDICES + 1 是因为有四个段,最后一个段单独处理。const struct flowmap index_maps[CLS_MAX_INDICES + 1]; /* Stage maps. 阶段索引映射 */
    // 前缀树的前缀长度,所有前缀有相同的前缀长度。unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in 'mask'
                                             * (runtime configurable). 子表前缀树的长度 */
    const int ports_mask_len;
    // 阶段查询 hash map,保存的是前三个段的 hash 值,可以通过 hash 进行快速过滤
    struct ccmap indices[CLS_MAX_INDICES];  /* Staged lookup indices. 截断查询索引 */
    rcu_trie_ptr ports_trie;                /* NULL if none. 前缀树,基于传输层的源端口 */

    /* These fields are accessed by all readers. */
    /* 根据最后一个段进行 hash,确定 hash 桶,然后进行匹配 */
    struct cmap rules;                      /* Contains 'cls_match'es. 同一掩码的规则的匹配器 hash 到该表中       */
    const struct minimask mask;             /* Wildcards for fields. 掩码,用于比较 */
    /* 'mask' must be the last field. */
};

函数

insert_subtable

该函数新建一个子表,插入到分类器中。当一个规则的掩码与现有的所有子表的掩码都不相同时,会调用新建一个子表。

/* The new subtable will be visible to the readers only after this. */
/* 在这个函数后,新的子表可以被读者看到 */
static struct cls_subtable *
insert_subtable(struct classifier *cls, const struct minimask *mask)
{uint32_t hash = minimask_hash(mask, 0);/* 根据掩码计算 hash 值 */
    struct cls_subtable *subtable;
    int i, index = 0;
    struct flowmap stage_map;
    uint8_t prev;
    size_t count = miniflow_n_values(&mask->masks);/* 计算掩码中 1 的个数 */

    subtable = xzalloc(sizeof *subtable + MINIFLOW_VALUES_SIZE(count));/* 分配子表 */
    cmap_init(&subtable->rules);/* 初始化规则 hash 表 */
    miniflow_clone(CONST_CAST(struct miniflow *, &subtable->mask.masks),
                   &mask->masks, count);/* 将掩码拷贝到子表中 */

    /* Init indices for segmented lookup, if any. */
    /* 初始化段查询索引 */
    prev = 0;
    for (i = 0; i < cls->n_flow_segments; i++) {/* 遍历每一个段,所有的子表都跟分类器拥有一样的段匹配器 */
        /* 构建段映射 map 表,prev 为段起始地址,cls->flow_segments[i] 为段的结束地址 */
        stage_map = miniflow_get_map_in_range(&mask->masks, prev,
                                              cls->flow_segments[i]);
        /* Add an index if it adds mask bits. */
        /* 如果非空,即存在有效值域 */
        if (!flowmap_is_empty(stage_map)) {/* 如果该子表的掩码在这个段不为空 */
            ccmap_init(&subtable->indices[index]);/* 初始化该段匹配 hash 表 */
            *CONST_CAST(struct flowmap *, &subtable->index_maps[index])/* 将该段的掩码赋值给段掩码数组 */
                = stage_map;
            index++;/* 统计存在掩码段的个数 */
        }
        prev = cls->flow_segments[i];/* 获取下一个段的边界 */
    }
    /* Map for the final stage. */
    *CONST_CAST(struct flowmap *, &subtable->index_maps[index])
        = miniflow_get_map_in_range(&mask->masks, prev, FLOW_U64S);/* 进行最后一个段的处理 */

    /* Check if the final stage adds any bits. */
    /* 检查最后一个段是否为空 */
    if (index > 0) {if (flowmap_is_empty(subtable->index_maps[index])) {/* 最后一个段没有掩码,则统计减掉 1 */
            /* Remove the last index, as it has the same fields as the rules
             * map. */
            --index;
            ccmap_destroy(&subtable->indices[index]);
        }
    }
    *CONST_CAST(uint8_t *, &subtable->n_indices) = index;/* 存在有效掩码段的个数 */
    // 计算前缀树的前缀长度
    for (i = 0; i < cls->n_tries; i++) {/* 遍历该分类器支持的前缀树,初始化该子表相应的前缀树掩码的长度 */
        subtable->trie_plen[i] = minimask_get_prefix_len(mask,
                                                         cls->tries[i].field);/* 获取前缀长度 */
    }

    /* Ports trie. 端口前缀树 */
    ovsrcu_set_hidden(&subtable->ports_trie, NULL);
    *CONST_CAST(int *, &subtable->ports_mask_len)/* 得到该位置 1 的个数 */
        = 32 - ctz32(ntohl(MINIFLOW_GET_BE32(&mask->masks, tp_src)));

    /* List of rules. 初始化子表规则链表 */
    rculist_init(&subtable->rules_list);

    /* 将子表插入到分类器的子表 hash 表中 */
    cmap_insert(&cls->subtables_map, &subtable->cmap_node, hash);

    return subtable;
}

find_match

/* 查找匹配域 */
static inline const struct cls_match *
find_match(const struct cls_subtable *subtable, ovs_version_t version,
           const struct flow *flow, uint32_t hash)
{
    const struct cls_match *head, *rule;

    CMAP_FOR_EACH_WITH_HASH (head, cmap_node, hash, &subtable->rules) {/* 遍历规则 hash 表 */
        if (OVS_LIKELY(miniflow_and_mask_matches_flow(&head->flow,/* 匹配域 */
                                                      &subtable->mask,/* 子表掩码 */
                                                      flow))) {/* 进行掩码匹配 */
            /* Return highest priority rule that is visible. */
            /* 返回本设备支持的最高优先级的规则 */
            CLS_MATCH_FOR_EACH (rule, head) {/* 遍历 hash 桶链表的每一个规则 */
                if (OVS_LIKELY(cls_match_visible_in_version(rule, version))) {return rule;}
            }
        }
    }

    return NULL;
}

find_match_wc

/* 进行通配符的查找 */
static const struct cls_match *
find_match_wc(const struct cls_subtable *subtable, ovs_version_t version,
              const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
              unsigned int n_tries, struct flow_wildcards *wc)
{if (OVS_UNLIKELY(!wc)) {/* 对于不需要生成缓存的匹配器,wc 为 null,比如 underlay 路由表 */
        return find_match(subtable, version, flow,
                          flow_hash_in_minimask(flow, &subtable->mask, 0));
    }

    uint32_t basis = 0, hash;
    const struct cls_match *rule = NULL;
    struct flowmap stages_map = FLOWMAP_EMPTY_INITIALIZER;
    unsigned int mask_offset = 0;
    int i;

    /* Try to finish early by checking fields in segments. */
    /* 在段里面尝试完成早期的校验 */
    for (i = 0; i < subtable->n_indices; i++) {/* 进行 segments 查找 */
        if (check_tries(trie_ctx, n_tries, subtable->trie_plen/* 前缀长度,匹配,查看是否可以跳过这个子表 */,
                        subtable->index_maps[i], flow, wc)) {
            /* 'wc' bits for the trie field set, now unwildcard the preceding
             * bits used so far. */
            goto no_match;
        }

        /* Accumulate the map used so far. */
        /* 累积到目前为止的掩码           */
        stages_map = flowmap_or(stages_map, subtable->index_maps[i]);
        /* 计算当前的 hash 值 */
        hash = flow_hash_in_minimask_range(flow, &subtable->mask,
                                           subtable->index_maps[i],
                                           &mask_offset, &basis);

        /* 段匹配,没有命中,跳出 */
        if (!ccmap_find(&subtable->indices[i], hash)) {goto no_match;}
    }
    /* Trie check for the final range. */
    /* 为最后的范围做树校验 */
    if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
                    subtable->index_maps[i], flow, wc)) {goto no_match;}
    /* 对指定 range 进行 hash */
    hash = flow_hash_in_minimask_range(flow, &subtable->mask,
                                       subtable->index_maps[i],
                                       &mask_offset, &basis);
    /* 进行子表匹配,找到最高优先级的 */
    rule = find_match(subtable, version, flow, hash);
    if (!rule && subtable->ports_mask_len) {/* 没有找到,并且有传输层 src ports 引擎树 */
        /* The final stage had ports, but there was no match.  Instead of
         * unwildcarding all the ports bits, use the ports trie to figure out a
         * smaller set of bits to unwildcard. 
         * 最后阶段存在端口,但是这儿没有匹配,代替非通配所有的端口,使用 ports tree 去计算最小的通配端口集合 */
        unsigned int mbits;
        ovs_be32 value, plens, mask;
        /* 获取四层端口的掩码 */
        mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);/* 获取四层源端口掩码 */
        /* 两者相与 */
        value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;/* 与流进行掩码与操作,得到我们需要查找的值 */
        /* 对端口树进行查找 */
        mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);

        ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
            mask & be32_prefix_mask(mbits);

        goto no_match;
    }

    /* Must unwildcard all the fields, as they were looked at. */
    /* 必须非通配所有的域,因为他们需要被查找匹配,这里采用的是或操作
    ** 查询多个表格后会命中多个规则,mask 进行或
    */
    flow_wildcards_fold_minimask(wc, &subtable->mask);
    // 返回掩码
    return rule;

no_match:
    /* Unwildcard the bits in stages so far, as they were used in determining
     * there is no match. */
    flow_wildcards_fold_minimask_in_map(wc, &subtable->mask, stages_map);
    return NULL;
}

正文完
 0