乐趣区

OVS分类器五分类器

分类器用途

classifier 在 ovs 中非常重要,用于流分类,执行相应的动作。属于 match+action 中的 match。

在 ovs 中有三个地方会用到分类器:

  • miss 报文处理(用户态慢路径)
/* The returned rule (if any) is valid at least until the next RCU quiescent
 * period.  If the rule needs to stay around longer, the caller should take
 * a reference.
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. 
 * 允许用户临时改变 flow 中的内容
 */
static struct rule_dpif *
rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, ovs_version_t version,
                          uint8_t table_id, struct flow *flow,
                          struct flow_wildcards *wc)
{struct classifier *cls = &ofproto->up.tables[table_id].cls;
    return rule_dpif_cast(rule_from_cls_rule(classifier_lookup(cls, version,
                                                               flow, wc)));
}
  • underlay 路由表查找
/* ovs 路由查找,得到网关的 IP 地址和出接口的源 IP 地址 */
bool
ovs_router_lookup(const struct in6_addr *ip6_dst, char output_bridge[],
                  struct in6_addr *src, struct in6_addr *gw)
{
    const struct cls_rule *cr;
    struct flow flow = {.ipv6_dst = *ip6_dst};

    cr = classifier_lookup(&cls, OVS_VERSION_MAX, &flow, NULL);
    if (cr) {struct ovs_router_entry *p = ovs_router_entry_cast(cr);

        ovs_strlcpy(output_bridge, p->output_bridge, IFNAMSIZ);
        *gw = p->gw;
        if (src) {*src = p->src_addr;}
        return true;
    }
    return ovs_router_lookup_fallback(ip6_dst, output_bridge, src, gw);
}
  • tunnel terminate 表查找
/* 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. 
 * flow 参数允许临时修改一些值,但是在返回之前需要被恢复原来的值。*/
odp_port_t
tnl_port_map_lookup(struct flow *flow, struct flow_wildcards *wc)
{
    // 进行分类器规则查找
    const struct cls_rule *cr = classifier_lookup(&cls, OVS_VERSION_MAX, flow,
                                                  wc);

    return (cr) ? tnl_port_cast(cr)->portno : ODPP_NONE;
}

分类器的核心函数为 classifier_lookup 函数,它有四个参数:

  • cls:table 的分类器
  • version:当前符合要求的规则版本,即规则的添加版本需要小于该 version,并且其删除版本要大于该 version,即该规则在该版本是可见的。
  • flow:需要匹配的报文的提取值,中途可以邻居修改,但是最终是保持报文原样的,将来用来作为 megaflow 的 value。
  • wc:通配符,报文起始查找时是通配所有的,在整个报文匹配中使用同一个 wc,命中一个规则就将该规则对应的掩码 unwildcard,表示报文需要匹配这些域,当报文通过 goto 和 resubmit 动作转移到另外一个 table 时,依然会使用原来的上个 table 处理后的 wc 继续处理,多个规则之间的 mask 采用或的关系处理 wc。wc 与 flow 共同构成快转表中规则的 value 与 mask。wc 非常重要,它最大限度的用来减少误匹配。对于那些不用生成 megaflow 的使用场景,该值可以为 NULL,所以 ovs_router_lookup 中该值为 NULL。

数据结构关系图

数据结构

/* A flow classifier. */
/* flow 分类器 */
struct classifier {
    int n_rules;                    /* Total number of rules. 总共的规则数目 */
    uint8_t n_flow_segments;        /* 段匹配器的段的个数,  一般是四个段:metadata,l2,l3,l4,但是这个值只能是 3,最后一个段单独处理 */
    // 存储每个段的起始地址的 64 字节偏移
    uint8_t flow_segments[CLS_MAX_INDICES]; /* Flow segment boundaries to use
                                             * for staged lookup. 段匹配器的结束边界, 总共有四个段,三个边界 
                                             * |--- 段 1 ----|----- 段 2 ----|---- 段三 ----|---- 段 4 -----|
                                             *           边界 1         边界 2           边界 3   
                                             */
    struct cmap subtables_map;      /* Contains "struct cls_subtable"s. 子表 hash 表,分类器下,按照 flow 的掩码不同
                                     * 划分成不同的子表,每个子表的规则的掩码是一样的,一个流表有多个子表
                                     */
    struct pvector subtables;       /* 子表数组,安装表格的优先级进行排列,这样的话,可以跳过低优先级的表格 */
    struct cmap partitions;         /* Contains "struct cls_partition"s. */
    struct cls_trie tries[CLS_MAX_TRIES]; /* Prefix tries. 前缀树,用于过滤一些规则 */
    unsigned int n_tries;           /* 前缀树的个数 */
    bool publish;                   /* Make changes visible to lookups? 表示是否可以查询了 */
};

段边界

/* U64 indices for segmented flow classification. */
/* 64bit 段 flow 分类器 */
const uint8_t flow_segment_u64s[4] = {FLOW_SEGMENT_1_ENDS_AT / sizeof(uint64_t),/* 元数据匹配结束地址 */
    FLOW_SEGMENT_2_ENDS_AT / sizeof(uint64_t),/* 链路层匹配结束   地址 */
    FLOW_SEGMENT_3_ENDS_AT / sizeof(uint64_t),/* 网络层匹配结束地址 */
    FLOW_U64S                                 /* 传输层匹配结束地址 */
};

classifier 流程

  1. 从 cls->subtables 第一个子表开始循环遍历每一个符合要求的子表,对子表调用函数 find_match_wc 进行处理。

函数 find_match_wc 进行如下处理:

​ 对 flow 在子表的前三个段中进行 trie 过滤,不符合命中条件的话,开始下一个子表。

​ 计算 flow 在子表在前三个段中的 hash 值,在 subtable->indices[i] 中查找是否有对应的 hash 值,前三个段就是通过 hash 值进行

​ 过滤。

​ 前面两条都满足要求后,对最后一个段进行 trie 过滤,不符合命中条件的话,开始下一个子表。

​ 对最后一个段进行 hash,使用 hash 值在子表中调用函数 find_match 进行精确匹配。

  1. 所有子表遍历完毕后,开始对匹配中命中的规则进行关联匹配处理,找出最高优先级的规则。

相关函数

classifier_init

初始化一个分类器。

/* Initializes 'cls' as a classifier that initially contains no classification
 * rules. 初始化一个分类器
 * 参数 flow_segments 为段边界数组,即 flow_segment_u64s */
void
classifier_init(struct classifier *cls, const uint8_t *flow_segments)
{
    cls->n_rules = 0;/* 规则个数为 0 */
    cmap_init(&cls->subtables_map);/* 初始化子表 map */
    pvector_init(&cls->subtables);// 初始化子表数组
    cls->n_flow_segments = 0;/* 初始化段为 0 */
    if (flow_segments) {/* 如果存在段匹配器,那么初始化段匹配器的结束地址,这里只初始化了三个段 */
        while (cls->n_flow_segments < CLS_MAX_INDICES
               && *flow_segments < FLOW_U64S) {cls->flow_segments[cls->n_flow_segments++] = *flow_segments++;
        }
    }
    cls->n_tries = 0;/* 默认前缀树匹配器的个数为 0 */
    for (int i = 0; i < CLS_MAX_TRIES; i++) {/* 初始化前缀树 */
        trie_init(cls, i, NULL);
    }
    cls->publish = true;
}

classifier_insert

插入一个规则到分类器。

/* Inserts 'rule' into 'cls'.  Until 'rule' is removed from 'cls', the caller
 * must not modify or free it.
 *
 * 'cls' must not contain an identical rule (including wildcards, values of
 * fixed fields, and priority).  Use classifier_find_rule_exactly() to find
 * such a rule. 
 * 插入规则到分类器,直到规则从分类器中删除,调用者不能修改和释放该规则 */
void
classifier_insert(struct classifier *cls,/* 分类器 */ 
                        const struct cls_rule *rule,/* 规则 */
                        ovs_version_t version, /* 规则版本 */
                        const struct cls_conjunction conj[],
                        size_t n_conj)
{
    const struct cls_rule *displaced_rule
        = classifier_replace(cls, rule, version, conj, n_conj);
    ovs_assert(!displaced_rule);
}

/* Inserts 'rule' into 'cls' in 'version'.  Until 'rule' is removed from 'cls',
 * the caller must not modify or free it.
 *
 * If 'cls' already contains an identical rule (including wildcards, values of
 * fixed fields, and priority) that is visible in 'version', replaces the old
 * rule by 'rule' and returns the rule that was replaced.  The caller takes
 * ownership of the returned rule and is thus responsible for destroying it
 * with cls_rule_destroy(), after RCU grace period has passed (see
 * ovsrcu_postpone()).
 *
 * Returns NULL if 'cls' does not contain a rule with an identical key, after
 * inserting the new rule.  In this case, no rules are displaced by the new
 * rule, even rules that cannot have any effect because the new rule matches a
 * superset of their flows and has higher priority.
 *
 * 如果不包含一个一样的规则,则返回空。*/
const struct cls_rule *
classifier_replace(struct classifier *cls, const struct cls_rule *rule,
                   ovs_version_t version,
                   const struct cls_conjunction *conjs, size_t n_conjs)
{
    struct cls_match *new;
    struct cls_subtable *subtable;
    uint32_t ihash[CLS_MAX_INDICES];
    struct cls_match *head;
    unsigned int mask_offset;
    size_t n_rules = 0;
    uint32_t basis;
    uint32_t hash;
    unsigned int i;

    /* 'new' is initially invisible to lookups. */
    new = cls_match_alloc(rule, version, conjs, n_conjs);/* 分配一个新的匹配器 */
    ovsrcu_set(&CONST_CAST(struct cls_rule *, rule)->cls_match, new);/* 设置规则的匹配器 */

    /* 根据规则的掩码进行子表的查找 */
    subtable = find_subtable(cls, rule->match.mask);/* 根据掩码查找子表 */
    if (!subtable) {/* 没有找到子表,新建一个子表,将规则插入新的子表  */
        subtable = insert_subtable(cls, rule->match.mask);
    }

    /* Compute hashes in segments. */
    /* 计算在段里面的 hash 值 */
    basis = 0;
    mask_offset = 0;
    for (i = 0; i < subtable->n_indices; i++) {/* 遍历每一个段,计算每一个段的 hash 值,这些 hash 值可以用来过滤 */
        /* 对规则的每一个段进行 hash */
        ihash[i] = minimatch_hash_range(&rule->match, subtable->index_maps[i],
                                        &mask_offset, &basis);
    }
    /* 对最后一个段进行 hash */
    hash = minimatch_hash_range(&rule->match, subtable->index_maps[i],
                                &mask_offset, &basis);

    /* 根据 hash 值对子表中的规则进行查找,最后一个段的 hash 值作为 hash 桶的 key */
    head = find_equal(subtable, rule->match.flow, hash);
    if (!head) {/* 没有找到,添加规则 */
        /* Add rule to tries.
         *
         * Concurrent readers might miss seeing the rule until this update,
         * which might require being fixed up by revalidation later. */
        for (i = 0; i < cls->n_tries; i++) {/* 遍历分类器的每一个前缀树 */
            if (subtable->trie_plen[i]) {/* 如果该子表的对应的掩码长度存在,插入该前缀树 */
                trie_insert(&cls->tries[i], rule, subtable->trie_plen[i]);/* 这个是前缀长度 */
            }
        }

        /* Add rule to ports trie. */
        /* 添加规则到端口树中,只对传输层源端口进行树插入 */
        if (subtable->ports_mask_len) {
            /* We mask the value to be inserted to always have the wildcarded
             * bits in known (zero) state, so we can include them in comparison
             * and they will always match (== their original value does not
             * matter). */
            ovs_be32 masked_ports = minimatch_get_ports(&rule->match);

            trie_insert_prefix(&subtable->ports_trie, &masked_ports,
                               subtable->ports_mask_len);
        }

        /* Add new node to segment indices. 将 hash 值插入子表段 hash 过滤表 */
        for (i = 0; i < subtable->n_indices; i++) {ccmap_inc(&subtable->indices[i], ihash[i]);
        }
        n_rules = cmap_insert(&subtable->rules, &new->cmap_node, hash);
    } else {   /* Equal rules exist in the classifier already. */
        struct cls_match *prev, *iter;

        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority.  Insert after rules marked invisible
         * in any version of the same priority. */
        FOR_EACH_RULE_IN_LIST_PROTECTED (iter, prev, head) {
            if (rule->priority > iter->priority
                || (rule->priority == iter->priority
                    && !cls_match_is_eventually_invisible(iter))) {break;}
        }

        /* Replace 'iter' with 'new' or insert 'new' between 'prev' and
         * 'iter'. */
        if (iter) {
            struct cls_rule *old;

            if (rule->priority == iter->priority) {cls_match_replace(prev, iter, new);
                old = CONST_CAST(struct cls_rule *, iter->cls_rule);
            } else {cls_match_insert(prev, iter, new);
                old = NULL;
            }

            /* Replace the existing head in data structures, if rule is the new
             * head. */
            if (iter == head) {
                cmap_replace(&subtable->rules, &head->cmap_node,
                             &new->cmap_node, hash);
            }

            if (old) {
                struct cls_conjunction_set *conj_set;

                conj_set = ovsrcu_get_protected(struct cls_conjunction_set *,
                                                &iter->conj_set);
                if (conj_set) {ovsrcu_postpone(free, conj_set);
                }

                ovsrcu_set(&old->cls_match, NULL); /* Marks old rule as removed
                                                    * from the classifier. */
                ovsrcu_postpone(cls_match_free_cb, iter);

                /* No change in subtable's max priority or max count. */

                /* Make 'new' visible to lookups in the appropriate version. */
                cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);

                /* Make rule visible to iterators (immediately). */
                rculist_replace(CONST_CAST(struct rculist *, &rule->node),
                                &old->node);

                /* Return displaced rule.  Caller is responsible for keeping it
                 * around until all threads quiesce. */
                return old;
            }
        } else {
            /* 'new' is new node after 'prev' */
            cls_match_insert(prev, iter, new);
        }
    }

    /* Make 'new' visible to lookups in the appropriate version. */
    cls_match_set_remove_version(new, OVS_VERSION_NOT_REMOVED);

    /* Make rule visible to iterators (immediately). */
    rculist_push_back(&subtable->rules_list,
                      CONST_CAST(struct rculist *, &rule->node));

    /* Rule was added, not replaced.  Update 'subtable's 'max_priority' and
     * 'max_count', if necessary.
     *
     * The rule was already inserted, but concurrent readers may not see the
     * rule yet as the subtables vector is not updated yet.  This will have to
     * be fixed by revalidation later. */
    if (n_rules == 1) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_insert(&cls->subtables, subtable, rule->priority);
    } else if (rule->priority == subtable->max_priority) {++subtable->max_count;} else if (rule->priority > subtable->max_priority) {
        subtable->max_priority = rule->priority;
        subtable->max_count = 1;
        pvector_change_priority(&cls->subtables, subtable, rule->priority);
    }

    /* Nothing was replaced. */
    cls->n_rules++;

    if (cls->publish) {/* 更新子表 */
        pvector_publish(&cls->subtables);
    }

    return NULL;
}

classifier_lookup

/* Finds and returns the highest-priority rule in 'cls' that matches 'flow' and
 * that is visible in 'version'.  Returns a null pointer if no rules in 'cls'
 * match 'flow'.  If multiple rules of equal priority match 'flow', returns one
 * arbitrarily.
 *
 * If a rule is found and 'wc' is non-null, bitwise-OR's'wc' with the
 * set of bits that were significant in the lookup.  At some point
 * earlier, 'wc' should have been initialized (e.g., by
 * flow_wildcards_init_catchall()).
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning.
 * 从 cls 分类器中找到一个能够匹配 flow 的最高优先级的规则。* 如果在分类器 cls 没有规则匹配这个 flow,则返回空,如果多个规则匹配该流,则返回一个最高
 * 优先级的。* 可以临时修改 flow
 */
const struct cls_rule *
classifier_lookup(const struct classifier *cls, /* 分类器 */ovs_version_t version,/* 版本 */
                  struct flow *flow,/* 需要查找的 flow */ struct flow_wildcards *wc/* 通配符 */)
{return classifier_lookup__(cls, version, flow, wc, true);
}

/* Like classifier_lookup(), except that support for conjunctive matches can be
 * configured with 'allow_conjunctive_matches'.  That feature is not exposed
 * externally because turning off conjunctive matches is only useful to avoid
 * recursion within this function itself.
 *
 * 'flow' is non-const to allow for temporary modifications during the lookup.
 * Any changes are restored before returning. */
static const struct cls_rule *
classifier_lookup__(const struct classifier *cls, ovs_version_t version,/* 分类器及其版本号 */
                    struct flow *flow, struct flow_wildcards *wc,/* 匹配流和通配符 */
                    bool allow_conjunctive_matches)/* 是否允许 conjunctive 匹配 */
{struct trie_ctx trie_ctx[CLS_MAX_TRIES];
    const struct cls_match *match;
    /* Highest-priority flow in 'cls' that certainly matches 'flow'. */
    /* 指向最高优先级的 flow 在 cls 中 */
    const struct cls_match *hard = NULL;
    int hard_pri = INT_MIN;     /* hard ? hard->priority : INT_MIN. */

    /* Highest-priority conjunctive flows in 'cls' matching 'flow'.  Since
     * these are (components of) conjunctive flows, we can only know whether
     * the full conjunctive flow matches after seeing multiple of them.  Thus,
     * we refer to these as "soft matches". 我们只有在查看了所有的关联匹配后
     * 我们才能知道是全匹配了,因此我们将其称之为软匹配
     * 最高优先级的关联匹配,在分类器中,使用最高优先级的关联 flows 匹配 flow
     * */
    struct cls_conjunction_set *soft_stub[64];/* 关联匹配集合,我们默认 64 个集合 */
    struct cls_conjunction_set **soft = soft_stub;/* 软匹配集合 */
    size_t n_soft = 0, allocated_soft = ARRAY_SIZE(soft_stub);/* 64 个元素 */
    int soft_pri = INT_MIN;    /* n_soft ? MAX(soft[*]->priority) : INT_MIN. */

    /* Synchronize for cls->n_tries and subtable->trie_plen.  They can change
     * when table configuration changes, which happens typically only on
     * startup. */
    atomic_thread_fence(memory_order_acquire);

    /* Initialize trie contexts for find_match_wc(). */
    /* 初始化 trie 上下文用于查找 wc */
    for (int i = 0; i < cls->n_tries; i++) {trie_ctx_init(&trie_ctx[i], &cls->tries[i]);
    }

    /* Main loop. */
    /* 主循环,遍历所有的子表,从这里的循环可以看出,每一次循环处理一个子表,由于子表掩码相同,所以一次返回
    ** 与掩码区域相同的多个规则的公用 match,该 match 是一个单链表,按照优先级高低进行排列。*/
    struct cls_subtable *subtable;
    PVECTOR_FOR_EACH_PRIORITY (subtable, hard_pri + 1, 2, sizeof *subtable,
                               &cls->subtables) {
        struct cls_conjunction_set *conj_set;/* 关联器集合 */

        /* Skip subtables with no match, or where the match is lower-priority
         * than some certain match we've already found.
         * 跳过没有相同的 match 的子表,或者匹配的优先级比先前匹配的较低
         */
        match = find_match_wc(subtable, version, flow, trie_ctx, cls->n_tries,
                              wc);
        if (!match || match->priority <= hard_pri) {/* 过滤掉有先级比已经匹配的规则低或者相等的 */
            continue;
        }

        /* 获取规则的关联匹配 */
        conj_set = ovsrcu_get(struct cls_conjunction_set *, &match->conj_set);
        if (!conj_set) {/* 该匹配项没有关联匹配项,直接作为硬匹配 */
            /* 'match' isn't part of a conjunctive match.  It's the best
             * certain match we've got so far, since we know that it's
             * higher-priority than hard_pri.
             *
             * (There might be a higher-priority conjunctive match.  We can't
             * tell yet.) */
            hard = match;
            hard_pri = hard->priority;/* 当前命中的最高优先级的硬匹配 */
        } else if (allow_conjunctive_matches) {/* 存在关联匹配,并且允许关联匹配 */
            /* 'match' is part of a conjunctive match.  Add it to the list. */
            /* 该 match 是关联匹配的一部分,将其添加到链表中 */
            if (OVS_UNLIKELY(n_soft >= allocated_soft)) {/* 超过了我们能够容纳的关联个数,需要进行重新分配 */
                struct cls_conjunction_set **old_soft = soft;/* 先保存老的匹配 */

                allocated_soft *= 2;/* 进行指数扩展 */
                soft = xmalloc(allocated_soft * sizeof *soft);/* 分配软匹配数组 */
                memcpy(soft, old_soft, n_soft * sizeof *soft);/* 将原来的内容拷贝进去 */
                if (old_soft != soft_stub) {/* 如果更换了软匹配,释放以前的内容 */
                    free(old_soft);
                }
            }
            /* 将新增加的软匹配加入 */
            soft[n_soft++] = conj_set;

            /* Keep track of the highest-priority soft match. */
            /* 如果原来的有先级小于现在的这个规则,更新优先级 */
            if (soft_pri < match->priority) {soft_pri = match->priority;}
        }
    }

    /* In the common case, at this point we have no soft matches and we can
     * return immediately.  (We do the same thing if we have potential soft
     * matches but none of them are higher-priority than our hard match.) 
     * 在通常情况下:到这里的时候,我们没有软匹配,我们将会立即返回。* 如果我们有潜在的软匹配,我们会做同样的事情 */
    if (hard_pri >= soft_pri) {/* 如果硬匹配的优先级大于等于软匹配,说明我们将会选择硬匹配规则,软匹配如果分配了内存则需要释放 */
        if (soft != soft_stub) {free(soft);/* 释放我们分配的内存 */
        }
        return hard ? hard->cls_rule : NULL;/* 返回命中的规则 */
    }

    /* At this point, we have some soft matches.  We might also have a hard
     * match; if so, its priority is lower than the highest-priority soft
     * match. 
     * 能够走到这里,说明我们有多个软匹配,我们需要逐个将软匹配的关联匹配变成新的
     * 全关联匹配 */

    /* Soft match loop.
     * 外层循环,遍历每一个优先级,通过第一个 for 语句选出本次循环的最高优先级。* Check whether soft matches are real matches. 检查软匹配是否是一个真实的全匹配,即关联规则的每一个维度都命中了,则软匹配升级为硬匹配 */
    for (;;) {
        /* Delete soft matches that are null.  This only happens in second and
         * subsequent iterations of the soft match loop, when we drop back from
         * a high-priority soft match to a lower-priority one.
         *
         * Also, delete soft matches whose priority is less than or equal to
         * the hard match's priority.  In the first iteration of the soft
         * match, these can be in 'soft' because the earlier main loop found
         * the soft match before the hard match.  In second and later iteration
         * of the soft match loop, these can be in 'soft' because we dropped
         * back from a high-priority soft match to a lower-priority soft match.
         *
         * It is tempting to delete soft matches that cannot be satisfied
         * because there are fewer soft matches than required to satisfy any of
         * their conjunctions, but we cannot do that because there might be
         * lower priority soft or hard matches with otherwise identical
         * matches.  (We could special case those here, but there's no
         * need--we'll do so at the bottom of the soft match loop anyway and
         * this duplicates less code.)
         *
         * It's also tempting to break out of the soft match loop if'n_soft ==
         * 1' but that would also miss lower-priority hard matches.  We could
         * special case that also but again there's no need. */
        /* 过滤掉所有的优先级比硬匹配低的,空的软匹配。*/
        for (int i = 0; i < n_soft;) {if (!soft[i] || soft[i]->priority <= hard_pri) {soft[i] = soft[--n_soft];
            } else {i++;}
        }
        /* 如果所有的软匹配都不合格的话,直接跳出 */
        if (!n_soft) {break;}

        /* Find the highest priority among the soft matches.  (We know this
         * must be higher than the hard match's priority; otherwise we would
         * have deleted all of the soft matches in the previous loop.)  Count
         * the number of soft matches that have that priority. 
         * 从所有的软匹配中找到最高优先级的软匹配,这个软匹配必须高于硬匹配。同时计算出这个优先级的软匹配的个数
         */
        soft_pri = INT_MIN;
        int n_soft_pri = 0;/* 获取一个最高的软匹配优先级及其个数 */
        for (int i = 0; i < n_soft; i++) {/* 遍历剩下的合规的软匹配 */
            if (soft[i]->priority > soft_pri) {soft_pri = soft[i]->priority;/* 更新软匹配的有先级 */
                n_soft_pri = 1;
            } else if (soft[i]->priority == soft_pri) {n_soft_pri++;/* 计算同一最高优先级的软匹配的个数 */}
        }
        ovs_assert(soft_pri > hard_pri);

        /* Look for a real match among the highest-priority soft matches.
         *
         * It's unusual to have many conjunctive matches, so we use stubs to
         * avoid calling malloc() in the common case.  An hmap has a built-in
         * stub for up to 2 hmap_nodes; possibly, we would benefit a variant
         * with a bigger stub. */
        struct conjunctive_match cm_stubs[16];
        struct hmap matches;

        hmap_init(&matches);/* 初始暂存 hash,同一个优先级的所有软匹配都使用该 hash 表,每次循环清零。这样可以组合
                             * 多个子表中的软匹配形成硬匹配。*/
        for (int i = 0; i < n_soft; i++) {/* 遍历每一个软匹配,只处理我们需要的优先级,即本次选出的最高优先级 
                                           * 一次循环处理一个该优先级的软匹配。设置一个维度 bit。*/
            uint32_t id;

            if (soft[i]->priority == soft_pri/* 如果是我们需要的优先级 */
                && find_conjunctive_match(soft[i], n_soft_pri, &matches,/* 根据关联 id,查找关联匹配 */
                                          cm_stubs, ARRAY_SIZE(cm_stubs),
                                          &id)) {/* 将该软匹配的每一个关联匹配进行过滤,放到 matches 杂凑表中 */
                uint32_t saved_conj_id = flow->conj_id;/* 记录流的关联动作 id */
                const struct cls_rule *rule;

                flow->conj_id = id; /* 最终的关联 id,根据关联 id 进行第二步查找 */
                rule = classifier_lookup__(cls, version, flow, wc, false);/* 进行流表查找,不允许处理关联动作,因为前面已经处理过了 */
                flow->conj_id = saved_conj_id;

                if (rule) {/* 找到规则 */
                    free_conjunctive_matches(&matches,
                                             cm_stubs, ARRAY_SIZE(cm_stubs));
                    if (soft != soft_stub) {free(soft);
                    }
                    return rule;
                }
            }
        }
        free_conjunctive_matches(&matches, cm_stubs, ARRAY_SIZE(cm_stubs));

        /* There's no real match among the highest-priority soft matches.
         * However, if any of those soft matches has a lower-priority but
         * otherwise identical flow match, then we need to consider those for
         * soft or hard matches.
         *
         * The next iteration of the soft match loop will delete any null
         * pointers we put into 'soft' (and some others too). 
         * 走到这里说明,本优先级没有形成硬匹配
         */
        for (int i = 0; i < n_soft; i++) {if (soft[i]->priority != soft_pri) {continue;}

            /* Find next-lower-priority flow with identical flow match. */
            /* 从同样的匹配条件中找到下一个优先级的匹配,如果该匹配是关联匹配,则
            ** 准备下一轮。否则作为硬匹配。*/
            match = next_visible_rule_in_list(soft[i]->match, version);
            if (match) {soft[i] = ovsrcu_get(struct cls_conjunction_set *,
                                     &match->conj_set);
                if (!soft[i]) {// 该规则不是关联匹配,作为硬匹配与原来的硬匹配比较优先级。/* The flow is a hard match; don't treat as a soft
                     * match. */
                    if (match->priority > hard_pri) {
                        hard = match;
                        hard_pri = hard->priority;
                    }
                }
            } else {/* No such lower-priority flow (probably the common case). */
                /* 为空,需要清除,在下一次循环中会进行计数修改 */
                soft[i] = NULL;
            }
        }
    }

    /* 如果是新分配的软匹配保存控制块,则释放 */
    if (soft != soft_stub) {free(soft);
    }
    return hard ? hard->cls_rule : NULL;
}

总结

经过长时间的分析,大概理清了 ovs 的分类器的实现机制,还有一些比较重要的细节没有想明白:

  1. 一般来说缓存都是精确匹配的,掩码匹配很容易导致误匹配。ovs 选择 megaflow 作为掩码匹配器,是否会存在误匹配的可能了?
  2. classifier_lookup 函数的 wc 参数保存了所有命中规则的掩码或的结果。不仅包含了每一个 table 中最终的最高优先级的规则的掩码,也包括了每一个 table 中各个子表中命中的其它优先级的掩码。也就是说,wc 不仅仅是最终执行了的动作的规则的掩码的或,还包含了一些命中了但是低优先级不回被执行动作的规则的掩码。这样做的意义是啥?
  3. 函数 check_tries 中如下代码片段的作用是啥?
 /* Possible to skip the rest of the subtable if subtable's
             * prefix on the field is not included in the lookup result. 
             * 查看该子表是否有命中,有命中则匹配下一个过滤树。否则
             */
            if (!be_get_bit_at(&ctx->match_plens.be32, field_plen[j] - 1)) {
                /* We want the trie lookup to never result in unwildcarding
                 * any bits that would not be unwildcarded otherwise.
                 * Since the trie is shared by the whole classifier, it is
                 * possible that the 'maskbits' contain bits that are
                 * irrelevant for the partition relevant for the current
                 * packet.  Hence the checks below. 
                 * Trie 是整个分类器共享的。它是可能的 maskbits 可能包含一些与这个报文不相关的部分。* 因此需要进行下面的检查。*/

                /* Check that the trie result will not unwildcard more bits
                 * than this subtable would otherwise. 
                 * 这棵树最长 */
                if (ctx->maskbits <= field_plen[j]) {// 没有全匹配
                    /* Unwildcard the bits and skip the rest. */
                    /* 设置非通配 bit,用于跳过其子表其他 */
                    mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits);
                    /* Note: Prerequisite already unwildcarded, as the only
                     * prerequisite of the supported trie lookup fields is
                     * the ethertype, which is always unwildcarded. */
                    return true;
                }
                /* Can skip if the field is already unwildcarded. */
                if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) {return true;}
            }
  1. 函数 find_match_wc 中的如下代码片段,没能想明白其作用是啥?
if (!rule && subtable->ports_mask_len) {/* 没有找到,并且存在 tdports 引擎树 */
        /* The final stage had ports, but there was no match.  Instead of
         * unwildcarding all the ports bits, use the ports trie to figure out a
         * smaller set of bits to unwildcard. 
         * 最后一个 stage 为传输层信息,但是这个子表没有规则命中。代替规则需要匹配整个
         * 端口域,通过 trie 计算一个最小的掩码域,可以最大限度减少误匹配。*/
        unsigned int mbits;
        ovs_be32 value, plens, mask;
        /* 获取四层端口的掩码 */
        mask = MINIFLOW_GET_BE32(&subtable->mask.masks, tp_src);/* 获取四层源端口掩码 */
        /* 两者相与 */
        value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask;/* 与流进行掩码与操作,得到我们需要查找的值 */
        /* 对端口树进行查找,返回的 mbits 表示 value 能够在 trie 中命中的最长的位置,如果是没有命中的话,表示参与了匹配
        ** 的 bit 数,即 mbits 掩码下得到的值一定不会命中。用于减少误匹配。*/
        mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32);

        ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |=
            mask & be32_prefix_mask(mbits);

        goto no_match;
    }

能力有限,暂时还没能想明白上面的问题,希望熟悉这方面的朋友能指正。

退出移动版