乐趣区

OVS分类器分析一FLOW-MINIFLOW

数据结构

struct flow

/*
 * A flow in the network.
 *
 * Must be initialized to all zeros to make any compiler-induced padding
 * zeroed.  Helps also in keeping unused fields (such as mutually exclusive
 * IPv4 and IPv6 addresses) zeroed out.
 *
 * The meaning of 'in_port' is context-dependent.  In most cases, it is a
 * 16-bit OpenFlow 1.0 port number.  In the software datapath interface (dpif)
 * layer and its implementations (e.g. dpif-netlink, dpif-netdev), it is
 * instead a 32-bit datapath port number.
 *
 * The fields are organized in four segments to facilitate staged lookup, where
 * lower layer fields are first used to determine if the later fields need to
 * be looked at.  This enables better wildcarding for datapath flows.
 *
 * NOTE: Order of the fields is significant, any change in the order must be
 * reflected in miniflow_extract()!
 */
struct flow {
    /* Metadata. */
    struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
    ovs_be64 metadata;          /* OpenFlow Metadata. */
    uint32_t regs[FLOW_N_REGS]; /* Registers. */
    uint32_t skb_priority;      /* Packet priority for QoS. */
    uint32_t pkt_mark;          /* Packet mark. */
    uint32_t dp_hash;           /* Datapath computed hash value. The exact
                                 * computation is opaque to the user space. */

    union flow_in_port in_port; /* Input port.  Interpretation depends on
                                 * context (OpenFlow port number or datapath
                                 * port number). */
    uint32_t recirc_id;         /* Recirculation ID.  Must be exact match. */
    uint16_t ct_state;          /* Connection tracking state. */
    uint16_t ct_zone;           /* Connection tracking zone. */
    uint32_t ct_mark;           /* Connection mark. */
    uint8_t pad1[4];            /* Pad to 64 bits. */
    ovs_u128 ct_label;          /* Connection label. */
    uint32_t conj_id;           /* Conjunction ID. */
    ofp_port_t actset_output;   /* Output port in action set. */

    /* L2, Order the same as in the Ethernet header! (64-bit aligned) */
    /* Link-layer fields, laid out in Ethernet header order. */
    struct eth_addr dl_dst;     /* Ethernet destination address. */
    struct eth_addr dl_src;     /* Ethernet source address. */
    ovs_be16 dl_type;           /* Ethernet frame type. */
    ovs_be16 vlan_tci;          /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
    ovs_be32 mpls_lse[ROUND_UP(FLOW_MAX_MPLS_LABELS, 2)]; /* MPLS label stack
                                                             (with padding). */
    /* L3 (64-bit aligned) */
    /* Network-layer header fields. */
    ovs_be32 nw_src;            /* IPv4 source address or ARP SPA. */
    ovs_be32 nw_dst;            /* IPv4 destination address or ARP TPA. */
    struct in6_addr ipv6_src;   /* IPv6 source address. */
    struct in6_addr ipv6_dst;   /* IPv6 destination address. */
    ovs_be32 ipv6_label;        /* IPv6 flow label. */
    uint8_t nw_frag;            /* FLOW_FRAG_* flags. */
    uint8_t nw_tos;             /* IP ToS (including DSCP and ECN). */
    uint8_t nw_ttl;             /* IP TTL/Hop Limit. */
    uint8_t nw_proto;           /* IP protocol or low 8 bits of ARP opcode. */
    struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
    struct eth_addr arp_sha;    /* ARP/ND source hardware address. */
    struct eth_addr arp_tha;    /* ARP/ND target hardware address. */
    ovs_be16 tcp_flags;         /* TCP flags. With L3 to avoid matching L4. */
    ovs_be16 pad3;              /* Pad to 64 bits. */

    /* L4 (64-bit aligned) */
    /* Transport-layer fields. */
    ovs_be16 tp_src;            /* TCP/UDP/SCTP source port/ICMP type. */
    ovs_be16 tp_dst;            /* TCP/UDP/SCTP destination port/ICMP code. */
    ovs_be32 igmp_group_ip4;    /* IGMP group IPv4 address.
                                 * Keep last for BUILD_ASSERT_DECL below. */
};

struct miniflow

/* The number of value bits in a signed or unsigned integer TYPE:
 *
 *    - _Bool has 1 value bit.
 *
 *    - An N-bit unsigned integer type has N value bits.
 *
 *    - An N-bit signed integer type has N-1 value bits.
 *
 * The macro yields 1 for a boolean type; otherwise it yields the size of TYPE
 * in bytes times CHAR_BIT, minus TYPE_IS_SIGNED(TYPE) to discount the sign
 * bit of a signed type. */
#define TYPE_VALUE_BITS(TYPE) \
    (TYPE_IS_BOOL(TYPE) ? 1 : sizeof(TYPE) * CHAR_BIT - TYPE_IS_SIGNED(TYPE))

/* The minimum or maximum value of a signed or unsigned integer TYPE. */
/* Minimum: for a signed type, the negated maximum minus 1; for an unsigned
 * type, 0. */
#define TYPE_MINIMUM(TYPE) (TYPE_IS_SIGNED(TYPE) ? -TYPE_MAXIMUM(TYPE) - 1 : 0)
/* Maximum: shift a 1 of type TYPE up to the highest value bit, subtract 1,
 * double the result and add 1.  This sets every value bit without ever
 * shifting by the full number of value bits. */
#define TYPE_MAXIMUM(TYPE) \
    ((((TYPE)1 << (TYPE_VALUE_BITS(TYPE) - 1)) - 1) * 2 + 1)

/* Word type used for bitmaps.  'unsigned long long' is at least 64 bits wide
 * (exactly 64 on common platforms). */
typedef unsigned long long map_t;
/* Number of bits in a map_t. */
#define MAP_T_BITS (sizeof(map_t) * CHAR_BIT)
/* The constant 1 with type map_t.  Fully parenthesized so that it behaves as
 * a single operand in any expression context, e.g. 'sizeof MAP_1'. */
#define MAP_1 ((map_t) 1)
/* Largest value a map_t can hold, i.e. all bits set. */
#define MAP_MAX TYPE_MAXIMUM(map_t)
/* Tests bit 'IDX' (counted from 0) of 'MAP'.  Evaluates to the masked bit
 * itself: nonzero when the bit is set, zero when it is clear.  The result is
 * NOT normalized to 0/1. */
#define MAP_IS_SET(MAP, IDX) ((MAP) & (MAP_1 << (IDX)))

/* More efficient access to a map of single ullong. */
/* Loop header: copies 'MAP' into the loop-local 'map__' and runs the body
 * while 'map__' is nonzero.  Before each iteration 'IDX' is assigned
 * raw_ctz(map__); after each iteration 'map__' is replaced by
 * zero_rightmost_1bit(map__).  'MAP' itself is not modified. */
#define ULLONG_FOR_EACH_1(IDX, MAP)                 \
    for (uint64_t map__ = (MAP);                    \
         map__ && (((IDX) = raw_ctz(map__)), true); \
         map__ = zero_rightmost_1bit(map__))

/* Iterate through the indices of all 1-bits in 'MAP'. */
/* On each iteration 'IDX' holds the position of one 1-bit of 'MAP'. */
#define MAP_FOR_EACH_INDEX(IDX, MAP)            \
    ULLONG_FOR_EACH_1(IDX, MAP)

/* Returns X / Y, rounding up.  X must be nonnegative to round correctly. */
/* 该宏定义的作用是计算 X 是 Y 的多少倍,并向上取整,比如 X 是 5,Y 是 2,那么 X 除以 Y =2.5,向上取整那么为 3。*/
#define DIV_ROUND_UP(X, Y) (((X) + ((Y) - 1)) / (Y))
/* 判断 flow 结构有多少个 8 字节 */
#define FLOW_U64S (sizeof(struct flow) / sizeof(uint64_t))
// 判断需要多个 FLOW_U64S 是 64 的多少倍,取整
#define FLOWMAP_UNITS DIV_ROUND_UP(FLOW_U64S, MAP_T_BITS)

/* Bitmap over a "struct flow": each bit stands for one 8-byte unit of the
 * flow, so every map_t element of 'bits' covers MAP_T_BITS such units. */
struct flowmap {
    map_t bits[FLOWMAP_UNITS];
};

/* Compressed flow. */

/* A sparse representation of a "struct flow".
 *
 * A "struct flow" is fairly large and tends to be mostly zeros.  Sparse
 * representation has two advantages.  First, it saves memory and, more
 * importantly, minimizes the number of accessed cache lines.  Second, it saves
 * time when the goal is to iterate over only the nonzero parts of the struct.
 *
 * The map member hold one bit for each uint64_t in a "struct flow".  Each
 * 0-bit indicates that the corresponding uint64_t is zero, each 1-bit that it
 * *may* be nonzero (see below how this applies to minimasks).
 *
 * The values indicated by 'map' always follow the miniflow in memory.  The
 * user of the miniflow is responsible for always having enough storage after
 * the struct miniflow corresponding to the number of 1-bits in maps.
 *
 * Elements in values array are allowed to be zero.  This is useful for "struct
 * minimatch", for which ensuring that the miniflow and minimask members have
 * same maps allows optimization.  This allowance applies only to a miniflow
 * that is not a mask.  That is, a minimask may NOT have zero elements in its
 * values.
 *
 * A miniflow is always dynamically allocated so that the maps are followed by
 * at least as many elements as there are 1-bits in maps. */
struct miniflow {
    struct flowmap map;
    /* Followed by:
     *     uint64_t values[n];
     * where 'n' is miniflow_n_values(miniflow). */
};
/* A "struct flow" is large and memory-hungry, so a miniflow stores it in
 * compressed form.  It has two parts:
 *
 *   - 'map' is a bit array.  Each bit says whether the corresponding 8-byte
 *     unit of the flow holds data; there is one bit per 8-byte unit of the
 *     flow, rounded up to a whole number of map_t words.
 *
 *   - The values themselves, placed directly after 'map'.  Their storage is
 *     sized from the number of 1-bits in 'map': 8 bytes per 1-bit. */

函数

FLOWMAP_SET

/* Index of the 64-bit unit of "struct flow" in which 'FIELD' starts: the
 * byte offset of 'FIELD' divided by 8. */
#define FLOW_U64_OFFSET(FIELD)                          \
    (offsetof(struct flow, FIELD) / sizeof(uint64_t))

/* Number of 64-bit units spanned by a 'FIELD'. */
/* Computed from FLOW_U64_OFFREM(FIELD) plus the size of 'FIELD', rounded up
 * to whole 8-byte units.
 * NOTE(review): FLOW_U64_OFFREM is not defined in this excerpt; presumably it
 * is the byte offset of 'FIELD' within its first 64-bit unit -- confirm. */
#define FLOW_U64_SIZE(FIELD)                                            \
    DIV_ROUND_UP(FLOW_U64_OFFREM(FIELD) + MEMBER_SIZEOF(struct flow, FIELD), \
                 sizeof(uint64_t))

/* Set the 'n_bits' consecutive bits in 'fm', starting at bit 'idx'.
 * 'n_bits' can be at most MAP_T_BITS. 
 * fm 为 miniflow 的 map 成员,idx 是 8 字节起始偏移,nbits 为多少个 8 字节 bit 需要设置。*/
static inline void
flowmap_set(struct flowmap *fm, size_t idx, unsigned int n_bits)
{map_t n_bits_mask = (MAP_1 << n_bits) - 1;// 取掩码
    size_t unit = idx / MAP_T_BITS;// 计算位于第几个 64bit

    idx %= MAP_T_BITS;// 本 64bit 中的偏移
    // 设置位
    fm->bits[unit] |= n_bits_mask << idx;
    /* The seemingly unnecessary bounds check on 'unit' is a workaround for a
     * false-positive array out of bounds error by GCC 4.9. */
    /* 对于跨越两个 64bit 的位进行调整,比如有 10 个 bit 需要设置,但是起始的 bit 为 62,也就是
    ** 本 64bit fm->bits[unit] 只能存储两位即 62 和 63,剩下 8 位需要设置到下一个 64bit 位置
    ** 即 fm->bits[unit+1]
    ** 判断条件 unit + 1 < FLOWMAP_UNITS 是防止越界,idx + n_bits > MAP_T_BITS 表示确实跨越了
    ** 两个 64bit 域
    */
    if (unit + 1 < FLOWMAP_UNITS && idx + n_bits > MAP_T_BITS) {
        /* 'MAP_T_BITS - idx' bits were set on 'unit', set the remaining
         * bits from the next unit. */
        fm->bits[unit + 1] |= n_bits_mask >> (MAP_T_BITS - idx);
    }
}

/* Sets, in flowmap 'FM', the bits for every 64-bit unit of "struct flow"
 * that member 'FIELD' occupies. */
#define FLOWMAP_SET(FM, FIELD)                                      \
    flowmap_set(FM, FLOW_U64_OFFSET(FIELD), FLOW_U64_SIZE(FIELD))

/* Marks 64-bit unit 'OFS' in 'MF.map': invokes ASSERT_FLOWMAP_NOT_SET on
 * that position, then sets the single bit with flowmap_set().
 *
 * The expansion is a bare brace block (not do { } while (0)), and 'MF' is
 * used unparenthesized, so pass a plain lvalue name for 'MF'. */
#define miniflow_set_map(MF, OFS)            \
    {                                        \
    ASSERT_FLOWMAP_NOT_SET(&MF.map, (OFS));  \
    flowmap_set(&MF.map, (OFS), 1);          \
}
/* Pushes the 32-bit 'VALUE' for the field at byte offset 'OFS' within
 * "struct flow".  'OFS' / 8 is the index of the 64-bit unit holding the
 * field.
 *
 *   - 'OFS' % 8 == 0: the field is the first 4 bytes of its unit.  The unit's
 *     map bit is set and 'VALUE' is stored in the first 32 bits at 'MF.data'.
 *     'MF.data' is not advanced, so the second half can still be filled.
 *
 *   - 'OFS' % 8 == 4: the field is the last 4 bytes of its unit.  The unit
 *     must already be in the map; 'VALUE' is stored in the second 32 bits at
 *     'MF.data' and 'MF.data' advances to the next 64-bit slot.
 *
 *   - Any other 'OFS': nothing is stored.
 *
 * Keep explanatory comments out of the macro body (or use block comments
 * there): a "//" comment on a backslash-continued line swallows every
 * following line of the macro, and a backslash must be the very last
 * character on its line. */
#define miniflow_push_uint32_(MF, OFS, VALUE)   \
    {                                           \
    MINIFLOW_ASSERT(MF.data < MF.end);          \
                                                \
    if ((OFS) % 8 == 0) {                       \
        miniflow_set_map(MF, (OFS) / 8);        \
        *(uint32_t *)MF.data = (VALUE);         \
    } else if ((OFS) % 8 == 4) {                \
        miniflow_assert_in_map(MF, (OFS) / 8);  \
        *((uint32_t *)MF.data + 1) = (VALUE);   \
        MF.data++;                              \
    }                                           \
}
/* Pushes a 32-bit value for member 'FIELD' of "struct flow" into miniflow
 * context 'MF' by forwarding the byte offset of 'FIELD' and 'VALUE' to
 * miniflow_push_be32_().
 * NOTE(review): miniflow_push_be32_ is not defined in this excerpt;
 * presumably it wraps miniflow_push_uint32_, which updates both the map and
 * the value -- confirm. */
#define miniflow_push_be32(MF, FIELD, VALUE)                        \
    miniflow_push_be32_(MF, offsetof(struct flow, FIELD), VALUE)

miniflow_extract

该函数用于将报文内容提取出来构建 miniflow 结构。

/* Extracts the contents of 'packet' (its metadata and its L2-L4 headers)
 * into the miniflow 'dst'.
 *
 * Caller is responsible for initializing 'dst' with enough storage for
 * FLOW_U64S * 8 bytes.
 *
 * Fields are pushed in the order in which they appear in "struct flow";
 * that struct's comment requires any reordering of its members to be
 * reflected here.  Parsing stops (jumping to 'out') as soon as a header is
 * missing, truncated or of an unhandled type; whatever was pushed up to that
 * point is kept and the accumulated map is stored into 'dst'. */
void
miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
{
    const struct pkt_metadata *md = &packet->md;/* Packet metadata. */
    const void *data = dp_packet_data(packet);/* Start of the packet data. */
    size_t size = dp_packet_size(packet);/* Size of the packet data. */
    uint64_t *values = miniflow_values(dst);/* Start of the value storage of 'dst' (values, not a mask). */
    struct mf_ctx mf = { FLOWMAP_EMPTY_INITIALIZER, values,
                         values + FLOW_U64S };
    const char *l2;
    ovs_be16 dl_type;
    uint8_t nw_frag, nw_tos, nw_ttl, nw_proto;

    /* Metadata. */
    if (flow_tnl_dst_is_set(&md->tunnel)) {/* Tunnel fields are pushed only when flow_tnl_dst_is_set() reports a tunnel destination. */
        miniflow_push_words(mf, tunnel, &md->tunnel,
                            offsetof(struct flow_tnl, metadata) /
                            sizeof(uint64_t));

        if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) {if (md->tunnel.metadata.present.map) {
                miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata,
                                    sizeof md->tunnel.metadata /
                                    sizeof(uint64_t));
            }
        } else {if (md->tunnel.metadata.present.len) {
                miniflow_push_words(mf, tunnel.metadata.present,
                                    &md->tunnel.metadata.present, 1);
                miniflow_push_words(mf, tunnel.metadata.opts.gnv,
                                    md->tunnel.metadata.opts.gnv,
                                    DIV_ROUND_UP(md->tunnel.metadata.present.len,
                                                 sizeof(uint64_t)));
            }
        }
    }
    if (md->skb_priority || md->pkt_mark) {miniflow_push_uint32(mf, skb_priority, md->skb_priority);
        miniflow_push_uint32(mf, pkt_mark, md->pkt_mark);
    }
    miniflow_push_uint32(mf, dp_hash, md->dp_hash);
    miniflow_push_uint32(mf, in_port, odp_to_u32(md->in_port.odp_port));
    if (md->recirc_id || md->ct_state) {miniflow_push_uint32(mf, recirc_id, md->recirc_id);
        miniflow_push_uint16(mf, ct_state, md->ct_state);
        miniflow_push_uint16(mf, ct_zone, md->ct_zone);
    }

    if (md->ct_state) {miniflow_push_uint32(mf, ct_mark, md->ct_mark);
        miniflow_pad_to_64(mf, ct_mark);

        if (!ovs_u128_is_zero(md->ct_label)) {
            miniflow_push_words(mf, ct_label, &md->ct_label,
                                sizeof md->ct_label / sizeof(uint64_t));
        }
    }

    /* Initialize packet's layer pointer and offsets. */
    l2 = data;
    dp_packet_reset_offsets(packet);

    /* Must have full Ethernet header to proceed. */
    if (OVS_UNLIKELY(size < sizeof(struct eth_header))) {goto out;} else {
        ovs_be16 vlan_tci;

        /* Link layer. */
        ASSERT_SEQUENTIAL(dl_dst, dl_src);
        miniflow_push_macs(mf, dl_dst, data);
        /* dl_type, vlan_tci. */
        vlan_tci = parse_vlan(&data, &size);
        dl_type = parse_ethertype(&data, &size);
        miniflow_push_be16(mf, dl_type, dl_type);
        miniflow_push_be16(mf, vlan_tci, vlan_tci);
    }

    /* Parse mpls.  For an MPLS packet nothing beyond the label stack is
     * extracted: its dl_type matches none of the network-layer branches
     * below, so control reaches 'goto out' in the final else branch. */
    if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
        int count;
        const void *mpls = data;

        packet->l2_5_ofs = (char *)data - l2;
        count = parse_mpls(&data, &size);
        miniflow_push_words_32(mf, mpls_lse, mpls, count);
    }

    /* Network layer. */
    packet->l3_ofs = (char *)data - l2;
    // Extract the network-layer fields.  Only IPv4, IPv6, ARP and RARP frames are parsed here.
    nw_frag = 0;
    if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
        const struct ip_header *nh = data;
        int ip_len;
        uint16_t tot_len;

        if (OVS_UNLIKELY(size < IP_HEADER_LEN)) {goto out;}
        ip_len = IP_IHL(nh->ip_ihl_ver) * 4;

        if (OVS_UNLIKELY(ip_len < IP_HEADER_LEN)) {goto out;}
        if (OVS_UNLIKELY(size < ip_len)) {goto out;}
        tot_len = ntohs(nh->ip_tot_len);
        if (OVS_UNLIKELY(tot_len > size || ip_len > tot_len)) {goto out;}
        if (OVS_UNLIKELY(size - tot_len > UINT8_MAX)) {goto out;}
        dp_packet_set_l2_pad_size(packet, size - tot_len);
        size = tot_len;   /* Never pull padding. */

        /* Push both source and destination address at once. */
        miniflow_push_words(mf, nw_src, &nh->ip_src, 1);

        miniflow_push_be32(mf, ipv6_label, 0); /* Padding for IPv4. */

        nw_tos = nh->ip_tos;
        nw_ttl = nh->ip_ttl;
        nw_proto = nh->ip_proto;
        if (OVS_UNLIKELY(IP_IS_FRAGMENT(nh->ip_frag_off))) {
            nw_frag = FLOW_NW_FRAG_ANY;
            if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {nw_frag |= FLOW_NW_FRAG_LATER;}
        }
        data_pull(&data, &size, ip_len);
    } else if (dl_type == htons(ETH_TYPE_IPV6)) {
        const struct ovs_16aligned_ip6_hdr *nh;
        ovs_be32 tc_flow;
        uint16_t plen;

        if (OVS_UNLIKELY(size < sizeof *nh)) {goto out;}
        nh = data_pull(&data, &size, sizeof *nh);

        plen = ntohs(nh->ip6_plen);
        if (OVS_UNLIKELY(plen > size)) {goto out;}
        /* Jumbo Payload option not supported yet. */
        if (OVS_UNLIKELY(size - plen > UINT8_MAX)) {goto out;}
        dp_packet_set_l2_pad_size(packet, size - plen);
        size = plen;   /* Never pull padding. */

        miniflow_push_words(mf, ipv6_src, &nh->ip6_src,
                            sizeof nh->ip6_src / 8);
        miniflow_push_words(mf, ipv6_dst, &nh->ip6_dst,
                            sizeof nh->ip6_dst / 8);

        tc_flow = get_16aligned_be32(&nh->ip6_flow);
        {ovs_be32 label = tc_flow & htonl(IPV6_LABEL_MASK);
            miniflow_push_be32(mf, ipv6_label, label);
        }

        nw_tos = ntohl(tc_flow) >> 20;
        nw_ttl = nh->ip6_hlim;
        nw_proto = nh->ip6_nxt;

        if (!parse_ipv6_ext_hdrs__(&data, &size, &nw_proto, &nw_frag)) {goto out;}
    } else {if (dl_type == htons(ETH_TYPE_ARP) ||
            dl_type == htons(ETH_TYPE_RARP)) {struct eth_addr arp_buf[2];
            const struct arp_eth_header *arp = (const struct arp_eth_header *)
                data_try_pull(&data, &size, ARP_ETH_HEADER_LEN);

            if (OVS_LIKELY(arp) && OVS_LIKELY(arp->ar_hrd == htons(1))
                && OVS_LIKELY(arp->ar_pro == htons(ETH_TYPE_IP))
                && OVS_LIKELY(arp->ar_hln == ETH_ADDR_LEN)
                && OVS_LIKELY(arp->ar_pln == 4)) {
                miniflow_push_be32(mf, nw_src,
                                   get_16aligned_be32(&arp->ar_spa));
                miniflow_push_be32(mf, nw_dst,
                                   get_16aligned_be32(&arp->ar_tpa));

                /* We only match on the lower 8 bits of the opcode. */
                if (OVS_LIKELY(ntohs(arp->ar_op) <= 0xff)) {miniflow_push_be32(mf, ipv6_label, 0); /* Pad with ARP. */
                    miniflow_push_be32(mf, nw_frag, htonl(ntohs(arp->ar_op)));
                }

                /* Must be adjacent. */
                ASSERT_SEQUENTIAL(arp_sha, arp_tha);

                arp_buf[0] = arp->ar_sha;
                arp_buf[1] = arp->ar_tha;
                miniflow_push_macs(mf, arp_sha, arp_buf);
                miniflow_pad_to_64(mf, arp_tha);
            }
        }
        goto out;
    }

    packet->l4_ofs = (char *)data - l2;
    miniflow_push_be32(mf, nw_frag,
                       BYTES_TO_BE32(nw_frag, nw_tos, nw_ttl, nw_proto));
    // Extract the transport layer.  The protocols handled below are TCP, UDP, SCTP, ICMP, IGMP and ICMPv6; later fragments are skipped.
    if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) {if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
                const struct tcp_header *tcp = data;

                miniflow_push_be32(mf, arp_tha.ea[2], 0);
                miniflow_push_be32(mf, tcp_flags,
                                   TCP_FLAGS_BE32(tcp->tcp_ctl));
                miniflow_push_be16(mf, tp_src, tcp->tcp_src);
                miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
                miniflow_pad_to_64(mf, tp_dst);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
                const struct udp_header *udp = data;

                miniflow_push_be16(mf, tp_src, udp->udp_src);
                miniflow_push_be16(mf, tp_dst, udp->udp_dst);
                miniflow_pad_to_64(mf, tp_dst);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
                const struct sctp_header *sctp = data;

                miniflow_push_be16(mf, tp_src, sctp->sctp_src);
                miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                miniflow_pad_to_64(mf, tp_dst);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
                const struct icmp_header *icmp = data;

                miniflow_push_be16(mf, tp_src, htons(icmp->icmp_type));
                miniflow_push_be16(mf, tp_dst, htons(icmp->icmp_code));
                miniflow_pad_to_64(mf, tp_dst);
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_IGMP)) {if (OVS_LIKELY(size >= IGMP_HEADER_LEN)) {
                const struct igmp_header *igmp = data;

                miniflow_push_be16(mf, tp_src, htons(igmp->igmp_type));
                miniflow_push_be16(mf, tp_dst, htons(igmp->igmp_code));
                miniflow_push_be32(mf, igmp_group_ip4,
                                   get_16aligned_be32(&igmp->group));
            }
        } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMPV6)) {if (OVS_LIKELY(size >= sizeof(struct icmp6_hdr))) {
                const struct in6_addr *nd_target = NULL;
                struct eth_addr arp_buf[2] = {{ { { 0} } } };
                const struct icmp6_hdr *icmp = data_pull(&data, &size,
                                                         sizeof *icmp);
                parse_icmpv6(&data, &size, icmp, &nd_target, arp_buf);
                if (nd_target) {
                    miniflow_push_words(mf, nd_target, nd_target,
                                        sizeof *nd_target / sizeof(uint64_t));
                }
                miniflow_push_macs(mf, arp_sha, arp_buf);
                miniflow_pad_to_64(mf, arp_tha);
                miniflow_push_be16(mf, tp_src, htons(icmp->icmp6_type));
                miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code));
                miniflow_pad_to_64(mf, tp_dst);
            }
        }
    }
 out:
    /* Publish the map accumulated in 'mf'; the values were written in place
     * through 'mf' as each field was pushed. */
    dst->map = mf.map;
}

miniflow_expand

该函数用于将 miniflow 还原成 flow。

/* ORs the values of miniflow 'src' selected by 'subset' into the matching
 * 64-bit units of 'dst', leaving the result in 'dst'.  'subset' must be a
 * subset of 'src's map.
 *
 * Values are consumed in order from the start of 'src's value array, one per
 * 1-bit of 'subset'. */
static inline void
flow_union_with_miniflow_subset(struct flow *dst, const struct miniflow *src,
                                struct flowmap subset)
{
    const uint64_t *next_value = miniflow_get_values(src);
    uint64_t *unit_base = (uint64_t *) dst; /* View 'dst' as 64-bit units. */
    map_t unit_bits;

    /* Walk the map one map_t word at a time. */
    FLOWMAP_FOR_EACH_MAP (unit_bits, subset) {
        size_t word;

        /* 'word' is the index, relative to 'unit_base', of each 64-bit unit
         * whose bit is set in the current map word. */
        MAP_FOR_EACH_INDEX(word, unit_bits) {
            unit_base[word] |= *next_value;
            next_value++;
        }

        /* One map word covers MAP_T_BITS 64-bit units of the flow. */
        unit_base += MAP_T_BITS;
    }
}
/* Perform a bitwise OR of miniflow 'src' flow data with the equivalent
 * fields in 'dst', storing the result in 'dst'. 
 * 通过按位或进行 flow 的等价填充
 */
static inline void
flow_union_with_miniflow(struct flow *dst, const struct miniflow *src)
{flow_union_with_miniflow_subset(dst, src, src->map);
}
/* Initializes 'dst' as a copy of 'src': expands the miniflow 'src' back into
 * the full "struct flow" 'dst'. */
void
miniflow_expand(const struct miniflow *src, struct flow *dst)
{
    /* Start from an all-zero flow, then OR in the stored values. */
    memset(dst, 0, sizeof(struct flow));
    flow_union_with_miniflow(dst, src);
}
退出移动版