共计 35041 个字符,预计需要花费 88 分钟才能阅读完成。
VPP 接口层分析
接口层是硬件驱动和上层软件之间一层抽象代码,屏蔽硬件的差异,为上层软件提供一些统一的操作接口。上层软件调用接口层的操作进行报文的读入与发出,同时可以进行硬件设备的设置以及相关信息 (比如统计数据) 的读取。
vpp 支持多种驱动类型的网络设备,比如 dpdk,netmap,af_packet 等等。同一种驱动的物理设备可以有多个,比如一台设备上可以有多个 dpdk 接口,因此抽象出了链路层接口 hw_interface。网络中还有很多虚拟设备,它们依附于物理设备,例如同一个 hw_interface 上最多可以有 4096 个 vlan 虚拟子接口。为了描述这些虚拟接口,vpp 在 hw_interface 的基础上又封装了一层 sw_interface 来表示接口。sw_interface 是整个接口层对上层软件的抽象,上层软件使用 sw_interface 索引来表示具体操作的设备。
VPP 支持的物理网络设备类型
一类设备表示使用相同硬件驱动的设备,比如 dpdk 类型的以太网设备,af_packet 类型的虚拟以太网设备,netmap 以太网设备等,属于物理层的描述。
设备类描述结构
/* A class of hardware interface devices. */
/* Set of operations for one class of hardware interface; these functions are a layer wrapped around the hardware device driver. */
typedef struct _vnet_device_class
{
/* Index into main vector (the class index). */
u32 index;
/* Device name (e.g. "FOOBAR 1234a"); the name of the device class. */
char *name;
/* Function to call when hardware interface is added/deleted. */
/* I.e. called when an instance of this device class is added or deleted. */
vnet_interface_function_t *interface_add_del_function;
/* Function to bring device administratively up/down. */
/* Device UP/DOWN operation. */
vnet_interface_function_t *admin_up_down_function;
/* Function to call when sub-interface is added/deleted */
/* Adds/deletes a sub-interface whose parent is a device of this class. */
vnet_subif_add_del_function_t *subif_add_del_function;
/* Function to call interface rx mode is changed */
/* Receive-mode change handler. */
vnet_interface_set_rx_mode_function_t *rx_mode_change_function;
/* Function to call interface l2 mode is changed */
/* Handler for a change of the interface's layer-2 mode. */
vnet_interface_set_l2_mode_function_t *set_l2_mode_function;
/* Redistribute flag changes/existence of this interface class. */
u32 redistribute;
/* Transmit function. */
/* Used as the function of the per-interface tx node. */
vlib_node_function_t *tx_function;
/* Transmit function candidate registration with priority */
/* Several candidate tx functions may be registered; the one with the highest priority ends up assigned to tx_function. */
vlib_node_fn_registration_t *tx_fn_registrations;
/* Error strings indexed by error code for this node. */
/* Array of error-reason strings for the tx function. */
char **tx_function_error_strings;
/* Number of error codes used by this node. */
/* Number of entries in tx_function_error_strings. */
u32 tx_function_n_errors;
/* Renumber device name [only!] support, a control-plane kludge */
int (*name_renumber) (struct vnet_hw_interface_t * hi,
u32 new_dev_instance);
/* Interface flow offload operations */
/* Entry point for the flow-offload operations of this class. */
vnet_flow_dev_ops_function_t *flow_ops_function;
/* Format device instance as name. */
format_function_t *format_device_name;
/* Parse function for device name. */
unformat_function_t *unformat_device_name;
/* Format device verbosely for this class. */
format_function_t *format_device;
/* Trace buffer format for TX function. */
format_function_t *format_tx_trace;
/* Format flow offload entry */
format_function_t *format_flow;
/* Function to clear hardware counters for device. */
/* Clears the statistics of one device instance. */
void (*clear_counters) (u32 dev_class_instance);
/* NOTE(review): undocumented in the original; presumably reports whether hw_class_index may be used with hw_if_index - confirm. */
uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
u32 hw_if_index,
u32 hw_class_index);
/* Called when hardware class of an interface changes. */
void (*hw_class_change) (struct vnet_main_t * vnm,
u32 hw_if_index, u32 new_hw_class_index);
/* Called to redirect traffic from a specific interface instance */
/* Force-redirects the traffic of one interface to the given node. */
void (*rx_redirect_to_node) (struct vnet_main_t * vnm,
u32 hw_if_index, u32 node_index);
/* Link-list of all device classes set up by constructors created below */
/* Next element of that singly linked registration list. */
struct _vnet_device_class *next_class_registration;
/* Function to set mac address. */
/* Link-layer address change handler. */
vnet_interface_set_mac_address_function_t *mac_addr_change_function;
} vnet_device_class_t;
设备类输出函数注册结构
该结构与 node 的功能函数注册结构是一样的
/* One candidate implementation of a node (or device-class tx) function.
 * Candidates are chained into a singly linked list; vnet_interface_init
 * walks the list of a device class and keeps the function of the entry
 * with the highest priority. */
typedef struct _vlib_node_fn_registration
{
/* The candidate function itself. */
vlib_node_function_t *function;
/* Selection priority; the largest value wins. */
int priority;
/* Next candidate in the list, or NULL at the end. */
struct _vlib_node_fn_registration *next_registration;
/* Name of this registration. */
char *name;
} vlib_node_fn_registration_t;
注册宏
/* VNET_DEVICE_CLASS (x, ...) declares the device class object x and emits
 * a constructor that pushes x onto the head of
 * vnet_main_t.device_class_registrations, plus a destructor that removes it
 * again.  The macro ends with a declaration of x so that the user can
 * append "= { ... };" as the initializer. */
#ifndef CLIB_MARCH_VARIANT /* Macro that registers / unregisters a device class (its set of operation functions) */
#define VNET_DEVICE_CLASS(x,...) \
__VA_ARGS__ vnet_device_class_t x; \
static void __vnet_add_device_class_registration_##x (void) \
__attribute__((__constructor__)) ; \
static void __vnet_add_device_class_registration_##x (void) \
{ \
vnet_main_t * vnm = vnet_get_main(); \
x.next_class_registration = vnm->device_class_registrations; \
vnm->device_class_registrations = &x; \
} \
static void __vnet_rm_device_class_registration_##x (void) \
__attribute__((__destructor__)) ; \
static void __vnet_rm_device_class_registration_##x (void) \
{ \
vnet_main_t * vnm = vnet_get_main(); \
VLIB_REMOVE_FROM_LINKED_LIST (vnm->device_class_registrations, \
&x, next_class_registration); \
} \
__VA_ARGS__ vnet_device_class_t x
#else
/* create unused pointer to silence compiler warnings and get whole
function optimized out */
#define VNET_DEVICE_CLASS(x,...) \
static __clib_unused vnet_device_class_t __clib_unused_##x
#endif
/* Macro that registers a tx (output) function candidate for a device class.
 * It declares the march-suffixed function, creates a static
 * vlib_node_fn_registration_t for it, and emits a constructor that stamps
 * the registration with CLIB_MARCH_FN_PRIORITY() and pushes it onto
 * devclass.tx_fn_registrations.  The macro ends with the function header,
 * so the user continues with the parameter list and body. */
#define VNET_DEVICE_CLASS_TX_FN(devclass) \
uword CLIB_MARCH_SFX (devclass##_tx_fn)(); \
static vlib_node_fn_registration_t \
CLIB_MARCH_SFX(devclass##_tx_fn_registration) = \
{.function = &CLIB_MARCH_SFX (devclass##_tx_fn), }; \
\
static void __clib_constructor \
CLIB_MARCH_SFX (devclass##_tx_fn_multiarch_register) (void) \
{ \
extern vnet_device_class_t devclass; \
vlib_node_fn_registration_t *r; \
r = &CLIB_MARCH_SFX (devclass##_tx_fn_registration); \
r->priority = CLIB_MARCH_FN_PRIORITY(); \
r->next_registration = devclass.tx_fn_registrations; \
devclass.tx_fn_registrations = r; \
} \
uword CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (devclass##_tx_fn)
实例
/* Example: registration of the "dpdk" device class.  No .tx_function is
 * set here; NOTE(review): presumably the tx function is supplied through
 * VNET_DEVICE_CLASS_TX_FN candidates - confirm against the dpdk plugin. */
/* *INDENT-OFF* */
VNET_DEVICE_CLASS (dpdk_device_class) =
{
.name = "dpdk",
.tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
.tx_function_error_strings = dpdk_tx_func_error_strings,
.format_device_name = format_dpdk_device_name,
.format_device = format_dpdk_device,
.format_tx_trace = format_dpdk_tx_trace,
.clear_counters = dpdk_clear_hw_interface_counters,
.admin_up_down_function = dpdk_interface_admin_up_down,
.subif_add_del_function = dpdk_subif_add_del_function,
.rx_redirect_to_node = dpdk_set_interface_next_node,
.mac_addr_change_function = dpdk_set_mac_address,
.format_flow = format_dpdk_flow,
.flow_ops_function = dpdk_flow_ops_fn,
};
链路层 interface 类型
在物理设备之上,根据接口的功能进一步抽象出一层硬件接口类型,在 OSI 七层模型中属于链路层。比如 ethernet 设备,bond 设备,vlan 设备等。
interface 类型描述结构
/* Layer-2 (e.g. Ethernet) interface class. */
typedef struct _vnet_hw_interface_class
{
/* Index into main vector. */
/* Assigned by vnet_interface_init when the registration list is walked. */
u32 index;
/* Class name (e.g. "Ethernet"). */
char *name;
/* Flags.  Per the original note there is currently a single flag: whether this is a point-to-point interface; point-to-point interfaces need no neighbour information. */
vnet_hw_interface_class_flags_t flags;
/* Function to call when hardware interface is added/deleted. */
vnet_interface_function_t *interface_add_del_function;
/* Function to bring interface administratively up/down. */
vnet_interface_function_t *admin_up_down_function;
/* Function to call when link state changes. */
vnet_interface_function_t *link_up_down_function;
/* Function to call when link MAC changes. */
vnet_interface_set_mac_address_function_t *mac_addr_change_function;
/* Format function to display interface name. */
format_function_t *format_interface_name;
/* Format function to display interface address. */
format_function_t *format_address;
/* Format packet header for this interface class. */
format_function_t *format_header;
/* Format device verbosely for this class. */
format_function_t *format_device;
/* Parser for hardware (e.g. ethernet) address. */
unformat_function_t *unformat_hw_address;
/* Parser for packet header for e.g. rewrite string. */
unformat_function_t *unformat_header;
/* Builds a rewrite string for the interface to the destination
* for the payload/link type.
* vnet_interface_init installs default_build_rewrite when this is NULL. */
u8 *(*build_rewrite) (struct vnet_main_t * vnm,
u32 sw_if_index,
vnet_link_t link_type, const void *dst_hw_address);
/* Update an adjacency added by FIB (as opposed to via the
* neighbour resolution protocol).
* vnet_interface_init installs default_update_adjacency when this is NULL. */
void (*update_adjacency) (struct vnet_main_t * vnm,
u32 sw_if_index, u32 adj_index);
/* NOTE(review): undocumented in the original; presumably reports whether hw_class_index may be used with hw_if_index - confirm. */
uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
u32 hw_if_index,
u32 hw_class_index);
/* Called when hw interface class is changed and old hardware instance
may want to be deleted. */
void (*hw_class_change) (struct vnet_main_t * vnm, u32 hw_if_index,
u32 old_class_index, u32 new_class_index);
/* List of hw interface classes, built by constructors */
struct _vnet_hw_interface_class *next_class_registration;
} vnet_hw_interface_class_t;
注册宏
/**
* @brief Default adjacency update function
*/
extern void default_update_adjacency (struct vnet_main_t *vnm,
u32 sw_if_index, u32 adj_index);
/* VNET_HW_INTERFACE_CLASS (x, ...) declares the hw interface class object x
 * and emits a constructor that pushes x onto the head of
 * vnet_main_t.hw_interface_class_registrations, plus a destructor that
 * removes it again.  The macro ends with a declaration of x so that the
 * user can append "= { ... };" as the initializer. */
#define VNET_HW_INTERFACE_CLASS(x,...) \
__VA_ARGS__ vnet_hw_interface_class_t x; \
static void __vnet_add_hw_interface_class_registration_##x (void) \
__attribute__((__constructor__)) ; \
static void __vnet_add_hw_interface_class_registration_##x (void) \
{ \
vnet_main_t * vnm = vnet_get_main(); \
x.next_class_registration = vnm->hw_interface_class_registrations; \
vnm->hw_interface_class_registrations = &x; \
} \
static void __vnet_rm_hw_interface_class_registration_##x (void) \
__attribute__((__destructor__)) ; \
static void __vnet_rm_hw_interface_class_registration_##x (void) \
{ \
vnet_main_t * vnm = vnet_get_main(); \
VLIB_REMOVE_FROM_LINKED_LIST (vnm->hw_interface_class_registrations,\
&x, next_class_registration); \
} \
__VA_ARGS__ vnet_hw_interface_class_t x
实例
/* Example: registration of the "Ethernet" hw interface class with its
 * address/header formatters and parsers, rewrite builder, adjacency update
 * and MAC change handlers. */
/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) =
{
.name = "Ethernet",
.format_address = format_ethernet_address,
.format_header = format_ethernet_header_with_length,
.unformat_hw_address = unformat_ethernet_address,
.unformat_header = unformat_ethernet_header,
.build_rewrite = ethernet_build_rewrite,
.update_adjacency = ethernet_update_adjacency,
.mac_addr_change_function = ethernet_mac_change,
};
/* *INDENT-ON* */
软件层 interface
/* Bit flags of a software interface; each enumerator is a distinct bit so
 * several may be OR-ed together.  The enum is declared packed. */
typedef enum vnet_sw_interface_flags_t_
{
VNET_SW_INTERFACE_FLAG_NONE = 0,
/* Interface is "up" meaning administratively up.
Up in the sense of link state being up is maintained by hardware interface. */
VNET_SW_INTERFACE_FLAG_ADMIN_UP = (1 << 0),
/* Interface is disabled for forwarding: punt all traffic to slow-path. */
VNET_SW_INTERFACE_FLAG_PUNT = (1 << 1),
VNET_SW_INTERFACE_FLAG_PROXY_ARP = (1 << 2),
VNET_SW_INTERFACE_FLAG_UNNUMBERED = (1 << 3),
VNET_SW_INTERFACE_FLAG_BOND_SLAVE = (1 << 4),
/* Interface does not appear in CLI/API */
VNET_SW_INTERFACE_FLAG_HIDDEN = (1 << 5),
/* Interface in ERROR state */
VNET_SW_INTERFACE_FLAG_ERROR = (1 << 6),
/* Interface has IP configured directed broadcast */
VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST = (1 << 7),
} __attribute__ ((packed)) vnet_sw_interface_flags_t;
/* Software-interface. This corresponds to a Ethernet VLAN, ATM vc, a
tunnel, etc. Configuration (e.g. IP address) gets attached to
software interface.
*/
typedef struct
{
/* Interface type (hardware, sub-interface, p2p, pipe). */
vnet_sw_interface_type_t type:16;
/* Interface flags. */
vnet_sw_interface_flags_t flags;
/* Index for this interface. */
/* This is the software interface index used to refer to the interface. */
u32 sw_if_index;
/* Software interface index of super-interface;
equal to sw_if_index if this interface is not a
sub-interface.
* In other words: for a sub-interface this is the sw_if_index of the interface it is attached to.
*/
u32 sup_sw_if_index;
/* this swif is unnumbered, use addresses on unnumbered_sw_if_index... */
u32 unnumbered_sw_if_index;
/* VNET_SW_INTERFACE_TYPE_HARDWARE. */
u32 hw_if_index;
/* MTU for network layer (not including L2 headers) */
/* One entry per MTU kind, VNET_N_MTU in total. */
u32 mtu[VNET_N_MTU];
/* VNET_SW_INTERFACE_TYPE_SUB. */
/* Holds the sub-interface information when this interface is a sub-interface. */
vnet_sub_interface_t sub;
/* VNET_SW_INTERFACE_TYPE_P2P. */
vnet_p2p_sub_interface_t p2p;
vnet_flood_class_t flood_class;
} vnet_sw_interface_t;
子接口
/* Kind of a software interface; stored in vnet_sw_interface_t.type. */
typedef enum
{
/* A hw interface. */
VNET_SW_INTERFACE_TYPE_HARDWARE,
/* A sub-interface. */
VNET_SW_INTERFACE_TYPE_SUB,
VNET_SW_INTERFACE_TYPE_P2P,
VNET_SW_INTERFACE_TYPE_PIPE,
} vnet_sw_interface_type_t;
/* Description of a sub-interface: its id under the parent interface plus
 * the Ethernet VLAN classification data used to match packets to it. */
typedef struct
{
/*
* Subinterface ID. A number 0-N to uniquely identify
* this subinterface under the main (parent?) interface
*/
u32 id;
/* Classification data. Used to associate packet header with subinterface. */
struct
{
u16 outer_vlan_id;
u16 inner_vlan_id;
/* The same 16 bits can be read as a raw value or as individual bit flags. */
union
{
u16 raw_flags;
struct
{
u16 no_tags:1;
u16 one_tag:1;
u16 two_tags:1;
u16 dot1ad:1; /* 0 = dot1q, 1=dot1ad */
u16 exact_match:1;
u16 default_sub:1;
u16 outer_vlan_id_any:1;
u16 inner_vlan_id_any:1;
} flags;
};
} eth;
} vnet_sub_interface_t;
接口层初始化过程
前面所述的设备和接口注册信息,是在 main 函数之前进行注册的,main 函数需要对这些信息进行整理加工。在 vpp 中 vnet_main_t 中的 interface_main 成员维护了接口相关的信息。
接口管理结构
/* Global state of the interface layer (vnet_main_t.interface_main). */
typedef struct
{
/* Hardware interfaces.  All link-layer interfaces live in this pool. */
vnet_hw_interface_t *hw_interfaces;
/* Hash table mapping HW interface name to index. */
uword *hw_interface_by_name;
/* Vectors if hardware interface classes and device classes. */
/* Copies of the statically registered link-layer interface classes, appended by vnet_interface_init. */
vnet_hw_interface_class_t *hw_interface_classes;
/* Copies of the statically registered device classes, appended by vnet_interface_init. */
vnet_device_class_t *device_classes;
/* Hash table mapping name to hw interface/device class. */
/* Link-layer interface class name -> class index. */
uword *hw_interface_class_by_name;
/* Device class name -> class index. */
uword *device_class_by_name;
/* Software interfaces. */
/* Array of all software interfaces. */
vnet_sw_interface_t *sw_interfaces;
/* Hash table mapping sub intfc sw_if_index by sup sw_if_index and sub id */
uword *sw_if_index_by_sup_and_sub;
/* Software interface counters both simple and combined
* packet and byte counters.
* sw_if_counter_lock is the lock that goes with these counters.
*/
volatile u32 *sw_if_counter_lock;
vlib_simple_counter_main_t *sw_if_counters;
vlib_combined_counter_main_t *combined_sw_if_counters;
/* Output/tx node pairs of deleted interfaces; vnet_register_interface re-uses them for new interfaces. */
vnet_hw_interface_nodes_t *deleted_hw_interface_nodes;
/* pcap drop tracing */
int drop_pcap_enable;
pcap_main_t pcap_main;
u8 *pcap_filename;
u32 pcap_sw_if_index;
u32 pcap_pkts_to_capture;
uword *pcap_drop_filter_hash;
/* per-thread data */
vnet_interface_per_thread_data_t *per_thread_data;
/* enable GSO processing in packet path if this count is > 0 */
u32 gso_interface_count;
/* feature_arc_index */
u8 output_feature_arc_index;
} vnet_interface_main_t;
接口管理初始化函数
/*
 * Interface layer initialisation.
 *
 * Verifies that the vnet buffer opaque fits into the vlib buffer opaque,
 * names the simple and combined software-interface counters, assigns an
 * index to every registered device class and hw interface class (and
 * records it in the name hashes), picks the highest-priority tx function
 * candidate of each device class, and allocates the per-thread data.
 *
 * Returns 0 on success, or the clib error reported by the size check or by
 * one of the init functions called from here.
 */
clib_error_t *
vnet_interface_init (vlib_main_t * vm)
{
  vnet_main_t *vnm = vnet_get_main ();
  vnet_interface_main_t *im = &vnm->interface_main;
  vlib_buffer_t *b = 0;
  vnet_buffer_opaque_t *o = 0;
  clib_error_t *error;
  vnet_device_class_t *dev_class;
  vnet_hw_interface_class_t *hw_class;

  /*
   * Keep people from shooting themselves in the foot.
   */
  if (sizeof (b->opaque) != sizeof (vnet_buffer_opaque_t))
    {
#define _(a) if (sizeof(o->a) > sizeof (o->unused)) \
clib_warning \
("FATAL: size of opaque union subtype %s is %d (max %d)", \
#a, sizeof(o->a), sizeof (o->unused));
      foreach_buffer_opaque_union_subtype;
#undef _
      return clib_error_return
	(0, "FATAL: size of vlib buffer opaque %d, size of vnet opaque %d",
	 sizeof (b->opaque), sizeof (vnet_buffer_opaque_t));
    }

  /* Lock word that guards sw_if_counters; held while the counters are set up. */
  im->sw_if_counter_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
						   CLIB_CACHE_LINE_BYTES);
  im->sw_if_counter_lock[0] = 1;	/* should be no need */

  /* Give every simple counter its name and stat segment name. */
  vec_validate (im->sw_if_counters, VNET_N_SIMPLE_INTERFACE_COUNTER - 1);
#define _(E,n,p) \
im->sw_if_counters[VNET_INTERFACE_COUNTER_##E].name = #n; \
im->sw_if_counters[VNET_INTERFACE_COUNTER_##E].stat_segment_name = "/" #p "/" #n;
  foreach_simple_interface_counter_name
#undef _

  /* Same for the combined (packet + byte) counters. */
  vec_validate (im->combined_sw_if_counters,
		VNET_N_COMBINED_INTERFACE_COUNTER - 1);
#define _(E,n,p) \
im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_##E].name = #n; \
im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_##E].stat_segment_name = "/" #p "/" #n;
  foreach_combined_interface_counter_name
#undef _

  im->sw_if_counter_lock[0] = 0;

  /* Device classes: index them in registration-list order and map
     name -> index. */
  im->device_class_by_name = hash_create_string ( /* size */ 0,
						 sizeof (uword));
  for (dev_class = vnm->device_class_registrations; dev_class;
       dev_class = dev_class->next_class_registration)
    {
      dev_class->index = vec_len (im->device_classes);
      hash_set_mem (im->device_class_by_name, dev_class->name,
		    dev_class->index);

      if (dev_class->tx_fn_registrations)
	{
	  vlib_node_fn_registration_t *candidate;
	  int best_priority = -1;

	  /* to avoid confusion, please remove ".tx_function" statement
	     from VNET_DEVICE_CLASS() if using function candidates */
	  ASSERT (dev_class->tx_function == 0);

	  /* Keep the candidate with the highest priority. */
	  for (candidate = dev_class->tx_fn_registrations; candidate;
	       candidate = candidate->next_registration)
	    {
	      if (candidate->priority > best_priority)
		{
		  best_priority = candidate->priority;
		  dev_class->tx_function = candidate->function;
		}
	    }
	}

      /* Store a copy of the (now complete) class in the main vector. */
      vec_add1 (im->device_classes, dev_class[0]);
    }

  /* Hw interface classes: same treatment, plus default callbacks. */
  im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
						       sizeof (uword));
  im->sw_if_index_by_sup_and_sub = hash_create_mem (0, sizeof (u64),
						    sizeof (uword));
  for (hw_class = vnm->hw_interface_class_registrations; hw_class;
       hw_class = hw_class->next_class_registration)
    {
      hw_class->index = vec_len (im->hw_interface_classes);
      hash_set_mem (im->hw_interface_class_by_name, hw_class->name,
		    hw_class->index);

      if (hw_class->build_rewrite == NULL)
	hw_class->build_rewrite = default_build_rewrite;
      if (hw_class->update_adjacency == NULL)
	hw_class->update_adjacency = default_update_adjacency;

      vec_add1 (im->hw_interface_classes, hw_class[0]);
    }

  im->gso_interface_count = 0;

  /* init per-thread data */
  vec_validate_aligned (im->per_thread_data, vlib_num_workers (),
			CLIB_CACHE_LINE_BYTES);

  error = vlib_call_init_function (vm, vnet_interface_cli_init);
  if (error)
    return error;

  vnm->interface_tag_by_sw_if_index = hash_create (0, sizeof (uword));

#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
  error = vlib_call_init_function (vm, trajectory_trace_init);
  if (error)
    return error;
#endif

  return 0;
}

VLIB_INIT_FUNCTION (vnet_interface_init);
添加一个接口
以 af_packet 类型的以太网接口为例进行说明
/*
 * Create an af_packet interface on top of the host interface
 * host_if_name.
 *
 * vm            vlib main of the calling thread (used for the time source
 *               when a random MAC address is generated).
 * host_if_name  vector holding the host interface name; it is copied, the
 *               caller keeps ownership of its own vector.
 * hw_addr_set   optional 6-byte MAC address; when NULL a random
 *               02:fe:xx:xx:xx:xx address is generated.
 * sw_if_index   optional; receives the software interface index of the
 *               new interface, or of the existing one when
 *               VNET_API_ERROR_IF_ALREADY_EXISTS is returned.
 *
 * Returns 0 on success, otherwise a VNET_API_ERROR_* code or the negative
 * result of the failing ioctl / create_packet_v2_sock call.
 *
 * On success the name copy, rx_req and tx_req are owned by the new
 * af_packet_if_t; on every failure path they are released here.
 */
int
af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
		     u32 * sw_if_index)
{
  af_packet_main_t *apm = &af_packet_main;
  int ret, fd = -1, fd2 = -1;
  struct tpacket_req *rx_req = 0;
  struct tpacket_req *tx_req = 0;
  struct ifreq ifr;
  u8 *ring = 0;
  af_packet_if_t *apif = 0;
  u8 hw_addr[6];
  clib_error_t *error;
  vnet_sw_interface_t *sw;
  vnet_hw_interface_t *hw;
  vlib_thread_main_t *tm = vlib_get_thread_main ();
  vnet_main_t *vnm = vnet_get_main ();
  uword *p;
  uword if_index;
  u8 *host_if_name_dup = 0;
  int host_if_index = -1;

  /* Already created?  Report the existing interface.  Nothing has been
     allocated yet at this point, so a plain return leaks nothing. */
  p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
  if (p)
    {
      apif = vec_elt_at_index (apm->interfaces, p[0]);
      if (sw_if_index)
	*sw_if_index = apif->sw_if_index;
      return VNET_API_ERROR_IF_ALREADY_EXISTS;
    }

  host_if_name_dup = vec_dup (host_if_name);

  vec_validate (rx_req, 0);
  rx_req->tp_block_size = AF_PACKET_RX_BLOCK_SIZE;
  rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE;
  rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
  rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR;

  vec_validate (tx_req, 0);
  tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE;
  tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE;
  tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
  tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR;

  /*
   * make sure host side of interface is 'UP' before binding AF_PACKET
   * socket on it.
   */
  if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
    {
      vlib_log_debug (apm->log_class, "Failed to create socket");
      ret = VNET_API_ERROR_SYSCALL_ERROR_1;
      goto error;
    }

  /* Start from a zeroed request so the name is always NUL padded, and
     refuse names that would overflow ifr_name. */
  clib_memset (&ifr, 0, sizeof (ifr));
  if (vec_len (host_if_name) > sizeof (ifr.ifr_name))
    {
      vlib_log_debug (apm->log_class, "Host interface name too long");
      ret = VNET_API_ERROR_INVALID_INTERFACE;
      goto error;
    }
  clib_memcpy (ifr.ifr_name, (const char *) host_if_name,
	       vec_len (host_if_name));

  /* Look up the host interface index by name. */
  if ((ret = ioctl (fd2, SIOCGIFINDEX, &ifr)) < 0)
    {
      vlib_log_debug (apm->log_class, "af_packet_create error: %d", ret);
      /* Leave through the common path so fd2 and the vectors are freed. */
      ret = VNET_API_ERROR_INVALID_INTERFACE;
      goto error;
    }

  host_if_index = ifr.ifr_ifindex;

  /* Read the host interface flags. */
  if ((ret = ioctl (fd2, SIOCGIFFLAGS, &ifr)) < 0)
    {
      vlib_log_warn (apm->log_class, "af_packet_create error: %d", ret);
      goto error;
    }

  /* Bring the host interface up if it is not up yet. */
  if (!(ifr.ifr_flags & IFF_UP))
    {
      ifr.ifr_flags |= IFF_UP;
      if ((ret = ioctl (fd2, SIOCSIFFLAGS, &ifr)) < 0)
	{
	  vlib_log_warn (apm->log_class, "af_packet_create error: %d", ret);
	  goto error;
	}
    }

  /* Done with the helper socket.  Reset fd2 so that the error path below
     cannot close the same descriptor number a second time (it may have
     been re-used by the packet socket opened next). */
  if (fd2 > -1)
    {
      close (fd2);
      fd2 = -1;
    }

  /* Create the AF_PACKET socket and its rx/tx ring. */
  ret = create_packet_v2_sock (host_if_index, rx_req, tx_req, &fd, &ring);
  if (ret != 0)
    goto error;

  ret = is_bridge (host_if_name);
  if (ret == 0)			/* is a bridge, ignore state */
    host_if_index = -1;

  /* So far everything looks good, let's create interface */
  pool_get (apm->interfaces, apif);
  if_index = apif - apm->interfaces;

  apif->host_if_index = host_if_index;
  apif->fd = fd;
  apif->rx_ring = ring;
  /* The tx ring follows the rx ring inside the same area. */
  apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
  apif->rx_req = rx_req;
  apif->tx_req = tx_req;
  apif->host_if_name = host_if_name_dup;
  apif->per_interface_next_index = ~0;
  apif->next_tx_frame = 0;
  apif->next_rx_frame = 0;

  /* The per-interface lock is only set up when there is more than one
     vlib main. */
  if (tm->n_vlib_mains > 1)
    clib_spinlock_init (&apif->lockp);

  /* Register the socket with the file (epoll) layer; read readiness is
     what stands in for an rx interrupt. */
  {
    clib_file_t template = { 0 };
    template.read_function = af_packet_fd_read_ready;
    template.file_descriptor = fd;
    template.private_data = if_index;
    template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
    template.description = format (0, "%U", format_af_packet_device_name,
				   if_index);
    apif->clib_file_index = clib_file_add (&file_main, &template);
  }

  /*use configured or generate random MAC address */
  if (hw_addr_set)
    clib_memcpy (hw_addr, hw_addr_set, 6);
  else
    {
      f64 now = vlib_time_now (vm);
      u32 rnd;
      rnd = (u32) (now * 1e6);
      rnd = random_u32 (&rnd);

      clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
      hw_addr[0] = 2;
      hw_addr[1] = 0xfe;
    }

  /* Register the Ethernet interface. */
  error = ethernet_register_interface (vnm, af_packet_device_class.index,
				       if_index, hw_addr, &apif->hw_if_index,
				       af_packet_eth_flag_change);

  if (error)
    {
      /* NOTE(review): the file registration, the packet socket and the
         ring created above are not released on this path - confirm whether
         they need explicit cleanup here. */
      clib_memset (apif, 0, sizeof (*apif));
      pool_put (apm->interfaces, apif);
      vlib_log_err (apm->log_class, "Unable to register interface: %U",
		    format_clib_error, error);
      clib_error_free (error);
      ret = VNET_API_ERROR_SYSCALL_ERROR_1;
      goto error;
    }

  sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
  hw = vnet_get_hw_interface (vnm, apif->hw_if_index);
  apif->sw_if_index = sw->sw_if_index;

  /* Set the input node that serves this interface. */
  vnet_hw_interface_set_input_node (vnm, apif->hw_if_index,
				    af_packet_input_node.index);

  /* Assign queue 0 of the interface to an rx thread. */
  vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0,	/* queue */
				      ~0 /* any cpu */ );

  /* The interface supports interrupt mode, and is switched to it below. */
  hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
  vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
			       VNET_HW_INTERFACE_FLAG_LINK_UP);

  vnet_hw_interface_set_rx_mode (vnm, apif->hw_if_index, 0,
				 VNET_HW_INTERFACE_RX_MODE_INTERRUPT);

  mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
		 0);
  if (sw_if_index)
    *sw_if_index = apif->sw_if_index;

  return 0;

error:
  /* Common failure exit: release whatever is still owned locally. */
  if (fd2 > -1)
    {
      close (fd2);
      fd2 = -1;
    }
  vec_free (host_if_name_dup);
  vec_free (rx_req);
  vec_free (tx_req);
  return ret;
}
注册 ethernet 接口
/*
 * Register an Ethernet interface.
 *
 * vnm                 vnet main.
 * dev_class_index     index of the device class (driver type).
 * dev_instance        instance number of the device within its class.
 * address             link-layer (MAC) address; sizeof (ei->address) bytes
 *                     are copied from it.
 * hw_if_index_return  receives the hardware interface index that was
 *                     allocated; must not be NULL.
 * flag_change         callback stored on the Ethernet interface, used to
 *                     configure hardware features such as promiscuous mode.
 *
 * Returns a clib error on failure; nothing in this version of the function
 * produces one, so the result is always 0.
 */
clib_error_t *
ethernet_register_interface (vnet_main_t * vnm,
			     u32 dev_class_index,
			     u32 dev_instance,
			     const u8 * address,
			     u32 * hw_if_index_return,
			     ethernet_flag_change_function_t flag_change)
{
  ethernet_main_t *em = &ethernet_main;
  ethernet_interface_t *ei;
  vnet_hw_interface_t *hi;
  u32 hw_if_index;

  /* Allocate an Ethernet interface instance. */
  pool_get (em->interfaces, ei);
  ei->flag_change = flag_change;

  /* Register with the interface layer; this allocates the hardware
     interface index.  The pool index of ei is passed as the hw instance. */
  hw_if_index = vnet_register_interface
    (vnm,
     dev_class_index, dev_instance,
     ethernet_hw_interface_class.index, ei - em->interfaces);
  *hw_if_index_return = hw_if_index;
  hi = vnet_get_hw_interface (vnm, hw_if_index);

  /* Ethernet-specific setup of the interface output node. */
  ethernet_setup_node (vnm->vlib_main, hi->output_node_index);

  hi->min_packet_bytes = hi->min_supported_packet_bytes =
    ETHERNET_MIN_PACKET_BYTES;
  hi->max_packet_bytes = hi->max_supported_packet_bytes =
    ETHERNET_MAX_PACKET_BYTES;

  /* Default MTU given to a newly registered Ethernet interface: 9000. */
  vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);

  /* Record the address on both the Ethernet and the hw interface. */
  clib_memcpy (ei->address, address, sizeof (ei->address));
  vec_add (hi->hw_address, address, sizeof (ei->address));

  return 0;
}
向接口管理中注册一个接口
/* Register an interface instance. */
/* Allocates a hardware interface, names it, creates the matching software
 * interface and, when the device class has a tx function, sets up the
 * "<name>-output" and "<name>-tx" nodes (re-using the nodes of a previously
 * deleted interface when available).  Returns the new hardware interface
 * index. */
u32
vnet_register_interface (vnet_main_t * vnm,
u32 dev_class_index,
u32 dev_instance,/* instance number among devices using the same driver */
u32 hw_class_index,
u32 hw_instance)/* instance number within the same hardware class, e.g. among Ethernet devices */
{
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_interface_t *hw;
/* Look up the class descriptor by device class index; comparable to a driver ops table in the Linux kernel. */
vnet_device_class_t *dev_class =
vnet_get_device_class (vnm, dev_class_index);
vnet_hw_interface_class_t *hw_class =
vnet_get_hw_interface_class (vnm, hw_class_index);
vlib_main_t *vm = vnm->vlib_main;
vnet_feature_config_main_t *fcm;
vnet_config_main_t *cm;
u32 hw_index, i;
char *tx_node_name = NULL, *output_node_name = NULL;
/* Allocate a hardware interface descriptor. */
pool_get (im->hw_interfaces, hw);
clib_memset (hw, 0, sizeof (*hw));
hw_index = hw - im->hw_interfaces;
hw->hw_if_index = hw_index;
/* The default rx mode is POLLING. */
hw->default_rx_mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
/* Build the interface name: device-class formatter first, then the hw-class formatter, else "<hw class name><instance in hex>". */
if (dev_class->format_device_name)
hw->name = format (0, "%U", dev_class->format_device_name, dev_instance);
else if (hw_class->format_interface_name)
hw->name = format (0, "%U", hw_class->format_interface_name,
dev_instance);
else
hw->name = format (0, "%s%x", hw_class->name, dev_instance);
/* Create the name -> index hash on first use. */
if (!im->hw_interface_by_name)
im->hw_interface_by_name = hash_create_vec ( /* size */ 0,
sizeof (hw->name[0]),
sizeof (uword));
/* Record the name -> index mapping of this interface. */
hash_set_mem (im->hw_interface_by_name, hw->name, hw_index);
/* Make hardware interface point to software interface. */
/* A software interface is created on top of the hardware interface. */
{
vnet_sw_interface_t sw =
{
.type = VNET_SW_INTERFACE_TYPE_HARDWARE,
.flood_class = VNET_FLOOD_CLASS_NORMAL,
.hw_if_index = hw_index
};
/* Allocate the software interface index. */
hw->sw_if_index = vnet_create_sw_interface_no_callbacks (vnm, &sw);
}
/* Device class index. */
hw->dev_class_index = dev_class_index;
/* Instance number within the device class. */
hw->dev_instance = dev_instance;
/* Hardware class index. */
hw->hw_class_index = hw_class_index;
/* Instance number within the hardware class. */
hw->hw_instance = hw_instance;
hw->max_rate_bits_per_sec = 0;
hw->min_packet_bytes = 0;
/* The MTU defaults to 0. */
vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
/* A device class without a tx function gets no output/tx nodes. */
if (dev_class->tx_function == 0)
goto no_output_nodes; /* No output/tx nodes to create */
/* An interface both receives and sends packets.  Receiving is attached to
* an input node, while sending needs nodes of its own.  Two nodes are used
* for output: one at the hw-interface layer ("<name>-output") and one at
* the device layer ("<name>-tx") whose function is the device class tx
* function. */
tx_node_name = (char *) format (0, "%v-tx", hw->name);
output_node_name = (char *) format (0, "%v-output", hw->name);
/* If we have previously deleted interface nodes, re-use them. */
if (vec_len (im->deleted_hw_interface_nodes) > 0)
{
vnet_hw_interface_nodes_t *hn;
vlib_node_t *node;
vlib_node_runtime_t *nrt;
/* Take the last entry of the deleted-nodes vector and rename its nodes. */
hn = vec_end (im->deleted_hw_interface_nodes) - 1;
hw->tx_node_index = hn->tx_node_index;
hw->output_node_index = hn->output_node_index;
vlib_node_rename (vm, hw->tx_node_index, "%v", tx_node_name);
vlib_node_rename (vm, hw->output_node_index, "%v", output_node_name);
/* Point the runtime data of both nodes at the new interface, on every vlib main. */
/* *INDENT-OFF* */
foreach_vlib_main (
{
vnet_interface_output_runtime_t *rt;
rt = vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);
ASSERT (rt->is_deleted == 1);
rt->is_deleted = 0;
rt->hw_if_index = hw_index;
rt->sw_if_index = hw->sw_if_index;
rt->dev_instance = hw->dev_instance;
rt = vlib_node_get_runtime_data (this_vlib_main, hw->tx_node_index);
rt->hw_if_index = hw_index;
rt->sw_if_index = hw->sw_if_index;
rt->dev_instance = hw->dev_instance;
});
/* *INDENT-ON* */
/* The new class may differ from the old one.
* Functions have to be updated. */
node = vlib_get_node (vm, hw->output_node_index);
node->function = vnet_interface_output_node;
node->format_trace = format_vnet_interface_output_trace;
/* *INDENT-OFF* */
foreach_vlib_main (
{nrt = vlib_node_get_runtime (this_vlib_main, hw->output_node_index);
nrt->function = node->function;
});
/* *INDENT-ON* */
node = vlib_get_node (vm, hw->tx_node_index);
node->function = dev_class->tx_function;
node->format_trace = dev_class->format_tx_trace;
/* *INDENT-OFF* */
foreach_vlib_main (
{nrt = vlib_node_get_runtime (this_vlib_main, hw->tx_node_index);
nrt->function = node->function;
});
/* *INDENT-ON* */
/* Drop the re-used entry from the deleted-nodes vector. */
_vec_len (im->deleted_hw_interface_nodes) -= 1;
}
else
{
vlib_node_registration_t r;
vnet_interface_output_runtime_t rt =
{
.hw_if_index = hw_index,
.sw_if_index = hw->sw_if_index,
.dev_instance = hw->dev_instance,
.is_deleted = 0,
};
/* Register the device-layer tx node; its only next node added here is error-drop. */
clib_memset (&r, 0, sizeof (r));
r.type = VLIB_NODE_TYPE_INTERNAL;
r.runtime_data = &rt;
r.runtime_data_bytes = sizeof (rt);
r.scalar_size = 0;
r.vector_size = sizeof (u32);
r.flags = VLIB_NODE_FLAG_IS_OUTPUT;
r.name = tx_node_name;
r.function = dev_class->tx_function;
hw->tx_node_index = vlib_register_node (vm, &r);
vlib_node_add_named_next_with_slot (vm, hw->tx_node_index,
"error-drop",
VNET_INTERFACE_TX_NEXT_DROP);
/* Register the link-layer output node; it re-uses the registration r with a different name and function, and points at the tx node. */
r.flags = 0;
r.name = output_node_name;
r.function = vnet_interface_output_node;
r.format_trace = format_vnet_interface_output_trace;
{static char *e[] =
{
"interface is down",
"interface is deleted",
"no buffers to segment GSO",
};
r.n_errors = ARRAY_LEN (e);
r.error_strings = e;
}
hw->output_node_index = vlib_register_node (vm, &r);
vlib_node_add_named_next_with_slot (vm, hw->output_node_index,
"error-drop",
VNET_INTERFACE_OUTPUT_NEXT_DROP);
/* Next slot VNET_INTERFACE_OUTPUT_NEXT_TX leads to the tx node. */
vlib_node_add_next_with_slot (vm, hw->output_node_index,
hw->tx_node_index,
VNET_INTERFACE_OUTPUT_NEXT_TX);
/* add interface to the list of "output-interface" feature arc start nodes
* and clone nexts from 1st interface if it exists
* I.e. this output node becomes one of the start nodes of the output feature arc. */
fcm = vnet_feature_get_config_main (im->output_feature_arc_index);
cm = &fcm->config_main;
i = vec_len (cm->start_node_indices);
vec_validate (cm->start_node_indices, i);
cm->start_node_indices[i] = hw->output_node_index;
if (hw_index)
{
/* copy nexts from 1st interface */
vnet_hw_interface_t *first_hw;
vlib_node_t *first_node;
first_hw = vnet_get_hw_interface (vnm, /* hw_if_index */ 0);
first_node = vlib_get_node (vm, first_hw->output_node_index);
/* 1st 2 nexts are already added above */
for (i = 2; i < vec_len (first_node->next_nodes); i++)
vlib_node_add_next_with_slot (vm, hw->output_node_index,
first_node->next_nodes[i], i);
}
}
/* Finish setting up the link-layer output node from the hw class. */
setup_output_node (vm, hw->output_node_index, hw_class);
/* Finish setting up the device-layer tx node from the device class. */
setup_tx_node (vm, hw->tx_node_index, dev_class);
no_output_nodes:
/* Call all up/down callbacks with zero flags when interface is created. */
vnet_sw_interface_set_flags_helper (vnm, hw->sw_if_index, /* flags */ 0,
VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
vnet_hw_interface_set_flags_helper (vnm, hw_index, /* flags */ 0,
VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
/* Both names are NULL when the no_output_nodes shortcut was taken. */
vec_free (tx_node_name);
vec_free (output_node_name);
return hw_index;
}
给接口分配指定的收包线程
在函数 af_packet_create_if 中调用了如下代码,用于设置该接口的收包线程,对于 af_packet 采用的是中断模式收包。
/* Set the input node that serves this interface. */
vnet_hw_interface_set_input_node (vnm, apif->hw_if_index,
af_packet_input_node.index);
/* Assign queue 0 of the interface to an rx thread. */
vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0, /* queue */
~0 /* any cpu */ );
vnet_hw_interface_assign_rx_thread
/*
 * Assign rx queue queue_id of hardware interface hw_if_index to a thread.
 *
 * thread_index is honoured when it is 0 or lies inside the worker range of
 * vnet_device_main; any other value makes the function pick the next
 * worker in round-robin order.  When first_worker_thread_index is 0,
 * thread 0 is always used.
 *
 * The queue is appended, in polling mode, to the input node runtime of the
 * chosen thread while the worker barrier is held.
 */
void
vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
				    u16 queue_id, uword thread_index)
{
  vnet_device_main_t *vdm = &vnet_device_main;
  vlib_main_t *rx_vm, *caller_vm;
  vnet_device_input_runtime_t *input_rt;
  vnet_device_and_queue_t *entry;
  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
  int outside_worker_range;

  ASSERT (hi->input_node_index > 0);

  /* first_worker_thread_index == 0: everything is received on thread 0. */
  if (vdm->first_worker_thread_index == 0)
    thread_index = 0;

  outside_worker_range =
    thread_index < vdm->first_worker_thread_index ||
    thread_index > vdm->last_worker_thread_index;

  if (thread_index != 0 && outside_worker_range)
    {
      /* Round-robin over the workers, wrapping after the last one. */
      thread_index = vdm->next_worker_thread_index++;
      if (vdm->next_worker_thread_index > vdm->last_worker_thread_index)
	vdm->next_worker_thread_index = vdm->first_worker_thread_index;
    }

  rx_vm = vlib_mains[thread_index];	/* thread that will receive */
  caller_vm = vlib_get_main ();	/* thread we are running on */

  vlib_worker_thread_barrier_sync (caller_vm);

  /* Append a device/queue entry to the input node runtime of rx_vm. */
  input_rt = vlib_node_get_runtime_data (rx_vm, hi->input_node_index);
  vec_add2 (input_rt->devices_and_queues, entry, 1);
  entry->hw_if_index = hw_if_index;
  entry->dev_instance = hi->dev_instance;
  entry->queue_id = queue_id;
  entry->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;	/* polling by default */
  input_rt->enabled_node_state = VLIB_NODE_STATE_POLLING;

  vnet_device_queue_update (vnm, input_rt);

  /* Remember which thread and rx mode serve this queue. */
  vec_validate (hi->input_node_thread_index_by_queue, queue_id);
  vec_validate (hi->rx_mode_by_queue, queue_id);
  hi->input_node_thread_index_by_queue[queue_id] = thread_index;
  hi->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_POLLING;

  vlib_worker_thread_barrier_release (caller_vm);

  /* Apply the enabled state to the input node on the rx thread. */
  vlib_node_set_state (rx_vm, hi->input_node_index,
		       input_rt->enabled_node_state);
}
删除一个 interface
我们还是以 af_packet 接口为例:
/**
 * Tear down the af_packet interface registered under a host interface name.
 *
 * @param vm            vlib main (not referenced by this body).
 * @param host_if_name  host interface name used as the lookup key.
 * @return 0 on success; VNET_API_ERROR_SYSCALL_ERROR_1 when no interface is
 *         registered under host_if_name.
 */
int
af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
{
  vnet_main_t *vnet_main = vnet_get_main ();
  af_packet_main_t *ap_main = &af_packet_main;
  af_packet_if_t *intf;
  uword *slot;
  uword pool_index;
  u32 ring_bytes;

  slot = mhash_get (&ap_main->if_index_by_host_if_name, host_if_name);
  if (!slot)
    {
      vlib_log_warn (ap_main->log_class, "Host interface %s does not exist",
                     host_if_name);
      return VNET_API_ERROR_SYSCALL_ERROR_1;
    }

  intf = pool_elt_at_index (ap_main->interfaces, *slot);
  pool_index = intf - ap_main->interfaces;

  /* Bring the interface down. */
  vnet_hw_interface_set_flags (vnet_main, intf->hw_if_index, 0);

  /* Detach queue 0 from its input node / RX thread. */
  vnet_hw_interface_unassign_rx_thread (vnet_main, intf->hw_if_index, 0);

  /* Drop the registered file (per the original note: the interrupt source,
   * so no more packets are received); otherwise just close the fd. */
  if (intf->clib_file_index == ~0)
    {
      close (intf->fd);
    }
  else
    {
      clib_file_del (&file_main,
                     file_main.file_pool + intf->clib_file_index);
      intf->clib_file_index = ~0;
    }

  /* One munmap() starting at rx_ring covers the summed RX + TX ring size. */
  ring_bytes =
    intf->rx_req->tp_block_size * intf->rx_req->tp_block_nr +
    intf->tx_req->tp_block_size * intf->tx_req->tp_block_nr;
  if (munmap (intf->rx_ring, ring_bytes))
    vlib_log_warn (ap_main->log_class,
                   "Host interface %s could not free rx/tx ring",
                   host_if_name);

  intf->rx_ring = NULL;
  intf->tx_ring = NULL;
  intf->fd = -1;

  vec_free (intf->rx_req);
  intf->rx_req = NULL;
  vec_free (intf->tx_req);
  intf->tx_req = NULL;

  vec_free (intf->host_if_name);
  intf->host_if_name = NULL;
  intf->host_if_index = -1;

  /* Remove the name -> index mapping (keyed by the caller's host_if_name). */
  mhash_unset (&ap_main->if_index_by_host_if_name, host_if_name,
               &pool_index);

  /* Delete the ethernet interface, then return the slot to the pool. */
  ethernet_delete_interface (vnet_main, intf->hw_if_index);
  pool_put (ap_main->interfaces, intf);

  return 0;
}
解除收包线程
/**
 * Detach one RX queue of a hardware interface from its input thread.
 *
 * @param vnm          vnet main used to resolve hw_if_index.
 * @param hw_if_index  hardware interface owning the queue.
 * @param queue_id     RX queue to detach.
 * @return 0 on success; VNET_API_ERROR_INVALID_INTERFACE when the interface
 *         has no queue/thread table, queue_id is beyond that table, or no
 *         matching (interface, queue) entry exists in the node runtime.
 */
int
vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
                                      u16 queue_id)
{
  vnet_hw_interface_t *hwif = vnet_get_hw_interface (vnm, hw_if_index);
  vnet_device_input_runtime_t *node_rt;
  vnet_device_and_queue_t *cursor;
  vnet_device_and_queue_t *match = 0;
  vnet_hw_interface_rx_mode removed_mode;
  vlib_main_t *rx_vm;
  vlib_main_t *caller_vm;
  uword owner_thread;

  if (hwif->input_node_thread_index_by_queue == 0 ||
      vec_len (hwif->input_node_thread_index_by_queue) < queue_id + 1)
    return VNET_API_ERROR_INVALID_INTERFACE;

  owner_thread = hwif->input_node_thread_index_by_queue[queue_id];
  rx_vm = vlib_mains[owner_thread];

  /* Find this (interface, queue) pair in the input node's runtime data on
   * the owning thread. */
  node_rt = vlib_node_get_runtime_data (rx_vm, hwif->input_node_index);
  vec_foreach (cursor, node_rt->devices_and_queues)
  {
    if (cursor->hw_if_index == hw_if_index && cursor->queue_id == queue_id)
      {
        match = cursor;
        break;
      }
  }

  if (match == 0)
    return VNET_API_ERROR_INVALID_INTERFACE;

  removed_mode = match->mode;

  /* Remove the entry under the worker barrier. */
  caller_vm = vlib_get_main ();
  vlib_worker_thread_barrier_sync (caller_vm);
  vec_del1 (node_rt->devices_and_queues,
            match - node_rt->devices_and_queues);
  vnet_device_queue_update (vnm, node_rt);
  hwif->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_UNKNOWN;
  vlib_worker_thread_barrier_release (caller_vm);

  /* Nothing left for this input node on that thread: disable the node. */
  if (vec_len (node_rt->devices_and_queues) == 0)
    {
      vlib_node_set_state (rx_vm, hwif->input_node_index,
                           VLIB_NODE_STATE_DISABLED);
      return 0;
    }

  if (removed_mode != VNET_HW_INTERFACE_RX_MODE_POLLING)
    return 0;

  /*
   * The removed queue was polling.  When interrupt and polling queues share
   * a thread the node runs in polling state, so if no polling queue remains
   * the node is switched back to interrupt state.
   */
  vec_foreach (cursor, node_rt->devices_and_queues)
  {
    if (cursor->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
      return 0;
  }

  node_rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT;
  vlib_node_set_state (rx_vm, hwif->input_node_index,
                       node_rt->enabled_node_state);
  return 0;
}
ethernet_delete_interface
/**
 * Delete an ethernet interface and the VLAN lookup tables attached to it.
 *
 * @param vnm          vnet main used to resolve hw_if_index.
 * @param hw_if_index  hardware interface index of the ethernet port.
 *
 * Releases the dot1q and dot1ad VLAN tables (and any QinQ tables they
 * reference), deletes the hardware interface, and finally returns the
 * ethernet_interface_t to its pool.
 */
void
ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index)
{
  /* The article's text had "&eth" decoded as an HTML entity here; the
   * intended expression is the address of ethernet_main. */
  ethernet_main_t *em = &ethernet_main;
  ethernet_interface_t *ei;
  vnet_hw_interface_t *hi;
  main_intf_t *main_intf;
  vlan_table_t *vlan_table;
  u32 idx;

  hi = vnet_get_hw_interface (vnm, hw_if_index);
  ei = pool_elt_at_index (em->interfaces, hi->hw_instance);

  /* Delete vlan mapping table for dot1q and dot1ad. */
  main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);

  if (main_intf->dot1q_vlans)
    {
      vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
      for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
        {
          /* A non-zero qinqs value is an index into qinq_pool. */
          if (vlan_table->vlans[idx].qinqs)
            {
              pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
              vlan_table->vlans[idx].qinqs = 0;
            }
        }
      pool_put_index (em->vlan_pool, main_intf->dot1q_vlans);
      main_intf->dot1q_vlans = 0;
    }

  if (main_intf->dot1ad_vlans)
    {
      vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
      for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
        {
          if (vlan_table->vlans[idx].qinqs)
            {
              pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
              vlan_table->vlans[idx].qinqs = 0;
            }
        }
      pool_put_index (em->vlan_pool, main_intf->dot1ad_vlans);
      main_intf->dot1ad_vlans = 0;
    }

  /* Remove the generic hardware interface before releasing the ethernet
   * interface record. */
  vnet_delete_hw_interface (vnm, hw_if_index);
  pool_put (em->interfaces, ei);
}
删除硬件接口信息
/**
 * Delete a hardware interface together with its software interfaces.
 *
 * @param vnm          vnet main owning the interface tables.
 * @param hw_if_index  index of the hardware interface to delete.
 *
 * Steps, in order: mark the interface down if any flag is set, run the
 * delete callbacks, delete every sub-interface, delete the interface's own
 * software interface, park its output/tx nodes as deleted, and free the
 * per-interface vectors before returning the slot to the pool.
 */
void
vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
{
  vnet_interface_main_t *im = &vnm->interface_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  vlib_main_t *vm = vnm->vlib_main;
  vnet_device_class_t *dev_class = vnet_get_device_class (vnm,
                                                          hw->dev_class_index);

  /* If it is up, mark it down. */
  if (hw->flags != 0)
    vnet_hw_interface_set_flags (vnm, hw_if_index, /* flags */ 0);

  /* Call delete callbacks. */
  call_hw_interface_add_del_callbacks (vnm, hw_if_index, /* is_create */ 0);

  /* Delete any sub-interfaces. */
  {
    u32 id, sw_if_index;
    /* *INDENT-OFF* */
    hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id,
    ({
      vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
      /* Key layout: super sw_if_index in the high 32 bits, sub id below. */
      u64 sup_and_sub_key =
        ((u64) (si->sup_sw_if_index) << 32) | (u64) si->sub.id;
      hash_unset_mem_free (&im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
      vnet_delete_sw_interface (vnm, sw_if_index);
    }));
    hash_free (hw->sub_interface_sw_if_index_by_id);
    /* *INDENT-ON* */
  }

  /* Delete software interface corresponding to hardware interface. */
  vnet_delete_sw_interface (vnm, hw->sw_if_index);

  if (dev_class->tx_function)
    {
      /* Put output/tx nodes into recycle pool */
      vnet_hw_interface_nodes_t *dn;

      /* On every vlib main, flag the output node runtime as deleted instead
       * of removing the node. */
      /* *INDENT-OFF* */
      foreach_vlib_main
      ({
        vnet_interface_output_runtime_t *rt =
          vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);

        /* Mark node runtime as deleted so output node (if called)
         * will drop packets. */
        rt->is_deleted = 1;
      });
      /* *INDENT-ON* */

      /* Rename both nodes so they are recognisable as deleted. */
      vlib_node_rename (vm, hw->output_node_index,
                        "interface-%d-output-deleted", hw_if_index);
      vlib_node_rename (vm, hw->tx_node_index, "interface-%d-tx-deleted",
                        hw_if_index);

      /* Record the node pair in deleted_hw_interface_nodes (presumably so a
       * later interface creation can reuse them -- confirm in the
       * registration path). */
      vec_add2 (im->deleted_hw_interface_nodes, dn, 1);
      dn->tx_node_index = hw->tx_node_index;
      dn->output_node_index = hw->output_node_index;
    }

  hash_unset_mem (im->hw_interface_by_name, hw->name);
  vec_free (hw->name);
  vec_free (hw->hw_address);
  vec_free (hw->input_node_thread_index_by_queue);
  /* rx_mode_by_queue is grown with vec_validate() in
   * vnet_hw_interface_assign_rx_thread(); release it with the other
   * per-queue vectors so it does not leak when the slot is recycled. */
  vec_free (hw->rx_mode_by_queue);
  vec_free (hw->dq_runtime_index_by_queue);

  pool_put (im->hw_interfaces, hw);
}