共计 14354 个字符,预计需要花费 36 分钟才能阅读完成。
syncd 进程是介于 orchagent 与 driver 之间的进程。syncd 从 asic-db 中读取的数据经转换后调用驱动提供的 sai 接口进行下硬件,同时需要将驱动的应答进行一定的处理,还需要处理驱动的事件通知 (比如端口 up/down,mac 老化等信息)。处理的消息如下图所示:
orchagent 与 syncd 之间的操作
orchagent 与 syncd 之间会进行如下几种操作:
- create:创建一个对象
- remove: 删除一个对象
- set: 设置对象属性
- get: 获取对象属性
- notify:driver 事件通知
对于 create,remove,set 请求,orchagent 会通过 sairedis 库构建一个虚拟的 sai 层。orchagent 执行 sai 接口只是对 asic-db 进行操作,生成或者删除虚拟对象 (vid)。默认所有操作都是成功的,直接返回,不等待 syncd 的应答,即执行上图的 1 和 6。syncd 从 asic-db 中读出请求,执行上图的 2,3,4。如果步骤 4 返回成功,则整个请求运行结束;否则 syncd 将会发送 shutdown 通知给 orchagent,orchagent 随后退出,如上图的 5,6。
对于 get 操作,orchagent 执行 1 后会使用 select 阻塞等待 syncd 的应答,如果 syncd 在 60 分钟内没有应答,那么 orchagent 会产生 segment 退出。get 操作执行顺序为 1 ->2->3->4->5->6。
对于 driver 的 notify,orchagent 会在主进程的 select 中监听 asic-db。驱动检测到硬件事件后,调用 syncd 注册的回调函数通知 syncd。syncd 中有一个专门处理 driver-notify 的线程 ntf-thread。ntf-thread 解析 driver 的 notify,然后通过 asic-db 通知 orchagent。执行顺序 7 ->8->9。
注:orchagent 与 syncd 关于 sai 这一层非常相似。它们会调用大量的同名函数。这些函数只是名字相同,orchagent 调用的是 sai-redis 库中的函数,而 syncd 调用的是 driver 提供的 sai 库中的函数。
get 操作阻塞等待
orchagent 执行 sai 的 get 操作时会调用到 redis_generic_get 函数。
// Consumer from which responses to "get" requests are read;
// internal_redis_generic_get() selects on it and pops the reply from it.
std::shared_ptr<swss::ConsumerTable> g_redisGetConsumer; | |
/**
 * @brief Generic "get" for an object addressed by its object id.
 *
 * Serializes the object id to its string form and delegates the actual
 * request to internal_redis_generic_get().
 *
 * @param object_type Type of the object being queried.
 * @param object_id   Id of the object being queried.
 * @param attr_count  Number of entries in attr_list.
 * @param attr_list   Attributes to fetch; passed through to the internal get.
 *
 * @return Whatever status internal_redis_generic_get() returns.
 */
sai_status_t redis_generic_get(
        _In_ sai_object_type_t object_type,
        _In_ sai_object_id_t object_id,
        _In_ uint32_t attr_count,
        _Out_ sai_attribute_t *attr_list)
{
    SWSS_LOG_ENTER();

    const std::string serialized_oid = sai_serialize_object_id(object_id);

    const sai_status_t status = internal_redis_generic_get(
            object_type,
            serialized_oid,
            attr_count,
            attr_list);

    return status;
}
/**
 * @brief Issue a "get" request and block until its response arrives.
 *
 * The request is written with g_asicState->set(key, entry, "get"); the
 * function then selects on g_redisGetConsumer with GET_RESPONSE_TIMEOUT and
 * processes the first message whose operation is "getresponse".
 *
 * @param object_type          Type of the object being queried.
 * @param serialized_object_id Object id (or entry) already serialized to text.
 * @param attr_count           Number of entries in attr_list.
 * @param attr_list            Attributes to fetch; filled from the response by
 *                             internal_redis_get_process().
 *
 * @return Status produced by internal_redis_get_process() when a response is
 *         received; SAI_STATUS_FAILURE when select() returns anything other
 *         than swss::Select::OBJECT (e.g. timeout or error).
 */
sai_status_t internal_redis_generic_get(
        _In_ sai_object_type_t object_type,
        _In_ const std::string &serialized_object_id,
        _In_ uint32_t attr_count,
        _Out_ sai_attribute_t *attr_list)
{
    SWSS_LOG_ENTER();

    /*
     * Since user may reuse buffers, oid list buffers may not be cleared and
     * may contain some garbage; clean them so all oids are sent as null to
     * syncd.
     */
    clear_oid_values(object_type, attr_count, attr_list);

    std::vector<swss::FieldValueTuple> entry = SaiAttributeList::serialize_attr_list(
            object_type,
            attr_count,
            attr_list,
            false);

    std::string str_object_type = sai_serialize_object_type(object_type);
    std::string key = str_object_type + ":" + serialized_object_id;

    // entry.size() is a size_t, so %zu is the matching conversion.
    SWSS_LOG_DEBUG("generic get key: %s, fields: %zu", key.c_str(), entry.size());

    if (g_record)
    {
        recordLine("g|" + key + "|" + joinFieldValues(entry));
    }

    // get is special, it will not put data into asic view, only to the
    // message queue: publish this get request.
    g_asicState->set(key, entry, "get");

    // Wait for the response using a temporary select on the get consumer.
    swss::Select s;
    s.addSelectable(g_redisGetConsumer.get());

    // Loop until the response from syncd arrives. Note that the timeout is
    // restarted every time a non-response message is skipped.
    while (true)
    {
        SWSS_LOG_DEBUG("wait for response");

        swss::Selectable *sel;

        // Blocks for at most GET_RESPONSE_TIMEOUT.
        int result = s.select(&sel, GET_RESPONSE_TIMEOUT);

        // Only the OBJECT result (a message is available) is processed.
        if (result == swss::Select::OBJECT)
        {
            swss::KeyOpFieldsValuesTuple kco;
            g_redisGetConsumer->pop(kco);

            const std::string &op = kfvOp(kco);
            const std::string &opkey = kfvKey(kco);

            // Arguments are passed in the order the format string names them
            // (the original passed key and op swapped).
            SWSS_LOG_DEBUG("response: op = %s, key = %s", op.c_str(), opkey.c_str());

            if (op != "getresponse") // ignore non response messages
            {
                continue;
            }

            sai_status_t status = internal_redis_get_process(
                    object_type,
                    attr_count,
                    attr_list,
                    kco);

            if (g_record)
            {
                // The key of the response carries the serialized status.
                const std::string &str_status = kfvKey(kco);
                const std::vector<swss::FieldValueTuple> &values = kfvFieldsValues(kco);

                // first serialized is status
                recordLine("G|" + str_status + "|" + joinFieldValues(values));
            }

            SWSS_LOG_DEBUG("generic get status: %d", status);

            return status;
        }

        SWSS_LOG_ERROR("generic get failed due to SELECT operation result: %s", getSelectResultAsString(result).c_str());
        break;
    }

    // Timeout and error both end up here and return SAI_STATUS_FAILURE.
    if (g_record)
    {
        recordLine("G|SAI_STATUS_FAILURE");
    }

    SWSS_LOG_ERROR("generic get failed to get response");

    return SAI_STATUS_FAILURE;
}
对于 get 操作,当 syncd 比较忙的时候,极端情况下会导致 orchagent 异常退出。
notify 处理
syncd 向驱动注册回调函数
syncd 定义了几个 notify 全局函数指针:
// Global notification function pointers, each initialized to the matching
// syncd handler. check_notifications_pointers() writes these values into
// pointer-typed switch attributes before they are handed to the SAI layer.
sai_switch_state_change_notification_fn on_switch_state_change_ntf = on_switch_state_change; | |
sai_switch_shutdown_request_notification_fn on_switch_shutdown_request_ntf = on_switch_shutdown_request; | |
sai_fdb_event_notification_fn on_fdb_event_ntf = on_fdb_event; | |
sai_port_state_change_notification_fn on_port_state_change_ntf = on_port_state_change; | |
sai_packet_event_notification_fn on_packet_event_ntf = on_packet_event; | |
sai_queue_pfc_deadlock_notification_fn on_queue_deadlock_ntf = on_queue_deadlock; |
syncd 和 sai 共享命名空间,所以驱动直接使用这些函数指针即可调用对应的函数,在初始化的时候将这些全局函数指针通过 sai_set_switch_attribute 函数设置到 sai 层。
syncd 设置 sai 层
/**
 * @brief Replace notification pointers received from sairedis with the ones
 *        declared in syncd.
 *
 * This function should only be called on CREATE/SET api when the object is
 * SWITCH.
 *
 * Notification pointers need to be corrected since those received from
 * sairedis are in sairedis memory space, while here we use the ones declared
 * in syncd memory space.
 *
 * Also notice that the same pointers are used for ALL switches.
 *
 * @param attr_count Number of entries in attr_list.
 * @param attr_list  Switch attributes; non-NULL pointer-typed notification
 *                   attributes are rewritten in place.
 */
void check_notifications_pointers(
        _In_ uint32_t attr_count,
        _In_ sai_attribute_t *attr_list)
{
    SWSS_LOG_ENTER();

    for (uint32_t index = 0; index < attr_count; ++index)
    {
        sai_attribute_t &attr = attr_list[index];

        auto meta = sai_metadata_get_attr_metadata(SAI_OBJECT_TYPE_SWITCH, attr.id);

        if (meta == NULL)
        {
            // Without metadata the value type is unknown; skip the attribute
            // instead of dereferencing a NULL pointer.
            SWSS_LOG_ERROR("no metadata for switch attribute id %u, skipping", static_cast<unsigned int>(attr.id));
            continue;
        }

        if (meta->attrvaluetype != SAI_ATTR_VALUE_TYPE_POINTER)
        {
            continue;
        }

        /*
         * Does not matter if pointer is valid or not, we just want the
         * previous value.
         */
        sai_pointer_t prev = attr.value.ptr;

        if (prev == NULL)
        {
            // If pointer is NULL, then fine, let it be.
            continue;
        }

        switch (attr.id)
        {
            case SAI_SWITCH_ATTR_SWITCH_STATE_CHANGE_NOTIFY:
                attr.value.ptr = reinterpret_cast<void*>(on_switch_state_change_ntf);
                break;

            case SAI_SWITCH_ATTR_SHUTDOWN_REQUEST_NOTIFY:
                attr.value.ptr = reinterpret_cast<void*>(on_switch_shutdown_request_ntf);
                break;

            case SAI_SWITCH_ATTR_FDB_EVENT_NOTIFY:
                attr.value.ptr = reinterpret_cast<void*>(on_fdb_event_ntf);
                break;

            case SAI_SWITCH_ATTR_PORT_STATE_CHANGE_NOTIFY:
                attr.value.ptr = reinterpret_cast<void*>(on_port_state_change_ntf);
                break;

            case SAI_SWITCH_ATTR_PACKET_EVENT_NOTIFY:
                attr.value.ptr = reinterpret_cast<void*>(on_packet_event_ntf);
                break;

            case SAI_SWITCH_ATTR_QUEUE_PFC_DEADLOCK_NOTIFY:
                attr.value.ptr = reinterpret_cast<void*>(on_queue_deadlock_ntf);
                break;

            default:
                // Unknown pointer attribute: leave the value untouched.
                SWSS_LOG_ERROR("pointer for %s is not handled, FIXME!", meta->attridname);
                continue;
        }

        // Here we translated the pointer, just log it. "%p" is the valid
        // conversion for void* ("%lp" is not a defined printf conversion).
        SWSS_LOG_INFO("%s: %p (orch) => %p (syncd)", meta->attridname, prev, attr.value.ptr);
    }
}
/*
 * Routine Description:
 *    Set switch attribute value
 *
 * Arguments:
 *    [in] switch_id Switch id
 *    [in] attr - switch attribute
 *
 * Return Values:
 *    SAI_STATUS_SUCCESS on success
 *    Failure status code on error
 *
 * Notes:
 *    This listing is an excerpt: the "......" lines mark code that was
 *    omitted. The visible cases store the notification callback pointer
 *    carried in attr->value.ptr into the sai_switch_notifications table.
 */ | |
sai_status_t sai_set_switch_attribute(_In_ sai_object_id_t switch_id, | |
_In_ const sai_attribute_t *attr) {SAI_LOG_ENTER(); | |
sai_status_t status = SAI_STATUS_SUCCESS; | |
switch_status_t switch_status = SWITCH_STATUS_SUCCESS; | |
switch_uint64_t flags = 0; | |
switch_api_device_info_t api_device_info; | |
sai_packet_action_t sai_packet_action; | |
switch_acl_action_t switch_packet_action; | |
switch_packet_type_t switch_packet_type = SWITCH_PACKET_TYPE_UNICAST; | |
bool cut_through = false; | |
// A NULL attribute is rejected up front with SAI_STATUS_INVALID_PARAMETER.
if (!attr) { | |
status = SAI_STATUS_INVALID_PARAMETER; | |
SAI_LOG_ERROR("null attribute: %s", sai_status_to_string(status)); | |
return status; | |
} | |
memset(&api_device_info, 0x0, sizeof(api_device_info)); | |
// NOTE(review): status still holds its initial SAI_STATUS_SUCCESS here, so
// in the visible code this early return can never be taken.
if (status != SAI_STATUS_SUCCESS) {return status;} | |
// The attribute name is logged only for ids up to
// SAI_SWITCH_ATTR_ACL_STAGE_EGRESS; presumably that bounds the
// switch_attr_name table - TODO confirm.
if (attr->id <= SAI_SWITCH_ATTR_ACL_STAGE_EGRESS) { // Unsupported | |
SAI_LOG_DEBUG("Switch attribute set: %s", switch_attr_name[attr->id]); | |
} | |
switch (attr->id) { | |
...... | |
// Notification attributes: remember the callback supplied by the caller.
case SAI_SWITCH_ATTR_FDB_EVENT_NOTIFY: | |
sai_switch_notifications.on_fdb_event = attr->value.ptr; | |
break; | |
case SAI_SWITCH_ATTR_PORT_STATE_CHANGE_NOTIFY: | |
sai_switch_notifications.on_port_state_change = attr->value.ptr; | |
break; | |
case SAI_SWITCH_ATTR_PACKET_EVENT_NOTIFY: | |
sai_switch_notifications.on_packet_event = attr->value.ptr; | |
break; | |
case SAI_SWITCH_ATTR_SWITCH_STATE_CHANGE_NOTIFY: | |
sai_switch_notifications.on_switch_state_change = attr->value.ptr; | |
break; | |
case SAI_SWITCH_ATTR_SHUTDOWN_REQUEST_NOTIFY: | |
sai_switch_notifications.on_switch_shutdown_request = attr->value.ptr; | |
break; | |
...... | |
// Unknown attributes are logged as errors, but switch_status is
// deliberately left at SWITCH_STATUS_SUCCESS (see the hack note below).
default: | |
SAI_LOG_ERROR("Unsupported Switch attribute: %d", attr->id); | |
// Unsupported: Temporary hack till all attrs are supported | |
switch_status = SWITCH_STATUS_SUCCESS; | |
} | |
...... | |
} |
sai 接口初始化的时候会向驱动注册回调函数,回调函数中会调用我们注册的全局函数指针,我们以 fdb 为例进行说明:
/*
 * Routine Description:
 *    Initialize the FDB portion of the SAI API service: publish fdb_api,
 *    register sai_mac_notify_cb with the driver for MAC learn, age, move and
 *    delete events, and set the MAC learning timeout to SAI_L2_LEARN_TIMEOUT.
 *
 * Arguments:
 *    [in,out] sai_api_service - service table whose fdb_api member is set
 *
 * Return Values:
 *    Always SAI_STATUS_SUCCESS (results of the driver calls are not checked)
 */
sai_status_t sai_fdb_initialize(sai_api_service_t *sai_api_service) {
  SAI_LOG_DEBUG("initializing fdb");

  sai_api_service->fdb_api = fdb_api;

  /* Events for which the driver should invoke the callback. */
  const switch_uint16_t mac_event_flags =
      SWITCH_MAC_EVENT_LEARN | SWITCH_MAC_EVENT_AGE |
      SWITCH_MAC_EVENT_MOVE | SWITCH_MAC_EVENT_DELETE;

  switch_api_mac_notification_register(
      device, SWITCH_SAI_APP_ID, mac_event_flags, &sai_mac_notify_cb);

  switch_api_mac_table_set_learning_timeout(device, SAI_L2_LEARN_TIMEOUT);

  return SAI_STATUS_SUCCESS;
}
// 初始化 fdb 的 sai 接口的时候,向驱动注册了 sai_mac_notify_cb 回调函数。static void sai_mac_notify_cb(const switch_device_t device, | |
const uint16_t num_entries, | |
const switch_api_mac_entry_t *mac_entry, | |
const switch_mac_event_t mac_event, | |
void *app_data) {SAI_LOG_ENTER(); | |
sai_fdb_event_notification_data_t fdb_event[num_entries]; | |
sai_attribute_t attr_lists[num_entries][2]; | |
uint16_t entry = 0; | |
// 判断回调函数是否为空 | |
if (!sai_switch_notifications.on_fdb_event) {return;} | |
if (!mac_entry) {SAI_LOG_ERROR("invalid argument"); | |
return; | |
} | |
if (!num_entries) {SAI_LOG_DEBUG("sai mac notify callback with null entries"); | |
return; | |
} | |
for (entry = 0; entry < num_entries; entry++) {memset(&fdb_event[entry], 0, sizeof(fdb_event[entry])); | |
fdb_event[entry].event_type = switch_mac_event_to_sai_fdb_event(mac_event); | |
memcpy(fdb_event[entry].fdb_entry.mac_address, | |
mac_entry[entry].mac.mac_addr, | |
ETH_ALEN); | |
fdb_event[entry].fdb_entry.switch_id = | |
(((unsigned long)SWITCH_HANDLE_TYPE_DEVICE) | |
<< SWITCH_HANDLE_TYPE_SHIFT) | | |
0x1; | |
fdb_event[entry].fdb_entry.bv_id = mac_entry[entry].network_handle; | |
memset(attr_lists[entry], 0, sizeof(attr_lists[entry])); | |
attr_lists[entry][0].id = SAI_FDB_ENTRY_ATTR_TYPE; | |
attr_lists[entry][0].value.s32 = SAI_FDB_ENTRY_TYPE_DYNAMIC; | |
attr_lists[entry][1].id = SAI_FDB_ENTRY_ATTR_BRIDGE_PORT_ID; | |
attr_lists[entry][1].value.oid = mac_entry->handle; | |
fdb_event[entry].attr_count = 2; | |
if (fdb_event[entry].event_type == SAI_FDB_EVENT_FLUSHED) { | |
// Overwriting now for SONiC to be able to process it correctly | |
fdb_event[entry].event_type = SAI_FDB_EVENT_AGED; | |
} | |
fdb_event[entry].attr = attr_lists[entry]; | |
} | |
// 调用 syncd 的回调函数 | |
sai_switch_notifications.on_fdb_event(num_entries, fdb_event); | |
SAI_LOG_EXIT(); | |
return; | |
} |
syncd 启动 notify 线程
// Handle of the notification processing thread; assigned in
// startNotificationsProcessingThread().
std::shared_ptr<std::thread> ntf_process_thread; | |
void startNotificationsProcessingThread() | |
{SWSS_LOG_ENTER(); | |
runThread = true; | |
ntf_process_thread = std::make_shared<std::thread>(ntf_process_function); | |
} | |
void ntf_process_function() | |
{SWSS_LOG_ENTER(); | |
while (runThread) | |
{cv.wait(ulock); | |
// this is notifications processing thread context, which is different | |
// from SAI notifications context, we can safe use g_mutex here, | |
// processing each notification is under same mutex as processing main | |
// events, counters and reinit | |
swss::KeyOpFieldsValuesTuple item; | |
while (tryDequeue(item))// 从队列中取出 notify | |
{processNotification(item);// 处理 notify | |
} | |
} | |
} | |
/**
 * @brief Pop the oldest notification from ntf_queue, if there is one.
 *
 * The queue is accessed under queue_mutex.
 *
 * @param item Receives a copy of the front element when the queue is not
 *             empty; left untouched otherwise.
 *
 * @return true when an element was dequeued, false when the queue was empty.
 */
bool tryDequeue(_Out_ swss::KeyOpFieldsValuesTuple &item)
{
    std::lock_guard<std::mutex> lock(queue_mutex);

    SWSS_LOG_ENTER();

    const bool has_item = !ntf_queue.empty();

    if (has_item)
    {
        item = ntf_queue.front();
        ntf_queue.pop();
    }

    return has_item;
}
/**
 * @brief Dispatch one queued notification to its handler.
 *
 * The tuple's key holds the notification name and its op field holds the
 * serialized payload. The whole dispatch runs under g_mutex.
 *
 * @param item Queued notification (name in the key, payload in the op).
 */
void processNotification(_In_ const swss::KeyOpFieldsValuesTuple &item)
{
    std::lock_guard<std::mutex> lock(g_mutex);

    SWSS_LOG_ENTER();

    const std::string &name = kfvKey(item);
    const std::string &payload = kfvOp(item);

    if (name == "switch_state_change")
    {
        handle_switch_state_change(payload);
        return;
    }

    if (name == "fdb_event")
    {
        handle_fdb_event(payload);
        return;
    }

    if (name == "port_state_change")
    {
        handle_port_state_change(payload);
        return;
    }

    if (name == "switch_shutdown_request")
    {
        handle_switch_shutdown_request(payload);
        return;
    }

    if (name == "queue_deadlock")
    {
        handle_queue_deadlock(payload);
        return;
    }

    // No handler matched the notification name.
    SWSS_LOG_ERROR("unknow notification: %s", name.c_str());
}
/**
 * @brief Handle a serialized "fdb_event" notification on the syncd side.
 *
 * Deserializes the payload, passes the events to process_on_fdb_event() and
 * then releases the deserialized data.
 *
 * @param data Serialized FDB event notification.
 */
void handle_fdb_event(_In_ const std::string &data)
{
    SWSS_LOG_ENTER();

    uint32_t event_count;
    sai_fdb_event_notification_data_t *events = NULL;

    sai_deserialize_fdb_event_ntf(data, event_count, &events);

    process_on_fdb_event(event_count, events);

    // Free what the deserializer produced.
    sai_deserialize_free_fdb_event_ntf(event_count, events);
}
/**
 * @brief Translate FDB events from real ids to virtual ids and forward them.
 *
 * For every event the switch id, the bv_id and the attribute list are passed
 * through the rid-to-vid translation helpers, and the entry is written with
 * redisPutFdbEntryToAsicView(). The translated events are then serialized and
 * sent as an "fdb_event" notification.
 *
 * @param count Number of elements in data.
 * @param data  FDB event array; modified in place by the translation.
 */
void process_on_fdb_event(
        _In_ uint32_t count,
        _In_ sai_fdb_event_notification_data_t *data)
{
    SWSS_LOG_ENTER();

    // count is unsigned, so it is printed with %u (the original used %d).
    SWSS_LOG_DEBUG("fdb event count: %u", count);

    for (uint32_t i = 0; i < count; i++)
    {
        sai_fdb_event_notification_data_t *fdb = &data[i];

        SWSS_LOG_DEBUG("fdb %u: type: %d", i, fdb->event_type);

        // The switch id is translated first; the already translated switch id
        // is then used when translating bv_id and the attribute list.
        fdb->fdb_entry.switch_id = translate_rid_to_vid(fdb->fdb_entry.switch_id, SAI_NULL_OBJECT_ID);

        fdb->fdb_entry.bv_id = translate_rid_to_vid(fdb->fdb_entry.bv_id, fdb->fdb_entry.switch_id);

        translate_rid_to_vid_list(SAI_OBJECT_TYPE_FDB_ENTRY, fdb->fdb_entry.switch_id, fdb->attr_count, fdb->attr);

        /*
         * Currently because of bcrm bug, we need to install fdb entries in
         * asic view and currently this event don't have fdb type which is
         * required on creation.
         */
        redisPutFdbEntryToAsicView(fdb);
    }

    std::string s = sai_serialize_fdb_event_ntf(count, data);

    send_notification("fdb_event", s);
}
/**
 * @brief Send a notification together with extra field/value pairs.
 *
 * Logs the operation and payload, then hands everything to
 * notifications->send().
 *
 * @param op    Notification name.
 * @param data  Serialized notification payload.
 * @param entry Additional field/value tuples sent with the notification.
 */
void send_notification(
        _In_ std::string op,
        _In_ std::string data,
        _In_ std::vector<swss::FieldValueTuple> &entry)
{
    SWSS_LOG_ENTER();

    const char *op_text = op.c_str();
    const char *data_text = data.c_str();

    SWSS_LOG_INFO("%s %s", op_text, data_text);

    // Publish the notification (written to the database).
    notifications->send(op, data, entry);

    SWSS_LOG_DEBUG("notification send successfull");
}
/**
 * @brief Send a notification that carries no extra field/value pairs.
 *
 * Convenience overload: forwards to the three-argument send_notification()
 * with an empty entry list.
 *
 * @param op   Notification name.
 * @param data Serialized notification payload.
 */
void send_notification(
        _In_ std::string op,
        _In_ std::string data)
{
    SWSS_LOG_ENTER();

    std::vector<swss::FieldValueTuple> no_fields;

    send_notification(op, data, no_fields);
}
orchagent 启动 notify 线程
// Start the notification thread.
// This listing is an excerpt of sai_api_initialize(): the "......" lines mark
// omitted code. The visible statement creates a std::thread running
// ntf_thread() and stores it in notification_thread.
sai_status_t sai_api_initialize( | |
_In_ uint64_t flags, | |
_In_ const sai_service_method_table_t* services) | |
{ | |
...... | |
notification_thread = std::make_shared<std::thread>(std::thread(ntf_thread)); | |
...... | |
} | |
// 线程主函数 | |
void ntf_thread() | |
{SWSS_LOG_ENTER(); | |
swss::Select s; | |
s.addSelectable(g_redisNotifications.get()); | |
s.addSelectable(&g_redisNotificationTrheadEvent); | |
while (g_run) | |
{ | |
swss::Selectable *sel; | |
int result = s.select(&sel); | |
if (sel == &g_redisNotificationTrheadEvent) | |
{ | |
// user requested shutdown_switch | |
break; | |
} | |
if (result == swss::Select::OBJECT) | |
{ | |
swss::KeyOpFieldsValuesTuple kco; | |
std::string op; | |
std::string data; | |
std::vector<swss::FieldValueTuple> values; | |
g_redisNotifications->pop(op, data, values); | |
SWSS_LOG_DEBUG("notification: op = %s, data = %s", op.c_str(), data.c_str()); | |
handle_notification(op, data, values); | |
} | |
} | |
} | |
/**
 * @brief Handle a serialized "fdb_event" notification on the sairedis side.
 *
 * Deserializes the payload, updates the meta layer under g_apimutex, calls
 * the on_fdb_event callback when one is set, and finally frees the
 * deserialized data.
 *
 * @param data Serialized FDB event notification.
 */
void handle_fdb_event(_In_ const std::string &data)
{
    SWSS_LOG_ENTER();

    SWSS_LOG_DEBUG("data: %s", data.c_str());

    uint32_t event_count;
    sai_fdb_event_notification_data_t *events = NULL;

    sai_deserialize_fdb_event_ntf(data, event_count, &events);

    {
        // NOTE: this meta api must be under mutex since it will access meta
        // DB and the notification comes from a different thread. The lock is
        // released before the callback below is invoked.
        std::lock_guard<std::mutex> guard(g_apimutex);

        meta_sai_on_fdb_event(event_count, events);
    }

    if (on_fdb_event != NULL)
    {
        on_fdb_event(event_count, events);
    }

    sai_deserialize_free_fdb_event_ntf(event_count, events);
}
syncd 的回调函数
// Protects ntf_queue; taken by enqueue_notification() and tryDequeue().
std::mutex queue_mutex; | |
// Notifications waiting to be handled: filled by enqueue_notification() and
// drained through tryDequeue().
std::queue<swss::KeyOpFieldsValuesTuple> ntf_queue; | |
/**
 * @brief FDB event callback on the syncd side.
 *
 * Serializes the events and queues them as an "fdb_event" notification; the
 * events are not processed in the caller's context.
 *
 * @param count Number of elements in data.
 * @param data  FDB event array delivered with the notification.
 */
void on_fdb_event(
        _In_ uint32_t count,
        _In_ const sai_fdb_event_notification_data_t *data)
{
    SWSS_LOG_ENTER();

    const std::string serialized = sai_serialize_fdb_event_ntf(count, data);

    enqueue_notification("fdb_event", serialized);
}
/**
 * @brief Queue a notification for the processing thread.
 *
 * Builds a KeyOpFieldsValuesTuple from the arguments, pushes it onto
 * ntf_queue under queue_mutex and signals cv.
 *
 * @param op    Notification name (stored as the tuple key).
 * @param data  Serialized payload (stored as the tuple op).
 * @param entry Additional field/value tuples.
 */
void enqueue_notification(
        _In_ std::string op,
        _In_ std::string data,
        _In_ std::vector<swss::FieldValueTuple> &entry)
{
    SWSS_LOG_ENTER();

    SWSS_LOG_INFO("%s %s", op.c_str(), data.c_str());

    swss::KeyOpFieldsValuesTuple queued(op, data, entry);

    // This is notification context, so the queue needs to be protected.
    std::lock_guard<std::mutex> guard(queue_mutex);

    // Push onto the queue, then wake waiters while the lock is still held.
    ntf_queue.push(queued);

    cv.notify_all();
}
/**
 * @brief Queue a notification that carries no extra field/value pairs.
 *
 * Convenience overload: forwards to the three-argument
 * enqueue_notification() with an empty entry list.
 *
 * @param op   Notification name.
 * @param data Serialized notification payload.
 */
void enqueue_notification(
        _In_ std::string op,
        _In_ std::string data)
{
    SWSS_LOG_ENTER();

    std::vector<swss::FieldValueTuple> no_fields;

    enqueue_notification(op, data, no_fields);
}
上面三部分就是硬件触发的异步事件从硬件层同步到 syncd 层,再到 orchagent 层的处理过程。涉及一个回调函数,两个 notify 处理线程。