beyla支持通过ebpf,无侵入地、自动采集应用程序的trace信息。

以golang的nethttp为例,讲述beyla对trace的采集的实现原理。

一. 整体原理

trace采集时,监听了golang应用程序的net/http中的函数:

  • net/http.serverHandler.ServeHTTP;
  • net/http.(*Transport).roundTrip;

监听ServeHTTP时:

  • 若request中没有trace信息,则生成traceparent,存入go_trace_map结构(key=goroutine地址,value=trace信息);
  • 若request中有trace信息,则根据trace信息,重新生成span,存入go_trace_map结构;

监听roundTrip的调用:

  • 首先,根据goroutine地址,读go_trace_map结构,得到trace信息;
  • 然后,将当前连接的trace信息,存入ongoing_http_client_requests结构(key=goroutine地址,value=trace信息);

监听roundTrip的调用返回:

  • 首先,根据goroutine地址,读ongoing_http_client_requests结构,得到trace信息;
  • 然后,将当前调用的trace信息,转换为http_request_trace结构,保存到ringbuf中;

最终,ebpf用户程序,读取ringbuf中的trace信息,采集到trace信息。

二. 监听uprobe/ServeHTTP

处理流程:

  • 首先,提取goroutine和request指针;
  • 然后,通过server_trace_parent()函数,处理trace信息,存入go_trace_map结构;
  • 最后,将数据存入ongoing_http_server_requests结构;
// beyla/bpf/go_nethttp.cSEC("uprobe/ServeHTTP")int uprobe_ServeHTTP(struct pt_regs *ctx) {    void *goroutine_addr = GOROUTINE_PTR(ctx);    void *req = GO_PARAM4(ctx);    http_func_invocation_t invocation = {        .start_monotime_ns = bpf_ktime_get_ns(),        .req_ptr = (u64)req,        .tp = {0}    };    if (req) {        // 解决trace信息,存入go_trace_map        server_trace_parent(goroutine_addr, &invocation.tp, (void*)(req + req_header_ptr_pos));    }    // write event    if (bpf_map_update_elem(&ongoing_http_server_requests, &goroutine_addr, &invocation, BPF_ANY)) {        bpf_dbg_printk("can't update map element");    }    return 0;}

重点看一下server_trace_parent()函数:

  • 首先,从req_header读取traceparent:

    • 若读到了,则copy traceId,将parentId=上游span的spanId;
    • 否则,则生成trace_id,将parentId=0;
  • 然后,使用urand,生成随机的spanId;
  • 最后,将trace信息存入go_trace_map结构,key=goroutine地址,value=trace信息;
// bpf/go_common.hstatic __always_inline void server_trace_parent(void *goroutine_addr, tp_info_t *tp, void *req_header) {    // May get overriden when decoding existing traceparent, but otherwise we set sample ON    tp->flags = 1;    // Get traceparent from the Request.Header    void *traceparent_ptr = extract_traceparent_from_req_headers(req_header);    if (traceparent_ptr != NULL) {    // 读到了traceparent       ....    } else {     // 未读到traceparent        bpf_dbg_printk("No traceparent in headers, generating");        urand_bytes(tp->trace_id, TRACE_ID_SIZE_BYTES);       // 生成随机的trace_id;        *((u64 *)tp->parent_id) = 0;    }    urand_bytes(tp->span_id, SPAN_ID_SIZE_BYTES);    bpf_map_update_elem(&go_trace_map, &goroutine_addr, tp, BPF_ANY);}

go_trace_map对象的定义:

struct {    __uint(type, BPF_MAP_TYPE_LRU_HASH);    __type(key, void *); // key: pointer to the goroutine    __type(value, tp_info_t);  // value: traceparent info    __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS);    __uint(pinning, LIBBPF_PIN_BY_NAME);} go_trace_map SEC(".maps");typedef struct tp_info {    unsigned char trace_id[TRACE_ID_SIZE_BYTES];    unsigned char span_id[SPAN_ID_SIZE_BYTES];    unsigned char parent_id[SPAN_ID_SIZE_BYTES];    u64 ts;    u8  flags;} tp_info_t;

三. 监听uprobe/roundTrip

roundTrip函数,在使用http client发起请求时,被调用。

处理流程:

  • 首先,提取goroutine地址和request地址;
  • 然后,根据goroutine_addr和request,查找trace信息;
  • 最后,将trace信息写入ongoing_http_client_requests对象;
// beyla/bpf/go_nethttp.cSEC("uprobe/roundTrip")int uprobe_roundTrip(struct pt_regs *ctx) {    roundTripStartHelper(ctx);    return 0;}static __always_inline void roundTripStartHelper(struct pt_regs *ctx) {    void *goroutine_addr = GOROUTINE_PTR(ctx);    void *req = GO_PARAM2(ctx);    http_func_invocation_t invocation = {        .start_monotime_ns = bpf_ktime_get_ns(),        .req_ptr = (u64)req,        .tp = {0}    };    // 依据request和goroutine_addr,查找trace信息    __attribute__((__unused__)) u8 existing_tp = client_trace_parent(goroutine_addr, &invocation.tp, (void*)(req + req_header_ptr_pos));    // 将trace信息写入ongoing_http_client_requests    if (bpf_map_update_elem(&ongoing_http_client_requests, &goroutine_addr, &invocation, BPF_ANY)) {        bpf_dbg_printk("can't update http client map element");    }}  

重点看一下查找trace信息的client_trace_parent()函数:

  • 首先,尝试从request的header中提取traceparent:

    • 若找到了,则copy traceId,设置当前span.parentId=上游span的spanId;
  • 然后,再使用goroutine及其parent_goroutine,去go_trace_map中找:

    • 若找到了,则copy traceId,设置当前span.parentId=上游span的spanId;
// beyla/go_common.hstatic __always_inline u8 client_trace_parent(void *goroutine_addr, tp_info_t *tp_i, void *req_header) {    u8 found_trace_id = 0;    u8 trace_id_exists = 0;        // May get overriden when decoding existing traceparent or finding a server span, but otherwise we set sample ON    tp_i->flags = 1;    // 首先尝试从request的header中提取traceparent    if (req_header) {        ...    }    // 而后再应用goroutine去go_trace_map中找    if (!found_trace_id) {        tp_info_t *tp = 0;        u64 parent_id = find_parent_goroutine(goroutine_addr);        if (parent_id) {// we found a parent request            tp = (tp_info_t *)bpf_map_lookup_elem(&go_trace_map, &parent_id);        }        if (tp) {   // 找到了,copy traceId,以后span.parentId=上流span.spanId            *((u64 *)tp_i->trace_id) = *((u64 *)tp->trace_id);            *((u64 *)(tp_i->trace_id + 8)) = *((u64 *)(tp->trace_id + 8));            *((u64 *)tp_i->parent_id) = *((u64 *)tp->span_id);            tp_i->flags = tp->flags;        }         ...        // 生成以后span.spanId        urand_bytes(tp_i->span_id, SPAN_ID_SIZE_BYTES);    }    return trace_id_exists;}  

这里有个隐含的假设条件:

  • 一个goroutine及其child goroutine仅处理一个http请求;
  • nethttp的框架在设计时,就由一个goroutine去处理一个http请求,是符合这个假设的;

四. 监听uprobe/roundTrip_return

处理流程:

  • 首先,使用goroutine_addr,从ongoing_http_client_requests中找trace信息;
  • 然后,初始化http_request_trace:

    • 从request中找method/host/url/content_length,赋值给http_request_trace;
    • 将trace信息赋值到http_request_trace;
    • 从response中找status,赋值给http_request_trace;
  • 最后,将http_request_trace提交到ringbuf;
// beyla/bpf/go_nethttp.cSEC("uprobe/roundTrip_return")int uprobe_roundTripReturn(struct pt_regs *ctx) {    void *goroutine_addr = GOROUTINE_PTR(ctx);    // 应用goroutine_addr找ongoing_http_client_requests    http_func_invocation_t *invocation =        bpf_map_lookup_elem(&ongoing_http_client_requests, &goroutine_addr);    bpf_map_delete_elem(&ongoing_http_client_requests, &goroutine_addr);    http_request_trace *trace = bpf_ringbuf_reserve(&events, sizeof(http_request_trace), 0);    // 初始化http_request_trace    task_pid(&trace->pid);    trace->type = EVENT_HTTP_CLIENT;    trace->start_monotime_ns = invocation->start_monotime_ns;    trace->go_start_monotime_ns = invocation->start_monotime_ns;    trace->end_monotime_ns = bpf_ktime_get_ns();    void *req_ptr = (void *)invocation->req_ptr;    void *resp_ptr = (void *)GO_PARAM1(ctx);    // 从request中找method,赋值给trace->method    if (!read_go_str("method", req_ptr, method_ptr_pos, &trace->method, sizeof(trace->method))) {        ...    }    // 从request中找host,赋值给trace->host    if (!read_go_str("host", req_ptr, host_ptr_pos, &trace->host, sizeof(trace->host))) {        ...    }    // 从request中找url,赋值给trace->path    void *url_ptr = 0;    bpf_probe_read(&url_ptr, sizeof(url_ptr), (void *)(req_ptr + url_ptr_pos));    if (!url_ptr || !read_go_str("path", url_ptr, path_ptr_pos, &trace->path, sizeof(trace->path))) {        ...    }    // 赋值trace信息    trace->tp = invocation->tp;    // 从request中找content_length,赋值给trace->content_length    bpf_probe_read(&trace->content_length, sizeof(trace->content_length), (void *)(req_ptr + content_length_ptr_pos));    // 从resp中找status,赋值给trace->status    bpf_probe_read(&trace->status, sizeof(trace->status), (void *)(resp_ptr + status_code_ptr_pos));    // 提交trace到ringbuf    bpf_ringbuf_submit(trace, get_flags());    return 0;}

参考:

1.https://github.com/grafana/beyla/issues/521
2.https://github.com/grafana/beyla/blob/main/docs/sources/distributed-traces.md