From bf1ad108da0667d1676764ac821ff2ccaf692723 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Sat, 21 Feb 2026 23:05:24 +0800 Subject: [PATCH 1/2] feat: migrate interpreter symbolization from agent to server Move interpreter frame formatting (PHP/V8/Python/Lua) from agent-side string generation to server-side structured symbolization. Agent changes: - Send structured interp_symbol_info_t via protobuf InterpreterStack - Remove agent-side symbol formatting (fold_interpreter_frames_*) - Add RawInterpreterData/NativeStackTrace fields to Profile message - Agent C code extracts raw symbols, Rust converts to protobuf Server changes: - Add symbolizer stub package (enterprise provides full implementation) - decoder.go detects RawInterpreterData flag, calls symbolizer.Symbolize() - Backward compatible: old agents still send pre-formatted Data field Protobuf changes: - Add InterpreterFrameSymbol, InterpreterStack messages to metric.proto - Add interpreter_stack, native_stack_trace, raw_interpreter_data fields Co-Authored-By: Claude Opus 4.6 --- agent/crates/trace-utils/src/lib.rs | 34 -- agent/crates/trace-utils/src/trace_utils.h | 38 --- agent/src/ebpf/mod.rs | 19 ++ agent/src/ebpf/user/extended/extended.c | 29 +- agent/src/ebpf/user/extended/extended.h | 90 +++-- agent/src/ebpf/user/profile/perf_profiler.c | 8 +- agent/src/ebpf/user/profile/perf_profiler.h | 3 + agent/src/ebpf/user/profile/profile_common.c | 77 +++-- agent/src/ebpf/user/profile/profile_common.h | 3 - agent/src/ebpf/user/profile/stringifier.c | 307 ++++-------------- agent/src/ebpf/user/profile/stringifier.h | 24 -- agent/src/ebpf_dispatcher.rs | 47 +++ message/metric.proto | 37 +++ server/ingester/profile/decoder/decoder.go | 44 ++- .../ingester/profile/symbolizer/symbolizer.go | 49 +++ 15 files changed, 391 insertions(+), 418 deletions(-) create mode 100644 server/ingester/profile/symbolizer/symbolizer.go diff --git a/agent/crates/trace-utils/src/lib.rs b/agent/crates/trace-utils/src/lib.rs index 
386066b9236..1e2cc530c44 100644 --- a/agent/crates/trace-utils/src/lib.rs +++ b/agent/crates/trace-utils/src/lib.rs @@ -575,15 +575,6 @@ extern "C" { pub fn is_v8_process(pid: u32) -> bool; pub fn lua_detect(pid: u32, out: *mut LuaRuntimeInfo) -> i32; - pub fn lua_format_folded_stack_trace( - tracer: *mut libc::c_void, - pid: u32, - frames: *const u64, - frame_count: u32, - new_cache: bool, - info_p: *mut libc::c_void, - err_tag: *const libc::c_char, - ) -> *mut libc::c_char; pub fn lua_set_map_fds( lang_flags_fd: i32, unwind_info_fd: i32, @@ -622,31 +613,6 @@ extern "C" { pub fn v8_unwind_table_load(table: *mut V8UnwindTable, pid: u32); pub fn v8_unwind_table_unload(table: *mut V8UnwindTable, pid: u32); - pub fn merge_lua_stacks( - trace_str: *mut libc::c_void, - len: usize, - u_trace: *const libc::c_void, - i_trace: *const libc::c_void, - ) -> usize; - pub fn merge_python_stacks( - trace_str: *mut libc::c_void, - len: usize, - i_trace: *const libc::c_void, - u_trace: *const libc::c_void, - ) -> usize; - pub fn merge_php_stacks( - trace_str: *mut libc::c_void, - len: usize, - i_trace: *const libc::c_void, - u_trace: *const libc::c_void, - ) -> usize; - pub fn merge_v8_stacks( - trace_str: *mut libc::c_void, - len: usize, - i_trace: *const libc::c_void, - u_trace: *const libc::c_void, - ) -> usize; - pub fn resolve_php_frame( pid: u32, zend_function_ptr: u64, diff --git a/agent/crates/trace-utils/src/trace_utils.h b/agent/crates/trace-utils/src/trace_utils.h index 39d9a3f98f6..12cfcf46648 100644 --- a/agent/crates/trace-utils/src/trace_utils.h +++ b/agent/crates/trace-utils/src/trace_utils.h @@ -608,16 +608,6 @@ extern bool is_v8_process(uint32_t pid); extern int32_t lua_detect(uint32_t pid, lua_runtime_info_t *out); #endif -#if defined(DF_ENTERPRISE) -extern char *lua_format_folded_stack_trace(void *tracer, - uint32_t pid, - const uint64_t *frames, - uint32_t frame_count, - bool new_cache, - void *info_p, - const char *err_tag); -#endif - #if 
defined(DF_ENTERPRISE) extern void lua_set_map_fds(int32_t lang_flags_fd, int32_t unwind_info_fd, @@ -644,34 +634,6 @@ extern void lua_unwind_table_load(lua_unwind_table_t *table, uint32_t pid); extern void lua_unwind_table_unload(lua_unwind_table_t *table, uint32_t pid); #endif -#if defined(DF_ENTERPRISE) -extern size_t merge_lua_stacks(void *trace_str, - size_t len, - const void *u_trace, - const void *i_trace); -#endif - -#if defined(DF_ENTERPRISE) -extern size_t merge_php_stacks(void *trace_str, - size_t len, - const void *i_trace, - const void *u_trace); -#endif - -#if defined(DF_ENTERPRISE) -extern size_t merge_python_stacks(void *trace_str, - size_t len, - const void *i_trace, - const void *u_trace); -#endif - -#if defined(DF_ENTERPRISE) -extern size_t merge_v8_stacks(void *trace_str, - size_t len, - const void *i_trace, - const void *u_trace); -#endif - #if defined(DF_ENTERPRISE) extern php_unwind_table_t *php_unwind_table_create(int32_t unwind_info_map_fd, int32_t offsets_map_fd); diff --git a/agent/src/ebpf/mod.rs b/agent/src/ebpf/mod.rs index e47bb80995c..5303c60c066 100644 --- a/agent/src/ebpf/mod.rs +++ b/agent/src/ebpf/mod.rs @@ -544,6 +544,9 @@ pub struct stack_profile_data { pub comm: [u8; PACKET_KNAME_MAX_PADDING + 1], pub process_name: [u8; PACKET_KNAME_MAX_PADDING + 1], // process name pub container_id: [u8; CONTAINER_ID_SIZE], // container id + pub interp_frame_count: u32, // number of structured interpreter frames + pub interp_frames_ptr: u64, // pointer to CInterpreterFrameInfo array + pub raw_interpreter_data: u8, // 1 = has structured interpreter data pub stack_data_len: u32, // stack data length /* @@ -558,6 +561,22 @@ pub struct stack_profile_data { pub stack_data: *mut c_char, } +/// C-compatible interpreter frame info struct. +/// Must match the C `interp_symbol_info_t` layout defined in extended.h. 
+#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct CInterpreterFrameInfo { + pub frame_type: u32, + pub function_name: *mut c_char, + pub class_name: *mut c_char, + pub lineno: u32, + pub file_name: *mut c_char, + pub sub_type: u32, + pub is_jit: u8, + pub raw_addr: u64, + pub resolve_failed: u8, +} + extern "C" { /* * Set maximum amount of data passed to the agent by eBPF program. diff --git a/agent/src/ebpf/user/extended/extended.c b/agent/src/ebpf/user/extended/extended.c index 870fd1dcfff..e139d78d0f5 100755 --- a/agent/src/ebpf/user/extended/extended.c +++ b/agent/src/ebpf/user/extended/extended.c @@ -26,6 +26,7 @@ #include "../tracer.h" #include "../socket.h" #include "../proc.h" +#include "extended.h" int __attribute__ ((weak)) extended_reader_create(struct bpf_tracer *tracer) { @@ -91,19 +92,31 @@ int __attribute__ ((weak)) print_extra_pkt_info(bool datadump_enable, return 0; } -char * __attribute__ ((weak)) extended_resolve_frame(int pid, u64 addr, u8 frame_type, u64 extra_a, u64 extra_b) +int __attribute__ ((weak)) extended_extract_interpreter_frames(int pid, + const u8 *frame_types, + const u64 *addrs, + const u64 *extra_data_a, + const u64 *extra_data_b, + int frame_count, + void *tracer, + bool new_cache, + void *info_p, + interp_symbol_info_t *out_frames, + int max_out) { - return NULL; + return 0; } -int __attribute__ ((weak)) extended_merge_stacks(char *dst, int len, const char *i_trace, const char *u_trace, int pid) +void __attribute__ ((weak)) extended_free_interp_frames(interp_symbol_info_t *frames, int count) { - return 0; } -char * __attribute__ ((weak)) extended_format_lua_stack(void *tracer, int pid, int stack_id, - const char *stack_map_name, void *h, - bool new_cache, void *info_p) +int __attribute__ ((weak)) extended_extract_structured_frames(void *tracer, int tgid, + int user_stack_id, int interp_stack_id, + const char *custom_stack_map_name, + bool new_cache, void *info_p, + interp_symbol_info_t *out_frames, + int max_out) { - return 
NULL; + return 0; } diff --git a/agent/src/ebpf/user/extended/extended.h b/agent/src/ebpf/user/extended/extended.h index 4464caea5ec..d2a4f2aff97 100644 --- a/agent/src/ebpf/user/extended/extended.h +++ b/agent/src/ebpf/user/extended/extended.h @@ -110,40 +110,80 @@ int print_extra_pkt_info(bool datadump_enable, const char *pkt_data, int len, char *buf, int buf_len, u8 direction); /** - * @brief **extended_resolve_frame()** Resolve a custom/interpreter frame - * @param pid Process ID - * @param addr Frame address/ID - * @param frame_type Frame type identifier - * @param extra_a Extra data A from stack map - * @param extra_b Extra data B from stack map - * @return Resolved symbol string (must be freed) or NULL + * @brief Structured interpreter symbol info for per-frame extraction. + * Matches the Rust CSymbolInfo layout (#[repr(C)]). */ -char *extended_resolve_frame(int pid, u64 addr, u8 frame_type, u64 extra_a, u64 extra_b); +#ifndef INTERP_SYMBOL_INFO_DEFINED +#define INTERP_SYMBOL_INFO_DEFINED +typedef struct { + u32 frame_type; // FRAME_TYPE_PHP/V8/LUA/PYTHON + char *function_name; // allocated via clib_mem_alloc + char *class_name; // allocated via clib_mem_alloc (or NULL) + u32 lineno; + char *file_name; // allocated via clib_mem_alloc (or NULL) + u32 sub_type; // language-specific sub-type + u8 is_jit; // 1 = JIT-compiled frame + u64 raw_addr; // original address + u8 resolve_failed; // 1 = resolution failed +} interp_symbol_info_t; +#endif /** - * @brief **extended_merge_stacks()** Merge interpreter and user stacks - * @param dst Destination buffer - * @param len Buffer length - * @param i_trace Interpreter stack string - * @param u_trace User stack string + * @brief **extended_extract_interpreter_frames()** Extract structured interpreter frames * @param pid Process ID - * @return Bytes written + * @param frame_types Array of frame types from BPF map + * @param addrs Array of frame addresses + * @param extra_data_a Array of extra data A values + * @param 
extra_data_b Array of extra data B values + * @param frame_count Number of frames in arrays + * @param tracer BPF tracer handle (for Lua) + * @param new_cache Whether this is a new cache entry (for Lua) + * @param info_p Process info pointer (for Lua) + * @param out_frames Output array of interp_symbol_info_t (caller-allocated) + * @param max_out Maximum output frames + * @return Number of frames written to out_frames + */ +int extended_extract_interpreter_frames(int pid, + const u8 *frame_types, + const u64 *addrs, + const u64 *extra_data_a, + const u64 *extra_data_b, + int frame_count, + void *tracer, + bool new_cache, + void *info_p, + interp_symbol_info_t *out_frames, + int max_out); + +/** + * @brief **extended_free_interp_frames()** Free memory owned by interp_symbol_info_t array + * @param frames Array of interp_symbol_info_t + * @param count Number of entries */ -int extended_merge_stacks(char *dst, int len, const char *i_trace, const char *u_trace, int pid); +void extended_free_interp_frames(interp_symbol_info_t *frames, int count); /** - * @brief **extended_format_lua_stack()** Format Lua interpreter stack frames + * @brief **extended_extract_structured_frames()** High-level extraction for a stack trace + * + * Reads user and interpreter BPF stack maps, extracts structured interpreter + * frame symbols via per-symbol cache + extract functions. 
+ * * @param tracer BPF tracer handle - * @param pid Process ID - * @param stack_id Interpreter stack ID from BPF map - * @param stack_map_name Name of the stack map - * @param h Stack string hash table - * @param new_cache Whether to create new cache entry + * @param tgid Process ID (for cache lookup and process type detection) + * @param user_stack_id User stack ID from BPF map (-1 if none) + * @param interp_stack_id Interpreter stack ID from BPF map (-1 if none) + * @param custom_stack_map_name Name of the custom stack map + * @param new_cache Whether cache entry is new * @param info_p Process info pointer - * @return Formatted stack string (caller must free) or NULL + * @param out_frames Caller-allocated output array + * @param max_out Maximum output frame count + * @return Number of frames written to out_frames */ -char *extended_format_lua_stack(void *tracer, int pid, int stack_id, - const char *stack_map_name, void *h, - bool new_cache, void *info_p); +int extended_extract_structured_frames(void *tracer, int tgid, + int user_stack_id, int interp_stack_id, + const char *custom_stack_map_name, + bool new_cache, void *info_p, + interp_symbol_info_t *out_frames, + int max_out); #endif /* DF_EXTENDED_H */ diff --git a/agent/src/ebpf/user/profile/perf_profiler.c b/agent/src/ebpf/user/profile/perf_profiler.c index 5814cc83c0f..283aaa02ed8 100644 --- a/agent/src/ebpf/user/profile/perf_profiler.c +++ b/agent/src/ebpf/user/profile/perf_profiler.c @@ -257,14 +257,8 @@ static void oncpu_reader_work(void *arg) exit: print_cp_tracer_status(); - print_hash_stack_str(&oncpu_ctx.stack_str_hash); - /* free stack_str_hash */ - if (likely(oncpu_ctx.stack_str_hash.buckets != NULL)) { - release_stack_str_hash(&oncpu_ctx.stack_str_hash); - } - print_hash_stack_trace_msg(&oncpu_ctx.msg_hash); - /* free stack_str_hash */ + /* free msg_hash */ if (likely(oncpu_ctx.msg_hash.buckets != NULL)) { /* Ensure that all elements are released properly/cleanly */ 
push_and_release_stack_trace_msg(&oncpu_ctx, diff --git a/agent/src/ebpf/user/profile/perf_profiler.h b/agent/src/ebpf/user/profile/perf_profiler.h index d1d03cbd9c8..edb54b4d8cf 100644 --- a/agent/src/ebpf/user/profile/perf_profiler.h +++ b/agent/src/ebpf/user/profile/perf_profiler.h @@ -186,6 +186,9 @@ typedef struct { u8 comm[TASK_COMM_LEN]; u8 process_name[TASK_COMM_LEN]; u8 container_id[CONTAINER_ID_SIZE]; + u32 interp_frame_count; // number of structured interpreter frames + u64 interp_frames_ptr; // pointer to interp_symbol_info_t array + u8 raw_interpreter_data; // 1 = has structured interpreter data u32 data_len; u64 data_ptr; u8 data[0]; diff --git a/agent/src/ebpf/user/profile/profile_common.c b/agent/src/ebpf/user/profile/profile_common.c index fccbd1079e8..8b65c1c27d4 100644 --- a/agent/src/ebpf/user/profile/profile_common.c +++ b/agent/src/ebpf/user/profile/profile_common.c @@ -40,7 +40,6 @@ #include "../load.h" #include "../../kernel/include/perf_profiler.h" #include "../perf_reader.h" -#include "../bihash_8_8.h" #include "stringifier.h" #include "../table.h" #include @@ -48,7 +47,6 @@ #include "java/jvm_symbol_collect.h" #include "profile_common.h" #include "../proc.h" -#include "stringifier.h" #define UNKNOWN_JAVA_SYMBOL_STR "Unknown" @@ -438,13 +436,12 @@ print_profiler_status(struct profiler_context *ctx, "stackmap_clear_failed_count\t%lu\n" "ransfer_count:\t%lu iter_count:\t%lu\nall" "oc_b:\t%lu bytes free_b:\t%lu bytes use:\t%lu bytes\n" - "stack_str_hash.hit_count %lu\nstack_trace_msg_hash hit %lu\n", + "stack_trace_msg_hash hit %lu\n", ctx->tag, atomic64_read(&t->recv), atomic64_read(&t->lost), ctx->perf_buf_lost_a_count, ctx->perf_buf_lost_b_count, ctx->stack_trace_err, ctx->stackmap_clear_failed_count, ctx->transfer_count, iter_count, alloc_b, free_b, alloc_b - free_b, - ctx->stack_str_hash.hit_hash_count, ctx->msg_hash.hit_hash_count); } @@ -468,8 +465,19 @@ static int push_and_free_msg_kvp_cb(stack_trace_msg_hash_kv * kv, void *arg) if 
(likely(ctx->profiler_stop == 0)) r = fun(ctx->callback_ctx, 0, msg); - if (!(r & TRACER_CALLBACK_FLAG_KEEP_DATA)) + if (!(r & TRACER_CALLBACK_FLAG_KEEP_DATA)) { + /* Free structured interpreter frames if present */ + if (msg->interp_frame_count > 0 + && msg->interp_frames_ptr != 0) { + interp_symbol_info_t *frames = + (interp_symbol_info_t *) + msg->interp_frames_ptr; + extended_free_interp_frames( + frames, msg->interp_frame_count); + clib_mem_free(frames); + } clib_mem_free((void *)msg); + } msg_kv->msg_ptr = 0; } @@ -869,7 +877,6 @@ static void aggregate_stack_traces(struct profiler_context *ctx, struct bpf_tracer *t, stack_map_t *stack_map, stack_map_t *custom_stack_map, - stack_str_hash_t * stack_str_hash, stack_trace_msg_hash_t * msg_hash, u32 * count, bool use_a_map) { @@ -1056,7 +1063,7 @@ static void aggregate_stack_traces(struct profiler_context *ctx, char *trace_str = resolve_and_gen_stack_trace_str(t, v, stack_map->name, custom_stack_map->name, - stack_str_hash, matched, + matched, process_name, info_p, ctx->type == PROFILER_TYPE_MEMORY); @@ -1136,6 +1143,38 @@ static void aggregate_stack_traces(struct profiler_context *ctx, msg->data_len = strlen((char *)msg->data); clib_mem_free(trace_str); + + /* + * Extract structured interpreter frames for server-side + * symbolization (PHP/V8/Python/Lua). The weak symbol + * default returns 0 in open-source builds. 
+ */ + if (matched && ctx->type != PROFILER_TYPE_MEMORY) { + interp_symbol_info_t interp_buf[128]; + int interp_count = + extended_extract_structured_frames( + t, v->tgid, + v->userstack, v->intpstack, + custom_stack_map->name, + matched, info_p, + interp_buf, 128); + if (interp_count > 0) { + size_t fsz = interp_count * + sizeof(interp_symbol_info_t); + interp_symbol_info_t *fc = + clib_mem_alloc_aligned( + "interp_frames", fsz, 0, NULL); + if (fc) { + memcpy(fc, interp_buf, fsz); + msg->interp_frame_count = interp_count; + msg->interp_frames_ptr = pointer_to_uword(fc); + msg->raw_interpreter_data = 1; + } else { /* alloc failed: release strings still owned by interp_buf */ + extended_free_interp_frames(interp_buf, interp_count); + } + } + } + kv.msg_ptr = pointer_to_uword(msg); if (stack_trace_msg_hash_add_del(msg_hash, @@ -1190,25 +1229,6 @@ void process_bpf_stacktraces(struct profiler_context *ctx, struct bpf_tracer *t) /* eBPF map record count for this iteration. */ u64 sample_cnt_val = 0; - /* - * Why use g_stack_str_hash? - * - * When the stringizer encounters a stack-ID for the first time in - * the stack trace table, it clears it. If a stack-ID is reused by - * different stack trace keys, the stringizer returns its memoized - * stack trace string. Since stack IDs are unstable between profile - * iterations, we create and destroy the stringizer in each profile - * iteration. - */ - if (unlikely(ctx->stack_str_hash.buckets == NULL)) { - if (init_stack_str_hash - (&ctx->stack_str_hash, "profile_stack_str")) { - ebpf_warning("%sinit_stack_str_hash() failed.\n", - ctx->tag); - return; - } - } - /* * During each transmission iteration, we have a hashmap structure in * place for the following purposes: @@ -1250,7 +1270,7 @@ void process_bpf_stacktraces(struct profiler_context *ctx, struct bpf_tracer *t) * data aggregation will be blocked if there is no data. 
*/ aggregate_stack_traces(ctx, t, stack_map, custom_stack_map, - &ctx->stack_str_hash, &ctx->msg_hash, + &ctx->msg_hash, &count, using_map_set_a); /* @@ -1284,9 +1304,6 @@ void process_bpf_stacktraces(struct profiler_context *ctx, struct bpf_tracer *t) //print_profiler_status(ctx, t, count); - /* free all elems */ - clean_stack_strs(&ctx->stack_str_hash); - /* Push messages and free stack_trace_msg_hash */ push_and_release_stack_trace_msg(ctx, &ctx->msg_hash, false); } diff --git a/agent/src/ebpf/user/profile/profile_common.h b/agent/src/ebpf/user/profile/profile_common.h index c319c4310ee..c28ee166f08 100644 --- a/agent/src/ebpf/user/profile/profile_common.h +++ b/agent/src/ebpf/user/profile/profile_common.h @@ -19,7 +19,6 @@ #include "../load.h" #include "perf_profiler.h" -#include "stringifier.h" typedef struct { char name[MAP_NAME_SZ]; @@ -50,8 +49,6 @@ struct profiler_context { // Read raw data from the eBPF perfbuf and temporarily store it. struct stack_trace_key_t *raw_stack_data; - // Cache hash: obtain folded stack trace string from stack ID. - stack_str_hash_t stack_str_hash; // Used for tracking data statistics and pushing. stack_trace_msg_hash_t msg_hash; diff --git a/agent/src/ebpf/user/profile/stringifier.c b/agent/src/ebpf/user/profile/stringifier.c index 2a977d0a6fa..8d7a6b46ea2 100644 --- a/agent/src/ebpf/user/profile/stringifier.c +++ b/agent/src/ebpf/user/profile/stringifier.c @@ -17,9 +17,8 @@ /* * Excute Stringifier on each iteration of the continuous perf profiler. * - * The Stringifier serves two purposes: - * 1. It constructs a "folded stack trace string" based on the stack frame addresses. - * 2. It records the result of (1) when reusing a stack identifier (stack-id). + * The Stringifier constructs a "folded stack trace string" based on + * the stack frame addresses. 
* * Example of a folded stack trace string (taken from a perf profiler test): * main;xxx();yyy() @@ -28,16 +27,11 @@ * * Kernel collects stack-traces separately for user & kernel space, * at the BPF level, we track stack traces with a key that includes two "stack-trace-ids", - * one for user space and one for kernel. Therefore, it is common to see reuse of - * individual stack trace identifiers... - * for example, when the same kernel stack trace is observed from multiple user - * stack traces, or when a given user space stack occasionally (but not always) enters - * the kernel. + * one for user space and one for kernel. * - * When the Stringifier reads the shared BPF stack trace address map, it uses a - * destructive read approach (it reads a stack trace from the table and then clears it). - * Due to the reuse of stack trace identifiers and destructive reads, the Stringifier - * caches the result of its stringification. In each iteration of a continuous perf profiler. + * Note: The per-stack string cache (stack_str_hash, ~1GB) has been replaced by a + * per-symbol cache (~5-50MB) in trace-utils-interp. Interpreter frame formatting + * and stack merging now happen server-side. 
*/ #ifndef AARCH64_MUSL @@ -54,8 +48,6 @@ #include "../../kernel/include/perf_profiler.h" #include "../perf_reader.h" #include "../table.h" -#include "../bihash_8_8.h" -#include "../bihash_16_8.h" #include "java/collect_symbol_files.h" #include "stringifier.h" #include @@ -65,7 +57,6 @@ // static const char *k_err_tag = "[kernel stack trace error]"; // static const char *u_err_tag = "[user stack trace error]"; -const char *i_err_tag = "[interpreter stack trace error]"; static const char *lost_tag = "[stack trace lost]"; static const char *k_sym_prefix = "[k] "; static const char *lib_sym_prefix = "[l] "; @@ -105,69 +96,6 @@ u64 get_stack_table_data_miss_count(void) return stack_table_data_miss; } -int init_stack_str_hash(stack_str_hash_t * h, const char *name) -{ - memset(h, 0, sizeof(*h)); - u32 nbuckets = STRINGIFIER_STACK_STR_HASH_BUCKETS_NUM; - u64 hash_memory_size = STRINGIFIER_STACK_STR_HASH_MEM_SZ; // 1G bytes - h->private = - clib_mem_alloc_aligned("hash_ext_data", - sizeof(struct stack_str_hash_ext_data), - 0, NULL); - if (h->private == NULL) - return ETR_NOMEM; - - struct stack_str_hash_ext_data *ext = h->private; - ext->stack_str_kvps = NULL; - ext->clear_hash = false; - - return stack_str_hash_init(h, (char *)name, nbuckets, hash_memory_size); -} - -void release_stack_str_hash(stack_str_hash_t * h) -{ - if (h->private) { - struct stack_str_hash_ext_data *ext = h->private; - vec_free(ext->stack_str_kvps); - clib_mem_free(ext); - } - - stack_str_hash_free(h); -} - -void clean_stack_strs(stack_str_hash_t * h) -{ - u64 elems_count = 0; - - /* - * In this iteration, all elements will be cleared, and in the - * next iteration, this hash will be reused. 
- */ - stack_str_hash_kv *v; - struct stack_str_hash_ext_data *ext = h->private; - vec_foreach(v, ext->stack_str_kvps) { - if (v->value != 0) - clib_mem_free((void *)v->value); - - if (stack_str_hash_add_del(h, v, 0 /* delete */ )) { - ebpf_warning("stack_str_hash_add_del() failed.\n"); - ext->clear_hash = true; - } - - elems_count++; - } - - vec_free(ext->stack_str_kvps); - - h->hit_hash_count = 0; - h->hash_elems_count = 0; - - if (ext->clear_hash) { - release_stack_str_hash(h); - } - - ebpf_debug("clean_stack_strs hashmap clear %lu elems.\n", elems_count); -} static inline char *create_symbol_str(int len, char *src, const char *tag) { @@ -540,7 +468,6 @@ static char *build_stack_trace_string(struct bpf_tracer *t, const char *stack_map_name, pid_t pid, int stack_id, - stack_str_hash_t * h, bool new_cache, int *ret_val, void *info_p, u64 ts, bool ignore_libs, bool use_symbol_table) @@ -606,28 +533,27 @@ static char *build_stack_trace_string(struct bpf_tracer *t, if (ips[i] == 0 || ips[i] == sentinel_addr) continue; + /* + * Skip interpreter frames (PHP/V8/Python/Lua). + * These are now sent as structured data via + * extended_extract_structured_frames() and merged + * server-side. Only native frames go into the + * folded stack trace string. + */ + if (stack.frame_types[i] != FRAME_TYPE_NORMAL) + continue; + if (start_idx == -1) start_idx = i; - /* - * Use extended hook to resolve frame if it's special. - * We pass possible extra data. If the frame type is 0 (normal), - * this call should return NULL. 
- */ - str = extended_resolve_frame(pid, ips[i], stack.frame_types[i], - stack.extra_data_a[i], - stack.extra_data_b[i]); - if (str == NULL) { - /* Normal fallback */ - if (use_symbol_table) { - str = resolve_custom_symbol_addr(symbols, symbol_ids, - n_symbols, - (i == start_idx), - ips[i]); - } else { - str = resolve_addr(t, pid, (i == start_idx), - ips[i], new_cache, info_p); - } + if (use_symbol_table) { + str = resolve_custom_symbol_addr(symbols, symbol_ids, + n_symbols, + (i == start_idx), + ips[i]); + } else { + str = resolve_addr(t, pid, (i == start_idx), + ips[i], new_cache, info_p); } if (str) { // ignore frames in library for memory profiling @@ -680,88 +606,27 @@ static char *build_stack_trace_string(struct bpf_tracer *t, +/* + * Build a folded stack trace string for the given stack ID. + * Returns a freshly allocated string that the caller must free. + */ static char *folded_stack_trace_string(struct bpf_tracer *t, int stack_id, pid_t pid, const char *stack_map_name, - stack_str_hash_t * h, bool new_cache, void *info_p, u64 ts, bool ignore_libs, bool use_symbol_table) { ASSERT(pid >= 0 && stack_id >= 0); - /* - * Firstly, search the stack-trace hash to see if the - * stack trace string has already been stored. - */ - stack_str_hash_kv kv; - kv.key = (u64) stack_id; - kv.value = 0; - if (stack_str_hash_search(h, &kv, &kv) == 0) { - __sync_fetch_and_add(&h->hit_hash_count, 1); - return (char *)kv.value; - } - - char *str = NULL; int ret_val = 0; - - /* - * Lua frames require special handling via extended hook. - * Lua uses its own stack format with encoded tag + pointer values. 
- */ - if (use_symbol_table) { - str = extended_format_lua_stack(t, pid, stack_id, stack_map_name, - h, new_cache, info_p); - if (str != NULL) { - /* Cache the result */ - kv.key = (u64) stack_id; - kv.value = pointer_to_uword(str); - if (stack_str_hash_add_del(h, &kv, 1)) { - clib_mem_free((void *)str); - return NULL; - } - int ret = VEC_OK; - struct stack_str_hash_ext_data *ext = h->private; - vec_add1(ext->stack_str_kvps, kv, ret); - if (ret != VEC_OK) { - ebpf_warning("vec add failed\n"); - } - return str; - } - } - - str = build_stack_trace_string(t, stack_map_name, pid, stack_id, - h, new_cache, &ret_val, info_p, ts, - ignore_libs, use_symbol_table); + char *str = build_stack_trace_string(t, stack_map_name, pid, stack_id, + new_cache, &ret_val, info_p, ts, + ignore_libs, use_symbol_table); if (ret_val == ETR_NOTEXIST) return NULL; - if (str == NULL) - return NULL; - - kv.key = (u64) stack_id; - kv.value = pointer_to_uword(str); - /* memoized stack trace string. Because the stack-ids - are not stable across profiler iterations. */ - if (stack_str_hash_add_del(h, &kv, 1 /* is_add */ )) { - ebpf_warning("stack_str_hash_add_del() failed.\n"); - clib_mem_free((void *)str); - str = NULL; - } else { - /* - * The new key-value pair has been successfully added. - * At the same time, add it to the additional data fo - * quick reference and easy access. - */ - int ret = VEC_OK; - struct stack_str_hash_ext_data *ext = h->private; - vec_add1(ext->stack_str_kvps, kv, ret); - if (ret != VEC_OK) { - ebpf_warning("vec add failed\n"); - } - } - return str; } @@ -780,30 +645,14 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, struct stack_trace_key_t *v, const char *stack_map_name, const char *custom_stack_map_name, - stack_str_hash_t * h, bool new_cache, char *process_name, void *info_p, bool ignore_libs) { - /* - * We need to prepare a hashtable (stack_trace_strs) to record the results - * of this iteration analysis. 
The key is the user-stack-ID or kernel-stack-ID, - * and the value is the "folded stack trace string". There are two reasons why - * we use this hashtable: - * - * 1. It is common to see reuse of individual stack trace identifiers - * (avoiding repetitive symbolization work). - * 2. When the Stringifier reads the shared BPF stack trace address map, - * it uses a destructive read approach (it reads a stack trace from - * the table and then clears it). It means that when a stackID is - * deleted, the list of IPs (function addresses) associated with it - * no longer exists, and must be kept beforehand. - */ - /* add separator and '\0' */ int len = 2; - char *k_trace_str, *u_trace_str, *trace_str, *uprobe_str, *i_trace_str; - k_trace_str = u_trace_str = trace_str = uprobe_str = i_trace_str = NULL; + char *k_trace_str, *u_trace_str, *trace_str, *uprobe_str; + k_trace_str = u_trace_str = trace_str = uprobe_str = NULL; /* For processes without configuration, the stack string is in the format 'process name;thread name'. */ @@ -838,11 +687,11 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, if (v->kernstack >= 0) { k_trace_str = folded_stack_trace_string(t, v->kernstack, 0, stack_map_name, - h, new_cache, info_p, + new_cache, info_p, v->timestamp, ignore_libs, false); if (k_trace_str == NULL) - return NULL; + goto cleanup; len += strlen(k_trace_str); } @@ -852,12 +701,12 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, v->flags & STACK_TRACE_FLAGS_DWARF ? 
custom_stack_map_name - : stack_map_name, h, + : stack_map_name, new_cache, info_p, v->timestamp, ignore_libs, false); if (u_trace_str == NULL) - return NULL; + goto cleanup; len += strlen(u_trace_str); } @@ -866,53 +715,28 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, resolve_addr(t, v->tgid, false, v->uprobe_addr, new_cache, info_p); if (uprobe_str == NULL) { - return NULL; + goto cleanup; } len += strlen(uprobe_str) + 1; } - bool has_intpstack = v->intpstack > 0; - if (has_intpstack) { - i_trace_str = folded_stack_trace_string(t, v->intpstack, v->tgid, - custom_stack_map_name, h, - new_cache, info_p, - v->timestamp, - ignore_libs, true); - if (i_trace_str != NULL) { - len += strlen(i_trace_str) + strlen(INCOMPLETE_PYTHON_STACK) + 2; - } else { - len += strlen(i_err_tag); - } - } + /* + * Interpreter frames (Python/Lua) from the interpreter stack map are + * now sent as structured data via extended_extract_structured_frames() + * and merged server-side. Skip reading the interpreter stack map here. + */ trace_str = alloc_stack_trace_str(len); if (trace_str == NULL) { ebpf_warning("No available memory space.\n"); - goto error; + goto cleanup; } - /* trace_str combines user/interpreter/kstack strings in call-order (root -> leaf). */ + /* trace_str combines user/kstack strings in call-order (root -> leaf). + * Interpreter-native stack merging now happens server-side. 
*/ int offset = 0; - if (i_trace_str && u_trace_str) { - /* Use extended merge */ - int merged = extended_merge_stacks(trace_str + offset, - len - offset, i_trace_str, - u_trace_str, v->tgid); - if (merged > 0) { - offset += merged; - } else { - /* Fallback */ - offset += snprintf(trace_str + offset, len - offset, - "%s;%s", i_trace_str, u_trace_str); - } - } else if (i_trace_str) { - offset += snprintf(trace_str + offset, len - offset, "%s", i_trace_str); - } else if (u_trace_str) { - if (has_intpstack) { - offset += snprintf(trace_str + offset, len - offset, "%s;%s", i_err_tag, u_trace_str); - } else { - offset += snprintf(trace_str + offset, len - offset, "%s", u_trace_str); - } + if (u_trace_str) { + offset += snprintf(trace_str + offset, len - offset, "%s", u_trace_str); } if (u_trace_str && uprobe_str) { offset += snprintf(trace_str + offset, len - offset, ";%s", uprobe_str); @@ -922,41 +746,34 @@ char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, } if (offset == 0) { - /* - * The kernel can indicate the invalidity of a stack ID in two - * different ways: - * - * -EFAULT: Stack trace is unavailable - * For example, if the stack trace is only available in user space - * and the kstack_id is invalid, this error code (-EFAULT) is used. - * - * -EEXIST: Hash bucket collision in the stack trace table - * - * If there is a hash table collision for one or both stack IDs, we - * may reach this branch. However, we should not reach this point when - * both stack IDs are set to "invalid" with the error code -EFAULT. 
- */ - + clib_mem_free(trace_str); len += strlen(lost_tag); trace_str = alloc_stack_trace_str(len); if (trace_str == NULL) { ebpf_warning("No available memory space.\n"); - goto error; + goto cleanup; } snprintf(trace_str, len, "%s", lost_tag); } - if (uprobe_str) { + /* Free sub-strings (now owned by caller, no longer cached) */ + if (k_trace_str) + clib_mem_free(k_trace_str); + if (u_trace_str) + clib_mem_free(u_trace_str); + if (uprobe_str) clib_mem_free(uprobe_str); - } return trace_str; -error: - if (uprobe_str) { +cleanup: + if (k_trace_str) + clib_mem_free(k_trace_str); + if (u_trace_str) + clib_mem_free(u_trace_str); + if (uprobe_str) clib_mem_free(uprobe_str); - } return NULL; } diff --git a/agent/src/ebpf/user/profile/stringifier.h b/agent/src/ebpf/user/profile/stringifier.h index 19c992df769..e8630025bc3 100644 --- a/agent/src/ebpf/user/profile/stringifier.h +++ b/agent/src/ebpf/user/profile/stringifier.h @@ -17,37 +17,13 @@ #ifndef DF_USER_STRINGIFIER_H #define DF_USER_STRINGIFIER_H -#include "../bihash_8_8.h" - -#define stack_str_hash_t clib_bihash_8_8_t -#define stack_str_hash_init clib_bihash_init_8_8 -#define stack_str_hash_kv clib_bihash_kv_8_8_t -#define print_hash_stack_str print_bihash_8_8 -#define stack_str_hash_search clib_bihash_search_8_8 -#define stack_str_hash_add_del clib_bihash_add_del_8_8 -#define stack_str_hash_free clib_bihash_free_8_8 -#define stack_str_hash_key_value_pair_cb clib_bihash_foreach_key_value_pair_cb_8_8 -#define stack_str_hash_foreach_key_value_pair clib_bihash_foreach_key_value_pair_8_8 - -struct stack_str_hash_ext_data { - /* - * It is used for quickly releasing the stack_str_hash resource. 
- */ - stack_str_hash_kv *stack_str_kvps; - bool clear_hash; -}; - #ifndef AARCH64_MUSL u64 get_stack_table_data_miss_count(void); -int init_stack_str_hash(stack_str_hash_t *h, const char *name); -void clean_stack_strs(stack_str_hash_t *h); -void release_stack_str_hash(stack_str_hash_t *h); char *resolve_and_gen_stack_trace_str(struct bpf_tracer *t, struct stack_trace_key_t *v, const char *stack_map_name, const char *custom_stack_map_name, - stack_str_hash_t *h, bool new_cache, char *process_name, void *info_p, bool ignore_libs); char *rewrite_java_symbol(char *sym); diff --git a/agent/src/ebpf_dispatcher.rs b/agent/src/ebpf_dispatcher.rs index 8c471fd1c84..108b7728841 100644 --- a/agent/src/ebpf_dispatcher.rs +++ b/agent/src/ebpf_dispatcher.rs @@ -731,6 +731,53 @@ impl EbpfCollector { } else { profile.data = profile_data.to_vec(); } + + // Convert structured interpreter frames to protobuf if present + if data.raw_interpreter_data != 0 + && data.interp_frame_count > 0 + && data.interp_frames_ptr != 0 + { + let frames_ptr = data.interp_frames_ptr as *const ebpf::CInterpreterFrameInfo; + let frame_count = data.interp_frame_count as usize; + let mut pb_frames = Vec::with_capacity(frame_count); + for i in 0..frame_count { + let f = &*frames_ptr.add(i); + let function_name = if f.function_name.is_null() { + String::new() + } else { + CStr::from_ptr(f.function_name) + .to_string_lossy() + .into_owned() + }; + let class_name = if f.class_name.is_null() { + String::new() + } else { + CStr::from_ptr(f.class_name).to_string_lossy().into_owned() + }; + let file_name = if f.file_name.is_null() { + String::new() + } else { + CStr::from_ptr(f.file_name).to_string_lossy().into_owned() + }; + pb_frames.push(metric::InterpreterFrameSymbol { + frame_type: f.frame_type as i32, + function_name, + class_name, + lineno: f.lineno, + file_name, + sub_type: f.sub_type, + is_jit: f.is_jit != 0, + raw_addr: f.raw_addr, + resolve_failed: f.resolve_failed != 0, + }); + } + 
profile.interpreter_stack = Some(metric::InterpreterStack { frames: pb_frames }); + profile.raw_interpreter_data = true; + // stack_data now contains native+kernel frames only (interpreter frames + // are skipped in build_stack_trace_string). Server uses native_stack_trace + // + interpreter_stack to merge the complete folded stack. + profile.native_stack_trace = String::from_utf8_lossy(profile_data).into_owned(); + } let container_id = CStr::from_ptr(data.container_id.as_ptr() as *const libc::c_char).to_string_lossy(); if let Some(policy_getter) = POLICY_GETTER.as_ref() { diff --git a/message/metric.proto b/message/metric.proto index 1c5754beae6..0de628dab54 100644 --- a/message/metric.proto +++ b/message/metric.proto @@ -233,6 +233,43 @@ message Profile { // Deprecated in v6.4.1: string container_id = 32; uint32 pod_id = 33; uint64 wide_count = 34; + + // Server-side symbolization fields (added for symbolization migration) + InterpreterStack interpreter_stack = 40; // Structured interpreter stack frames + string native_stack_trace = 41; // Native frame folded stack string + bool raw_interpreter_data = 42; // true = new format (needs server symbolization) +} + +// Interpreter frame type constants (matches eBPF FRAME_TYPE_* values) +enum InterpreterFrameType { + FRAME_TYPE_UNKNOWN = 0; + FRAME_TYPE_PYTHON = 1; + FRAME_TYPE_PHP = 2; + FRAME_TYPE_V8 = 3; + FRAME_TYPE_LUA = 4; +} + +// Structured symbol information for a single interpreter stack frame. +// Agent extracts raw symbol strings from process memory and sends +// structured data; Server performs formatting and stack merging. 
+message InterpreterFrameSymbol { + InterpreterFrameType frame_type = 1; + string function_name = 2; // Function/method name (extracted from process memory) + string class_name = 3; // Class name (PHP/Python) + uint32 lineno = 4; // Line number + string file_name = 5; // File/script name (V8 Script name, Lua chunk name) + uint32 sub_type = 6; // Language-specific sub-type: + // PHP: type_info (distinguishes top-level code, etc.) + // V8: frame sub-type (BYTECODE/NATIVE_SFI/NATIVE_CODE/...) + // Lua: tag type (LUA_FUNC/CFUNC/FFUNC) + bool is_jit = 7; // JIT compiled flag + uint64 raw_addr = 8; // Original address (for fallback/debugging) + bool resolve_failed = 9; // Symbol extraction failed flag +} + +// A complete interpreter call stack +message InterpreterStack { + repeated InterpreterFrameSymbol frames = 1; } enum IoOperation { diff --git a/server/ingester/profile/decoder/decoder.go b/server/ingester/profile/decoder/decoder.go index 2747fb38f83..3b49f9b09c3 100644 --- a/server/ingester/profile/decoder/decoder.go +++ b/server/ingester/profile/decoder/decoder.go @@ -29,6 +29,7 @@ import ( "github.com/deepflowio/deepflow/server/ingester/flow_tag" profile_common "github.com/deepflowio/deepflow/server/ingester/profile/common" "github.com/deepflowio/deepflow/server/ingester/profile/dbwriter" + "github.com/deepflowio/deepflow/server/ingester/profile/symbolizer" "github.com/deepflowio/deepflow/server/libs/codec" "github.com/deepflowio/deepflow/server/libs/datatype" "github.com/deepflowio/deepflow/server/libs/flow-metrics/pb" @@ -282,16 +283,32 @@ func (d *Decoder) handleProfileData(vtapID uint16, decoder *codec.SimpleDecoder) // adapt agent version before v6.6 parser.processTracer.value = uint64(profile.Count) } - // for ebpf profiling data, directly write, no need to parse - log.Debugf("decode ebpf profile data, compression: %d, data: %v", profile.DataCompressed, profile.Data) + + var stackData []byte + var srcCompressed bool + if profile.RawInterpreterData && 
profile.InterpreterStack != nil && len(profile.InterpreterStack.Frames) > 0 { + // New-format agent: structured interpreter frames need server-side symbolization + frames := convertPBFrames(profile.InterpreterStack.Frames) + mergedStack := symbolizer.Symbolize(frames, profile.NativeStackTrace) + stackData = []byte(mergedStack) + srcCompressed = false // symbolized string is uncompressed + log.Debugf("decode ebpf profile data with server-side symbolization, frames=%d, native_len=%d", + len(profile.InterpreterStack.Frames), len(profile.NativeStackTrace)) + } else { + // Old-format agent or pure native stack: use profile.Data as-is + stackData = profile.Data + srcCompressed = profile.DataCompressed + log.Debugf("decode ebpf profile data, compression: %v, data_len: %d", profile.DataCompressed, len(profile.Data)) + } + err := parser.rawStackToInProcess( - profile.Data, + stackData, parser.value, metadata.StartTime, metadata.Units.String(), metadata.SpyName, metadata.Key.Labels(), - profile.DataCompressed, + srcCompressed, ) if err != nil { log.Errorf("decode ebpf profile data failed, offset=%d, len=%d, err=%s", decoder.Offset(), len(decoder.Bytes()), err) @@ -305,6 +322,25 @@ func (d *Decoder) handleProfileData(vtapID uint16, decoder *codec.SimpleDecoder) } } +// convertPBFrames converts protobuf InterpreterFrameSymbol messages to symbolizer InterpreterFrame structs. 
+func convertPBFrames(pbFrames []*pb.InterpreterFrameSymbol) []*symbolizer.InterpreterFrame { + frames := make([]*symbolizer.InterpreterFrame, 0, len(pbFrames)) + for _, pf := range pbFrames { + frames = append(frames, &symbolizer.InterpreterFrame{ + FrameType: int32(pf.FrameType), + FunctionName: pf.FunctionName, + ClassName: pf.ClassName, + Lineno: pf.Lineno, + FileName: pf.FileName, + SubType: pf.SubType, + IsJIT: pf.IsJit, + RawAddr: pf.RawAddr, + ResolveFailed: pf.ResolveFailed, + }) + } + return frames +} + func (d *Decoder) filleBPFData(profile *pb.Profile) *pb.Profile { profile.From = uint32(profile.Timestamp / 1e9) // ns to s profile.Until = uint32(time.Now().Unix()) diff --git a/server/ingester/profile/symbolizer/symbolizer.go b/server/ingester/profile/symbolizer/symbolizer.go new file mode 100644 index 00000000000..e26ee159faa --- /dev/null +++ b/server/ingester/profile/symbolizer/symbolizer.go @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2024 Yunshan Networks + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package symbolizer + +// InterpreterFrame is the server-side representation of an interpreter stack frame. +// It mirrors the protobuf InterpreterFrameSymbol message. 
+type InterpreterFrame struct { + FrameType int32 + FunctionName string + ClassName string + Lineno uint32 + FileName string + SubType uint32 + IsJIT bool + RawAddr uint64 + ResolveFailed bool +} + +// InterpreterType constants matching protobuf InterpreterFrameType enum values. +const ( + FrameTypeUnknown = 0 + FrameTypePython = 1 + FrameTypePHP = 2 + FrameTypeV8 = 3 + FrameTypeLua = 4 +) + +// Symbolize is a stub for the open-source edition. +// The enterprise edition provides the full implementation that formats interpreter +// frames and merges them with the native stack trace. +// In the open-source edition, interpreter frames are ignored and the native stack +// trace is returned as-is. +func Symbolize(frames []*InterpreterFrame, nativeStackTrace string) string { + return nativeStackTrace +} From e8d5a7313cdcc39b714d3b069d1fe613f54e40b9 Mon Sep 17 00:00:00 2001 From: Kai WANG Date: Sat, 28 Feb 2026 12:21:20 +0800 Subject: [PATCH 2/2] refactor: utilize cache to optimize profile's cpu occupation --- agent/src/ebpf/user/profile/stringifier.c | 255 +++++++++++++++++++++- 1 file changed, 246 insertions(+), 9 deletions(-) diff --git a/agent/src/ebpf/user/profile/stringifier.c b/agent/src/ebpf/user/profile/stringifier.c index 8d7a6b46ea2..0ee2f65e942 100644 --- a/agent/src/ebpf/user/profile/stringifier.c +++ b/agent/src/ebpf/user/profile/stringifier.c @@ -62,6 +62,159 @@ static const char *k_sym_prefix = "[k] "; static const char *lib_sym_prefix = "[l] "; static const char *u_sym_prefix = ""; +#define PROC_SYM_CACHE_SIZE 8192 +#define FOLDED_STACK_CACHE_SIZE 16384 +#define FOLDED_STACK_CACHE_OPT_IGNORE_LIBS 0x1 +#define FOLDED_STACK_CACHE_OPT_USE_SYMBOL_TABLE 0x2 + +struct proc_symbol_cache_entry { + pid_t pid; + u64 stime; + u64 resolver; + u64 address; + char *symbol; + bool valid; +}; + +static __thread struct proc_symbol_cache_entry g_proc_symbol_cache[PROC_SYM_CACHE_SIZE]; + +struct folded_stack_cache_entry { + pid_t pid; + u64 stime; + u64 stack_hash; + u32 
opts; + char *folded; + bool valid; +}; + +static __thread struct folded_stack_cache_entry + g_folded_stack_cache[FOLDED_STACK_CACHE_SIZE]; + +static inline u32 proc_symbol_cache_hash(pid_t pid, u64 stime, u64 resolver, + u64 address) +{ + u64 h = address; + h ^= resolver + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + h ^= stime + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + h ^= ((u64)(u32)pid) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + return (u32)h & (PROC_SYM_CACHE_SIZE - 1); +} + +static inline u32 folded_stack_cache_hash(pid_t pid, u64 stime, u64 stack_hash, + u32 opts) +{ + u64 h = stack_hash; + h ^= stime + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + h ^= ((u64)(u32)pid) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + h ^= (u64)opts + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + return (u32)h & (FOLDED_STACK_CACHE_SIZE - 1); +} + +static char *clone_symbol_str(const char *src) +{ + if (src == NULL) { + return NULL; + } + int len = strlen(src); + char *dst = clib_mem_alloc_aligned("symbol_str", len + 1, 0, NULL); + if (dst == NULL) { + return NULL; + } + memcpy(dst, src, len + 1); + return dst; +} + +static inline bool is_unresolved_symbol(const char *symbol) +{ + return symbol + && strncmp(symbol, "[unknown", strlen("[unknown")) == 0; +} + +static bool proc_symbol_cache_lookup(pid_t pid, u64 stime, u64 resolver, + u64 address, char **sym_ptr) +{ + if (sym_ptr == NULL) { + return false; + } + u32 idx = proc_symbol_cache_hash(pid, stime, resolver, address); + struct proc_symbol_cache_entry *entry = &g_proc_symbol_cache[idx]; + if (!entry->valid || entry->pid != pid || entry->stime != stime + || entry->resolver != resolver || entry->address != address + || entry->symbol == NULL) { + return false; + } + *sym_ptr = clone_symbol_str(entry->symbol); + return *sym_ptr != NULL; +} + +static void proc_symbol_cache_store(pid_t pid, u64 stime, u64 resolver, + u64 address, const char *symbol) +{ + if (symbol == NULL) { + return; + } + u32 idx = 
proc_symbol_cache_hash(pid, stime, resolver, address); + struct proc_symbol_cache_entry *entry = &g_proc_symbol_cache[idx]; + char *new_symbol = clone_symbol_str(symbol); + if (new_symbol == NULL) { + return; + } + if (entry->symbol != NULL) { + clib_mem_free(entry->symbol); + } + entry->pid = pid; + entry->stime = stime; + entry->resolver = resolver; + entry->address = address; + entry->symbol = new_symbol; + entry->valid = true; +} + +static bool folded_stack_cache_lookup(pid_t pid, u64 stime, u64 stack_hash, + u32 opts, char **folded_ptr) +{ + if (folded_ptr == NULL) { + return false; + } + + u32 idx = folded_stack_cache_hash(pid, stime, stack_hash, opts); + struct folded_stack_cache_entry *entry = &g_folded_stack_cache[idx]; + if (!entry->valid || entry->pid != pid || entry->stime != stime + || entry->stack_hash != stack_hash || entry->opts != opts + || entry->folded == NULL) { + return false; + } + + *folded_ptr = clone_symbol_str(entry->folded); + return *folded_ptr != NULL; +} + +static void folded_stack_cache_store(pid_t pid, u64 stime, u64 stack_hash, + u32 opts, const char *folded) +{ + if (folded == NULL) { + return; + } + + u32 idx = folded_stack_cache_hash(pid, stime, stack_hash, opts); + struct folded_stack_cache_entry *entry = &g_folded_stack_cache[idx]; + char *new_folded = clone_symbol_str(folded); + if (new_folded == NULL) { + return; + } + + if (entry->folded != NULL) { + clib_mem_free(entry->folded); + } + + entry->pid = pid; + entry->stime = stime; + entry->stack_hash = stack_hash; + entry->opts = opts; + entry->folded = new_folded; + entry->valid = true; +} + // Stack trace structure definition (user-space copy of eBPF structure) // Must match the definition in perf_profiler.bpf.c #ifndef PERF_MAX_STACK_DEPTH @@ -78,6 +231,33 @@ typedef struct { u64 extra_data_b[PERF_MAX_STACK_DEPTH]; } stack_t; +static u64 folded_stack_calc_hash(const stack_t *stack, u64 sentinel_addr) +{ + if (!stack) { + return 0; + } + + u64 hash = 1469598103934665603ULL; 
+ for (int i = 0; i < PERF_MAX_STACK_DEPTH; i++) { + u64 addr = stack->addrs[i]; + u8 frame_type = stack->frame_types[i]; + + if (addr == 0) { + break; + } + if (addr == sentinel_addr) { + continue; + } + + hash ^= addr + 0x9e3779b97f4a7c15ULL + (hash << 6) + + (hash >> 2); + hash ^= (u64)frame_type + 0x9e3779b97f4a7c15ULL + (hash << 6) + + (hash >> 2); + } + + return hash; +} + /* * To track the scenario where stack data is missing in the eBPF * 'stack_map_*' table. This typically occurs due to the design of @@ -147,9 +327,12 @@ static char *proc_symbol_name_fetch(pid_t pid, struct bcc_symbol *sym) ASSERT(pid >= 0); int len = 0; - char *ptr = (char *)sym->demangle_name; + char *ptr = (char *)(sym->demangle_name ? sym->demangle_name : sym->name); + if (ptr == NULL) { + ptr = "[unknown]"; + } - if (maybe_rust_symbol(sym->demangle_name)) { + if (maybe_rust_symbol(ptr)) { // likely a rust name char rust_name[RUST_SYM_MAX_LEN]; memset(rust_name, 0, sizeof(rust_name)); @@ -274,13 +457,29 @@ static inline int symcache_resolve(pid_t pid, void *resolver, u64 address, } else { struct symbolizer_proc_info *p = info_p; if (p) { + bool cache_allowed = !p->is_java && p->stime != 0; + u64 resolver_key = (u64)resolver; if (p->is_exit || ((u64) resolver != (u64) p->syms_cache)) return (-1); pthread_mutex_lock(&p->mutex); + if (cache_allowed + && proc_symbol_cache_lookup(pid, p->stime, + resolver_key, address, + sym_ptr)) { + pthread_mutex_unlock(&p->mutex); + return 0; + } + ret = bcc_symcache_resolve(resolver, address, sym); if (ret == 0) { *sym_ptr = proc_symbol_name_fetch(pid, sym); + if (cache_allowed && *sym_ptr != NULL + && !is_unresolved_symbol(*sym_ptr)) { + proc_symbol_cache_store(pid, p->stime, + resolver_key, + address, *sym_ptr); + } if (p->is_java) { // handle java encoded symbols char *new_sym = @@ -518,6 +717,32 @@ static char *build_stack_trace_string(struct bpf_tracer *t, return NULL; } + bool stack_cache_enabled = new_cache && !use_symbol_table; + u64 stime_key = 
0; + u64 stack_hash = 0; + u32 stack_cache_opts = 0; + if (stack_cache_enabled) { + if (ignore_libs) { + stack_cache_opts |= FOLDED_STACK_CACHE_OPT_IGNORE_LIBS; + } + if (use_symbol_table) { + stack_cache_opts |= FOLDED_STACK_CACHE_OPT_USE_SYMBOL_TABLE; + } + + if (pid > 0 && info_p) { + struct symbolizer_proc_info *proc_info = info_p; + stime_key = proc_info->stime; + } + + stack_hash = folded_stack_calc_hash(&stack, sentinel_addr); + char *cached_folded = NULL; + if (folded_stack_cache_lookup(pid, stime_key, stack_hash, + stack_cache_opts, + &cached_folded)) { + return cached_folded; + } + } + // For debugging: stack.len is the number of frames u64 *ips = stack.addrs; @@ -529,6 +754,7 @@ static char *build_stack_trace_string(struct bpf_tracer *t, return NULL; int start_idx = -1, folded_size = 0; + bool has_unresolved_frame = false; for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) { if (ips[i] == 0 || ips[i] == sentinel_addr) continue; @@ -554,13 +780,16 @@ static char *build_stack_trace_string(struct bpf_tracer *t, } else { str = resolve_addr(t, pid, (i == start_idx), ips[i], new_cache, info_p); - } - if (str) { - // ignore frames in library for memory profiling - if (ignore_libs && strlen(str) >= strlen(lib_sym_prefix) - && strncmp(str, lib_sym_prefix, - strlen(lib_sym_prefix)) == 0) { - clib_mem_free(str); + } + if (str) { + if (is_unresolved_symbol(str)) { + has_unresolved_frame = true; + } + // ignore frames in library for memory profiling + if (ignore_libs && strlen(str) >= strlen(lib_sym_prefix) + && strncmp(str, lib_sym_prefix, + strlen(lib_sym_prefix)) == 0) { + clib_mem_free(str); continue; } symbol_array[i] = pointer_to_uword(str); @@ -590,6 +819,14 @@ static char *build_stack_trace_string(struct bpf_tracer *t, if (len - 1 >= 0) { fold_stack_trace_str[len - 1] = '\0'; } + + if (stack_cache_enabled && !has_unresolved_frame + && fold_stack_trace_str[0] != '\0') { + folded_stack_cache_store(pid, stime_key, stack_hash, + stack_cache_opts, + 
fold_stack_trace_str); + } + vec_free(symbol_array); return fold_stack_trace_str;