Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,6 @@ include(GNUInstallDirs)
# Use fixed name instead of absolute path for reproducible builds
add_compile_options("-ffile-prefix-map=${CCF_DIR}=CCF")

# In Debug builds, export symbols to the dynamic symbol table so that
# backtrace_symbols can resolve function names in stacktraces, and preserve
# frame pointers so that backtrace() can walk the full call stack.
add_link_options($<$<CONFIG:Debug>:-rdynamic>)
add_compile_options($<$<CONFIG:Debug>:-fno-omit-frame-pointer>)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not omitting frame pointers still seems useful?


set(CMAKE_MODULE_PATH "${CCF_DIR}/cmake;${CMAKE_MODULE_PATH}")

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down Expand Up @@ -283,13 +277,10 @@ add_ccf_static_library(
${CCF_DIR}/src/tasks/thread_manager.cpp
${CCF_DIR}/src/tasks/worker.cpp
)
target_link_libraries(ccf_tasks PRIVATE ${CMAKE_DL_LIBS})
if(USE_LIBCXX)
# worker.cpp uses backtrace() which, under libc++, resolves through libunwind.
# On systems where libunwind is built with LZMA support (e.g. Azure Linux),
# this creates a transitive dependency on liblzma.
target_link_libraries(ccf_tasks PRIVATE lzma)
endif()

# libbacktrace reads DWARF debug info directly, providing file/line/function
# resolution in stack traces.
target_link_libraries(ccf_tasks PRIVATE ${CMAKE_DL_LIBS} backtrace)

# Common test args for Python scripts starting up CCF networks
set(WORKER_THREADS
Expand Down
2 changes: 1 addition & 1 deletion cmake/cpack_settings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ set(CCF_RPM_DEPENDENCIES
)
# + runtime dependencies
set(CCF_RPM_DEPENDENCIES
"${CCF_RPM_DEPENDENCIES}, libuv-devel >= ${LIBUV_MINIMAL_VERSION}, curl-devel >= ${CURL_MINIMAL_VERSION}, libcxxabi-devel >= ${CLANG_AND_LIBCXXABI_MINIMAL_VERSION}"
"${CCF_RPM_DEPENDENCIES}, libuv-devel >= ${LIBUV_MINIMAL_VERSION}, curl-devel >= ${CURL_MINIMAL_VERSION}, libcxxabi-devel >= ${CLANG_AND_LIBCXXABI_MINIMAL_VERSION}, libbacktrace-static >= ${LIBBACKTRACE_MINIMAL_VERSION}"
)
# + alter name
set(CPACK_PACKAGE_NAME "${CPACK_PACKAGE_NAME}_devel")
Expand Down
1 change: 1 addition & 0 deletions cmake/cpack_versions_pin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ set(OPENSSL_MINIMAL_VERSION "3.3.0")
set(NGHTTP2_MINIMAL_VERSION "1.40.0")
set(LIBUV_MINIMAL_VERSION "1.34.2")
set(CURL_MINIMAL_VERSION "7.68.0")
set(LIBBACKTRACE_MINIMAL_VERSION "13.2.0")
set(CLANG_AND_LIBCXXABI_MINIMAL_VERSION "18.1.2")
3 changes: 2 additions & 1 deletion scripts/setup-ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ tdnf --snapshottime=$SOURCE_DATE_EPOCH -y install \
libarrow-devel \
parquet-libs-devel \
doxygen \
clang-tools-extra-devel
clang-tools-extra-devel \
libbacktrace-static

# To run standard tests
tdnf --snapshottime=$SOURCE_DATE_EPOCH -y install \
Expand Down
30 changes: 11 additions & 19 deletions src/tasks/test/basic_tasks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,36 +245,36 @@ TEST_CASE("Scheduling" * doctest::test_suite("basic_tasks"))
REQUIRE(count_with_me == target);
}

// Helper functions at namespace scope to ensure external linkage, so that
// backtrace_symbols can resolve their names with -rdynamic.
// Call chains for stack trace verification. noinline ensures each
// function survives as a distinct frame in optimised builds.
namespace exception_handling_test
{
void level_3_throws_runtime_error()
__attribute__((noinline)) void level_3_throws_runtime_error()
{
throw std::runtime_error("Test exception");
}

void level_2_calls_level_3()
__attribute__((noinline)) void level_2_calls_level_3()
{
level_3_throws_runtime_error();
}

void level_1_calls_level_2()
__attribute__((noinline)) void level_1_calls_level_2()
{
level_2_calls_level_3();
}

void level_3_throws_int()
__attribute__((noinline)) void level_3_throws_int()
{
throw 42;
}

void level_2_calls_level_3_int()
__attribute__((noinline)) void level_2_calls_level_3_int()
{
level_3_throws_int();
}

void level_1_calls_level_2_int()
__attribute__((noinline)) void level_1_calls_level_2_int()
{
level_2_calls_level_3_int();
}
Expand Down Expand Up @@ -389,20 +389,12 @@ TEST_CASE("Exception handling" * doctest::test_suite("basic_tasks"))
REQUIRE(
logger_ptr->contains("ThrowsUnknown task failed with unknown exception"));

// Verify that stack traces contain demangled function names from the
// known call chains. These functions have external linkage and are
// exported to the dynamic symbol table via -rdynamic in Debug builds.
// In Release builds, -rdynamic is not set and functions may be inlined,
// so we skip name-based assertions there.
// Verify demangled function names appear in the stack traces
#ifndef NDEBUG
// Note: very small leaf functions (e.g. level_3_throws_int, which is
// just `throw 42;`) may be inlined by the compiler, so we only assert
// on the caller frames that reliably appear.

// ThrowsException call chain
REQUIRE(logger_ptr->contains("level_3_throws_runtime_error"));
REQUIRE(logger_ptr->contains("level_2_calls_level_3()"));
REQUIRE(logger_ptr->contains("level_1_calls_level_2()"));
REQUIRE(logger_ptr->contains("level_2_calls_level_3"));
REQUIRE(logger_ptr->contains("level_1_calls_level_2"));

// ThrowsUnknown call chain
REQUIRE(logger_ptr->contains("level_2_calls_level_3_int"));
Expand Down
178 changes: 130 additions & 48 deletions src/tasks/worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

#include "tasks/worker.h"

#include <backtrace.h>
#include <cstdlib>
#include <cxxabi.h>
#include <dlfcn.h>
#include <execinfo.h>
#include <memory>
#include <sstream>

Expand Down Expand Up @@ -34,66 +34,129 @@ namespace ccf::tasks
}
};

struct FreePtrArrayDeleter
// Lazily initialise and return the process-wide backtrace_state.
// backtrace_create_state allocates resources that cannot be freed, so
// we call it at most once and cache the result.
backtrace_state* get_backtrace_state()
{
void operator()(char** p) const
static backtrace_state* state = backtrace_create_state(
nullptr, // let libbacktrace find the executable
1, // threaded = true
nullptr, // ignore errors during init
nullptr);
return state;
}

// Demangle a C++ mangled symbol name. Returns the original string
// unchanged if demangling fails.
std::string demangle(const char* name)
{
if (name == nullptr)
{
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc,cppcoreguidelines-owning-memory,bugprone-multi-level-implicit-pointer-conversion)
free(p);
return "<unknown>";
}

int status = 0;
std::unique_ptr<char, FreeDeleter> demangled(
abi::__cxa_demangle(name, nullptr, nullptr, &status));
if (status == 0 && demangled != nullptr)
{
return demangled.get();
}
return name;
}

// Data passed through libbacktrace callbacks to build up each frame's
// description.
struct PcinfoResult
{
bool resolved = false;
std::string function;
std::string filename;
int lineno = 0;
};

std::string demangle_symbol(const char* raw)
// Called by backtrace_pcinfo for each source location (may be called
// multiple times per PC when inlined calls are present).
int pcinfo_callback(
void* data,
uintptr_t /*pc*/,
const char* filename,
int lineno,
const char* function)
{
// backtrace_symbols format: "binary(mangled+0xoffset) [0xaddr]"
// Try to extract and demangle the symbol name between '(' and '+'/')'
std::string entry(raw);
auto open = entry.find('(');
auto plus = entry.find('+', open != std::string::npos ? open : 0);
auto close = entry.find(')', open != std::string::npos ? open : 0);

if (
open != std::string::npos && close != std::string::npos &&
close > open + 1)
auto* result = static_cast<PcinfoResult*>(data);
if (function != nullptr)
{
auto end = (plus != std::string::npos && plus < close) ? plus : close;
std::string mangled = entry.substr(open + 1, end - open - 1);

if (!mangled.empty())
{
int status = 0;
std::unique_ptr<char, FreeDeleter> demangled(
abi::__cxa_demangle(mangled.c_str(), nullptr, nullptr, &status));
if (status == 0 && demangled != nullptr)
{
std::string rest = entry.substr(end);
entry = entry.substr(0, open + 1) + demangled.get() + rest;
}
}
result->resolved = true;
result->function = demangle(function);
result->filename = (filename != nullptr) ? filename : "";
result->lineno = lineno;
}
Comment on lines +81 to 95
Copy link

Copilot AI Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pcinfo_callback only marks the frame as resolved and records file/line when function != nullptr. libbacktrace can provide filename/lineno even when function is null; with the current logic, those frames will be treated as unresolved and printed as raw addresses. Consider setting resolved (and recording filename/lineno) whenever any of {function, filename, lineno} is present, and using a placeholder like <unknown> when the function name is missing.

Copilot uses AI. Check for mistakes.
return 0; // continue
}

return entry;
// Called by backtrace_syminfo when DWARF info is unavailable but the
// dynamic symbol table has an entry.
void syminfo_callback(
void* data,
uintptr_t /*pc*/,
const char* symname,
uintptr_t /*symval*/,
uintptr_t /*symsize*/)
{
auto* result = static_cast<PcinfoResult*>(data);
if (symname != nullptr)
{
result->resolved = true;
result->function = demangle(symname);
}
}

// Format a demangled stack trace as a string. Note: backtrace_symbols only
// resolves symbols exported to the dynamic symbol table (e.g. via
// -rdynamic). Static/internal functions will appear as raw addresses. For
// broader coverage, consider integrating libbacktrace (reads DWARF
// directly) or invoking addr2line at runtime.
// Silently ignore libbacktrace errors in individual frame resolution —
// we fall back to printing the raw PC address.
void error_callback(void* /*data*/, const char* /*msg*/, int /*errnum*/) {}

// Format a stack trace using libbacktrace for DWARF-aware symbol,
// file and line resolution. This works in all build configurations
// without requiring -rdynamic.
std::string format_stacktrace(void** frames, int num_frames)
{
std::ostringstream oss;
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc,cppcoreguidelines-owning-memory,bugprone-multi-level-implicit-pointer-conversion)
std::unique_ptr<char*, FreePtrArrayDeleter> symbols(
backtrace_symbols(frames, num_frames));
if (symbols == nullptr)
{
// If memory allocation fails, return a message indicating the issue
return " (failed to allocate memory for backtrace symbols)\n";
}
auto* state = get_backtrace_state();

for (int i = 0; i < num_frames; ++i)
{
oss << " #" << i << ": " << demangle_symbol(symbols.get()[i]) << "\n";
auto pc = reinterpret_cast<uintptr_t>(frames[i]);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this?

PcinfoResult result;

if (state != nullptr)
{
// Try DWARF-based resolution first (gives file + line + function)
backtrace_pcinfo(state, pc, pcinfo_callback, error_callback, &result);

// If DWARF info wasn't available, try the symbol table
if (!result.resolved)
{
backtrace_syminfo(
state, pc, syminfo_callback, error_callback, &result);
}
}

oss << " #" << i << ": ";
if (result.resolved)
{
oss << result.function;
if (!result.filename.empty())
{
oss << " at " << result.filename << ":" << result.lineno;
}
}
else
{
oss << "0x" << std::hex << pc << std::dec;
}
oss << "\n";
}
return oss.str();
}
Expand Down Expand Up @@ -131,10 +194,29 @@ extern "C"
void __cxa_throw(
void* thrown_exception, std::type_info* tinfo, void (*dest)(void*))
{
// Capture the backtrace at the throw site
// Capture the backtrace at the throw site using libbacktrace's own
// unwinder. glibc backtrace() can lose frames whose return address
// falls exactly at the end of a .eh_frame FDE range; libbacktrace's
// DWARF unwinder handles this correctly.
auto& trace = ccf::tasks::current_throw_trace;
trace.num_frames =
backtrace(trace.frames, ccf::tasks::throw_trace_max_frames);
trace.num_frames = 0;
auto* bt_state = ccf::tasks::get_backtrace_state();
if (bt_state != nullptr)
{
backtrace_simple(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we backtrace_full() depending on NDEBUG? It's not obvious from the doc to what extent full is nicer than simple, but it sounds like it might be?

bt_state,
0, // skip = 0, capture from here
[](void* data, uintptr_t pc) -> int {
auto* t = static_cast<ccf::tasks::ThrowTrace*>(data);
if (t->num_frames < ccf::tasks::throw_trace_max_frames)
{
t->frames[t->num_frames++] = reinterpret_cast<void*>(pc); // NOLINT
}
return 0;
},
Comment on lines +206 to +216
Copy link

Copilot AI Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The backtrace_simple callback stores every pc value it receives and always returns 0 (continue). In the example output, this results in a bogus trailing frame 0xffffffffffffffff. Consider stopping/ignoring sentinel PCs (eg pc == 0 or pc == std::numeric_limits<uintptr_t>::max()) by returning non-zero and/or skipping these values so they don’t appear as frames.

Copilot uses AI. Check for mistakes.
nullptr, // ignore errors
&trace);
}
Comment on lines 201 to +219
Copy link

Copilot AI Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If backtrace_create_state() fails (so bt_state == nullptr), this code records trace.num_frames = 0 and captures nothing. Downstream, this means exceptions may log with no stack trace at all. Consider adding a fallback capture path when bt_state is null (for example, a glibc backtrace() fallback, or a secondary libbacktrace state initialised with an error callback so failures are visible).

Copilot uses AI. Check for mistakes.

// Forward to the real __cxa_throw
static auto real_cxa_throw =
Expand Down
Loading