Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ if(BUILD_UNIVERSAL_DDPROF)
endif()
endif()

# When USE_LOADER is set, define DDPROF_USE_LOADER for the
# dd_profiling-embedded target only (PRIVATE: not propagated to dependents).
if(USE_LOADER)
target_compile_definitions(dd_profiling-embedded PRIVATE "DDPROF_USE_LOADER")
endif()

# Fix for link error in sanitizeddebug build mode with gcc:
# ~~~
# /usr/bin/ld: ./libdd_profiling.so: undefined reference to `__dynamic_cast'
Expand Down
2 changes: 1 addition & 1 deletion cmake/dd_profiling.version
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
global: ddprof_start_profiling; ddprof_stop_profiling;
global: ddprof_start_profiling; ddprof_stop_profiling; ddprof_lib_state;
local: *;
};
10 changes: 0 additions & 10 deletions include/lib/allocation_tracker.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include <cstddef>
#include <functional>
#include <mutex>
#include <pthread.h>

namespace ddprof {

Expand Down Expand Up @@ -118,10 +117,6 @@ class AllocationTracker {

static AllocationTracker *create_instance();

static void delete_tl_state(void *tl_state);

static void make_key();

void track_allocation(uintptr_t addr, size_t size,
TrackerThreadLocalState &tl_state, bool is_large_alloc);
void track_deallocation(uintptr_t addr, TrackerThreadLocalState &tl_state,
Expand Down Expand Up @@ -158,11 +153,6 @@ class AllocationTracker {
AddressBitset _allocated_address_set;
IntervalTimerCheck _interval_timer_check;

// These can not be tied to the internal state of the instance.
// The creation of the instance depends on this
static pthread_once_t _key_once; // ensures we call key creation a single time
static pthread_key_t _tl_state_key;

static AllocationTracker *_instance;
};

Expand Down
4 changes: 4 additions & 0 deletions include/lib/allocation_tracker_tls.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ struct TrackerThreadLocalState {
// should not allocate because we might already
// be inside an allocation)

// Set to true by placement new in init_tl_state().
// Zero-initialized (false) in a fresh thread's TLS before init.
bool initialized{true};

// In the choice of random generators, this one is smaller
// - smaller than mt19937 (8 vs 5K)
std::minstd_rand gen{std::random_device{}()};
Expand Down
19 changes: 19 additions & 0 deletions include/lib/tls_state_storage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0. This product includes software
// developed at Datadog (https://www.datadoghq.com/). Copyright 2021-Present
// Datadog, Inc.

#pragma once

// C-compatible constants for the TLS buffer that stores
// TrackerThreadLocalState. Used by loader.c (C) and allocation_tracker.cc
// (C++). Correctness enforced at compile time via static_assert in
// allocation_tracker.cc.
#ifdef __cplusplus
enum : unsigned char {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is slightly weird, but it has to be C-compatible and the linters were annoying; I initially just had a #define.

#else
enum {
#endif
DDPROF_TLS_STATE_SIZE = 48,
DDPROF_TLS_STATE_ALIGN = 8,
};
4 changes: 4 additions & 0 deletions include/loghandle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,9 @@ class LogHandle {
LOG_setlevel(lvl);
}
~LogHandle() { LOG_close(); }
// Copy and move operations are all deleted: a LogHandle can be neither copied
// nor moved (its destructor calls LOG_close()).
LogHandle(const LogHandle &) = delete;
LogHandle &operator=(const LogHandle &) = delete;
LogHandle(LogHandle &&) = delete;
LogHandle &operator=(LogHandle &&) = delete;
};
} // namespace ddprof
63 changes: 27 additions & 36 deletions src/lib/allocation_tracker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "ringbuffer_utils.hpp"
#include "savecontext.hpp"
#include "syscalls.hpp"
#include "tls_state_storage.h"
#include "tsc_clock.hpp"

#include <algorithm>
Expand All @@ -23,18 +24,29 @@
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <new>
#include <unistd.h>

namespace ddprof {

// Static declarations
pthread_once_t AllocationTracker::_key_once = PTHREAD_ONCE_INIT;

pthread_key_t AllocationTracker::_tl_state_key;

AllocationTracker *AllocationTracker::_instance;

// Compile-time checks tying the constants from tls_state_storage.h to the real
// type: the size must match exactly, while the type's alignment only has to be
// no stricter than DDPROF_TLS_STATE_ALIGN.
static_assert(sizeof(TrackerThreadLocalState) == DDPROF_TLS_STATE_SIZE,
"Update DDPROF_TLS_STATE_SIZE in tls_state_storage.h");
static_assert(alignof(TrackerThreadLocalState) <= DDPROF_TLS_STATE_ALIGN,
"Update DDPROF_TLS_STATE_ALIGN in tls_state_storage.h");

namespace {

// Raw per-thread byte buffer that is reinterpreted as TrackerThreadLocalState.
// - With DDPROF_USE_LOADER: only declared here (extern "C", no definition), so
//   the storage has to be provided by another object file.
// - Otherwise: defined in this translation unit, aligned to
//   DDPROF_TLS_STATE_ALIGN and sized to sizeof(TrackerThreadLocalState).
// Both variants request the initial-exec TLS model.
#ifdef DDPROF_USE_LOADER
extern "C" __attribute((tls_model(
"initial-exec"))) __thread char ddprof_lib_state[DDPROF_TLS_STATE_SIZE];
#else
__attribute((tls_model("initial-exec")))
__attribute((aligned(DDPROF_TLS_STATE_ALIGN))) __thread char
ddprof_lib_state[sizeof(TrackerThreadLocalState)];
#endif

DDPROF_NOINLINE auto sleep_and_retry_reserve(MPSCRingBufferWriter &writer,
size_t size, bool &timeout) {
constexpr std::chrono::nanoseconds k_sleep_duration =
Expand All @@ -53,31 +65,19 @@ DDPROF_NOINLINE auto sleep_and_retry_reserve(MPSCRingBufferWriter &writer,
} // namespace

// Returns a pointer to the calling thread's TrackerThreadLocalState.
// NOTE(review): this view interleaves two lookups -- one through the pthread
// key (_tl_state_key) and one that reads the ddprof_lib_state TLS buffer and
// yields nullptr while `initialized` is false. Confirm which lines belong to
// the final file before relying on either.
TrackerThreadLocalState *AllocationTracker::get_tl_state() {
// In shared libraries, TLS access requires a call to tls_get_addr,
// tls_get_addr can call into malloc, which can create a recursive loop
// instead we call pthread APIs to control the creation of TLS objects
pthread_once(&_key_once, make_key);
auto *tl_state = static_cast<TrackerThreadLocalState *>(
pthread_getspecific(_tl_state_key));
return tl_state;
// ddprof_lib_state is zero-initialized by libc for each new thread.
// After placement new (init_tl_state), initialized is set to true.
auto *state = reinterpret_cast<TrackerThreadLocalState *>(ddprof_lib_state);
return state->initialized ? state : nullptr;
}

// Creates the calling thread's TrackerThreadLocalState, fills in its tid and
// stack bounds, and returns a pointer to it.
// NOTE(review): this view interleaves two implementations -- a heap-allocated
// state stored with pthread_setspecific, and a placement-new into the
// ddprof_lib_state TLS buffer. Confirm which lines belong to the final file.
TrackerThreadLocalState *AllocationTracker::init_tl_state() {
// Since init_tl_state is only called in allocation_tracking_init and
// notify_thread_start, there is no danger of reentering it when doing an
// allocation.
auto tl_state = std::make_unique<TrackerThreadLocalState>();
tl_state->tid = ddprof::gettid();
tl_state->stack_bounds = retrieve_stack_bounds();

if (int const res = pthread_setspecific(_tl_state_key, tl_state.get());
res != 0) {
// should return 0
LG_DBG("Unable to store tl_state. Error %d: %s\n", res, strerror(res));
tl_state.reset();
}

return tl_state.release();
// Placement new into TLS -- no heap allocation, no cleanup needed on thread
// exit. Safe to call after fork (TLS memory is inherited by child).
auto *state = new (ddprof_lib_state) TrackerThreadLocalState{};
state->tid = ddprof::gettid();
state->stack_bounds = retrieve_stack_bounds();
return state;
}

AllocationTracker::AllocationTracker() = default;
Expand All @@ -87,15 +87,6 @@ AllocationTracker *AllocationTracker::create_instance() {
return &tracker;
}

// Deletes the TrackerThreadLocalState that `tl_state` points to. The parameter
// is a void * because this function is passed to pthread_key_create() as the
// key's destructor (see make_key).
void AllocationTracker::delete_tl_state(void *tl_state) {
delete static_cast<TrackerThreadLocalState *>(tl_state);
}

// Creates the pthread key _tl_state_key and registers delete_tl_state as its
// destructor. The return value of pthread_key_create() is not checked.
void AllocationTracker::make_key() {
// delete is called on all key objects
pthread_key_create(&_tl_state_key, delete_tl_state);
}

DDRes AllocationTracker::allocation_tracking_init(
uint64_t allocation_profiling_rate, uint32_t flags,
uint32_t stack_sample_size, const RingBufferInfo &ring_buffer,
Expand Down
6 changes: 6 additions & 0 deletions src/lib/loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "constants.hpp"
#include "dd_profiling.h"
#include "lib_embedded_data.h"
#include "tls_state_storage.h"

#include <dlfcn.h>
#include <fcntl.h>
Expand All @@ -18,6 +19,11 @@
#include <time.h>
#include <unistd.h>

/* Definition of the thread-local buffer ddprof_lib_state, exported with
 * default visibility. Its size and alignment come from tls_state_storage.h,
 * and it uses the initial-exec TLS model. */
__attribute__((__visibility__("default")))
__attribute__((tls_model("initial-exec")))
__attribute__((aligned(DDPROF_TLS_STATE_ALIGN))) __thread char
ddprof_lib_state[DDPROF_TLS_STATE_SIZE];

/* Role of loader is to ensure that all dependencies (libdl/libm/libpthread) of
* libdd_profiling-embedded.so are satisfied before dlopen'ing it.
* On musl, all libc features are in libc.so and hence are available once libc
Expand Down
12 changes: 3 additions & 9 deletions src/logger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,9 @@ void vlprintfln(int lvl, int fac, const char *format, va_list args) {
"<%d>%s.%06ld %s[%d]: ", lvl + (fac * LL_LENGTH), tm_str,
d_us.count(), name, pid);
} else {
const char *levels[LL_LENGTH] = {
[LL_EMERGENCY] = "EMERGENCY",
[LL_ALERT] = "ALERT",
[LL_CRITICAL] = "CRITICAL",
[LL_ERROR] = "ERROR",
[LL_WARNING] = "WARNING",
[LL_NOTICE] = "NOTICE",
[LL_INFORMATIONAL] = "INFORMATIONAL",
[LL_DEBUG] = "DEBUG",
static constexpr const char *levels[] = {
"EMERGENCY", "ALERT", "CRITICAL", "ERROR",
"WARNING", "NOTICE", "INFORMATIONAL", "DEBUG",
};
sz_h = snprintf(buf, LOG_MSG_CAP, "<%s>%s.%06lu %s[%d]: ", levels[lvl],
tm_str, d_us.count(), name, pid);
Expand Down
31 changes: 31 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,37 @@ add_unit_test(address_bitset-ut address_bitset-ut.cc ../src/lib/address_bitset.c

add_unit_test(lib_logger-ut ./lib_logger-ut.cc)

# Standalone fork test for allocation tracker TLS (no gtest -- fork inside gtest is fragile)
# Executable built from the test driver plus the listed project sources.
add_exe(
allocation_tracker_fork_test
allocation_tracker_fork_test.cc
../src/lib/allocation_tracker.cc
../src/lib/address_bitset.cc
../src/logger.cc
../src/lib/pthread_fixes.cc
../src/lib/savecontext.cc
../src/lib/saveregisters.cc
../src/procutils.cc
../src/ratelimiter.cc
../src/ringbuffer_utils.cc
../src/sys_utils.cc
../src/tsc_clock.cc
../src/perf_clock.cc
../src/perf.cc
../src/ddres_list.cc
../src/perf_ringbuffer.cc
../src/pevent_lib.cc
../src/user_override.cc
LIBRARIES llvm-demangle)
# Header search paths used when compiling this target only (PRIVATE).
target_include_directories(
allocation_tracker_fork_test PRIVATE ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/include/lib
${CMAKE_SOURCE_DIR}/src)

# Register the executable with CTest; it is run from the build directory.
add_test(
NAME allocation_tracker_fork_test
COMMAND allocation_tracker_fork_test
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})

add_unit_test(
create_elf-ut
create_elf-ut.cc
Expand Down
Loading