Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
8d2b637
v0
zhengyu123 Jun 17, 2026
02614cf
v1
zhengyu123 Jun 17, 2026
9840b2f
An alternative to thread local
zhengyu123 Jun 17, 2026
a4aa192
v2
zhengyu123 Jun 17, 2026
17889ce
Added a test
zhengyu123 Jun 18, 2026
f38c9cd
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
b387308
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
c2b7014
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
a632c1c
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
a3d8950
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
792a4a3
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
f9c9b28
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
86c2e2e
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
2e788fd
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
282a346
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
5cf4a1c
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
337b80f
Potential fix for pull request finding
zhengyu123 Jun 18, 2026
c190286
fix
zhengyu123 Jun 18, 2026
13c8891
Merge branch 'main' into zgu/tls_replacement
zhengyu123 Jun 18, 2026
1de7f18
Comments and cleanup
zhengyu123 Jun 22, 2026
d4c822d
Add static_assert
zhengyu123 Jun 22, 2026
4ce78a4
Fix codex comment
zhengyu123 Jun 22, 2026
5328e7e
Merge branch 'main' into zgu/tls_replacement
zhengyu123 Jun 23, 2026
757ff41
Address review comments
zhengyu123 Jun 23, 2026
e6e0dc5
Merge branch 'zgu/tls_replacement' of github.com:DataDog/java-profile…
zhengyu123 Jun 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 37 additions & 8 deletions ddprof-lib/src/main/cpp/livenessTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "os.h"
#include "profiler.h"
#include "thread.h"
#include "threadLocal.h"
#include "tsc.h"
#include <jni.h>
#include <string.h>
Expand Down Expand Up @@ -276,6 +277,29 @@ Error LivenessTracker::initialize(Arguments &args) {
return _stored_error = Error::OK;
}

static void* create_mt19937() {
// std::mt19937 itself is noexcept, but std::random_device and `new` may throw.
// If that happens we let the failure terminate the process (same outcome as
// failing thread_local initialization previously).
return (void*)(new std::mt19937(std::random_device{}()));
}

static void* create_uniform_real_distribution() {
// std::uniform_real_distribution<> construction is noexcept, but `new` may throw.
// If allocation fails the process is likely to abort anyway.
return (void*)(new std::uniform_real_distribution<>(0, 1.0));
}

static void free_mt19937(void* p) {
std::mt19937* mt = (std::mt19937*)p;
delete mt;
}

static void free_uniform_real_distribution(void* p) {
std::uniform_real_distribution<>* urd = (std::uniform_real_distribution<>*)p;
delete urd;
}

void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
jobject object, u64 call_trace_id) {
if (!_enabled) {
Expand All @@ -287,13 +311,17 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
return;
}

static thread_local std::mt19937 gen(std::random_device{}());
static thread_local std::uniform_real_distribution<> dis(0, 1.0);
static thread_local double skipped = 0;
static ThreadLocal<std::mt19937*, create_mt19937, free_mt19937> gen;
static ThreadLocal<std::uniform_real_distribution<>*, create_uniform_real_distribution, free_uniform_real_distribution> dis;
static ThreadLocal<double> skipped;

if (_subsample_ratio < 1.0 && dis(gen) > _subsample_ratio) {
skipped += static_cast<double>(event._weight) * event._size;
return;
if (_subsample_ratio < 1.0) {
std::mt19937* genp = gen.get();
std::uniform_real_distribution<>* disp = dis.get();
if (disp->operator()(*genp) > _subsample_ratio) {
skipped.set(skipped.get() + static_cast<double>(event._weight) * event._size);
return;
}
}

jweak ref = env->NewWeakGlobalRef(object);
Expand Down Expand Up @@ -322,7 +350,8 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
_table[idx].time = TSC::ticks();
_table[idx].ref = ref;
_table[idx].alloc = event;
_table[idx].skipped = skipped;
_table[idx].skipped = skipped.get();
Comment thread
zhengyu123 marked this conversation as resolved.
skipped.set(0);
_table[idx].age = 0;
_table[idx].call_trace_id = call_trace_id;
_table[idx].ctx = ContextApi::snapshot();
Expand Down Expand Up @@ -375,8 +404,8 @@ void LivenessTracker::track(JNIEnv *env, AllocEvent &event, jint tid,
} else {
env->DeleteWeakGlobalRef(ref);
}
skipped.set(0); // reset the subsampling skipped bytes
}
skipped = 0; // reset the subsampling skipped bytes
}

void JNICALL LivenessTracker::GarbageCollectionFinish(jvmtiEnv *jvmti_env) {
Expand Down
162 changes: 162 additions & 0 deletions ddprof-lib/src/main/cpp/threadLocal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Copyright 2026, Datadog, Inc.
* SPDX-License-Identifier: Apache-2.0
*/

#ifndef _THREADLOCAL_H
#define _THREADLOCAL_H

#include <cassert>
#include <cstring>
#include <pthread.h>
#include "arch.h"

/**
* This file implements an alternative to C/C++ thread local.
* Due to some restrictions of the language implementations, especially, on musl/aarch64,
* they cannot be safely used in profiler.
*
* How to use?
* A ThreadLocal should be declared as a static variable, e.g.
*
* static void* create_my_object() {
* return new MyObject();
* }
*
* static void delete_my_object(void* p) {
* MyObject* obj = (MyObject*)p;
* delete obj;
* }
*
* static ThreadLocal<MyObject*, create_my_object, delete_my_object> var;
* MyObject* value = var.get();
* ...
* var.clear();
* ...
* MyObject* new_value = new MyObject();
* var.set(new_value);
* ...
* var.clear();
*
*/

// The function to create value if it does not exist
typedef void* (*CREATE_FUNC)(void);
// Cleanup the value when deleting the key
typedef void (*CLEAN_FUNC)(void*);
template <typename T, CREATE_FUNC C = nullptr, CLEAN_FUNC F = nullptr>
class ThreadLocal {
protected:
pthread_key_t _key;
bool _key_valid;

public:
ThreadLocal(const ThreadLocal&) = delete;
ThreadLocal& operator=(const ThreadLocal&) = delete;

ThreadLocal() {
static_assert(sizeof(T) == sizeof(void*),
"ThreadLocal<T> requires sizeof(T)==sizeof(void*); use a pointer type or add a specialization");
_key_valid = pthread_key_create(&_key, F) == 0;
// What to do if we can not create a key?
// We probably want to shutdown profiler gracefully, instead of
// aborting user application - We will need this mechanism globally,
// defer to a separate task.
assert(_key_valid);
}
Comment thread
zhengyu123 marked this conversation as resolved.

~ThreadLocal() {
assert(_key_valid);
pthread_key_delete(_key);
Comment thread
zhengyu123 marked this conversation as resolved.
Comment thread
zhengyu123 marked this conversation as resolved.
}

/**
* set(nullptr) will result in the value being recreated when get() is called
* when CREATE_FUNC is not nullptr.
* Note: caller is responsible to free old value.
*/
void set(T value) {
Comment thread
zhengyu123 marked this conversation as resolved.
Comment thread
zhengyu123 marked this conversation as resolved.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · completeness] set() does not invoke F (CLEAN_FUNC) on the existing TSD value before overwriting it. If get() auto-created an object via C() and a caller subsequently calls set() with a different pointer, the original allocation is leaked — POSIX does not call destructors on pthread_setspecific overwrite, only on thread exit.

Suggestion: in set(), retrieve the current value first and call F on it if non-null:

void set(T value) {
    void* prev = pthread_getspecific(_key);
    int err = pthread_setspecific(_key, (const void*)value);
    assert(err == 0);
    if (prev != nullptr && F != nullptr) F(prev);
}

Alternatively, document that set() must only be called when the TSD slot is empty (i.e. after clear()) and enforce with an assert.

assert(_key_valid);
int err = pthread_setspecific(_key, (const void*)value);
Comment thread
zhengyu123 marked this conversation as resolved.
assert(err == 0);
}
Comment thread
Copilot marked this conversation as resolved.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · necessity] clear() has zero callers in production code and zero test coverage. The class docstring presents it as a core usage pattern, yet no shipped path exercises it — including the subtle branch where a failed pthread_setspecific in clear() leaves a still-reachable pointer un-freed.

Suggestion: either remove clear() until a caller needs it, or add unit tests that cover: (1) clear() resets the value, (2) the cleanup function is invoked, (3) F == nullptr branch actually zeroes the slot. Do not ship an untested method that the public docs present as core API.

T get() {
assert(_key_valid);
void* p = pthread_getspecific(_key);
if (p == nullptr && C != nullptr) {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW · robustness] get() cannot distinguish 'never set' from 'explicitly set to nullptr'. When a create function is registered, storing null (or C() returning null) causes C() to be re-invoked on every get(), repeatedly allocating. This differs from real thread_local semantics and is a foot-gun for the planned future conversions mentioned in the PR description.

Suggestion: document that C must never return nullptr and that set(nullptr) is reserved for clear(); optionally assert(p != nullptr) after C() to fail fast.

p = C();
Comment thread
zhengyu123 marked this conversation as resolved.
set((T)p);
}
return (T)p;
}

// Clear the value
void clear() {

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MEDIUM · correctness] clear() silently skips pthread_setspecific when F == nullptr, leaving the TSD slot non-zero. The documented use-case — clear before re-set — does not work for pointer types without a destructor function.

Suggestion: always zero the TSD slot and only call F() when non-null:

void clear() {
    void* p = pthread_getspecific(_key);
    if (p != nullptr) {
        pthread_setspecific(_key, nullptr);
        if (F != nullptr) F(p);
    }
}

assert(_key_valid);
void* p = pthread_getspecific(_key);
if (p == nullptr) return;
int err = pthread_setspecific(_key, nullptr);
// Safety: if reset the value failed, get() can see staled value if
// it is freed.
if (err == 0 && F != nullptr) {
F(p);
}
}
Comment thread
Copilot marked this conversation as resolved.
};

template <>
class ThreadLocal<double> {
protected:
pthread_key_t _key;
bool _key_valid;

public:
ThreadLocal(const ThreadLocal&) = delete;
ThreadLocal& operator=(const ThreadLocal&) = delete;

ThreadLocal() {
// Only support 64-bit platforms, double and void* are the same size
static_assert(sizeof(void*) == 8);
static_assert(sizeof(double) == 8);
_key_valid = pthread_key_create(&_key, nullptr) == 0;
// What to do if we can not create a key?
assert(_key_valid);
}
Comment thread
Copilot marked this conversation as resolved.

~ThreadLocal() {
assert(_key_valid);
pthread_key_delete(_key);
}

// double <--> u64 cast, preserve bit format
// Can use std::bit_cast after upgrade C++ version to 20
void set(double value) {
assert(_key_valid);
u64 val;
memcpy(&val, &value, sizeof(value));
int err = pthread_setspecific(_key, (const void*)val);

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW · consistency] Integer-to-pointer cast at line 120 uses a C-style (const void*)val while the reverse pointer-to-integer cast at line 130 uses a C-style (u64)p. Both should use reinterpret_cast for clarity and to suppress potential -Wint-to-pointer-cast warnings:

// line 120:
int err = pthread_setspecific(_key, reinterpret_cast<const void*>(val));
// line 130:
u64 val = reinterpret_cast<u64>(p);

assert(err == 0);
}
Comment thread
Copilot marked this conversation as resolved.

double get() {
assert(_key_valid);
void* p = pthread_getspecific(_key);
if (p == nullptr) {
return 0.0;
}

u64 val = (u64)p;
double value;
memcpy(&value, &val, sizeof(val));
return value;
}

void clear() {
Comment thread
zhengyu123 marked this conversation as resolved.
assert(_key_valid);
int err = pthread_setspecific(_key, nullptr);
assert(err == 0);
}
Comment thread
Copilot marked this conversation as resolved.
};

#endif // _THREADLOCAL_H
Loading
Loading