Skip to content

Commit 441fc1c

Browse files
feat(profiling): make Profile upload lock-free (#15302)
## Description https://datadoghq.atlassian.net/browse/PROF-13044 This PR reduces locking around the libdatadog Profile object to make uploading non-blocking for sampling. Previously, the Scheduler thread (which uploads every once in a while) would take the Lock on the Profile object, preventing any new Samples from being pushed to it until the upload finishes. Although we don't have any numbers on that, it is possible that this would trigger adaptive sampling or make us lose some data. The new implementation doesn't make uploading completely lock-free, but already improves it by eliminating the need for locking _during the upload_. We still need to lock the `Profile` object when we encode it. It may be possible to optimise this further by having two `Profile` objects (one we would be writing to, and one that would be _being uploaded_) but this requires more complicated lock handling to actually be safe. (Reason is, having two Profiles is still a problem if the previous encoding/uploading hasn't finished when the next needs to be encoded/uploaded.) In the new implementation, the Profile Lock is acquired just before encoding is done, and released right after. Profiler Stats are also copied and reset while the Lock is held. In order to make that work, I had to slightly adapt the interface of `Uploader` for it to work an `EncodedProfile` instead of a `Profile`, which allowed me to hoist the encoding outside of the `Uploader` (to the `UploaderBuilder`), and thus only hold the Profile Lock during `UploaderBuilder::Build` (as opposed to during the whole of `Uploader::upload`). `ProfilerStats` are captured and reset by `std::swap`-ping them with an empty `ProfilerStats` object, eliminating the previous "singleton-like" `ProfilerStats` we had. (This works and is trivial because `ProfilerStats` is a C++ object; we cannot do the same for the `Profile` itself, as it is managed by the Rust library.) `Uploader`'s interface has also slightly changed – it is now constructed with the `ProfilerStats` and `EncodedProfile` directly (not taken as argument for `upload`).
1 parent 274a440 commit 441fc1c

File tree

9 files changed

+75
-54
lines changed

9 files changed

+75
-54
lines changed

ddtrace/internal/datadog/profiling/dd_wrapper/include/profile.hpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ class Profile
4646
// The profile object is initialized here as a skeleton object, but it
4747
// cannot be used until it's initialized by libdatadog
4848
ddog_prof_Profile cur_profile{};
49-
50-
Datadog::ProfilerStats profiler_stats{};
49+
Datadog::ProfilerStats cur_profiler_stats{};
5150

5251
// Internal access methods - not for direct use
5352
ddog_prof_Profile& profile_borrow_internal();

ddtrace/internal/datadog/profiling/dd_wrapper/include/profiler_stats.hpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#include <cstddef>
44

55
#include <string>
6-
#include <string_view>
76

87
namespace Datadog {
98

@@ -17,8 +16,6 @@ a mutex to protect access to the data.
1716
class ProfilerStats
1817
{
1918
private:
20-
std::string internal_metadata_json;
21-
2219
// Number of samples collected (one per thread)
2320
size_t sample_count = 0;
2421

@@ -37,9 +34,7 @@ class ProfilerStats
3734

3835
// Returns a JSON string containing relevant Profiler Stats to be included
3936
// in the libdatadog payload.
40-
// The function returned a string_view to a statically allocated string that
41-
// is updated every time the function is called.
42-
std::string_view get_internal_metadata_json();
37+
std::string get_internal_metadata_json();
4338

4439
void reset_state();
4540
};

ddtrace/internal/datadog/profiling/dd_wrapper/include/uploader.hpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,24 @@ class Uploader
2222
static inline std::atomic<uint64_t> upload_seq{ 0 };
2323
std::string output_filename;
2424
ddog_prof_ProfileExporter ddog_exporter{ .inner = nullptr };
25+
ddog_prof_EncodedProfile encoded_profile{};
26+
Datadog::ProfilerStats profiler_stats;
2527

26-
bool export_to_file(ddog_prof_EncodedProfile* encoded, std::string_view internal_metadata_json);
28+
bool export_to_file(ddog_prof_EncodedProfile& encoded, std::string_view internal_metadata_json);
2729

2830
public:
29-
bool upload(ddog_prof_Profile& profile, Datadog::ProfilerStats& profiler_stats);
31+
bool upload();
3032
static void cancel_inflight();
3133
static void lock();
3234
static void unlock();
3335
static void prefork();
3436
static void postfork_parent();
3537
static void postfork_child();
3638

37-
Uploader(std::string_view _url, ddog_prof_ProfileExporter ddog_exporter);
39+
Uploader(std::string_view _url,
40+
ddog_prof_ProfileExporter ddog_exporter,
41+
ddog_prof_EncodedProfile encoded,
42+
Datadog::ProfilerStats stats);
3843
~Uploader()
3944
{
4045
// We need to call _drop() on the exporter and the cancellation token,
@@ -44,6 +49,7 @@ class Uploader
4449
ddog_CancellationToken_cancel(&cancel);
4550
ddog_prof_Exporter_drop(&ddog_exporter);
4651
ddog_CancellationToken_drop(&cancel);
52+
ddog_prof_EncodedProfile_drop(&encoded_profile);
4753
}
4854

4955
// Disable copy constructor and copy assignment operator to avoid double-free
@@ -67,6 +73,9 @@ class Uploader
6773
{
6874
ddog_exporter = other.ddog_exporter;
6975
other.ddog_exporter = { .inner = nullptr };
76+
encoded_profile = other.encoded_profile;
77+
other.encoded_profile = { .inner = nullptr };
78+
profiler_stats = std::move(other.profiler_stats);
7079
output_filename = std::move(other.output_filename);
7180
errmsg = std::move(other.errmsg);
7281
}
@@ -75,8 +84,12 @@ class Uploader
7584
{
7685
if (this != &other) {
7786
ddog_prof_Exporter_drop(&ddog_exporter);
87+
ddog_prof_EncodedProfile_drop(&encoded_profile);
7888
ddog_exporter = other.ddog_exporter;
7989
other.ddog_exporter = { .inner = nullptr };
90+
encoded_profile = other.encoded_profile;
91+
other.encoded_profile = { .inner = nullptr };
92+
profiler_stats = std::move(other.profiler_stats);
8093
output_filename = std::move(other.output_filename);
8194
errmsg = std::move(other.errmsg);
8295
}

ddtrace/internal/datadog/profiling/dd_wrapper/src/ddup_interface.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,9 @@ ddup_upload() // cppcheck-suppress unusedFunction
347347
return false;
348348
}
349349

350+
// Build the Uploader, which takes care of serializing the Profile and capturing ProfilerStats.
351+
// This takes a reference in a way that locks the areas where the profile might
352+
// be modified. It gets cleared and released as soon as serialization is complete (or has failed).
350353
auto uploader_or_err = Datadog::UploaderBuilder::build();
351354

352355
if (std::holds_alternative<std::string>(uploader_or_err)) {
@@ -359,15 +362,11 @@ ddup_upload() // cppcheck-suppress unusedFunction
359362

360363
// Get the reference to the uploader
361364
auto& uploader = std::get<Datadog::Uploader>(uploader_or_err);
362-
// There are a few things going on here.
363-
// * profile_borrow() takes a reference in a way that locks the areas where the profile might
364-
// be modified. It gets released and cleared after uploading.
365-
// * Uploading cancels inflight uploads. There are better ways to do this, but this is what
366-
// we have for now.
367-
auto borrowed = Datadog::Sample::profile_borrow();
368-
bool success = uploader.upload(borrowed.profile(), borrowed.stats());
369-
borrowed.stats().reset_state();
370-
return success;
365+
366+
// Upload without holding any locks (encoding has already been done in UploaderBuilder::build)
367+
// This also cancels inflight uploads. There are better ways to do this, but this is what
368+
// we have for now.
369+
return uploader.upload();
371370
}
372371

373372
void

ddtrace/internal/datadog/profiling/dd_wrapper/src/profile.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
#include "profile_borrow.hpp"
55
#include "profiler_stats.hpp"
66

7-
#include <functional>
87
#include <iostream>
98

109
#include <fcntl.h>
@@ -58,7 +57,7 @@ Datadog::Profile::reset_profile()
5857
return false;
5958
}
6059

61-
profiler_stats.reset_state();
60+
cur_profiler_stats.reset_state();
6261
return true;
6362
}
6463

@@ -189,7 +188,7 @@ Datadog::Profile::one_time_init(SampleType type, unsigned int _max_nframes)
189188
if (!make_profile(sample_types, &default_period, cur_profile)) {
190189
if (!already_warned) {
191190
already_warned = true;
192-
std::cerr << "Error initializing profile" << std::endl;
191+
std::cerr << "Error initializing cur_profile" << std::endl;
193192
}
194193
return;
195194
}
@@ -228,6 +227,6 @@ Datadog::Profile::postfork_child()
228227
{
229228
new (&profile_mtx) std::mutex();
230229
// Reset the profile to clear any samples collected in the parent process
231-
profiler_stats.reset_state();
230+
cur_profiler_stats.reset_state();
232231
reset_profile();
233232
}

ddtrace/internal/datadog/profiling/dd_wrapper/src/profile_borrow.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,5 +46,5 @@ Datadog::ProfileBorrow::profile()
4646
Datadog::ProfilerStats&
4747
Datadog::ProfileBorrow::stats()
4848
{
49-
return profile_ptr->profiler_stats;
49+
return profile_ptr->cur_profiler_stats;
5050
}

ddtrace/internal/datadog/profiling/dd_wrapper/src/profiler_stats.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include "profiler_stats.hpp"
22

33
#include <charconv>
4-
#include <string>
54

65
namespace {
76

@@ -46,12 +45,13 @@ Datadog::ProfilerStats::reset_state()
4645
sampling_event_count = 0;
4746
}
4847

49-
std::string_view
48+
std::string
5049
Datadog::ProfilerStats::get_internal_metadata_json()
5150
{
51+
std::string internal_metadata_json;
5252
internal_metadata_json.reserve(128);
5353

54-
internal_metadata_json = "{";
54+
internal_metadata_json += "{";
5555

5656
internal_metadata_json += R"("sample_count": )";
5757
append_to_string(internal_metadata_json, sample_count);

ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader.cpp

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,22 @@
1313

1414
using namespace Datadog;
1515

16-
Datadog::Uploader::Uploader(std::string_view _output_filename, ddog_prof_ProfileExporter _ddog_exporter)
16+
Datadog::Uploader::Uploader(std::string_view _output_filename,
17+
ddog_prof_ProfileExporter _ddog_exporter,
18+
ddog_prof_EncodedProfile _encoded_profile,
19+
Datadog::ProfilerStats _stats)
1720
: output_filename{ _output_filename }
1821
, ddog_exporter{ _ddog_exporter }
22+
, encoded_profile{ _encoded_profile }
23+
, profiler_stats{ std::move(_stats) }
1924
{
2025
// Increment the upload sequence number every time we build an uploader.
2126
// Uploaders are use-once-and-destroy.
2227
upload_seq++;
2328
}
2429

2530
bool
26-
Datadog::Uploader::export_to_file(ddog_prof_EncodedProfile* encoded, std::string_view internal_metadata_json)
31+
Datadog::Uploader::export_to_file(ddog_prof_EncodedProfile& encoded, std::string_view internal_metadata_json)
2732
{
2833
// Write the profile to a file using the following format for filename:
2934
// <output_filename>.<process_id>.<sequence_number>
@@ -37,7 +42,8 @@ Datadog::Uploader::export_to_file(ddog_prof_EncodedProfile* encoded, std::string
3742
std::cerr << "Error opening output file " << pprof_filename << ": " << strerror(errno) << std::endl;
3843
return false;
3944
}
40-
auto bytes_res = ddog_prof_EncodedProfile_bytes(encoded);
45+
46+
auto bytes_res = ddog_prof_EncodedProfile_bytes(&encoded);
4147
if (bytes_res.tag == DDOG_PROF_RESULT_BYTE_SLICE_ERR_BYTE_SLICE) {
4248
std::cerr << "Error getting bytes from encoded profile: "
4349
<< err_to_msg(&bytes_res.err, "Error getting bytes from encoded profile") << std::endl;
@@ -63,24 +69,10 @@ Datadog::Uploader::export_to_file(ddog_prof_EncodedProfile* encoded, std::string
6369
}
6470

6571
bool
66-
Datadog::Uploader::upload(ddog_prof_Profile& profile, Datadog::ProfilerStats& profiler_stats)
72+
Datadog::Uploader::upload()
6773
{
68-
// Serialize the profile
69-
ddog_prof_Profile_SerializeResult serialize_result = ddog_prof_Profile_serialize(&profile, nullptr, nullptr);
70-
if (serialize_result.tag !=
71-
DDOG_PROF_PROFILE_SERIALIZE_RESULT_OK) { // NOLINT (cppcoreguidelines-pro-type-union-access)
72-
auto err = serialize_result.err; // NOLINT (cppcoreguidelines-pro-type-union-access)
73-
errmsg = err_to_msg(&err, "Error serializing pprof");
74-
std::cerr << errmsg << std::endl;
75-
ddog_Error_drop(&err);
76-
return false;
77-
}
78-
ddog_prof_EncodedProfile* encoded = &serialize_result.ok; // NOLINT (cppcoreguidelines-pro-type-union-access)
79-
8074
if (!output_filename.empty()) {
81-
bool ret = export_to_file(encoded, profiler_stats.get_internal_metadata_json());
82-
ddog_prof_EncodedProfile_drop(encoded);
83-
return ret;
75+
return export_to_file(encoded_profile, profiler_stats.get_internal_metadata_json());
8476
}
8577

8678
std::vector<ddog_prof_Exporter_File> to_compress_files;
@@ -95,11 +87,12 @@ Datadog::Uploader::upload(ddog_prof_Profile& profile, Datadog::ProfilerStats& pr
9587
});
9688
}
9789

98-
auto internal_metadata_json_slice = to_slice(profiler_stats.get_internal_metadata_json());
90+
auto internal_metadata_json = profiler_stats.get_internal_metadata_json();
91+
auto internal_metadata_json_slice = to_slice(internal_metadata_json);
9992

10093
auto build_res = ddog_prof_Exporter_Request_build(
10194
&ddog_exporter,
102-
encoded,
95+
&encoded_profile,
10396
// files_to_compress_and_export
10497
{
10598
.ptr = reinterpret_cast<const ddog_prof_Exporter_File*>(to_compress_files.data()),
@@ -110,7 +103,6 @@ Datadog::Uploader::upload(ddog_prof_Profile& profile, Datadog::ProfilerStats& pr
110103
&internal_metadata_json_slice,
111104
nullptr // optional_info_json
112105
);
113-
ddog_prof_EncodedProfile_drop(encoded);
114106

115107
if (build_res.tag ==
116108
DDOG_PROF_REQUEST_RESULT_ERR_HANDLE_REQUEST) { // NOLINT (cppcoreguidelines-pro-type-union-access)

ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader_builder.cpp

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "uploader_builder.hpp"
22

33
#include "libdatadog_helpers.hpp"
4+
#include "sample.hpp"
45

56
#include <mutex>
67
#include <numeric>
@@ -186,14 +187,37 @@ Datadog::UploaderBuilder::build()
186187
return errmsg;
187188
}
188189

190+
// Perform profile encoding before creating the Uploader
191+
// Also take the Profiler Stats and reset the one being written to
192+
ddog_prof_Profile_SerializeResult encoded;
193+
Datadog::ProfilerStats stats;
194+
{
195+
// Only keep the lock for the duration of the encoding operation.
196+
auto borrowed = Datadog::Sample::profile_borrow();
197+
198+
// Swap the ProfilerStats (which replaces the one being written to with an empty state).
199+
// We do this first as we still want to reset ProfilerStats if the serialization fails.
200+
std::swap(stats, borrowed.stats());
201+
202+
// Try to encode the Profile (which will also reset it)
203+
encoded = ddog_prof_Profile_serialize(&borrowed.profile(), nullptr, nullptr);
204+
if (encoded.tag != DDOG_PROF_PROFILE_SERIALIZE_RESULT_OK) {
205+
auto err = encoded.err;
206+
const std::string errmsg = Datadog::err_to_msg(&err, "Error serializing profile");
207+
ddog_Error_drop(&err);
208+
ddog_prof_Exporter_drop(ddog_exporter);
209+
return errmsg;
210+
}
211+
}
212+
189213
// We create a std::variant here instead of creating a temporary Uploader object.
190-
// i.e. return Datadog::Uploader{ output_filename, *ddog_exporter }
214+
// i.e. return Datadog::Uploader{ output_filename, *ddog_exporter, encoded.ok, std::move(stats) }
191215
// because above code creates a temporary Uploader object, moves it into the
192216
// variant, and then the destructor of the temporary Uploader object is called
193217
// when the temporary Uploader object goes out of scope.
194218
// This was necessary to avoid double-free from calling ddog_prof_Exporter_drop()
195219
// in the destructor of Uploader. See comments in uploader.hpp for more details.
196-
return std::variant<Datadog::Uploader, std::string>{ std::in_place_type<Datadog::Uploader>,
197-
output_filename,
198-
*ddog_exporter };
220+
return std::variant<Datadog::Uploader, std::string>{
221+
std::in_place_type<Datadog::Uploader>, output_filename, *ddog_exporter, encoded.ok, std::move(stats)
222+
};
199223
}

0 commit comments

Comments
 (0)