Skip to content

Commit adefd96

Browse files
patvarilly and claude committed
Make Delphy runs deterministic given same seed and thread count
Patch vendored Abseil at CMake configure time to neutralize three sources of hash container iteration nondeterminism (Hash::Seed, PerTableSalt, RandomSeed). Remove the sort-before-RNG workaround in build_usher_like_tree that was a partial fix for the same issue. Store the PRNG seed in Run and output both seed and thread count in BEAST log file headers via stamp_version_into_log_file. Route through Run rather than globals so multiple WASM Delphy_context instances can coexist.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8de0d4e commit adefd96

17 files changed

Lines changed: 501 additions & 27 deletions

.gitmodules

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
[submodule "third-party/abseil-cpp"]
66
path = third-party/abseil-cpp
77
url = https://github.com/abseil/abseil-cpp.git
8+
ignore = dirty
89
[submodule "third-party/ctpl"]
910
path = third-party/ctpl
1011
url = https://github.com/vit-vit/ctpl.git

CMakeLists.txt

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,32 @@ find_package(Threads REQUIRED)
7676
find_package(Eigen3 REQUIRED)
7777
find_package(Boost REQUIRED)
7878
set(ABSL_PROPAGATE_CXX_STD ON)
79+
80+
# Apply Delphy patches to vendored Abseil (deterministic hash iteration).
81+
# The patch neutralizes PerTableSalt, RandomSeed, and absl::Hash::Seed so that
82+
# flat_hash_map/set iteration order depends only on hash values, not on table
83+
# memory addresses or ASLR. This is required for reproducible MCMC runs.
84+
# If the patch has already been applied (e.g., on a reconfigure), git apply
85+
# --check --reverse will detect that and we skip it. If the patch fails for
86+
# any other reason (e.g., Abseil was upgraded), the build fails loudly.
87+
set(_abseil_patch "${CMAKE_SOURCE_DIR}/patches/abseil-deterministic-hash-iteration.patch")
88+
execute_process(
89+
COMMAND git apply --check --reverse ${_abseil_patch}
90+
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/third-party/abseil-cpp
91+
RESULT_VARIABLE _patch_already_applied
92+
OUTPUT_QUIET ERROR_QUIET)
93+
if(_patch_already_applied)
94+
execute_process(
95+
COMMAND git apply ${_abseil_patch}
96+
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/third-party/abseil-cpp
97+
RESULT_VARIABLE _patch_result)
98+
if(_patch_result)
99+
message(FATAL_ERROR
100+
"Failed to apply Abseil patch ${_abseil_patch}. "
101+
"If Abseil was upgraded, the patch may need updating.")
102+
endif()
103+
endif()
104+
79105
add_subdirectory(third-party/abseil-cpp EXCLUDE_FROM_ALL)
80106

81107
# Optional UI libraries

core/beasty_input.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ static auto export_beast_2_6_2_input(
191191
os << absl::StreamFormat("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n")
192192
<< absl::StreamFormat("\n")
193193
<< absl::StreamFormat("<!-- \n")
194-
<< stamp_version_into_log_file{}
194+
<< stamp_version_into_log_file{run}
195195
<< absl::StreamFormat("-->\n")
196196
<< absl::StreamFormat("\n")
197197
<< absl::StreamFormat("<!-- BEAST2 v2.6.2 input file, modelled on run analyzed in \n")
@@ -579,7 +579,7 @@ static auto export_beast_2_7_7_input(
579579
os << absl::StreamFormat("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n")
580580
<< absl::StreamFormat("\n")
581581
<< absl::StreamFormat("<!-- \n")
582-
<< stamp_version_into_log_file{}
582+
<< stamp_version_into_log_file{run}
583583
<< absl::StreamFormat("-->\n")
584584
<< absl::StreamFormat("\n")
585585
<< absl::StreamFormat("<!-- BEAST2 v2.7.7 input file, modelled on run analyzed in \n")
@@ -1111,7 +1111,7 @@ static auto export_beast_X_10_5_0_input(
11111111
os << absl::StreamFormat("<?xml version=\"1.0\" standalone=\"yes\"?>\n")
11121112
<< absl::StreamFormat("\n")
11131113
<< absl::StreamFormat("<!-- \n")
1114-
<< stamp_version_into_log_file{}
1114+
<< stamp_version_into_log_file{run}
11151115
<< absl::StreamFormat("-->\n")
11161116
<< absl::StreamFormat("\n")
11171117
<< absl::StreamFormat("<!-- BEAST X 10.5.0 input file, modelled on run analyzed in \n")

core/beasty_output.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ class Beasty_log_output_2_x_x : public Beasty_log_output_version_impl {
6666

6767
if (not run.mpox_hack_enabled()) {
6868
// WARNING: Ensure this matches the logger output from export_beast_2_x_x_input (beasty_input.cpp)
69-
os << stamp_version_into_log_file{};
69+
os << stamp_version_into_log_file{run};
7070
os << "Sample\t"
7171
<< "numMuts\t" // Not output by BEAST, but useful for downstream analysis of Delphy results
7272
<< "posterior_for_Delphy\t" // Not really BEAST's posterior
@@ -275,7 +275,7 @@ class Beasty_log_output_X_10_5_0 : public Beasty_log_output_version_impl {
275275
const auto& pop_model = run.pop_model();
276276

277277
// WARNING: Ensure this matches the logger output from export_beast_X_10_5_0_input (beasty_input.cpp)
278-
os << stamp_version_into_log_file{};
278+
os << stamp_version_into_log_file{run};
279279
os << "state\t"
280280
<< "numMuts\t" // Not output by BEAST, but useful for downstream analysis of Delphy results
281281
<< "posterior_for_Delphy\t" // Not really BEAST's "joint"
@@ -513,7 +513,7 @@ auto Beasty_trees_output::output_headers(const Run& run) -> void {
513513
}
514514

515515
*os_ << "#NEXUS\n"
516-
<< stamp_version_into_log_file{}
516+
<< stamp_version_into_log_file{run}
517517
<< "\n"
518518
<< "Begin taxa;\n"
519519
<< " Dimensions ntax=" << num_tips << ";\n"

core/cmdline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ auto process_args(int argc, char** argv) -> Processed_cmd_line {
625625

626626
// Create and configure initial run
627627
auto t0 = calc_max_tip_time(tree);
628-
auto run = std::make_shared<Run>(*thread_pool, prng, std::move(tree));
628+
auto run = std::make_shared<Run>(*thread_pool, prng, seed, std::move(tree));
629629
run->set_paranoid(opts["v0-paranoid"].as<bool>());
630630
run->set_mpox_hack_enabled(mpox_hack_enabled);
631631
run->set_alpha_move_enabled(alpha_move_enabled);

core/io.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ auto read_maple(
255255
return result;
256256
}
257257

258-
auto operator<<(std::ostream& os, stamp_version_into_log_file) -> std::ostream& {
258+
auto operator<<(std::ostream& os, stamp_version_into_log_file s) -> std::ostream& {
259259
os << absl::StreamFormat("# Produced by delphy version %s (build %d, commit %s)\n",
260260
k_delphy_version_string, k_delphy_build_number, k_delphy_commit_string);
261261
if (delphy_invoked_via_cli) {
@@ -266,6 +266,8 @@ auto operator<<(std::ostream& os, stamp_version_into_log_file) -> std::ostream&
266266
}
267267
os << "\n";
268268
}
269+
os << "# Seed: " << s.run.prng_seed() << "\n";
270+
os << "# Parallelism: " << s.run.num_parts() << "\n";
269271
return os;
270272
}
271273

core/io.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,11 @@ auto read_maple(
3333
= default_sequence_warning_hook)
3434
-> Maple_file;
3535

36-
struct stamp_version_into_log_file{}; // dummy type for stream command
37-
auto operator<<(std::ostream& os, stamp_version_into_log_file) -> std::ostream&;
36+
class Run; // forward declaration
37+
struct stamp_version_into_log_file {
38+
const Run& run;
39+
};
40+
auto operator<<(std::ostream& os, stamp_version_into_log_file s) -> std::ostream&;
3841

3942
auto output_resolved_fasta(const Phylo_tree& tree, std::ostream& os) -> void;
4043

core/phylo_tree.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -955,18 +955,7 @@ auto build_usher_like_tree(
955955
tree.at(S).parent = P;
956956

957957
// Distribute needed mutations randomly
958-
// - ordered_mutations is here to avoid the non-determinism introduced by abseil's flat_hash_map
959-
// (so delphy invocations with the same seed produce identical starting trees)
960-
auto ordered_mutations = Scratch_vector<std::pair<Site_index, Site_delta>>{};
961958
for (const auto& [l, delta] : deltas_P_to_X) {
962-
ordered_mutations.push_back({l, delta});
963-
}
964-
std::ranges::sort(ordered_mutations, [](const auto& pair_a, const auto& pair_b) {
965-
const auto& [la, _1] = pair_a;
966-
const auto& [lb, _2] = pair_b;
967-
return la < lb;
968-
});
969-
for (const auto& [l, delta] : ordered_mutations) {
970959
tree.at(X).mutations.push_back(Mutation{
971960
delta.from, l, delta.to,
972961
absl::Uniform(absl::IntervalOpenClosed, bitgen, t_P, t_X)});

core/run.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111

1212
namespace delphy {
1313

14-
Run::Run(ctpl::thread_pool& thread_pool, std::mt19937 bitgen, Phylo_tree tree)
14+
Run::Run(ctpl::thread_pool& thread_pool, std::mt19937 bitgen, uint32_t prng_seed, Phylo_tree tree)
1515
: thread_pool_{&thread_pool},
1616
bitgen_{bitgen},
17+
prng_seed_{prng_seed},
1718
tree_{std::move(tree)},
1819
num_parts_{1},
1920
target_coal_prior_cells_{400},

core/run.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ namespace delphy {
1717

1818
class Run {
1919
public:
20-
Run(ctpl::thread_pool& thread_pool, std::mt19937 bitgen, Phylo_tree tree);
20+
Run(ctpl::thread_pool& thread_pool, std::mt19937 bitgen, uint32_t prng_seed, Phylo_tree tree);
2121

2222
auto bitgen() -> absl::BitGenRef { return bitgen_; }
2323
auto tree() -> Phylo_tree& { return tree_; }
@@ -28,6 +28,7 @@ class Run {
2828
auto set_step(int64_t num_steps) -> void;
2929
auto local_moves_per_global_move() const -> int { return local_moves_per_global_move_; }
3030
auto set_local_moves_per_global_move(int local_moves_per_global_move) -> void;
31+
auto prng_seed() const -> uint32_t { return prng_seed_; }
3132
auto num_parts() const -> int { return num_parts_; }
3233
auto set_num_parts(int num_parts) -> void { num_parts_ = num_parts; partition_stencils_valid_ = false; }
3334
auto target_coal_prior_cells() const -> int { return target_coal_prior_cells_; }
@@ -225,6 +226,7 @@ class Run {
225226
private:
226227
ctpl::thread_pool* thread_pool_;
227228
std::mt19937 bitgen_;
229+
uint32_t prng_seed_;
228230
Phylo_tree tree_;
229231

230232
int64_t step_;

0 commit comments

Comments
 (0)