From b09e54130183aa4f1e62cdcdda6fbd2fcb33518e Mon Sep 17 00:00:00 2001 From: not-matthias Date: Fri, 8 Aug 2025 13:36:59 +0200 Subject: [PATCH 1/7] feat: run python with perf jit dump --- src/run/mod.rs | 2 +- src/run/runner/helpers/env.rs | 8 ++ src/run/runner/wall_time/perf/jit_dump.rs | 99 +++++++++++++++++++++++ src/run/runner/wall_time/perf/mod.rs | 7 +- 4 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 src/run/runner/wall_time/perf/jit_dump.rs diff --git a/src/run/mod.rs b/src/run/mod.rs index fa033aef..b64b51f8 100644 --- a/src/run/mod.rs +++ b/src/run/mod.rs @@ -133,7 +133,7 @@ pub struct RunArgs { pub command: Vec, } -#[derive(ValueEnum, Clone, Debug, Serialize)] +#[derive(ValueEnum, Clone, Debug, Serialize, PartialEq)] #[serde(rename_all = "lowercase")] pub enum RunnerMode { Instrumentation, diff --git a/src/run/runner/helpers/env.rs b/src/run/runner/helpers/env.rs index db745a06..350c25a7 100644 --- a/src/run/runner/helpers/env.rs +++ b/src/run/runner/helpers/env.rs @@ -8,6 +8,14 @@ pub fn get_base_injected_env( ) -> HashMap<&'static str, String> { HashMap::from([ ("PYTHONHASHSEED", "0".into()), + ( + "PYTHON_PERF_JIT_SUPPORT", + if mode == RunnerMode::Walltime { + "1".into() + } else { + "0".into() + }, + ), ("ARCH", ARCH.into()), ("CODSPEED_ENV", "runner".into()), ("CODSPEED_RUNNER_MODE", mode.to_string()), diff --git a/src/run/runner/wall_time/perf/jit_dump.rs b/src/run/runner/wall_time/perf/jit_dump.rs new file mode 100644 index 00000000..651b012a --- /dev/null +++ b/src/run/runner/wall_time/perf/jit_dump.rs @@ -0,0 +1,99 @@ +use crate::{prelude::*, run::runner::wall_time::perf::unwind_data::UnwindData}; +use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord}; +use std::{ + collections::HashSet, + path::{Path, PathBuf}, +}; + +struct JitDump { + path: PathBuf, +} + +impl JitDump { + pub fn new(path: PathBuf) -> Self { + Self { path } + } + + /// Parses the JIT dump file and converts it into a list of `UnwindData`. 
+ /// + /// The JIT dump file contains synthetic `eh_frame` data for jitted functions. This can be parsed and + /// then converted to `UnwindData` which is used for stack unwinding. + /// + /// See: https://github.com/python/cpython/blob/main/Python/perf_jit_trampoline.c + pub fn into_unwind_data(self) -> Result> { + let file = std::fs::File::open(self.path)?; + + let mut jit_unwind_data = Vec::new(); + let mut current_unwind_info: Option<(Vec, Vec)> = None; + + let mut reader = JitDumpReader::new(file)?; + while let Some(raw_record) = reader.next_record()? { + // The first record is always the unwind info, followed by the code load event + // (see `perf_map_jit_write_entry` in https://github.com/python/cpython/blob/9743d069bd53e9d3a8f09df899ec1c906a79da24/Python/perf_jit_trampoline.c#L1163C13-L1163C37) + match raw_record.parse()? { + JitDumpRecord::CodeLoad(record) => { + let name = record.function_name.as_slice(); + let name = String::from_utf8_lossy(&name); + + let avma_start = record.vma; + let code_size = record.code_bytes.len() as u64; + let avma_end = avma_start + code_size; + + let Some((eh_frame, eh_frame_hdr)) = current_unwind_info.take() else { + warn!("No unwind info available for JIT code load: {name}"); + continue; + }; + + jit_unwind_data.push(UnwindData { + path: format!("jit_{name}"), + avma_range: avma_start..avma_end, + base_avma: 0, + eh_frame_hdr, + eh_frame_hdr_svma: 0..0, + eh_frame, + eh_frame_svma: 0..0, + }); + } + JitDumpRecord::CodeUnwindingInfo(record) => { + // Store unwind info for the next code loads + current_unwind_info = Some(( + record.eh_frame.as_slice().to_vec(), + record.eh_frame_hdr.as_slice().to_vec(), + )); + } + _ => { + warn!("Unhandled JIT dump record: {raw_record:?}"); + } + } + } + + Ok(jit_unwind_data) + } +} + +/// Converts all the `jit-<pid>.dump` into unwind data and copies it to the profile folder. 
+pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet) -> Result<()> { + for pid in pids { + let name = format!("jit-{pid}.dump"); + let path = PathBuf::from("/tmp").join(&name); + + if !path.exists() { + continue; + } + debug!("Found JIT dump file: {path:?}"); + + let unwind_data = match JitDump::new(path).into_unwind_data() { + Ok(unwind_data) => unwind_data, + Err(error) => { + warn!("Failed to convert jit dump into unwind data: {error:?}"); + continue; + } + }; + + for module in unwind_data { + module.save_to(profile_folder, *pid as _)?; + } + } + + Ok(()) +} diff --git a/src/run/runner/wall_time/perf/mod.rs b/src/run/runner/wall_time/perf/mod.rs index c8a3372c..f3c640ef 100644 --- a/src/run/runner/wall_time/perf/mod.rs +++ b/src/run/runner/wall_time/perf/mod.rs @@ -8,6 +8,7 @@ use crate::run::runner::helpers::run_command_with_log_pipe::run_command_with_log use crate::run::runner::helpers::setup::run_with_sudo; use crate::run::runner::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore; use crate::run::runner::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids; +use crate::run::runner::wall_time::perf::jit_dump::harvest_perf_jit_for_pids; use anyhow::Context; use fifo::{PerfFifo, RunnerFifo}; use futures::stream::FuturesUnordered; @@ -22,6 +23,7 @@ use std::{cell::OnceCell, collections::HashMap, process::ExitStatus}; use tempfile::TempDir; use unwind_data::UnwindData; +mod jit_dump; mod metadata; mod setup; mod shared; @@ -195,7 +197,7 @@ impl PerfRunner { // Harvest the perf maps generated by python. This will copy the perf // maps from /tmp to the profile folder. We have to write our own perf // maps to these files AFTERWARDS, otherwise it'll be overwritten! - let perf_map_pids = futures::future::try_join_all(copy_tasks) + let bench_pids = futures::future::try_join_all(copy_tasks) .await? 
.into_iter() .filter_map(|result| { @@ -203,7 +205,8 @@ impl PerfRunner { result.ok() }) .collect::>(); - harvest_perf_maps_for_pids(profile_folder, &perf_map_pids).await?; + harvest_perf_maps_for_pids(profile_folder, &bench_pids).await?; + harvest_perf_jit_for_pids(profile_folder, &bench_pids).await?; // Append perf maps, unwind info and other metadata if let Err(BenchmarkDataSaveError::MissingIntegration) = bench_data.save_to(profile_folder) From 72f17f6b32f96d4e8e15511a45459adb5a53e530 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 10 Sep 2025 15:42:48 +0200 Subject: [PATCH 2/7] feat: add unwind data tests --- ...__unwind_data__tests__cpp_unwind_data.snap | 15 +++ ...nwind_data__tests__golang_unwind_data.snap | 15 +++ ...d_data__tests__rust_divan_unwind_data.snap | 15 +++ src/run/runner/wall_time/perf/unwind_data.rs | 107 +++++++++++++++++- 4 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap create mode 100644 src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__golang_unwind_data.snap create mode 100644 src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap diff --git a/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap b/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap new file mode 100644 index 00000000..e3eeff97 --- /dev/null +++ b/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap @@ -0,0 +1,15 @@ +--- +source: src/run/runner/wall_time/perf/unwind_data.rs +expression: "UnwindData::new(MODULE_PATH.as_bytes(), 0x0, start_addr, size, None)" +--- +Ok( + UnwindData { + path: 
"testdata/perf_map/cpp_my_benchmark.bin", + avma_range: 400000..459000, + base_avma: 0, + eh_frame_hdr_svma: 4577bc..458b30, + eh_frame_hdr_hash: 4b4eac90f7f5e60d, + eh_frame_hash: 233bdd4ae9fe4ba4, + eh_frame_svma: 451098..4577bc, + }, +) diff --git a/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__golang_unwind_data.snap b/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__golang_unwind_data.snap new file mode 100644 index 00000000..f963bc7d --- /dev/null +++ b/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__golang_unwind_data.snap @@ -0,0 +1,15 @@ +--- +source: src/run/runner/wall_time/perf/unwind_data.rs +expression: "UnwindData::new(MODULE_PATH.as_bytes(), 0x2000, start_addr, size, None)" +--- +Ok( + UnwindData { + path: "testdata/perf_map/go_fib.bin", + avma_range: 402000..50f000, + base_avma: 0, + eh_frame_hdr_svma: 6498b0..649b94, + eh_frame_hdr_hash: f1f69beb959a08d7, + eh_frame_hash: a8727039dd21b51c, + eh_frame_svma: 649b98..64aa70, + }, +) diff --git a/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap b/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap new file mode 100644 index 00000000..4e09cefa --- /dev/null +++ b/src/run/runner/wall_time/perf/snapshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap @@ -0,0 +1,15 @@ +--- +source: src/run/runner/wall_time/perf/unwind_data.rs +expression: unwind_data +--- +Ok( + UnwindData { + path: "testdata/perf_map/divan_sleep_benches.bin", + avma_range: 5555555a2000..555555692000, + base_avma: 555555554000, + eh_frame_hdr_svma: 2ac74..2ea60, + eh_frame_hdr_hash: f579da4368e627c1, + eh_frame_hash: 791501d5a9c438d, + eh_frame_svma: 11540..2ac74, + }, +) diff --git 
a/src/run/runner/wall_time/perf/unwind_data.rs b/src/run/runner/wall_time/perf/unwind_data.rs index 2ec8884c..c7a1be7d 100644 --- a/src/run/runner/wall_time/perf/unwind_data.rs +++ b/src/run/runner/wall_time/perf/unwind_data.rs @@ -1,12 +1,16 @@ //! WARNING: This file has to be in sync with perf-parser! use anyhow::{Context, bail}; +use core::{ + fmt::Debug, + hash::{Hash, Hasher}, +}; use debugid::CodeId; use serde::{Deserialize, Serialize}; -use std::ops::Range; +use std::{hash::DefaultHasher, ops::Range}; /// Unwind data for a single module. -#[derive(Debug, Serialize, Deserialize)] +#[derive(Serialize, Deserialize)] pub struct UnwindData { pub path: String, @@ -20,6 +24,34 @@ pub struct UnwindData { pub eh_frame_svma: Range, } +impl Debug for UnwindData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let eh_frame_hdr_hash = { + let mut hasher = DefaultHasher::new(); + self.eh_frame_hdr.hash(&mut hasher); + hasher.finish() + }; + let eh_frame_hash = { + let mut hasher = DefaultHasher::new(); + self.eh_frame.hash(&mut hasher); + hasher.finish() + }; + + f.debug_struct("UnwindData") + .field("path", &self.path) + .field("avma_range", &format_args!("{:x?}", self.avma_range)) + .field("base_avma", &format_args!("{:x}", self.base_avma)) + .field( + "eh_frame_hdr_svma", + &format_args!("{:x?}", self.eh_frame_hdr_svma), + ) + .field("eh_frame_hdr_hash", &format_args!("{eh_frame_hdr_hash:x}")) + .field("eh_frame_hash", &format_args!("{eh_frame_hash:x}")) + .field("eh_frame_svma", &format_args!("{:x?}", self.eh_frame_svma)) + .finish() + } +} + impl UnwindData { // Based on this: https://github.com/mstange/linux-perf-stuff/blob/22ca6531b90c10dd2a4519351c843b8d7958a451/src/main.rs#L747-L893 pub fn new( @@ -138,3 +170,74 @@ impl UnwindData { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + // Note: You can double-check the values by getting the /proc/<pid>/maps via gdb: + // ``` + // $ gdb testdata/perf_map/<name>.bin -ex "break main" -ex "run" 
-ex "info proc mappings" -ex "continue" -ex "quit" -batch + // Start Addr End Addr Size Offset Perms File + // 0x0000555555554000 0x00005555555a2000 0x4e000 0x0 r--p /runner/testdata/perf_map/divan_sleep_benches.bin + // 0x00005555555a2000 0x0000555555692000 0xf0000 0x4d000 r-xp /runner/testdata/perf_map/divan_sleep_benches.bin + // 0x0000555555692000 0x000055555569d000 0xb000 0x13c000 r--p /runner/testdata/perf_map/divan_sleep_benches.bin + // 0x000055555569d000 0x000055555569f000 0x2000 0x146000 rw-p /runner/testdata/perf_map/divan_sleep_benches.bin + // 0x00007ffff7c00000 0x00007ffff7c28000 0x28000 0x0 r--p /nix/store/g8zyryr9cr6540xsyg4avqkwgxpnwj2a-glibc-2.40-66/lib/libc.so.6 + // 0x00007ffff7c28000 0x00007ffff7d9e000 0x176000 0x28000 r-xp /nix/store/g8zyryr9cr6540xsyg4avqkwgxpnwj2a-glibc-2.40-66/lib/libc.so.6 + // 0x00007ffff7d9e000 0x00007ffff7df4000 0x56000 0x19e000 r--p /nix/store/g8zyryr9cr6540xsyg4avqkwgxpnwj2a-glibc-2.40-66/lib/libc.so.6 + // 0x00007ffff7df4000 0x00007ffff7df8000 0x4000 0x1f3000 r--p /nix/store/g8zyryr9cr6540xsyg4avqkwgxpnwj2a-glibc-2.40-66/lib/libc.so.6 + // 0x00007ffff7df8000 0x00007ffff7dfa000 0x2000 0x1f7000 rw-p /nix/store/g8zyryr9cr6540xsyg4avqkwgxpnwj2a-glibc-2.40-66/lib/libc.so.6 + // 0x00007ffff7dfa000 0x00007ffff7e07000 0xd000 0x0 rw-p + // 0x00007ffff7f8a000 0x00007ffff7f8d000 0x3000 0x0 rw-p + // ... 
+ // ``` + + #[test] + fn test_golang_unwind_data() { + const MODULE_PATH: &str = "testdata/perf_map/go_fib.bin"; + + let (start_addr, end_addr) = (0x0000000000402000_u64, 0x000000000050f000_u64); + let size: u64 = end_addr - start_addr; + + insta::assert_debug_snapshot!(UnwindData::new( + MODULE_PATH.as_bytes(), + 0x2000, + start_addr, + size, + None + )); + } + + #[test] + fn test_cpp_unwind_data() { + const MODULE_PATH: &str = "testdata/perf_map/cpp_my_benchmark.bin"; + + let (start_addr, end_addr) = (0x0000000000400000_u64, 0x0000000000459000_u64); + let size: u64 = end_addr - start_addr; + + insta::assert_debug_snapshot!(UnwindData::new( + MODULE_PATH.as_bytes(), + 0x0, + start_addr, + size, + None + )); + } + + #[test] + fn test_rust_divan_unwind_data() { + const MODULE_PATH: &str = "testdata/perf_map/divan_sleep_benches.bin"; + + let (start_addr, end_addr) = (0x00005555555a2000_u64, 0x0000555555692000_u64); + let size: u64 = end_addr - start_addr; + + insta::assert_debug_snapshot!(UnwindData::new( + MODULE_PATH.as_bytes(), + 0x4d000, + start_addr, + size, + None + )); + } +} From a8d042f08702211c2af013e57915b8e1a1dc100a Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 10 Sep 2025 17:50:04 +0200 Subject: [PATCH 3/7] fix: create perf map for jitdump --- src/run/runner/wall_time/perf/jit_dump.rs | 42 ++++++++++++++++++++++- src/run/runner/wall_time/perf/perf_map.rs | 14 +++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/run/runner/wall_time/perf/jit_dump.rs b/src/run/runner/wall_time/perf/jit_dump.rs index 651b012a..32edf2b2 100644 --- a/src/run/runner/wall_time/perf/jit_dump.rs +++ b/src/run/runner/wall_time/perf/jit_dump.rs @@ -1,4 +1,10 @@ -use crate::{prelude::*, run::runner::wall_time::perf::unwind_data::UnwindData}; +use crate::{ + prelude::*, + run::runner::wall_time::perf::{ + perf_map::{ModuleSymbols, Symbol}, + unwind_data::UnwindData, + }, +}; use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord}; use std::{ 
collections::HashSet, @@ -14,6 +20,30 @@ impl JitDump { Self { path } } + pub fn into_perf_map(self) -> Result { + let mut symbols = Vec::new(); + + let file = std::fs::File::open(self.path)?; + let mut reader = JitDumpReader::new(file)?; + while let Some(raw_record) = reader.next_record()? { + let JitDumpRecord::CodeLoad(record) = raw_record.parse()? else { + continue; + }; + + let name = record.function_name.as_slice(); + let name = String::from_utf8_lossy(&name); + + symbols.push(Symbol { + addr: record.vma, + size: record.code_bytes.len() as u64, + name: name.to_string(), + }); + } + debug!("Extracted {} JIT symbols", symbols.len()); + + Ok(ModuleSymbols::from_symbols(symbols)) + } + /// Parses the JIT dump file and converts it into a list of `UnwindData`. /// /// The JIT dump file contains synthetic `eh_frame` data for jitted functions. This can be parsed and @@ -82,6 +112,16 @@ pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet symbols, + Err(error) => { + warn!("Failed to convert jit dump into perf map: {error:?}"); + continue; + } + }; + symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?; + let unwind_data = match JitDump::new(path).into_unwind_data() { Ok(unwind_data) => unwind_data, Err(error) => { diff --git a/src/run/runner/wall_time/perf/perf_map.rs b/src/run/runner/wall_time/perf/perf_map.rs index edf46805..34bf0785 100644 --- a/src/run/runner/wall_time/perf/perf_map.rs +++ b/src/run/runner/wall_time/perf/perf_map.rs @@ -8,10 +8,10 @@ use std::{ }; #[derive(Hash, PartialEq, Eq, Clone)] -struct Symbol { - addr: u64, - size: u64, - name: String, +pub struct Symbol { + pub addr: u64, + pub size: u64, + pub name: String, } impl Debug for Symbol { @@ -30,6 +30,10 @@ pub struct ModuleSymbols { } impl ModuleSymbols { + pub fn from_symbols(symbols: Vec) -> Self { + Self { symbols } + } + pub fn new>( path: P, runtime_start_addr: u64, @@ -149,7 +153,7 @@ impl ModuleSymbols { 
Ok(runtime_start_addr.wrapping_sub(load_segment.address())) } - fn append_to_file>(&self, path: P) -> anyhow::Result<()> { + pub fn append_to_file>(&self, path: P) -> anyhow::Result<()> { let mut file = std::fs::OpenOptions::new() .create(true) .append(true) From 85cf65632884b549ad1e2c688755125225089f02 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 10 Sep 2025 18:01:44 +0200 Subject: [PATCH 4/7] chore: add debug log for /proc/<pid>/maps --- src/run/runner/wall_time/perf/mod.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/run/runner/wall_time/perf/mod.rs b/src/run/runner/wall_time/perf/mod.rs index f3c640ef..0b9cee0d 100644 --- a/src/run/runner/wall_time/perf/mod.rs +++ b/src/run/runner/wall_time/perf/mod.rs @@ -234,6 +234,17 @@ impl PerfRunner { procfs::process::Process::new(pid as _).expect("Failed to find benchmark process"); let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps"); + if is_codspeed_debug_enabled() { + debug!("Process memory mappings for PID {pid}:"); + for map in exe_maps.iter().sorted_by_key(|m| m.address.0) { + let (base_addr, end_addr) = map.address; + debug!( + " {:016x}-{:016x} {:08x} {:?} {:?} ", + base_addr, end_addr, map.offset, map.pathname, map.perms, + ); + } + } + for map in &exe_maps { let page_offset = map.offset; let (base_addr, end_addr) = map.address; From 1936c1fce34ebb5bd1685f8134dc80131cd28089 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 10 Sep 2025 19:13:16 +0200 Subject: [PATCH 5/7] fix: codspeed debug check --- src/run/runner/helpers/env.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/run/runner/helpers/env.rs b/src/run/runner/helpers/env.rs index 350c25a7..31cc04db 100644 --- a/src/run/runner/helpers/env.rs +++ b/src/run/runner/helpers/env.rs @@ -27,10 +27,13 @@ pub fn get_base_injected_env( } pub fn is_codspeed_debug_enabled() -> bool { - let log_level = std::env::var("CODSPEED_LOG") + std::env::var("CODSPEED_LOG") .ok() - 
.and_then(|log_level| log_level.parse::().ok()) - .unwrap_or(log::LevelFilter::Info); - - log_level < log::LevelFilter::Debug + .and_then(|log_level| { + log_level + .parse::() + .map(|level| level >= log::LevelFilter::Debug) + .ok() + }) + .unwrap_or_default() } From b9807182ddc92baa45bf2017f3601f335e89c60a Mon Sep 17 00:00:00 2001 From: not-matthias Date: Wed, 10 Sep 2025 19:44:05 +0200 Subject: [PATCH 6/7] fix: ignore statically linked python --- src/run/runner/wall_time/perf/mod.rs | 24 +++++++++++++++++++++++ src/run/runner/wall_time/perf/perf_map.rs | 4 ++++ 2 files changed, 28 insertions(+) diff --git a/src/run/runner/wall_time/perf/mod.rs b/src/run/runner/wall_time/perf/mod.rs index 0b9cee0d..1240680c 100644 --- a/src/run/runner/wall_time/perf/mod.rs +++ b/src/run/runner/wall_time/perf/mod.rs @@ -441,6 +441,30 @@ impl BenchmarkData { } } + // When python is statically linked, we'll not find it in the ignored modules. Add it manually: + let python_modules = self.symbols_by_pid.values().filter_map(|proc| { + proc.loaded_modules().find(|path| { + path.file_name() + .map(|name| name.to_string_lossy().starts_with("python")) + .unwrap_or(false) + }) + }); + for path in python_modules { + if let Some(mapping) = self + .symbols_by_pid + .values() + .find_map(|proc| proc.module_mapping(path)) + { + let (Some((base_addr, _)), Some((_, end_addr))) = ( + mapping.iter().min_by_key(|(base_addr, _)| base_addr), + mapping.iter().max_by_key(|(_, end_addr)| end_addr), + ) else { + continue; + }; + to_ignore.push((path.to_string_lossy().into(), *base_addr, *end_addr)); + } + } + to_ignore }, }; diff --git a/src/run/runner/wall_time/perf/perf_map.rs b/src/run/runner/wall_time/perf/perf_map.rs index 34bf0785..9dcc0cec 100644 --- a/src/run/runner/wall_time/perf/perf_map.rs +++ b/src/run/runner/wall_time/perf/perf_map.rs @@ -213,6 +213,10 @@ impl ProcessSymbols { .push((start_addr, end_addr)); } + pub fn loaded_modules(&self) -> impl Iterator { + self.modules.keys() + } + pub fn 
module_mapping>( &self, module_path: P, From 3323c5a35448052a1f2eebb4d84991d2962a076f Mon Sep 17 00:00:00 2001 From: not-matthias Date: Fri, 12 Sep 2025 12:36:58 +0200 Subject: [PATCH 7/7] feat: detect stack size at runtime --- src/run/runner/wall_time/perf/mod.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/run/runner/wall_time/perf/mod.rs b/src/run/runner/wall_time/perf/mod.rs index 1240680c..e0db17e1 100644 --- a/src/run/runner/wall_time/perf/mod.rs +++ b/src/run/runner/wall_time/perf/mod.rs @@ -94,20 +94,24 @@ impl PerfRunner { .tempfile_in(&self.perf_dir)?; // Infer the unwinding mode from the benchmark cmd - let cg_mode = match (config.perf_unwinding_mode, &bench_cmd) { - (Some(mode), _) => mode, - (None, cmd) if cmd.contains("pytest") => UnwindingMode::FramePointer, - (None, cmd) if cmd.contains("cargo") => UnwindingMode::Dwarf, - (None, _) => { - // Default to dwarf unwinding since it works well with most binaries. - debug!("No call graph mode detected, defaulting to dwarf"); - UnwindingMode::Dwarf - } + let (cg_mode, stack_size) = if let Some(mode) = config.perf_unwinding_mode { + (mode, None) + } else if config.command.contains("cargo") { + (UnwindingMode::Dwarf, None) + } else if config.command.contains("pytest") + || config.command.contains("uv") + || config.command.contains("python") + { + (UnwindingMode::Dwarf, Some(65528)) + } else { + // Default to dwarf unwinding since it works well with most binaries. + debug!("No call graph mode detected, defaulting to dwarf"); + (UnwindingMode::Dwarf, None) }; let cg_mode = match cg_mode { UnwindingMode::FramePointer => "fp", - UnwindingMode::Dwarf => "dwarf", + UnwindingMode::Dwarf => &format!("dwarf,{}", stack_size.unwrap_or(8192)), }; debug!("Using call graph mode: {cg_mode:?}");