CodSpeedHQ · not-matthias · Sep 12, 2025 · Aug 8, 2025 · Sep 10, 2025 · Sep 10, 2025
diff --git a/src/run/mod.rs b/src/run/mod.rs
@@ -133,7 +133,7 @@ pub struct RunArgs {
     pub command: Vec<String>,
 }
 
-#[derive(ValueEnum, Clone, Debug, Serialize)]
+#[derive(ValueEnum, Clone, Debug, Serialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum RunnerMode {
     Instrumentation,

diff --git a/src/run/runner/helpers/env.rs b/src/run/runner/helpers/env.rs
@@ -8,6 +8,14 @@ pub fn get_base_injected_env(
 ) -> HashMap<&'static str, String> {
     HashMap::from([
         ("PYTHONHASHSEED", "0".into()),
+        (
+            "PYTHON_PERF_JIT_SUPPORT",
+            if mode == RunnerMode::Walltime {
+                "1".into()
+            } else {
+                "0".into()
+            },
+        ),
         ("ARCH", ARCH.into()),
         ("CODSPEED_ENV", "runner".into()),
         ("CODSPEED_RUNNER_MODE", mode.to_string()),
@@ -19,10 +27,13 @@ pub fn get_base_injected_env(
 }
 
 pub fn is_codspeed_debug_enabled() -> bool {
-    let log_level = std::env::var("CODSPEED_LOG")
+    std::env::var("CODSPEED_LOG")
         .ok()
-        .and_then(|log_level| log_level.parse::<log::LevelFilter>().ok())
-        .unwrap_or(log::LevelFilter::Info);
-
-    log_level < log::LevelFilter::Debug
+        .and_then(|log_level| {
+            log_level
+                .parse::<log::LevelFilter>()
+                .map(|level| level >= log::LevelFilter::Debug)
+                .ok()
+        })
+        .unwrap_or_default()
 }
diff --git a/src/run/runner/wall_time/perf/jit_dump.rs b/src/run/runner/wall_time/perf/jit_dump.rs
@@ -0,0 +1,139 @@
+use crate::{
+    prelude::*,
+    run::runner::wall_time::perf::{
+        perf_map::{ModuleSymbols, Symbol},
+        unwind_data::UnwindData,
+    },
+};
+use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord};
+use std::{
+    collections::HashSet,
+    path::{Path, PathBuf},
+};
+
+/// Handle to a JIT dump file (`jit-<pid>.dump`), parsed on demand by the `into_*` methods.
+struct JitDump {
+    path: PathBuf,
+}
+
+impl JitDump {
+    pub fn new(path: PathBuf) -> Self {
+        Self { path }
+    }
+
+    /// Collects one `Symbol` (address, size, lossily decoded name) per code load record.
+    pub fn into_perf_map(self) -> Result<ModuleSymbols> {
+        let file = std::fs::File::open(self.path)?;
+        let mut reader = JitDumpReader::new(file)?;
+        let mut symbols = Vec::new();
+        while let Some(raw_record) = reader.next_record()? {
+            let JitDumpRecord::CodeLoad(record) = raw_record.parse()? else {
+                continue;
+            };
+
+            let name = record.function_name.as_slice();
+            symbols.push(Symbol {
+                addr: record.vma,
+                size: record.code_bytes.len() as u64,
+                name: String::from_utf8_lossy(&name).into_owned(),
+            });
+        }
+        debug!("Extracted {} JIT symbols", symbols.len());
+
+        Ok(ModuleSymbols::from_symbols(symbols))
+    }
+
+    /// Parses the JIT dump file and converts it into a list of `UnwindData`.
+    ///
+    /// The dump holds synthetic `eh_frame` data for jitted functions. Each code load consumes the
+    /// unwind info stored before it; loads without it or with an overflowing range are skipped.
+    ///
+    /// See: https://github.com/python/cpython/blob/main/Python/perf_jit_trampoline.c
+    pub fn into_unwind_data(self) -> Result<Vec<UnwindData>> {
+        let file = std::fs::File::open(self.path)?;
+        let mut reader = JitDumpReader::new(file)?;
+        let mut jit_unwind_data = Vec::new();
+        let mut pending_unwind_info: Option<(Vec<u8>, Vec<u8>)> = None;
+
+        while let Some(raw_record) = reader.next_record()? {
+            // The unwind info record always comes first, followed by the code load event
+            // (see `perf_map_jit_write_entry` in https://github.com/python/cpython/blob/9743d069bd53e9d3a8f09df899ec1c906a79da24/Python/perf_jit_trampoline.c#L1163C13-L1163C37)
+            match raw_record.parse()? {
+                JitDumpRecord::CodeUnwindingInfo(record) => {
+                    // Store unwind info for the next code load, which consumes it
+                    pending_unwind_info = Some((
+                        record.eh_frame.as_slice().to_vec(),
+                        record.eh_frame_hdr.as_slice().to_vec(),
+                    ));
+                }
+                JitDumpRecord::CodeLoad(record) => {
+                    let name = record.function_name.as_slice();
+                    let name = String::from_utf8_lossy(&name);
+
+                    let Some((eh_frame, eh_frame_hdr)) = pending_unwind_info.take() else {
+                        warn!("No unwind info available for JIT code load: {name}");
+                        continue;
+                    };
+
+                    // `vma` and the code size come from the file; reject ranges that overflow
+                    let avma_start = record.vma;
+                    let code_size = record.code_bytes.len() as u64;
+                    let Some(avma_end) = avma_start.checked_add(code_size) else {
+                        warn!("Skipping JIT code load with overflowing address range: {name}");
+                        continue;
+                    };
+
+                    jit_unwind_data.push(UnwindData {
+                        path: format!("jit_{name}"),
+                        avma_range: avma_start..avma_end,
+                        base_avma: 0,
+                        eh_frame_hdr,
+                        eh_frame_hdr_svma: 0..0,
+                        eh_frame,
+                        eh_frame_svma: 0..0,
+                    });
+                }
+                _ => warn!("Unhandled JIT dump record: {raw_record:?}"),
+            }
+        }
+
+        Ok(jit_unwind_data)
+    }
+}
+
+/// For each pid with a `/tmp/jit-<pid>.dump` file, appends the JIT symbols to `perf-<pid>.map` in
+/// the profile folder and saves the unwind data; dumps that fail to convert are skipped.
+pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<i32>) -> Result<()> {
+    for pid in pids {
+        let path = Path::new("/tmp").join(format!("jit-{pid}.dump"));
+        if !path.exists() {
+            continue;
+        }
+        debug!("Found JIT dump file: {path:?}");
+
+        // First pass over the dump: append the JIT symbols to the existing perf map file
+        let symbols = match JitDump::new(path.clone()).into_perf_map() {
+            Err(error) => {
+                warn!("Failed to convert jit dump into perf map: {error:?}");
+                continue;
+            }
+            Ok(symbols) => symbols,
+        };
+        symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?;
+
+        // Second pass over the dump: extract the unwind data and save it per module
+        let unwind_data = match JitDump::new(path).into_unwind_data() {
+            Err(error) => {
+                warn!("Failed to convert jit dump into unwind data: {error:?}");
+                continue;
+            }
+            Ok(unwind_data) => unwind_data,
+        };
+
+        for module in unwind_data {
+            module.save_to(profile_folder, *pid as _)?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/run/runner/wall_time/perf/mod.rs b/src/run/runner/wall_time/perf/mod.rs
@@ -8,6 +8,7 @@ use crate::run::runner::helpers::run_command_with_log_pipe::run_command_with_log
 use crate::run::runner::helpers::setup::run_with_sudo;
 use crate::run::runner::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore;
 use crate::run::runner::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
+use crate::run::runner::wall_time::perf::jit_dump::harvest_perf_jit_for_pids;
 use anyhow::Context;
 use fifo::{PerfFifo, RunnerFifo};
 use futures::stream::FuturesUnordered;
@@ -22,6 +23,7 @@ use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
 use tempfile::TempDir;
 use unwind_data::UnwindData;
 
+mod jit_dump;
 mod metadata;
 mod setup;
 mod shared;
@@ -92,20 +94,24 @@ impl PerfRunner {
             .tempfile_in(&self.perf_dir)?;
 
         // Infer the unwinding mode from the benchmark cmd
-        let cg_mode = match (config.perf_unwinding_mode, &bench_cmd) {
-            (Some(mode), _) => mode,
-            (None, cmd) if cmd.contains("pytest") => UnwindingMode::FramePointer,
-            (None, cmd) if cmd.contains("cargo") => UnwindingMode::Dwarf,
-            (None, _) => {
-                // Default to dwarf unwinding since it works well with most binaries.
-                debug!("No call graph mode detected, defaulting to dwarf");
-                UnwindingMode::Dwarf
-            }
+        let (cg_mode, stack_size) = if let Some(mode) = config.perf_unwinding_mode {
+            (mode, None)
+        } else if config.command.contains("cargo") {
+            (UnwindingMode::Dwarf, None)
+        } else if config.command.contains("pytest")
+            || config.command.contains("uv")
+            || config.command.contains("python")
+        {
+            (UnwindingMode::Dwarf, Some(65528))
+        } else {
+            // Default to dwarf unwinding since it works well with most binaries.
+            debug!("No call graph mode detected, defaulting to dwarf");
+            (UnwindingMode::Dwarf, None)
         };
 
         let cg_mode = match cg_mode {
             UnwindingMode::FramePointer => "fp",
-            UnwindingMode::Dwarf => "dwarf",
+            UnwindingMode::Dwarf => &format!("dwarf,{}", stack_size.unwrap_or(8192)),
         };
         debug!("Using call graph mode: {cg_mode:?}");
 
@@ -195,15 +201,16 @@ impl PerfRunner {
         // Harvest the perf maps generated by python. This will copy the perf
         // maps from /tmp to the profile folder. We have to write our own perf
         // maps to these files AFTERWARDS, otherwise it'll be overwritten!
-        let perf_map_pids = futures::future::try_join_all(copy_tasks)
+        let bench_pids = futures::future::try_join_all(copy_tasks)
             .await?
             .into_iter()
             .filter_map(|result| {
                 debug!("Copy task result: {result:?}");
                 result.ok()
             })
             .collect::<HashSet<_>>();
-        harvest_perf_maps_for_pids(profile_folder, &perf_map_pids).await?;
+        harvest_perf_maps_for_pids(profile_folder, &bench_pids).await?;
+        harvest_perf_jit_for_pids(profile_folder, &bench_pids).await?;
 
         // Append perf maps, unwind info and other metadata
         if let Err(BenchmarkDataSaveError::MissingIntegration) = bench_data.save_to(profile_folder)
@@ -231,6 +238,17 @@ impl PerfRunner {
             procfs::process::Process::new(pid as _).expect("Failed to find benchmark process");
         let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps");
 
+        if is_codspeed_debug_enabled() {
+            debug!("Process memory mappings for PID {pid}:");
+            for map in exe_maps.iter().sorted_by_key(|m| m.address.0) {
+                let (base_addr, end_addr) = map.address;
+                debug!(
+                    "  {:016x}-{:016x} {:08x} {:?} {:?} ",
+                    base_addr, end_addr, map.offset, map.pathname, map.perms,
+                );
+            }
+        }
+
         for map in &exe_maps {
             let page_offset = map.offset;
             let (base_addr, end_addr) = map.address;
@@ -427,6 +445,30 @@ impl BenchmarkData {
                     }
                 }
 
+                // When python is statically linked, we'll not find it in the ignored modules. Add it manually:
+                let python_modules = self.symbols_by_pid.values().filter_map(|proc| {
+                    proc.loaded_modules().find(|path| {
+                        path.file_name()
+                            .map(|name| name.to_string_lossy().starts_with("python"))
+                            .unwrap_or(false)
+                    })
+                });
+                for path in python_modules {
+                    if let Some(mapping) = self
+                        .symbols_by_pid
+                        .values()
+                        .find_map(|proc| proc.module_mapping(path))
+                    {
+                        let (Some((base_addr, _)), Some((_, end_addr))) = (
+                            mapping.iter().min_by_key(|(base_addr, _)| base_addr),
+                            mapping.iter().max_by_key(|(_, end_addr)| end_addr),
+                        ) else {
+                            continue;
+                        };
+                        to_ignore.push((path.to_string_lossy().into(), *base_addr, *end_addr));
+                    }
+                }
+
                 to_ignore
             },
         };

diff --git a/src/run/runner/wall_time/perf/perf_map.rs b/src/run/runner/wall_time/perf/perf_map.rs
@@ -8,10 +8,10 @@ use std::{
 };
 
 #[derive(Hash, PartialEq, Eq, Clone)]
-struct Symbol {
-    addr: u64,
-    size: u64,
-    name: String,
+pub struct Symbol {
+    pub addr: u64,
+    pub size: u64,
+    pub name: String,
 }
 
 impl Debug for Symbol {
@@ -30,6 +30,10 @@ pub struct ModuleSymbols {
 }
 
 impl ModuleSymbols {
+    pub fn from_symbols(symbols: Vec<Symbol>) -> Self {
+        Self { symbols }
+    }
+
     pub fn new<P: AsRef<Path>>(
         path: P,
         runtime_start_addr: u64,
@@ -149,7 +153,7 @@ impl ModuleSymbols {
         Ok(runtime_start_addr.wrapping_sub(load_segment.address()))
     }
 
-    fn append_to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
+    pub fn append_to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
         let mut file = std::fs::OpenOptions::new()
             .create(true)
             .append(true)
@@ -209,6 +213,10 @@ impl ProcessSymbols {
             .push((start_addr, end_addr));
     }
 
+    pub fn loaded_modules(&self) -> impl Iterator<Item = &PathBuf> {
+        self.modules.keys()
+    }
+
     pub fn module_mapping<P: AsRef<std::path::Path>>(
         &self,
         module_path: P,

diff --git a/...napshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap b/...napshots/codspeed__run__runner__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap
@@ -0,0 +1,15 @@
+---
+source: src/run/runner/wall_time/perf/unwind_data.rs
+expression: "UnwindData::new(MODULE_PATH.as_bytes(), 0x0, start_addr, size, None)"
+---
+Ok(
+    UnwindData {
+        path: "testdata/perf_map/cpp_my_benchmark.bin",
+        avma_range: 400000..459000,
+        base_avma: 0,
+        eh_frame_hdr_svma: 4577bc..458b30,
+        eh_frame_hdr_hash: 4b4eac90f7f5e60d,
+        eh_frame_hash: 233bdd4ae9fe4ba4,
+        eh_frame_svma: 451098..4577bc,
+    },
+)
diff --git a/...shots/codspeed__run__runner__wall_time__perf__unwind_data__tests__golang_unwind_data.snap b/...shots/codspeed__run__runner__wall_time__perf__unwind_data__tests__golang_unwind_data.snap
@@ -0,0 +1,15 @@
+---
+source: src/run/runner/wall_time/perf/unwind_data.rs
+expression: "UnwindData::new(MODULE_PATH.as_bytes(), 0x2000, start_addr, size, None)"
+---
+Ok(
+    UnwindData {
+        path: "testdata/perf_map/go_fib.bin",
+        avma_range: 402000..50f000,
+        base_avma: 0,
+        eh_frame_hdr_svma: 6498b0..649b94,
+        eh_frame_hdr_hash: f1f69beb959a08d7,
+        eh_frame_hash: a8727039dd21b51c,
+        eh_frame_svma: 649b98..64aa70,
+    },
+)
diff --git a/...s/codspeed__run__runner__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap b/...s/codspeed__run__runner__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap
@@ -0,0 +1,15 @@
+---
+source: src/run/runner/wall_time/perf/unwind_data.rs
+expression: unwind_data
+---
+Ok(
+    UnwindData {
+        path: "testdata/perf_map/divan_sleep_benches.bin",
+        avma_range: 5555555a2000..555555692000,
+        base_avma: 555555554000,
+        eh_frame_hdr_svma: 2ac74..2ea60,
+        eh_frame_hdr_hash: f579da4368e627c1,
+        eh_frame_hash: 791501d5a9c438d,
+        eh_frame_svma: 11540..2ac74,
+    },
+)