Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/run/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ pub struct RunArgs {
pub command: Vec<String>,
}

#[derive(ValueEnum, Clone, Debug, Serialize)]
#[derive(ValueEnum, Clone, Debug, Serialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum RunnerMode {
Instrumentation,
Expand Down
21 changes: 16 additions & 5 deletions src/run/runner/helpers/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ pub fn get_base_injected_env(
) -> HashMap<&'static str, String> {
HashMap::from([
("PYTHONHASHSEED", "0".into()),
(
"PYTHON_PERF_JIT_SUPPORT",
if mode == RunnerMode::Walltime {
"1".into()
} else {
"0".into()
},
),
("ARCH", ARCH.into()),
("CODSPEED_ENV", "runner".into()),
("CODSPEED_RUNNER_MODE", mode.to_string()),
Expand All @@ -19,10 +27,13 @@ pub fn get_base_injected_env(
}

pub fn is_codspeed_debug_enabled() -> bool {
let log_level = std::env::var("CODSPEED_LOG")
std::env::var("CODSPEED_LOG")
.ok()
.and_then(|log_level| log_level.parse::<log::LevelFilter>().ok())
.unwrap_or(log::LevelFilter::Info);

log_level < log::LevelFilter::Debug
.and_then(|log_level| {
log_level
.parse::<log::LevelFilter>()
.map(|level| level >= log::LevelFilter::Debug)
.ok()
})
.unwrap_or_default()
}
139 changes: 139 additions & 0 deletions src/run/runner/wall_time/perf/jit_dump.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
use crate::{
prelude::*,
run::runner::wall_time::perf::{
perf_map::{ModuleSymbols, Symbol},
unwind_data::UnwindData,
},
};
use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord};
use std::{
collections::HashSet,
path::{Path, PathBuf},
};

/// Handle to a perf JIT dump file on disk, identified by its path.
///
/// Constructing the handle does not touch the file system; the file is only
/// opened and parsed when one of the `into_*` conversions is called.
struct JitDump {
// Location of the JIT dump file to read.
path: PathBuf,
}

impl JitDump {
    /// Creates a handle for the JIT dump file at `path`. The file is not opened here.
    pub fn new(path: PathBuf) -> Self {
        Self { path }
    }

    /// Reads the JIT dump file and collects one [`Symbol`] per code load record.
    ///
    /// Records of any other kind are ignored. Function names that are not valid UTF-8 are
    /// converted lossily.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened, or if a record cannot be read or parsed.
    pub fn into_perf_map(self) -> Result<ModuleSymbols> {
        let mut symbols = Vec::new();

        let file = std::fs::File::open(self.path)?;
        let mut reader = JitDumpReader::new(file)?;
        while let Some(raw_record) = reader.next_record()? {
            let JitDumpRecord::CodeLoad(record) = raw_record.parse()? else {
                continue;
            };

            let name = record.function_name.as_slice();
            let name = String::from_utf8_lossy(&name);

            symbols.push(Symbol {
                addr: record.vma,
                size: record.code_bytes.len() as u64,
                // `into_owned` avoids a second copy when the lossy conversion already allocated.
                name: name.into_owned(),
            });
        }
        debug!("Extracted {} JIT symbols", symbols.len());

        Ok(ModuleSymbols::from_symbols(symbols))
    }

    /// Parses the JIT dump file and converts it into a list of `UnwindData`.
    ///
    /// The JIT dump file contains synthetic `eh_frame` data for jitted functions. This can be
    /// parsed and then converted to `UnwindData` which is used for stack unwinding.
    ///
    /// Each unwinding-info record is paired with the single code load record that follows it.
    /// Code loads without a preceding unwinding-info record, or whose address range would
    /// overflow `u64`, are skipped with a warning.
    ///
    /// See: https://github.com/python/cpython/blob/main/Python/perf_jit_trampoline.c
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened, or if a record cannot be read or parsed.
    pub fn into_unwind_data(self) -> Result<Vec<UnwindData>> {
        let file = std::fs::File::open(self.path)?;

        let mut jit_unwind_data = Vec::new();
        // Pending `(eh_frame, eh_frame_hdr)` bytes, waiting for the matching code load record.
        let mut current_unwind_info: Option<(Vec<u8>, Vec<u8>)> = None;

        let mut reader = JitDumpReader::new(file)?;
        while let Some(raw_record) = reader.next_record()? {
            // The first record is always the unwind info, followed by the code load event
            // (see `perf_map_jit_write_entry` in https://github.com/python/cpython/blob/9743d069bd53e9d3a8f09df899ec1c906a79da24/Python/perf_jit_trampoline.c#L1163C13-L1163C37)
            match raw_record.parse()? {
                JitDumpRecord::CodeLoad(record) => {
                    let name = record.function_name.as_slice();
                    let name = String::from_utf8_lossy(&name);

                    // The pending unwind info belongs to this code load only: consume it even
                    // if the record is rejected below, so it can never leak onto a later one.
                    let unwind_info = current_unwind_info.take();

                    let avma_start = record.vma;
                    let code_size = record.code_bytes.len() as u64;
                    // The values come from a file on disk: never trust them to fit in `u64`.
                    let Some(avma_end) = avma_start.checked_add(code_size) else {
                        warn!("Invalid address range for JIT code load: {name}");
                        continue;
                    };

                    let Some((eh_frame, eh_frame_hdr)) = unwind_info else {
                        warn!("No unwind info available for JIT code load: {name}");
                        continue;
                    };

                    jit_unwind_data.push(UnwindData {
                        path: format!("jit_{name}"),
                        avma_range: avma_start..avma_end,
                        base_avma: 0,
                        eh_frame_hdr,
                        eh_frame_hdr_svma: 0..0,
                        eh_frame,
                        eh_frame_svma: 0..0,
                    });
                }
                JitDumpRecord::CodeUnwindingInfo(record) => {
                    // Keep the unwind info until the next code load record claims it.
                    current_unwind_info = Some((
                        record.eh_frame.as_slice().to_vec(),
                        record.eh_frame_hdr.as_slice().to_vec(),
                    ));
                }
                _ => {
                    warn!("Unhandled JIT dump record: {raw_record:?}");
                }
            }
        }

        Ok(jit_unwind_data)
    }
}

/// Converts all the `jit-<pid>.dump` into unwind data and copies it to the profile folder.
pub async fn harvest_perf_jit_for_pids(profile_folder: &Path, pids: &HashSet<i32>) -> Result<()> {
for pid in pids {
let name = format!("jit-{pid}.dump");
let path = PathBuf::from("/tmp").join(&name);

if !path.exists() {
continue;
}
debug!("Found JIT dump file: {path:?}");

// Append the symbols to the existing perf map file
let symbols = match JitDump::new(path.clone()).into_perf_map() {
Ok(symbols) => symbols,
Err(error) => {
warn!("Failed to convert jit dump into perf map: {error:?}");
continue;
}
};
symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?;

let unwind_data = match JitDump::new(path).into_unwind_data() {
Ok(unwind_data) => unwind_data,
Err(error) => {
warn!("Failed to convert jit dump into unwind data: {error:?}");
continue;
}
};

for module in unwind_data {
module.save_to(profile_folder, *pid as _)?;
}
}

Ok(())
}
66 changes: 54 additions & 12 deletions src/run/runner/wall_time/perf/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::run::runner::helpers::run_command_with_log_pipe::run_command_with_log
use crate::run::runner::helpers::setup::run_with_sudo;
use crate::run::runner::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore;
use crate::run::runner::valgrind::helpers::perf_maps::harvest_perf_maps_for_pids;
use crate::run::runner::wall_time::perf::jit_dump::harvest_perf_jit_for_pids;
use anyhow::Context;
use fifo::{PerfFifo, RunnerFifo};
use futures::stream::FuturesUnordered;
Expand All @@ -22,6 +23,7 @@ use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
use tempfile::TempDir;
use unwind_data::UnwindData;

mod jit_dump;
mod metadata;
mod setup;
mod shared;
Expand Down Expand Up @@ -92,20 +94,24 @@ impl PerfRunner {
.tempfile_in(&self.perf_dir)?;

// Infer the unwinding mode from the benchmark cmd
let cg_mode = match (config.perf_unwinding_mode, &bench_cmd) {
(Some(mode), _) => mode,
(None, cmd) if cmd.contains("pytest") => UnwindingMode::FramePointer,
(None, cmd) if cmd.contains("cargo") => UnwindingMode::Dwarf,
(None, _) => {
// Default to dwarf unwinding since it works well with most binaries.
debug!("No call graph mode detected, defaulting to dwarf");
UnwindingMode::Dwarf
}
let (cg_mode, stack_size) = if let Some(mode) = config.perf_unwinding_mode {
(mode, None)
} else if config.command.contains("cargo") {
(UnwindingMode::Dwarf, None)
} else if config.command.contains("pytest")
|| config.command.contains("uv")
|| config.command.contains("python")
{
(UnwindingMode::Dwarf, Some(65528))
} else {
// Default to dwarf unwinding since it works well with most binaries.
debug!("No call graph mode detected, defaulting to dwarf");
(UnwindingMode::Dwarf, None)
};

let cg_mode = match cg_mode {
UnwindingMode::FramePointer => "fp",
UnwindingMode::Dwarf => "dwarf",
UnwindingMode::Dwarf => &format!("dwarf,{}", stack_size.unwrap_or(8192)),
};
debug!("Using call graph mode: {cg_mode:?}");

Expand Down Expand Up @@ -195,15 +201,16 @@ impl PerfRunner {
// Harvest the perf maps generated by python. This will copy the perf
// maps from /tmp to the profile folder. We have to write our own perf
// maps to these files AFTERWARDS, otherwise it'll be overwritten!
let perf_map_pids = futures::future::try_join_all(copy_tasks)
let bench_pids = futures::future::try_join_all(copy_tasks)
.await?
.into_iter()
.filter_map(|result| {
debug!("Copy task result: {result:?}");
result.ok()
})
.collect::<HashSet<_>>();
harvest_perf_maps_for_pids(profile_folder, &perf_map_pids).await?;
harvest_perf_maps_for_pids(profile_folder, &bench_pids).await?;
harvest_perf_jit_for_pids(profile_folder, &bench_pids).await?;

// Append perf maps, unwind info and other metadata
if let Err(BenchmarkDataSaveError::MissingIntegration) = bench_data.save_to(profile_folder)
Expand Down Expand Up @@ -231,6 +238,17 @@ impl PerfRunner {
procfs::process::Process::new(pid as _).expect("Failed to find benchmark process");
let exe_maps = bench_proc.maps().expect("Failed to read /proc/{pid}/maps");

if is_codspeed_debug_enabled() {
debug!("Process memory mappings for PID {pid}:");
for map in exe_maps.iter().sorted_by_key(|m| m.address.0) {
let (base_addr, end_addr) = map.address;
debug!(
" {:016x}-{:016x} {:08x} {:?} {:?} ",
base_addr, end_addr, map.offset, map.pathname, map.perms,
);
}
Comment on lines +243 to +249
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Won't this produce too many logs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't add that much (around 40-50 lines). However, it's one of the most important things needed for debugging any perf-related issue (e.g. if we fail to resolve a symbol, we can look at which module it belongs to).

And since we only enable it when running with CODSPEED_LOG=debug it shouldn't be an issue.

}

for map in &exe_maps {
let page_offset = map.offset;
let (base_addr, end_addr) = map.address;
Expand Down Expand Up @@ -427,6 +445,30 @@ impl BenchmarkData {
}
}

// When python is statically linked, we'll not find it in the ignored modules. Add it manually:
let python_modules = self.symbols_by_pid.values().filter_map(|proc| {
proc.loaded_modules().find(|path| {
path.file_name()
.map(|name| name.to_string_lossy().starts_with("python"))
.unwrap_or(false)
})
});
for path in python_modules {
if let Some(mapping) = self
.symbols_by_pid
.values()
.find_map(|proc| proc.module_mapping(path))
{
let (Some((base_addr, _)), Some((_, end_addr))) = (
mapping.iter().min_by_key(|(base_addr, _)| base_addr),
mapping.iter().max_by_key(|(_, end_addr)| end_addr),
) else {
continue;
};
to_ignore.push((path.to_string_lossy().into(), *base_addr, *end_addr));
}
}

to_ignore
},
};
Expand Down
18 changes: 13 additions & 5 deletions src/run/runner/wall_time/perf/perf_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ use std::{
};

#[derive(Hash, PartialEq, Eq, Clone)]
struct Symbol {
addr: u64,
size: u64,
name: String,
pub struct Symbol {
pub addr: u64,
pub size: u64,
pub name: String,
}

impl Debug for Symbol {
Expand All @@ -30,6 +30,10 @@ pub struct ModuleSymbols {
}

impl ModuleSymbols {
/// Builds a `ModuleSymbols` directly from an already-collected list of symbols.
pub fn from_symbols(symbols: Vec<Symbol>) -> Self {
Self { symbols }
}

pub fn new<P: AsRef<Path>>(
path: P,
runtime_start_addr: u64,
Expand Down Expand Up @@ -149,7 +153,7 @@ impl ModuleSymbols {
Ok(runtime_start_addr.wrapping_sub(load_segment.address()))
}

fn append_to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
pub fn append_to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
let mut file = std::fs::OpenOptions::new()
.create(true)
.append(true)
Expand Down Expand Up @@ -209,6 +213,10 @@ impl ProcessSymbols {
.push((start_addr, end_addr));
}

/// Iterates over the paths of the modules tracked for this process, i.e. the keys of
/// `self.modules`. The iteration order is whatever the underlying map yields.
pub fn loaded_modules(&self) -> impl Iterator<Item = &PathBuf> {
self.modules.keys()
}

pub fn module_mapping<P: AsRef<std::path::Path>>(
&self,
module_path: P,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: src/run/runner/wall_time/perf/unwind_data.rs
expression: "UnwindData::new(MODULE_PATH.as_bytes(), 0x0, start_addr, size, None)"
---
Ok(
UnwindData {
path: "testdata/perf_map/cpp_my_benchmark.bin",
avma_range: 400000..459000,
base_avma: 0,
eh_frame_hdr_svma: 4577bc..458b30,
eh_frame_hdr_hash: 4b4eac90f7f5e60d,
eh_frame_hash: 233bdd4ae9fe4ba4,
eh_frame_svma: 451098..4577bc,
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: src/run/runner/wall_time/perf/unwind_data.rs
expression: "UnwindData::new(MODULE_PATH.as_bytes(), 0x2000, start_addr, size, None)"
---
Ok(
UnwindData {
path: "testdata/perf_map/go_fib.bin",
avma_range: 402000..50f000,
base_avma: 0,
eh_frame_hdr_svma: 6498b0..649b94,
eh_frame_hdr_hash: f1f69beb959a08d7,
eh_frame_hash: a8727039dd21b51c,
eh_frame_svma: 649b98..64aa70,
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: src/run/runner/wall_time/perf/unwind_data.rs
expression: unwind_data
---
Ok(
UnwindData {
path: "testdata/perf_map/divan_sleep_benches.bin",
avma_range: 5555555a2000..555555692000,
base_avma: 555555554000,
eh_frame_hdr_svma: 2ac74..2ea60,
eh_frame_hdr_hash: f579da4368e627c1,
eh_frame_hash: 791501d5a9c438d,
eh_frame_svma: 11540..2ac74,
},
)
Loading