Skip to content

Commit a6a1a12

Browse files
committed
fix: compute proper load bias
1 parent 72ad271 commit a6a1a12

11 files changed

+8521
-74
lines changed

.gitattributes

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
testdata/perf_map/cpp_my_benchmark.bin filter=lfs diff=lfs merge=lfs -text
2+
testdata/perf_map/go_fib.bin filter=lfs diff=lfs merge=lfs -text
3+
testdata/perf_map/divan_sleep_benches.bin filter=lfs diff=lfs merge=lfs -text

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,7 @@ jobs:
2121
runs-on: ubuntu-latest
2222
steps:
2323
- uses: actions/checkout@v3
24+
with:
25+
lfs: true
2426
- uses: moonrepo/setup-rust@v1
2527
- run: cargo test --all

src/run/runner/wall_time/perf/mod.rs

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -233,36 +233,41 @@ impl PerfRunner {
233233
_ => None,
234234
};
235235

236-
if let Some(path) = &path {
237-
symbols_by_pid
238-
.entry(pid)
239-
.or_insert(ProcessSymbols::new(pid))
240-
.add_mapping(pid, path, base_addr, end_addr);
241-
debug!("Added mapping for module {path:?}");
242-
236+
let Some(path) = &path else {
243237
if map.perms.contains(MMPermissions::EXECUTE) {
244-
match UnwindData::new(
245-
path.to_string_lossy().as_bytes(),
246-
page_offset,
247-
base_addr,
248-
end_addr - base_addr,
249-
None,
250-
) {
251-
Ok(unwind_data) => {
252-
unwind_data_by_pid.entry(pid).or_default().push(unwind_data);
253-
debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})");
254-
}
255-
Err(error) => {
256-
debug!(
257-
"Failed to create unwind data for module {}: {}",
258-
path.display(),
259-
error
260-
);
261-
}
262-
}
238+
debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}");
239+
}
240+
continue;
241+
};
242+
243+
if !map.perms.contains(MMPermissions::EXECUTE) {
244+
continue;
245+
}
246+
247+
symbols_by_pid
248+
.entry(pid)
249+
.or_insert(ProcessSymbols::new(pid))
250+
.add_mapping(pid, path, base_addr, end_addr, map.offset);
251+
debug!("Added mapping for module {path:?}");
252+
253+
match UnwindData::new(
254+
path.to_string_lossy().as_bytes(),
255+
page_offset,
256+
base_addr,
257+
end_addr - base_addr,
258+
None,
259+
) {
260+
Ok(unwind_data) => {
261+
unwind_data_by_pid.entry(pid).or_default().push(unwind_data);
262+
debug!("Added unwind data for {path:?} ({base_addr:x} - {end_addr:x})");
263+
}
264+
Err(error) => {
265+
debug!(
266+
"Failed to create unwind data for module {}: {}",
267+
path.display(),
268+
error
269+
);
263270
}
264-
} else if map.perms.contains(MMPermissions::EXECUTE) {
265-
debug!("Found executable mapping without path: {base_addr:x} - {end_addr:x}");
266271
}
267272
}
268273

Lines changed: 147 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,40 @@
11
use crate::prelude::*;
2-
use object::{Object, ObjectSymbol, ObjectSymbolTable};
2+
use object::{Object, ObjectSegment, ObjectSymbol, ObjectSymbolTable};
33
use std::{
44
collections::HashMap,
5+
fmt::Debug,
56
io::Write,
67
path::{Path, PathBuf},
78
};
89

9-
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
10+
#[derive(Hash, PartialEq, Eq, Clone)]
1011
struct Symbol {
11-
offset: u64,
12+
addr: u64,
1213
size: u64,
1314
name: String,
1415
}
1516

17+
impl Debug for Symbol {
18+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
19+
write!(
20+
f,
21+
"Symbol {{ offset: {:x}, size: {:x}, name: {} }}",
22+
self.addr, self.size, self.name
23+
)
24+
}
25+
}
26+
1627
#[derive(Debug, Clone)]
1728
pub struct ModuleSymbols {
18-
path: PathBuf,
1929
symbols: Vec<Symbol>,
2030
}
2131

2232
impl ModuleSymbols {
23-
pub fn new<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
33+
pub fn new<P: AsRef<Path>>(
34+
path: P,
35+
runtime_start_addr: u64,
36+
runtime_offset: u64,
37+
) -> anyhow::Result<Self> {
2438
let content = std::fs::read(path.as_ref())?;
2539
let object = object::File::parse(&*content)?;
2640

@@ -29,7 +43,7 @@ impl ModuleSymbols {
2943
if let Some(symbol_table) = object.symbol_table() {
3044
symbols.extend(symbol_table.symbols().filter_map(|symbol| {
3145
Some(Symbol {
32-
offset: symbol.address(),
46+
addr: symbol.address(),
3347
size: symbol.size(),
3448
name: symbol.name().ok()?.to_string(),
3549
})
@@ -39,52 +53,110 @@ impl ModuleSymbols {
3953
if let Some(symbol_table) = object.dynamic_symbol_table() {
4054
symbols.extend(symbol_table.symbols().filter_map(|symbol| {
4155
Some(Symbol {
42-
offset: symbol.address(),
56+
addr: symbol.address(),
4357
size: symbol.size(),
4458
name: symbol.name().ok()?.to_string(),
4559
})
4660
}));
4761
}
4862

49-
symbols.retain(|symbol| symbol.offset > 0 && symbol.size > 0);
63+
symbols.retain(|symbol| symbol.addr > 0 && symbol.size > 0);
5064
if symbols.is_empty() {
5165
return Err(anyhow::anyhow!("No symbols found"));
5266
}
5367

54-
// The base_addr from the mapping is where the module is actually loaded in memory (See ProcessSymbols::add_mapping),
55-
// but the symbol addresses from the ELF file assume the module is loaded at its preferred virtual address. We need to:
56-
// 1. Find the module's preferred base address from the ELF file or symbols
57-
// 2. Calculate the offset: actual_base - preferred_base
58-
// 3. Apply this offset to the symbol addresses
59-
60-
// Find the preferred base address from the minimum symbol address
61-
let preferred_base = symbols.iter().map(|s| s.offset).min().unwrap_or(0) & !0xfff; // Align to page boundary
62-
63-
// Convert absolute addresses to relative offsets
68+
let load_bias = Self::compute_load_bias(runtime_start_addr, runtime_offset, &object)?;
6469
for symbol in &mut symbols {
65-
symbol.offset = symbol.offset.saturating_sub(preferred_base);
70+
symbol.addr = symbol.addr.wrapping_add(load_bias);
6671
}
6772

68-
Ok(Self {
69-
path: path.as_ref().to_path_buf(),
70-
symbols,
71-
})
73+
Ok(Self { symbols })
7274
}
7375

74-
fn append_to_file<P: AsRef<Path>>(&self, path: P, base_addr: u64) -> anyhow::Result<()> {
76+
/// Computes the load bias of a module: the value that must be added to a
/// symbol address read from the ELF file to obtain its runtime address.
///
/// Symbol addresses stored in an ELF file are the *preferred* virtual
/// addresses. Because of ASLR and the way the loader maps file segments, the
/// module usually ends up somewhere else at runtime.
///
/// # Parameters
/// - `runtime_start_addr`: start address of a file-backed mapping of the
///   module, as listed in `/proc/<pid>/maps`.
/// - `runtime_offset`: file offset of that same mapping (the offset column of
///   `/proc/<pid>/maps`).
/// - `object`: the parsed module.
///
/// # How it works
/// 1. Find the segment backing the mapping. The kernel maps whole pages, so
///    the mapping offset is the segment's file offset *rounded down* to a
///    page boundary; the comparison therefore has to use the page-aligned
///    segment start. When a page holds the tail of one segment and the head
///    of the next, both match and the later segment is preferred, because a
///    mapping that begins on that page belongs to the segment starting there.
/// 2. Compute the virtual address the ELF file assigns to the first byte of
///    the mapping, `p_vaddr + (runtime_offset - p_offset)`, and subtract it
///    from `runtime_start_addr`.
///
/// Example (values from the `test_rust_divan_symbols` test):
/// ```text
/// ELF segment : file offset 0x4d26c, address 0x4e26c
/// mapping     : 0x5555555a2000-0x555555692000 r-xp, offset 0x4d000
///
/// vaddr of mapping start = 0x4e26c + (0x4d000 - 0x4d26c) = 0x4e000
/// load bias              = 0x5555555a2000 - 0x4e000      = 0x555555554000
/// ```
/// The result matches the module's first mapping (`0x555555554000`, file
/// offset 0, segment address 0).
///
/// For a non-PIE executable the mapping sits at its preferred address and the
/// bias is 0 (e.g. `0x402000 - (0x402000 + (0x2000 - 0x2000))`).
///
/// # Errors
/// Returns an error when no segment of `object` covers `runtime_offset`.
fn compute_load_bias(
    runtime_start_addr: u64,
    runtime_offset: u64,
    object: &object::File,
) -> anyhow::Result<u64> {
    // NOTE(review): assumes 4 KiB pages when rounding segment starts down.
    // Page-aligned segments match regardless; confirm if targets with larger
    // pages and unaligned segments must be supported.
    const PAGE_SIZE: u64 = 0x1000;

    let load_segment = object
        .segments()
        .filter(|segment| {
            let (file_offset, file_size) = segment.file_range();
            if file_size == 0 {
                // Nothing from the file is mapped for this segment.
                return false;
            }
            let mapped_start = file_offset & !(PAGE_SIZE - 1);
            // Saturate so a malformed header cannot overflow the addition.
            let file_end = file_offset.saturating_add(file_size);
            (mapped_start..file_end).contains(&runtime_offset)
        })
        // Prefer the segment that starts latest in the file (see step 1 above).
        .max_by_key(|segment| segment.file_range().0)
        .context("Failed to find a matching PT_LOAD segment")?;

    // Wrapping arithmetic is intentional: the mapping offset may be slightly
    // below the segment's file offset (page rounding), and the bias itself is
    // later applied with `wrapping_add`.
    let (segment_file_offset, _) = load_segment.file_range();
    let mapping_vaddr = load_segment
        .address()
        .wrapping_add(runtime_offset.wrapping_sub(segment_file_offset));

    Ok(runtime_start_addr.wrapping_sub(mapping_vaddr))
}
151+
152+
fn append_to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
75153
let mut file = std::fs::OpenOptions::new()
76154
.create(true)
77155
.append(true)
78156
.open(path)?;
79157

80158
for symbol in &self.symbols {
81-
writeln!(
82-
file,
83-
"{:x} {:x} {}",
84-
base_addr + symbol.offset,
85-
symbol.size,
86-
symbol.name
87-
)?;
159+
writeln!(file, "{:x} {:x} {}", symbol.addr, symbol.size, symbol.name)?;
88160
}
89161

90162
Ok(())
@@ -113,23 +185,21 @@ impl ProcessSymbols {
113185
module_path: P,
114186
start_addr: u64,
115187
end_addr: u64,
188+
file_offset: u64,
116189
) {
117190
if self.pid != pid {
118191
warn!("pid mismatch: {} != {}", self.pid, pid);
119192
return;
120193
}
121194

195+
debug!("Loading module symbols at {start_addr:x}-{end_addr:x} (offset: {file_offset:x})");
122196
let path = module_path.as_ref().to_path_buf();
123-
match ModuleSymbols::new(module_path) {
197+
match ModuleSymbols::new(module_path, start_addr, file_offset) {
124198
Ok(symbol) => {
125199
self.modules.entry(path.clone()).or_insert(symbol);
126200
}
127201
Err(error) => {
128-
debug!(
129-
"Failed to load symbols for module {}: {}",
130-
path.display(),
131-
error
132-
);
202+
debug!("Failed to load symbols for module {path:?}: {error}");
133203
}
134204
}
135205

@@ -155,17 +225,48 @@ impl ProcessSymbols {
155225

156226
let symbols_path = folder.as_ref().join(format!("perf-{}.map", self.pid));
157227
for module in self.modules.values() {
158-
let Some((base_addr, _)) = self
159-
.module_mappings
160-
.get(&module.path)
161-
.and_then(|bounds| bounds.iter().min_by_key(|(start, _)| start))
162-
else {
163-
warn!("No bounds found for module: {}", module.path.display());
164-
continue;
165-
};
166-
module.append_to_file(&symbols_path, *base_addr)?;
228+
module.append_to_file(&symbols_path)?;
167229
}
168230

169231
Ok(())
170232
}
171233
}
234+
235+
#[cfg(test)]
mod tests {
    use super::*;

    /// Snapshot of the symbols resolved for the Go fixture, using the start
    /// address and file offset of its executable mapping.
    #[test]
    fn test_golang_symbols() {
        const MODULE_PATH: &str = "testdata/perf_map/go_fib.bin";
        const RUNTIME_START_ADDR: u64 = 0x0040_2000;
        const RUNTIME_OFFSET: u64 = 0x2000;

        let module_symbols =
            ModuleSymbols::new(MODULE_PATH, RUNTIME_START_ADDR, RUNTIME_OFFSET).unwrap();
        insta::assert_debug_snapshot!(module_symbols.symbols);
    }

    /// Snapshot of the symbols resolved for the C++ fixture, mapped at
    /// `0x400000` with file offset 0.
    #[test]
    fn test_cpp_symbols() {
        const MODULE_PATH: &str = "testdata/perf_map/cpp_my_benchmark.bin";
        const RUNTIME_START_ADDR: u64 = 0x0040_0000;
        const RUNTIME_OFFSET: u64 = 0;

        let module_symbols =
            ModuleSymbols::new(MODULE_PATH, RUNTIME_START_ADDR, RUNTIME_OFFSET).unwrap();
        insta::assert_debug_snapshot!(module_symbols.symbols);
    }

    /// Snapshot of the symbols resolved for the Rust (divan) fixture.
    ///
    /// Segments in the file:
    ///   Segment { address: 0, size: 4d26a }
    ///   Segment { address: 4e26c, size: ef24a }
    ///   Segment { address: 13e4b8, size: ab48 }
    ///   Segment { address: 1499b0, size: 11a5 }
    ///
    /// Segments in memory:
    ///   0x0000555555554000 0x00005555555a2000 0x4e000 0x0      r--p
    ///   0x00005555555a2000 0x0000555555692000 0xf0000 0x4d000  r-xp  <-- used here
    ///   0x0000555555692000 0x000055555569d000 0xb000  0x13c000 r--p
    ///   0x000055555569d000 0x000055555569f000 0x2000  0x146000 rw-p
    #[test]
    fn test_rust_divan_symbols() {
        const MODULE_PATH: &str = "testdata/perf_map/divan_sleep_benches.bin";
        const RUNTIME_START_ADDR: u64 = 0x0000_5555_555a_2000;
        const RUNTIME_OFFSET: u64 = 0x4d000;

        let module_symbols =
            ModuleSymbols::new(MODULE_PATH, RUNTIME_START_ADDR, RUNTIME_OFFSET).unwrap();
        insta::assert_debug_snapshot!(module_symbols.symbols);
    }
}

0 commit comments

Comments
 (0)