1
1
use crate :: prelude:: * ;
2
- use object:: { Object , ObjectSymbol , ObjectSymbolTable } ;
2
+ use object:: { Object , ObjectSegment , ObjectSymbol , ObjectSymbolTable } ;
3
3
use std:: {
4
4
collections:: HashMap ,
5
+ fmt:: Debug ,
5
6
io:: Write ,
6
7
path:: { Path , PathBuf } ,
7
8
} ;
8
9
9
- #[ derive( Debug , Hash , PartialEq , Eq , Clone ) ]
10
+ #[ derive( Hash , PartialEq , Eq , Clone ) ]
10
11
struct Symbol {
11
- offset : u64 ,
12
+ addr : u64 ,
12
13
size : u64 ,
13
14
name : String ,
14
15
}
15
16
17
+ impl Debug for Symbol {
18
+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
19
+ write ! (
20
+ f,
21
+ "Symbol {{ offset: {:x}, size: {:x}, name: {} }}" ,
22
+ self . addr, self . size, self . name
23
+ )
24
+ }
25
+ }
26
+
16
27
#[ derive( Debug , Clone ) ]
17
28
pub struct ModuleSymbols {
18
- path : PathBuf ,
19
29
symbols : Vec < Symbol > ,
20
30
}
21
31
22
32
impl ModuleSymbols {
23
- pub fn new < P : AsRef < Path > > ( path : P ) -> anyhow:: Result < Self > {
33
+ pub fn new < P : AsRef < Path > > (
34
+ path : P ,
35
+ runtime_start_addr : u64 ,
36
+ runtime_offset : u64 ,
37
+ ) -> anyhow:: Result < Self > {
24
38
let content = std:: fs:: read ( path. as_ref ( ) ) ?;
25
39
let object = object:: File :: parse ( & * content) ?;
26
40
@@ -29,7 +43,7 @@ impl ModuleSymbols {
29
43
if let Some ( symbol_table) = object. symbol_table ( ) {
30
44
symbols. extend ( symbol_table. symbols ( ) . filter_map ( |symbol| {
31
45
Some ( Symbol {
32
- offset : symbol. address ( ) ,
46
+ addr : symbol. address ( ) ,
33
47
size : symbol. size ( ) ,
34
48
name : symbol. name ( ) . ok ( ) ?. to_string ( ) ,
35
49
} )
@@ -39,52 +53,110 @@ impl ModuleSymbols {
39
53
if let Some ( symbol_table) = object. dynamic_symbol_table ( ) {
40
54
symbols. extend ( symbol_table. symbols ( ) . filter_map ( |symbol| {
41
55
Some ( Symbol {
42
- offset : symbol. address ( ) ,
56
+ addr : symbol. address ( ) ,
43
57
size : symbol. size ( ) ,
44
58
name : symbol. name ( ) . ok ( ) ?. to_string ( ) ,
45
59
} )
46
60
} ) ) ;
47
61
}
48
62
49
- symbols. retain ( |symbol| symbol. offset > 0 && symbol. size > 0 ) ;
63
+ symbols. retain ( |symbol| symbol. addr > 0 && symbol. size > 0 ) ;
50
64
if symbols. is_empty ( ) {
51
65
return Err ( anyhow:: anyhow!( "No symbols found" ) ) ;
52
66
}
53
67
54
- // The base_addr from the mapping is where the module is actually loaded in memory (See ProcessSymbols::add_mapping),
55
- // but the symbol addresses from the ELF file assume the module is loaded at its preferred virtual address. We need to:
56
- // 1. Find the module's preferred base address from the ELF file or symbols
57
- // 2. Calculate the offset: actual_base - preferred_base
58
- // 3. Apply this offset to the symbol addresses
59
-
60
- // Find the preferred base address from the minimum symbol address
61
- let preferred_base = symbols. iter ( ) . map ( |s| s. offset ) . min ( ) . unwrap_or ( 0 ) & !0xfff ; // Align to page boundary
62
-
63
- // Convert absolute addresses to relative offsets
68
+ let load_bias = Self :: compute_load_bias ( runtime_start_addr, runtime_offset, & object) ?;
64
69
for symbol in & mut symbols {
65
- symbol. offset = symbol. offset . saturating_sub ( preferred_base ) ;
70
+ symbol. addr = symbol. addr . wrapping_add ( load_bias ) ;
66
71
}
67
72
68
- Ok ( Self {
69
- path : path. as_ref ( ) . to_path_buf ( ) ,
70
- symbols,
71
- } )
73
+ Ok ( Self { symbols } )
72
74
}
73
75
74
- fn append_to_file < P : AsRef < Path > > ( & self , path : P , base_addr : u64 ) -> anyhow:: Result < ( ) > {
76
+ fn compute_load_bias (
77
+ runtime_start_addr : u64 ,
78
+ runtime_offset : u64 ,
79
+ object : & object:: File ,
80
+ ) -> anyhow:: Result < u64 > {
81
+ // The addresses of symbols read from an ELF file on disk are not their final runtime addresses.
82
+ // This is due to Address Space Layout Randomization (ASLR) and the way the OS loader maps
83
+ // file segments into virtual memory.
84
+ //
85
+ // Step 1: Find the corresponding ELF segment.
86
+ // We must find the `PT_LOAD` segment that corresponds to the executable memory region we found
87
+ // in /proc/<pid>/maps. We do this by finding the segment whose file range contains `runtime_offset`.
88
+ //
89
+ // For example, if we have the following `/proc/<pid>/maps` output:
90
+ // ```
91
+ // 00400000-00402000 r--p 00000000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
92
+ // 00402000-0050f000 r-xp 00002000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin <-- we find this
93
+ // 0050f000-0064b000 r--p 0010f000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
94
+ // 0064b000-0064c000 r--p 0024a000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
95
+ // 0064c000-0065e000 rw-p 0024b000 fe:01 114429641 /runner/testdata/perf_map/go_fib.bin
96
+ // 0065e000-00684000 rw-p 00000000 00:00 0
97
+ // ```
98
+ //
99
+ // We'll match the PT_LOAD segment whose file range contains that offset (here it starts exactly at 0x2000):
100
+ // ```
101
+ // $ readelf -l testdata/perf_map/go_fib.bin
102
+ // Elf file type is EXEC (Executable file)
103
+ // Entry point 0x402490
104
+ // There are 15 program headers, starting at offset 64
105
+ //
106
+ // Program Headers:
107
+ // Type Offset VirtAddr PhysAddr
108
+ // PHDR 0x0000000000000040 0x0000000000400040 0x0000000000400040
109
+ // 0x0000000000000348 0x0000000000000348 R 0x8
110
+ // INTERP 0x0000000000000430 0x0000000000400430 0x0000000000400430
111
+ // 0x0000000000000053 0x0000000000000053 R 0x1
112
+ // LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000
113
+ // 0x0000000000001640 0x0000000000001640 R 0x1000
114
+ // LOAD 0x0000000000002000 0x0000000000402000 0x0000000000402000 <-- we'll match this
115
+ // 0x000000000010ceb1 0x000000000010ceb1 R E 0x1000
116
+ // ```
117
+ let load_segment = object
118
+ . segments ( )
119
+ . find ( |segment| {
120
+ // When the kernel loads an ELF file, it maps entire pages (usually 4KB aligned),
121
+ // not just the exact segment boundaries. Here's what happens:
122
+ //
123
+ // **ELF File Structure**:
124
+ // - LOAD segment 1: file offset 0x0 - 0x4d26a (data/code)
125
+ // - LOAD segment 2: file offset 0x4d26c - 0x13c4b6 (executable code)
126
+ //
127
+ // **Kernel Memory Mapping**: The kernel rounds down to page boundaries when mapping:
128
+ // - Maps pages starting at offset 0x0 (covers segment 1)
129
+ // - Maps pages starting at offset 0x4d000 (page-aligned, covers segment 2)
130
+ //
131
+ // (the example values are based on the `test_rust_divan_symbols` test)
132
+ let ( file_offset, file_size) = segment. file_range ( ) ;
133
+ runtime_offset >= file_offset && runtime_offset < file_offset + file_size
134
+ } )
135
+ . context ( "Failed to find a matching PT_LOAD segment" ) ?;
136
+
137
+ // Step 2: Calculate the "load bias".
138
+ // The bias is the difference between where the segment *actually* is in memory versus where the
139
+ // ELF file *preferred* it to be.
140
+ //
141
+ // load_bias = runtime_start_addr - segment_preferred_vaddr
142
+ //
143
+ // - `runtime_start_addr`: The actual base address of this segment in memory (from `/proc/<pid>/maps`).
144
+ // - `load_segment.address()`: The preferred virtual address (`p_vaddr`) from the ELF file itself.
145
+ //
146
+ // This single calculation correctly handles both PIE/shared-objects and non-PIE executables:
147
+ // - For PIE/.so files: `0x7f... (random) - 0x... (small) = <large_bias>`
148
+ // - For non-PIE files: `0x402000 (fixed) - 0x402000 (fixed) = 0`
149
+ Ok ( runtime_start_addr. wrapping_sub ( load_segment. address ( ) ) )
150
+ }
151
+
152
+ fn append_to_file < P : AsRef < Path > > ( & self , path : P ) -> anyhow:: Result < ( ) > {
75
153
let mut file = std:: fs:: OpenOptions :: new ( )
76
154
. create ( true )
77
155
. append ( true )
78
156
. open ( path) ?;
79
157
80
158
for symbol in & self . symbols {
81
- writeln ! (
82
- file,
83
- "{:x} {:x} {}" ,
84
- base_addr + symbol. offset,
85
- symbol. size,
86
- symbol. name
87
- ) ?;
159
+ writeln ! ( file, "{:x} {:x} {}" , symbol. addr, symbol. size, symbol. name) ?;
88
160
}
89
161
90
162
Ok ( ( ) )
@@ -113,23 +185,21 @@ impl ProcessSymbols {
113
185
module_path : P ,
114
186
start_addr : u64 ,
115
187
end_addr : u64 ,
188
+ file_offset : u64 ,
116
189
) {
117
190
if self . pid != pid {
118
191
warn ! ( "pid mismatch: {} != {}" , self . pid, pid) ;
119
192
return ;
120
193
}
121
194
195
+ debug ! ( "Loading module symbols at {start_addr:x}-{end_addr:x} (offset: {file_offset:x})" ) ;
122
196
let path = module_path. as_ref ( ) . to_path_buf ( ) ;
123
- match ModuleSymbols :: new ( module_path) {
197
+ match ModuleSymbols :: new ( module_path, start_addr , file_offset ) {
124
198
Ok ( symbol) => {
125
199
self . modules . entry ( path. clone ( ) ) . or_insert ( symbol) ;
126
200
}
127
201
Err ( error) => {
128
- debug ! (
129
- "Failed to load symbols for module {}: {}" ,
130
- path. display( ) ,
131
- error
132
- ) ;
202
+ debug ! ( "Failed to load symbols for module {path:?}: {error}" ) ;
133
203
}
134
204
}
135
205
@@ -155,17 +225,48 @@ impl ProcessSymbols {
155
225
156
226
let symbols_path = folder. as_ref ( ) . join ( format ! ( "perf-{}.map" , self . pid) ) ;
157
227
for module in self . modules . values ( ) {
158
- let Some ( ( base_addr, _) ) = self
159
- . module_mappings
160
- . get ( & module. path )
161
- . and_then ( |bounds| bounds. iter ( ) . min_by_key ( |( start, _) | start) )
162
- else {
163
- warn ! ( "No bounds found for module: {}" , module. path. display( ) ) ;
164
- continue ;
165
- } ;
166
- module. append_to_file ( & symbols_path, * base_addr) ?;
228
+ module. append_to_file ( & symbols_path) ?;
167
229
}
168
230
169
231
Ok ( ( ) )
170
232
}
171
233
}
234
+
235
+ #[ cfg( test) ]
236
+ mod tests {
237
+ use super :: * ;
238
+
239
+ #[ test]
240
+ fn test_golang_symbols ( ) {
241
+ let module_symbols =
242
+ ModuleSymbols :: new ( "testdata/perf_map/go_fib.bin" , 0x00402000 , 0x00002000 ) . unwrap ( ) ;
243
+ insta:: assert_debug_snapshot!( module_symbols. symbols) ;
244
+ }
245
+
246
+ #[ test]
247
+ fn test_cpp_symbols ( ) {
248
+ const MODULE_PATH : & str = "testdata/perf_map/cpp_my_benchmark.bin" ;
249
+ let module_symbols = ModuleSymbols :: new ( MODULE_PATH , 0x00400000 , 0x00000000 ) . unwrap ( ) ;
250
+ insta:: assert_debug_snapshot!( module_symbols. symbols) ;
251
+ }
252
+
253
+ #[ test]
254
+ fn test_rust_divan_symbols ( ) {
255
+ const MODULE_PATH : & str = "testdata/perf_map/divan_sleep_benches.bin" ;
256
+
257
+ // Segments in the file:
258
+ // Segment: Segment { address: 0, size: 4d26a }
259
+ // Segment: Segment { address: 4e26c, size: ef24a }
260
+ // Segment: Segment { address: 13e4b8, size: ab48 }
261
+ // Segment: Segment { address: 1499b0, size: 11a5 }
262
+ //
263
+ // Segments in memory:
264
+ // 0x0000555555554000 0x00005555555a2000 0x4e000 0x0 r--p
265
+ // 0x00005555555a2000 0x0000555555692000 0xf0000 0x4d000 r-xp <--
266
+ // 0x0000555555692000 0x000055555569d000 0xb000 0x13c000 r--p
267
+ // 0x000055555569d000 0x000055555569f000 0x2000 0x146000 rw-p
268
+ //
269
+ let module_symbols = ModuleSymbols :: new ( MODULE_PATH , 0x00005555555a2000 , 0x4d000 ) . unwrap ( ) ;
270
+ insta:: assert_debug_snapshot!( module_symbols. symbols) ;
271
+ }
272
+ }
0 commit comments