// off_cpu.ebpf.c (forked from open-telemetry/opentelemetry-ebpf-profiler)
#include "bpfdefs.h"
#include "tracemgmt.h"
#include "types.h"

// kprobe_progs maps from a program ID to a kprobe eBPF program.
bpf_map_def SEC("maps") kprobe_progs = {
  .type        = BPF_MAP_TYPE_PROG_ARRAY,
  .key_size    = sizeof(u32),
  .value_size  = sizeof(u32),
  .max_entries = NUM_TRACER_PROGS,
};

// sched_times keeps track of sched_switch call times.
bpf_map_def SEC("maps") sched_times = {
  .type        = BPF_MAP_TYPE_LRU_PERCPU_HASH,
  .key_size    = sizeof(u64), // pid_tgid
  .value_size  = sizeof(u64), // time in ns
  .max_entries = 256,         // max_entries is adjusted at load time in loadAllMaps.
};
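
// Key layout sketch: bpf_get_current_pid_tgid() packs the thread group ID
// into the upper 32 bits and the thread ID into the lower 32 bits, so the
// full 64-bit value keeps entries of different threads of the same process
// distinct in sched_times. Illustration (not part of this file):
//
//   u64 pid_tgid = ((u64)tgid << 32) | tid; // what the helper returns
//   u32 pid      = pid_tgid >> 32;          // process (thread group) ID
//   u32 tid      = pid_tgid & 0xFFFFFFFF;   // thread ID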

// tracepoint__sched_switch serves as the entry point for off-CPU profiling.
SEC("tracepoint/sched/sched_switch")
int tracepoint__sched_switch(void *ctx)
{
  u64 pid_tgid = bpf_get_current_pid_tgid();
  u32 pid      = pid_tgid >> 32;
  u32 tid      = pid_tgid & 0xFFFFFFFF;

  if (pid == 0 || tid == 0) {
    return 0;
  }

  u32 key = 0;
  SystemConfig *syscfg = bpf_map_lookup_elem(&system_config, &key);
  if (!syscfg) {
    // Unreachable: array maps are always fully initialized.
    return ERR_UNREACHABLE;
  }

  // Randomly sample sched_switch events: only record a start timestamp for
  // roughly off_cpu_threshold out of OFF_CPU_THRESHOLD_MAX events.
  if (bpf_get_prandom_u32() % OFF_CPU_THRESHOLD_MAX >= syscfg->off_cpu_threshold) {
    return 0;
  }

  u64 ts = bpf_ktime_get_ns();
  if (bpf_map_update_elem(&sched_times, &pid_tgid, &ts, BPF_ANY) < 0) {
    DEBUG_PRINT("Failed to record sched_switch event entry");
    return 0;
  }
  return 0;
}
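
// Sampling math, for illustration: the check above admits an event with
// probability off_cpu_threshold / OFF_CPU_THRESHOLD_MAX. Assuming, say,
// OFF_CPU_THRESHOLD_MAX == 1000 (the real value is defined in the shared
// headers) and off_cpu_threshold == 10:
//
//   P(record timestamp) = 10 / 1000 = 1% of sched_switch events.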

// dummy is never loaded or called. It just makes sure that kprobe_progs is
// referenced and keeps the compiler and linker happy.
SEC("kprobe/dummy")
int dummy(struct pt_regs *ctx)
{
  bpf_tail_call(ctx, &kprobe_progs, 0);
  return 0;
}
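
// Loader-side sketch (hypothetical, libbpf-flavored; the profiler's actual
// userspace loader lives elsewhere): tail-call targets are installed by
// writing a program fd into a slot of the BPF_MAP_TYPE_PROG_ARRAY, after
// which bpf_tail_call(ctx, &kprobe_progs, slot) can jump to them:
//
//   u32 slot    = 0;                             // program ID
//   int prog_fd = bpf_program__fd(target_prog);  // target eBPF program
//   int map_fd  = bpf_map__fd(kprobe_progs_map);
//   bpf_map_update_elem(map_fd, &slot, &prog_fd, BPF_ANY);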

// finish_task_switch is triggered right after the scheduler has updated
// the CPU registers.
SEC("kprobe/finish_task_switch")
int finish_task_switch(struct pt_regs *ctx)
{
  // Read the current thread's TGID (pid) and thread ID (tid).
  u64 pid_tgid = bpf_get_current_pid_tgid();
  u32 pid      = pid_tgid >> 32;
  u32 tid      = pid_tgid & 0xFFFFFFFF;

  if (pid == 0 || tid == 0) {
    return 0;
  }

  u64 ts        = bpf_ktime_get_ns();
  u64 *start_ts = bpf_map_lookup_elem(&sched_times, &pid_tgid);
  if (!start_ts || *start_ts == 0) {
    // There is no information from the sched/sched_switch entry hook.
    return 0;
  }

  // The off-CPU duration is the delta between the recorded sched_switch
  // timestamp and now.
  u64 diff = ts - *start_ts;

  DEBUG_PRINT("==== finish_task_switch ====");
  return collect_trace(ctx, TRACE_OFF_CPU, pid, tid, ts, diff);
}
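
// Worked example of the two hooks pairing up (timestamps hypothetical):
//
//   t0 = 1000000 ns: sched_switch fires for a thread being scheduled out;
//                    sched_times[pid_tgid] = t0.
//   t1 = 5000000 ns: finish_task_switch fires once the thread runs again.
//   diff = t1 - t0 = 4000000 ns (4 ms) of off-CPU time handed to
//   collect_trace().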