Skip to content

Commit de37e0b

Browse files
emeryberger and claude authored
Fix issue #1022: pytest-xdist + --profile-all on Linux dropped user samples (#1055)
execnet worker subprocesses spawned by pytest-xdist inherited a sigmask with Scalene's CPU sampling signal blocked. The parent's patch_module_functions_with_signal_blocking shim (added for issue #841) wraps every os.* call with pthread_sigmask(SIG_BLOCK, [SIGALRM]); when that wrapper intersects subprocess.Popen's fork window, the child inherits the blocked mask. setitimer fires every 10 ms, but the kernel keeps the signal pending and the handler never runs — workers collect zero CPU and zero allocation samples, each prints "did not run for long enough", and the user's source file is absent from the merged profile.

Fix in enable_signals(): unconditionally pthread_sigmask(SIG_UNBLOCK, ...) Scalene's profiling signals in the calling (main) thread so children clear any inherited block before the timer starts.

Adds a Linux-only smoketest that runs --profile-all with -n 2 xdist workers against a CPU-busy fixture and asserts the user file ends up in the profile. Pre-fix the smoketest exits 1 with the exact warning the bug reporter saw; post-fix it passes.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 5e4dfb8 commit de37e0b

3 files changed

Lines changed: 168 additions & 0 deletions

File tree

.github/workflows/test-smoketests.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,17 @@ jobs:
6868
run: python test/smoketest_pool_spawn.py
6969
timeout-minutes: 5
7070

71+
# Regression test for pytest-xdist + --profile-all on Linux (issue
72+
# #1022). The bug was Linux-specific: execnet worker subprocesses
73+
# inherited a sigmask with Scalene's CPU sampling signal blocked,
74+
# so the timer fired but the handler never ran and the user's code
75+
# was absent from the merged profile. The smoketest itself self-
76+
# skips on non-Linux platforms.
77+
- name: pytest-xdist + --profile-all (issue #1022)
78+
if: matrix.os == 'ubuntu-latest'
79+
run: python test/smoketest_issue_1022.py
80+
timeout-minutes: 5
81+
7182
# Note: test/smoketest.py only handles single JSON, rather than multiple in sequence.
7283
- name: profile-interval smoke test
7384
run: python -m scalene run --profile-interval=2 test/testme.py && python -m scalene view --cli

scalene/scalene_signal_manager.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,24 @@ def enable_signals(
238238
self.__signals.cpu_timer_signal,
239239
cpu_sampling_rate,
240240
)
241+
# Unmask Scalene's profiling signals in the calling (main) thread.
242+
# pytest-xdist's execnet, and other libraries that fan out work via
243+
# subprocess.Popen, can leave the worker process with Scalene's CPU
244+
# sampling signal blocked in its inherited sigmask — most visibly
245+
# when the parent's patched os module (issue #841) intersects with
246+
# the subprocess fork window. Without this unblock the SIGALRM
247+
# timer fires but the handler never runs, leaving the worker with
248+
# zero CPU and zero allocation samples. See issue #1022.
249+
unblock_set = {
250+
self.__signals.cpu_signal,
251+
self.__signals.malloc_signal,
252+
self.__signals.free_signal,
253+
self.__signals.memcpy_signal,
254+
}
255+
unblock_set.discard(None)
256+
if unblock_set:
257+
with contextlib.suppress(ValueError, OSError):
258+
signal.pthread_sigmask(signal.SIG_UNBLOCK, unblock_set)
241259

242260
def setup_lifecycle_signals(
243261
self,

test/smoketest_issue_1022.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#!/usr/bin/env python3
"""Smoketest for pytest-xdist + ``--profile-all`` (regression for #1022).

Workers spawned by pytest-xdist inherit a sigmask that, in combination
with Scalene's ``patch_module_functions_with_signal_blocking`` shim
(issue #841), used to land with Scalene's CPU sampling signal blocked.
The setitimer fires, but the handler never runs, so each worker
collects zero samples; the parent then merges nothing and the user's
code is absent from the profile while ``"did not run for long enough"``
warnings print twice (once per worker).

This smoketest reproduces the exact shape: ``-n 2`` workers running a
CPU-busy fixture under ``--profile-all``. It asserts that the user's
source file appears in the merged profile. Pre-fix on Linux (the
original note says "Linux 3.13" -- presumably Python 3.13 on Linux;
confirm against the issue) the profile contains only stdlib/execnet;
post-fix it contains the fixture source.

Linux-only: macOS and Windows did not exhibit the bug in the wild
(macOS goes through a different path that doesn't leak the sigmask
across subprocess fork; Windows doesn't use the LD_PRELOAD/sigmask
machinery at all).

Exit status: 0 on success or when skipped on a non-Linux platform,
Scalene's own return code when the profiled run fails, and 1 when the
profile is missing, empty, or lacks the fixture source file.
"""

import json
import os
import shutil
import subprocess
import sys
import tempfile
import textwrap

# Fixture: a CPU-busy function the workers will run. Each test calls it
# with 10M iterations so even on slow runners there is more than enough
# wall-clock time for the timer to fire many times per worker.
FIXTURE_SOURCE = textwrap.dedent(
    """
    def crunch(n):
        total = 0
        for i in range(n):
            total += i * i
        return total
    """
).lstrip()

# Four identical tests so that both xdist workers receive work.
TEST_SOURCE = textwrap.dedent(
    """
    from src import crunch

    def test_a():
        assert crunch(10_000_000) > 0

    def test_b():
        assert crunch(10_000_000) > 0

    def test_c():
        assert crunch(10_000_000) > 0

    def test_d():
        assert crunch(10_000_000) > 0
    """
).lstrip()

# The fixture lives in workdir; the entry we expect to see is
# "<workdir>/src/__init__.py" since that's where ``crunch`` runs. We
# match by path suffix to stay tolerant of symlinked tmpdirs.
USER_FILE_SUFFIX = os.path.join("src", "__init__.py")


def ensure_xdist_installed():
    """Install pytest-xdist into the running environment if it is missing.

    The smoketest workflow already installs scalene + numpy; xdist is the
    only extra requirement and pinning it isn't worth a separate step.

    Raises:
        subprocess.CalledProcessError: if ``pip install`` fails.
    """
    try:
        import xdist  # noqa: F401
    except ImportError:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--quiet", "pytest-xdist"]
        )


def write_fixture(workdir):
    """Create the ``src`` package and ``test_heavy.py`` inside *workdir*."""
    src_dir = os.path.join(workdir, "src")
    os.makedirs(src_dir, exist_ok=True)
    with open(os.path.join(src_dir, "__init__.py"), "w", encoding="utf-8") as f:
        f.write(FIXTURE_SOURCE)
    with open(os.path.join(workdir, "test_heavy.py"), "w", encoding="utf-8") as f:
        f.write(TEST_SOURCE)


def build_command(profile_path):
    """Return the argv that profiles the pytest run and writes *profile_path*."""
    return [
        sys.executable,
        "-m",
        "scalene",
        "run",
        "--profile-all",
        "-o",
        profile_path,
        "---",
        "-m",
        "pytest",
        "-n",
        "2",
        "test_heavy.py",
    ]


def find_user_files(files, suffix=USER_FILE_SUFFIX):
    """Return the profile entries whose path ends with *suffix*, in order."""
    return [name for name in files if name.endswith(suffix)]


def format_elapsed(value):
    """Render the elapsed time for the success message.

    A missing or non-numeric value yields ``"n/a"`` instead of raising, so
    that a run which passed every check cannot fail while printing "OK".
    """
    if isinstance(value, (int, float)):
        return f"{value:.2f}s"
    return "n/a"


def check_profile(profile_path):
    """Validate the JSON profile at *profile_path*; return the exit status."""
    if not os.path.exists(profile_path):
        print(f"No profile produced at {profile_path}")
        return 1

    with open(profile_path, encoding="utf-8") as f:
        data = json.load(f)

    files = data.get("files", {})
    if not files:
        print("Profile has empty 'files' dict — issue #1022 regression")
        return 1

    matching = find_user_files(files)
    if not matching:
        print(
            f"User-code file (...{USER_FILE_SUFFIX}) absent from profile. "
            f"Files present: {sorted(files)}"
        )
        return 1

    print(
        f"OK: issue #1022 fix holds. Profiled {len(files)} files, "
        f"user code at {matching[0]!r}, "
        f"elapsed={format_elapsed(data.get('elapsed_time_sec'))}"
    )
    return 0


def run_smoketest(workdir):
    """Write the fixture into *workdir*, profile it, and return the exit status."""
    write_fixture(workdir)
    profile_path = os.path.join(workdir, "issue1022.json")
    cmd = build_command(profile_path)
    print("COMMAND", " ".join(cmd))
    proc = subprocess.run(
        cmd, cwd=workdir, capture_output=True, text=True, timeout=180
    )
    print(proc.stdout)
    print(proc.stderr, file=sys.stderr)

    if proc.returncode != 0:
        print(f"scalene exited with rc={proc.returncode}")
        return proc.returncode

    return check_profile(profile_path)


def main():
    """Run the smoketest on Linux and return the process exit status."""
    if sys.platform != "linux":
        print(f"skipping issue-1022 smoketest on {sys.platform}: bug is Linux-only")
        return 0

    ensure_xdist_installed()

    workdir = tempfile.mkdtemp(prefix="scalene-1022-")
    try:
        return run_smoketest(workdir)
    finally:
        # mkdtemp leaves deletion to the caller; cleanup is best-effort so
        # a removal hiccup can never change the smoketest's verdict.
        shutil.rmtree(workdir, ignore_errors=True)


if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)