diff --git a/dev/poc/dispatch_regression_tests.py b/dev/poc/dispatch_regression_tests.py
new file mode 100644
index 00000000..bfd93984
--- /dev/null
+++ b/dev/poc/dispatch_regression_tests.py
@@ -0,0 +1,68 @@
+from cibuildwheel.oci_container import OCIContainer
+from cibuildwheel.oci_container import OCIPlatform
+import ubelt as ub
+
+python_versions = [
+    '3.9',
+    '3.10',
+    '3.11',
+    '3.12',
+    '3.13',
+]
+
+for python_version in python_versions:
+    current_pyver = 'cp' + python_version.replace('.', '')
+    image = 'python:' + python_version
+    container = OCIContainer(image=image, oci_platform=OCIPlatform.AMD64)
+
+    container.__enter__()  # not using with for ipython dev
+
+    script_path = ub.Path('~/code/line_profiler/dev/poc/regression_test.py').expand()
+    container_script_path = ub.Path('regression_test.py')
+    container.copy_into(script_path, container_script_path)
+
+    container.call(['pip', 'install', 'uv'])
+    container.call(['uv', 'pip', 'install', '--system', 'kwutil', 'ubelt', 'scriptconfig', 'psutil', 'ruamel.yaml', 'py-cpuinfo'])
+
+    line_profiler_versions = [
+        # '4.0.0',
+        '4.1.2',
+        '4.2.0',
+        '5.0.0',
+    ]
+
+    for line_profiler_version in line_profiler_versions:
+        container.call(['uv', 'pip', 'uninstall', '--system', 'line_profiler'])
+        container.call(['uv', 'pip', 'install', '--system', f'line_profiler=={line_profiler_version}'])
+        for _ in range(5):
+            container.call(['python', container_script_path])
+
+    # Test the latest wheels (requires these are built beforehand)
+    local_wheels = ub.Path('/home/joncrall/code/line_profiler/wheelhouse')
+    container.copy_into(local_wheels, ub.Path('wheelhouse'))
+    line_profiler_version = '5.0.1'
+    found = list(local_wheels.glob('*' + current_pyver + '-manylinux*'))
+    assert len(found) == 1
+    wheel_name = found[0].name
+    container.call(['uv', 'pip', 'uninstall', '--system', 'line_profiler'])
+    container.call(['uv', 'pip', 'install', '--system', 'wheelhouse/' + str(wheel_name)])
+    for _ in range(5):
+        container.call(['python', container_script_path])
+
+    container.copy_out(ub.Path('/root/.cache/line_profiler/benchmarks/'), ub.Path('bench_results'))
+
+    container.__exit__(None, None, None)
+
+# FIXME: robustness
+import kwutil
+result_paths = kwutil.util_path.coerce_patterned_paths('bench_results', expected_extension='.yaml')
+import sys, ubelt
+sys.path.append(ubelt.expandpath('~/code/line_profiler/dev/poc'))
+from gather_regression_tests import accumulate_results
+from gather_regression_tests import plot_results
+
+
+df = accumulate_results(result_paths)
+df['context.machine.py_version_simple'] = df['context.machine.py_version'].apply(lambda x: x.split(' ')[0])
+df = df.sort_values('params.line_profiler_version')
+plot_results(df)
diff --git a/dev/poc/gather_regression_tests.py b/dev/poc/gather_regression_tests.py
new file mode 100644
index 00000000..26aadf5e
--- /dev/null
+++ b/dev/poc/gather_regression_tests.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+import scriptconfig as scfg
+import kwutil
+import ubelt as ub
+
+
+def nested_to_dotdict(data):
+    """
+    Construct this flat representation from a nested one
+
+    Args:
+        data (Dict):
+            nested data
+
+    Example:
+        >>> data = {
+        >>>     'type': 'process',
+        >>>     'properties': {
+        >>>         'machine': {
+        >>>             'os_name': 'Linux',
+        >>>             'arch': 'x86_64',
+        >>>             'py_impl': 'CPython',
+        >>>         }}}
+        >>> flat = nested_to_dotdict(data['properties'])
+        >>> print(f'flat = {ub.urepr(flat, nl=2)}')
+        flat = {
+            'machine.os_name': 'Linux',
+            'machine.arch': 'x86_64',
+            'machine.py_impl': 'CPython',
+        }
+    """
+    flat = dict()
+    walker = ub.IndexableWalker(data, list_cls=tuple())
+    for path, value in walker:
+        if not isinstance(value, dict):
+            spath = list(map(str, path))
+            key = '.'.join(spath)
+            flat[key] = value
+    return flat
+
+
+def insert_prefix(self, prefix, index=0):
+    """
+    Adds a prefix to all items
+
+    Args:
+        prefix (str): prefix to insert
+        index (int): the depth to insert the new param
+
+    Example:
+        >>> self = dict({
+        >>>     'proc1.param1': 1,
+        >>>     'proc2.param1': 3,
+        >>>     'proc4.part2.param2': 10,
+        >>> })
+        >>> new = insert_prefix(self, 'foo', index=1)
+        >>> print('new = {}'.format(ub.urepr(new, nl=1)))
+        new = {
+            'foo.proc1.param1': 1,
+            'foo.proc2.param1': 3,
+            'foo.proc4.part2.param2': 10,
+        }
+    """
+    def _generate_new_items():
+        sep = '.'
+        for k, v in self.items():
+            path = k.split(sep)
+            path.insert(index, prefix)
+            k2 = sep.join(path)
+            yield k2, v
+    new = self.__class__(_generate_new_items())
+    return new
+
+
+class GatherRegressionTestsCLI(scfg.DataConfig):
+    """
+    """
+    paths = scfg.Value(None, help='output results from profiling codes')
+
+    @classmethod
+    def main(cls, argv=1, **kwargs):
+        """
+        Example:
+            >>> # xdoctest: +SKIP
+            >>> from gather_regression_tests import *  # NOQA
+            >>> argv = 0
+            >>> kwargs = dict()
+            >>> cls = GatherRegressionTestsCLI
+            >>> config = cls(**kwargs)
+            >>> cls.main(argv=argv, **config)
+        """
+        config = cls.cli(argv=argv, data=kwargs, strict=True, verbose='auto')
+        result_paths = kwutil.util_path.coerce_patterned_paths(config.paths, expected_extension='.yaml')
+
+        df = accumulate_results(result_paths)
+        plot_results(df)
+
+
+def plot_results(df):
+    import kwplot
+    sns = kwplot.autosns()
+    figman = kwplot.FigureManager(dpath='.')
+    fig = figman.figure()
+    fig.clf()
+    ax = fig.gca()
+    sns.lineplot(data=df, x='params.line_profiler_version', y='record.duration', style='record.line_profiler.enabled', ax=ax)
+    figman.finalize('regression_plot.png')
+
+    fig.clf()
+    ax = fig.gca()
+    sns.lineplot(data=df, x='params.line_profiler_version', y='record.duration', style='record.line_profiler.enabled', ax=ax, hue='context.machine.py_version_simple')
+    figman.finalize('regression_plot_pyversions.png')
+
+
+def accumulate_results(result_paths):
+    import pandas as pd
+    records_accum = []
+    for fpath in result_paths:
+        results = kwutil.Yaml.coerce(fpath)
+        records = results['records']
+        context = results['context']['properties']
+        _flat_context = nested_to_dotdict(context)
+        flat_context = insert_prefix(_flat_context, 'context')
+
+        flat_params = insert_prefix(nested_to_dotdict(
+            results['params']), 'params')
+        for record in records:
+            _flat_record = nested_to_dotdict(record)
+            flat_record = insert_prefix(_flat_record, 'record')
+            flat_record.update(flat_context)
+            flat_record.update(flat_params)
+            records_accum.append(flat_record)
+    df = pd.DataFrame(records_accum)
+    df['context.machine.py_version_simple'] = df['context.machine.py_version'].apply(lambda x: x.split(' ')[0])
+    return df
+
+__cli__ = GatherRegressionTestsCLI
+
+if __name__ == '__main__':
+    """
+
+    CommandLine:
+        python ~/code/line_profiler/dev/poc/gather_regression_tests.py
+        python -m gather_regression_tests
+    """
+    __cli__.main()
diff --git a/dev/poc/regression_test.py b/dev/poc/regression_test.py
new file mode 100644
index 00000000..6bbb7106
--- /dev/null
+++ b/dev/poc/regression_test.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python3
+import ubelt as ub
+import sys
+import scriptconfig as scfg
+
+
+# TODO: more than one type of benchmark would be a good idea
+
+BENCHMARK_CODE = ub.codeblock(
+    '''
+    if 'profile' not in globals():
+        try:
+            from line_profiler import profile
+        except ImportError:
+            profile = lambda x: x
+
+    @profile
+    def main(profiled=False):
+        import time
+        start = time.perf_counter()
+        for x in range(2000000): #2000000
+            y = x
+        elapsed_ms = round((time.perf_counter()-start)*1000, 2)
+        if profiled:
+            print(f"Time Elapsed profiling: {elapsed_ms}ms")
+        else:
+            print(f"Time Elapsed without profiling: {elapsed_ms}ms")
+        return elapsed_ms
+
+    if __name__ == '__main__':
+        main()
+    ''')
+
+
+class RegressionTestCLI(scfg.DataConfig):
+    out_fpath = scfg.Value('auto', help='place to write results')
+
+    @classmethod
+    def main(cls, argv=1, **kwargs):
+        """
+        Example:
+            >>> # xdoctest: +SKIP
+            >>> from regression_test import *  # NOQA
+            >>> argv = 0
+            >>> kwargs = dict()
+            >>> cls = RegressionTestCLI
+            >>> config = cls(**kwargs)
+            >>> cls.main(argv=argv, **config)
+        """
+        config = cls.cli(argv=argv, data=kwargs, strict=True, verbose='auto')
+
+        import kwutil
+        import line_profiler
+        context = kwutil.ProcessContext().start()
+
+        dpath = ub.Path.appdir('line_profiler/benchmarks').ensuredir()
+
+        code = BENCHMARK_CODE
+        code_id = ub.hash_data(code)[0:16]
+        fpath = dpath / f'{code_id}.py'
+        fpath.write_text(code)
+
+        # TODO: can we tag the hash of the wheel we used to install?
+        # We need to be able to differentiate dev versions.
+
+        results = {
+            'context': context.obj,
+            'params': {
+                'line_profiler_version': line_profiler.__version__,
+                'code_id': code_id,
+            },
+            'records': [],
+            'line_records': [],
+        }
+
+        # result_path = fpath.augment(stemsuffix='_' + ub.timestamp(), ext='.lprof')
+        result_path = fpath.augment(ext='.py.lprof')
+
+        with ub.Timer(ns=True) as noprof_timer:
+            res = ub.cmd([sys.executable, fpath], verbose=3, cwd=fpath.parent)
+            res.check_returncode()
+        results['records'].append({
+            'line_profiler.enabled': False,
+            'duration': noprof_timer.elapsed,
+        })
+
+        with ub.Timer(ns=True) as prof_timer:
+            res = ub.cmd([sys.executable, '-m', 'kernprof', '-lv', fpath], verbose=3, cwd=fpath.parent)
+            res.check_returncode()
+        results['records'].append({
+            'line_profiler.enabled': True,
+            'duration': prof_timer.elapsed,
+        })
+
+        # res2 = ub.cmd([sys.executable, '-m', 'line_profiler', '-rmt', result_path], verbose=3)
+        stats = line_profiler.load_stats(result_path)
+
+        for key, timings in stats.timings.items():
+            _path, _line, _name = key
+            for lineno, hits, time in timings:
+                results['line_records'].append({
+                    'time': time,
+                    'lineno': lineno,
+                })
+
+        context.stop()
+        print(f'results = {ub.urepr(results, nl=2)}')
+
+        out_text = kwutil.Yaml.dumps(results)
+        if config.out_fpath == 'auto':
+            out_fpath = fpath.augment('regression_test_' + ub.timestamp(), ext='.yaml')
+        else:
+            out_fpath = ub.Path(config.out_fpath)
+        out_fpath.write_text(out_text)
+
+__cli__ = RegressionTestCLI
+
+if __name__ == '__main__':
+    """
+
+    CommandLine:
+        python ~/code/line_profiler/dev/poc/regression_test.py
+        python -m regression_test
+    """
+    __cli__.main()
diff --git a/line_profiler/_line_profiler.pyx b/line_profiler/_line_profiler.pyx
index ff806962..3c9f3148 100644
--- a/line_profiler/_line_profiler.pyx
+++ b/line_profiler/_line_profiler.pyx
@@ -18,6 +18,9 @@
 from functools import wraps
 from sys import byteorder
 import sys
 cimport cython
+from cython.operator cimport dereference as deref
+from cpython.object cimport PyObject_Hash
+from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_GET_SIZE
 from cpython.version cimport PY_VERSION_HEX
 from libc.stdint cimport int64_t
@@ -66,9 +69,26 @@ ctypedef unsigned long long int uint64
 ctypedef long long int int64
 
 cdef extern from "Python_wrapper.h":
+    """
+    #if PY_VERSION_HEX >= 0x030d00a1 && PY_VERSION_HEX < 0x030e00a1
+    /* Unfortunately, _Py_HashBytes() is not exposed in Python
+     * 3.13, so just fall back to the slower Py_ObjectHash()
+     */
+    # define hash_bytecode(bytestring, bytes, len) PyObject_Hash(bytestring)
+    #else
+    # if PY_VERSION_HEX >= 0x030e00a1  // 3.14.0.a1
+    #  define hash_bytes(b, n) Py_HashBuffer(b, n)
+    # else
+    #  include "pyhash.h"
+    #  define hash_bytes(b, n) _Py_HashBytes(b, n)
+    # endif
+    # define hash_bytecode(bytestring, bytes, len) hash_bytes(bytes, len)
+    #endif
+    """
     ctypedef struct PyObject
     ctypedef struct PyCodeObject
     ctypedef struct PyFrameObject
+    ctypedef Py_ssize_t Py_hash_t
     ctypedef long long PY_LONG_LONG
     ctypedef int (*Py_tracefunc)(
         object self, PyFrameObject *py_frame, int what, PyObject *arg)
@@ -92,6 +112,13 @@ cdef extern from "Python_wrapper.h":
     cdef int PyFrame_GetLineNumber(PyFrameObject *frame)
     cdef void Py_XDECREF(PyObject *o)
+
+    # it's actually an unsigned long, but we want to avoid truncation on windows
+    cdef unsigned long long PyThread_get_thread_ident()
+    cdef Py_hash_t hash_bytecode(PyObject *bytestring, const char* bytes,
+                                 Py_ssize_t len)
+
+ctypedef PyCodeObject *PyCodeObjectPtr
 
 cdef extern from "c_trace_callbacks.c":
     # Legacy tracing
     ctypedef unsigned long long Py_uintptr_t
@@ -550,7 +577,7 @@ sys.monitoring.html#monitoring-event-RERAISE
     """
     cdef TraceCallback *legacy_callback
     cdef _SysMonitoringState mon_state
-    cdef public object active_instances  # type: set[LineProfiler]
+    cdef public set active_instances  # type: set[LineProfiler]
     cdef int _wrap_trace
     cdef int _set_frame_local_trace
     cdef int recursion_guard
@@ -1239,7 +1266,7 @@ datamodel.html#user-defined-functions
 
     property _manager:
         def __get__(self):
-            thread_id = threading.get_ident()
+            thread_id = PyThread_get_thread_ident()
             try:
                 return self._managers[thread_id]
             except KeyError:
@@ -1303,7 +1330,7 @@ datamodel.html#user-defined-functions
                 If no profiling data is available on the current thread.
         """
         try:
-            return (self._c_last_time)[threading.get_ident()]
+            return (self._c_last_time)[PyThread_get_thread_ident()]
         except KeyError as e:
             # We haven't actually profiled anything yet
             raise (KeyError('No profiling data on the current thread '
@@ -1349,7 +1376,7 @@ datamodel.html#user-defined-functions
         return py_last_time
 
     cpdef disable(self):
-        self._c_last_time[threading.get_ident()].clear()
+        self._c_last_time[PyThread_get_thread_ident()].clear()
         self._manager._handle_disable_event(self)
 
     def get_stats(self):
@@ -1388,55 +1415,74 @@ datamodel.html#user-defined-functions
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef inline inner_trace_callback(
-        int is_line_event, object instances, object code, int lineno):
+        int is_line_event, set instances, object code, int lineno):
     """
     The basic building block for the trace callbacks.
     """
-    cdef object prof_
-    cdef object bytecode = code.co_code
+    cdef LineProfiler prof_
     cdef LineProfiler prof
     cdef LastTime old
     cdef int key
-    cdef PY_LONG_LONG time
-    cdef int has_time = 0
+    cdef PY_LONG_LONG time = 0
+    cdef bint has_time = False
     cdef int64 code_hash
-    cdef int64 block_hash
-    cdef unordered_map[int64, LineTime] line_entries
-
-    if any(bytecode):
-        block_hash = hash(bytecode)
-    else:  # Cython functions have empty/zero bytecodes
+    cdef object py_bytes_obj = code.co_code
+    cdef char* data = PyBytes_AS_STRING(py_bytes_obj)
+    cdef Py_ssize_t size = PyBytes_GET_SIZE(py_bytes_obj)
+    cdef unsigned long ident
+    cdef Py_hash_t block_hash
+    cdef LineTime *entry
+    cdef unordered_map[int64, LineTime] *line_entries
+    cdef unordered_map[int64, LastTime] *last_map
+
+    # Cython functions have empty/zero bytecodes
+    # FIXME: `hash_bytecode()` is supposed to switch to
+    # `Py_HashBuffer()` or `_Py_HashBytes()` where available, but
+    # in tests it only seems to add overhead...
+    for i in range(size):
+        if data[i]:
+            # block_hash = hash_bytecode(py_bytes_obj, data, size)
+            block_hash = hash(py_bytes_obj)
+            break
+    else:  # fallback for Cython functions
         block_hash = hash(code)
+
     code_hash = compute_line_hash(block_hash, lineno)
     for prof_ in instances:
+        # for some reason, doing this is much faster than just combining it into the above
+        # like doing "for prof in instances:" is far slower
        prof = prof_
         if not prof._c_code_map.count(code_hash):
             continue
         if not has_time:
             time = hpTimer()
-            has_time = 1
-        ident = threading.get_ident()
-        if prof._c_last_time[ident].count(block_hash):
-            old = prof._c_last_time[ident][block_hash]
-            line_entries = prof._c_code_map[code_hash]
+            has_time = True
+        ident = PyThread_get_thread_ident()
+        last_map = &(prof._c_last_time[ident])
+        if deref(last_map).count(block_hash):
+            old = deref(last_map)[block_hash]
+            line_entries = &(prof._c_code_map[code_hash])
             key = old.f_lineno
-            if not line_entries.count(key):
-                prof._c_code_map[code_hash][key] = LineTime(
-                    code_hash, key, 0, 0)
-            prof._c_code_map[code_hash][key].nhits += 1
-            prof._c_code_map[code_hash][key].total_time += time - old.time
+            if not deref(line_entries).count(key):
+                deref(line_entries)[key] = LineTime(code_hash, key, 0, 0)
+            entry = &(deref(line_entries)[key])
+            # Note: explicitly `deref()`-ing here causes the new values
+            # to be assigned to a temp var;
+            # meanwhile, directly dot-accessing a pointer causes Cython
+            # to correctly write `ptr->attr = (ptr->attr + incr)`
+            entry.nhits += 1
+            entry.total_time += time - old.time
         if is_line_event:
             # Get the time again. This way, we don't record much time
            # wasted in this function.
-            prof._c_last_time[ident][block_hash] = LastTime(lineno, hpTimer())
-        elif prof._c_last_time[ident].count(block_hash):
+            deref(last_map)[block_hash] = LastTime(lineno, hpTimer())
+        elif deref(last_map).count(block_hash):
             # We are returning from a function, not executing a line.
             # Delete the last_time record. It may have already been
             # deleted if we are profiling a generator that is being
             # pumped past its end.
-            prof._c_last_time[ident].erase(
-                prof._c_last_time[ident].find(block_hash))
+            deref(last_map).erase(deref(last_map).find(block_hash))
 
 
 cdef extern int legacy_trace_callback(