Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions dev/poc/dispatch_regression_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Dispatch benchmark runs across docker containers for several Python
# versions, then gather the results and plot regression curves.
#
# Requires: docker, the cibuildwheel + ubelt packages on the host, and (for
# the final step) prebuilt wheels in ~/code/line_profiler/wheelhouse.
from cibuildwheel.oci_container import OCIContainer
from cibuildwheel.oci_container import OCIPlatform
import ubelt as ub

python_versions = [
    '3.9',
    '3.10',
    '3.11',
    '3.12',
    '3.13',
]

for python_version in python_versions:
    # e.g. '3.11' -> 'cp311'; used to match manylinux wheel filenames below
    current_pyver = 'cp' + python_version.replace('.', '')
    image = 'python:' + python_version
    container = OCIContainer(image=image, oci_platform=OCIPlatform.AMD64)

    container.__enter__()  # not using with for ipython dev

    script_path = ub.Path('~/code/line_profiler/dev/poc/regression_test.py').expand()
    container_script_path = ub.Path('regression_test.py')
    container.copy_into(script_path, container_script_path)

    container.call(['pip', 'install', 'uv'])
    container.call(['uv', 'pip', 'install', '--system', 'kwutil', 'ubelt', 'scriptconfig', 'psutil', 'ruamel.yaml', 'py-cpuinfo'])

    line_profiler_versions = [
        # '4.0.0',
        '4.1.2',
        '4.2.0',
        '5.0.0',
    ]

    for line_profiler_version in line_profiler_versions:
        container.call(['uv', 'pip', 'uninstall', '--system', 'line_profiler'])
        container.call(['uv', 'pip', 'install', '--system', f'line_profiler=={line_profiler_version}'])
        # Repeat several times to average over run-to-run noise
        for _ in range(5):
            container.call(['python', container_script_path])

    # Test the latest wheels (requires these are built beforehand)
    # FIX: use a home-relative path consistent with script_path above instead
    # of a hard-coded /home/<user> prefix, so other users can run this.
    local_wheels = ub.Path('~/code/line_profiler/wheelhouse').expand()
    container.copy_into(local_wheels, ub.Path('wheelhouse'))
    line_profiler_version = '5.0.1'
    found = list(local_wheels.glob('*' + current_pyver + '-manylinux*'))
    assert len(found) == 1, f'expected exactly one {current_pyver} manylinux wheel, found: {found}'
    wheel_name = found[0].name
    container.call(['uv', 'pip', 'uninstall', '--system', 'line_profiler'])
    container.call(['uv', 'pip', 'install', '--system', 'wheelhouse/' + str(wheel_name)])
    for _ in range(5):
        container.call(['python', container_script_path])

    # Pull the benchmark YAML results written inside the container back out
    container.copy_out(ub.Path('/root/.cache/line_profiler/benchmarks/'), ub.Path('bench_results'))

    container.__exit__(None, None, None)

# FIXME: robustness
import kwutil
result_paths = kwutil.util_path.coerce_patterned_paths('bench_results', expected_extension='.yaml')
import sys, ubelt
sys.path.append(ubelt.expandpath('~/code/line_profiler/dev/poc'))
from gather_regression_tests import accumulate_results
from gather_regression_tests import plot_results

# FIX: removed a premature assignment to df['context.machine.py_version_simple']
# that referenced ``df`` before it was created (NameError); the derived column
# is already computed inside accumulate_results.
df = accumulate_results(result_paths)
df = df.sort_values('params.line_profiler_version')
plot_results(df)
146 changes: 146 additions & 0 deletions dev/poc/gather_regression_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env python3
import scriptconfig as scfg
import kwutil
import ubelt as ub


def nested_to_dotdict(data):
    """
    Construct this flat representation from a nested one

    Descends only into ``dict`` values; every non-dict value (including
    lists and tuples) is treated as a leaf. Keys at each level are
    stringified and joined with ``.``.

    Args:
        data (Dict):
            nested data

    Returns:
        Dict: mapping from dotted key paths to leaf values

    Example:
        >>> data = {
        >>>     'type': 'process',
        >>>     'properties': {
        >>>         'machine': {
        >>>             'os_name': 'Linux',
        >>>             'arch': 'x86_64',
        >>>             'py_impl': 'CPython',
        >>>         }}}
        >>> flat = nested_to_dotdict(data['properties'])
        >>> print(f'flat = {ub.urepr(flat, nl=2)}')
        flat = {
            'machine.os_name': 'Linux',
            'machine.arch': 'x86_64',
            'machine.py_impl': 'CPython',
        }
    """
    # Pure-stdlib recursive flatten. Previously this used ub.IndexableWalker
    # (with list walking disabled), which is a heavy dependency for a simple
    # dict-only traversal; behavior is identical.
    flat = dict()

    def _flatten(node, prefix):
        for key, value in node.items():
            dotted = prefix + str(key)
            if isinstance(value, dict):
                _flatten(value, dotted + '.')
            else:
                flat[dotted] = value

    _flatten(data, '')
    return flat


def insert_prefix(self, prefix, index=0):
    """
    Adds a prefix to all items

    Args:
        self (dict): mapping with dot-separated keys
        prefix (str): prefix to insert
        index (int): the depth to insert the new param
            (0 inserts it at the front of every key)

    Returns:
        dict: a new mapping of the same type with modified keys

    Example:
        >>> self = dict({
        >>>     'proc1.param1': 1,
        >>>     'proc2.param1': 3,
        >>>     'proc4.part2.param2': 10,
        >>> })
        >>> new = insert_prefix(self, 'foo', index=0)
        >>> print('new = {}'.format(ub.urepr(new, nl=1)))
        new = {
            'foo.proc1.param1': 1,
            'foo.proc2.param1': 3,
            'foo.proc4.part2.param2': 10,
        }
    """
    # NOTE(fix): the doctest previously called insert_prefix(..., index=1)
    # but showed the index=0 result ('foo.' first); index=1 actually yields
    # e.g. 'proc1.foo.param1'. The example now uses index=0 to match its
    # expected output (and the default used by callers in this file).
    def _generate_new_items():
        sep = '.'
        for k, v in self.items():
            path = k.split(sep)
            path.insert(index, prefix)
            k2 = sep.join(path)
            yield k2, v
    # self.__class__ preserves the input's mapping type (e.g. dict subclasses)
    new = self.__class__(_generate_new_items())
    return new


class GatherRegressionTestsCLI(scfg.DataConfig):
    """
    Load benchmark result YAML files, accumulate them into a single
    dataframe, and render regression plots.
    """
    paths = scfg.Value(None, help='output results from profiling codes')

    @classmethod
    def main(cls, argv=1, **kwargs):
        """
        Example:
            >>> # xdoctest: +SKIP
            >>> from gather_regression_tests import * # NOQA
            >>> argv = 0
            >>> kwargs = dict()
            >>> cls = GatherRegressionTestsCLI
            >>> config = cls(**kwargs)
            >>> cls.main(argv=argv, **config)
        """
        config = cls.cli(argv=argv, data=kwargs, strict=True, verbose='auto')
        # Expand the user-provided path / glob spec into concrete yaml paths
        result_paths = kwutil.util_path.coerce_patterned_paths(
            config.paths, expected_extension='.yaml')
        frame = accumulate_results(result_paths)
        plot_results(frame)


def plot_results(df):
    """
    Render regression plots from an accumulated result dataframe.

    Writes ``regression_plot.png`` and ``regression_plot_pyversions.png``
    into the current directory via kwplot's FigureManager.
    """
    import kwplot
    sns = kwplot.autosns()
    figman = kwplot.FigureManager(dpath='.')
    fig = figman.figure()

    # Duration vs line_profiler version, dashed by profiling enabled/disabled
    fig.clf()
    axes = fig.gca()
    sns.lineplot(
        data=df,
        x='params.line_profiler_version',
        y='record.duration',
        style='record.line_profiler.enabled',
        ax=axes)
    figman.finalize('regression_plot.png')

    # Same plot, additionally colored by the simplified Python version
    fig.clf()
    axes = fig.gca()
    sns.lineplot(
        data=df,
        x='params.line_profiler_version',
        y='record.duration',
        style='record.line_profiler.enabled',
        hue='context.machine.py_version_simple',
        ax=axes)
    figman.finalize('regression_plot_pyversions.png')


def accumulate_results(result_paths):
    """
    Load benchmark YAML files and flatten them into one pandas DataFrame.

    Each output row is a flattened record with its run context (keys
    prefixed ``context.``) and parameters (prefixed ``params.``) merged in
    alongside the record fields themselves (prefixed ``record.``).
    """
    import pandas as pd
    rows = []
    for fpath in result_paths:
        results = kwutil.Yaml.coerce(fpath)
        flat_context = insert_prefix(
            nested_to_dotdict(results['context']['properties']), 'context')
        flat_params = insert_prefix(
            nested_to_dotdict(results['params']), 'params')
        for record in results['records']:
            row = insert_prefix(nested_to_dotdict(record), 'record')
            row.update(flat_context)
            row.update(flat_params)
            rows.append(row)
    df = pd.DataFrame(rows)
    # Reduce e.g. "3.11.4 (main, ...)" down to just "3.11.4" for plotting
    df['context.machine.py_version_simple'] = df['context.machine.py_version'].apply(lambda x: x.split(' ')[0])
    return df

# Conventional alias so the entry point is discoverable as module.__cli__
__cli__ = GatherRegressionTestsCLI

if __name__ == '__main__':
    """

    CommandLine:
        python ~/code/line_profiler/dev/poc/gather_regression_tests.py
        python -m gather_regression_tests
    """
    __cli__.main()
125 changes: 125 additions & 0 deletions dev/poc/regression_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
import ubelt as ub
import sys
import scriptconfig as scfg


# TODO: more than one type of benchmark would be a good idea

BENCHMARK_CODE = ub.codeblock(
'''
if 'profile' not in globals():
try:
from line_profiler import profile
except ImportError:
profile = lambda x: x

@profile
def main(profiled=False):
import time
start = time.perf_counter()
for x in range(2000000): #2000000
y = x
elapsed_ms = round((time.perf_counter()-start)*1000, 2)
if profiled:
print(f"Time Elapsed profiling: {elapsed_ms}ms")
else:
print(f"Time Elapsed without profiling: {elapsed_ms}ms")
return elapsed_ms

if __name__ == '__main__':
main()
''')


class RegressionTestCLI(scfg.DataConfig):
    """
    Run BENCHMARK_CODE with and without line profiling and record the wall
    times (plus raw per-line timings) to a YAML results file.
    """
    out_fpath = scfg.Value('auto', help='place to write results')

    @classmethod
    def main(cls, argv=1, **kwargs):
        """
        Example:
            >>> # xdoctest: +SKIP
            >>> from regression_test import * # NOQA
            >>> argv = 0
            >>> kwargs = dict()
            >>> cls = RegressionTestCLI
            >>> config = cls(**kwargs)
            >>> cls.main(argv=argv, **config)
        """
        config = cls.cli(argv=argv, data=kwargs, strict=True, verbose='auto')

        import kwutil
        import line_profiler
        # Capture machine / process metadata for later cross-run aggregation
        context = kwutil.ProcessContext().start()

        dpath = ub.Path.appdir('line_profiler/benchmarks').ensuredir()

        # Write the benchmark to a content-addressed path so results can be
        # tied back to the exact code that produced them.
        code = BENCHMARK_CODE
        code_id = ub.hash_data(code)[0:16]
        fpath = dpath / f'{code_id}.py'
        fpath.write_text(code)

        # TODO: can we tag the hash of the wheel we used to install?
        # We need to be able to differentiate dev versions.

        results = {
            'context': context.obj,
            'params': {
                'line_profiler_version': line_profiler.__version__,
                'code_id': code_id,
            },
            'records': [],
            'line_records': [],
        }

        # kernprof -l writes its output next to the script as <name>.py.lprof
        # result_path = fpath.augment(stemsuffix='_' + ub.timestamp(), ext='.lprof')
        result_path = fpath.augment(ext='.py.lprof')

        # Baseline: run the benchmark without profiling. The return-code
        # check happens outside the timer so only the subprocess is timed.
        with ub.Timer(ns=True) as noprof_timer:
            res = ub.cmd([sys.executable, fpath], verbose=3, cwd=fpath.parent)
        res.check_returncode()
        results['records'].append({
            'line_profiler.enabled': False,
            'duration': noprof_timer.elapsed,
        })

        # Profiled: run the same benchmark under kernprof line profiling
        with ub.Timer(ns=True) as prof_timer:
            res = ub.cmd([sys.executable, '-m', 'kernprof', '-lv', fpath], verbose=3, cwd=fpath.parent)
        res.check_returncode()
        results['records'].append({
            'line_profiler.enabled': True,
            'duration': prof_timer.elapsed,
        })

        # res2 = ub.cmd([sys.executable, '-m', 'line_profiler', '-rmt', result_path], verbose=3)
        stats = line_profiler.load_stats(result_path)

        # Flatten the raw per-line timings from the profiled run
        for key, timings in stats.timings.items():
            _path, _line, _name = key
            for lineno, hits, time in timings:
                results['line_records'].append({
                    'time': time,
                    'lineno': lineno,
                })

        context.stop()
        print(f'results = {ub.urepr(results, nl=2)}')

        out_text = kwutil.Yaml.dumps(results)
        if config.out_fpath == 'auto':
            out_fpath = fpath.augment('regression_test_' + ub.timestamp(), ext='.yaml')
        else:
            # FIX: coerce to a Path — a plain string from the CLI has no
            # ``write_text`` method, so the non-'auto' branch used to crash.
            out_fpath = ub.Path(config.out_fpath)
        out_fpath.write_text(out_text)

# Conventional alias so the entry point is discoverable as module.__cli__
__cli__ = RegressionTestCLI

if __name__ == '__main__':
    """

    CommandLine:
        python ~/code/line_profiler/dev/poc/regression_test.py
        python -m regression_test
    """
    __cli__.main()
Loading
Loading