
Commit 794a3a6

[benchmark] notebook for interactively computing benchmark config

1 parent 052c838

16 files changed, +1212 -296 lines changed

hail/python/benchmark/conftest.py

Lines changed: 21 additions & 10 deletions
@@ -1,3 +1,4 @@
+import logging
 import os
 
 import pytest
@@ -10,36 +11,46 @@
 
 @pytest.hookimpl
 def pytest_addoption(parser):
-    parser.addoption("--log", type=str, help='Log file path', default=None)
-    parser.addoption("--output", type=str, help="Output file path.", default=None)
-    parser.addoption("--data-dir", type=str, help="Data directory.", default=os.getenv('HAIL_BENCHMARK_DIR'))
-    parser.addoption('--iterations', type=int, help='override number of iterations for all benchmarks', default=None)
-    parser.addoption('--cores', type=int, help='Number of cores to use.', default=1)
-    parser.addoption(
+    group = parser.getgroup('benchmark')
+    group.addoption("--log", type=str, help='Log file path', default=None)
+    group.addoption("--output", type=str, help="Output file path.", default=None)
+    group.addoption("--data-dir", type=str, help="Data directory.", default=os.getenv('HAIL_BENCHMARK_DIR'))
+    group.addoption(
+        "--burn-in-iterations", type=int, help="override number of burn-in iterations for all benchmarks", default=None
+    )
+    group.addoption('--iterations', type=int, help='override number of iterations for all benchmarks', default=None)
+    group.addoption('--cores', type=int, help='Number of cores to use.', default=1)
+    group.addoption(
         '--profile',
         choices=['cpu', 'alloc', 'itimer'],
         help='Run with async-profiler.',
         nargs='?',
         const='cpu',
         default=None,
     )
-    parser.addoption(
+    group.addoption(
         '--max-duration',
         type=int,
         help='Maximum permitted duration for any benchmark trial in seconds, not to be confused with pytest-timeout',
         default=200,
     )
-    parser.addoption('--max-failures', type=int, help='Stop benchmarking item after this many failures', default=3)
-    parser.addoption(
+    group.addoption('--max-failures', type=int, help='Stop benchmarking item after this many failures', default=3)
+    group.addoption(
         '--profiler-path', type=str, help='path to aysnc profiler', default=os.getenv('ASYNC_PROFILER_HOME')
     )
-    parser.addoption('--profiler-fmt', choices=['html', 'flame', 'jfr'], help='Choose profiler output.', default='html')
+    group.addoption('--profiler-fmt', choices=['html', 'flame', 'jfr'], help='Choose profiler output.', default='html')
 
 
 @pytest.hookimpl
 def pytest_configure(config):
     init_logging(file=config.getoption('log'))
 
+    if (nburn_in_iterations := config.getoption('burn_in_iterations')) is not None:
+        logging.info(f'benchmark: using {nburn_in_iterations} burn-in iterations.')
+
+    if (niterations := config.getoption('iterations')) is not None:
+        logging.info(f'benchmark: using {niterations} iterations.')
+
 
 @pytest.hookimpl(tryfirst=True)
 def pytest_collection_modifyitems(config, items):
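
All benchmark options are now registered under a single 'benchmark' group, so they appear together in --help, and the number of burn-in iterations becomes overridable from the command line. A hypothetical invocation using the new flags (the target path is an assumption, not taken from this commit):

import pytest

# Run the benchmark suite with explicit burn-in and timed iteration counts.
# 'hail/python/benchmark' is an assumed test path, shown for illustration only.
pytest.main([
    'hail/python/benchmark',
    '--burn-in-iterations', '3',  # option added by this commit
    '--iterations', '5',
    '--cores', '4',
])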
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+import tempfile
+
+import pytest
+
+import hail as hl
+from benchmark.tools.impex import import_benchmarks
+from benchmark.tools.statistics import analyze_benchmarks, laaber_mds, schultz_mds
+
+
+def test_analyze_benchmarks(local_tmpdir, onethreetwo, onethreethree):
+    tables = (import_benchmarks(v, local_tmpdir) for v in (onethreetwo, onethreethree))
+    tables = (t.select(instances=t.instances.iterations.time) for t in tables)
+    tables = (t._key_by_assert_sorted(*t.key.drop('version')) for t in tables)
+    tables = (t.checkpoint(tempfile.mktemp(suffix='.ht', dir=local_tmpdir)) for t in tables)
+    analyze_benchmarks(*tables, n_bootstrap_iterations=1000, confidence=0.95)._force_count()
+
+
+@pytest.fixture(scope='session')
+def _100_instances_100_iterations(resource_dir):
+    rows = lambda n, _: [hl.struct(id=0, instances=hl.repeat(hl.repeat(1.0, n), n))]
+    ht = hl.Table._generate(contexts=[100], partitions=1, rowfn=rows)
+    ht = ht._key_by_assert_sorted(ht.id)
+    return ht.checkpoint(f'{resource_dir}/100_instances_100_iterations.ht')
+
+
+@pytest.mark.parametrize('mds', [laaber_mds, schultz_mds])
+def test_minimal_detectable_slowdown(_100_instances_100_iterations, mds):
+    mds(_100_instances_100_iterations, n_experiments=1)._force_count()
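
analyze_benchmarks is exercised here with n_bootstrap_iterations=1000 and confidence=0.95; its implementation is not part of this diff. A minimal sketch of the kind of bootstrap comparison those parameters imply, assuming a mean-ratio statistic (all names below are illustrative, not the library's API):

import numpy as np

def bootstrap_ratio_ci(old, new, n_bootstrap_iterations=1000, confidence=0.95, seed=0):
    # Resample each timing sample with replacement and compare means; the
    # spread of the resampled ratios yields a confidence interval for the
    # new/old slowdown.
    rng = np.random.default_rng(seed)
    old, new = np.asarray(old), np.asarray(new)
    ratios = np.empty(n_bootstrap_iterations)
    for i in range(n_bootstrap_iterations):
        ratios[i] = rng.choice(new, new.size).mean() / rng.choice(old, old.size).mean()
    return tuple(np.quantile(ratios, [(1 - confidence) / 2, (1 + confidence) / 2]))

# e.g. bootstrap_ratio_ci([1.0, 1.1, 0.9], [1.2, 1.3, 1.1]) -> an interval
# entirely above 1 suggests a slowdown at the chosen confidence level.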

hail/python/benchmark/hail/benchmark_table.py

Lines changed: 10 additions & 9 deletions
@@ -29,15 +29,16 @@ def test_table_range_force_count():
     hl.utils.range_table(100_000_000)._force_count()
 
 
-def test_table_range_join_1b_1k():
-    ht1 = hl.utils.range_table(1_000_000_000)
-    ht2 = hl.utils.range_table(1_000)
-    ht1.join(ht2, 'inner').count()
-
-
-def test_table_range_join_1b_1b():
-    ht1 = hl.utils.range_table(1_000_000_000)
-    ht2 = hl.utils.range_table(1_000_000_000)
+@pytest.mark.parametrize(
+    'm, n',
+    [
+        (1_000_000_000, 1_000),
+        (1_000_000_000, 1_000_000_000),
+    ],
+)
+def test_table_range_join(m, n):
+    ht1 = hl.utils.range_table(m)
+    ht2 = hl.utils.range_table(n)
     ht1.join(ht2, 'inner').count()
 
 
hail/python/benchmark/hail/conftest.py

Lines changed: 19 additions & 25 deletions
@@ -72,6 +72,8 @@
     many_strings_ht,
     many_strings_tsv,
     onekg_chr22,
+    onethreethree,
+    onethreetwo,
     profile25_mt,
     profile25_vcf,
     random_doubles_mt,
@@ -119,39 +121,29 @@ def pytest_runtest_protocol(item, nextitem):
     # - each benchmark runs in a clean hail session
     # - our means of getting max task memory is quite crude (regex on logs)
     #   and a fresh session provides a new log
-    with init_hail(item.config):
-        if (num_iterations := item.config.getoption('iterations')) is not None:
-            burn_in_iterations = 0
-            logging.info(
-                msg=(
-                    f'Picked up iterations override. Config: '
-                    f'burn_in_iterations: {burn_in_iterations}, '
-                    f'iterations: {num_iterations}.'
-                )
-            )
 
-        else:
-            burn_in_iterations = 1
-            num_iterations = 5
+    nburn_in_iterations = item.config.getoption('burn_in_iterations', 3)
+    niterations = item.config.getoption('iterations', 3)
+
+    with init_hail(item.config):
+        logging.info(
+            msg=(
+                f'Executing "{item.nodeid}" with '
+                f'{nburn_in_iterations} burn in iterations and '
+                f'{niterations} timed iterations.'
+            ),
+        )
 
         s = item.stash
         s[start] = datetime.now(timezone.utc).isoformat()
        s[iterations] = []
         s[consecutive_fail_count] = 0
         s[end] = None
 
-        logging.info(
-            msg=(
-                f'Executing "{item.nodeid}" with '
-                f'{burn_in_iterations} burn in iterations and '
-                f'{num_iterations} timed iterations.'
-            )
-        )
-
         max_failures = item.config.getoption('max_failures')
 
         s[context] = 'burn_in'
-        for k in range(burn_in_iterations):
+        for k in range(nburn_in_iterations):
             if max_failures and s[consecutive_fail_count] >= max_failures:
                 break
 
@@ -165,8 +157,8 @@ def pytest_runtest_protocol(item, nextitem):
             runner.pytest_runtest_protocol(item, nextitem=item.parent)
 
         s[context] = 'benchmark'
-        total_iterations = burn_in_iterations + num_iterations
-        for k in range(burn_in_iterations, total_iterations):
+        total_iterations = nburn_in_iterations + niterations
+        for k in range(nburn_in_iterations, total_iterations):
             if max_failures and s[consecutive_fail_count] >= max_failures:
                 break
 
@@ -179,7 +171,7 @@ def pytest_runtest_protocol(item, nextitem):
 
         if max_failures and s[consecutive_fail_count] >= max_failures:
             logging.error(
-                msg=(f'Benchmarking "{item.nodeid}" aborted due to too many consecutive failures (max={max_failures})')
+                msg=f'Benchmarking "{item.nodeid}" aborted due to too many consecutive failures (max={max_failures})',
             )
 
         # prevent other plugins running that might invoke the benchmark again
@@ -333,6 +325,8 @@ def new_query_tmpdir(tmp_path):
     'many_strings_tsv',
     'new_query_tmpdir',
     'onekg_chr22',
+    'onethreethree',
+    'onethreetwo',
     'profile25_mt',
     'profile25_vcf',
     'random_doubles_mt',

hail/python/benchmark/tools/__init__.py

Lines changed: 7 additions & 0 deletions
@@ -1,6 +1,9 @@
 import logging
 from typing import Any, Callable, Generator, List, Optional, Sequence, TypeVar
 
+from hail.expr import ArrayStructExpression
+from hail.expr import enumerate as hl_enumerate
+
 A = TypeVar('A')
 
 
@@ -24,5 +27,9 @@ def select(keys: List[str], **kwargs) -> List[Optional[Any]]:
     return [kwargs.get(k) for k in keys]
 
 
+def annotate_index(arr: ArrayStructExpression) -> ArrayStructExpression:
+    return hl_enumerate(arr).map(lambda t: t[1].annotate(idx=t[0]))
+
+
 def init_logging(file=None):
     logging.basicConfig(format="%(asctime)-15s: %(levelname)s: %(message)s", level=logging.INFO, filename=file)
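
The new annotate_index helper tags each struct in an array with its position: hl.enumerate pairs each element with its index (index first by default), and the map folds that index back into the struct. A small usage example, assuming an initialized Hail session:

import hail as hl
from benchmark.tools import annotate_index

# enumerate yields (index, element) tuples; annotate_index adds the index
# back onto each struct as a new `idx` field.
arr = hl.array([hl.struct(time=1.5), hl.struct(time=2.5)])
hl.eval(annotate_index(arr))
# [Struct(time=1.5, idx=0), Struct(time=2.5, idx=1)]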

hail/python/benchmark/tools/compare.py

Lines changed: 0 additions & 146 deletions
This file was deleted.
