Skip to content

Commit 207fa9b

Browse files
committed
laaber + schultz's method of estimating slowdown
1 parent c2c2417 commit 207fa9b

20 files changed

Lines changed: 960 additions & 4679 deletions
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
data/
2+
out/
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"This notebook explores variability in hail's python (macro)-benchmarks when\n",
8+
"said benchmarks are executed on the hail batch service. The analyses within \n",
9+
"are based off the methods proposed in [1], albeit slightly modified for long\n",
10+
"running benchmarks. The goals of these analyses are\n",
11+
"\n",
12+
"- to determine if we can detect slowdowns of 5% or less reliably when running\n",
13+
" benchmarks on hail batch.\n",
14+
"- to identify configurations (number of batch jobs x iterations) that allow us\n",
15+
" to detect slowdowns efficiently (i.e. without excessive time and money).\n",
16+
"\n",
17+
"[1] Laaber et al., Software Microbenchmarking in the Cloud. How Bad is it Really?\n",
18+
" https://dl.acm.org/doi/10.1007/s10664-019-09681-1"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {},
25+
"outputs": [],
26+
"source": [
27+
"from pathlib import Path\n",
28+
"\n",
29+
"from benchmark.tools.impex import dump_tsv, import_timings\n",
30+
"from benchmark.tools.plotting import plot_mean_time_per_instance, plot_trial_against_time\n",
31+
"from benchmark.tools.statistics import (\n",
32+
" bootstrap_mean_confidence_interval,\n",
33+
" laaber_mds,\n",
34+
" schultz_mds,\n",
35+
" variability,\n",
36+
")\n",
37+
"from IPython.display import clear_output\n",
38+
"from plotly.io import renderers\n",
39+
"\n",
40+
"import hail as hl\n",
41+
"\n",
42+
"renderers.default = 'notebook_connected'"
43+
]
44+
},
45+
{
46+
"cell_type": "code",
47+
"execution_count": null,
48+
"metadata": {},
49+
"outputs": [],
50+
"source": [
51+
"hl.init(backend='spark', idempotent=True, local_tmpdir='/tmp/mds')\n",
52+
"hl._set_flags(use_new_shuffle='1', lower='1')"
53+
]
54+
},
55+
{
56+
"cell_type": "code",
57+
"execution_count": null,
58+
"metadata": {},
59+
"outputs": [],
60+
"source": [
61+
"# Import benchmark data\n",
62+
"# ---------------------\n",
63+
"#\n",
64+
"# benchmarks under `hail/python/benchmarks` are executed with a custom pytest\n",
65+
"# plugin and their results are output as json lines (.jsonl).\n",
66+
"# Unscrupulously, we use hail to analyse itself.\n",
67+
"\n",
68+
"with hl.TemporaryFilename(dir='/tmp') as tsvfile:\n",
69+
" timings = Path(tsvfile)\n",
70+
" dump_tsv(Path('data/1k.jsonl'), timings)\n",
71+
" ht = import_timings(timings)\n",
72+
" ht = ht.checkpoint('out/imported.ht', overwrite=True)\n",
73+
"\n",
74+
"benchmarks = ht.aggregate(hl.agg.collect_as_set(ht.name))\n",
75+
"print(*benchmarks, sep='\\n')"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"metadata": {},
82+
"outputs": [],
83+
"source": [
84+
"t = hl.read_table('out/imported.ht')\n",
85+
"t = t.filter(hl.len(t.instances) < 60)\n",
86+
"names = t.aggregate(hl.array(hl.agg.collect_as_set(t.path + '::' + t.name)))\n",
87+
"print(*names, sep='\\n')"
88+
]
89+
},
90+
{
91+
"cell_type": "code",
92+
"execution_count": null,
93+
"metadata": {},
94+
"outputs": [],
95+
"source": [
96+
"# Plotting the time vs iteration for all instances provides a visual way of\n",
97+
"# identifying the number of burn-in iterations required to reach a steady state.\n",
98+
"# Note that a steady state is never reached in some cases.\n",
99+
"\n",
100+
"for fig in plot_trial_against_time(ht, names=names):\n",
101+
" clear_output(wait=True)\n",
102+
" print(fig.labels.title)\n",
103+
" fig.show()\n",
104+
" input()"
105+
]
106+
},
107+
{
108+
"cell_type": "code",
109+
"execution_count": null,
110+
"metadata": {},
111+
"outputs": [],
112+
"source": [
113+
"# This is an iterative process. Select the minimum number of burn-in iterations\n",
114+
"# required for each benchmark. Replot and verify that the graph is more-or-less\n",
115+
"# flat. This may not be possible in all cases.\n",
116+
"\n",
117+
"\n",
118+
"def filter_burn_in_iterations(ht: hl.Table) -> hl.Table:\n",
119+
" ht = ht.annotate_globals(\n",
120+
" first_stable_index={\n",
121+
" 'benchmark_join_partitions_table[100-10]': 15,\n",
122+
" 'benchmark_union_partitions_table[10-10]': 4,\n",
123+
" 'benchmark_join_partitions_table[1000-1000]': 15,\n",
124+
" 'benchmark_write_range_table[10000000-1000]': 5,\n",
125+
" 'benchmark_matrix_table_array_arithmetic': 15,\n",
126+
" 'benchmark_table_aggregate_array_sum': 5,\n",
127+
" 'benchmark_matrix_table_cols_show': 10,\n",
128+
" 'benchmark_pc_relate': hl.missing(hl.tint),\n",
129+
" 'benchmark_write_profile_mt': 20,\n",
130+
" 'benchmark_table_aggregate_approx_cdf': 28,\n",
131+
" 'benchmark_table_aggregate_counter': 12,\n",
132+
" 'benchmark_table_show': 10,\n",
133+
" 'benchmark_export_range_matrix_table_entry_field_p100': 5,\n",
134+
" 'benchmark_group_by_collect_per_row': 8,\n",
135+
"\n",
136+
" 'benchmark_export_range_matrix_table_row_p100': 20,\n",
137+
" 'benchmark_import_gvcf_force_count': 10,\n",
138+
" 'benchmark_matrix_table_take_col': 30,\n",
139+
" 'benchmark_ndarray_matmul_int64': 23,\n",
140+
" 'benchmark_sample_qc': 14,\n",
141+
" 'benchmark_shuffle_key_rows_by_mt': 10,\n",
142+
" 'benchmark_union_partitions_table[100-100]': 40,\n",
143+
" },\n",
144+
" )\n",
145+
"\n",
146+
" return ht.select(\n",
147+
" instances=ht.instances.map(\n",
148+
" lambda instance: instance.annotate(\n",
149+
" trials=(instance.trials.filter(lambda t: t.iteration >= ht.first_stable_index[ht.name]))\n",
150+
" )\n",
151+
" ),\n",
152+
" )\n",
153+
"\n",
154+
"\n",
155+
"ht = filter_burn_in_iterations(ht)\n",
156+
"plot_trial_against_time(ht)"
157+
]
158+
},
159+
{
160+
"cell_type": "code",
161+
"execution_count": null,
162+
"metadata": {},
163+
"outputs": [],
164+
"source": [
165+
"# As a final step of cleaning, we'll filter out trials that differ by some\n",
166+
"# multiplier of the median for each instance\n",
167+
"\n",
168+
"\n",
169+
"def filter_outliers(ht: hl.Table, factor: hl.Float64Expression) -> hl.Table:\n",
170+
" # Keep only trials whose time is within a factor of `factor` of the instance median.\n",
171+
" return ht.select(\n",
172+
" instances=ht.instances.map(\n",
173+
" lambda instance: instance.annotate(\n",
174+
" trials=hl.bind(\n",
175+
" lambda median: instance.trials.filter(\n",
176+
" lambda t: hl.max([t.time, median]) / hl.min([t.time, median]) < factor\n",
177+
" ),\n",
178+
" hl.median(instance.trials.map(lambda t: t.time)),\n",
179+
" )\n",
180+
" ),\n",
181+
" ),\n",
182+
" )\n",
183+
"\n",
184+
"\n",
185+
"ht = filter_outliers(ht, hl.float64(10))\n",
186+
"plot_trial_against_time(ht)"
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": null,
192+
"metadata": {},
193+
"outputs": [],
194+
"source": [
195+
"# These plots show the mean time per instance. This provides a visual way of\n",
196+
"# identifying differences in instance type if there are multiple distinct layers\n",
197+
"\n",
198+
"plot_mean_time_per_instance(ht)"
199+
]
200+
},
201+
{
202+
"cell_type": "code",
203+
"execution_count": null,
204+
"metadata": {},
205+
"outputs": [],
206+
"source": [
207+
"ht = ht.select(instances=ht.instances.trials.time).checkpoint('out/pruned.ht', overwrite=True)"
208+
]
209+
},
210+
{
211+
"cell_type": "code",
212+
"execution_count": null,
213+
"metadata": {},
214+
"outputs": [],
215+
"source": [
216+
"# laaber et al. section 4\n",
217+
"\n",
218+
"variability(ht).show()"
219+
]
220+
},
221+
{
222+
"cell_type": "code",
223+
"execution_count": null,
224+
"metadata": {},
225+
"outputs": [],
226+
"source": [
227+
"# laaber et al. section 5 - bootstrapping confidence intervals of the mean\n",
228+
"\n",
229+
"bootstrap_mean_confidence_interval(ht, 1000, 0.95).show()"
230+
]
231+
},
232+
{
233+
"cell_type": "code",
234+
"execution_count": null,
235+
"metadata": {},
236+
"outputs": [],
237+
"source": [
238+
"# Minimal-Detectable Slowdown (MDS), estimated with both the Laaber et al. and Schultz methods\n",
239+
"\n",
240+
"laaber = laaber_mds(ht).checkpoint('out/laaber-mds.ht', overwrite=True)\n",
241+
"schultz = schultz_mds(ht).checkpoint('out/schultz-mds.ht', overwrite=True)"
242+
]
243+
},
244+
{
245+
"cell_type": "code",
246+
"execution_count": null,
247+
"metadata": {
248+
"slideshow": {
249+
"slide_type": "fragment"
250+
}
251+
},
252+
"outputs": [],
253+
"source": [
254+
"\n",
255+
"mds = laaber.select(laaber=laaber.row_value, schultz=schultz[laaber.key])\n",
256+
"mds.show(100_000)"
257+
]
258+
}
259+
],
260+
"metadata": {
261+
"kernelspec": {
262+
"display_name": ".venv",
263+
"language": "python",
264+
"name": "python3"
265+
},
266+
"language_info": {
267+
"codemirror_mode": {
268+
"name": "ipython",
269+
"version": 3
270+
},
271+
"file_extension": ".py",
272+
"mimetype": "text/x-python",
273+
"name": "python",
274+
"nbconvert_exporter": "python",
275+
"pygments_lexer": "ipython3",
276+
"version": "3.9.18"
277+
}
278+
},
279+
"nbformat": 4,
280+
"nbformat_minor": 4
281+
}

0 commit comments

Comments
 (0)