From c5d4bd79fc1997af27a9e4a68c53003fdc9ba85c Mon Sep 17 00:00:00 2001
From: William Grant
Date: Fri, 17 Feb 2023 16:55:57 -0500
Subject: [PATCH] Benchmarking with airspeed velocity.

Adds a benchmarks/ folder with speed and memory tests which we can run
under airspeed velocity (asv) to track performance over time.
---
 benchmarks/.gitignore                         |   1 +
 benchmarks/asv.conf.json                      |  19 ++
 benchmarks/benchmarks/__init__.py             |   0
 benchmarks/benchmarks/cached_speed.py         |  49 ++++
 benchmarks/benchmarks/memory.py               |  39 ++++
 .../benchmarks/uncached_absolute_speed.py     | 215 ++++++++++++++++++
 .../benchmarks/uncached_relative_speed.py     | 132 +++++++++++
 7 files changed, 455 insertions(+)
 create mode 100644 benchmarks/.gitignore
 create mode 100644 benchmarks/asv.conf.json
 create mode 100644 benchmarks/benchmarks/__init__.py
 create mode 100644 benchmarks/benchmarks/cached_speed.py
 create mode 100644 benchmarks/benchmarks/memory.py
 create mode 100644 benchmarks/benchmarks/uncached_absolute_speed.py
 create mode 100644 benchmarks/benchmarks/uncached_relative_speed.py

diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
new file mode 100644
index 000000000..80922a4dd
--- /dev/null
+++ b/benchmarks/.gitignore
@@ -0,0 +1 @@
+.asv/
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 000000000..fa673cd07
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,19 @@
+{
+    "version": 1,
+    "project": "typed_python",
+    "project_url": "typed-python.readthedocs.io",
+    "repo": "..",
+    "branches": ["dev"],
+    "dvcs": "git",
+    "show_commit_url": "https://github.com/APrioriInvestments/typed_python/commit/",
+    "benchmark_dir": "benchmarks",
+    "build_cache_size": 8,
+    "build_command": ["pip install numpy",
+                      "python setup.py build",
+                      "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    ],
+    "environment_type": "conda",
+    "env_dir": ".asv/env",
+    "results_dir": ".asv/results",
+    "html_dir": ".asv/html"
+}
diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
new file mode 100644
index 000000000..e69de29bb
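The config above tells asv to build the repo (repo: "..") into conda environments, discover suites under benchmark_dir, and keep results and HTML inside .asv/. The usual workflow, run from the benchmarks/ directory with asv installed (e.g. pip install asv), is standard asv usage rather than anything this patch adds:

    asv run        # build the project and benchmark the configured branches ("dev")
    asv publish    # render accumulated results into static HTML in .asv/html
    asv preview    # serve the generated report locally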
diff --git a/benchmarks/benchmarks/cached_speed.py b/benchmarks/benchmarks/cached_speed.py
new file mode 100644
index 000000000..99accdeb7
--- /dev/null
+++ b/benchmarks/benchmarks/cached_speed.py
@@ -0,0 +1,49 @@
+"""
+Benchmarks:
+- are organised into suites (one class per suite)
+- can have setup and teardown methods
+- have method names starting with time_, mem_, track_, or peakmem_.
+
+The timing tests fall into four classes along two axes:
+- Cached/Uncached - i.e. is the compiler cache turned on.
+- Absolute/Relative - i.e. do we measure the absolute time taken (with time_*)
+  or the relative time taken (with track_*).
+
+TODO:
+    - Extend the suite to better track real usage
+    - Add tests comparing the performance of already-cached code (rather than cold start)
+"""
+import tempfile
+
+from typed_python.test_util import evaluateExprInFreshProcess
+
+
+class TimeSuiteCached:
+    def time_cache_handles_changed_types(self):
+        xmodule1 = "\n".join([
+            "@Entrypoint",
+            "def f(x):",
+            "    return x",
+            "aList=[]",
+            "@Entrypoint",
+            "def g1(x):",
+            "    return len(aList) + f(x)",
+        ])
+
+        xmodule2 = "\n".join([
+            "@Entrypoint",
+            "def f(x):",
+            "    return x",
+            "@Entrypoint",
+            "def g2(x):",
+            "    return f(x)",
+        ])
+
+        VERSION1 = {'x.py': xmodule1}
+        VERSION2 = {'x.py': xmodule2}
+
+        # three fresh processes: populate the cache, swap in a changed module,
+        # then load the original module again through the cache
+        with tempfile.TemporaryDirectory() as compilerCacheDir:
+            assert evaluateExprInFreshProcess(VERSION1, 'x.g1(1)', compilerCacheDir) == 1
+            assert evaluateExprInFreshProcess(VERSION2, 'x.f(1)', compilerCacheDir) == 1
+            assert evaluateExprInFreshProcess(VERSION1, 'x.g1(1)', compilerCacheDir) == 1
diff --git a/benchmarks/benchmarks/memory.py b/benchmarks/benchmarks/memory.py
new file mode 100644
index 000000000..02153528b
--- /dev/null
+++ b/benchmarks/benchmarks/memory.py
@@ -0,0 +1,39 @@
+"""
+Benchmarks:
+- are organised into suites (one class per suite)
+- can have setup and teardown methods
+- have method names starting with time_, mem_, track_, or peakmem_.
+
+The timing tests fall into four classes along two axes:
+- Cached/Uncached - i.e. is the compiler cache turned on.
+- Absolute/Relative - i.e. do we measure the absolute time taken (with time_*)
+  or the relative time taken (with track_*).
+
+TODO:
+    - Extend the suite to better track real usage
+    - Add tests comparing the performance of already-cached code (rather than cold start)
+"""
+from typed_python import Class, Member, Held, Final
+
+
+@Held
+class H(Class, Final):
+    x = Member(int, nonempty=True)
+    y = Member(float, nonempty=True)
+
+    def f(self):
+        return self.x + self.y
+
+    def addToX(self, y):
+        return self.x + y
+
+    def increment(self):
+        self.x += 1
+        self.y += 1
+
+
+class MemSuite:
+    def peakmem_held_class_on_heap(self):
+        for _ in range(100000):
+            H()
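asv's peakmem_ benchmarks report the peak resident memory of the benchmark process, so MemSuite tracks the high-water mark of allocating 100000 held-class instances. If we later want to sweep the allocation count rather than hard-coding it, asv's params machinery supports that; a minimal sketch (hypothetical, not part of this patch; reuses H from memory.py above):

    class MemSuiteParameterized:
        # asv repeats the benchmark once per value in `params`,
        # passing the value in as an argument
        params = [1000, 100000]
        param_names = ['allocations']

        def peakmem_held_class_on_heap(self, allocations):
            for _ in range(allocations):
                H()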
diff --git a/benchmarks/benchmarks/uncached_absolute_speed.py b/benchmarks/benchmarks/uncached_absolute_speed.py
new file mode 100644
index 000000000..c1f13a1a0
--- /dev/null
+++ b/benchmarks/benchmarks/uncached_absolute_speed.py
@@ -0,0 +1,215 @@
+"""
+Benchmarks:
+- are organised into suites (one class per suite)
+- can have setup and teardown methods
+- have method names starting with time_, mem_, track_, or peakmem_.
+
+The timing tests fall into four classes along two axes:
+- Cached/Uncached - i.e. is the compiler cache turned on.
+- Absolute/Relative - i.e. do we measure the absolute time taken (with time_*)
+  or the relative time taken (with track_*).
+
+TODO:
+    - Extend the suite to better track real usage
+    - Add tests comparing the performance of already-cached code (rather than cold start)
+"""
+import typed_python.compiler.python_ast_analysis as python_ast_analysis
+import typed_python.python_ast as python_ast
+
+from typed_python import Entrypoint, Class, Member, TupleOf, Function, ListOf
+from typed_python.test_util import CodeEvaluator
+
+
+class AClass(Class):
+    x = Member(int)
+    y = Member(float)
+    z = Member(TupleOf(int))
+
+    def f(self) -> float:
+        return self.x + self.y
+
+    def f(self, arg) -> float:  # noqa
+        return self.x + self.y + arg
+
+    def g(self) -> float:
+        return 100
+
+    def add(self, x) -> float:
+        return 100 + x
+
+    def loop(self, count: int) -> float:
+        i = 0
+        res = self.y
+        while i < count:
+            res = res + self.y
+            i = i + 1
+
+        return res
+
+
+class AChildClass(AClass):
+    def g(self) -> float:
+        return 1234
+
+    def add(self, x) -> float:
+        if isinstance(x, int):
+            return 0.2
+
+        return 1234 + x
+
+
+class TimeSuiteAbsoluteUncached:
+    """The typed_python perf tests, recast as asv benchmarks."""
+
+    def time_variables_read(self):
+        count = 200
+        evaluator = CodeEvaluator()
+
+        def makeF(cCount):
+            CODE = (
+                "def f():\n"
+                "    class C:\n"
+            ) + (
+                "        class B:\n"
+                "            pass\n"
+            ) * cCount
+            moduleDict = {}
+            evaluator.evaluateInto(CODE, moduleDict)
+            return moduleDict.get('f')
+
+        pyast = python_ast.convertFunctionToAlgebraicPyAst(makeF(count))
+        python_ast_analysis.computeVariablesReadByClosures(pyast.body)
+
+    def time_bytes_add(self):
+        @Entrypoint
+        def bytesAdd(x: bytes):
+            i = 0
+            res = 0
+            while i < len(x):
+                j = 0
+                while j < len(x):
+                    res = res + x[i] + x[j]
+                    j = j + 1
+                i = i + 1
+            return res
+
+        bytesAdd(b" " * 1)  # once to compile
+        bytesAdd(b" " * 2000)
+
+    def time_bytes_split(self):
+        @Entrypoint
+        def splitAndCount(s: bytes, sep: bytes, times: int):
+            res = 0
+            for i in range(times):
+                res += len(s.split(sep))
+            return res
+
+        splitAndCount(b"a," * 100, b",", 10)  # once to compile
+        splitAndCount(b"a," * 100, b",", 10_000)
+
+    def time_call_method_dispatch(self):
+        @Entrypoint
+        def addCaller(c: AClass, count: int):
+            res = c.add(1) + c.add(2.5)
+
+            for i in range(count - 1):
+                res += c.add(1) + c.add(2.5)
+
+            return res
+
+        c = AClass()
+        c2 = AChildClass()
+
+        addCaller(c, 200 * 1)  # once to compile
+        addCaller(c, 200 * 10000)
+        addCaller(c2, 200 * 10000)
+
+    def time_call_closure(self):
+        ct = 1000000
+        aList1 = ListOf(int)([])
+
+        def makeAppender(l):
+            @Function
+            def append(y):
+                l.append(y)
+            return append
+
+        @Entrypoint
+        def callManyTimes(c1, ct):
+            for i in range(ct):
+                c1(i)
+
+        callManyTimes(makeAppender(aList1), 1)  # once to compile
+        aList1.clear()
+
+        callManyTimes(makeAppender(aList1), ct)
+
+    def time_assign_functions_with_closure(self):
+        @Entrypoint
+        def callIt(x):
+            y = 10.0
+
+            if x % 2:
+                def f(a):
+                    return a + y + 1.0
+            else:
+                def f(a):
+                    return a + y + 2.0
+
+            res = 0.0
+            for i in range(x):
+                x = x + 1
+                res += f(i)
+
+            return res
+
+        callIt(1)  # once to compile
+        callIt(1000000)
+
+    def time_mtrgs(self):
+        """Time a mutually recursive group of functions through the compiler."""
+        def q(x):
+            return x - 1
+
+        def z(x):
+            return q(x) + 1
+
+        def f(x):
+            return z(g(x - 1)) + z(g(x - 2)) + z(x)
+
+        @Entrypoint
+        def g(x):
+            if x > 0:
+                return z(f(x - 1)) * z(2) + f(x - 2)
+            return 1
+
+        for input in [18, 18.0]:
+            for _ in range(1000):
+                g(input)
+
+    def time_inlining(self):
+        """Absolute timing only; the meaningful comparison is track_inlining_ratio in the relative suite."""
+        def f1(x):
+            return f2(x)
+
+        def f2(x):
+            return f3(x)
+
+        def f3(x):
+            return f4(x)
+
+        def f4(x: int):
+            return x
+
+        @Entrypoint
+        def callsF1(times: int):
+            res = 0.0
+            for i in range(times):
+                res += f1(i)
+            return res
+
+        @Entrypoint
+        def callsF4(times: int):
+            res = 0.0
+            for i in range(times):
+                res += f4(i)
+            return res
+
+        # prime the compilation
+        callsF4(1)
+        callsF1(1)
+
+        callsF1(10000000)
+        callsF4(10000000)
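Note that every time_* method above defines and compiles its @Entrypoint functions inside the timed region, so each measurement includes one compilation plus the "once to compile" priming call. The docstring TODO about already-compiled code could be addressed with asv's setup hook, which runs before each measurement and is excluded from it; a minimal sketch (hypothetical, not part of this patch):

    from typed_python import Entrypoint

    class TimeSuiteHotPath:
        def setup(self):
            # compile outside the timed region so time_* sees only the hot path
            @Entrypoint
            def bytesAdd(x: bytes):
                res = 0
                i = 0
                while i < len(x):
                    res = res + x[i]
                    i = i + 1
                return res

            bytesAdd(b" ")  # prime the compiler
            self.bytesAdd = bytesAdd

        def time_bytes_add_hot(self):
            self.bytesAdd(b" " * 2000)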
diff --git a/benchmarks/benchmarks/uncached_relative_speed.py b/benchmarks/benchmarks/uncached_relative_speed.py
new file mode 100644
index 000000000..a4de19c04
--- /dev/null
+++ b/benchmarks/benchmarks/uncached_relative_speed.py
@@ -0,0 +1,132 @@
+"""
+Benchmarks:
+- are organised into suites (one class per suite)
+- can have setup and teardown methods
+- have method names starting with time_, mem_, track_, or peakmem_.
+
+The timing tests fall into four classes along two axes:
+- Cached/Uncached - i.e. is the compiler cache turned on.
+- Absolute/Relative - i.e. do we measure the absolute time taken (with time_*)
+  or the relative time taken (with track_*).
+
+TODO:
+    - Extend the suite to better track real usage
+    - Add tests comparing the performance of already-cached code (rather than cold start)
+"""
+import threading
+import time
+
+from typed_python import Entrypoint, Dict
+
+
+class TimeSuiteRelativeUncached:
+    unit = 'ratio'
+
+    def track_inlining_ratio(self):
+        def f1(x):
+            return f2(x)
+
+        def f2(x):
+            return f3(x)
+
+        def f3(x):
+            return f4(x)
+
+        def f4(x: int):
+            return x
+
+        @Entrypoint
+        def callsF1(times: int):
+            res = 0.0
+            for i in range(times):
+                res += f1(i)
+            return res
+
+        @Entrypoint
+        def callsF4(times: int):
+            res = 0.0
+            for i in range(times):
+                res += f4(i)
+            return res
+
+        # prime the compilation
+        callsF4(1)
+        callsF1(1)
+
+        t0 = time.time()
+        callsF1(10000000)
+        t1 = time.time()
+        callsF4(10000000)
+        t2 = time.time()
+
+        callsDeeply = t1 - t0
+        callsShallowly = t2 - t1
+
+        return callsDeeply / callsShallowly
+
+    def track_star_kwarg_intermediate_ratio(self):
+        def f(x, y):
+            return x + y
+
+        def g(**kwargs):
+            return f(**kwargs)
+
+        @Entrypoint
+        def sumUsingG(a: int):
+            res = 0.0
+            for i in range(a):
+                res += g(x=2, y=i)
+            return res
+
+        @Entrypoint
+        def sumUsingF(a: int):
+            res = 0.0
+            for i in range(a):
+                res += f(x=2, y=i)
+            return res
+
+        sumUsingF(10)  # once to compile
+        sumUsingG(10)  # once to compile
+
+        t0 = time.time()
+        sumUsingG(1000000)
+        elapsedG = time.time() - t0
+
+        t0 = time.time()
+        sumUsingF(1000000)
+        elapsedF = time.time() - t0
+
+        return elapsedF / elapsedG
+
+    def track_dict_read_write_multicore_ratio(self):
+        @Entrypoint
+        def dict_setmany(d, count, passes):
+            for _ in range(passes):
+                for i in range(count):
+                    if i in d:
+                        d[i] += i
+                    else:
+                        d[i] = i
+
+        # make sure we compile this immediately
+        aDictToForceCompilation = Dict(int, int)()
+        dict_setmany(aDictToForceCompilation, 1, 1)
+
+        # test it with one core
+        t0 = time.time()
+        aDict = Dict(int, int)()
+        dict_setmany(aDict, 10000, 100)
+        t1 = time.time()
+
+        # test it with 2 cores
+        threads = [
+            threading.Thread(target=dict_setmany, args=(Dict(int, int)(), 10000, 100))
+            for _ in range(2)
+        ]
+        t2 = time.time()
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+        t3 = time.time()
+
+        slowdownRatio = (t3 - t2) / (t1 - t0)
+
+        return slowdownRatio
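All three track_ benchmarks hand-roll the same time-two-callables-and-divide pattern on top of time.time, which is neither monotonic nor the highest-resolution clock Python offers. A shared helper built on time.perf_counter would tighten this up; a hypothetical refactor, not part of this patch:

    import time

    def timedRatio(numerator, denominator):
        # time two zero-argument callables and return the ratio of their elapsed
        # times; perf_counter is monotonic and finer-grained than time.time
        t0 = time.perf_counter()
        numerator()
        t1 = time.perf_counter()
        denominator()
        t2 = time.perf_counter()
        return (t1 - t0) / (t2 - t1)

track_inlining_ratio, for example, would then end with:

    return timedRatio(lambda: callsF1(10000000), lambda: callsF4(10000000))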