
Commit 1fffd97

split up benchmarks into separate files

1 parent 32b7515 commit 1fffd97

File tree

5 files changed (+456, -435 lines)

performance_tests/perftest/base.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
import os
import time
import warnings
from pathlib import Path

from .tests import result_data

if os.environ.get("FASTBENCH"):
    NUM_ITERATIONS = 1
    MIN_ITERATION_TIME = 5
    MAX_ITERATION_TIME = 10
    NUM_DOCS = 1000
else:
    NUM_ITERATIONS = 2
    MIN_ITERATION_TIME = 30
    MAX_ITERATION_TIME = 300
    NUM_DOCS = 10000


class Timer:
    def __enter__(self):
        self.start = time.monotonic()
        return self

    def __exit__(self, *args):
        self.end = time.monotonic()
        self.interval = self.end - self.start


# Copied from the driver benchmarking suite.
class PerformanceTest:
    dataset: str
    data_size: int
    test_data_path = os.environ.get(
        "DJANGO_MONGODB_PERFORMANCE_TEST_DATA_PATH",
        Path(os.path.realpath(__file__)).parent.parent / "odm-data",
    )
    num_docs = NUM_DOCS

    def setUp(self):
        self.setup_time = time.monotonic()

    def tearDown(self):
        duration = time.monotonic() - self.setup_time
        # Remove "Test" so that TestMyTestName is reported as "MyTestName".
        name = self.__class__.__name__[4:]
        median = self.percentile(50)
        megabytes_per_sec = self.data_size / median / 1000000
        print(  # noqa: T201
            f"Completed {self.__class__.__name__} {megabytes_per_sec:.3f} MB/s, "
            f"MEDIAN={self.percentile(50):.3f}s, "
            f"total time={duration:.3f}s, iterations={len(self.results)}"
        )
        result_data.append(
            {
                "info": {
                    "test_name": name,
                },
                "metrics": [
                    {
                        "name": "megabytes_per_sec",
                        "type": "MEDIAN",
                        "value": megabytes_per_sec,
                        "metadata": {
                            "improvement_direction": "up",
                            "measurement_unit": "megabytes_per_second",
                        },
                    },
                ],
            }
        )

    def before(self):
        pass

    def do_task(self):
        raise NotImplementedError

    def after(self):
        pass

    def percentile(self, percentile):
        if hasattr(self, "results"):
            sorted_results = sorted(self.results)
            percentile_index = int(len(sorted_results) * percentile / 100) - 1
            return sorted_results[percentile_index]
        self.fail("Test execution failed")
        return None

    def runTest(self):
        results = []
        start = time.monotonic()
        i = 0
        while True:
            i += 1
            self.before()
            with Timer() as timer:
                self.do_task()
            self.after()
            results.append(timer.interval)
            duration = time.monotonic() - start
            if duration > MIN_ITERATION_TIME and i >= NUM_ITERATIONS:
                break
            if duration > MAX_ITERATION_TIME:
                with warnings.catch_warnings():
                    warnings.simplefilter("default")
                    warnings.warn(
                        f"{self.__class__.__name__} timed out after {MAX_ITERATION_TIME}s, "
                        f"completed {i}/{NUM_ITERATIONS} iterations.",
                        stacklevel=2,
                    )
                break
        self.results = results
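
A note on the harness above: runTest() times only do_task(); before() and after() run outside the Timer, and tearDown() converts the median iteration time into MB/s via data_size. Setting the FASTBENCH environment variable shrinks the iteration and document counts for quick local runs. As a minimal sketch of how a new benchmark plugs into this base (TestDocEncoding and its document shape are hypothetical, not part of this commit):

    from unittest import TestCase

    from bson import encode

    from .base import PerformanceTest


    class TestDocEncoding(PerformanceTest, TestCase):
        """Hypothetical benchmark: BSON-encode num_docs copies of one document."""

        def setUp(self):
            super().setUp()
            # 50 string fields of 100 bytes each; the shape is an arbitrary example.
            self.document = {f"field{i}": "x" * 100 for i in range(50)}
            # data_size feeds the MB/s calculation in tearDown().
            self.data_size = len(encode(self.document)) * self.num_docs

        def do_task(self):
            # Only this method runs inside the Timer in runTest().
            for _ in range(self.num_docs):
                encode(self.document)

Mixing in unittest.TestCase, as the files below do, lets the standard test runner discover runTest and call setUp/tearDown around it.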
Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
from pathlib import Path
from unittest import TestCase

from bson import encode, json_util

from .base import PerformanceTest
from .models import LargeFlatModel


class LargeFlatDocTest(PerformanceTest):
    """Parent class for large flat document tests."""

    dataset = "large_doc.json"

    def setUp(self):
        super().setUp()
        with open(  # noqa: PTH123
            Path(self.test_data_path) / Path("flat-models") / self.dataset
        ) as data:
            self.document = json_util.loads(data.read())

        self.data_size = len(encode(self.document)) * self.num_docs
        self.documents = [self.document.copy() for _ in range(self.num_docs)]


class TestLargeFlatDocCreation(LargeFlatDocTest, TestCase):
    """Benchmark for creating a large flat document."""

    def do_task(self):
        for doc in self.documents:
            LargeFlatModel.objects.create(**doc)

    def after(self):
        LargeFlatModel.objects.all().delete()


class TestLargeFlatDocUpdate(LargeFlatDocTest, TestCase):
    """Benchmark for updating a field within a large flat document."""

    def setUp(self):
        super().setUp()
        for doc in self.documents:
            LargeFlatModel.objects.create(**doc)
        self.models = list(LargeFlatModel.objects.all())
        self.data_size = len(encode({"field1": "updated_value0"})) * self.num_docs
        self.iteration = 0

    def do_task(self):
        for model in self.models:
            model.field1 = "updated_value" + str(self.iteration)
            model.save()
        self.iteration += 1

    def tearDown(self):
        super().tearDown()
        LargeFlatModel.objects.all().delete()


class TestLargeFlatDocFilterPkByIn(LargeFlatDocTest, TestCase):
    """Benchmark for filtering large flat documents using the __in operator for primary keys."""

    def setUp(self):
        super().setUp()
        models = []
        for doc in self.documents:
            models.append(LargeFlatModel(**doc))
        LargeFlatModel.objects.bulk_create(models)
        self.ids = [model.id for model in models]

    def do_task(self):
        list(LargeFlatModel.objects.filter(id__in=self.ids))

    def tearDown(self):
        super().tearDown()
        LargeFlatModel.objects.all().delete()
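
The .models module imported here is among the five changed files, but its diff is not shown in this excerpt. Based on the **doc construction and the field1 update above, a plausible (not authoritative) sketch of LargeFlatModel; the field types and total field count are assumptions:

    from django.db import models


    class LargeFlatModel(models.Model):
        # Hypothetical shape: one scalar field per key in large_doc.json.
        field1 = models.TextField()
        field2 = models.TextField()
        # ...remaining fields mirror the rest of the dataset's keys.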
Lines changed: 135 additions & 0 deletions
@@ -0,0 +1,135 @@
from pathlib import Path
from unittest import TestCase

from bson import ObjectId, encode, json_util

from .base import PerformanceTest
from .models import (
    IntegerEmbeddedModel,
    LargeNestedModel,
    StringEmbeddedModel,
)


class LargeNestedDocTest(PerformanceTest):
    """Parent class for large nested document tests."""

    dataset = "large_doc_nested.json"

    def setUp(self):
        super().setUp()
        with open(  # noqa: PTH123
            Path(self.test_data_path) / Path("nested-models") / self.dataset
        ) as data:
            self.document = json_util.loads(data.read())

        self.data_size = len(encode(self.document)) * self.num_docs
        self.documents = [self.document.copy() for _ in range(self.num_docs)]

    def setUpData(self):
        for doc in self.documents:
            model = LargeNestedModel()
            for field_name, model_data in doc.items():
                if "array" in field_name:
                    array_models = []
                    for item in model_data:
                        embedded_str_model = StringEmbeddedModel(**item)
                        embedded_str_model.unique_field = str(ObjectId())
                        array_models.append(embedded_str_model)
                    setattr(model, field_name, array_models)
                elif "embedded_str_doc" in field_name:
                    embedded_str_model = StringEmbeddedModel(**model_data)
                    embedded_str_model.unique_field = str(ObjectId())
                    setattr(model, field_name, embedded_str_model)
                else:
                    embedded_int_model = IntegerEmbeddedModel(**model_data)
                    setattr(model, field_name, embedded_int_model)
            model.save()


class TestLargeNestedDocCreation(LargeNestedDocTest, TestCase):
    """Benchmark for creating a large nested document."""

    def do_task(self):
        for doc in self.documents:
            model = LargeNestedModel()
            for field_name, model_data in doc.items():
                if "array" in field_name:
                    array_models = []
                    for item in model_data:
                        embedded_str_model = StringEmbeddedModel(**item)
                        embedded_str_model.unique_field = str(ObjectId())
                        array_models.append(embedded_str_model)
                    setattr(model, field_name, array_models)
                elif "embedded_str_doc" in field_name:
                    embedded_str_model = StringEmbeddedModel(**model_data)
                    embedded_str_model.unique_field = str(ObjectId())
                    setattr(model, field_name, embedded_str_model)
                else:
                    embedded_int_model = IntegerEmbeddedModel(**model_data)
                    setattr(model, field_name, embedded_int_model)
            model.save()

    def after(self):
        LargeNestedModel.objects.all().delete()


class TestLargeNestedDocUpdate(LargeNestedDocTest, TestCase):
    """Benchmark for updating an embedded field within a large nested document."""

    def setUp(self):
        super().setUp()
        self.setUpData()
        self.models = list(LargeNestedModel.objects.all())
        self.data_size = len(encode({"field1": "updated_value0"})) * self.num_docs
        self.iteration = 0

    def do_task(self):
        for model in self.models:
            model.embedded_str_doc_1.field1 = "updated_value" + str(self.iteration)
            model.save()
        self.iteration += 1

    def tearDown(self):
        super().tearDown()
        LargeNestedModel.objects.all().delete()


class TestLargeNestedDocFilterById(LargeNestedDocTest, TestCase):
    """Benchmark for filtering large nested documents by a unique field in an embedded document."""

    def setUp(self):
        super().setUp()
        self.setUpData()
        self.ids = [
            model.embedded_str_doc_1.unique_field for model in list(LargeNestedModel.objects.all())
        ]

    def do_task(self):
        for _id in self.ids:
            list(LargeNestedModel.objects.filter(embedded_str_doc_1__unique_field=_id))

    def tearDown(self):
        super().tearDown()
        LargeNestedModel.objects.all().delete()


class TestLargeNestedDocFilterArray(LargeNestedDocTest, TestCase):
    """Benchmark for filtering large nested documents using the __in operator
    for unique values in an embedded document array."""

    def setUp(self):
        super().setUp()
        self.setUpData()
        self.ids = [
            model.embedded_str_doc_array[0].unique_field
            for model in list(LargeNestedModel.objects.all())
        ]

    def do_task(self):
        for _id in self.ids:
            list(LargeNestedModel.objects.filter(embedded_str_doc_array__unique_field__in=[_id]))

    def tearDown(self):
        super().tearDown()
        LargeNestedModel.objects.all().delete()
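
As with the flat tests, the embedded models come from the unshown .models diff. Given the embedded_str_doc_1__unique_field and embedded_str_doc_array__unique_field__in lookups above, a plausible sketch assuming django-mongodb-backend's embedded-model fields; field names beyond those referenced in the tests, and the exact field classes, are assumptions:

    from django.db import models
    from django_mongodb_backend.fields import EmbeddedModelArrayField, EmbeddedModelField
    from django_mongodb_backend.models import EmbeddedModel


    class StringEmbeddedModel(EmbeddedModel):
        # Only unique_field and field1 are referenced by the tests above.
        unique_field = models.CharField(max_length=24)  # holds str(ObjectId())
        field1 = models.TextField()


    class IntegerEmbeddedModel(EmbeddedModel):
        field1 = models.IntegerField()  # assumed field name


    class LargeNestedModel(models.Model):
        embedded_str_doc_1 = EmbeddedModelField(StringEmbeddedModel)
        embedded_str_doc_array = EmbeddedModelArrayField(StringEmbeddedModel)
        embedded_int_doc_1 = EmbeddedModelField(IntegerEmbeddedModel)  # assumed name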
