Skip to content

Commit 2f4a143

Browse files
committed
add convertor
1 parent bdedf0c commit 2f4a143

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

Diff for: benchmark/convert.py

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import argparse
2+
import glob
3+
import json
4+
import os
5+
6+
7+
def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def main():
    """Merge per-experiment benchmark result files into one JSON list.

    Command-line arguments:
        --input-dir: directory scanned for ``*-upload-*.json`` and
            ``*-search-*.json`` files.
        --output-file: path the combined JSON list is written to.

    Each search file is paired with the upload file that has the same
    ``params.experiment`` value, and one flattened record is emitted per
    search file. Target data structure of a record:

        {
            "engine_name": "qdrant",
            "setup_name": "qdrant-bq-rps-m-64-ef-256",
            "dataset_name": "dbpedia-openai-1M-1536-angular",
            "upload_time": 222.45490989403334,
            "total_upload_time": 593.0384756129934,
            "p95_time": 0.0025094749056734146,
            "rps": 1230.5984500596446,
            "parallel": 100.0,
            "p99_time": 0.014029250466264838,
            "mean_time": 0.00227582405093126,
            "mean_precisions": 0.95258,
            "engine_params": {
                "hnsw_ef": 64,
                "quantization": {
                    "rescore": true,
                    "oversampling": 4.0
                }
            }
        }

    Raises:
        KeyError: if a search file names an experiment that has no upload
            file, or if an expected key is missing from a file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-dir", type=str, required=True)
    parser.add_argument("--output-file", type=str, required=True)
    args = parser.parse_args()

    input_dir = args.input_dir
    output_file = args.output_file

    # glob order is OS-dependent; sort so the output order (and which file
    # wins when two uploads share an experiment name) is reproducible.
    searches = sorted(glob.glob(os.path.join(input_dir, "*-search-*.json")))
    uploads = sorted(glob.glob(os.path.join(input_dir, "*-upload-*.json")))

    print(f"input_dir: {input_dir}")
    print(f"output_file: {output_file}")

    print(f"searches: {len(searches)}")
    print(f"uploads: {len(uploads)}")

    # Index upload results by experiment name so searches can look them up.
    upload_data = {}
    for upload_file in uploads:
        data = _load_json(upload_file)
        upload_data[data["params"]["experiment"]] = data

    result_data = []
    for search_file in searches:
        data = _load_json(search_file)
        params = data["params"]
        experiment_name = params["experiment"]
        upload_results = upload_data[experiment_name]["results"]

        # The search's own "total_time" is left out of the record; every
        # other search metric is copied through unchanged.
        search_results = {
            key: value
            for key, value in data["results"].items()
            if key != "total_time"
        }

        result_data.append(
            {
                "engine_name": params["engine"],
                "setup_name": experiment_name,
                "dataset_name": params["dataset"],
                "upload_time": upload_results["upload_time"],
                "total_upload_time": upload_results["total_time"],
                "parallel": params["parallel"],
                "engine_params": params["config"],
                # Spread last, as in the original, so search metrics take
                # precedence if a key name ever collides.
                **search_results,
            }
        )

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(result_data, f, indent=2)
88+
89+
90+
# Script entry point: run the converter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)