Skip to content

Commit d30f084

Browse files
Merge pull request #2 from JaimeAdanCuevas/patch-1
Create refactoring_of_benchmarks.py
2 parents 1103aad + 2d4989f commit d30f084

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed

refactoring_of_benchmarks.py

+64
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
import os
2+
import json
3+
4+
# Configuration
5+
# Name of the JSON benchmark configuration file used by this script.
CONFIG_FILE = "xgb_cpu_main_config.json"

# Folder in which the datasets are looked up.
DATASET_FOLDER = "dataset"

# Dataset names that are checked for inside DATASET_FOLDER.
EXPECTED_DATASETS = [
    "mlsr",
    "mortgage1Q",
    "plasticc",
    "santander",
]
8+
9+
def load_config(config_file=None):
    """Load the benchmark configuration file.

    Args:
        config_file: Path of the JSON file to read. When ``None`` (the
            default) the module-level ``CONFIG_FILE`` is used, so existing
            zero-argument calls behave as before.

    Returns:
        The decoded JSON content, or ``None`` when the file is missing,
        cannot be read, or does not contain valid JSON. An error message is
        printed for each failure case.
    """
    if config_file is None:
        config_file = CONFIG_FILE

    # Open directly rather than checking os.path.exists() first: the
    # pre-check is racy and also succeeds for directories, which open()
    # would then reject with an uncaught OSError.
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(config_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"ERROR: Configuration file '{config_file}' not found. Verify its location.")
    except OSError as exc:
        print(f"ERROR: Could not read '{config_file}': {exc}")
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"ERROR: Failed to parse '{config_file}'. Ensure it contains valid JSON.")
    return None
21+
22+
def check_datasets(dataset_folder=None, expected_datasets=None):
    """Check if required datasets exist in the dataset folder.

    Args:
        dataset_folder: Folder to look in. When ``None`` (the default) the
            module-level ``DATASET_FOLDER`` is used.
        expected_datasets: Iterable of dataset names to look for. When
            ``None`` (the default) the module-level ``EXPECTED_DATASETS``
            is used. An explicitly passed empty iterable is honoured.

    Returns:
        A list of the names for which no path exists under the folder, in
        the order they were checked. A warning is printed per missing name,
        followed by a short list of suggested actions if any are missing.
    """
    if dataset_folder is None:
        dataset_folder = DATASET_FOLDER
    if expected_datasets is None:
        expected_datasets = EXPECTED_DATASETS

    missing_datasets = []
    for dataset in expected_datasets:
        dataset_path = os.path.join(dataset_folder, dataset)
        if not os.path.exists(dataset_path):
            print(f"⚠️ WARNING: Dataset '{dataset}' is missing in '{dataset_folder}'.")
            missing_datasets.append(dataset)

    if missing_datasets:
        print("\n🔹 Suggested Actions:")
        # Name the folder that was actually checked, not a fixed literal.
        print(f"- Ensure dataset names are correct in the '{dataset_folder}/' folder.")
        print("- Download the missing datasets if necessary.")
        print("- If dataset names differ, update 'xgb_cpu_main_config.json'.\n")

    return missing_datasets
38+
39+
def update_config(missing_datasets):
    """Fix dataset names in the configuration file if necessary.

    For every name in ``missing_datasets`` that is already a key of the
    configuration's ``"datasets"`` mapping, the entry is replaced with
    ``<DATASET_FOLDER>/<name>.csv`` and the configuration file is rewritten.
    The ``.csv`` extension is fixed here and the resulting path is not
    checked for existence.

    Args:
        missing_datasets: Iterable of dataset names to repoint.

    Returns:
        None. Nothing is written when the configuration cannot be loaded,
        has no usable ``"datasets"`` mapping, or contains none of the names.
    """
    config = load_config()
    if not config:
        return

    # Valid JSON is not necessarily an object holding a "datasets" mapping;
    # report that instead of failing with AttributeError/TypeError below.
    datasets = config.get("datasets", {}) if isinstance(config, dict) else None
    if not isinstance(datasets, dict):
        print(f"ERROR: '{CONFIG_FILE}' has no usable 'datasets' mapping; nothing was changed.")
        return

    updated = False
    for dataset in missing_datasets:
        if dataset in datasets:
            print(f"🛠️ Fixing dataset path for '{dataset}' in {CONFIG_FILE}...")
            # Adjust extension if necessary
            datasets[dataset] = os.path.join(DATASET_FOLDER, f"{dataset}.csv")
            updated = True

    if updated:
        # Explicit encoding so the rewrite does not depend on the platform
        # default.
        with open(CONFIG_FILE, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=4)
        print(f"✅ {CONFIG_FILE} has been updated with corrected dataset paths.")
56+
57+
if __name__ == "__main__":
    # Script entry point: report dataset availability, then try to repair
    # the configuration only when something is missing.
    print("🔍 Checking dataset availability...\n")
    missing = check_datasets()

    if not missing:
        print("✅ All datasets are present. You can proceed with benchmarking.")
    else:
        update_config(missing)

0 commit comments

Comments
 (0)