import os
import json

# Configuration
CONFIG_FILE = "xgb_cpu_main_config.json"
DATASET_FOLDER = "dataset"
EXPECTED_DATASETS = ["mlsr", "mortgage1Q", "plasticc", "santander"]

def load_config():
    """Load the benchmark configuration file."""
    if not os.path.exists(CONFIG_FILE):
        print(f"ERROR: Configuration file '{CONFIG_FILE}' not found. Verify its location.")
        return None

    with open(CONFIG_FILE, "r") as f:
        try:
            return json.load(f)
        except json.JSONDecodeError:
            print(f"ERROR: Failed to parse '{CONFIG_FILE}'. Ensure it contains valid JSON.")
            return None

def check_datasets():
    """Check whether the required datasets exist in the dataset folder."""
    missing_datasets = []
    for dataset in EXPECTED_DATASETS:
        dataset_path = os.path.join(DATASET_FOLDER, dataset)
        if not os.path.exists(dataset_path):
            print(f"⚠️ WARNING: Dataset '{dataset}' is missing from '{DATASET_FOLDER}/'.")
            missing_datasets.append(dataset)

    if missing_datasets:
        print("\n🔹 Suggested Actions:")
        print(f"- Ensure dataset names are correct in the '{DATASET_FOLDER}/' folder.")
        print("- Download the missing datasets if necessary.")
        print(f"- If dataset names differ, update '{CONFIG_FILE}'.\n")

    return missing_datasets

def update_config(missing_datasets):
    """Fix dataset paths in the configuration file if necessary."""
    config = load_config()
    if not config:
        return

    updated = False
    for dataset in missing_datasets:
        if dataset in config.get("datasets", {}):
            print(f"🛠️ Fixing dataset path for '{dataset}' in {CONFIG_FILE}...")
            config["datasets"][dataset] = os.path.join(DATASET_FOLDER, f"{dataset}.csv")  # Adjust extension if necessary
            updated = True

    if updated:
        with open(CONFIG_FILE, "w") as f:
            json.dump(config, f, indent=4)
        print(f"✅ {CONFIG_FILE} has been updated with corrected dataset paths.")
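
# Illustrative note: update_config() only rewrites entries under a top-level
# "datasets" mapping (dataset name -> path). The layout below is a hypothetical
# sketch of what this script assumes, not necessarily the actual
# xgb_cpu_main_config.json schema used by the benchmark; adjust names and
# extensions to match the real file.
#
#     {
#         "datasets": {
#             "mlsr": "dataset/mlsr.csv",
#             "mortgage1Q": "dataset/mortgage1Q.csv",
#             "plasticc": "dataset/plasticc.csv",
#             "santander": "dataset/santander.csv"
#         }
#     }
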
if __name__ == "__main__":
    print("🔍 Checking dataset availability...\n")
    missing = check_datasets()

    if missing:
        update_config(missing)
    else:
        print("✅ All datasets are present. You can proceed with benchmarking.")