-
-
Notifications
You must be signed in to change notification settings - Fork 145
Expand file tree
/
Copy pathexec.py
More file actions
120 lines (96 loc) · 3.62 KB
/
exec.py
File metadata and controls
120 lines (96 loc) · 3.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import logging
import os
from pathlib import Path
from fedot.api.main import Fedot
from frameworks.shared.callee import call_run, result, output_subdir
from frameworks.shared.utils import Timer
log = logging.getLogger(__name__)
def run(dataset, config):
    """Benchmark entry point: train FEDOT AutoML and score the test split.

    Builds a ``Fedot`` estimator from the benchmark ``config`` (time budget,
    cores, seed, metric), fits it on ``dataset.train``, predicts on
    ``dataset.test``, persists any requested artifacts, and returns the
    standard AMLB result payload.
    """
    log.info("\n**** FEDOT ****\n")

    is_classification = config.type == "classification"
    scoring_metric = get_fedot_metrics(config)

    # Benchmark defaults first, then overlay user-supplied framework params
    # (keys starting with "_" are benchmark-internal and must not reach FEDOT).
    training_params = {"preset": "best_quality", "n_jobs": config.cores}
    user_overrides = {
        k: v for k, v in config.framework_params.items() if not k.startswith("_")
    }
    training_params.update(user_overrides)
    n_jobs = training_params["n_jobs"]

    log.info(
        f"Running FEDOT with a maximum time of {config.max_runtime_seconds}s on {n_jobs} cores, \
optimizing {scoring_metric}"
    )

    # FEDOT expresses its budget in minutes; cap any single pipeline fit at
    # a tenth of the total budget so one slow candidate cannot eat the run.
    runtime_min = config.max_runtime_seconds / 60
    fedot = Fedot(
        problem=config.type,
        timeout=runtime_min,
        metric=scoring_metric,
        seed=config.seed,
        max_pipeline_fit_time=runtime_min / 10,
        **training_params,
    )

    with Timer() as training:
        fedot.fit(features=dataset.train.X, target=dataset.train.y)

    log.info("Predicting on the test set.")
    with Timer() as predict:
        predictions = fedot.predict(features=dataset.test.X)
        probabilities = None
        if is_classification:
            # probs_for_all_classes keeps the column layout stable even when
            # some class is absent from the predictions.
            probabilities = fedot.predict_proba(
                features=dataset.test.X, probs_for_all_classes=True
            )

    save_artifacts(fedot, config)

    return result(
        predictions=predictions,
        truth=dataset.test.y,
        probabilities=probabilities,
        target_is_encoded=False,
        models_count=fedot.current_pipeline.length,
        training_duration=training.duration,
        predict_duration=predict.duration,
    )
def get_fedot_metrics(config):
    """Map the benchmark's metric name onto FEDOT's metric identifier.

    Returns the FEDOT metric name for ``config.metric``, or ``None`` (after
    logging a warning) when the benchmark metric has no FEDOT equivalent.
    """
    metrics_mapping = {
        "acc": "accuracy",
        "auc": "roc_auc",
        "f1": "f1",
        "logloss": "neg_log_loss",
        "mae": "mae",
        "mse": "mse",
        "msle": "msle",
        "r2": "r2",
        "rmse": "rmse",
    }
    scoring_metric = metrics_mapping.get(config.metric)
    if scoring_metric is None:
        log.warning(f"Performance metric {config.metric} not supported.")
    return scoring_metric
def save_artifacts(automl, config):
    """Persist optional FEDOT artifacts requested via ``_save_artifacts``.

    Recognised artifact names: "models" (the final pipeline as JSON),
    "info" (the optimisation history), and "leaderboard" (CSV of evaluated
    pipelines). Each artifact is saved best-effort: failures are logged
    and never abort the benchmark run.
    """
    requested = config.framework_params.get("_save_artifacts", [])

    if "models" in requested:
        try:
            target = os.path.join(output_subdir("models", config), "model.json")
            automl.current_pipeline.save(target)
        except Exception as e:
            log.info(f"Error when saving 'models': {e}.", exc_info=True)

    if "info" in requested:
        try:
            # output_subdir is called unconditionally so the directory exists
            # even when there is no history to write.
            info_dir = output_subdir("info", config)
            if automl.history:
                automl.history.save(os.path.join(info_dir, "history.json"))
            else:
                log.info("There is no optimization history info to save.")
        except Exception as e:
            log.info(
                f"Error when saving info about optimisation history: {e}.",
                exc_info=True,
            )

    if "leaderboard" in requested:
        try:
            board_dir = output_subdir("leaderboard", config)
            if automl.history:
                board = automl.history.get_leaderboard()
                Path(os.path.join(board_dir, "leaderboard.csv")).write_text(board)
        except Exception as e:
            log.info(f"Error when saving 'leaderboard': {e}.", exc_info=True)
if __name__ == "__main__":
    # AMLB entry point: call_run reads the dataset/config from the parent
    # benchmark process, invokes run(), and serialises the result back.
    call_run(run)