Skip to content

[ENH] Add option to output estimator attributes to file in experiments #154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 14, 2023
Merged
12 changes: 6 additions & 6 deletions build_tools/pr_labeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@
paths = [file.filename for file in pr.get_files()]

content_paths_to_labels = [
("tsml-eval/datasets/", "datasets"),
("tsml-eval/estimators/", "estimators"),
("tsml-eval/evaluation/", "evaluation"),
("tsml_eval/datasets/", "datasets"),
("tsml_eval/estimators/", "estimators"),
("tsml_eval/evaluation/", "evaluation"),
("examples/", "examples"),
("tsml-eval/experiments/", "experiments"),
("tsml-eval/publications/", "publications"),
("tsml_eval/experiments/", "experiments"),
("tsml_eval/publications/", "publications"),
("results/", "results"),
("tsml-eval/testing/", "testing"),
("tsml_eval/testing/", "testing"),
]

present_content_labels = [
Expand Down
19 changes: 19 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@

__author__ = ["MatthewMiddlehurst"]

import shutil

from tsml_eval.experiments import experiments
from tsml_eval.testing.test_utils import _TEST_OUTPUT_PATH

KEEP_PYTEST_OUTPUT = True


def pytest_sessionfinish(session, exitstatus):
    """Call after test run is finished, before returning the exit status to system.

    Deletes the unit-test output directory unless output retention was
    requested (``KEEP_PYTEST_OUTPUT`` is set from the ``--keepoutput``
    command-line flag in ``pytest_configure``).
    """
    # "workerinput" is set on pytest-xdist worker processes only — presumably
    # this guard restricts cleanup to the controller process so workers do not
    # race to delete the shared output directory (NOTE(review): confirm xdist
    # attribute name against the pinned pytest-xdist version).
    if not hasattr(session.config, "workerinput") and not KEEP_PYTEST_OUTPUT:
        shutil.rmtree(_TEST_OUTPUT_PATH)


def pytest_addoption(parser):
Expand All @@ -14,8 +25,16 @@ def pytest_addoption(parser):
help="Set the time interval in seconds for recording memory usage "
"(default: %(default)s).",
)
parser.addoption(
"--keepoutput",
action="store_true",
help="Keep the unit test output folder after running pytest"
" (default: %(default)s).",
)


def pytest_configure(config):
    """Pytest configuration preamble.

    Propagates command-line options into module/package state before the
    test session starts.
    """
    # Forward the --meminterval option to the experiments module so memory
    # usage is recorded at the requested interval during tests.
    experiments.MEMRECORD_INTERVAL = config.getoption("--meminterval")
    # Record whether the test output folder should be kept; read later by
    # pytest_sessionfinish to decide whether to delete it.
    global KEEP_PYTEST_OUTPUT
    KEEP_PYTEST_OUTPUT = config.getoption("--keepoutput")
3 changes: 3 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ Functions for running experiments.
experiments.load_and_run_regression_experiment
experiments.run_clustering_experiment
experiments.load_and_run_clustering_experiment
experiments.run_forecasting_experiment
experiments.load_and_run_forecasting_experiment
```

## Utilities: [tsml_eval.utils](https://github.com/time-series-machine-learning/tsml-eval/tree/main/tsml_eval/utils)
Expand All @@ -77,6 +79,7 @@ Public utility functions used elsewhere in the package.
utils.experiments.compare_result_file_resample
utils.experiments.assign_gpu
utils.experiments.timing_benchmark
utils.experiments.estimator_attributes_to_file
utils.functions.str_in_nested_list
utils.functions.pair_list_to_dict
utils.functions.time_to_milliseconds
Expand Down
8 changes: 8 additions & 0 deletions tsml_eval/experiments/classification_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def run_experiment(args):
classifier_name=args.estimator_name,
resample_id=args.resample_id,
build_train_file=args.train_fold,
write_attributes=args.write_attributes,
att_max_shape=args.att_max_shape,
benchmark_time=args.benchmark_time,
overwrite=args.overwrite,
predefined_resample=args.predefined_resample,
Expand All @@ -100,6 +102,9 @@ def run_experiment(args):
row_normalise = False
resample_id = 0
train_fold = False
write_attributes = True
att_max_shape = 0
benchmark_time = True
overwrite = False
predefined_resample = False
fit_contract = 0
Expand All @@ -126,6 +131,9 @@ def run_experiment(args):
classifier_name=estimator_name,
resample_id=resample_id,
build_train_file=train_fold,
write_attributes=write_attributes,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
overwrite=overwrite,
predefined_resample=predefined_resample,
)
Expand Down
8 changes: 8 additions & 0 deletions tsml_eval/experiments/clustering_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def run_experiment(args):
clusterer_name=args.estimator_name,
resample_id=args.resample_id,
build_test_file=args.test_fold,
write_attributes=args.write_attributes,
att_max_shape=args.att_max_shape,
benchmark_time=args.benchmark_time,
overwrite=args.overwrite,
predefined_resample=args.predefined_resample,
Expand All @@ -109,6 +111,9 @@ def run_experiment(args):
n_clusters = -1
resample_id = 0
test_fold = False
write_attributes = True
att_max_shape = 0
benchmark_time = True
overwrite = False
predefined_resample = False
fit_contract = 0
Expand Down Expand Up @@ -138,6 +143,9 @@ def run_experiment(args):
clusterer_name=estimator_name,
resample_id=resample_id,
build_test_file=test_fold,
write_attributes=write_attributes,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
overwrite=overwrite,
predefined_resample=predefined_resample,
combine_train_test_split=combine_test_train_split,
Expand Down
71 changes: 71 additions & 0 deletions tsml_eval/experiments/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from tsml_eval.evaluation.metrics import clustering_accuracy_score
from tsml_eval.utils.experiments import (
estimator_attributes_to_file,
load_experiment_data,
resample_data,
stratified_resample_data,
Expand Down Expand Up @@ -66,6 +67,8 @@ def run_classification_experiment(
resample_id=None,
build_test_file=True,
build_train_file=False,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a classification experiment and save the results to file.
Expand Down Expand Up @@ -175,6 +178,11 @@ def run_classification_experiment(
)
fit_time += int(round(getattr(classifier, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(
classifier, attribute_file_path, max_list_shape=att_max_shape
)

if build_test_file:
start = int(round(time.time() * 1000))
test_probs = classifier.predict_proba(X_test)
Expand Down Expand Up @@ -258,6 +266,8 @@ def load_and_run_classification_experiment(
classifier_name=None,
resample_id=0,
build_train_file=False,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
predefined_resample=False,
Expand Down Expand Up @@ -303,6 +313,9 @@ def load_and_run_classification_experiment(
the file format must include the resample_id at the end of the dataset name i.e.
<problem_path>/<dataset>/<dataset>+<resample_id>+"_TRAIN.ts".
"""
if classifier_name is None:
classifier_name = type(classifier).__name__

build_test_file, build_train_file = _check_existing_results(
results_path,
classifier_name,
Expand All @@ -326,6 +339,11 @@ def load_and_run_classification_experiment(
X_train, y_train, X_test, y_test, random_state=resample_id
)

if write_attributes:
attribute_file_path = f"{results_path}/{classifier_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

run_classification_experiment(
X_train,
y_train,
Expand All @@ -339,6 +357,8 @@ def load_and_run_classification_experiment(
resample_id=resample_id,
build_test_file=build_test_file,
build_train_file=build_train_file,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand All @@ -356,6 +376,8 @@ def run_regression_experiment(
resample_id=None,
build_test_file=True,
build_train_file=False,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a regression experiment and save the results to file.
Expand Down Expand Up @@ -457,6 +479,9 @@ def run_regression_experiment(
)
fit_time += int(round(getattr(regressor, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(regressor, attribute_file_path)

if build_test_file:
start = int(round(time.time() * 1000))
test_preds = regressor.predict(X_test)
Expand Down Expand Up @@ -525,6 +550,8 @@ def load_and_run_regression_experiment(
regressor_name=None,
resample_id=0,
build_train_file=False,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
predefined_resample=False,
Expand Down Expand Up @@ -570,6 +597,9 @@ def load_and_run_regression_experiment(
the file format must include the resample_id at the end of the dataset name i.e.
<problem_path>/<dataset>/<dataset>+<resample_id>+"_TRAIN.ts".
"""
if regressor_name is None:
regressor_name = type(regressor).__name__

build_test_file, build_train_file = _check_existing_results(
results_path,
regressor_name,
Expand All @@ -593,6 +623,11 @@ def load_and_run_regression_experiment(
X_train, y_train, X_test, y_test, random_state=resample_id
)

if write_attributes:
attribute_file_path = f"{results_path}/{regressor_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

# Ensure labels are floats
y_train = y_train.astype(float)
y_test = y_test.astype(float)
Expand All @@ -610,6 +645,8 @@ def load_and_run_regression_experiment(
resample_id=resample_id,
build_test_file=build_test_file,
build_train_file=build_train_file,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand All @@ -628,6 +665,8 @@ def run_clustering_experiment(
resample_id=None,
build_test_file=False,
build_train_file=True,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a clustering experiment and save the results to file.
Expand Down Expand Up @@ -759,6 +798,9 @@ def run_clustering_experiment(
)
fit_time += int(round(getattr(clusterer, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(clusterer, attribute_file_path)

start = int(round(time.time() * 1000))
if callable(getattr(clusterer, "predict_proba", None)):
train_probs = clusterer.predict_proba(X_train)
Expand Down Expand Up @@ -860,6 +902,8 @@ def load_and_run_clustering_experiment(
clusterer_name=None,
resample_id=0,
build_test_file=False,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
predefined_resample=False,
Expand Down Expand Up @@ -913,6 +957,9 @@ def load_and_run_clustering_experiment(
the train/test split is combined into a single train set. If False then the
train/test split is used as normal.
"""
if clusterer_name is None:
clusterer_name = type(clusterer).__name__

if combine_train_test_split:
build_test_file = False

Expand All @@ -939,6 +986,11 @@ def load_and_run_clustering_experiment(
X_train, y_train, X_test, y_test, random_state=resample_id
)

if write_attributes:
attribute_file_path = f"{results_path}/{clusterer_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

if combine_train_test_split:
y_train = np.concatenate((y_train, y_test), axis=None)
X_train = (
Expand All @@ -963,6 +1015,8 @@ def load_and_run_clustering_experiment(
resample_id=resample_id,
build_train_file=build_train_file,
build_test_file=build_test_file,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand All @@ -975,6 +1029,8 @@ def run_forecasting_experiment(
forecaster_name=None,
dataset_name="N/A",
random_seed=None,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a forecasting experiment and save the results to file.
Expand Down Expand Up @@ -1030,6 +1086,9 @@ def run_forecasting_experiment(
)
fit_time += int(round(getattr(forecaster, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(forecaster, attribute_file_path)

start = int(round(time.time() * 1000))
test_preds = forecaster.predict(np.arange(1, len(test) + 1))
test_time = (
Expand Down Expand Up @@ -1068,6 +1127,8 @@ def load_and_run_forecasting_experiment(
forecaster,
forecaster_name=None,
random_seed=None,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
):
Expand Down Expand Up @@ -1101,6 +1162,9 @@ def load_and_run_forecasting_experiment(
If set to False, this will only build results if there is not a result file
already present. If True, it will overwrite anything already there.
"""
if forecaster_name is None:
forecaster_name = type(forecaster).__name__

build_test_file, _ = _check_existing_results(
results_path,
forecaster_name,
Expand All @@ -1115,6 +1179,11 @@ def load_and_run_forecasting_experiment(
warnings.warn("All files exist and not overwriting, skipping.", stacklevel=1)
return

if write_attributes:
attribute_file_path = f"{results_path}/{forecaster_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

train = pd.read_csv(
f"{problem_path}/{dataset}/{dataset}_TRAIN.csv", index_col=0
).squeeze("columns")
Expand All @@ -1132,6 +1201,8 @@ def load_and_run_forecasting_experiment(
forecaster_name=forecaster_name,
dataset_name=dataset,
random_seed=random_seed,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand Down
Loading