Skip to content

[ENH] Add option to output estimator attributes to file in experiments #154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 14, 2023
Merged
12 changes: 6 additions & 6 deletions build_tools/pr_labeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@
paths = [file.filename for file in pr.get_files()]

content_paths_to_labels = [
("tsml-eval/datasets/", "datasets"),
("tsml-eval/estimators/", "estimators"),
("tsml-eval/evaluation/", "evaluation"),
("tsml_eval/datasets/", "datasets"),
("tsml_eval/estimators/", "estimators"),
("tsml_eval/evaluation/", "evaluation"),
("examples/", "examples"),
("tsml-eval/experiments/", "experiments"),
("tsml-eval/publications/", "publications"),
("tsml_eval/experiments/", "experiments"),
("tsml_eval/publications/", "publications"),
("results/", "results"),
("tsml-eval/testing/", "testing"),
("tsml_eval/testing/", "testing"),
]

present_content_labels = [
Expand Down
19 changes: 19 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,18 @@

__author__ = ["MatthewMiddlehurst"]

import shutil

from tsml_eval.experiments import experiments
from tsml_eval.testing.test_utils import _TEST_OUTPUT_PATH

KEEP_PYTEST_OUTPUT = True


def pytest_sessionfinish(session, exitstatus):
    """Call after test run is finished, before returning the exit status to system.

    Deletes the unit-test output directory unless output retention was
    requested (``KEEP_PYTEST_OUTPUT`` is set from the ``--keepoutput``
    command-line flag in ``pytest_configure``).
    """
    # "workerinput" is set on pytest-xdist worker processes only — presumably
    # this guard restricts cleanup to the controller process so workers do not
    # race to delete the shared output directory (NOTE(review): confirm xdist
    # attribute name against the pinned pytest-xdist version).
    if not hasattr(session.config, "workerinput") and not KEEP_PYTEST_OUTPUT:
        shutil.rmtree(_TEST_OUTPUT_PATH)


def pytest_addoption(parser):
Expand All @@ -14,8 +25,16 @@ def pytest_addoption(parser):
help="Set the time interval in seconds for recording memory usage "
"(default: %(default)s).",
)
parser.addoption(
"--keepoutput",
action="store_true",
help="Keep the unit test output folder after running pytest"
" (default: %(default)s).",
)


def pytest_configure(config):
    """Pytest configuration preamble.

    Propagates command-line options into module/package state before the
    test session starts.
    """
    # Forward the --meminterval option to the experiments module so memory
    # usage is recorded at the requested interval during tests.
    experiments.MEMRECORD_INTERVAL = config.getoption("--meminterval")
    # Record whether the test output folder should be kept; read later by
    # pytest_sessionfinish to decide whether to delete it.
    global KEEP_PYTEST_OUTPUT
    KEEP_PYTEST_OUTPUT = config.getoption("--keepoutput")
3 changes: 3 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ Functions for running experiments.
experiments.load_and_run_regression_experiment
experiments.run_clustering_experiment
experiments.load_and_run_clustering_experiment
experiments.run_forecasting_experiment
experiments.load_and_run_forecasting_experiment
```

## Utilities: [tsml_eval.utils](https://github.com/time-series-machine-learning/tsml-eval/tree/main/tsml_eval/utils)
Expand All @@ -77,6 +79,7 @@ Public utility functions used elsewhere in the package.
utils.experiments.compare_result_file_resample
utils.experiments.assign_gpu
utils.experiments.timing_benchmark
utils.experiments.estimator_attributes_to_file
utils.functions.str_in_nested_list
utils.functions.pair_list_to_dict
utils.functions.time_to_milliseconds
Expand Down
8 changes: 8 additions & 0 deletions tsml_eval/experiments/classification_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def run_experiment(args):
classifier_name=args.estimator_name,
resample_id=args.resample_id,
build_train_file=args.train_fold,
write_attributes=args.write_attributes,
att_max_shape=args.att_max_shape,
benchmark_time=args.benchmark_time,
overwrite=args.overwrite,
predefined_resample=args.predefined_resample,
Expand All @@ -100,6 +102,9 @@ def run_experiment(args):
row_normalise = False
resample_id = 0
train_fold = False
write_attributes = True
att_max_shape = 0
benchmark_time = True
overwrite = False
predefined_resample = False
fit_contract = 0
Expand All @@ -126,6 +131,9 @@ def run_experiment(args):
classifier_name=estimator_name,
resample_id=resample_id,
build_train_file=train_fold,
write_attributes=write_attributes,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
overwrite=overwrite,
predefined_resample=predefined_resample,
)
Expand Down
8 changes: 8 additions & 0 deletions tsml_eval/experiments/clustering_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def run_experiment(args):
clusterer_name=args.estimator_name,
resample_id=args.resample_id,
build_test_file=args.test_fold,
write_attributes=args.write_attributes,
att_max_shape=args.att_max_shape,
benchmark_time=args.benchmark_time,
overwrite=args.overwrite,
predefined_resample=args.predefined_resample,
Expand All @@ -109,6 +111,9 @@ def run_experiment(args):
n_clusters = -1
resample_id = 0
test_fold = False
write_attributes = True
att_max_shape = 0
benchmark_time = True
overwrite = False
predefined_resample = False
fit_contract = 0
Expand Down Expand Up @@ -138,6 +143,9 @@ def run_experiment(args):
clusterer_name=estimator_name,
resample_id=resample_id,
build_test_file=test_fold,
write_attributes=write_attributes,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
overwrite=overwrite,
predefined_resample=predefined_resample,
combine_train_test_split=combine_test_train_split,
Expand Down
71 changes: 71 additions & 0 deletions tsml_eval/experiments/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from tsml_eval.evaluation.metrics import clustering_accuracy_score
from tsml_eval.utils.experiments import (
estimator_attributes_to_file,
load_experiment_data,
resample_data,
stratified_resample_data,
Expand Down Expand Up @@ -66,6 +67,8 @@ def run_classification_experiment(
resample_id=None,
build_test_file=True,
build_train_file=False,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a classification experiment and save the results to file.
Expand Down Expand Up @@ -175,6 +178,11 @@ def run_classification_experiment(
)
fit_time += int(round(getattr(classifier, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(
classifier, attribute_file_path, max_list_shape=att_max_shape
)

if build_test_file:
start = int(round(time.time() * 1000))
test_probs = classifier.predict_proba(X_test)
Expand Down Expand Up @@ -258,6 +266,8 @@ def load_and_run_classification_experiment(
classifier_name=None,
resample_id=0,
build_train_file=False,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
predefined_resample=False,
Expand Down Expand Up @@ -303,6 +313,9 @@ def load_and_run_classification_experiment(
the file format must include the resample_id at the end of the dataset name i.e.
<problem_path>/<dataset>/<dataset>+<resample_id>+"_TRAIN.ts".
"""
if classifier_name is None:
classifier_name = type(classifier).__name__

build_test_file, build_train_file = _check_existing_results(
results_path,
classifier_name,
Expand All @@ -326,6 +339,11 @@ def load_and_run_classification_experiment(
X_train, y_train, X_test, y_test, random_state=resample_id
)

if write_attributes:
attribute_file_path = f"{results_path}/{classifier_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

run_classification_experiment(
X_train,
y_train,
Expand All @@ -339,6 +357,8 @@ def load_and_run_classification_experiment(
resample_id=resample_id,
build_test_file=build_test_file,
build_train_file=build_train_file,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand All @@ -356,6 +376,8 @@ def run_regression_experiment(
resample_id=None,
build_test_file=True,
build_train_file=False,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a regression experiment and save the results to file.
Expand Down Expand Up @@ -457,6 +479,9 @@ def run_regression_experiment(
)
fit_time += int(round(getattr(regressor, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(regressor, attribute_file_path)

if build_test_file:
start = int(round(time.time() * 1000))
test_preds = regressor.predict(X_test)
Expand Down Expand Up @@ -525,6 +550,8 @@ def load_and_run_regression_experiment(
regressor_name=None,
resample_id=0,
build_train_file=False,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
predefined_resample=False,
Expand Down Expand Up @@ -570,6 +597,9 @@ def load_and_run_regression_experiment(
the file format must include the resample_id at the end of the dataset name i.e.
<problem_path>/<dataset>/<dataset>+<resample_id>+"_TRAIN.ts".
"""
if regressor_name is None:
regressor_name = type(regressor).__name__

build_test_file, build_train_file = _check_existing_results(
results_path,
regressor_name,
Expand All @@ -593,6 +623,11 @@ def load_and_run_regression_experiment(
X_train, y_train, X_test, y_test, random_state=resample_id
)

if write_attributes:
attribute_file_path = f"{results_path}/{regressor_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

# Ensure labels are floats
y_train = y_train.astype(float)
y_test = y_test.astype(float)
Expand All @@ -610,6 +645,8 @@ def load_and_run_regression_experiment(
resample_id=resample_id,
build_test_file=build_test_file,
build_train_file=build_train_file,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand All @@ -628,6 +665,8 @@ def run_clustering_experiment(
resample_id=None,
build_test_file=False,
build_train_file=True,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a clustering experiment and save the results to file.
Expand Down Expand Up @@ -759,6 +798,9 @@ def run_clustering_experiment(
)
fit_time += int(round(getattr(clusterer, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(clusterer, attribute_file_path)

start = int(round(time.time() * 1000))
if callable(getattr(clusterer, "predict_proba", None)):
train_probs = clusterer.predict_proba(X_train)
Expand Down Expand Up @@ -860,6 +902,8 @@ def load_and_run_clustering_experiment(
clusterer_name=None,
resample_id=0,
build_test_file=False,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
predefined_resample=False,
Expand Down Expand Up @@ -913,6 +957,9 @@ def load_and_run_clustering_experiment(
the train/test split is combined into a single train set. If False then the
train/test split is used as normal.
"""
if clusterer_name is None:
clusterer_name = type(clusterer).__name__

if combine_train_test_split:
build_test_file = False

Expand All @@ -939,6 +986,11 @@ def load_and_run_clustering_experiment(
X_train, y_train, X_test, y_test, random_state=resample_id
)

if write_attributes:
attribute_file_path = f"{results_path}/{clusterer_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

if combine_train_test_split:
y_train = np.concatenate((y_train, y_test), axis=None)
X_train = (
Expand All @@ -963,6 +1015,8 @@ def load_and_run_clustering_experiment(
resample_id=resample_id,
build_train_file=build_train_file,
build_test_file=build_test_file,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand All @@ -975,6 +1029,8 @@ def run_forecasting_experiment(
forecaster_name=None,
dataset_name="N/A",
random_seed=None,
attribute_file_path=None,
att_max_shape=0,
benchmark_time=True,
):
"""Run a forecasting experiment and save the results to file.
Expand Down Expand Up @@ -1030,6 +1086,9 @@ def run_forecasting_experiment(
)
fit_time += int(round(getattr(forecaster, "_fit_time_milli", 0)))

if attribute_file_path is not None:
estimator_attributes_to_file(forecaster, attribute_file_path)

start = int(round(time.time() * 1000))
test_preds = forecaster.predict(np.arange(1, len(test) + 1))
test_time = (
Expand Down Expand Up @@ -1068,6 +1127,8 @@ def load_and_run_forecasting_experiment(
forecaster,
forecaster_name=None,
random_seed=None,
write_attributes=False,
att_max_shape=0,
benchmark_time=True,
overwrite=False,
):
Expand Down Expand Up @@ -1101,6 +1162,9 @@ def load_and_run_forecasting_experiment(
If set to False, this will only build results if there is not a result file
already present. If True, it will overwrite anything already there.
"""
if forecaster_name is None:
forecaster_name = type(forecaster).__name__

build_test_file, _ = _check_existing_results(
results_path,
forecaster_name,
Expand All @@ -1115,6 +1179,11 @@ def load_and_run_forecasting_experiment(
warnings.warn("All files exist and not overwriting, skipping.", stacklevel=1)
return

if write_attributes:
attribute_file_path = f"{results_path}/{forecaster_name}/Workspace/{dataset}/"
else:
attribute_file_path = None

train = pd.read_csv(
f"{problem_path}/{dataset}/{dataset}_TRAIN.csv", index_col=0
).squeeze("columns")
Expand All @@ -1132,6 +1201,8 @@ def load_and_run_forecasting_experiment(
forecaster_name=forecaster_name,
dataset_name=dataset,
random_seed=random_seed,
attribute_file_path=attribute_file_path,
att_max_shape=att_max_shape,
benchmark_time=benchmark_time,
)

Expand Down
Loading