# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from typing import List, Optional, Iterable, Dict, Any

import torch
import pytorch_lightning as pl

from gluonts.core.component import validated
from gluonts.dataset.common import Dataset
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.loader import as_stacked_batches
from gluonts.dataset.stat import calculate_dataset_statistics
from gluonts.itertools import Cyclic
from gluonts.model.forecast_generator import DistributionForecastGenerator
from gluonts.torch.modules.loss import DistributionLoss, NegativeLogLikelihood
from gluonts.transform import (
    Transformation,
    Chain,
    AddObservedValuesIndicator,
    InstanceSampler,
    InstanceSplitter,
    ValidationSplitSampler,
    TestSplitSampler,
    ExpectedNumInstanceSampler,
    RemoveFields,
    SetField,
    AddTimeFeatures,
    AddAgeFeature,
    VstackFeatures,
)
from gluonts.time_feature import TimeFeature, time_features_from_frequency_str
from gluonts.torch.model.estimator import PyTorchLightningEstimator
from gluonts.torch.model.predictor import PyTorchPredictor
from gluonts.torch.distributions import (
    DistributionOutput,
    StudentTOutput,
)

from .lightning_module import LinearLightningModule

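# Tensor field names fed to the network: prediction consumes the "past"
# fields plus the known future time features; training additionally consumes
# the future target and its observed-values mask.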
PREDICTION_INPUT_NAMES = [
    "feat_static_cat",
    "feat_static_real",
    "past_time_feat",
    "past_target",
    "past_observed_values",
    "future_time_feat",
]

TRAINING_INPUT_NAMES = PREDICTION_INPUT_NAMES + [
    "future_target",
    "future_observed_values",
]


class LinearEstimator(PyTorchLightningEstimator):
    """
    An estimator training a Linear model for forecasting.

    This class uses the model defined in ``LinearModel``,
    and wraps it into a ``LinearLightningModule`` for training
    purposes: training is performed using PyTorch Lightning's ``pl.Trainer``
    class.

    Parameters
    ----------
    freq
        Frequency of the data to train on and predict.
    prediction_length
        Length of the prediction horizon.
    context_length
        Number of time steps prior to prediction time that the model
        takes as inputs (default: ``10 * prediction_length``).
    hidden_dimensions
        Size of hidden layers in the feed-forward network
        (default: ``[20, 20]``).
    input_size
        Number of variates in the target time series (default: 1).
    scaling
        Which scaling method to use to scale the target values
        (default: ``"mean"``).
    num_feat_dynamic_real
        Number of dynamic real features in the data (default: 0).
    num_feat_static_cat
        Number of static categorical features in the data (default: 0).
    num_feat_static_real
        Number of static real features in the data (default: 0).
    cardinality
        Number of values of each categorical feature.
        This must be set if ``num_feat_static_cat > 0`` (default: None).
    embedding_dimension
        Dimension of the embeddings for categorical features (default: None).
    time_features
        List of time features to use as inputs in addition to the provided
        data (default: None, in which case these are determined automatically
        based on ``freq``).
    lr
        Learning rate (default: ``1e-3``).
    weight_decay
        Weight decay regularization parameter (default: ``1e-8``).
    distr_output
        Distribution to use to evaluate observations and sample predictions
        (default: ``StudentTOutput()``).
    loss
        Loss to be optimized during training
        (default: ``NegativeLogLikelihood()``).
    batch_norm
        Whether to apply batch normalization (default: False).
    batch_size
        The size of the batches to be used for training (default: 32).
    num_batches_per_epoch
        Number of batches to be processed in each training epoch
        (default: 50).
    trainer_kwargs
        Additional arguments to provide to ``pl.Trainer`` for construction.
    train_sampler
        Controls the sampling of windows during training.
    validation_sampler
        Controls the sampling of windows during validation.
    """

    @validated()
    def __init__(
        self,
        freq: str,
        prediction_length: int,
        context_length: Optional[int] = None,
        hidden_dimensions: Optional[List[int]] = None,
        input_size: int = 1,
        scaling: Optional[str] = "mean",
        num_feat_dynamic_real: int = 0,
        num_feat_static_cat: int = 0,
        num_feat_static_real: int = 0,
        cardinality: Optional[List[int]] = None,
        embedding_dimension: Optional[List[int]] = None,
        time_features: Optional[List[TimeFeature]] = None,
        lr: float = 1e-3,
        weight_decay: float = 1e-8,
        distr_output: DistributionOutput = StudentTOutput(),
        loss: DistributionLoss = NegativeLogLikelihood(),
        batch_norm: bool = False,
        batch_size: int = 32,
        num_batches_per_epoch: int = 50,
        trainer_kwargs: Optional[Dict[str, Any]] = None,
        train_sampler: Optional[InstanceSampler] = None,
        validation_sampler: Optional[InstanceSampler] = None,
    ) -> None:
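        # Fill in trainer defaults; user-supplied ``trainer_kwargs`` override
        # them.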
        default_trainer_kwargs = {
            "max_epochs": 100,
            "gradient_clip_val": 10.0,
        }
        if trainer_kwargs is not None:
            default_trainer_kwargs.update(trainer_kwargs)
        super().__init__(trainer_kwargs=default_trainer_kwargs)

        self.scaling = scaling
        self.freq = freq
        self.input_size = input_size
        self.prediction_length = prediction_length
        self.context_length = context_length or 10 * prediction_length
        self.num_feat_dynamic_real = num_feat_dynamic_real
        self.num_feat_static_cat = num_feat_static_cat
        self.num_feat_static_real = num_feat_static_real
        self.cardinality = (
            cardinality if cardinality and num_feat_static_cat > 0 else [1]
        )
        self.embedding_dimension = embedding_dimension
        self.time_features = (
            time_features
            if time_features is not None
            else time_features_from_frequency_str(self.freq)
        )
        # TODO find way to enforce same defaults to network and estimator
        # somehow
        self.hidden_dimensions = hidden_dimensions or [20, 20]
        self.lr = lr
        self.weight_decay = weight_decay
        self.distr_output = distr_output
        self.loss = loss
        self.batch_norm = batch_norm
        self.batch_size = batch_size
        self.num_batches_per_epoch = num_batches_per_epoch

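        # Default samplers: training draws, on average, one random window per
        # series; validation splits each series at the last point that still
        # leaves ``prediction_length`` future steps.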
        self.train_sampler = train_sampler or ExpectedNumInstanceSampler(
            num_instances=1.0, min_future=prediction_length
        )
        self.validation_sampler = validation_sampler or ValidationSplitSampler(
            min_future=prediction_length
        )

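    # Derive feature-related hyperparameters from dataset statistics, so that
    # they need not be specified by hand.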
    @classmethod
    def derive_auto_fields(cls, train_iter):
        stats = calculate_dataset_statistics(train_iter)

        return {
            "num_feat_dynamic_real": stats.num_feat_dynamic_real,
            "num_feat_static_cat": len(stats.feat_static_cat),
            "cardinality": [len(cats) for cats in stats.feat_static_cat],
        }

    def create_transformation(self) -> Transformation:
        remove_field_names = []
        if self.num_feat_static_real == 0:
            remove_field_names.append(FieldName.FEAT_STATIC_REAL)
        if self.num_feat_dynamic_real == 0:
            remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL)

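        # Build the pipeline: drop unused fields, insert dummy static features
        # where none are given, then stack time, age, and any dynamic real
        # features into FEAT_TIME and mark which target values are observed.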
        return Chain(
            [RemoveFields(field_names=remove_field_names)]
            + (
                [SetField(output_field=FieldName.FEAT_STATIC_CAT, value=[0])]
                if self.num_feat_static_cat == 0
                else []
            )
            + (
                [
                    SetField(
                        output_field=FieldName.FEAT_STATIC_REAL, value=[0.0]
                    )
                ]
                if self.num_feat_static_real == 0
                else []
            )
            + [
                AddTimeFeatures(
                    start_field=FieldName.START,
                    target_field=FieldName.TARGET,
                    output_field=FieldName.FEAT_TIME,
                    time_features=self.time_features,
                    pred_length=self.prediction_length,
                ),
                AddAgeFeature(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.FEAT_AGE,
                    pred_length=self.prediction_length,
                    log_scale=True,
                ),
                VstackFeatures(
                    output_field=FieldName.FEAT_TIME,
                    input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE]
                    + (
                        [FieldName.FEAT_DYNAMIC_REAL]
                        if self.num_feat_dynamic_real > 0
                        else []
                    ),
                ),
                AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field=FieldName.OBSERVED_VALUES,
                ),
            ]
        )

    def create_lightning_module(self) -> pl.LightningModule:
        return LinearLightningModule(
            loss=self.loss,
            lr=self.lr,
            weight_decay=self.weight_decay,
            model_kwargs={
                "input_size": self.input_size,
                "prediction_length": self.prediction_length,
                "context_length": self.context_length,
                "hidden_dimensions": self.hidden_dimensions,
                "scaling": self.scaling,
                "distr_output": self.distr_output,
                "batch_norm": self.batch_norm,
            },
        )

    def _create_instance_splitter(
        self, module: LinearLightningModule, mode: str
    ):
        assert mode in ["training", "validation", "test"]

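        # Pick the window sampler for the given mode; at test time exactly one
        # window, ending at the last observation of each series, is taken.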
        instance_sampler = {
            "training": self.train_sampler,
            "validation": self.validation_sampler,
            "test": TestSplitSampler(),
        }[mode]

        return InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=instance_sampler,
            past_length=self.context_length,
            future_length=self.prediction_length,
            time_series_fields=[
                FieldName.FEAT_TIME,
                FieldName.OBSERVED_VALUES,
            ],
            dummy_value=self.distr_output.value_in_support,
        )

    def create_training_data_loader(
        self,
        data: Dataset,
        module: LinearLightningModule,
        shuffle_buffer_length: Optional[int] = None,
        **kwargs,
    ) -> Iterable:
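        # Cycle the dataset indefinitely; the epoch length is fixed by
        # ``num_batches_per_epoch`` rather than by the dataset size.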
        data = Cyclic(data).stream()
        instances = self._create_instance_splitter(module, "training").apply(
            data, is_train=True
        )
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            shuffle_buffer_length=shuffle_buffer_length,
            field_names=TRAINING_INPUT_NAMES,
            output_type=torch.tensor,
            num_batches_per_epoch=self.num_batches_per_epoch,
        )

    def create_validation_data_loader(
        self,
        data: Dataset,
        module: LinearLightningModule,
        **kwargs,
    ) -> Iterable:
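        # ``is_train=True`` so the splitter also emits the future target and
        # observed-values mask, which computing the validation loss requires.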
        instances = self._create_instance_splitter(module, "validation").apply(
            data, is_train=True
        )
        return as_stacked_batches(
            instances,
            batch_size=self.batch_size,
            field_names=TRAINING_INPUT_NAMES,
            output_type=torch.tensor,
        )

    def create_predictor(
        self,
        transformation: Transformation,
        module,
    ) -> PyTorchPredictor:
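        # Bundle the trained module with the full input transformation and a
        # generator that turns network outputs into distribution forecasts.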
        prediction_splitter = self._create_instance_splitter(module, "test")

        return PyTorchPredictor(
            input_transform=transformation + prediction_splitter,
            input_names=PREDICTION_INPUT_NAMES,
            prediction_net=module,
            forecast_generator=DistributionForecastGenerator(
                self.distr_output
            ),
            batch_size=self.batch_size,
            prediction_length=self.prediction_length,
            device=torch.device(
                "cuda" if torch.cuda.is_available() else "cpu"
            ),
        )
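
# Example usage (a minimal sketch): train on a GluonTS dataset and generate
# probabilistic forecasts. ``get_dataset`` and the "electricity" dataset name
# are illustrative; any GluonTS ``Dataset`` works.
#
#     from gluonts.dataset.repository import get_dataset
#
#     dataset = get_dataset("electricity")
#     estimator = LinearEstimator(
#         freq=dataset.metadata.freq,
#         prediction_length=dataset.metadata.prediction_length,
#         trainer_kwargs={"max_epochs": 5},
#     )
#     predictor = estimator.train(dataset.train)
#     forecasts = list(predictor.predict(dataset.test))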