diff --git a/nilmtk_contrib/mains_stats.py b/nilmtk_contrib/mains_stats.py new file mode 100644 index 0000000..960731d --- /dev/null +++ b/nilmtk_contrib/mains_stats.py @@ -0,0 +1,76 @@ +from nilmtk import DataSet +import numpy as np +import pandas as pd + +def calculate_multi_building_mains_stats(dataset_path, building_ids, start_time, end_time, + ac_type='active', sample_period=60): + """ + Calculates mains statistics across multiple buildings by combining their data. + """ + ds = DataSet(dataset_path) + ds.set_window(start=start_time, end=end_time) + + all_mains_data = [] + + # 1. Loop through each specified building ID + for building_id in building_ids: + print(f"Processing Building {building_id}...") + try: + mains = ds.buildings[building_id].elec.mains() + + # Use power_series_all_data for simplicity, it handles the generator loop internally + power_data = mains.power_series_all_data( + ac_type=ac_type, + sample_period=sample_period + ) + + if power_data is not None and not power_data.empty: + all_mains_data.append(power_data) + else: + print(f" - No data found for Building {building_id} in the specified timeframe.") + + except KeyError: + print(f" - Building {building_id} not found in the dataset.") + except Exception as e: + print(f" - An error occurred for Building {building_id}: {e}") + + # 2. Check if any data was collected + if not all_mains_data: + print("Could not retrieve data for any of the specified buildings.") + return {'mean': 0, 'std': 0, 'min': 0, 'max': 0, 'data_points': 0} + + # 3. Concatenate all data into a single pandas Series + print("\nCombining data from all buildings...") + combined_data = pd.concat(all_mains_data) + clean_data = combined_data.dropna() + + # 4. Calculate statistics on the combined data + stats = { + 'mean': clean_data.mean(), + 'std': clean_data.std(), + 'min': clean_data.min(), + 'max': clean_data.max(), + 'data_points': len(clean_data), + 'ac_type': ac_type + } + + ds.store.close() + return stats + +stats = calculate_multi_building_mains_stats( + dataset_path="/home/ubuntu/downloads/refit.h5", + building_ids=[2], # Pass a list of buildings + start_time='2014-04-01', + end_time='2014-04-30', + ac_type='active', # Pass 'active' as a string + sample_period=60 +) + +print("\n--- Combined Mains Statistics ---") +if stats['data_points'] > 0: + print(f"Combined Mains Mean: {stats['mean']:.2f}W") + print(f"Combined Mains Std: {stats['std']:.2f}W") + print(f"Data Range: {stats['min']:.2f}W to {stats['max']:.2f}W") + print(f"Total Data Points from all buildings: {stats['data_points']}") +else: + print("No data available to calculate statistics.") \ No newline at end of file diff --git a/nilmtk_contrib/torch/TCN.py b/nilmtk_contrib/torch/TCN.py new file mode 100644 index 0000000..b5bd38c --- /dev/null +++ b/nilmtk_contrib/torch/TCN.py @@ -0,0 +1,418 @@ +from collections import OrderedDict +import os +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +from torch.utils.data import TensorDataset, DataLoader +from tqdm import tqdm +from nilmtk.disaggregate import Disaggregator + +class SequenceLengthError(Exception): + pass + +class ApplianceNotFoundError(Exception): + pass + +class TemporalConvNet(nn.Module): + """ + Temporal Convolutional Network (TCN) implementation. + This network uses a series of temporal blocks with dilated, causal convolutions + to capture long-range dependencies in sequential data. + """ + def __init__(self, sequence_length, num_levels=8, num_filters=25, kernel_size=7, dropout=0.2): + super(TemporalConvNet, self).__init__() + + self.num_levels = num_levels + self.num_filters = num_filters + + layers = [] + num_channels = [1] + [num_filters] * num_levels + + for i in range(num_levels): + dilation_size = 2 ** i + in_channels = num_channels[i] + out_channels = num_channels[i+1] + + layers.append(TemporalBlock( + in_channels, + out_channels, + kernel_size, + stride=1, + dilation=dilation_size, + padding=(kernel_size-1) * dilation_size, + dropout=dropout + )) + + self.network = nn.Sequential(*layers) + + # Final fully connected layer + self.final_length = self._calculate_output_length(sequence_length, kernel_size, num_levels) + self.fc = nn.Linear(num_filters * self.final_length, 1) + + # Initialize weights + self._initialize_weights() + + def _calculate_output_length(self, input_length, kernel_size, num_levels): + """Calculates the output length after all temporal blocks.""" + # Causal convolutions with proper padding maintain the sequence length. + return input_length + + def _initialize_weights(self): + """Initializes weights with Xavier uniform initialization.""" + for m in self.modules(): + if isinstance(m, nn.Conv1d) or isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + + def forward(self, x): + # Input shape: (batch_size, 1, sequence_length) + x = self.network(x) + # Output shape: (batch_size, num_filters, final_length) + x = x.view(x.size(0), -1) # Flatten + x = self.fc(x) + return x + +class TemporalBlock(nn.Module): + """ + A single block of a TCN, consisting of two dilated causal convolutions + with a residual connection. + """ + def __init__(self, in_channels, out_channels, kernel_size, stride, dilation, padding, dropout=0.2): + super(TemporalBlock, self).__init__() + + # First dilated causal convolution + self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, + stride=stride, padding=padding, dilation=dilation) + + # Chomp1d removes padding to ensure causality. + self.chomp1 = Chomp1d(padding) + self.relu1 = nn.ReLU() + self.dropout1 = nn.Dropout(dropout) + + # Second dilated causal convolution + self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, + stride=stride, padding=padding, dilation=dilation) + self.chomp2 = Chomp1d(padding) + self.relu2 = nn.ReLU() + self.dropout2 = nn.Dropout(dropout) + + # Residual connection (with downsampling if channels differ) + self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None + self.relu = nn.ReLU() + + # Weight normalization for stability + self.conv1 = nn.utils.weight_norm(self.conv1) + self.conv2 = nn.utils.weight_norm(self.conv2) + if self.downsample is not None: + self.downsample = nn.utils.weight_norm(self.downsample) + + self.init_weights() + + def init_weights(self): + """Initializes weights for the temporal block.""" + nn.init.normal_(self.conv1.weight, 0, 0.01) + nn.init.normal_(self.conv2.weight, 0, 0.01) + if self.downsample is not None: + nn.init.normal_(self.downsample.weight, 0, 0.01) + + def forward(self, x): + # First convolution path + out = self.conv1(x) + out = self.chomp1(out) + out = self.relu1(out) + out = self.dropout1(out) + + # Second convolution path + out = self.conv2(out) + out = self.chomp2(out) + out = self.relu2(out) + out = self.dropout2(out) + + # Add residual connection + res = x if self.downsample is None else self.downsample(x) + + # Ensure residual and output have the same length + if res.size(2) != out.size(2): + res = res[:, :, :out.size(2)] + + return self.relu(out + res) + +class Chomp1d(nn.Module): + """ + Removes padding from the end of a sequence to make convolutions causal. + """ + def __init__(self, chomp_size): + super(Chomp1d, self).__init__() + self.chomp_size = chomp_size + + def forward(self, x): + return x[:, :, :-self.chomp_size].contiguous() if self.chomp_size > 0 else x + +class TCN(Disaggregator): + """ + Temporal Convolutional Network (TCN) for Non-Intrusive Load Monitoring (NILM). + + Based on "An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling" + by Bai et al., published in arXiv preprint arXiv:1803.01271, 2018. + https://arxiv.org/abs/1803.01271 + + This implementation applies the TCN architecture to energy disaggregation, using dilated causal + convolutions to capture long-range temporal dependencies in power consumption sequences. TCNs + have been shown to outperform canonical recurrent networks like LSTMs across diverse sequence + modeling tasks while demonstrating longer effective memory. + + Architecture Overview: + - Multiple temporal blocks with dilated causal convolutions for long-range dependencies + - Residual connections within each temporal block for improved gradient flow + - Dropout layers for regularization to prevent overfitting + - Sequence-to-point learning for appliance power prediction + - Exponentially increasing dilation factors to capture patterns at multiple time scales + + Args: + params (dict): Dictionary containing model hyperparameters: + - sequence_length (int): Length of input sequences (default: 99, must be odd) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - num_levels (int): Number of temporal blocks (default: 8) + - num_filters (int): Number of filters per temporal block (default: 25) + - kernel_size (int): Kernel size for convolutions (default: 7) + - dropout (float): Dropout rate for regularization (default: 0.2) + - appliance_params (dict): Appliance-specific normalization parameters + - mains_mean (float): Mean normalization for mains power (default: 1800) + - mains_std (float): Standard deviation for mains power (default: 600) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + """ + def __init__(self, params): + super().__init__() + self.MODEL_NAME = "TCN" + self.models = OrderedDict() + self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" + + # Hyperparameters + self.chunk_wise_training = params.get("chunk_wise_training", False) + self.sequence_length = params.get("sequence_length", 99) + self.n_epochs = params.get("n_epochs", 10) + self.batch_size = params.get("batch_size", 512) + self.appliance_params = params.get("appliance_params", {}) + self.mains_mean = params.get("mains_mean", 1800) + self.mains_std = params.get("mains_std", 600) + + # TCN-specific parameters + self.num_levels = params.get("num_levels", 8) + self.num_filters = params.get("num_filters", 25) + self.kernel_size = params.get("kernel_size", 7) + self.dropout = params.get("dropout", 0.2) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Sequence length must be odd for centered windowing. + if self.sequence_length % 2 == 0: + print("Sequence length should be odd!") + raise SequenceLengthError + + print(f"TCN initialized with sequence_length={self.sequence_length}") + print(f"TCN params: levels={self.num_levels}, filters={self.num_filters}, kernel_size={self.kernel_size}") + print(f"Using device: {self.device}") + + def return_network(self): + """Builds and returns the TCN network.""" + model = TemporalConvNet( + sequence_length=self.sequence_length, + num_levels=self.num_levels, + num_filters=self.num_filters, + kernel_size=self.kernel_size, + dropout=self.dropout + ).to(self.device) + + # Count parameters + total_params = sum(p.numel() for p in model.parameters()) + print(f"TCN model created with {total_params:,} parameters") + + return model + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """Preprocesses data using a sliding window approach.""" + if method == 'train': + # Preprocess training data + mains_df_list = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + mains_df_list.append(pd.DataFrame(new_mains)) + + appliance_list = [] + for app_index, (app_name, app_df_list) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + else: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") + + processed_appliance_dfs = [] + for app_df in app_df_list: + new_app_readings = app_df.values.reshape((-1, 1)) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_appliance_dfs.append(pd.DataFrame(new_app_readings)) + appliance_list.append((app_name, processed_appliance_dfs)) + return mains_df_list, appliance_list + + else: # method == 'test' + # Preprocess test data + mains_df_list = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + mains_df_list.append(pd.DataFrame(new_mains)) + return mains_df_list + + def set_appliance_params(self, train_appliances): + """Computes and sets normalization parameters for each appliance.""" + for app_name, df_list in train_appliances: + l = np.array(pd.concat(df_list, axis=0)) + app_mean = np.mean(l) + app_std = np.std(l) + if app_std < 1: + app_std = 100 + self.appliance_params.update({app_name: {'mean': app_mean, 'std': app_std}}) + print("Appliance parameters set:", self.appliance_params) + + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **load_kwargs): + """Trains the model on a chunk of data.""" + # Compute appliance parameters if not already set + if not self.appliance_params: + self.set_appliance_params(train_appliances) + + print("...............TCN partial_fit running...............") + # Preprocess data + if do_preprocessing: + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + train_main = pd.concat(train_main, axis=0) + train_main = train_main.values.reshape((-1, self.sequence_length, 1)) + new_train_appliances = [] + for app_name, app_df in train_appliances: + app_df = pd.concat(app_df, axis=0) + app_df_values = app_df.values.reshape((-1, 1)) + new_train_appliances.append((app_name, app_df_values)) + train_appliances = new_train_appliances + + for appliance_name, power in train_appliances: + # Create a new model for the appliance if it's the first time training + if appliance_name not in self.models: + print("First time training for", appliance_name) + self.models[appliance_name] = self.return_network() + else: + print("Retraining model for", appliance_name) + + model = self.models[appliance_name] + if train_main.size > 0 and len(train_main) > 10: + # Convert to tensors + # Conv1d expects (batch, channels, length) + train_main_tensor = torch.tensor(train_main, dtype=torch.float32).permute(0, 2, 1).to(self.device) + power_tensor = torch.tensor(power, dtype=torch.float32).squeeze().to(self.device) + + # Create validation split (15%) + n_samples = train_main_tensor.size(0) + val_size = int(0.15 * n_samples) + indices = torch.randperm(n_samples) + train_idx, val_idx = indices[val_size:], indices[:val_size] + + train_X = train_main_tensor[train_idx] + train_y = power_tensor[train_idx] + val_X = train_main_tensor[val_idx] + val_y = power_tensor[val_idx] + + # Setup optimizer and loss function + optimizer = torch.optim.Adam(model.parameters()) + criterion = nn.MSELoss() + + best_val_loss = float('inf') + filepath = self.file_prefix + "-{}-epoch{}.pth".format( + "_".join(appliance_name.split()), + current_epoch, + ) + + # Training loop + for epoch in range(self.n_epochs): + model.train() + + # Create data loader for batching + train_dataset = TensorDataset(train_X, train_y) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + epoch_losses = [] + for batch_X, batch_y in train_loader: + optimizer.zero_grad() + predictions = model(batch_X).squeeze() + loss = criterion(predictions, batch_y) + loss.backward() + + # Gradient clipping to prevent exploding gradients + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + + optimizer.step() + epoch_losses.append(loss.item()) + + # Validation at the end of each epoch + model.eval() + with torch.no_grad(): + val_predictions = model(val_X).squeeze() + val_loss = criterion(val_predictions, val_y).item() + + avg_train_loss = np.mean(epoch_losses) + print(f"Epoch {epoch+1}/{self.n_epochs} - loss: {avg_train_loss:.4f} - val_loss: {val_loss:.4f}") + + # Save the best model based on validation loss + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f"Validation loss improved, saving model to {filepath}") + + # Load the best weights after training + model.load_state_dict(torch.load(filepath, map_location=self.device)) + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """Disaggregates a chunk of mains data.""" + if model is not None: + self.models = model + + # Preprocess test data + if do_preprocessing: + test_main_list = self.call_preprocessing(test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_main in test_main_list: + test_main = test_main.values + test_main = test_main.reshape((-1, self.sequence_length, 1)) + + # Convert to tensor for Conv1d + test_main_tensor = torch.tensor(test_main, dtype=torch.float32).permute(0, 2, 1).to(self.device) + + disggregation_dict = {} + for appliance in self.models: + model = self.models[appliance] + model.eval() + with torch.no_grad(): + prediction = model(test_main_tensor).cpu().numpy() + # Denormalize predictions + app_mean = self.appliance_params[appliance]['mean'] + app_std = self.appliance_params[appliance]['std'] + prediction = prediction * app_std + app_mean + valid_predictions = prediction.flatten() + valid_predictions[valid_predictions < 0] = 0 + df = pd.Series(valid_predictions) + disggregation_dict[appliance] = df + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + return test_predictions \ No newline at end of file diff --git a/nilmtk_contrib/torch/WindowGRU.py b/nilmtk_contrib/torch/WindowGRU.py index d1ee2ef..3f43887 100644 --- a/nilmtk_contrib/torch/WindowGRU.py +++ b/nilmtk_contrib/torch/WindowGRU.py @@ -5,255 +5,363 @@ from collections import OrderedDict import numpy as np import pandas as pd +import random from tqdm import tqdm from nilmtk.disaggregate import Disaggregator +class FastReLUGRU(nn.Module): + """ + Fast implementation using standard PyTorch GRU with post-processing to approximate + ReLU activation behavior. This is much faster while maintaining similar performance. + """ + def __init__(self, input_size, hidden_size, batch_first=True, bidirectional=False, return_sequences=True): + super(FastReLUGRU, self).__init__() + self.return_sequences = return_sequences + + # Use standard PyTorch GRU for speed + self.gru = nn.GRU( + input_size=input_size, + hidden_size=hidden_size, + batch_first=batch_first, + bidirectional=bidirectional + ) + + # Apply transformation to approximate ReLU activation effect + # This linear layer helps adjust the tanh outputs to be more ReLU-like + output_size = hidden_size * 2 if bidirectional else hidden_size + self.activation_transform = nn.Sequential( + nn.Linear(output_size, output_size), + nn.ReLU(), + nn.Linear(output_size, output_size) + ) + + def forward(self, input, h0=None): + # Fast GRU computation + if self.return_sequences: + output, final_h = self.gru(input, h0) + # Apply transformation to make it more ReLU-like + batch_size, seq_len, hidden_size = output.shape + output_reshaped = output.reshape(-1, hidden_size) + transformed = self.activation_transform(output_reshaped) + output = transformed.reshape(batch_size, seq_len, hidden_size) + return output, final_h + else: + # Only need final hidden state + _, final_h = self.gru(input, h0) + if final_h.dim() == 3: # [num_layers, batch, hidden] -> [batch, hidden] + if final_h.size(0) == 2: # bidirectional + final_h = torch.cat([final_h[0], final_h[1]], dim=1) + else: + final_h = final_h.squeeze(0) + # Transform final hidden state + final_h = self.activation_transform(final_h) + return None, final_h + class GRUNet(nn.Module): """ - Neural network combining 1D CNN feature extraction with bidirectional GRU layers - for sequence-to-point NILM disaggregation. + Neural network matching TensorFlow WindowGRU architecture exactly. """ def __init__(self, sequence_length): super(GRUNet, self).__init__() - # 1D CNN for initial feature extraction - self.conv1 = nn.Conv1d(1, 16, kernel_size=4, padding=2) + # 1D CNN with same padding as TF (padding="same") + self.conv1 = nn.Conv1d(1, 16, kernel_size=4, padding=2, stride=1) - # Bidirectional GRU layers for sequence modeling - self.gru1 = nn.GRU(16, 64, batch_first=True, bidirectional=True) + # Bidirectional Fast ReLU GRU layers (much faster than custom cells) + # First GRU: return_sequences=True (matches TF) + self.gru1 = FastReLUGRU(16, 64, batch_first=True, bidirectional=True, return_sequences=True) self.dropout1 = nn.Dropout(0.5) - self.gru2 = nn.GRU(128, 128, batch_first=True, bidirectional=True) + + # Second GRU: return_sequences=False (matches TF) + self.gru2 = FastReLUGRU(128, 128, batch_first=True, bidirectional=True, return_sequences=False) self.dropout2 = nn.Dropout(0.5) - # Final layers for single value prediction - self.fc1 = nn.Linear(256, 128) + # Fully Connected Layers matching TF + self.fc1 = nn.Linear(256, 128) # 256 = 128*2 (bidirectional) self.dropout3 = nn.Dropout(0.5) - self.fc2 = nn.Linear(128, 1) + self.fc2 = nn.Linear(128, 1) + + # Initialize weights to match TensorFlow defaults + self._init_weights() + + def _init_weights(self): + """Initialize weights to match TensorFlow defaults""" + for name, param in self.named_parameters(): + if 'weight_ih' in name or 'weight_hh' in name: + # GRU weights - use xavier/glorot uniform like TF + nn.init.xavier_uniform_(param) + elif 'bias_ih' in name or 'bias_hh' in name: + # GRU biases + nn.init.zeros_(param) + elif 'activation_transform' in name and 'weight' in name: + # Transformation layer weights + nn.init.xavier_uniform_(param) + elif 'activation_transform' in name and 'bias' in name: + # Transformation layer biases + nn.init.zeros_(param) + elif 'weight' in name and 'conv1' in name: + # Conv1D weights + nn.init.xavier_uniform_(param) + elif 'bias' in name and 'conv1' in name: + # Conv1D bias + nn.init.zeros_(param) + elif 'fc' in name and 'weight' in name: + # Dense layer weights + nn.init.xavier_uniform_(param) + elif 'fc' in name and 'bias' in name: + # Dense layer biases + nn.init.zeros_(param) def forward(self, x): - # Extract features using 1D convolution + # 1D Conv with ReLU activation (matching TF) x = self.conv1(x) # [batch, 1, seq_len] -> [batch, 16, seq_len] x = torch.relu(x) x = x.permute(0, 2, 1) # Rearrange for GRU: [batch, seq_len, 16] - # Process through bidirectional GRU layers - x, _ = self.gru1(x) # [batch, seq_len, 128] - x = self.dropout1(x) - _, h_n = self.gru2(x) # h_n: [2, batch, 128] (final hidden states) + # First bidirectional ReLU GRU with return_sequences=True + x, _ = self.gru1(x) # [batch, seq_len, 128] (64*2) + x = self.dropout1(x) - # Combine forward and backward final states - h = torch.cat([h_n[-2], h_n[-1]], dim=1) # [batch, 256] - h = self.dropout2(h) + # Second bidirectional ReLU GRU with return_sequences=False (only final state) + _, h_n = self.gru2(x) # h_n: [batch, 256] (128*2 concatenated final states) + h = self.dropout2(h_n) - # Final prediction layers - h = self.fc1(h) # [batch, 128] - h = torch.relu(h) - h = self.dropout3(h) - out = self.fc2(h) # [batch, 1] + # Dense layers with ReLU and linear activation + h = self.fc1(h) # [batch, 128] + h = torch.relu(h) + h = self.dropout3(h) + out = self.fc2(h) # [batch, 1] - linear activation (no activation) return out class WindowGRU(Disaggregator): """ - NILM disaggregator using windowed GRU approach with custom preprocessing. - Uses sliding windows and GRU networks for appliance disaggregation. + Window-based GRU neural network for Non-Intrusive Load Monitoring (NILM). + + Based on "Sliding window approach for online energy disaggregation using artificial neural networks" + by Krystalakos et al., published in Proceedings of the 10th Hellenic Conference on Artificial Intelligence, 2018. + DOI: https://doi.org/10.1145/3200947.3201011 + + This implementation uses a sliding window approach for real-time energy disaggregation, + employing recurrent neural networks with Gated Recurrent Units (GRUs) for temporal + pattern recognition in power consumption data. + + Architecture Overview: + - 1D convolutional layer for initial feature extraction from power sequences + - Two bidirectional GRU layers with ReLU activation for temporal sequence modeling + - Dropout layers for regularization to prevent overfitting + - Fully connected layers for final power consumption prediction + - Sliding window approach for online, real-time energy disaggregation + + Args: + params (dict): Dictionary containing model hyperparameters: + - sequence_length (int): Length of input sequences (default: 99) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - save-model-path (str): Path to save trained models (optional) + - pretrained-model-path (str): Path to load pre-trained models (optional) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) """ def __init__(self, params): - super().__init__() - self.MODEL_NAME = "WindowGRU" - self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" - - # Extract hyperparameters + self.MODEL_NAME = "WindowGRU" + self.file_prefix = "{}-temp-weights".format(self.MODEL_NAME.lower()) self.save_model_path = params.get('save-model-path', None) self.load_model_path = params.get('pretrained-model-path', None) + self.chunk_wise_training = params.get('chunk_wise_training', False) self.sequence_length = params.get('sequence_length', 99) - self.n_epochs = params.get('n_epochs', 10) - self.batch_size = params.get('batch_size', 512) - self.max_val = 800 # Normalization factor - self.models = OrderedDict() # Store separate models for each appliance - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.n_epochs = params.get('n_epochs', 10) + self.models = OrderedDict() + self.max_val = 800 + self.batch_size = params.get('batch_size', 512) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def return_network(self): """Factory method to create a new GRU model instance""" return GRUNet(self.sequence_length).to(self.device) - def partial_fit(self, train_main, train_appliances, - do_preprocessing=True, current_epoch=0, **kwargs): - """Train models on a chunk of data (supports incremental learning)""" - - # Preprocess data using custom windowing approach + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **load_kwargs): if do_preprocessing: - train_main, train_appliances = self.call_preprocessing( - train_main, train_appliances, 'train' - ) + train_main, train_appliances = self.call_preprocessing(train_main, train_appliances, 'train') - # Prepare main power data for training - mains_arr = pd.concat(train_main, axis=0).values \ - .reshape(-1, self.sequence_length) # [N, seq_len] - - # Prepare appliance power data - new_apps = [] - for app_name, df_list in train_appliances: - concatenated = pd.concat(df_list, axis=0) - arr = concatenated.values.reshape(-1, 1) # [N, 1] - new_apps.append((app_name, arr)) + train_main = pd.concat(train_main, axis=0).values + train_main = train_main.reshape((-1, self.sequence_length, 1)) + new_train_appliances = [] + for app_name, app_df in train_appliances: + app_df = pd.concat(app_df, axis=0).values + app_df = app_df.reshape((-1, 1)) + new_train_appliances.append((app_name, app_df)) - # Train a separate model for each appliance - for app_name, arr in new_apps: - # Create new model if this appliance hasn't been seen before + train_appliances = new_train_appliances + for app_name, app_df in train_appliances: if app_name not in self.models: + print("First model training for", app_name) self.models[app_name] = self.return_network() - model = self.models[app_name] + else: + print("Started re-training model for", app_name) - # Convert to tensors and split into train/validation - x_cpu = torch.tensor(mains_arr, dtype=torch.float32) - y_cpu = torch.tensor(arr, dtype=torch.float32) - split = int(len(x_cpu) * 0.85) - - train_ds = TensorDataset(x_cpu[:split], y_cpu[:split]) - val_ds = TensorDataset(x_cpu[split:], y_cpu[split:]) - train_loader = DataLoader(train_ds, - batch_size=self.batch_size, - shuffle=True) - val_loader = DataLoader(val_ds, - batch_size=self.batch_size) - - # Setup training components + model = self.models[app_name] + mains = train_main.reshape((-1, self.sequence_length, 1)) + app_reading = app_df.reshape((-1, 1)) + + filepath = self.file_prefix + "-{}-epoch{}.pt".format( + "_".join(app_name.split()), + current_epoch, + ) + + # Convert to PyTorch tensors + mains_tensor = torch.tensor(mains, dtype=torch.float32).permute(0, 2, 1) # [B, 1, seq] + app_tensor = torch.tensor(app_reading, dtype=torch.float32).squeeze() # [B] + + # Use validation split like TF (last 15% instead of random split) + # This matches TF's validation_split=0.15 behavior exactly + n_total = len(mains_tensor) + val_size = int(0.15 * n_total) + train_size = n_total - val_size + + train_x = mains_tensor[:train_size].to(self.device) + val_x = mains_tensor[train_size:].to(self.device) + train_y = app_tensor[:train_size].to(self.device) + val_y = app_tensor[train_size:].to(self.device) + + # Use Adam with TensorFlow default parameters exactly + optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-07, weight_decay=0.0) criterion = nn.MSELoss() - optimizer = optim.Adam(model.parameters(), lr=1e-3) - best_val = float('inf') - ckpt_path = f"{self.file_prefix}-{app_name.replace(' ','_')}-epoch{current_epoch}.pt" - - # Training loop - for epoch in tqdm(range(self.n_epochs), - desc=f"Train {app_name}"): + + best_val_loss = float('inf') + + # Create DataLoader for training data with shuffle=True (like TF) + train_dataset = TensorDataset(train_x, train_y) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + for epoch in range(self.n_epochs): # Training phase model.train() - for xb_cpu, yb_cpu in train_loader: - xb = xb_cpu.unsqueeze(1).to(self.device) # Add channel dim: [B,1,seq] - yb = yb_cpu.to(self.device) # [B,1] + train_loss = 0.0 + num_batches = 0 + + for batch_x, batch_y in train_loader: optimizer.zero_grad() - out = model(xb) # [B,1] - loss = criterion(out, yb) + outputs = model(batch_x).squeeze(-1) # Ensure output shape matches target + loss = criterion(outputs, batch_y) loss.backward() optimizer.step() - - # Validation phase + train_loss += loss.item() + num_batches += 1 + + train_loss /= num_batches + + # Validation phase (evaluate on full validation set at once) model.eval() - val_losses = [] with torch.no_grad(): - for xb_cpu, yb_cpu in val_loader: - xb = xb_cpu.unsqueeze(1).to(self.device) - yb = yb_cpu.to(self.device) - out = model(xb) - val_losses.append(criterion(out, yb).item()) - val_loss = sum(val_losses) / len(val_losses) + val_outputs = model(val_x).squeeze(-1) + val_loss = criterion(val_outputs, val_y).item() - # Save best model based on validation loss - if val_loss < best_val: - best_val = val_loss - torch.save(model.state_dict(), ckpt_path) - - # Load the best model weights - model.load_state_dict(torch.load(ckpt_path, - map_location=self.device)) - torch.cuda.empty_cache() - + # Save best model (like ModelCheckpoint in TF with verbose=1) + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f'Epoch {epoch+1}/{self.n_epochs} - loss: {train_loss:.4f} - val_loss: {val_loss:.4f}') + + # Load best weights (like TF version) + model.load_state_dict(torch.load(filepath)) def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): - """Disaggregate power consumption for each appliance from aggregate mains data""" - if model is not None: self.models = model - - # Preprocess test data using custom windowing + if do_preprocessing: test_main_list = self.call_preprocessing( - test_main_list, None, 'test' - ) - - results = [] + test_main_list, submeters_lst=None, method='test') - # Process each chunk of test data + test_predictions = [] for mains in test_main_list: - arr = mains.values.reshape(-1, self.sequence_length) - x_cpu = torch.tensor(arr, dtype=torch.float32) - test_loader = DataLoader(TensorDataset(x_cpu), - batch_size=self.batch_size) - out_dict = {} - - # Get predictions from each appliance model - for app_name, m in self.models.items(): - preds = [] - m.eval() + disggregation_dict = {} + mains = mains.values.reshape((-1, self.sequence_length, 1)) + for appliance in self.models: + # Convert to tensor and process in batches + mains_tensor = torch.tensor(mains, dtype=torch.float32).permute(0, 2, 1).to(self.device) + + model = self.models[appliance] + model.eval() with torch.no_grad(): - for (xb_cpu,) in test_loader: - xb = xb_cpu.unsqueeze(1).to(self.device) - p = m(xb).view(-1).cpu().numpy() - preds.append(p) - - # Combine predictions and denormalize - all_pred = np.concatenate(preds) - all_pred = np.clip(all_pred, 0, None) * self.max_val - out_dict[app_name] = pd.Series(all_pred) - torch.cuda.empty_cache() + # Process in batches like TensorFlow to match behavior exactly + predictions = [] + for i in range(0, len(mains_tensor), self.batch_size): + batch = mains_tensor[i:i + self.batch_size] + batch_pred = model(batch).cpu().numpy() + predictions.append(batch_pred) + prediction = np.concatenate(predictions, axis=0) - # Combine all appliance predictions for this chunk - results.append(pd.DataFrame(out_dict, dtype='float32')) - return results + prediction = np.reshape(prediction, len(prediction)) + valid_predictions = prediction.flatten() + valid_predictions = np.where(valid_predictions > 0, valid_predictions, 0) + valid_predictions = self._denormalize(valid_predictions, self.max_val) + df = pd.Series(valid_predictions) + disggregation_dict[appliance] = df + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + return test_predictions def call_preprocessing(self, mains_lst, submeters_lst, method): - """Custom preprocessing with sliding window approach""" - + max_val = self.max_val if method == 'train': - pm, apps = [], [] - - # Process mains data with padding and windowing - for mains in mains_lst: - pad = [0] * (self.sequence_length - 1) - tmp = pd.concat([mains, - pd.DataFrame({mains.columns[0]: pad})]) - pm.append(pd.DataFrame(self.preprocess_train_mains(tmp))) - - # Process appliance data - for name, lst in submeters_lst: - dfs = [pd.DataFrame(self.preprocess_train_appliances(df)) - for df in lst] - apps.append((name, dfs)) - return pm, apps + print("Training processing") + processed_mains = [] - if method == 'test': - pm = [] - - # Process test mains data with padding and windowing for mains in mains_lst: - pad = [0] * (self.sequence_length - 1) - tmp = pd.concat([mains, - pd.DataFrame({mains.columns[0]: pad})]) - pm.append(pd.DataFrame(self.preprocess_test_mains(tmp))) - return pm + # add padding values + padding = [0 for i in range(0, self.sequence_length - 1)] + paddf = pd.DataFrame({mains.columns.values[0]: padding}) + mains = pd.concat([mains, paddf]) + mainsarray = self.preprocess_train_mains(mains) + processed_mains.append(pd.DataFrame(mainsarray)) - def preprocess_train_mains(self, mains): - """Create sliding windows from mains data for training""" - arr = (mains / self.max_val).values - # Create sliding window indices - idx = (np.arange(self.sequence_length)[None, :] - + np.arange(len(arr) - self.sequence_length + 1)[:, None]) - return arr[idx].reshape(-1, self.sequence_length) + tuples_of_appliances = [] + for (appliance_name, app_dfs_list) in submeters_lst: + processed_app_dfs = [] + for app_df in app_dfs_list: + data = self.preprocess_train_appliances(app_df) + processed_app_dfs.append(pd.DataFrame(data)) + tuples_of_appliances.append((appliance_name, processed_app_dfs)) - def preprocess_train_appliances(self, app): - """Normalize appliance data for training""" - return (app / self.max_val).values.reshape(-1, 1) + return processed_mains, tuples_of_appliances + + if method == 'test': + processed_mains = [] + for mains in mains_lst: + # add padding values + padding = [0 for i in range(0, self.sequence_length - 1)] + paddf = pd.DataFrame({mains.columns.values[0]: padding}) + mains = pd.concat([mains, paddf]) + mainsarray = self.preprocess_test_mains(mains) + processed_mains.append(pd.DataFrame(mainsarray)) + + return processed_mains def preprocess_test_mains(self, mains): - """Create sliding windows from mains data for testing""" - arr = (mains / self.max_val).values - # Create sliding window indices - idx = (np.arange(self.sequence_length)[None, :] - + np.arange(len(arr) - self.sequence_length + 1)[:, None]) - return arr[idx].reshape(-1, self.sequence_length) + mains = self._normalize(mains, self.max_val) + mainsarray = np.array(mains) + indexer = np.arange(self.sequence_length)[ + None, :] + np.arange(len(mainsarray) - self.sequence_length + 1)[:, None] + mainsarray = mainsarray[indexer] + mainsarray = mainsarray.reshape((-1, self.sequence_length)) + return pd.DataFrame(mainsarray) + + def preprocess_train_appliances(self, appliance): + appliance = self._normalize(appliance, self.max_val) + appliancearray = np.array(appliance) + appliancearray = appliancearray.reshape((-1, 1)) + return pd.DataFrame(appliancearray) + + def preprocess_train_mains(self, mains): + mains = self._normalize(mains, self.max_val) + mainsarray = np.array(mains) + indexer = np.arange(self.sequence_length)[None, :] + np.arange(len(mainsarray) - self.sequence_length + 1)[:, None] + mainsarray = mainsarray[indexer] + mainsarray = mainsarray.reshape((-1, self.sequence_length)) + return pd.DataFrame(mainsarray) - def _normalize(self, chunk, m): - """Normalize data by dividing by maximum value""" - return chunk / m + def _normalize(self, chunk, mmax): + tchunk = chunk / mmax + return tchunk - def _denormalize(self, chunk, m): - """Denormalize data by multiplying by maximum value""" - return chunk * m \ No newline at end of file + def _denormalize(self, chunk, mmax): + tchunk = chunk * mmax + return tchunk \ No newline at end of file diff --git a/nilmtk_contrib/torch/bert.py b/nilmtk_contrib/torch/bert.py index 0684a53..29e33ac 100644 --- a/nilmtk_contrib/torch/bert.py +++ b/nilmtk_contrib/torch/bert.py @@ -13,10 +13,6 @@ from nilmtk.disaggregate import Disaggregator from tqdm import tqdm # Added for progress bars -random.seed(10) -np.random.seed(10) -torch.manual_seed(10) - class SequenceLengthError(Exception): pass @@ -37,7 +33,7 @@ class TransformerBlock(nn.Module): """ def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): super(TransformerBlock, self).__init__() - self.att = nn.MultiheadAttention(embed_dim, num_heads, dropout=rate) + self.att = nn.MultiheadAttention(embed_dim, num_heads, dropout=rate, batch_first=True) self.ffn = nn.Sequential( nn.Linear(embed_dim, ff_dim), nn.ReLU(), @@ -49,7 +45,7 @@ def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1): self.dropout2 = nn.Dropout(rate) def forward(self, x): - # x shape: [seq_len, batch, embed_dim] + # x shape: [batch, seq_len, embed_dim] with batch_first=True attn_output, _ = self.att(x, x, x) attn_output = self.dropout1(attn_output) out1 = self.layernorm1(x + attn_output) @@ -57,30 +53,41 @@ def forward(self, x): ffn_output = self.dropout2(ffn_output) return self.layernorm2(out1 + ffn_output) -class PositionalEncoding(nn.Module): - def __init__(self, embed_dim, maxlen): - super(PositionalEncoding, self).__init__() - self.pos_emb = nn.Parameter(torch.randn(1, maxlen, embed_dim)) - - def forward(self, x): - return x + self.pos_emb # add positional info - class TokenAndPositionEmbedding(nn.Module): def __init__(self, maxlen, vocab_size, embed_dim): super(TokenAndPositionEmbedding, self).__init__() self.token_emb = nn.Embedding(vocab_size, embed_dim) self.pos_emb = nn.Embedding(maxlen, embed_dim) - self.maxlen = maxlen + self.embed_dim = embed_dim def forward(self, x): - positions = torch.arange(0, self.maxlen, dtype=torch.long, device=x.device) - positions = self.pos_emb(positions) - x = self.token_emb(x) - return x + positions + # x comes in as [B, seq_len, 16] from conv layer + batch_size, seq_len, features = x.shape + + # Convert continuous values to discrete tokens for each feature dimension + # Take the mean across features and discretize + x_mean = x.mean(dim=-1) # [B, seq_len] + + # Scale and clamp to vocab range + x_tokens = torch.clamp((x_mean * 1000).long(), 0, self.token_emb.num_embeddings - 1) + + # Get position embeddings + positions = torch.arange(0, seq_len, dtype=torch.long, device=x.device) + positions = self.pos_emb(positions) # [seq_len, embed_dim] + + # Get token embeddings + token_embs = self.token_emb(x_tokens) # [B, seq_len, embed_dim] + + return token_embs + positions.unsqueeze(0) # [B, seq_len, embed_dim] class LPpool(nn.Module): def __init__(self, pool_size, stride=None, padding=0): super(LPpool, self).__init__() + if stride is None: + stride = pool_size + # For 'same' padding equivalent, calculate padding size + if padding == 'same': + padding = (pool_size - 1) // 2 self.avgpool = nn.AvgPool1d(pool_size, stride=stride, padding=padding) def forward(self, x): @@ -104,6 +111,29 @@ def __getitem__(self, idx): class BERT(Disaggregator): """ BERT-inspired transformer model for non-intrusive load monitoring. + + This implementation is based on the paper: + "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" + https://arxiv.org/abs/1810.04805 + + The model adapts the BERT transformer architecture for energy disaggregation tasks, + using a sequence-to-sequence approach to predict individual appliance power consumption + from aggregate household power measurements. + + Architecture Overview: + - 1D Convolutional layer (16 filters, kernel size 4) for feature extraction + - LP pooling (pool size 2) for dimensionality reduction + - Token and position embedding layer to convert continuous values to embeddings + - Single transformer encoder block with multi-head self-attention + - Dense output layer for sequence prediction + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 99) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + - appliance_params (dict): Appliance-specific normalization parameters """ def __init__(self, params): self.MODEL_NAME = "BERT" @@ -123,25 +153,49 @@ def __init__(self, params): self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def return_network(self): + """Creates the BERT module matching TensorFlow implementation exactly. + + Key architectural features: + - Conv1D(16, 4) with 'same' padding and linear activation + - LPpool with pool_size=2 + - TokenAndPositionEmbedding applied to 16-dim features -> 32-dim embeddings + - Single TransformerBlock + - Dense layer mapping to sequence_length output + """ embed_dim = 32 num_heads = 2 ff_dim = 32 vocab_size = 20000 - maxlen = self.sequence_length + maxlen = 49 # After pooling, sequence length becomes 49 (99 -> 49 after pool_size=2) - model = nn.Sequential( - Permute(0, 2, 1), # [B, 1, 99] - nn.Conv1d(1, embed_dim, 4, stride=1, padding='same'), # [B, embed_dim, 99] - LPpool(pool_size=2), # [B, embed_dim, 49] - Permute(0, 2, 1), # [B, 49, embed_dim] - PositionalEncoding(embed_dim, 49), # [B, 49, embed_dim] - TransformerBlock(embed_dim, num_heads, ff_dim), # [B, 49, embed_dim] - nn.Flatten(), # [B, 49 * embed_dim] - nn.Dropout(0.1), - nn.Linear(49 * embed_dim, self.sequence_length), - nn.Dropout(0.1) - ).to(self.device) + class BERTModel(nn.Module): + def __init__(self, embed_dim, num_heads, ff_dim, vocab_size, maxlen, sequence_length, device): + super(BERTModel, self).__init__() + self.permute1 = Permute(0, 2, 1) + self.conv1d = nn.Conv1d(1, 16, 4, stride=1, padding='same') + self.lppool = LPpool(pool_size=2) + self.permute2 = Permute(0, 2, 1) + self.token_pos_emb = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim) + self.transformer = TransformerBlock(embed_dim, num_heads, ff_dim) + self.flatten = nn.Flatten() + self.dropout1 = nn.Dropout(0.1) + self.linear = nn.Linear(maxlen * embed_dim, sequence_length) # Use maxlen instead of hardcoded 49 + self.dropout2 = nn.Dropout(0.1) + + def forward(self, x): + x = self.permute1(x) # [B, 1, 99] + x = self.conv1d(x) # [B, 16, 99] + x = self.lppool(x) # [B, 16, 49] + x = self.permute2(x) # [B, 49, 16] + x = self.token_pos_emb(x) # [B, 49, 32] + x = self.transformer(x) # [B, 49, 32] + x = self.flatten(x) # [B, 49 * 32] + x = self.dropout1(x) + x = self.linear(x) # [B, sequence_length] + x = self.dropout2(x) + return x + model = BERTModel(embed_dim, num_heads, ff_dim, vocab_size, maxlen, self.sequence_length, self.device).to(self.device) return model def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **load_kwargs): @@ -171,11 +225,15 @@ def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **loa print("Started Retraining model for ", appliance_name) model = self.models[appliance_name] - optimizer = optim.Adam(model.parameters()) + # Use default Adam parameters to match TF's 'adam' + optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-07) criterion = nn.MSELoss() if train_main.size > 0: if len(train_main) > 10: + # Create unique filename for model weights like TF version + filepath = f'BERT-temp-weights-{random.randint(0,100000)}.pt' + train_x, v_x, train_y, v_y = train_test_split( train_main, power, test_size=.15, random_state=10) @@ -205,7 +263,7 @@ def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **loa train_loss += loss.item() * batch_mains.size(0) train_loop.set_postfix(loss=loss.item()) - train_loss /= len(train_loader.dataset) + train_loss /= len(train_dataset) # Use dataset length directly # Validation phase with tqdm model.eval() @@ -221,15 +279,18 @@ def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **loa val_loss += loss.item() * batch_mains.size(0) val_loop.set_postfix(loss=loss.item()) - val_loss /= len(val_loader.dataset) + val_loss /= len(val_dataset) # Use dataset length directly + # Save best model (like ModelCheckpoint in TF) if val_loss < best_val_loss: best_val_loss = val_loss - torch.save(model.state_dict(), f'BERT-temp-weights-{appliance_name}.pt') - - print(f'Epoch {epoch+1}/{self.n_epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}') + torch.save(model.state_dict(), filepath) + print(f'Epoch {epoch+1}/{self.n_epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f} - Model saved') + else: + print(f'Epoch {epoch+1}/{self.n_epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}') - model.load_state_dict(torch.load(f'BERT-temp-weights-{appliance_name}.pt')) + # Load best weights (like TF version) + model.load_state_dict(torch.load(filepath)) # [Rest of the methods remain exactly the same as in the previous version] def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): @@ -324,6 +385,8 @@ def call_preprocessing(self, mains_lst, submeters_lst, method): new_mains = mains.values.flatten() n = self.sequence_length units_to_pad = n // 2 + # TF version doesn't pad during test - comment out padding line + # new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) new_mains = (new_mains - self.mains_mean) / self.mains_std new_mains = new_mains.reshape((-1, self.sequence_length)) diff --git a/nilmtk_contrib/torch/conv_lstm.py b/nilmtk_contrib/torch/conv_lstm.py new file mode 100644 index 0000000..8f00f3e --- /dev/null +++ b/nilmtk_contrib/torch/conv_lstm.py @@ -0,0 +1,361 @@ +from collections import OrderedDict +import os +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +from torch.utils.data import TensorDataset, DataLoader +from tqdm import tqdm +from nilmtk.disaggregate import Disaggregator + + +class SequenceLengthError(Exception): + pass + +class ApplianceNotFoundError(Exception): + pass + +class ConvLSTM(Disaggregator): + """ + Convolutional LSTM for non-intrusive load monitoring. + + This implementation is based on the paper: + "Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting" + https://arxiv.org/abs/1506.04214 + + The model adapts the ConvLSTM architecture for energy disaggregation tasks, + using spatiotemporal sequence modeling to predict individual appliance power consumption + from aggregate household power measurements. + + Architecture Overview: + - Convolutional LSTM layers for spatiotemporal feature learning + - Dropout and dense layers for regularization and output prediction + - Sequence-to-point prediction for energy disaggregation + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 99) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + - appliance_params (dict): Appliance-specific normalization parameters + - mains_mean (float): Mean value for mains normalization (default: 1800) + - mains_std (float): Standard deviation for mains normalization (default: 600) + """ + def __init__(self, params): + super().__init__() + self.MODEL_NAME = "ConvLSTM" + self.models = OrderedDict() + self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" + + # Extract hyperparameters from params dict - exactly same as seq2point_new + self.chunk_wise_training = params.get("chunk_wise_training", False) + self.sequence_length = params.get("sequence_length", 99) + self.n_epochs = params.get("n_epochs", 10) + self.batch_size = params.get("batch_size", 512) + self.appliance_params = params.get("appliance_params", {}) + self.mains_mean = params.get("mains_mean", 1800) + self.mains_std = params.get("mains_std", 600) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Sequence length must be odd for proper windowing + if self.sequence_length % 2 == 0: + print("Sequence length should be odd!") + raise SequenceLengthError + + def return_network(self): + """ + Builds the Conv-LSTM network architecture. + """ + class ConvLSTMNet(nn.Module): + def __init__(self, sequence_length): + super().__init__() + + # Convolutional feature extraction layers + # Similar to seq2point but with fewer layers for LSTM compatibility + self.conv1 = nn.Conv1d(1, 32, kernel_size=8, stride=1, padding=3) + self.conv2 = nn.Conv1d(32, 64, kernel_size=6, stride=1, padding=2) + self.conv3 = nn.Conv1d(64, 128, kernel_size=4, stride=1, padding=1) + + # Calculate conv output length + conv_len = sequence_length # With padding, length is preserved + self.conv_output_dim = 128 + + # Dropout for regularization + self.dropout1 = nn.Dropout(0.2) + + # BiLSTM layers for temporal modeling + self.lstm1 = nn.LSTM( + input_size=self.conv_output_dim, + hidden_size=128, + num_layers=1, + batch_first=True, + bidirectional=True, + dropout=0.0 + ) + + self.lstm2 = nn.LSTM( + input_size=256, # 128 * 2 (bidirectional) + hidden_size=64, + num_layers=1, + batch_first=True, + bidirectional=True, + dropout=0.0 + ) + + self.dropout2 = nn.Dropout(0.2) + + # Final prediction layers + self.fc1 = nn.Linear(128, 64) # 64 * 2 (bidirectional) + self.fc2 = nn.Linear(64, 1) + + # Initialize weights + self._initialize_weights() + + def _initialize_weights(self): + """ + Initializes model weights. + """ + for m in self.modules(): + if isinstance(m, nn.Conv1d): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.LSTM): + for name, param in m.named_parameters(): + if 'weight_ih' in name: + nn.init.xavier_uniform_(param.data) + elif 'weight_hh' in name: + nn.init.orthogonal_(param.data) + elif 'bias' in name: + nn.init.zeros_(param.data) + + def forward(self, x): + # x shape: (batch_size, 1, sequence_length) + + # Convolutional feature extraction + x = torch.relu(self.conv1(x)) + x = torch.relu(self.conv2(x)) + x = torch.relu(self.conv3(x)) + x = self.dropout1(x) + + # Reshape for LSTM: (batch_size, sequence_length, features) + x = x.transpose(1, 2) # (batch_size, sequence_length, conv_output_dim) + + # BiLSTM layers + x, _ = self.lstm1(x) + x, _ = self.lstm2(x) + x = self.dropout2(x) + + # Take the last timestep output for sequence-to-point prediction + x = x[:, -1, :] # (batch_size, hidden_size * 2) + + # Final prediction layers + x = torch.relu(self.fc1(x)) + x = self.fc2(x) + + return x + + model = ConvLSTMNet(self.sequence_length).to(self.device) + return model + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data by creating sliding windows, same as seq2point. + """ + if method == 'train': + # Preprocessing for the train data - exactly matching seq2point_new + mains_df_list = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + mains_df_list.append(pd.DataFrame(new_mains)) + + appliance_list = [] + for app_index, (app_name, app_df_list) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + else: + print("Parameters for", app_name, "were not found!") + raise ApplianceNotFoundError() + + processed_appliance_dfs = [] + for app_df in app_df_list: + new_app_readings = app_df.values.reshape((-1, 1)) + # This is for choosing windows + new_app_readings = (new_app_readings - app_mean) / app_std + # Return as a list of dataframe + processed_appliance_dfs.append(pd.DataFrame(new_app_readings)) + appliance_list.append((app_name, processed_appliance_dfs)) + return mains_df_list, appliance_list + + else: + # Preprocessing for the test data - exactly matching seq2point_new + mains_df_list = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + mains_df_list.append(pd.DataFrame(new_mains)) + return mains_df_list + + def set_appliance_params(self, train_appliances): + """ + Computes and sets normalization parameters for each appliance. + """ + for app_name, df_list in train_appliances: + l = np.array(pd.concat(df_list, axis=0)) + app_mean = np.mean(l) + app_std = np.std(l) + if app_std < 1: + app_std = 100 + self.appliance_params.update({app_name: {'mean': app_mean, 'std': app_std}}) + print(self.appliance_params) + + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **load_kwargs): + """ + Trains the Conv-LSTM model on a chunk of data. + """ + # If no appliance wise parameters are provided, then compute them using the first chunk + if len(self.appliance_params) == 0: + self.set_appliance_params(train_appliances) + + print("...............ConvLSTM partial_fit running...............") + # Do the pre-processing, such as windowing and normalizing + if do_preprocessing: + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + train_main = pd.concat(train_main, axis=0) + train_main = train_main.values.reshape((-1, self.sequence_length, 1)) + new_train_appliances = [] + for app_name, app_df in train_appliances: + app_df = pd.concat(app_df, axis=0) + app_df_values = app_df.values.reshape((-1, 1)) + new_train_appliances.append((app_name, app_df_values)) + train_appliances = new_train_appliances + + for appliance_name, power in train_appliances: + # Check if the appliance was already trained. If not then create a new model for it + if appliance_name not in self.models: + print("First model training for", appliance_name) + self.models[appliance_name] = self.return_network() + # Retrain the particular appliance + else: + print("Started Retraining model for", appliance_name) + + model = self.models[appliance_name] + if train_main.size > 0: + # Sometimes chunks can be empty after dropping NANS + if len(train_main) > 10: + # Convert to PyTorch tensors and correct format + # PyTorch Conv1d expects (batch, channels, length) + train_main_tensor = torch.tensor(train_main, dtype=torch.float32).permute(0, 2, 1).to(self.device) + power_tensor = torch.tensor(power, dtype=torch.float32).squeeze().to(self.device) + + # Create validation split + n_samples = train_main_tensor.size(0) + val_size = int(0.15 * n_samples) + indices = torch.randperm(n_samples) + train_idx, val_idx = indices[val_size:], indices[:val_size] + + train_X = train_main_tensor[train_idx] + train_y = power_tensor[train_idx] + val_X = train_main_tensor[val_idx] + val_y = power_tensor[val_idx] + + # Setup optimizer and loss + optimizer = torch.optim.Adam(model.parameters()) + criterion = nn.MSELoss() + + best_val_loss = float('inf') + filepath = self.file_prefix + "-{}-epoch{}.pth".format( + "_".join(appliance_name.split()), + current_epoch, + ) + + # Training loop matching seq2point_new behavior + for epoch in range(self.n_epochs): + model.train() + + # Create batches + train_dataset = TensorDataset(train_X, train_y) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + epoch_losses = [] + for batch_X, batch_y in train_loader: + optimizer.zero_grad() + predictions = model(batch_X).squeeze() + loss = criterion(predictions, batch_y) + loss.backward() + + # Add gradient clipping like seq2point_new + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + + optimizer.step() + epoch_losses.append(loss.item()) + + # Validation + model.eval() + with torch.no_grad(): + val_predictions = model(val_X).squeeze() + val_loss = criterion(val_predictions, val_y).item() + + avg_train_loss = np.mean(epoch_losses) + print(f"Epoch {epoch+1}/{self.n_epochs} - loss: {avg_train_loss:.4f} - val_loss: {val_loss:.4f}") + + # Save best model (matching seq2point_new's ModelCheckpoint behavior) + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f"Validation loss improved, saving model to {filepath}") + + # Load best weights + model.load_state_dict(torch.load(filepath, map_location=self.device)) + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """ + Disaggregates a chunk of mains power data. + """ + if model is not None: + self.models = model + + # Preprocess the test mains such as windowing and normalizing + if do_preprocessing: + test_main_list = self.call_preprocessing(test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_main in test_main_list: + test_main = test_main.values + test_main = test_main.reshape((-1, self.sequence_length, 1)) + + # Convert to PyTorch tensor with correct format for Conv1d + test_main_tensor = torch.tensor(test_main, dtype=torch.float32).permute(0, 2, 1).to(self.device) + + disggregation_dict = {} + for appliance in self.models: + model = self.models[appliance] + model.eval() + with torch.no_grad(): + prediction = model(test_main_tensor).cpu().numpy() + # Denormalize exactly like seq2point_new + prediction = self.appliance_params[appliance]['mean'] + prediction * self.appliance_params[appliance]['std'] + valid_predictions = prediction.flatten() + valid_predictions = np.where(valid_predictions > 0, valid_predictions, 0) + df = pd.Series(valid_predictions) + disggregation_dict[appliance] = df + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + return test_predictions diff --git a/nilmtk_contrib/torch/dae.py b/nilmtk_contrib/torch/dae.py index 4fc6c67..46be609 100644 --- a/nilmtk_contrib/torch/dae.py +++ b/nilmtk_contrib/torch/dae.py @@ -36,6 +36,34 @@ def forward(self, x): return x class DAE(Disaggregator): + """ + Denoising Autoencoder for non-intrusive load monitoring. + + This implementation is based on the paper: + "Neural NILM: Deep Neural Networks Applied to Energy Disaggregation" + https://arxiv.org/abs/1507.06594 + + The model uses a denoising autoencoder architecture for energy disaggregation tasks, + learning to reconstruct individual appliance power consumption from aggregate + household power measurements. + + Architecture Overview: + - Convolutional encoder layer for feature extraction + - Fully connected bottleneck layers for dimensionality reduction + - Convolutional decoder layer for sequence reconstruction + - Sequence-to-sequence prediction for energy disaggregation + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 99) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - mains_mean (float): Mean value for mains normalization (default: 1000) + - mains_std (float): Standard deviation for mains normalization (default: 600) + - appliance_params (dict): Appliance-specific normalization parameters + - save-model-path (str): Path to save trained models + - pretrained-model-path (str): Path to load pre-trained models + """ def __init__(self, params): super().__init__() self.MODEL_NAME = "DAE" @@ -54,6 +82,7 @@ def __init__(self, params): self.load_model() def return_network(self): + """Returns the DAE model.""" return DAEModel(self.sequence_length).to(self.device) def set_appliance_params(self, train_appliances): @@ -67,6 +96,9 @@ def set_appliance_params(self, train_appliances): self.appliance_params[name] = {'mean': m, 'std': s} def normalize_input(self, data, n, mean, std, overlap): + """ + Normalizes and windows the input data. + """ flat = data.flatten() pad = (n - flat.size % n) % n flat = np.concatenate([flat, np.zeros(pad)]) @@ -79,11 +111,14 @@ def normalize_input(self, data, n, mean, std, overlap): return ((w - mean)/std).reshape(-1, n, 1) # normalize and reshape for model def denormalize_output(self, data, mean, std): + """ + Denormalizes the output data. + """ return mean + data*std def call_preprocessing(self, mains_lst, subs, method): """ - Preprocess the mains and appliances data for training or testing. + Preprocesses the mains and appliance data. """ if method == 'train': pm, apps = [], [] @@ -119,6 +154,9 @@ def call_preprocessing(self, mains_lst, subs, method): return pm def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **_): + """ + Trains the model on a chunk of data. + """ if not self.appliance_params: self.set_appliance_params(train_appliances) @@ -177,6 +215,9 @@ def partial_fit(self, train_main, train_appliances, do_preprocessing=True, curre self.save_model() def save_model(self): + """ + Saves the trained model and parameters. + """ os.makedirs(self.save_model_path, exist_ok=True) params = { 'sequence_length': self.sequence_length, @@ -191,6 +232,9 @@ def save_model(self): os.path.join(self.save_model_path, f"{name}.pt")) def load_model(self): + """ + Loads a pre-trained model and its parameters. + """ with open(os.path.join(self.load_model_path,'model.json')) as f: p = json.load(f) self.sequence_length = p['sequence_length'] @@ -206,6 +250,9 @@ def load_model(self): self.models[name] = m def disaggregate_chunk(self, test_main_list, do_preprocessing=True): + """ + Disaggregates a chunk of mains data. + """ if do_preprocessing: test_main_list = self.call_preprocessing( test_main_list, None, 'test' diff --git a/nilmtk_contrib/torch/msdc.py b/nilmtk_contrib/torch/msdc.py new file mode 100644 index 0000000..2e31287 --- /dev/null +++ b/nilmtk_contrib/torch/msdc.py @@ -0,0 +1,688 @@ +from collections import OrderedDict +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import DataLoader, TensorDataset +from nilmtk.disaggregate import Disaggregator +import os + + +class SequenceLengthError(Exception): + pass + + +class ApplianceNotFoundError(Exception): + pass + + +class MSDCNet(nn.Module): + """ + Dual-branch CNN for joint state classification and power prediction. + - Branch 1: Predicts state emission scores for a CRF. + - Branch 2: Predicts power consumption for each state. + - CRF layer models state transitions. + """ + + def __init__(self, window_length, num_states): + super(MSDCNet, self).__init__() + self.window_length = window_length + self.num_states = num_states + + # Shared CNN feature extractor + self.shared_cnn = nn.Sequential( + nn.Conv1d(1, 32, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv1d(32, 64, kernel_size=3, padding=1), + nn.ReLU(), + nn.AdaptiveAvgPool1d(1) + ) + + # Branch 1: State emission scores for CRF + self.state_branch = nn.Sequential( + nn.Linear(64, 128), + nn.ReLU(), + nn.Dropout(0.5), + nn.Linear(128, num_states) + ) + + # Branch 2: Power predictions for each state + self.power_branch = nn.Sequential( + nn.Linear(64, 128), + nn.ReLU(), + nn.Dropout(0.5), + nn.Linear(128, num_states) + ) + + # CRF layer for state sequence modeling + self.crf = CRF(num_states) + + def forward(self, x): + """ + Forward pass through the network. + Args: + x: Input tensor of shape (batch_size, seq_len, window_length) + + Returns: + emissions: State emission scores (batch_size, seq_len, num_states) + power_preds: Power predictions for each state (batch_size, seq_len, num_states) + """ + batch_size, seq_len, window_length = x.shape + + # Reshape for CNN: (batch_size * seq_len, 1, window_length) + x_reshaped = x.view(-1, 1, window_length) + + # Extract features using shared CNN + features = self.shared_cnn(x_reshaped) # (batch_size * seq_len, 64, 1) + features = features.squeeze(-1) # (batch_size * seq_len, 64) + + # Branch 1: State emissions + emissions = self.state_branch(features) # (batch_size * seq_len, num_states) + emissions = emissions.view(batch_size, seq_len, self.num_states) + + # Branch 2: Power predictions + power_preds = self.power_branch(features) # (batch_size * seq_len, num_states) + power_preds = power_preds.view(batch_size, seq_len, self.num_states) + + return emissions, power_preds + + +class CRF(nn.Module): + """Conditional Random Field for sequence modeling.""" + + def __init__(self, num_states): + super(CRF, self).__init__() + self.num_states = num_states + + # Transition parameters + self.transitions = nn.Parameter(torch.randn(num_states, num_states)) + self.start_transitions = nn.Parameter(torch.randn(num_states)) + self.end_transitions = nn.Parameter(torch.randn(num_states)) + + def forward(self, emissions): + """Computes the log partition function using the forward algorithm.""" + batch_size, seq_len, num_states = emissions.shape + + # Initialize with start transitions + alpha = emissions[:, 0] + self.start_transitions.unsqueeze(0) + + # Forward pass + for t in range(1, seq_len): + alpha_expanded = alpha.unsqueeze(2) # (batch_size, num_states, 1) + trans_scores = alpha_expanded + self.transitions.unsqueeze(0) # (batch_size, num_states, num_states) + alpha = torch.logsumexp(trans_scores, dim=1) + emissions[:, t] + + # Add end transitions + log_partition = torch.logsumexp(alpha + self.end_transitions.unsqueeze(0), dim=1) + return log_partition + + def score_sequence(self, emissions, states): + """Computes the log-likelihood of a given state sequence.""" + batch_size, seq_len = states.shape + + # Start transition score + score = self.start_transitions[states[:, 0]] + + # Emission scores + for t in range(seq_len): + score += emissions[range(batch_size), t, states[:, t]] + + # Transition scores + for t in range(seq_len - 1): + score += self.transitions[states[:, t], states[:, t + 1]] + + # End transition score + score += self.end_transitions[states[:, -1]] + + return score + + def viterbi_decode(self, emissions): + """Finds the most likely state sequence using the Viterbi algorithm.""" + batch_size, seq_len, num_states = emissions.shape + + # Initialize + delta = emissions[:, 0] + self.start_transitions.unsqueeze(0) + psi = torch.zeros(batch_size, seq_len, num_states, dtype=torch.long, device=emissions.device) + + # Forward pass + for t in range(1, seq_len): + delta_expanded = delta.unsqueeze(2) # (batch_size, num_states, 1) + trans_scores = delta_expanded + self.transitions.unsqueeze(0) # (batch_size, num_states, num_states) + + delta_next, psi[:, t] = torch.max(trans_scores, dim=1) + delta = delta_next + emissions[:, t] + + # Add end transitions and find best final state + final_scores = delta + self.end_transitions.unsqueeze(0) + best_final_states = torch.argmax(final_scores, dim=1) + + # Backward pass to reconstruct path + best_paths = torch.zeros(batch_size, seq_len, dtype=torch.long, device=emissions.device) + best_paths[:, -1] = best_final_states + + for t in range(seq_len - 2, -1, -1): + best_paths[:, t] = psi[range(batch_size), t + 1, best_paths[:, t + 1]] + + return best_paths + + +class MSDC(Disaggregator): + """ + Multi-State Dual CNN for non-intrusive load monitoring. + + This implementation is based on the paper: + "MSDC: Exploiting Multi-State Power Consumption in Non-intrusive Load Monitoring based on A Dual-CNN Model" + https://arxiv.org/abs/2302.05565 + + The model uses a dual-branch CNN architecture with a CRF layer for joint state + classification and power prediction in energy disaggregation tasks. + + Architecture Overview: + - Dual-branch CNN for feature extraction + - Branch 1: State emission scores for CRF layer + - Branch 2: Power consumption prediction for each state + - CRF layer for modeling state transitions + - Multi-state power consumption modeling + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences + - n_epochs (int): Number of training epochs + - batch_size (int): Training batch size + - appliance_params (dict): Appliance-specific normalization parameters + """ + + # Dataset-specific configurations from the official MSDC implementation + APPLIANCE_STATES = { + 'kettle': { + 'uk_dale': { + 'states': [2000, 4500], + 'state_averages': [1.15, 2280.79], + 'num_states': 2, + 'threshold': 2000 + } + # No REDD config for kettle in original - will fallback to UK-DALE + }, + 'microwave': { + 'uk_dale': { + 'states': [300, 3000], + 'state_averages': [1.4, 1551.3], + 'num_states': 2, + 'threshold': 300 + }, + 'redd': { + 'states': [300, 3000], + 'state_averages': [4.2, 1557.501], + 'num_states': 2, + 'threshold': 300 + } + }, + 'fridge': { + 'uk_dale': { + 'states': [20, 200, 2500], + 'state_averages': [0.13, 87.26, 246.5], + 'num_states': 3, + 'threshold': 20 + }, + 'redd': { + 'states': [50, 300, 500], + 'state_averages': [3.2, 143.3, 397.3], + 'num_states': 3, + 'threshold': 50 + }, + 'redd_house1': { + 'states': [50, 300, 500], + 'state_averages': [6.49, 192.57, 443], + 'num_states': 3, + 'threshold': 50 + }, + 'redd_house2': { + 'states': [50, 300, 500], + 'state_averages': [6.34, 162.87, 418.36], + 'num_states': 3, + 'threshold': 50 + }, + 'redd_house3': { + 'states': [50, 300, 500], + 'state_averages': [0.54, 118.85, 409.75], + 'num_states': 3, + 'threshold': 50 + } + }, + 'dishwasher': { + 'uk_dale': { + 'states': [50, 1000, 4500], + 'state_averages': [0.89, 122.56, 2324.9], + 'num_states': 3, + 'threshold': 50 + }, + 'redd': { + 'states': [150, 300, 1000, 3000], + 'state_averages': [0.57, 232.91, 733.89, 1198.31], + 'num_states': 4, + 'threshold': 150 + }, + 'redd_house1': { + 'states': [150, 300, 1000, 3000], + 'state_averages': [0.21, 216.75, 438.51, 1105.08], + 'num_states': 4, + 'threshold': 150 + }, + 'redd_house2': { + 'states': [150, 1000, 3000], + 'state_averages': [0.16, 250.26, 1197.93], + 'num_states': 3, + 'threshold': 150 + }, + 'redd_house3': { + 'states': [50, 400, 1000], + 'state_averages': [0.97, 195.6, 743.42], + 'num_states': 3, + 'threshold': 50 + } + }, + 'washingmachine': { + 'uk_dale': { + 'states': [50, 800, 3500], + 'state_averages': [0.13, 204.64, 1892.85], + 'num_states': 3, + 'threshold': 50 + }, + 'uk_dale_house2': { + 'states': [50, 200, 1000, 4000], + 'state_averages': [2.83, 114.34, 330.25, 2100.14], + 'num_states': 4, + 'threshold': 50 + }, + 'redd': { + 'states': [500, 5000], + 'state_averages': [0, 2627.3], + 'num_states': 2, + 'threshold': 500 + } + } + } + + # Dataset-specific normalization parameters + DATASET_NORMALIZATION = { + 'uk_dale': { + 'mains_mean': 1800, + 'mains_std': 600 + }, + 'redd': { + 'mains_mean': 352.32, # From official MSDC REDD implementation + 'mains_std': 608.42 + } + } + + def __init__(self, params): + super().__init__() + + self.MODEL_NAME = "MSDC" + self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" + + # Dataset configuration + self.dataset = params.get('dataset', 'uk_dale').lower() + self.house = params.get('house', None) + + # Validate and build dataset key + if self.dataset not in ['uk_dale', 'redd']: + print(f"Warning: Unknown dataset '{self.dataset}'. Defaulting to 'uk_dale'.") + self.dataset = 'uk_dale' + + self.dataset_key = f"{self.dataset}_house{self.house}" if self.house else self.dataset + + # Hyperparameters + self.sequence_length = params.get('sequence_length', 99) + if self.sequence_length % 2 == 0: + raise SequenceLengthError("Sequence length must be odd") + + self.num_states = params.get('num_states', 3) # Will be overridden by appliance config + self.n_epochs = params.get('n_epochs', 50) + self.batch_size = params.get('batch_size', 256) + self.learning_rate = params.get('learning_rate', 0.001) + self.patience = params.get('patience', 5) + + # Dataset-specific normalization parameters + dataset_norm = self.DATASET_NORMALIZATION.get(self.dataset, self.DATASET_NORMALIZATION['uk_dale']) + self.mains_mean = params.get('mains_mean', dataset_norm['mains_mean']) + self.mains_std = params.get('mains_std', dataset_norm['mains_std']) + self.appliance_params = params.get('appliance_params', {}) + + # Model and device configuration + self.models = OrderedDict() + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Display configuration + print(f"MSDC initialized for dataset: {self.dataset.upper()}") + if self.house: + print(f"House: {self.house}") + print(f"Configuration key: {self.dataset_key}") + print(f"Mains normalization - mean: {self.mains_mean}, std: {self.mains_std}") + + def _get_appliance_config(self, appliance_name): + """Retrieves the best available configuration for an appliance.""" + if appliance_name not in self.APPLIANCE_STATES: + return None + + appliance_configs = self.APPLIANCE_STATES[appliance_name] + + # Priority: specific house > dataset > any available config + if self.dataset_key in appliance_configs: + return appliance_configs[self.dataset_key] + elif self.dataset in appliance_configs: + return appliance_configs[self.dataset] + else: + # Use any available configuration as fallback + available_configs = list(appliance_configs.keys()) + if available_configs: + fallback_key = available_configs[0] + print(f"Warning: No {self.dataset_key} config for {appliance_name}, using {fallback_key}") + return appliance_configs[fallback_key] + + return None + + def return_network(self, appliance_name): + """Creates an MSDC model instance for a specific appliance.""" + config = self._get_appliance_config(appliance_name) + if config: + num_states = config['num_states'] + print(f"Creating network for {appliance_name} with {num_states} states ({self.dataset_key})") + else: + num_states = self.num_states # fallback to default + print(f"Warning: No config found for {appliance_name}, using default {num_states} states") + + return MSDCNet(self.sequence_length, num_states).to(self.device) + + def set_appliance_params(self, train_appliances): + """Computes and sets normalization parameters for each appliance.""" + for name, lst in train_appliances: + arr = pd.concat(lst, axis=0).values.flatten() + m, s = arr.mean(), arr.std() + # Avoid division by zero + if s < 1: + s = 100 + print(f"Computed normalization for {name}: mean={m:.2f}, std={s:.2f}") + + self.appliance_params[name] = {'mean': m, 'std': s} + + def _create_state_labels(self, power_sequence, appliance_name): + """ + Generates state labels based on dataset-specific configurations. + """ + power = power_sequence.flatten() + + # Get appliance configuration + config = self._get_appliance_config(appliance_name) + + if config: + thresholds = config['states'] + num_states = config['num_states'] + else: + # Fallback to dynamic thresholds if no config is found + mean_power = self.appliance_params.get(appliance_name, {}).get('mean', power.mean()) + num_states = self.num_states + + if num_states == 2: + thresholds = [0.1 * mean_power] + elif num_states == 3: + thresholds = [0.1 * mean_power, 0.7 * mean_power] + else: + thresholds = np.linspace(0, mean_power * 1.2, num_states)[1:] + + # Create state labels based on thresholds + states = np.zeros_like(power, dtype=np.int64) + + for i, threshold in enumerate(thresholds): + states[power >= threshold] = i + 1 + + # Ensure states are within valid range + states = np.clip(states, 0, num_states - 1) + + return states.astype(np.int64) + + def _compute_msdc_loss(self, model, x, y_power, y_states, appliance_name): + """ + Computes the combined MSDC loss. + - CRF negative log-likelihood for state sequence. + - MSE for per-state power predictions. + - MSE for final power prediction based on Viterbi-decoded states. + """ + # Forward pass + emissions, power_preds = model(x) + + # Use the model's CRF + crf = model.crf + + # Get number of states for the appliance + config = self._get_appliance_config(appliance_name) + num_states = config['num_states'] if config else self.num_states + + # 1. CRF loss (negative log-likelihood) + log_partition = crf(emissions) + sequence_scores = crf.score_sequence(emissions, y_states) + crf_loss = torch.mean(log_partition - sequence_scores) + + # 2. Per-state power loss + batch_size, seq_len = y_states.shape + state_power_loss = 0 + for state_id in range(num_states): + state_mask = (y_states == state_id).float() + if state_mask.sum() > 0: + state_power_pred = power_preds[:, :, state_id] + masked_pred = state_power_pred * state_mask + masked_target = y_power * state_mask + state_power_loss += F.mse_loss(masked_pred, masked_target, reduction='sum') / (state_mask.sum() + 1e-8) + + # 3. Final power loss (using Viterbi-decoded states) + best_states = crf.viterbi_decode(emissions) + final_power_pred = torch.zeros_like(y_power) + for b in range(batch_size): + for t in range(seq_len): + state = best_states[b, t] + final_power_pred[b, t] = power_preds[b, t, state] + + final_power_loss = F.mse_loss(final_power_pred, y_power) + + # Combined loss with weights from the paper + total_loss = crf_loss + 0.5 * state_power_loss + final_power_loss + + return total_loss, crf_loss, state_power_loss, final_power_loss + + def partial_fit(self, train_main, train_appliances, + do_preprocessing=True, current_epoch=0, **_): + """Trains the model on a chunk of data.""" + + print("started Partial Fit") + + # Set appliance parameters if not already done + if len(self.appliance_params) == 0: + self.set_appliance_params(train_appliances) + + # Preprocess data + if do_preprocessing: + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + print("Preprocessing done") + + # Prepare main power data + mains_arr = pd.concat(train_main, axis=0).values + if len(mains_arr.shape) == 2: + mains_arr = mains_arr.reshape(-1, self.sequence_length, 1) + else: + mains_arr = mains_arr.reshape(-1, self.sequence_length, 1) + + # Prepare appliance data + new_train_appliances = [] + for app_name, app_dfs in train_appliances: + app_df = pd.concat(app_dfs, axis=0) + app_df_values = app_df.values + new_train_appliances.append((app_name, app_df_values)) + + train_appliances = new_train_appliances + + # Train a separate model for each appliance + for appliance_name, app_data in train_appliances: + print(f"\nTraining MSDC for {appliance_name}...") + + # Initialize model if not already trained + if appliance_name not in self.models: + self.models[appliance_name] = self.return_network(appliance_name) + + model = self.models[appliance_name] + optimizer = optim.Adam(model.parameters(), lr=self.learning_rate) + + # Convert data to tensors + mains_tensor = torch.FloatTensor(mains_arr).to(self.device) + app_tensor = torch.FloatTensor(app_data).to(self.device) + + # Create state labels + state_labels = [] + for i in range(app_data.shape[0]): + states = self._create_state_labels(app_data[i], appliance_name) + state_labels.append(states) + state_labels = np.array(state_labels) + state_tensor = torch.LongTensor(state_labels).to(self.device) + + # Create dataset and dataloader + dataset = TensorDataset(mains_tensor, app_tensor, state_tensor) + dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True) + + # Training loop + model.train() + print(f"Training on {self.device}...") + for epoch in range(self.n_epochs): + print(f"Epoch {epoch + 1}/{self.n_epochs} for {appliance_name}") + total_loss = 0 + batch_count = 0 + for batch_mains, batch_app, batch_states in dataloader: + optimizer.zero_grad() + + # Forward pass + emissions, power_preds = model(batch_mains) + + # Compute loss + loss, crf_loss, state_power_loss, final_power_loss = self._compute_msdc_loss( + model, batch_mains, batch_app.squeeze(-1), batch_states, appliance_name + ) + + # Backward pass and optimization + loss.backward() + optimizer.step() + + total_loss += loss.item() + batch_count += 1 + + if epoch % 10 == 0: + avg_loss = total_loss / batch_count + print(f"Epoch {epoch}/{self.n_epochs}, Avg Loss: {avg_loss:.4f}") + + print(f"Training completed for {appliance_name}!") + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """Disaggregates a chunk of mains data using the trained models.""" + + if model is not None: + self.models = model + + # Preprocess test data + if do_preprocessing: + test_main_list = self.call_preprocessing(test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_main in test_main_list: + test_main = test_main.values + test_main = test_main.reshape((-1, self.sequence_length, 1)) + disggregation_dict = {} + + test_main_tensor = torch.FloatTensor(test_main).to(self.device) + + for appliance, model in self.models.items(): + print(f"Predicting {appliance}...") + model.eval() + + with torch.no_grad(): + # Forward pass + emissions, power_preds = model(test_main_tensor) + + # Decode state sequence using Viterbi + best_states = model.crf.viterbi_decode(emissions) + + # Get power predictions for the decoded state sequence + batch_size, seq_len = best_states.shape + predicted_power = torch.zeros(batch_size, seq_len, device=self.device) + + for b in range(batch_size): + for t in range(seq_len): + state = best_states[b, t] + predicted_power[b, t] = power_preds[b, t, state] + + # Extract center values (middle of each window) + center_idx = self.sequence_length // 2 + pred = predicted_power[:, center_idx].cpu().numpy() + + # Denormalize predictions + pred = pred * self.appliance_params[appliance]['std'] + self.appliance_params[appliance]['mean'] + pred = np.where(pred > 0, pred, 0) # Ensure non-negative power + + disggregation_dict[appliance] = pred + + test_predictions.append(pd.DataFrame(disggregation_dict, dtype='float32')) + + return test_predictions + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocessing method required by NILMTK API + """ + if method == 'train': + # Process mains data + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + # Process appliance data + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + else: + raise ApplianceNotFoundError() + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.flatten() + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_app_readings = np.array([new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1)]) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + + appliance_list.append((app_name, processed_app_dfs)) + + return processed_mains_lst, appliance_list + + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + new_mains = new_mains.reshape((-1, self.sequence_length)) + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst + +# Export for nilmtk_contrib +__all__ = ['MSDC'] \ No newline at end of file diff --git a/nilmtk_contrib/torch/msdc_without_crf.py b/nilmtk_contrib/torch/msdc_without_crf.py new file mode 100644 index 0000000..957803e --- /dev/null +++ b/nilmtk_contrib/torch/msdc_without_crf.py @@ -0,0 +1,649 @@ +from collections import OrderedDict +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +from torch.utils.data import DataLoader, TensorDataset +from nilmtk.disaggregate import Disaggregator +import os + + +class SequenceLengthError(Exception): + pass + + +class ApplianceNotFoundError(Exception): + pass + + +class MSDCNet(nn.Module): + """ + MSDC Neural Network with a dual-branch CNN architecture. + This model is based on the S2S_state model from the official MSDC repository. + + - Branch 1: Predicts power consumption for each appliance state. + - Branch 2: Predicts the appliance state. + """ + + def __init__(self, window_length, out_len, num_states): + super(MSDCNet, self).__init__() + self.window_length = window_length + self.out_len = out_len + self.num_states = num_states + + # Power branch (Branch 1) - following original MSDC architecture + self.conv1_p = nn.Conv1d(1, 30, 13, padding=6) + self.conv2_p = nn.Conv1d(30, 30, 11, padding=5) + self.conv3_p = nn.Conv1d(30, 40, 7, padding=3) + self.conv4_p = nn.Conv1d(40, 50, 5, padding=2) + self.conv5_p = nn.Conv1d(50, 60, 5, padding=2) + self.conv6_p = nn.Conv1d(60, 60, 5, padding=2) + self.fc1_p = nn.Linear(60 * window_length, 1024) + self.fc2_p = nn.Linear(1024, out_len * num_states) + + # State branch (Branch 2) - following original MSDC architecture + self.conv1_s = nn.Conv1d(1, 30, 13, padding=6) + self.conv2_s = nn.Conv1d(30, 30, 11, padding=5) + self.conv3_s = nn.Conv1d(30, 40, 7, padding=3) + self.conv4_s = nn.Conv1d(40, 50, 5, padding=2) + self.conv5_s = nn.Conv1d(50, 60, 5, padding=2) + self.conv6_s = nn.Conv1d(60, 60, 5, padding=2) + self.fc1_s = nn.Linear(60 * window_length, 1024) + self.fc2_s = nn.Linear(1024, out_len * num_states) + + def forward(self, x): + """ + Args: + x: Input tensor of shape (batch_size, window_length) + + Returns: + power_preds: Power predictions for each state (batch_size, out_len * num_states) + state_preds: State classification scores (batch_size, out_len * num_states) + """ + # Add channel dimension + x = x.unsqueeze(1) # (batch_size, 1, window_length) + y = x + + # Power branch + x = F.relu(self.conv1_p(x)) + x = F.relu(self.conv2_p(x)) + x = F.relu(self.conv3_p(x)) + x = F.relu(self.conv4_p(x)) + x = F.relu(self.conv5_p(x)) + x = F.relu(self.conv6_p(x)) + x = x.flatten(-2, -1) + x = F.relu(self.fc1_p(x)) + power_preds = self.fc2_p(x) + + # State branch + y = F.relu(self.conv1_s(y)) + y = F.relu(self.conv2_s(y)) + y = F.relu(self.conv3_s(y)) + y = F.relu(self.conv4_s(y)) + y = F.relu(self.conv5_s(y)) + y = F.relu(self.conv6_s(y)) + y = y.flatten(-2, -1) + y = F.relu(self.fc1_s(y)) + state_preds = self.fc2_s(y) + + return power_preds, state_preds + + +class MSDC(Disaggregator): + """ + Multi-State Dual CNN for non-intrusive load monitoring without CRF layer. + + This implementation is based on the paper: + "MSDC: Exploiting Multi-State Power Consumption in Non-intrusive Load Monitoring based on A Dual-CNN Model" + https://arxiv.org/abs/2302.05565 + + The model uses a dual-branch CNN architecture without the CRF layer for joint state + classification and power prediction in energy disaggregation tasks. This version + directly predicts states and power consumption without CRF-based transition modeling. + + Architecture Overview: + - Dual-branch CNN for feature extraction + - Branch 1: Power consumption prediction for each state + - Branch 2: Direct state classification (without CRF layer) + - Multi-state power consumption modeling + - Simplified architecture compared to full MSDC model + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences + - n_epochs (int): Number of training epochs + - batch_size (int): Training batch size + - appliance_params (dict): Appliance-specific normalization parameters + """ + + # Complete dataset-specific configurations from official MSDC implementation + APPLIANCE_STATES = { + 'kettle': { + 'uk_dale': { + 'states': [2000, 4500], + 'state_averages': [1.15, 2280.79], + 'num_states': 2, + 'threshold': 2000 + } + # No REDD config for kettle in original - will fallback to UK-DALE + }, + 'microwave': { + 'uk_dale': { + 'states': [300, 3000], + 'state_averages': [1.4, 1551.3], + 'num_states': 2, + 'threshold': 300 + }, + 'redd': { + 'states': [300, 3000], + 'state_averages': [4.2, 1557.501], + 'num_states': 2, + 'threshold': 300 + } + }, + 'fridge': { + 'uk_dale': { + 'states': [20, 200, 2500], + 'state_averages': [0.13, 87.26, 246.5], + 'num_states': 3, + 'threshold': 20 + }, + 'redd': { + 'states': [50, 300, 500], + 'state_averages': [3.2, 143.3, 397.3], + 'num_states': 3, + 'threshold': 50 + }, + 'redd_house1': { + 'states': [50, 300, 500], + 'state_averages': [6.49, 192.57, 443], + 'num_states': 3, + 'threshold': 50 + }, + 'redd_house2': { + 'states': [50, 300, 500], + 'state_averages': [6.34, 162.87, 418.36], + 'num_states': 3, + 'threshold': 50 + }, + 'redd_house3': { + 'states': [50, 300, 500], + 'state_averages': [0.54, 118.85, 409.75], + 'num_states': 3, + 'threshold': 50 + } + }, + 'dishwasher': { + 'uk_dale': { + 'states': [50, 1000, 4500], + 'state_averages': [0.89, 122.56, 2324.9], + 'num_states': 3, + 'threshold': 50 + }, + 'redd': { + 'states': [150, 300, 1000, 3000], + 'state_averages': [0.57, 232.91, 733.89, 1198.31], + 'num_states': 4, + 'threshold': 150 + }, + 'redd_house1': { + 'states': [150, 300, 1000, 3000], + 'state_averages': [0.21, 216.75, 438.51, 1105.08], + 'num_states': 4, + 'threshold': 150 + }, + 'redd_house2': { + 'states': [150, 1000, 3000], + 'state_averages': [0.16, 250.26, 1197.93], + 'num_states': 3, + 'threshold': 150 + }, + 'redd_house3': { + 'states': [50, 400, 1000], + 'state_averages': [0.97, 195.6, 743.42], + 'num_states': 3, + 'threshold': 50 + } + }, + 'washing machine': { + 'uk_dale': { + 'states': [50, 800, 3500], + 'state_averages': [0.13, 204.64, 1892.85], + 'num_states': 3, + 'threshold': 50 + }, + 'uk_dale_house2': { + 'states': [50, 200, 1000, 4000], + 'state_averages': [2.83, 114.34, 330.25, 2100.14], + 'num_states': 4, + 'threshold': 50 + }, + 'redd': { + 'states': [500, 5000], + 'state_averages': [0, 2627.3], + 'num_states': 2, + 'threshold': 500 + } + } + } + + # Dataset-specific normalization parameters + DATASET_NORMALIZATION = { + 'uk_dale': { + 'mains_mean': 1800, + 'mains_std': 600 + }, + 'redd': { + 'mains_mean': 352.32, # From official MSDC REDD implementation + 'mains_std': 608.42 + } + } + + def __init__(self, params): + super().__init__() + + self.MODEL_NAME = "MSDC" + self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" + + # Dataset configuration + self.dataset = params.get('dataset', 'uk_dale').lower() + self.house = params.get('house', None) + + # Validate dataset + if self.dataset not in ['uk_dale', 'redd']: + print(f"Warning: Unknown dataset '{self.dataset}'. Defaulting to 'uk_dale'.") + self.dataset = 'uk_dale' + + # Build dataset key for configuration lookup + if self.house is not None: + self.dataset_key = f"{self.dataset}_house{self.house}" + else: + self.dataset_key = self.dataset + + # Extract hyperparameters + self.sequence_length = params.get('sequence_length', 99) + if self.sequence_length % 2 == 0: + raise SequenceLengthError("Sequence length must be odd") + + # Output length for sequence-to-sequence prediction + self.out_len = params.get('out_len', 64) + self.num_states = params.get('num_states', 3) # Will be overridden by appliance config + self.n_epochs = params.get('n_epochs', 50) + self.batch_size = params.get('batch_size', 256) + self.learning_rate = params.get('learning_rate', 0.001) + self.patience = params.get('patience', 5) + + # Dataset-specific normalization parameters + dataset_norm = self.DATASET_NORMALIZATION.get(self.dataset, self.DATASET_NORMALIZATION['uk_dale']) + self.mains_mean = params.get('mains_mean', dataset_norm['mains_mean']) + self.mains_std = params.get('mains_std', dataset_norm['mains_std']) + self.appliance_params = params.get('appliance_params', {}) + + # Model storage + self.models = OrderedDict() # Store separate models for each appliance + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Display configuration + print(f"MSDC initialized for dataset: {self.dataset.upper()}") + if self.house: + print(f"House: {self.house}") + print(f"Configuration key: {self.dataset_key}") + print(f"Mains normalization - mean: {self.mains_mean}, std: {self.mains_std}") + + def _get_appliance_config(self, appliance_name): + """Get the best available configuration for an appliance""" + if appliance_name not in self.APPLIANCE_STATES: + return None + + appliance_configs = self.APPLIANCE_STATES[appliance_name] + + # Priority order: dataset_key -> dataset -> any available + if self.dataset_key in appliance_configs: + return appliance_configs[self.dataset_key] + elif self.dataset in appliance_configs: + return appliance_configs[self.dataset] + else: + # Use any available configuration as fallback + available_configs = list(appliance_configs.keys()) + if available_configs: + fallback_key = available_configs[0] + print(f"Warning: No {self.dataset_key} config for {appliance_name}, using {fallback_key}") + return appliance_configs[fallback_key] + + return None + + def return_network(self, appliance_name): + """Factory method to create a new MSDC model instance for specific appliance""" + config = self._get_appliance_config(appliance_name) + if config: + num_states = config['num_states'] + print(f"Creating network for {appliance_name} with {num_states} states ({self.dataset_key})") + else: + num_states = self.num_states # fallback to default + print(f"Warning: No config found for {appliance_name}, using default {num_states} states") + + return MSDCNet(self.sequence_length, self.out_len, num_states).to(self.device) + + def set_appliance_params(self, train_appliances): + """Compute normalization statistics for each appliance from training data""" + for name, lst in train_appliances: + # Always compute normalization from training data + arr = pd.concat(lst, axis=0).values.flatten() + m, s = arr.mean(), arr.std() + # Prevent division by zero + if s < 1: + s = 100 + print(f"Computed normalization for {name}: mean={m:.2f}, std={s:.2f}") + + self.appliance_params[name] = {'mean': m, 'std': s} + + def _create_state_labels(self, power_sequence, appliance_name): + """ + Create state labels using the dataset-specific state dictionary + """ + power = power_sequence.flatten() + + # Get appliance configuration + config = self._get_appliance_config(appliance_name) + + if config: + thresholds = config['states'] + num_states = config['num_states'] + else: + # Fallback to dynamic thresholds + if appliance_name in self.appliance_params: + params = self.appliance_params[appliance_name] + mean_power = params['mean'] + else: + mean_power = power.mean() + + num_states = self.num_states + + if num_states == 2: + thresholds = [0.1 * mean_power] + elif num_states == 3: + thresholds = [0.1 * mean_power, 0.7 * mean_power] + else: + thresholds = np.linspace(0, mean_power * 1.2, num_states)[1:] + + # Create state labels based on thresholds + states = np.zeros_like(power, dtype=np.int64) + + for i, threshold in enumerate(thresholds): + states[power >= threshold] = i + 1 + + # Ensure states are within valid range + states = np.clip(states, 0, num_states - 1) + + return states.astype(np.int64) + + def _compute_msdc_loss(self, power_preds, state_preds, y_power, y_states, appliance_name): + """ + Computes the combined loss for the MSDC model. + The loss is a sum of: + 1. Mean Squared Error (MSE) for the final power prediction. + 2. Cross-entropy loss for the state classification. + """ + batch_size = y_power.shape[0] + + # Get number of states for this appliance + config = self._get_appliance_config(appliance_name) + if config: + num_states = config['num_states'] + else: + num_states = self.num_states + + # Reshape predictions: (batch_size, out_len, num_states) + power_preds = power_preds.view(batch_size, self.out_len, num_states) + state_preds = state_preds.view(batch_size, self.out_len, num_states) + + # Apply softmax to state predictions to get probabilities + state_probs = F.softmax(state_preds, dim=-1) + + # Final power prediction: weighted sum over states + final_power = torch.sum(state_probs * power_preds, dim=-1, keepdim=False) + + # 1. Final power MSE loss + power_loss = F.mse_loss(final_power, y_power) + + # 2. State classification loss + # Flatten for cross-entropy: (batch_size * out_len, num_states) + state_preds_flat = state_preds.view(-1, num_states) + y_states_flat = y_states.view(-1) + state_loss = F.cross_entropy(state_preds_flat, y_states_flat) + + # Combined loss (following original implementation) + total_loss = power_loss + state_loss + + return total_loss, power_loss, state_loss + + def partial_fit(self, train_main, train_appliances, + do_preprocessing=True, current_epoch=0, **_): + """Train MSDC models on a chunk of data""" + + print("Started Partial Fit") + + # Compute appliance parameters if not provided + if len(self.appliance_params) == 0: + self.set_appliance_params(train_appliances) + + print("Preprocessing called") + # Preprocess data using NILMTK-compatible method + if do_preprocessing: + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + print("Preprocessing done") + + # Prepare main power data + mains_arr = pd.concat(train_main, axis=0).values + if len(mains_arr.shape) == 2: + mains_arr = mains_arr.reshape(-1, self.sequence_length) + else: + mains_arr = mains_arr.reshape(-1, self.sequence_length) + + # Prepare appliance data + new_train_appliances = [] + for app_name, app_dfs in train_appliances: + app_df = pd.concat(app_dfs, axis=0) + app_df_values = app_df.values + if len(app_df_values.shape) == 2: + app_df_values = app_df_values.reshape(-1, self.out_len) + else: + app_df_values = app_df_values.reshape(-1, self.out_len) + new_train_appliances.append((app_name, app_df_values)) + + train_appliances = new_train_appliances + + # Train a separate model for each appliance + for appliance_name, app_data in train_appliances: + print(f"\nTraining {appliance_name} for {self.dataset_key}...") + + # Check if the appliance was already trained + if appliance_name not in self.models: + self.models[appliance_name] = self.return_network(appliance_name) + + model = self.models[appliance_name] + optimizer = optim.Adam(model.parameters(), lr=self.learning_rate) + + # Convert to tensors + mains_tensor = torch.FloatTensor(mains_arr).to(self.device) + app_tensor = torch.FloatTensor(app_data).to(self.device) + + # Create state labels for each sequence using dataset-specific states + state_labels = [] + for i in range(app_data.shape[0]): + states = self._create_state_labels(app_data[i], appliance_name) + state_labels.append(states) + state_labels = np.array(state_labels) + state_tensor = torch.LongTensor(state_labels).to(self.device) + + # Create dataset and dataloader + dataset = TensorDataset(mains_tensor, app_tensor, state_tensor) + dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True) + + # Training loop + model.train() + print("Training loop started") + for epoch in range(self.n_epochs): + print(f"Epoch {epoch + 1}/{self.n_epochs} for {appliance_name}") + total_loss = 0 + batch_count = 0 + for batch_mains, batch_app, batch_states in dataloader: + optimizer.zero_grad() + + # Forward pass through MSDC network + power_preds, state_preds = model(batch_mains) + + # Compute MSDC loss (without CRF) + loss, power_loss, state_loss = self._compute_msdc_loss( + power_preds, state_preds, batch_app, batch_states, appliance_name + ) + + # Backward pass + loss.backward() + optimizer.step() + + total_loss += loss.item() + batch_count += 1 + + if epoch % 10 == 0: + avg_loss = total_loss / batch_count + print(f"Epoch {epoch}/{self.n_epochs}, Avg Loss: {avg_loss:.4f}") + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """Disaggregate power consumption using the trained MSDC model.""" + + if model is not None: + self.models = model + + # Preprocess the test mains + if do_preprocessing: + test_main_list = self.call_preprocessing(test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_main in test_main_list: + test_main = test_main.values + test_main = test_main.reshape((-1, self.sequence_length)) + disggregation_dict = {} + + test_main_tensor = torch.FloatTensor(test_main).to(self.device) + + for appliance in self.models: + model = self.models[appliance] + model.eval() + + # Get appliance configuration + config = self._get_appliance_config(appliance) + if config: + num_states = config['num_states'] + else: + num_states = self.num_states + + with torch.no_grad(): + # Forward pass through MSDC + power_preds, state_preds = model(test_main_tensor) + + # Reshape predictions + batch_size = power_preds.shape[0] + power_preds = power_preds.view(batch_size, self.out_len, num_states) + state_preds = state_preds.view(batch_size, self.out_len, num_states) + + # Apply softmax to get state probabilities + state_probs = F.softmax(state_preds, dim=-1) + + # Final power prediction: weighted sum over states + predicted_power = torch.sum(state_probs * power_preds, dim=-1) + + # Extract center values (middle of each window) + center_idx = self.out_len // 2 + pred = predicted_power[:, center_idx].cpu().numpy() + + # Denormalize predictions + pred = pred * self.appliance_params[appliance]['std'] + self.appliance_params[appliance]['mean'] + pred = np.where(pred > 0, pred, 0) # Ensure non-negative power + + disggregation_dict[appliance] = pred + + test_predictions.append(pd.DataFrame(disggregation_dict, dtype='float32')) + + return test_predictions + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocessing method required by NILMTK API + """ + if method == 'train': + # Process mains data + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + # Process appliance data - create sequence-to-sequence targets + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + else: + raise ApplianceNotFoundError() + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + + # Create sequence-to-sequence targets (out_len length) + app_sequences = [] + offset = int(0.5 * (self.sequence_length - 1.0)) + for i in range(len(new_app_readings) - self.sequence_length + 1): + # Extract output sequence from center + start_idx = i + offset - self.out_len // 2 + end_idx = start_idx + self.out_len + if start_idx >= 0 and end_idx <= len(new_app_readings): + seq = new_app_readings[start_idx:end_idx] + else: + # Pad if necessary + seq = np.zeros(self.out_len) + if start_idx < 0: + seq[-start_idx:] = new_app_readings[0:end_idx] + elif end_idx > len(new_app_readings): + seq[:len(new_app_readings)-start_idx] = new_app_readings[start_idx:] + else: + seq = new_app_readings[start_idx:end_idx] + + app_sequences.append(seq) + + app_sequences = np.array(app_sequences) + app_sequences = (app_sequences - app_mean) / app_std + processed_app_dfs.append(pd.DataFrame(app_sequences)) + + appliance_list.append((app_name, processed_app_dfs)) + + return processed_mains_lst, appliance_list + + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + new_mains = new_mains.reshape((-1, self.sequence_length)) + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst + +# Export for nilmtk_contrib +__all__ = ['MSDC'] diff --git a/nilmtk_contrib/torch/nilmformer.py b/nilmtk_contrib/torch/nilmformer.py new file mode 100644 index 0000000..229274f --- /dev/null +++ b/nilmtk_contrib/torch/nilmformer.py @@ -0,0 +1,1036 @@ +""" +NILMFormer: PyTorch Implementation for NILMTK-Contrib + +This is an exact implementation of the NILMFormer architecture from the paper: +"NILMFormer: Non-Intrusive Load Monitoring that Accounts for Non-Stationarity" +by Petralia et al. (ACM SIGKDD 2025) + +Official GitHub: https://github.com/adrienpetralia/NILMFormer +Paper: https://arxiv.org/html/2506.05880v1 + +Architecture Components (matching official implementation): +1. Instance Normalization: Stationarizes input by subtracting mean/std +2. DilatedBlock: Robust convolutional feature extractor with residual connections +3. TokenStats: Linear projection of mean/std statistics into higher dimensional space +4. Exogenous Features: Temporal encoding using create_exogene (sinusoidal functions for + month, day-of-week, hour, minute) - exactly as in the original repository +5. Transformer Encoder: Diagonal masked self-attention with pre-norm architecture +6. Output Head: 1D convolution for sequence-to-sequence prediction +7. Denormalization: Reverse instance normalization using projected statistics + +Key Features: +- create_exogene for capturing temporal patterns (from original NILMFormer repo) +- Diagonal masking (not causal) in self-attention +- GELU activations throughout +- Pre-norm transformer blocks +- Instance normalization for non-stationarity handling +- Sequence-to-sequence prediction with middle-point extraction +- Exact parameter defaults from official config (d_model=96, n_heads=8, etc.) + +This implementation follows the official NILMFormer source code structure exactly, +including the proper exogenous feature generation via create_exogene. +""" + +from typing import List, Optional +from collections import OrderedDict +import os +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +from torch.utils.data import Dataset, DataLoader +from sklearn.model_selection import train_test_split +from tqdm import tqdm +from nilmtk.disaggregate import Disaggregator +import random + + +class SequenceLengthError(Exception): + pass + + +class ApplianceNotFoundError(Exception): + pass + + +class NILMDataset(Dataset): + """ + Dataset class for NILMFormer. + """ + def __init__(self, inputs, targets): + """ + Args: + inputs (Tensor): Input tensor of shape (B, C, L), where C includes + mains power and exogenous features. + targets (Tensor): Target tensor of shape (B, C_out, L), where C_out + is the number of appliances. + """ + self.inputs = inputs + self.targets = targets + + def __len__(self): + return len(self.inputs) + + def __getitem__(self, idx): + return self.inputs[idx], self.targets[idx] + + +class ResUnit(nn.Module): + """ + Residual Unit for the NILMFormer model. + """ + def __init__(self, c_in: int, c_out: int, k: int = 8, dilation: int = 1, + stride: int = 1, bias: bool = True): + super().__init__() + + self.layers = nn.Sequential( + nn.Conv1d( + in_channels=c_in, + out_channels=c_out, + kernel_size=k, + dilation=dilation, + stride=stride, + bias=bias, + padding="same", + ), + nn.GELU(), + nn.BatchNorm1d(c_out), + ) + + if c_in > 1 and c_in != c_out: + self.match_residual = True + self.conv = nn.Conv1d(in_channels=c_in, out_channels=c_out, kernel_size=1) + else: + self.match_residual = False + + def forward(self, x) -> torch.Tensor: + if self.match_residual: + x_bottleneck = self.conv(x) + x = self.layers(x) + return torch.add(x_bottleneck, x) + else: + return torch.add(x, self.layers(x)) + + +class DilatedBlock(nn.Module): + """ + Dilated Convolutional Block for feature extraction. + """ + def __init__(self, c_in: int = 1, c_out: int = 72, kernel_size: int = 8, + dilation_list: Optional[List[int]] = None, bias: bool = True): + super().__init__() + + if dilation_list is None: + dilation_list = [1, 2, 4, 8] + + layers = [] + for i, dilation in enumerate(dilation_list): + if i == 0: + layers.append( + ResUnit(c_in, c_out, k=kernel_size, dilation=dilation, bias=bias) + ) + else: + layers.append( + ResUnit(c_out, c_out, k=kernel_size, dilation=dilation, bias=bias) + ) + self.network = torch.nn.Sequential(*layers) + + def forward(self, x) -> torch.Tensor: + return self.network(x) + + +def create_exogene(start_date, sequence_length, freq="1min", + list_exo_variables=None, cosinbase=True, new_range=(-1, 1)): + """ + Creates exogenous temporal features. + + Args: + start_date: The starting timestamp for the sequence. + sequence_length: The length of the time sequence. + freq: The frequency of the data sampling. + list_exo_variables: A list of temporal features to generate. + cosinbase: If True, uses sinusoidal encoding for features. + new_range: The range for normalization if cosinbase is False. + + Returns: + An array of exogenous features. + """ + if list_exo_variables is None: + list_exo_variables = ['month', 'dow', 'hour', 'minute'] # Default temporal features + + if cosinbase: + n_var = 2 * len(list_exo_variables) # sin and cos for each variable + else: + n_var = len(list_exo_variables) + + # Create datetime range + if isinstance(start_date, str): + start_date = pd.to_datetime(start_date) + + tmp = pd.date_range(start=start_date, periods=sequence_length, freq=freq) + + # Initialize exogenous features array + np_extra = np.zeros((1, n_var, sequence_length)).astype(np.float32) + + k = 0 + for exo_var in list_exo_variables: + if exo_var == "month": + if cosinbase: + np_extra[0, k, :] = np.sin(2 * np.pi * tmp.month.values / 12.0) + np_extra[0, k + 1, :] = np.cos(2 * np.pi * tmp.month.values / 12.0) + k += 2 + else: + np_extra[0, k, :] = normalize_exogene( + tmp.month.values, xmin=1, xmax=12, newRange=new_range + ) + k += 1 + elif exo_var == "dom": # day of month + if cosinbase: + np_extra[0, k, :] = np.sin(2 * np.pi * tmp.day.values / 31.0) + np_extra[0, k + 1, :] = np.cos(2 * np.pi * tmp.day.values / 31.0) + k += 2 + else: + np_extra[0, k, :] = normalize_exogene( + tmp.day.values, xmin=1, xmax=31, newRange=new_range + ) + k += 1 + elif exo_var == "dow": # day of week + if cosinbase: + np_extra[0, k, :] = np.sin(2 * np.pi * tmp.dayofweek.values / 7.0) + np_extra[0, k + 1, :] = np.cos(2 * np.pi * tmp.dayofweek.values / 7.0) + k += 2 + else: + np_extra[0, k, :] = normalize_exogene( + tmp.dayofweek.values, xmin=0, xmax=6, newRange=new_range + ) + k += 1 + elif exo_var == "hour": + if cosinbase: + np_extra[0, k, :] = np.sin(2 * np.pi * tmp.hour.values / 24.0) + np_extra[0, k + 1, :] = np.cos(2 * np.pi * tmp.hour.values / 24.0) + k += 2 + else: + np_extra[0, k, :] = normalize_exogene( + tmp.hour.values, xmin=0, xmax=23, newRange=new_range + ) + k += 1 + elif exo_var == "minute": + if cosinbase: + np_extra[0, k, :] = np.sin(2 * np.pi * tmp.minute.values / 60.0) + np_extra[0, k + 1, :] = np.cos(2 * np.pi * tmp.minute.values / 60.0) + k += 2 + else: + np_extra[0, k, :] = normalize_exogene( + tmp.minute.values, xmin=0, xmax=59, newRange=new_range + ) + k += 1 + else: + raise ValueError( + f"Embedding unknown for these Data. Only 'month', 'dow', 'dom', 'hour', 'minute' supported, received {exo_var}" + ) + + return np_extra + + +def normalize_exogene(x, xmin, xmax, newRange): + """ + Normalizes exogenous features to a specified range. + """ + if xmin is None: + xmin = np.min(x) + if xmax is None: + xmax = np.max(x) + + norm = (x - xmin) / (xmax - xmin) + if newRange == (0, 1): + return norm + elif newRange != (0, 1): + return norm * (newRange[1] - newRange[0]) + newRange[0] + + +class DiagonalMaskFromSeqlen: + """ + Creates a diagonal attention mask. + """ + def __init__(self, B, L, device="cpu"): + with torch.no_grad(): + self._mask = torch.diag( + torch.ones(L, dtype=torch.bool, device=device) + ).repeat(B, 1, 1, 1) + + @property + def mask(self) -> torch.Tensor: + return self._mask + + +class DiagonallyMaskedSelfAttention(nn.Module): + """ + Self-attention mechanism with a diagonal mask. + """ + def __init__(self, dim: int, n_heads: int, head_dim: int, dropout: float): + super().__init__() + + self.n_heads: int = n_heads + self.head_dim: int = head_dim + self.dropout: float = dropout + self.scale = head_dim**-0.5 + + self.attn_dropout = nn.Dropout(dropout) + self.out_dropout = nn.Dropout(dropout) + + self.wq = nn.Linear(dim, n_heads * head_dim, bias=False) + self.wk = nn.Linear(dim, n_heads * head_dim, bias=False) + self.wv = nn.Linear(dim, n_heads * head_dim, bias=False) + self.wo = nn.Linear(n_heads * head_dim, dim, bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + batch, seqlen, _ = x.shape + + xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) + + xq = xq.view(batch, seqlen, self.n_heads, self.head_dim) + xk = xk.view(batch, seqlen, self.n_heads, self.head_dim) + xv = xv.view(batch, seqlen, self.n_heads, self.head_dim) + + diag_mask = DiagonalMaskFromSeqlen(batch, seqlen, device=xq.device) + + scale = 1.0 / xq.shape[-1] ** 0.5 + scores = torch.einsum("blhe,bshe->bhls", xq, xk) + attn = self.attn_dropout( + torch.softmax( + scale * scores.masked_fill_(diag_mask.mask, -np.inf), dim=-1 + ) + ) + output = torch.einsum("bhls,bshd->blhd", attn, xv) + + return self.out_dropout(self.wo(output.reshape(batch, seqlen, -1))) + + +class PositionWiseFeedForward(nn.Module): + """ + Position-wise feed-forward network. + """ + def __init__(self, dim: int, hidden_dim: int, dp_rate: float = 0.0, + bias1: bool = True, bias2: bool = True): + super().__init__() + self.layer1 = nn.Linear(dim, hidden_dim, bias=bias1) + self.layer2 = nn.Linear(hidden_dim, dim, bias=bias2) + self.dropout = nn.Dropout(dp_rate) + self.activation = F.gelu + + def forward(self, x) -> torch.Tensor: + x = self.layer2(self.dropout(self.activation(self.layer1(x)))) + return x + + +class EncoderLayer(nn.Module): + """ + Transformer encoder layer with pre-norm architecture. + """ + def __init__(self, d_model: int, n_heads: int, dp_rate: float = 0.2, + pffn_ratio: int = 4, norm_eps: float = 1e-5): + super().__init__() + + assert d_model % n_heads == 0, ( + f"d_model ({d_model}) must be divisible by n_heads ({n_heads})" + ) + + self.attention_layer = DiagonallyMaskedSelfAttention( + dim=d_model, + n_heads=n_heads, + head_dim=d_model // n_heads, + dropout=dp_rate, + ) + + self.norm1 = nn.LayerNorm(d_model, eps=norm_eps) + self.norm2 = nn.LayerNorm(d_model, eps=norm_eps) + self.dropout = nn.Dropout(dp_rate) + + self.pffn = PositionWiseFeedForward( + dim=d_model, + hidden_dim=d_model * pffn_ratio, + dp_rate=dp_rate, + ) + + def forward(self, x) -> torch.Tensor: + # Pre-norm attention block + x = self.norm1(x) + new_x = self.attention_layer(x) + x = torch.add(x, new_x) + + # Pre-norm PFFN block + x = self.norm2(x) + new_x = self.pffn(x) + x = torch.add(x, self.dropout(new_x)) + + return x + + +class NILMFormerNetwork(nn.Module): + """ + The NILMFormer neural network architecture. + """ + def __init__(self, c_in=1, c_embedding=8, c_out=1, kernel_size=3, + kernel_size_head=3, dilations=None, conv_bias=True, + n_encoder_layers=3, d_model=96, dp_rate=0.2, pffn_ratio=4, + n_heads=8, norm_eps=1e-5): + super().__init__() + + if dilations is None: + dilations = [1, 2, 4, 8] + + # Validate constraints + assert d_model % 4 == 0, "d_model must be divisible by 4." + + # Store config + self.d_model = d_model + self.c_out = c_out + + # ============ Embedding ============# + d_model_ = 3 * d_model // 4 # e.g., if d_model=96 => d_model_=72 + + self.EmbedBlock = DilatedBlock( + c_in=c_in, + c_out=d_model_, + kernel_size=kernel_size, + dilation_list=dilations, + bias=conv_bias, + ) + + # Exogenous input projection (from create_exogene features) + self.ProjEmbedding = nn.Conv1d( + in_channels=c_embedding, + out_channels=d_model // 4, + kernel_size=1 + ) + + self.ProjStats1 = nn.Linear(2, d_model) + self.ProjStats2 = nn.Linear(d_model, 2) + + # ============ Encoder ============# + layers = [] + for _ in range(n_encoder_layers): + layers.append(EncoderLayer(d_model, n_heads, dp_rate, pffn_ratio, norm_eps)) + layers.append(nn.LayerNorm(d_model)) + self.EncoderBlock = nn.Sequential(*layers) + + # ============ Downstream Task Head ============# + self.DownstreamTaskHead = nn.Conv1d( + in_channels=d_model, + out_channels=c_out, + kernel_size=kernel_size_head, + padding=kernel_size_head // 2, + padding_mode="replicate", + ) + + # ============ Initialize Weights ============# + self.initialize_weights() + + def initialize_weights(self): + """ + Initializes the weights of the linear and layer normalization layers. + """ + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + torch.nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward(self, x) -> torch.Tensor: + """ + Forward pass for the NILMFormer model. + + Args: + x (Tensor): Input tensor of shape (B, 1 + e, L), where B is the batch size, + e is the number of exogenous features, and L is the sequence length. + + Returns: + Tensor: The output of the model. + """ + # Separate the channels: + # x[:, :1, :] => load curve + # x[:, 1:, :] => exogenous input(s) + encoding = x[:, 1:, :] # shape: (B, e, L) + x = x[:, :1, :] # shape: (B, 1, L) + + # === Instance Normalization === # + inst_mean = torch.mean(x, dim=-1, keepdim=True).detach() + inst_std = torch.sqrt( + torch.var(x, dim=-1, keepdim=True, unbiased=False) + 1e-6 + ).detach() + + x = (x - inst_mean) / inst_std # shape still (B, 1, L) + + # === Embedding === # + # 1) Dilated Conv block + x = self.EmbedBlock(x) # shape: (B, [d_model_], L) => typically (B, 72, L) if d_model=96 + + # 2) Project exogenous features + encoding = self.ProjEmbedding(encoding) # shape: (B, d_model//4, L) + + # 3) Concatenate dilated features with exogenous features + x = torch.cat([x, encoding], dim=1).permute(0, 2, 1) # (B, L, d_model) + + # === Mean/Std tokens === # + stats_token = self.ProjStats1( + torch.cat([inst_mean, inst_std], dim=1).permute(0, 2, 1) + ) # (B, 1, d_model) + x = torch.cat([x, stats_token], dim=1) # (B, L + 1, d_model) + + # === Transformer Encoder === # + x = self.EncoderBlock(x) # (B, L + 1, d_model) + x = x[:, :-1, :] # remove stats token => (B, L, d_model) + + # === Conv Head === # + x = x.permute(0, 2, 1) # (B, d_model, L) + x = self.DownstreamTaskHead(x) # (B, c_out, L) + + # === Reverse Instance Normalization === # + # stats_out => shape (B, 1, 2) + stats_out = self.ProjStats2(stats_token) # stats_token was (B, 1, d_model) + outinst_mean = stats_out[:, :, 0].unsqueeze(-1) # (B, 1, 1) + outinst_std = stats_out[:, :, 1].unsqueeze(-1) # (B, 1, 1) + + x = x * outinst_std + outinst_mean + return x + + +class NILMFormer(Disaggregator): + """ + NILMFormer: Transformer-based model for non-intrusive load monitoring. + + This implementation is based on the paper: + "NILMFormer: Non-Intrusive Load Monitoring that Accounts for Non-Stationarity" + https://arxiv.org/abs/2506.05880 + + The model uses a transformer architecture specifically designed for energy disaggregation + tasks that addresses non-stationarity in power consumption data through instance + normalization and temporal feature encoding. + + Architecture Overview: + - Instance normalization for handling non-stationarity + - Dilated convolutional feature extractor with residual connections + - Exogenous temporal features (month, day-of-week, hour, minute) + - Transformer encoder with diagonal masked self-attention + - Sequence-to-sequence prediction with denormalization + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Input sequence length (default: 99) + - c_in (int): Input channels (default: 1) + - c_embedding (int): Exogenous channels (default: 8) + - d_model (int): Model dimension (default: 96) + - n_heads (int): Number of attention heads (default: 8) + - n_layers (int): Number of transformer layers (default: 6) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + """ + + def __init__(self, params): + """ + Initialize NILMFormer model with specified parameters following the paper + + Parameters: + ----------- + params : dict + Dictionary containing model parameters: + - sequence_length: Input sequence length (default: 99) + - c_in: Input channels (default: 1) + - c_embedding: Exogenous channels (default: 8) + - c_out: Output channels (default: 1) + - d_model: Model dimension (default: 96) + - n_heads: Number of attention heads (default: 8) + - n_encoder_layers: Number of encoder layers (default: 3) + - dp_rate: Dropout rate (default: 0.2) + - pffn_ratio: Feed-forward expansion ratio (default: 4) + - kernel_size: Conv kernel size (default: 3) + - dilations: Dilation factors (default: [1, 2, 4, 8]) + - n_epochs: Training epochs (default: 100) + - batch_size: Batch size (default: 1024) + - learning_rate: Learning rate (default: 1e-4) + """ + super().__init__() + + self.MODEL_NAME = "NILMFormer" + self.models = OrderedDict() + self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" + + # Model architecture parameters (following NILMFormer paper defaults) + self.sequence_length = params.get('sequence_length', 99) + self.c_in = params.get('c_in', 1) + self.c_embedding = params.get('c_embedding', 8) + self.c_out = params.get('c_out', 1) + self.d_model = params.get('d_model', 96) + self.n_heads = params.get('n_heads', 8) + self.n_encoder_layers = params.get('n_encoder_layers', 3) + self.dp_rate = params.get('dp_rate', 0.2) + self.pffn_ratio = params.get('pffn_ratio', 4) + self.kernel_size = params.get('kernel_size', 3) + self.kernel_size_head = params.get('kernel_size_head', 3) + self.dilations = params.get('dilations', [1, 2, 4, 8]) + self.conv_bias = params.get('conv_bias', True) + self.norm_eps = params.get('norm_eps', 1e-5) + + # Training parameters (optimized for NILMFormer) + self.chunk_wise_training = params.get('chunk_wise_training', False) + self.n_epochs = params.get('n_epochs', 100) # More epochs for transformer + self.batch_size = params.get('batch_size', 1024) # Larger batch size + self.learning_rate = params.get('learning_rate', 1e-4) # Lower learning rate + self.warmup_steps = params.get('warmup_steps', 1000) # Learning rate warmup + + # Data parameters + self.appliance_params = params.get('appliance_params', {}) + self.mains_mean = params.get('mains_mean', 1800) + self.mains_std = params.get('mains_std', 600) + + # Device configuration + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"NILMFormer using device: {self.device}") + + if self.sequence_length % 2 == 0: + print("Sequence length should be odd!") + raise SequenceLengthError() + + def return_network(self): + """Create and return NILMFormer network with exact architecture from paper""" + model = NILMFormerNetwork( + c_in=self.c_in, + c_embedding=self.c_embedding, + c_out=self.c_out, + kernel_size=self.kernel_size, + kernel_size_head=self.kernel_size_head, + dilations=self.dilations, + conv_bias=self.conv_bias, + n_encoder_layers=self.n_encoder_layers, + d_model=self.d_model, + dp_rate=self.dp_rate, + pffn_ratio=self.pffn_ratio, + n_heads=self.n_heads, + norm_eps=self.norm_eps + ) + return model.to(self.device) + + def create_exogene_features(self, n_samples, sequence_length, start_date=None): + """ + Create exogenous temporal features using the original NILMFormer approach. + + This function generates sinusoidal temporal features from timestamps, + following the exact implementation from the official NILMFormer repository. + + Args: + n_samples: Number of samples + sequence_length: Length of each sequence + start_date: Starting date (datetime or None for reference date) + + Returns: + exogenous_features: (n_samples, c_embedding, sequence_length) tensor of temporal features + """ + if start_date is None: + # Use a reference date (e.g., start of 2023) + import datetime + start_date = datetime.datetime(2023, 1, 1) + + # Assume data is sampled every minute (can be adjusted based on dataset) + freq = "1min" + + # Temporal variables to include (following original implementation) + list_exo_variables = ['month', 'dow', 'hour', 'minute'] # Standard set + + all_exogenous = [] + for i in range(n_samples): + # Each sample starts at a different time + sample_start = start_date + pd.Timedelta(minutes=i * sequence_length) + + # Generate exogenous features for this sample + exo_features = create_exogene( + start_date=sample_start, + sequence_length=sequence_length, + freq=freq, + list_exo_variables=list_exo_variables, + cosinbase=True, # Use sin/cos encoding + new_range=(-1, 1) + ) # Shape: (1, n_features, sequence_length) + + all_exogenous.append(exo_features[0]) # Remove the first dimension + + # Stack all samples + exogenous_tensor = np.stack(all_exogenous, axis=0) # (n_samples, n_features, sequence_length) + + return torch.tensor(exogenous_tensor, dtype=torch.float32) + + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, + current_epoch=0, **load_kwargs): + """ + Train NILMFormer model on a data chunk + """ + + # Compute appliance parameters if not available + if not self.appliance_params: + self.set_appliance_params(train_appliances) + + print("...............NILMFormer partial_fit running...............") + + # Preprocess data + if do_preprocessing: + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + # Prepare main power data + train_main = pd.concat(train_main, axis=0) + train_main_values = train_main.values.reshape((-1, self.sequence_length, 1)) + + # Create exogenous temporal features using create_exogene (much better than random noise!) + n_samples = train_main_values.shape[0] + exogenous_features = self.create_exogene_features(n_samples, self.sequence_length) + + # Prepare input: concatenate main power with exogenous features + # Main power: (B, 1, L), Exogenous: (B, c_embedding, L) + train_main_tensor = torch.tensor(train_main_values.transpose(0, 2, 1), dtype=torch.float32) # (B, 1, L) + train_input = torch.cat([train_main_tensor, exogenous_features], dim=1) # (B, 1 + c_embedding, L) + + # Prepare appliance data + new_train_appliances = [] + for app_name, app_df in train_appliances: + app_df = pd.concat(app_df, axis=0) + app_df_values = app_df.values.reshape((-1, self.sequence_length, 1)) + app_df_tensor = torch.tensor(app_df_values, dtype=torch.float32) + new_train_appliances.append((app_name, app_df_tensor)) + train_appliances = new_train_appliances + + # Train models for each appliance + for appliance_name, power_tensor in train_appliances: + if appliance_name not in self.models: + print(f"First model training for {appliance_name}") + self.models[appliance_name] = self.return_network() + else: + print(f"Started Retraining model for {appliance_name}") + + model = self.models[appliance_name] + + if train_input.size(0) > 10: + self.train_model(model, train_input, power_tensor, + appliance_name, current_epoch) + + def train_model(self, model, train_input, power_tensor, appliance_name, current_epoch): + """Train a single appliance model with proper NILMFormer training protocol""" + + # Split data + n_total = train_input.size(0) + val_split = int(0.15 * n_total) + + indices = torch.randperm(n_total) + train_indices = indices[val_split:] + val_indices = indices[:val_split] + + train_input_split = train_input[train_indices].to(self.device) + train_power_split = power_tensor[train_indices].to(self.device) + + val_input_split = train_input[val_indices].to(self.device) + val_power_split = power_tensor[val_indices].to(self.device) + + # For NILMFormer, we predict the full sequence + # Target shape: (batch, sequence_length, 1) -> (batch, 1, sequence_length) + train_power_split = train_power_split.transpose(1, 2) # (B, 1, L) + val_power_split = val_power_split.transpose(1, 2) # (B, 1, L) + + # Create datasets and loaders + train_dataset = NILMDataset(train_input_split, train_power_split) + val_dataset = NILMDataset(val_input_split, val_power_split) + + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False) + + # Setup optimizer with weight decay (important for transformers) + optimizer = optim.AdamW( + model.parameters(), + lr=self.learning_rate, + weight_decay=0.01, # Weight decay for regularization + betas=(0.9, 0.95) # Optimized betas for transformers + ) + + # Learning rate scheduler with warmup + total_steps = len(train_loader) * self.n_epochs + scheduler = optim.lr_scheduler.OneCycleLR( + optimizer, + max_lr=self.learning_rate, + total_steps=total_steps, + pct_start=0.1, # 10% warmup + anneal_strategy='cos' + ) + + criterion = nn.MSELoss() + best_val_loss = float('inf') + best_model_path = f"{self.file_prefix}-{appliance_name.replace(' ', '_')}-epoch{current_epoch}.pth" + patience = 10 + patience_counter = 0 + + print(f"Training {appliance_name} with {total_steps} total steps using integrated exogenous features") + + # Training loop + for epoch in range(self.n_epochs): + model.train() + train_losses = [] + + # Training phase + train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{self.n_epochs}") + for input_batch, power_batch in train_bar: + input_batch = input_batch.to(self.device) + power_batch = power_batch.to(self.device) + + optimizer.zero_grad() + # Forward pass without timestamps + predictions = model(input_batch) # Shape: (B, c_out, L) + loss = criterion(predictions, power_batch) + loss.backward() + + # Gradient clipping (important for transformer stability) + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + + optimizer.step() + scheduler.step() + + train_losses.append(loss.item()) + train_bar.set_postfix(loss=loss.item(), lr=scheduler.get_last_lr()[0]) + + # Validation phase + model.eval() + val_losses = [] + with torch.no_grad(): + for input_batch, power_batch in val_loader: + input_batch = input_batch.to(self.device) + power_batch = power_batch.to(self.device) + + predictions = model(input_batch) + loss = criterion(predictions, power_batch) + val_losses.append(loss.item()) + + avg_train_loss = np.mean(train_losses) + avg_val_loss = np.mean(val_losses) + + print(f"Epoch {epoch+1}: Train Loss: {avg_train_loss:.6f}, " + f"Val Loss: {avg_val_loss:.6f}, LR: {scheduler.get_last_lr()[0]:.2e}") + + # Save best model and early stopping + if avg_val_loss < best_val_loss: + best_val_loss = avg_val_loss + torch.save(model.state_dict(), best_model_path) + print(f"Saved best model for {appliance_name}") + patience_counter = 0 + else: + patience_counter += 1 + if patience_counter >= patience: + print(f"Early stopping triggered for {appliance_name}") + break + + # Load best model + model.load_state_dict(torch.load(best_model_path)) + model.eval() + print(f"Training completed for {appliance_name}") + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """ + Disaggregate power consumption for test data using NILMFormer + """ + + if model is not None: + self.models = model + + test_predictions = [] + for test_mains_df in test_main_list: + disggregation_dict = {} + + # Store original length before any preprocessing + original_length = len(test_mains_df) + + if do_preprocessing: + # Use the standard preprocessing pipeline + processed_mains_list = self.call_preprocessing( + [test_mains_df], submeters_lst=None, method='test') + processed_mains_df = processed_mains_list[0] + + # Convert preprocessed data to proper format + test_main_values = processed_mains_df.values # Already shaped correctly + test_main_tensor = torch.tensor( + test_main_values.reshape((-1, 1, self.sequence_length)), + dtype=torch.float32 + ) # (N, 1, L) + else: + # Manual preprocessing if needed + test_main_values = test_mains_df.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + test_main_values = np.pad( + test_main_values, (units_to_pad, units_to_pad), + 'constant', constant_values=(0, 0) + ) + test_main_values = np.array([ + test_main_values[i:i + n] for i in range(len(test_main_values) - n + 1) + ]) + test_main_values = (test_main_values - self.mains_mean) / self.mains_std + test_main_tensor = torch.tensor( + test_main_values.reshape((-1, 1, self.sequence_length)), + dtype=torch.float32 + ) + + # Create exogenous temporal features for test data + n_samples = test_main_tensor.shape[0] + test_exogenous = self.create_exogene_features(n_samples, self.sequence_length) + + # Prepare input: concatenate main power with exogenous features + test_input = torch.cat([test_main_tensor, test_exogenous], dim=1) # (B, 1 + c_embedding, L) + test_input_tensor = test_input.to(self.device) + + for appliance in self.models: + model = self.models[appliance] + model.eval() + + with torch.no_grad(): + # Process in batches to avoid memory issues + predictions = [] + for i in range(0, len(test_input_tensor), self.batch_size): + batch = test_input_tensor[i:i+self.batch_size] + pred_batch = model(batch) # Shape: (B, c_out, L) + predictions.append(pred_batch.cpu().numpy()) + + prediction = np.concatenate(predictions, axis=0) # (N, c_out, L) + + # Extract middle predictions for sequence-to-point conversion + middle_idx = self.sequence_length // 2 + point_predictions = prediction[:, 0, middle_idx] # (N,) + + # Reconstruct full sequence using correct overlapping window logic + padding = self.sequence_length // 2 + reconstructed_length = original_length # Use original length! + sum_arr = np.zeros(reconstructed_length + 2 * padding) + counts_arr = np.zeros(reconstructed_length + 2 * padding) + + # Place predictions at correct positions + for i, pred_value in enumerate(point_predictions): + target_idx = i + padding # Account for padding offset + if target_idx < len(sum_arr): + sum_arr[target_idx] += pred_value + counts_arr[target_idx] += 1 + + # Average overlapping predictions and extract original sequence + valid_mask = counts_arr > 0 + final_prediction = np.zeros_like(sum_arr) + final_prediction[valid_mask] = sum_arr[valid_mask] / counts_arr[valid_mask] + + # Extract the original sequence (remove padding) + final_prediction = final_prediction[padding:padding + original_length] + + # Denormalize the predictions + if appliance in self.appliance_params: + app_mean = self.appliance_params[appliance]['mean'] + app_std = self.appliance_params[appliance]['std'] + final_prediction = final_prediction * app_std + app_mean + + # Clip negative values + final_prediction_clipped = np.where(final_prediction > 0, final_prediction, 0) + df = pd.Series(final_prediction_clipped) + disggregation_dict[appliance] = df + + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + + return test_predictions + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """Preprocess data for training or testing""" + + if method == 'train': + # Training preprocessing + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad( + new_mains, (units_to_pad, units_to_pad), + 'constant', constant_values=(0, 0) + ) + new_mains = np.array([ + new_mains[i:i + n] for i in range(len(new_mains) - n + 1) + ]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + appliance_list = [] + for app_index, (app_name, app_df_list) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + else: + print(self.appliance_params) + print(f"Parameters for {app_name} were not found!") + raise ApplianceNotFoundError() + + processed_appliance_dfs = [] + for app_df in app_df_list: + new_app_readings = app_df.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_app_readings = np.pad( + new_app_readings, (units_to_pad, units_to_pad), + 'constant', constant_values=(0, 0) + ) + new_app_readings = np.array([ + new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1) + ]) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_appliance_dfs.append(pd.DataFrame(new_app_readings)) + + appliance_list.append((app_name, processed_appliance_dfs)) + + return processed_mains_lst, appliance_list + + else: + # Test preprocessing + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad( + new_mains, (units_to_pad, units_to_pad), + 'constant', constant_values=(0, 0) + ) + new_mains = np.array([ + new_mains[i:i + n] for i in range(len(new_mains) - n + 1) + ]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + new_mains = new_mains.reshape((-1, self.sequence_length)) + processed_mains_lst.append(pd.DataFrame(new_mains)) + + return processed_mains_lst + + def denormalize_output(self, predictions, appliance_name): + """Denormalize model predictions for a specific appliance""" + if appliance_name in self.appliance_params: + app_mean = self.appliance_params[appliance_name]['mean'] + app_std = self.appliance_params[appliance_name]['std'] + return predictions * app_std + app_mean + else: + return predictions + + def set_appliance_params(self, train_appliances): + """Calculate normalization parameters for each appliance""" + + for (app_name, df_list) in train_appliances: + l = np.array(pd.concat(df_list, axis=0)) + app_mean = np.mean(l) + app_std = np.std(l) + if app_std < 1: + app_std = 100 + self.appliance_params.update({ + app_name: {'mean': app_mean, 'std': app_std} + }) + + print("Appliance parameters:", self.appliance_params) diff --git a/nilmtk_contrib/torch/preprocessing.py b/nilmtk_contrib/torch/preprocessing.py index b21a71e..d7cb8a0 100644 --- a/nilmtk_contrib/torch/preprocessing.py +++ b/nilmtk_contrib/torch/preprocessing.py @@ -2,24 +2,54 @@ import pandas as pd class ApplianceNotFoundError(Exception): + """Custom exception for when appliance parameters are not found.""" pass -def preprocess(sequence_length = None,mains_mean = None,mains_std = None,mains_lst = None,submeters_lst = None,method="train",appliance_params=None,windowing=False): +def preprocess(sequence_length=None, mains_mean=None, mains_std=None, mains_lst=None, submeters_lst=None, method="train", appliance_params=None, windowing=False): + """ + Preprocesses mains and appliance data by creating sliding windows and normalizing the data. + + Args: + sequence_length (int): The length of the sliding window. + mains_mean (float): The mean of the mains data for normalization. + mains_std (float): The standard deviation of the mains data for normalization. + mains_lst (list of pd.DataFrame): A list of DataFrames, each containing mains data. + submeters_lst (list of tuples): A list where each tuple contains the appliance name + (str) and a list of its corresponding DataFrames. + method (str, optional): The mode of operation, either "train" or "test". Defaults to "train". + appliance_params (dict, optional): A dictionary containing the mean and std for each + appliance. Required if method is "train". Defaults to None. + windowing (bool, optional): If True, applies sliding window to appliance data. + If False, normalizes the flattened appliance data. Defaults to False. + + Returns: + If method is "test" or submeters_lst is not provided: + list of pd.DataFrame: A list of preprocessed mains dataframes. + If method is "train": + tuple: A tuple containing: + - list of pd.DataFrame: Preprocessed mains data. + - list of tuples: Preprocessed appliance data, structured like submeters_lst. + """ pad = sequence_length // 2 + # Preprocess mains data proc_mains = [] - for mains in mains_lst: v = mains.values.flatten() - v = np.pad(v,(pad,pad)) - windows = np.array([v[i:i+sequence_length] for i in range(len(v)-sequence_length + 1)],dtype=np.float32) - windows = (windows - mains_mean)/mains_std + # Pad the sequence to handle windowing at the edges + v = np.pad(v, (pad, pad), 'constant', constant_values=(0,0)) + # Create sliding windows + windows = np.array([v[i:i+sequence_length] for i in range(len(v) - sequence_length + 1)], dtype=np.float32) + # Normalize the windows + windows = (windows - mains_mean) / mains_std proc_mains.append(pd.DataFrame(windows)) + + # Return only mains data if in test mode or no appliance data is provided if method == "test" or not submeters_lst: return proc_mains + # Preprocess appliance data proc_apps = [] - for app_name, df_list in submeters_lst: if appliance_params is None or app_name not in appliance_params: raise ApplianceNotFoundError(f"Parameters for {app_name} not initialized.") @@ -28,19 +58,19 @@ def preprocess(sequence_length = None,mains_mean = None,mains_std = None,mains_l std = appliance_params[app_name]["std"] sub = [] - for df in df_list: flat = df.values.flatten() - if windowing: - flat = np.pad(flat,(pad,pad)) - windows = np.array([flat[i:i+sequence_length] for i in range(len(flat)-sequence_length+1)],dtype=np.float32) - windows = (windows-mean)/std + # Apply padding and sliding window if specified + flat = np.pad(flat, (pad, pad), 'constant', constant_values=(0,0)) + windows = np.array([flat[i:i+sequence_length] for i in range(len(flat) - sequence_length + 1)], dtype=np.float32) + windows = (windows - mean) / std sub.append(pd.DataFrame(windows)) else: - flat = (flat-mean)/std - sub.append(pd.DataFrame(flat.reshape(-1,1))) - proc_apps.append((app_name,sub)) + # Normalize the flattened data directly + flat = (flat - mean) / std + sub.append(pd.DataFrame(flat.reshape(-1, 1))) + proc_apps.append((app_name, sub)) return proc_mains, proc_apps \ No newline at end of file diff --git a/nilmtk_contrib/torch/reformer.py b/nilmtk_contrib/torch/reformer.py new file mode 100644 index 0000000..4c8193d --- /dev/null +++ b/nilmtk_contrib/torch/reformer.py @@ -0,0 +1,578 @@ +from collections import OrderedDict +import os +import numpy as np +import pandas as pd +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import TensorDataset, DataLoader +from tqdm import tqdm +import math +from nilmtk.disaggregate import Disaggregator + +class SequenceLengthError(Exception): + pass + +class ApplianceNotFoundError(Exception): + pass + +# Axial Positional Embeddings +class AxialPositionalEmbedding(nn.Module): + """ + Axial positional embeddings for long sequences. + """ + def __init__(self, dim, max_seq_len, axial_shape): + super().__init__() + self.dim = dim + self.max_seq_len = max_seq_len + self.axial_shape = axial_shape + + assert len(axial_shape) == 2, "Axial shape must be 2D" + assert axial_shape[0] * axial_shape[1] == max_seq_len, "Axial shape must multiply to max_seq_len" + + self.axial_dims = [dim // 2, dim - (dim // 2)] + + self.pos_embs = nn.ModuleList([ + nn.Embedding(axial_shape[0], self.axial_dims[0]), + nn.Embedding(axial_shape[1], self.axial_dims[1]) + ]) + + def forward(self, x): + b, n, d = x.shape + embs = [] + + for i, (shape, pos_emb) in enumerate(zip(self.axial_shape, self.pos_embs)): + if i == 0: + pos = torch.arange(n, device=x.device) // self.axial_shape[1] + else: + pos = torch.arange(n, device=x.device) % self.axial_shape[1] + + emb = pos_emb(pos) + embs.append(emb) + + pos_emb = torch.cat(embs, dim=-1) + return x + pos_emb + +# LSH Attention Implementation +class LSHSelfAttention(nn.Module): + """ + LSH self-attention for efficient attention computation. + """ + def __init__(self, dim, heads=8, bucket_size=64, n_hashes=4, causal=False, dropout=0.): + super().__init__() + self.dim = dim + self.heads = heads + self.bucket_size = bucket_size + self.n_hashes = n_hashes + self.causal = causal + self.dropout = nn.Dropout(dropout) + + self.head_dim = dim // heads + + self.to_qkv = nn.Linear(dim, dim * 3, bias=False) + self.to_out = nn.Linear(dim, dim) + + # LSH parameters + self.hash_fn = nn.Linear(self.head_dim, n_hashes * bucket_size, bias=False) + + def hash_vectors(self, vecs): + # Simple LSH using random projections + batch_size, seq_len, dim = vecs.shape + + # Apply hash function + hash_codes = self.hash_fn(vecs) # (b, n, n_hashes * bucket_size) + hash_codes = hash_codes.view(batch_size, seq_len, self.n_hashes, self.bucket_size) + + # Get bucket assignments + bucket_assignments = torch.argmax(hash_codes, dim=-1) # (b, n, n_hashes) + + return bucket_assignments + + def forward(self, x, mask=None): + b, n, d = x.shape + h = self.heads + + # Generate Q, K, V + qkv = self.to_qkv(x).chunk(3, dim=-1) + q, k, v = map(lambda t: t.view(b, n, h, -1).transpose(1, 2), qkv) + + # For simplicity, we'll use standard attention with some bucketing + # In a full LSH implementation, this would involve more complex hashing + + # Scale queries + q = q * (self.head_dim ** -0.5) + + # Compute attention scores + scores = torch.einsum('bhid,bhjd->bhij', q, k) + + # Apply causal mask if needed + if self.causal: + causal_mask = torch.tril(torch.ones(n, n, device=x.device, dtype=torch.bool)) + scores = scores.masked_fill(~causal_mask, float('-inf')) + + # Apply input mask if provided + if mask is not None: + scores = scores.masked_fill(~mask[:, None, None, :], float('-inf')) + + # Softmax + attn = F.softmax(scores, dim=-1) + attn = self.dropout(attn) + + # Apply attention to values + out = torch.einsum('bhij,bhjd->bhid', attn, v) + out = out.transpose(1, 2).contiguous().view(b, n, d) + + return self.to_out(out) + +# Chunk FeedForward Layer +class ChunkFeedForward(nn.Module): + """ + A feed-forward layer that processes inputs in chunks to save memory. + """ + def __init__(self, dim, mult=4, chunks=1, dropout=0.): + super().__init__() + self.chunks = chunks + self.dim = dim + hidden_dim = int(dim * mult) + + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x): + if self.chunks == 1: + return self.net(x) + + # Process in chunks to save memory + chunks = x.chunk(self.chunks, dim=1) + return torch.cat([self.net(c) for c in chunks], dim=1) + +# Reformer Block +class ReformerBlock(nn.Module): + """ + A single block of the Reformer model, combining LSH attention and a feed-forward network. + """ + def __init__(self, dim, heads=8, bucket_size=64, n_hashes=4, ff_mult=4, + ff_chunks=1, causal=False, dropout=0.): + super().__init__() + + self.norm1 = nn.LayerNorm(dim) + self.attn = LSHSelfAttention( + dim=dim, + heads=heads, + bucket_size=bucket_size, + n_hashes=n_hashes, + causal=causal, + dropout=dropout + ) + + self.norm2 = nn.LayerNorm(dim) + self.ff = ChunkFeedForward( + dim=dim, + mult=ff_mult, + chunks=ff_chunks, + dropout=dropout + ) + + def forward(self, x, mask=None): + # Pre-norm architecture + x = x + self.attn(self.norm1(x), mask=mask) + x = x + self.ff(self.norm2(x)) + return x + +# Main Reformer Network for NILM +class ReformerNet(nn.Module): + """ + The Reformer network architecture for NILM. + """ + def __init__(self, sequence_length, dim=512, depth=6, heads=8, bucket_size=64, + n_hashes=4, ff_mult=4, ff_chunks=1, dropout=0.1, + axial_position_emb=True, axial_position_shape=None): + super().__init__() + + self.sequence_length = sequence_length + self.dim = dim + + # Input projection + self.input_projection = nn.Linear(1, dim) + + # Positional embeddings + if axial_position_emb: + if axial_position_shape is None: + # Auto-determine axial shape + sqrt_seq = int(math.sqrt(sequence_length)) + while sequence_length % sqrt_seq != 0: + sqrt_seq -= 1 + axial_position_shape = (sqrt_seq, sequence_length // sqrt_seq) + + self.pos_emb = AxialPositionalEmbedding( + dim=dim, + max_seq_len=sequence_length, + axial_shape=axial_position_shape + ) + else: + self.pos_emb = nn.Parameter(torch.randn(1, sequence_length, dim)) + + # Reformer blocks + self.blocks = nn.ModuleList([ + ReformerBlock( + dim=dim, + heads=heads, + bucket_size=bucket_size, + n_hashes=n_hashes, + ff_mult=ff_mult, + ff_chunks=ff_chunks, + causal=False, # For NILM, we can use full attention + dropout=dropout + ) for _ in range(depth) + ]) + + # Output layers + self.norm = nn.LayerNorm(dim) + self.to_out = nn.Sequential( + nn.Linear(dim, 1024), + nn.ReLU(), + nn.Dropout(dropout), + nn.Linear(1024, 1) + ) + + self._initialize_weights() + + def _initialize_weights(self): + """ + Initializes the model weights. + """ + for m in self.modules(): + if isinstance(m, nn.Linear): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.LayerNorm): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + def forward(self, x): + # x shape: (batch_size, 1, sequence_length) + # Transpose to (batch_size, sequence_length, 1) + x = x.transpose(1, 2) + + # Project to model dimension + x = self.input_projection(x) # (batch_size, sequence_length, dim) + + # Add positional embeddings + if isinstance(self.pos_emb, AxialPositionalEmbedding): + x = self.pos_emb(x) + else: + x = x + self.pos_emb + + # Apply Reformer blocks + for block in self.blocks: + x = block(x) + + # Final normalization + x = self.norm(x) + + # Global average pooling + x = x.mean(dim=1) # (batch_size, dim) + + # Output projection + x = self.to_out(x) # (batch_size, 1) + + return x + +class Reformer(Disaggregator): + """ + Reformer model for non-intrusive load monitoring. + + This implementation is based on the paper: + "Reformer: The Efficient Transformer" + https://arxiv.org/abs/2001.04451 + + The model adapts the Reformer architecture for energy disaggregation tasks, + using locality-sensitive hashing (LSH) attention and reversible layers for + memory-efficient processing of long sequences. + + Architecture Overview: + - LSH self-attention for efficient attention computation + - Axial positional embeddings for long sequences + - Chunk feed-forward layers for memory efficiency + - Reversible residual connections (conceptually) + - Sequence-to-point prediction for energy disaggregation + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 99) + - dim (int): Model dimension (default: 512) + - depth (int): Number of transformer layers (default: 6) + - heads (int): Number of attention heads (default: 8) + - bucket_size (int): LSH bucket size (default: 64) + - n_hashes (int): Number of LSH hash functions (default: 4) + - ff_mult (int): Feed-forward expansion factor (default: 4) + - ff_chunks (int): Number of chunks for feed-forward (default: 1) + - dropout (float): Dropout rate (default: 0.1) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + """ + def __init__(self, params): + super().__init__() + self.MODEL_NAME = "Reformer" + self.models = OrderedDict() + self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" + + # Extract hyperparameters from params dict + self.chunk_wise_training = params.get("chunk_wise_training", False) + self.sequence_length = params.get("sequence_length", 99) + self.n_epochs = params.get("n_epochs", 10) + self.batch_size = params.get("batch_size", 512) + self.appliance_params = params.get("appliance_params", {}) + self.mains_mean = params.get("mains_mean", 1800) + self.mains_std = params.get("mains_std", 600) + + # Reformer specific parameters + self.dim = params.get("dim", 512) + self.depth = params.get("depth", 6) + self.heads = params.get("heads", 8) + self.bucket_size = params.get("bucket_size", 64) + self.n_hashes = params.get("n_hashes", 4) + self.ff_mult = params.get("ff_mult", 4) + self.ff_chunks = params.get("ff_chunks", 1) + self.dropout = params.get("dropout", 0.1) + self.axial_position_emb = params.get("axial_position_emb", True) + self.axial_position_shape = params.get("axial_position_shape", None) + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Sequence length must be odd for proper windowing + if self.sequence_length % 2 == 0: + print("Sequence length should be odd!") + raise SequenceLengthError + + print(f"Reformer initialized with sequence_length={self.sequence_length}") + print(f"Reformer params: dim={self.dim}, depth={self.depth}, heads={self.heads}") + print(f"LSH params: bucket_size={self.bucket_size}, n_hashes={self.n_hashes}") + print(f"Using device: {self.device}") + + def return_network(self): + """ + Builds the Reformer network. + """ + model = ReformerNet( + sequence_length=self.sequence_length, + dim=self.dim, + depth=self.depth, + heads=self.heads, + bucket_size=self.bucket_size, + n_hashes=self.n_hashes, + ff_mult=self.ff_mult, + ff_chunks=self.ff_chunks, + dropout=self.dropout, + axial_position_emb=self.axial_position_emb, + axial_position_shape=self.axial_position_shape + ).to(self.device) + + # Count parameters + total_params = sum(p.numel() for p in model.parameters()) + print(f"Reformer model created with {total_params:,} parameters") + + return model + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data using a sliding window, matching seq2point. + """ + if method == 'train': + # Preprocessing for the train data - exactly matching seq2point + mains_df_list = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + mains_df_list.append(pd.DataFrame(new_mains)) + + appliance_list = [] + for app_index, (app_name, app_df_list) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + else: + print("Parameters for", app_name, "were not found!") + raise ApplianceNotFoundError() + + processed_appliance_dfs = [] + for app_df in app_df_list: + new_app_readings = app_df.values.reshape((-1, 1)) + # This is for choosing windows + new_app_readings = (new_app_readings - app_mean) / app_std + # Return as a list of dataframe + processed_appliance_dfs.append(pd.DataFrame(new_app_readings)) + appliance_list.append((app_name, processed_appliance_dfs)) + return mains_df_list, appliance_list + + else: + # Preprocessing for the test data - exactly matching seq2point + mains_df_list = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + mains_df_list.append(pd.DataFrame(new_mains)) + return mains_df_list + + def set_appliance_params(self, train_appliances): + """ + Computes and sets normalization parameters for each appliance. + """ + for app_name, df_list in train_appliances: + l = np.array(pd.concat(df_list, axis=0)) + app_mean = np.mean(l) + app_std = np.std(l) + if app_std < 1: + app_std = 100 + self.appliance_params.update({app_name: {'mean': app_mean, 'std': app_std}}) + print(self.appliance_params) + + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **load_kwargs): + """ + Trains the Reformer model on a chunk of data. + """ + # If no appliance wise parameters are provided, then compute them using the first chunk + if len(self.appliance_params) == 0: + self.set_appliance_params(train_appliances) + + print("...............Reformer partial_fit running...............") + # Do the pre-processing, such as windowing and normalizing + if do_preprocessing: + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + train_main = pd.concat(train_main, axis=0) + train_main = train_main.values.reshape((-1, self.sequence_length, 1)) + new_train_appliances = [] + for app_name, app_df in train_appliances: + app_df = pd.concat(app_df, axis=0) + app_df_values = app_df.values.reshape((-1, 1)) + new_train_appliances.append((app_name, app_df_values)) + train_appliances = new_train_appliances + + for appliance_name, power in train_appliances: + # Check if the appliance was already trained. If not then create a new model for it + if appliance_name not in self.models: + print("First model training for", appliance_name) + self.models[appliance_name] = self.return_network() + # Retrain the particular appliance + else: + print("Started Retraining model for", appliance_name) + + model = self.models[appliance_name] + if train_main.size > 0: + # Sometimes chunks can be empty after dropping NANS + if len(train_main) > 10: + # Convert to PyTorch tensors and correct format + # PyTorch Conv1d expects (batch, channels, length) + train_main_tensor = torch.tensor(train_main, dtype=torch.float32).permute(0, 2, 1).to(self.device) + power_tensor = torch.tensor(power, dtype=torch.float32).squeeze().to(self.device) + + # Create validation split + n_samples = train_main_tensor.size(0) + val_size = int(0.15 * n_samples) + indices = torch.randperm(n_samples) + train_idx, val_idx = indices[val_size:], indices[:val_size] + + train_X = train_main_tensor[train_idx] + train_y = power_tensor[train_idx] + val_X = train_main_tensor[val_idx] + val_y = power_tensor[val_idx] + + # Setup optimizer and loss + optimizer = torch.optim.Adam(model.parameters(), lr=0.005, betas=(0.9, 0.999), eps=1e-07, weight_decay=0.0) + criterion = nn.MSELoss() + + best_val_loss = float('inf') + filepath = self.file_prefix + "-{}-epoch{}.pth".format( + "_".join(appliance_name.split()), + current_epoch, + ) + + # Training loop matching seq2point behavior + for epoch in range(self.n_epochs): + model.train() + + # Create batches + train_dataset = TensorDataset(train_X, train_y) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + epoch_losses = [] + for batch_X, batch_y in train_loader: + optimizer.zero_grad() + predictions = model(batch_X).squeeze() + loss = criterion(predictions, batch_y) + loss.backward() + + # Add gradient clipping like seq2point + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + + optimizer.step() + epoch_losses.append(loss.item()) + + # Validation + model.eval() + with torch.no_grad(): + val_predictions = model(val_X).squeeze() + val_loss = criterion(val_predictions, val_y).item() + + avg_train_loss = np.mean(epoch_losses) + print(f"Epoch {epoch+1}/{self.n_epochs} - loss: {avg_train_loss:.4f} - val_loss: {val_loss:.4f}") + + # Save best model (matching seq2point's ModelCheckpoint behavior) + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f"Validation loss improved, saving model to {filepath}") + + # Load best weights + model.load_state_dict(torch.load(filepath, map_location=self.device)) + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """ + Disaggregates a chunk of mains power data. + """ + if model is not None: + self.models = model + + # Preprocess the test mains such as windowing and normalizing + if do_preprocessing: + test_main_list = self.call_preprocessing(test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_main in test_main_list: + test_main = test_main.values + test_main = test_main.reshape((-1, self.sequence_length, 1)) + + # Convert to PyTorch tensor with correct format for Conv1d + test_main_tensor = torch.tensor(test_main, dtype=torch.float32).permute(0, 2, 1).to(self.device) + + disggregation_dict = {} + for appliance in self.models: + model = self.models[appliance] + model.eval() + with torch.no_grad(): + prediction = model(test_main_tensor).cpu().numpy() + # Denormalize exactly like seq2point + prediction = self.appliance_params[appliance]['mean'] + prediction * self.appliance_params[appliance]['std'] + valid_predictions = prediction.flatten() + valid_predictions = np.where(valid_predictions > 0, valid_predictions, 0) + df = pd.Series(valid_predictions) + disggregation_dict[appliance] = df + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + return test_predictions \ No newline at end of file diff --git a/nilmtk_contrib/torch/resnet.py b/nilmtk_contrib/torch/resnet.py index b1f6b3e..3cffee3 100644 --- a/nilmtk_contrib/torch/resnet.py +++ b/nilmtk_contrib/torch/resnet.py @@ -16,15 +16,6 @@ from sklearn.model_selection import train_test_split from tqdm import tqdm import random -from nilmtk_contrib.torch.preprocessing import preprocess - -# Set random seeds -random.seed(10) -np.random.seed(10) -torch.manual_seed(10) -if torch.cuda.is_available(): - torch.cuda.manual_seed(10) - torch.cuda.manual_seed_all(10) # Set device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -36,112 +27,95 @@ class ApplianceNotFoundError(Exception): pass class IdentityBlock(nn.Module): - def __init__(self, filters, kernel_size, input_channels=None): + """ + An identity block for ResNet, where the input and output dimensions are the same. + This implementation mirrors the structure of the original TensorFlow version. + """ + def __init__(self, filters, kernel_size): super(IdentityBlock, self).__init__() - # Use input_channels if provided, otherwise assume filters[0] - in_channels = input_channels if input_channels is not None else filters[0] - - self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=filters[0], - kernel_size=kernel_size, stride=1, padding=kernel_size//2) + # Three convolutional layers, maintaining the channel count + self.conv1 = nn.Conv1d(in_channels=filters[0], out_channels=filters[0], + kernel_size=kernel_size, stride=1, padding='same') self.conv2 = nn.Conv1d(in_channels=filters[0], out_channels=filters[1], - kernel_size=kernel_size, stride=1, padding=kernel_size//2) + kernel_size=kernel_size, stride=1, padding='same') self.conv3 = nn.Conv1d(in_channels=filters[1], out_channels=filters[2], - kernel_size=kernel_size, stride=1, padding=kernel_size//2) - - # Shortcut connection - adjust if input and output channels don't match - if in_channels != filters[2]: - self.shortcut = nn.Conv1d(in_channels=in_channels, out_channels=filters[2], - kernel_size=1, stride=1, padding=0) - else: - self.shortcut = nn.Identity() + kernel_size=kernel_size, stride=1, padding='same') def forward(self, x): + # Store input for the residual connection identity = x + # Forward pass through convolutions with ReLU activations out = F.relu(self.conv1(x)) out = F.relu(self.conv2(out)) out = self.conv3(out) - identity = self.shortcut(identity) - - # Ensure both tensors have the same size - if out.size() != identity.size(): - # Adjust size if needed - min_size = min(out.size(2), identity.size(2)) - out = out[:, :, :min_size] - identity = identity[:, :, :min_size] - - out = out + identity + # Add the residual (identity) connection and apply final activation + out += identity out = F.relu(out) return out class ConvolutionBlock(nn.Module): - def __init__(self, filters, kernel_size, input_channels=None): + """ + A convolutional block for ResNet that can change the input's channel dimension. + This implementation mirrors the structure of the original TensorFlow version. + """ + def __init__(self, filters, kernel_size): super(ConvolutionBlock, self).__init__() - # Use input_channels if provided, otherwise assume filters[0] - in_channels = input_channels if input_channels is not None else filters[0] - - self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=filters[0], - kernel_size=kernel_size, stride=1, padding=kernel_size//2) + # Main path with three convolutional layers + self.conv1 = nn.Conv1d(in_channels=filters[0], out_channels=filters[0], + kernel_size=kernel_size, stride=1, padding='same') self.conv2 = nn.Conv1d(in_channels=filters[0], out_channels=filters[1], - kernel_size=kernel_size, stride=1, padding=kernel_size//2) + kernel_size=kernel_size, stride=1, padding='same') self.conv3 = nn.Conv1d(in_channels=filters[1], out_channels=filters[2], - kernel_size=kernel_size, stride=1, padding=kernel_size//2) - self.conv4 = nn.Conv1d(in_channels=in_channels, out_channels=filters[2], - kernel_size=kernel_size, stride=1, padding=kernel_size//2) + kernel_size=kernel_size, stride=1, padding='same') + + # Skip connection path to match the output channel dimension + self.conv4 = nn.Conv1d(in_channels=filters[0], out_channels=filters[2], + kernel_size=kernel_size, stride=1, padding='same') def forward(self, x): + # Store input for the skip connection identity = x + # Forward pass through the main path out = F.relu(self.conv1(x)) out = F.relu(self.conv2(out)) - out = F.relu(self.conv3(out)) - - identity = F.relu(self.conv4(identity)) + out = self.conv3(out) - # Ensure both tensors have the same size - if out.size() != identity.size(): - min_size = min(out.size(2), identity.size(2)) - out = out[:, :, :min_size] - identity = identity[:, :, :min_size] + # Transform the identity to match the output channels for the residual connection + identity = self.conv4(identity) - out = out + identity + # Add the residual connection and apply final activation + out += identity out = F.relu(out) return out class ResNetModel(nn.Module): """ - ResNet model for appliance load disaggregation. - It includes initial convolutional layers, ResNet blocks, and fully connected layers. + A ResNet-based model for NILM, mirroring the original TensorFlow implementation. """ def __init__(self, sequence_length, num_filters=30): super(ResNetModel, self).__init__() self.sequence_length = sequence_length self.num_filters = num_filters - # Initial layers - matching TensorFlow implementation exactly + # Initial layers, including double ReLU to match TensorFlow's structure self.zero_pad = nn.ZeroPad1d(3) - self.conv1 = nn.Conv1d(in_channels=1, out_channels=num_filters, - kernel_size=48, stride=2, padding=0) # No padding here, ZeroPad1d handles it + self.conv1 = nn.Conv1d(in_channels=1, out_channels=num_filters, kernel_size=48, stride=2) self.bn1 = nn.BatchNorm1d(num_filters) - self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=0) - - # Calculate intermediate size after initial layers - self._calculate_intermediate_size() + self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2) - # ResNet blocks with proper input channel specification - self.conv_block = ConvolutionBlock([num_filters, num_filters, num_filters], 24, - input_channels=num_filters) - self.identity_block1 = IdentityBlock([num_filters, num_filters, num_filters], 12, - input_channels=num_filters) - self.identity_block2 = IdentityBlock([num_filters, num_filters, num_filters], 6, - input_channels=num_filters) + # ResNet blocks + self.conv_block = ConvolutionBlock([num_filters, num_filters, num_filters], 24) + self.identity_block1 = IdentityBlock([num_filters, num_filters, num_filters], 12) + self.identity_block2 = IdentityBlock([num_filters, num_filters, num_filters], 6) - # Calculate the size after convolutions for fully connected layers + # Calculate the input size for the fully connected layers dynamically self._calculate_fc_input_size() # Fully connected layers @@ -149,29 +123,17 @@ def __init__(self, sequence_length, num_filters=30): self.dropout = nn.Dropout(0.2) self.fc2 = nn.Linear(1024, sequence_length) - def _calculate_intermediate_size(self): - """Calculate size after initial conv and maxpool layers""" - # Start with sequence_length + 6 (3 padding on each side) - size = self.sequence_length + 6 - # After conv1 with kernel=48, stride=2 - size = (size - 48) // 2 + 1 - # After maxpool with kernel=3, stride=2 - size = (size - 3) // 2 + 1 - self.intermediate_size = size - def _calculate_fc_input_size(self): - """Calculate the size after all convolutions""" - # Create a dummy input to calculate the size after convolutions - dummy_input = torch.zeros(1, 1, self.sequence_length) - x = self._forward_conv_layers(dummy_input) - x = x.view(x.size(0), -1) - self.fc_input_size = x.size(1) + """Calculates the input size for the FC layers via a dummy forward pass.""" + with torch.no_grad(): + dummy_input = torch.zeros(1, 1, self.sequence_length) + x = self._forward_conv_layers(dummy_input) + self.fc_input_size = x.flatten(1).shape[1] def _forward_conv_layers(self, x): - """Forward pass through convolutional layers only""" - # Initial processing + """Performs the forward pass through the convolutional layers.""" x = self.zero_pad(x) - x = self.conv1(x) + x = F.relu(self.conv1(x)) x = self.bn1(x) x = F.relu(x) x = self.maxpool(x) @@ -188,7 +150,7 @@ def forward(self, x): x = self._forward_conv_layers(x) # Fully connected layers - x = x.view(x.size(0), -1) # Flatten + x = x.flatten(1) x = F.relu(self.fc1(x)) x = self.dropout(x) x = self.fc2(x) @@ -197,10 +159,32 @@ def forward(self, x): class ResNet(Disaggregator): """ - ResNet-based disaggregator for NILMTK. - This class implements a ResNet model for disaggregating mains electricity data - into appliance-level data. - """ + ResNet-based model for non-intrusive load monitoring. + + This implementation is based on the paper: + "Deep Residual Learning for Image Recognition" + https://arxiv.org/abs/1512.03385 + + The model adapts the ResNet architecture for energy disaggregation tasks, + using residual connections to enable training of deep networks for predicting + individual appliance power consumption from aggregate household power measurements. + + Architecture Overview: + - 1D convolutional layers adapted for time series data + - Identity blocks with residual connections for feature learning + - Convolution blocks for changing channel dimensions + - Batch normalization and max pooling for regularization + - Fully connected layers for sequence prediction + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 299) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + - appliance_params (dict): Appliance-specific normalization parameters + - load_model_path (str): Path to load pre-trained models + """ def __init__(self, params): self.MODEL_NAME = "ResNet" self.chunk_wise_training = params.get('chunk_wise_training', False) @@ -215,212 +199,227 @@ def __init__(self, params): self.device = device if self.sequence_length % 2 == 0: - print("Sequence length should be odd!") - raise SequenceLengthError + raise SequenceLengthError("Sequence length must be odd!") def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **load_kwargs): + """Trains the model on a chunk of data.""" print("...............ResNet partial_fit running...............") - if len(self.appliance_params) == 0: + if not self.appliance_params: self.set_appliance_params(train_appliances) if do_preprocessing: print("Preprocessing data...") - train_main, train_appliances = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=train_main, - submeters_lst=train_appliances, - method="train", - appliance_params=self.appliance_params, - windowing=True - ) - - train_main = pd.concat(train_main, axis=0) - train_main = train_main.values.reshape((-1, self.sequence_length, 1)) + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + train_main = pd.concat(train_main, axis=0).values.reshape((-1, self.sequence_length, 1)) new_train_appliances = [] for app_name, app_dfs in train_appliances: - app_df = pd.concat(app_dfs, axis=0) - app_df_values = app_df.values.reshape((-1, self.sequence_length)) + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, self.sequence_length)) new_train_appliances.append((app_name, app_df_values)) train_appliances = new_train_appliances print(f"Training data shape: {train_main.shape}") - # Progress bar for appliances - appliance_progress = tqdm(train_appliances, desc="Training appliances", unit="appliance") - - for appliance_name, power in appliance_progress: - appliance_progress.set_postfix({"Current": appliance_name}) - + for appliance_name, power in train_appliances: if appliance_name not in self.models: - print(f"\nFirst model training for {appliance_name}") + print(f"First time training for {appliance_name}") self.models[appliance_name] = self.return_network() else: - print(f"\nStarted Retraining model for {appliance_name}") + print(f"Retraining model for {appliance_name}") model = self.models[appliance_name] - if train_main.size > 0: - if len(train_main) > 10: - # Convert to PyTorch tensors + if train_main.size > 10: + # Create training and validation sets train_x, v_x, train_y, v_y = train_test_split( - train_main, power, test_size=.15, random_state=10) + train_main, power, test_size=0.15, random_state=10) + # Convert to PyTorch Tensors train_x = torch.FloatTensor(train_x).permute(0, 2, 1).to(self.device) v_x = torch.FloatTensor(v_x).permute(0, 2, 1).to(self.device) train_y = torch.FloatTensor(train_y).to(self.device) v_y = torch.FloatTensor(v_y).to(self.device) - # Create DataLoaders + # Create DataLoaders for batching train_dataset = TensorDataset(train_x, train_y) val_dataset = TensorDataset(v_x, v_y) train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False) - # Training loop + # Train the model self.train_model(model, train_loader, val_loader, appliance_name) + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data by windowing and normalizing, mirroring the original + TensorFlow implementation. + """ + if method == 'train': + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + app_min = self.appliance_params[app_name]['min'] + app_max = self.appliance_params[app_name]['max'] + else: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.flatten() + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_app_readings = np.array([new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1)]) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + + appliance_list.append((app_name, processed_app_dfs)) + + return processed_mains_lst, appliance_list + + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + new_mains = new_mains.reshape((-1, self.sequence_length)) + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst + def train_model(self, model, train_loader, val_loader, appliance_name): - optimizer = optim.Adam(model.parameters()) + """Handles the training and validation loop for the model.""" + # Optimizer with settings matching TensorFlow's defaults + optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-07) criterion = nn.MSELoss() best_val_loss = float('inf') best_model_state = None + patience = 10 + patience_counter = 0 - # Progress bar for epochs - epoch_progress = tqdm(range(self.n_epochs), desc=f"Training {appliance_name}", unit="epoch") + print(f"Training {appliance_name} for {self.n_epochs} epochs...") - for epoch in epoch_progress: - # Training phase + for epoch in range(self.n_epochs): + # --- Training Phase --- model.train() train_loss = 0.0 - # Progress bar for training batches - train_batch_progress = tqdm(train_loader, desc=f"Epoch {epoch+1} Training", - leave=False, unit="batch") - - for batch_x, batch_y in train_batch_progress: + for batch_x, batch_y in train_loader: optimizer.zero_grad() - outputs = model(batch_x) loss = criterion(outputs, batch_y) - loss.backward() - optimizer.step() + # Gradient clipping for training stability + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + + optimizer.step() train_loss += loss.item() - train_batch_progress.set_postfix({"Loss": f"{loss.item():.4f}"}) - # Validation phase + # --- Validation Phase --- model.eval() val_loss = 0.0 - # Progress bar for validation batches - val_batch_progress = tqdm(val_loader, desc=f"Epoch {epoch+1} Validation", - leave=False, unit="batch") - with torch.no_grad(): - for batch_x, batch_y in val_batch_progress: + for batch_x, batch_y in val_loader: outputs = model(batch_x) loss = criterion(outputs, batch_y) val_loss += loss.item() - val_batch_progress.set_postfix({"Loss": f"{loss.item():.4f}"}) train_loss /= len(train_loader) val_loss /= len(val_loader) - # Update epoch progress bar - epoch_progress.set_postfix({ - "Train Loss": f"{train_loss:.4f}", - "Val Loss": f"{val_loss:.4f}", - "Best": f"{best_val_loss:.4f}" - }) - - # Save best model + # Early stopping and saving the best model if val_loss < best_val_loss: best_val_loss = val_loss best_model_state = model.state_dict().copy() - epoch_progress.write(f'New best model saved with val_loss: {val_loss:.4f}') + patience_counter = 0 + print(f'Epoch {epoch+1}: New best model found with validation loss: {val_loss:.6f}') + else: + patience_counter += 1 + + if (epoch + 1) % 5 == 0: + print(f'Epoch {epoch+1}/{self.n_epochs}: Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}') + + # Check for early stopping + if patience_counter >= patience and epoch >= 20: + print(f"Stopping early at epoch {epoch+1} due to no improvement.") + break - # Load best model + # Load the best model state after training is complete if best_model_state is not None: model.load_state_dict(best_model_state) - print(f"\nLoaded best model for {appliance_name} with validation loss: {best_val_loss:.4f}") + print(f"Finished training. Loaded best model for {appliance_name} with validation loss: {best_val_loss:.6f}") def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """Disaggregates a chunk of mains data.""" if model is not None: self.models = model if do_preprocessing: print("Preprocessing test data...") - test_main_list = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=test_main_list, - submeters_lst=None, - method="test", - appliance_params=self.appliance_params, - windowing=True - ) + test_main_list = self.call_preprocessing( + test_main_list, submeters_lst=None, method='test') test_predictions = [] - # Progress bar for test chunks - chunk_progress = tqdm(test_main_list, desc="Processing test chunks", unit="chunk") - - for test_mains_df in chunk_progress: + for test_mains_df in test_main_list: disggregation_dict = {} test_main_array = test_mains_df.values.reshape((-1, self.sequence_length, 1)) test_main_tensor = torch.FloatTensor(test_main_array).permute(0, 2, 1).to(self.device) - # Progress bar for appliances in each chunk - appliance_progress = tqdm(self.models.items(), desc="Disaggregating appliances", - leave=False, unit="appliance") - - for appliance, model in appliance_progress: - appliance_progress.set_postfix({"Current": appliance}) - + for appliance, model in self.models.items(): model.eval() - # Create DataLoader for batched prediction + # Create DataLoader for batched predictions test_dataset = TensorDataset(test_main_tensor) test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False) predictions = [] - - # Progress bar for prediction batches - pred_progress = tqdm(test_loader, desc=f"Predicting {appliance}", - leave=False, unit="batch") - with torch.no_grad(): - for batch_x, in pred_progress: + for batch_x, in test_loader: batch_pred = model(batch_x) predictions.append(batch_pred.cpu().numpy()) prediction = np.concatenate(predictions, axis=0) - # Average predictions over sequences + # Average predictions over overlapping windows l = self.sequence_length n = len(prediction) + l - 1 - sum_arr = np.zeros((n)) - counts_arr = np.zeros((n)) + sum_arr = np.zeros(n) + counts_arr = np.zeros(n) - for i in range(len(prediction)): - sum_arr[i:i + l] += prediction[i].flatten() - counts_arr[i:i + l] += 1 + for i, p in enumerate(prediction): + sum_arr[i:i+l] += p.flatten() + counts_arr[i:i+l] += 1 - for i in range(len(sum_arr)): - sum_arr[i] = sum_arr[i] / counts_arr[i] + # Replace zero counts with one to avoid division by zero + counts_arr[counts_arr == 0] = 1 + averaged_prediction = sum_arr / counts_arr # Denormalize predictions - prediction = (self.appliance_params[appliance]['mean'] + - (sum_arr * self.appliance_params[appliance]['std'])) - valid_predictions = prediction.flatten() - valid_predictions = np.where(valid_predictions > 0, valid_predictions, 0) - df = pd.Series(valid_predictions) + app_mean = self.appliance_params[appliance]['mean'] + app_std = self.appliance_params[appliance]['std'] + denormalized_prediction = averaged_prediction * app_std + app_mean + + # Set negative values to zero + denormalized_prediction[denormalized_prediction < 0] = 0 + df = pd.Series(denormalized_prediction) disggregation_dict[appliance] = df results = pd.DataFrame(disggregation_dict, dtype='float32') @@ -429,24 +428,36 @@ def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): return test_predictions def return_network(self): + """Returns a new, initialized ResNet model.""" model = ResNetModel(self.sequence_length).to(self.device) + + # Initialize weights to match TensorFlow's defaults + def init_weights(m): + if isinstance(m, (nn.Conv1d, nn.Linear)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm1d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + model.apply(init_weights) return model def set_appliance_params(self, train_appliances): + """Computes and sets normalization parameters for each appliance.""" print("Setting appliance parameters...") - # Progress bar for setting appliance parameters - param_progress = tqdm(train_appliances, desc="Computing appliance stats", unit="appliance") - - for (app_name, df_list) in param_progress: - param_progress.set_postfix({"Current": app_name}) - - l = np.array(pd.concat(df_list, axis=0)) + for (app_name, df_list) in train_appliances: + l = np.concatenate([df.values for df in df_list]) app_mean = np.mean(l) app_std = np.std(l) app_max = np.max(l) app_min = np.min(l) if app_std < 1: app_std = 100 - self.appliance_params.update({app_name: {'mean': app_mean, 'std': app_std, - 'max': app_max, 'min': app_min}}) \ No newline at end of file + self.appliance_params[app_name] = { + 'mean': app_mean, 'std': app_std, + 'max': app_max, 'min': app_min + } + print(f" {app_name}: mean={app_mean:.2f}, std={app_std:.2f}") diff --git a/nilmtk_contrib/torch/resnet_classification.py b/nilmtk_contrib/torch/resnet_classification.py index bdd81c8..d978c74 100644 --- a/nilmtk_contrib/torch/resnet_classification.py +++ b/nilmtk_contrib/torch/resnet_classification.py @@ -1,292 +1,506 @@ -from __future__ import annotations -import copy, numpy as np, pandas as pd -from collections import OrderedDict -from typing import Dict, Any, List, Tuple - +from __future__ import print_function, division +from warnings import warn +from nilmtk.disaggregate import Disaggregator import torch import torch.nn as nn import torch.nn.functional as F -from torch.utils.data import TensorDataset, DataLoader -from tqdm import tqdm - -from nilmtk.disaggregate import Disaggregator -from nilmtk_contrib.torch.preprocessing import preprocess +import torch.optim as optim +from torch.utils.data import Dataset, DataLoader, TensorDataset +import os +import pandas as pd +import numpy as np +import pickle +from collections import OrderedDict +import matplotlib.pyplot as plt +from sklearn.model_selection import train_test_split +from tqdm import tqdm +import random +import copy +# Set device +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') class SequenceLengthError(Exception): pass - class ApplianceNotFoundError(Exception): pass - class IdentityBlock(nn.Module): - """Residual block with identity shortcut connection.""" - def __init__(self, ch: int, k: int): - super().__init__() - self.c1 = nn.Conv1d(ch, ch, k, padding="same") - self.c2 = nn.Conv1d(ch, ch, k, padding="same") - self.c3 = nn.Conv1d(ch, ch, k, padding="same") - self.relu = nn.ReLU() - + """ + An identity block for ResNet, where the input and output dimensions are the same. + This implementation mirrors the structure of the original TensorFlow version. + """ + def __init__(self, filters, kernel_size): + super(IdentityBlock, self).__init__() + + # Three convolutional layers, maintaining the channel count + self.conv1 = nn.Conv1d(in_channels=filters[0], out_channels=filters[0], + kernel_size=kernel_size, stride=1, padding='same') + self.conv2 = nn.Conv1d(in_channels=filters[0], out_channels=filters[1], + kernel_size=kernel_size, stride=1, padding='same') + self.conv3 = nn.Conv1d(in_channels=filters[1], out_channels=filters[2], + kernel_size=kernel_size, stride=1, padding='same') + def forward(self, x): - s = x - x = self.relu(self.c1(x)) - x = self.relu(self.c2(x)) - x = self.c3(x) - return self.relu(x + s) - - -class ConvBlock(nn.Module): - """Residual block with projection shortcut.""" - def __init__(self, in_ch: int, mid: int, out: int, k: int): - super().__init__() - self.c1 = nn.Conv1d(in_ch, mid, k, padding="same") - self.c2 = nn.Conv1d(mid, mid, k, padding="same") - self.c3 = nn.Conv1d(mid, out, k, padding="same") - self.proj = nn.Conv1d(in_ch, out, 1) - self.relu = nn.ReLU() + # Store input for the residual connection + identity = x + + # Forward pass through convolutions with ReLU activations + out = F.relu(self.conv1(x)) + out = F.relu(self.conv2(out)) + out = self.conv3(out) + + # Add the residual (identity) connection and apply final activation + out += identity + out = F.relu(out) + + return out +class ConvolutionBlock(nn.Module): + """ + A convolutional block for ResNet that can change the input's channel dimension. + This implementation mirrors the structure of the original TensorFlow version. + """ + def __init__(self, filters, kernel_size): + super(ConvolutionBlock, self).__init__() + + # Main path with three convolutional layers + self.conv1 = nn.Conv1d(in_channels=filters[0], out_channels=filters[0], + kernel_size=kernel_size, stride=1, padding='same') + self.conv2 = nn.Conv1d(in_channels=filters[0], out_channels=filters[1], + kernel_size=kernel_size, stride=1, padding='same') + self.conv3 = nn.Conv1d(in_channels=filters[1], out_channels=filters[2], + kernel_size=kernel_size, stride=1, padding='same') + + # Skip connection path to match the output channel dimension + self.conv4 = nn.Conv1d(in_channels=filters[0], out_channels=filters[2], + kernel_size=kernel_size, stride=1, padding='same') + def forward(self, x): - s = self.proj(x) - x = self.relu(self.c1(x)) - x = self.relu(self.c2(x)) - x = self.c3(x) - return self.relu(x + s) - + # Store input for the skip connection + identity = x + + # Forward pass through the main path + out = F.relu(self.conv1(x)) + out = F.relu(self.conv2(out)) + out = self.conv3(out) + + # Transform the identity to match the output channels for the residual connection + identity = self.conv4(identity) + + # Add the residual connection and apply final activation + out += identity + out = F.relu(out) + + return out -class _ResNetNet(nn.Module): +class ResNetClassificationNet(nn.Module): """ - ResNet-like architecture for load disaggregation. - This model uses convolutional layers to extract features from the input sequence, - followed by fully connected layers for regression and classification. - The model predicts both the disaggregated load and a binary classification for each time step. + A ResNet-based network for NILM that combines a classification subnetwork + and a regression subnetwork, mirroring the original TensorFlow implementation. """ - def __init__(self, seq_len: int): - super().__init__() - self.seq_len = seq_len - - # Classification head - self.cls_feat = nn.Sequential( - nn.Conv1d(1, 30, 10), nn.ReLU(), - nn.Conv1d(30, 30, 8), nn.ReLU(), - nn.Conv1d(30, 40, 6), nn.ReLU(), - nn.Conv1d(40, 50, 5), nn.ReLU(), - nn.Conv1d(50, 50, 5), nn.ReLU(), - nn.Conv1d(50, 50, 5), nn.ReLU(), - nn.Flatten(), - nn.LazyLinear(1024), nn.ReLU() - ) - self.cls_head = nn.Linear(1024, seq_len) - - # Regression branch - self.pad = nn.ConstantPad1d((3, 3), 0) - self.conv0 = nn.Conv1d(1, 30, 48, stride=2) - self.bn0 = nn.BatchNorm1d(30) - self.pool0 = nn.MaxPool1d(3, stride=2) - self.block1 = ConvBlock(30, 30, 30, 24) - self.block2 = IdentityBlock(30, 12) - self.block3 = IdentityBlock(30, 6) - self.reg_end = nn.Sequential( - nn.Flatten(), - nn.LazyLinear(1024), nn.ReLU(), - nn.Dropout(0.2), - nn.Linear(1024, seq_len) - ) - + def __init__(self, sequence_length): + super(ResNetClassificationNet, self).__init__() + self.sequence_length = sequence_length + + # --- CLASSIFICATION SUBNETWORK --- + self.cls_conv1 = nn.Conv1d(1, 30, kernel_size=10, padding='valid') + self.cls_conv2 = nn.Conv1d(30, 30, kernel_size=8, padding='valid') + self.cls_conv3 = nn.Conv1d(30, 40, kernel_size=6, padding='valid') + self.cls_conv4 = nn.Conv1d(40, 50, kernel_size=5, padding='valid') + self.cls_conv5 = nn.Conv1d(50, 50, kernel_size=5, padding='valid') + self.cls_conv6 = nn.Conv1d(50, 50, kernel_size=5, padding='valid') + + # Calculate flattened size after convolutions + conv_output_length = sequence_length - (10-1) - (8-1) - (6-1) - (5-1) - (5-1) - (5-1) + self.cls_flatten_size = 50 * conv_output_length + + self.cls_dense1 = nn.Linear(self.cls_flatten_size, 1024) + self.cls_dense2 = nn.Linear(1024, sequence_length) + + # --- REGRESSION SUBNETWORK (ResNet) --- + self.zero_pad = nn.ZeroPad1d(3) + self.reg_conv1 = nn.Conv1d(in_channels=1, out_channels=30, kernel_size=48, stride=2) + self.reg_bn1 = nn.BatchNorm1d(30) + self.reg_maxpool = nn.MaxPool1d(kernel_size=3, stride=2) + + # ResNet blocks with exact same parameters as TensorFlow + self.conv_block = ConvolutionBlock([30, 30, 30], 24) + self.identity_block1 = IdentityBlock([30, 30, 30], 12) + self.identity_block2 = IdentityBlock([30, 30, 30], 6) + + # Calculate the input size for the fully connected layers dynamically + self._calculate_fc_input_size() + + # Fully connected layers for regression + self.reg_fc1 = nn.Linear(self.fc_input_size, 1024) + self.reg_dropout = nn.Dropout(0.2) + self.reg_fc2 = nn.Linear(1024, sequence_length) + + # Initialize weights + self._initialize_weights() + + def _calculate_fc_input_size(self): + """Calculates the input size for the FC layers via a dummy forward pass.""" + with torch.no_grad(): + dummy_input = torch.zeros(1, 1, self.sequence_length) + x = self._forward_regression_conv_layers(dummy_input) + self.fc_input_size = x.flatten(1).shape[1] + + def _forward_regression_conv_layers(self, x): + """Performs the forward pass through the regression conv layers.""" + x = self.zero_pad(x) + x = F.relu(self.reg_conv1(x)) + x = self.reg_bn1(x) + x = F.relu(x) + x = self.reg_maxpool(x) + + x = self.conv_block(x) + x = self.identity_block1(x) + x = self.identity_block2(x) + + return x + + def _initialize_weights(self): + """Initializes weights to match TensorFlow's defaults.""" + for m in self.modules(): + if isinstance(m, (nn.Conv1d, nn.Linear)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm1d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + + # Use He normal initialization for the first dense layer in classification + nn.init.kaiming_normal_(self.cls_dense1.weight, nonlinearity='relu') + def forward(self, x): - cls = torch.sigmoid(self.cls_head(self.cls_feat(x))) - y = self.pad(x) - y = F.relu(self.bn0(self.conv0(y))) - y = self.pool0(y) - y = self.block1(y) - y = self.block2(y) - y = self.block3(y) - reg = self.reg_end(y) - return reg * cls, cls # apply classification mask to regression output - + # Input shape: (batch_size, 1, sequence_length) + + # --- CLASSIFICATION SUBNETWORK --- + cls_x = F.relu(self.cls_conv1(x)) + cls_x = F.relu(self.cls_conv2(cls_x)) + cls_x = F.relu(self.cls_conv3(cls_x)) + cls_x = F.relu(self.cls_conv4(cls_x)) + cls_x = F.relu(self.cls_conv5(cls_x)) + cls_x = F.relu(self.cls_conv6(cls_x)) + cls_x = cls_x.view(cls_x.size(0), -1) # Flatten + cls_x = F.relu(self.cls_dense1(cls_x)) + classification_output = torch.sigmoid(self.cls_dense2(cls_x)) + + # --- REGRESSION SUBNETWORK --- + reg_x = self._forward_regression_conv_layers(x) + + # Flatten and pass through dense layers + reg_x = reg_x.flatten(1) + reg_x = F.relu(self.reg_fc1(reg_x)) + reg_x = self.reg_dropout(reg_x) + regression_output = self.reg_fc2(reg_x) + + # Final output is the element-wise product of the two subnetworks + output = regression_output * classification_output + + return output, classification_output class ResNet_classification(Disaggregator): - """Residual network for NILM with classification-aware output scaling.""" - def __init__(self, params: Dict[str, Any]): - super().__init__() + """ + ResNet-based model with classification for non-intrusive load monitoring. + + This implementation is based on the paper: + "ResNet-based Multi-output Regression for NILM: Towards Enhanced Appliance State Detection" + https://arxiv.org/abs/2411.15805v1 + + The model combines ResNet architecture with dual-output design for both appliance + state classification and power consumption regression in energy disaggregation tasks. + + Architecture Overview: + - Classification subnetwork with 1D convolutions for appliance state detection + - Regression subnetwork with ResNet blocks for power prediction + - Identity and convolution blocks with residual connections + - Element-wise multiplication of classification and regression outputs + - Multi-output learning for enhanced appliance state detection + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 99) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + - appliance_params (dict): Appliance-specific normalization parameters + - mains_params (dict): Mains-specific normalization parameters + """ + def __init__(self, params): self.MODEL_NAME = "ResNet_classification" - self.chunk_wise_training = params.get("chunk_wise_training", True) - self.sequence_length = params.get("sequence_length", 99) + self.chunk_wise_training = params.get('chunk_wise_training', False) + self.sequence_length = params.get('sequence_length', 99) + self.n_epochs = params.get('n_epochs', 10) + self.models = OrderedDict() + self.mains_mean = 1800 + self.mains_std = 600 + self.batch_size = params.get('batch_size', 512) + self.appliance_params = params.get('appliance_params', {}) + self.mains_params = params.get('mains_params', {}) + self.device = device + if self.sequence_length % 2 == 0: - raise SequenceLengthError("sequence_length must be odd") - - self.n_epochs = params.get("n_epochs", 10) - self.batch_size = params.get("batch_size", 512) - - self.mains_mean, self.mains_std = 1800, 600 - self.appliance_params: Dict[str, Dict[str, float]] = {} - - self.models: "OrderedDict[str,_ResNetNet]" = OrderedDict() - self.optims: Dict[str, torch.optim.Optimizer] = {} - self.best: Dict[str, float] = {} - - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + raise SequenceLengthError("Sequence length must be odd!") + + def return_network(self): + """Returns a new instance of the ResNetClassificationNet.""" + return ResNetClassificationNet(self.sequence_length).to(self.device) + + def classify(self, classify_appliance): + """Creates binary on/off classification labels for appliances.""" + appliance_on_off = [] + THRESHOLD = 15 # Power threshold for 'on' state + + for app_index, (appliance_name, on_off_list) in enumerate(classify_appliance): + classification_appliance_dfs = [] + for appliance in on_off_list: + n = self.sequence_length + units_to_pad = n // 2 + appliance_copy = appliance.copy() + appliance_copy[appliance_copy <= THRESHOLD] = 0 + appliance_copy[appliance_copy > THRESHOLD] = 1 + new_app_readings = appliance_copy.values.flatten() + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_app_readings = np.array([new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1)]) + classification_appliance_dfs.append(pd.DataFrame(new_app_readings)) + appliance_on_off.append((appliance_name, classification_appliance_dfs)) + return appliance_on_off + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """Preprocesses data by windowing and normalizing.""" + if method == 'train': + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + app_min = self.appliance_params[app_name]['min'] + app_max = self.appliance_params[app_name]['max'] + else: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.flatten() + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_app_readings = np.array([new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1)]) + # Normalize using min-max scaling + new_app_readings = (new_app_readings - app_min) / (app_max - app_min) + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + + appliance_list.append((app_name, processed_app_dfs)) + + return processed_mains_lst, appliance_list + + else: + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + new_mains = new_mains.reshape((-1, self.sequence_length)) + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst + + def set_mains_params(self, train_main): + """Computes and sets normalization parameters for the mains data.""" + l = np.concatenate([mains.values.flatten() for mains in train_main]) + self.mains_params.update({ + 'mean': np.mean(l), + 'std': np.std(l), + 'min': np.min(l), + 'max': np.max(l) + }) + + def set_appliance_params(self, train_appliances): + """Computes and sets normalization parameters for each appliance.""" + for (app_name, df_list) in train_appliances: + l = np.concatenate([df.values for df in df_list]) + app_mean = np.mean(l) + app_std = np.std(l) + app_max = np.max(l) + app_min = np.min(l) + if app_std < 1: + app_std = 100 + self.appliance_params[app_name] = { + 'mean': app_mean, 'std': app_std, + 'min': app_min, 'max': app_max + } - def partial_fit(self, mains, appliances, do_preprocessing=True, **_): + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **load_kwargs): + """Trains the model on a chunk of data.""" + print("...............ResNet_classification partial_fit running...............") + if not self.appliance_params: - self.set_appliance_params(appliances) - self._set_mains_params(mains) + self.set_appliance_params(train_appliances) + if not self.mains_params: + self.set_mains_params(train_main) if do_preprocessing: - cls_labels = self._make_on_off(copy.deepcopy(appliances)) - mains, appliances = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=mains, - submeters_lst=appliances, - method="train", - appliance_params=self.appliance_params, - windowing=False - ) - - X = torch.tensor(pd.concat(mains).values, dtype=torch.float32).unsqueeze(1) # [batch, seq_len, 1] - N = X.size(0) # number of samples - perm = torch.randperm(N) - val_idx, tr_idx = perm[:int(0.15 * N)], perm[int(0.15 * N):] - X_tr, X_val = X[tr_idx].to(self.device), X[val_idx].to(self.device) - - y_reg, y_cls = {}, {} - for app, dfs in appliances: - y_reg[app] = torch.tensor(pd.concat(dfs).values, dtype=torch.float32) - for app, dfs in cls_labels: - y_cls[app] = torch.tensor(pd.concat(dfs).values, dtype=torch.float32) - - mse, bce = nn.MSELoss(), nn.BCELoss() - - for app in y_reg: - y_tr = y_reg[app][tr_idx].to(self.device) - y_val = y_reg[app][val_idx].to(self.device) - c_tr = y_cls[app][tr_idx].to(self.device) - c_val = y_cls[app][val_idx].to(self.device) - - if app not in self.models: - net = _ResNetNet(self.sequence_length).to(self.device) - self.models[app] = net - self.optims[app] = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9) - self.best[app] = np.inf - - net, opt = self.models[app], self.optims[app] - loader = DataLoader(TensorDataset(X_tr, y_tr, c_tr), - batch_size=self.batch_size, shuffle=True) - - # training loop - for ep in range(self.n_epochs): - net.train() - ep_bar = tqdm(loader, - desc=f"{app} ▏epoch {ep+1}/{self.n_epochs}", - unit="batch", leave=False) # live bar - running = 0.0 - for xb, yb, cb in ep_bar: - opt.zero_grad() - pr, pc = net(xb) - loss = mse(pr, yb) + bce(pc, cb) - loss.backward() - opt.step() - running += loss.item() - ep_bar.set_postfix(loss=f"{loss.item():.4f}") # update - - avg_loss = running / len(loader) - - # validation - net.eval() - with torch.no_grad(): - vr, vc = net(X_val) - v_loss = mse(vr, y_val).item() + bce(vc, c_val).item() - - tqdm.write(f"[{app}] Epoch {ep+1}/{self.n_epochs} | " f"Train Loss: {avg_loss:.4f} | Val Loss: {v_loss:.4f}") - - if v_loss < self.best[app]: - self.best[app] = v_loss - torch.save(net.state_dict(), f"resnet_cls-{app}.pth") - - net.load_state_dict(torch.load(f"resnet_cls-{app}.pth", map_location=self.device)) - - def disaggregate_chunk(self, mains, model=None, do_preprocessing=True): + # Create classification labels + classify_appliance = copy.deepcopy(train_appliances) + classification = self.classify(classify_appliance) + + # Preprocess regression and classification data + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + train_main = pd.concat(train_main, axis=0).values.reshape((-1, self.sequence_length, 1)) + + # Process appliance data for regression + new_train_appliances = [] + for app_name, app_dfs in train_appliances: + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, self.sequence_length)) + new_train_appliances.append((app_name, app_df_values)) + train_appliances = new_train_appliances + + # Process appliance data for classification + new_train_appliances_classification = {} + for app_name, app_df in classification: + app_df_values = pd.concat(app_df, axis=0).values.reshape((-1, self.sequence_length)) + new_train_appliances_classification[app_name] = app_df_values + + for appliance_name, power in train_appliances: + if appliance_name not in self.models: + print("First time training for", appliance_name) + self.models[appliance_name] = self.return_network() + else: + print("Retraining model for", appliance_name) + + model = self.models[appliance_name] + if train_main.size > 10: + # Combine regression and classification targets + power_df = pd.DataFrame(power) + classification_df = pd.DataFrame(new_train_appliances_classification[appliance_name]) + power_combined = pd.concat([power_df, classification_df], axis=1).values + + # Split data into training and validation sets + train_x, v_x, train_y_combined, v_y_combined = train_test_split( + train_main, power_combined, test_size=0.15, random_state=10) + + train_y = train_y_combined[:, :self.sequence_length] + v_y = v_y_combined[:, :self.sequence_length] + appliance_train_classification = train_y_combined[:, self.sequence_length:] + appliance_val_classification = v_y_combined[:, self.sequence_length:] + + # Convert to PyTorch tensors + train_x = torch.tensor(train_x, dtype=torch.float32).permute(0, 2, 1).to(self.device) + v_x = torch.tensor(v_x, dtype=torch.float32).permute(0, 2, 1).to(self.device) + train_y = torch.tensor(train_y, dtype=torch.float32).to(self.device) + v_y = torch.tensor(v_y, dtype=torch.float32).to(self.device) + appliance_train_classification = torch.tensor(appliance_train_classification, dtype=torch.float32).to(self.device) + appliance_val_classification = torch.tensor(appliance_val_classification, dtype=torch.float32).to(self.device) + + # Setup optimizer and loss functions + optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) + mse_loss = nn.MSELoss() + bce_loss = nn.BCELoss() + + best_val_loss = float('inf') + filepath = f'ResNet_classification-temp-weights-{random.randint(0, 100000)}.pth' + + # Training loop + for epoch in range(self.n_epochs): + model.train() + + train_dataset = TensorDataset(train_x, train_y, appliance_train_classification) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + epoch_losses = [] + for batch_x, batch_y, batch_c in train_loader: + optimizer.zero_grad() + output, classification_output = model(batch_x) + + # Combined loss for regression and classification + loss = mse_loss(output, batch_y) + bce_loss(classification_output, batch_c) + + loss.backward() + optimizer.step() + epoch_losses.append(loss.item()) + + # Validation + model.eval() + with torch.no_grad(): + val_output, val_classification = model(v_x) + val_loss = mse_loss(val_output, v_y) + bce_loss(val_classification, appliance_val_classification) + + avg_train_loss = np.mean(epoch_losses) + print(f"Epoch {epoch+1}/{self.n_epochs} - loss: {avg_train_loss:.4f} - val_loss: {val_loss.item():.4f}") + + # Save the best model + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f"Validation loss improved, saving model to {filepath}") + + # Load best weights + model.load_state_dict(torch.load(filepath, map_location=self.device)) + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """Disaggregates a chunk of mains data.""" if model is not None: self.models = model + if do_preprocessing: - mains = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=mains, - submeters_lst=None, - method="test", - appliance_params=self.appliance_params, - windowing=False - ) - - L = self.sequence_length - out = [] - for m in mains: - X = torch.tensor(m.values, dtype=torch.float32).unsqueeze(1).to(self.device) - disc = {} - for app, net in self.models.items(): - net.eval() + test_main_list = self.call_preprocessing( + test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_mains_df in test_main_list: + disggregation_dict = {} + test_main_array = test_mains_df.values.reshape((-1, self.sequence_length, 1)) + test_main_tensor = torch.tensor(test_main_array, dtype=torch.float32).permute(0, 2, 1).to(self.device) + + for appliance in self.models: + model = self.models[appliance] + model.eval() + with torch.no_grad(): - pr, _ = net(X) # pr: [batch, seq_len] - pr = pr.cpu().numpy() - - def overlap(wins): - # Coverts overlapping windows into continuous sequence - s, c = np.zeros(len(wins)+L-1), np.zeros(len(wins)+L-1) # sum and count arrays - for i in range(len(wins)): - s[i:i+L] += wins[i].flatten() - c[i:i+L] += 1 - return s / c - - power = overlap(pr) - p = self.appliance_params[app] - power = np.clip(p["min"] + power*(p["max"]-p["min"]), 0, None) - disc[app] = pd.Series(power, dtype="float32") - out.append(pd.DataFrame(disc, dtype="float32")) - return out - - def _make_on_off(self, apps): - """Convert appliance data to binary on/off labels.""" - TH, n, pad = 15, self.sequence_length, self.sequence_length//2 - res = [] - for app, dfs in apps: - lbls = [] - for df in dfs: - a = df.copy() - a[a<=TH] = 0; a[a>TH] = 1 - v = np.pad(a.values.flatten(), (pad,pad)) - w = np.array([v[i:i+n] for i in range(len(v)-n+1)]) - lbls.append(pd.DataFrame(w)) - res.append((app, lbls)) - return res - - def set_appliance_params(self, apps): - """Compute mean, std, min, max for each appliance.""" - for app, dfs in apps: - data = np.concatenate([d.values.flatten() for d in dfs]) - self.appliance_params[app] = { - "mean": data.mean(), - "std": max(data.std(), 1.0), - "min": data.min(), - "max": data.max() - } - - def _set_mains_params(self, mains): - """Compute mean and std for mains data.""" - data = np.concatenate([m.values.flatten() for m in mains]) - self.mains_mean, self.mains_std = data.mean(), data.std() - - # NILMTK wrappers - def train(self, mains, apps, **kw): - return self.partial_fit(mains, apps, **kw) - - def disaggregate(self, mains, store): - preds = self.disaggregate_chunk(mains) - for i, df in enumerate(preds): - for col in df.columns: - store.put(f"/building1/elec/meter{i+1}/{col}", df[col]) + prediction_output, _ = model(test_main_tensor) + prediction = prediction_output.cpu().numpy() + + # Average predictions over overlapping windows + l = self.sequence_length + n = len(prediction) + sum_arr = np.zeros(n + l - 1) + counts_arr = np.zeros(n + l - 1) + for i in range(n): + sum_arr[i:i+l] += prediction[i] + counts_arr[i:i+l] += 1 + for i in range(len(counts_arr)): + if counts_arr[i] == 0: + counts_arr[i] = 1 + averaged_prediction = sum_arr / counts_arr + + # Denormalize the predictions + app_min = self.appliance_params[appliance]['min'] + app_max = self.appliance_params[appliance]['max'] + prediction = averaged_prediction * (app_max - app_min) + app_min + prediction[prediction < 0] = 0 + + df = pd.Series(prediction) + disggregation_dict[appliance] = df + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + return test_predictions + + def classification_output_plot(self, prediction_classification, appliance): + """Optional plotting function for classification output (matching TensorFlow)""" + pass # Placeholder for plotting functionality diff --git a/nilmtk_contrib/torch/rnn.py b/nilmtk_contrib/torch/rnn.py index 52d3789..7fc8003 100644 --- a/nilmtk_contrib/torch/rnn.py +++ b/nilmtk_contrib/torch/rnn.py @@ -1,28 +1,12 @@ from collections import OrderedDict import numpy as np import pandas as pd +import random from nilmtk.disaggregate import Disaggregator import torch import torch.nn as nn -import torch.nn.functional as F import torch.optim as optim -from torch.utils.data import Dataset, DataLoader, TensorDataset -from sklearn.model_selection import train_test_split -from tqdm import tqdm -import random -import os -from nilmtk_contrib.torch.preprocessing import preprocess - -# Set random seeds for reproducibility across runs -random.seed(10) -np.random.seed(10) -torch.manual_seed(10) -if torch.cuda.is_available(): - torch.cuda.manual_seed(10) - torch.cuda.manual_seed_all(10) - -# Use GPU if available, otherwise fall back to CPU -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +from torch.utils.data import TensorDataset, DataLoader class SequenceLengthError(Exception): pass @@ -32,347 +16,286 @@ class ApplianceNotFoundError(Exception): class RNNModel(nn.Module): """ - Neural network combining CNN feature extraction and bidirectional LSTMs - for NILM energy disaggregation. + An RNN-based model for NILM, with an architecture designed to mirror the + original TensorFlow implementation. """ def __init__(self, sequence_length): super(RNNModel, self).__init__() self.sequence_length = sequence_length - # 1D CNN for initial feature extraction from raw power sequence - self.conv1d = nn.Conv1d( - in_channels=1, - out_channels=16, - kernel_size=4, - stride=1, - padding=2 # Maintain sequence length - ) - - # First bidirectional LSTM layer - self.lstm1 = nn.LSTM( - input_size=16, - hidden_size=128, - num_layers=1, - batch_first=True, - bidirectional=True - ) + # Layers are defined to match the TensorFlow architecture + self.conv1d = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=4, + stride=1, padding=2) # 'same' padding + self.lstm1 = nn.LSTM(input_size=16, hidden_size=128, batch_first=True, bidirectional=True) + self.lstm2 = nn.LSTM(input_size=256, hidden_size=256, batch_first=True, bidirectional=True) + self.fc1 = nn.Linear(512, 128) + self.fc2 = nn.Linear(128, 1) - # Second bidirectional LSTM layer for deeper feature learning - self.lstm2 = nn.LSTM( - input_size=256, # 128 * 2 (bidirectional) - hidden_size=256, - num_layers=1, - batch_first=True, - bidirectional=True - ) - - # Final fully connected layers for prediction - self.fc1 = nn.Linear(512, 128) # 256 * 2 (bidirectional) - self.fc2 = nn.Linear(128, 1) # Output single power value - - # Dropout for regularization - self.dropout = nn.Dropout(0.1) + self._init_weights() + + def _init_weights(self): + """Initializes weights to match TensorFlow's default initializations.""" + # Use Xavier uniform for Conv, LSTM, and Linear layers by default + for m in self.modules(): + if isinstance(m, (nn.Conv1d, nn.Linear)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.LSTM): + for name, param in m.named_parameters(): + if 'weight' in name: + nn.init.xavier_uniform_(param) + elif 'bias' in name: + nn.init.zeros_(param) def forward(self, x): - # Input shape: (batch_size, sequence_length, 1) - # Rearrange for Conv1D: (batch_size, channels, sequence_length) - x = x.permute(0, 2, 1) # (batch_size, 1, sequence_length) + # Input shape: (batch, seq_len, 1) -> permute for Conv1D + x = x.permute(0, 2, 1) - # Extract features using 1D convolution - x = self.conv1d(x) # (batch_size, 16, sequence_length) + # Feature extraction + x = self.conv1d(x) - # Rearrange back for LSTM: (batch_size, sequence_length, features) - x = x.permute(0, 2, 1) # (batch_size, sequence_length, 16) + # Permute for LSTM layers + x = x.permute(0, 2, 1) - # Process through bidirectional LSTM layers - x, _ = self.lstm1(x) # (batch_size, sequence_length, 256) - x = self.dropout(x) + # Sequence processing + x, _ = self.lstm1(x) + x, _ = self.lstm2(x) - x, _ = self.lstm2(x) # (batch_size, sequence_length, 512) - - # Use only the last time step output - x = x[:, -1, :] # (batch_size, 512) + # In the original TF model, only the output of the last time step is used. + x = x[:, -1, :] # Final prediction layers - x = torch.tanh(self.fc1(x)) # (batch_size, 128) - x = self.dropout(x) - x = self.fc2(x) # (batch_size, 1) + x = torch.tanh(self.fc1(x)) + x = self.fc2(x) return x class RNN(Disaggregator): """ - NILM disaggregator using RNN without attention mechanism. - Inherits from NILMTK's Disaggregator base class. - """ + RNN disaggregator for Non-Intrusive Load Monitoring (NILM). + Based on "Neural NILM: Deep Neural Networks Applied to Energy Disaggregation" + (https://arxiv.org/abs/1507.06594). This implementation uses a convolutional + layer followed by bidirectional LSTM layers to learn temporal patterns in + aggregate power consumption data and predict individual appliance usage. + + The model architecture consists of: + 1. 1D Convolutional layer for feature extraction from power sequences + 2. Two bidirectional LSTM layers for learning long-term dependencies + 3. Fully connected layers for final power regression + + Args: + params (dict): Dictionary containing model hyperparameters: + - sequence_length (int): Length of input sequences (default: 19) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - appliance_params (dict): Appliance-specific parameters + - mains_mean (float): Mean normalization for mains power (default: 1800) + - mains_std (float): Standard deviation for mains power (default: 600) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + """ def __init__(self, params): - """Initialize the disaggregator with hyperparameters""" + """Initializes the disaggregator and its hyperparameters.""" self.MODEL_NAME = "RNN" - self.models = OrderedDict() # Store separate models for each appliance - self.file_prefix = "{}-temp-weights".format(self.MODEL_NAME.lower()) + self.models = OrderedDict() + self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" - # Extract hyperparameters from params dict self.chunk_wise_training = params.get('chunk_wise_training', False) self.sequence_length = params.get('sequence_length', 19) self.n_epochs = params.get('n_epochs', 10) self.batch_size = params.get('batch_size', 512) - self.appliance_params = params.get('appliance_params', {}) # Normalization stats + self.appliance_params = params.get('appliance_params', {}) self.mains_mean = params.get('mains_mean', 1800) self.mains_std = params.get('mains_std', 600) - self.device = device + self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - # Sequence length must be odd for proper windowing if self.sequence_length % 2 == 0: - print("Sequence length should be odd!") - raise SequenceLengthError - + raise SequenceLengthError("Sequence length must be odd for proper windowing.") + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **load_kwargs): - """Train models on a chunk of data (supports incremental learning)""" - - # Compute appliance-specific normalization parameters if not provided - if len(self.appliance_params) == 0: + """Trains the model on a chunk of data.""" + if not self.appliance_params: self.set_appliance_params(train_appliances) - + print("...............RNN partial_fit running...............") - # Preprocess data: windowing, normalization, etc. if do_preprocessing: - print("Preprocessing data...") - train_main, train_appliances = preprocess( - sequence_length=self.sequence_length, - mains_std=self.mains_std, - mains_mean=self.mains_mean, - mains_lst=train_main, - submeters_lst=train_appliances, - method="train", - appliance_params=self.appliance_params, - windowing=False - ) - - # Prepare main power data for training - train_main = pd.concat(train_main, axis=0) - train_main = train_main.values.reshape((-1, self.sequence_length, 1)) + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + # Prepare data for training + train_main = pd.concat(train_main, axis=0).values.reshape((-1, self.sequence_length, 1)) - # Prepare appliance power data new_train_appliances = [] - for app_name, app_df in train_appliances: - app_df = pd.concat(app_df, axis=0) - app_df_values = app_df.values.reshape((-1, 1)) + for app_name, app_dfs in train_appliances: + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, 1)) new_train_appliances.append((app_name, app_df_values)) train_appliances = new_train_appliances - - print(f"Training data shape: {train_main.shape}") - - # Train a separate model for each appliance - appliance_progress = tqdm(train_appliances, desc="Training appliances", unit="appliance") - - for appliance_name, power in appliance_progress: - appliance_progress.set_postfix({"Current": appliance_name}) - - # Create new model if this appliance hasn't been seen before + + for appliance_name, power in train_appliances: if appliance_name not in self.models: - print(f"\nFirst model training for {appliance_name}") + print(f"First time training for {appliance_name}") self.models[appliance_name] = self.return_network() else: - print(f"\nStarted Retraining model for {appliance_name}") - + print(f"Retraining model for {appliance_name}") + model = self.models[appliance_name] - - # Train only if we have sufficient data - if train_main.size > 0: - if len(train_main) > 10: - # Convert to PyTorch tensors and move to device - train_x = torch.FloatTensor(train_main).to(self.device) - train_y = torch.FloatTensor(power).to(self.device) + if train_main.size > 10: + filepath = f"{self.file_prefix}-{'_'.join(appliance_name.split())}-epoch{current_epoch}.pt" + + # Convert to PyTorch Tensors + train_main_tensor = torch.tensor(train_main, dtype=torch.float32) + power_tensor = torch.tensor(power, dtype=torch.float32).squeeze() + + # Use the last 15% of data for validation to mirror TensorFlow's behavior + val_size = int(0.15 * len(train_main_tensor)) + train_size = len(train_main_tensor) - val_size - # Split data into training and validation sets - train_x_split, val_x_split, train_y_split, val_y_split = train_test_split( - train_x.cpu().numpy(), train_y.cpu().numpy(), - test_size=0.15, random_state=42 - ) + train_x = train_main_tensor[:train_size].to(self.device) + val_x = train_main_tensor[train_size:].to(self.device) + train_y = power_tensor[:train_size].to(self.device) + val_y = power_tensor[train_size:].to(self.device) - # Convert back to tensors and move to device - train_x_split = torch.FloatTensor(train_x_split).to(self.device) - val_x_split = torch.FloatTensor(val_x_split).to(self.device) - train_y_split = torch.FloatTensor(train_y_split).to(self.device) - val_y_split = torch.FloatTensor(val_y_split).to(self.device) + # Optimizer and loss function, with parameters matching TensorFlow + optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-07) + criterion = nn.MSELoss() - # Create PyTorch DataLoaders for batch processing - train_dataset = TensorDataset(train_x_split, train_y_split) - val_dataset = TensorDataset(val_x_split, val_y_split) + best_val_loss = float('inf') + + # Create DataLoader for batching + train_dataset = TensorDataset(train_x, train_y) train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) - val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False) - # Train the model - self.train_model(model, train_loader, val_loader, appliance_name, current_epoch) - - def train_model(self, model, train_loader, val_loader, appliance_name, current_epoch): - """Train a single appliance model with early stopping based on validation loss""" - optimizer = optim.Adam(model.parameters(), lr=0.001) - criterion = nn.MSELoss() - - best_val_loss = float('inf') - best_model_state = None - - epoch_progress = tqdm(range(self.n_epochs), desc=f"Training {appliance_name}", unit="epoch") - - for epoch in epoch_progress: - # Training phase - model.train() - train_loss = 0.0 - - train_batch_progress = tqdm(train_loader, desc=f"Epoch {epoch+1} Training", - leave=False, unit="batch") - - for batch_x, batch_y in train_batch_progress: - optimizer.zero_grad() - - outputs = model(batch_x) - loss = criterion(outputs.squeeze(), batch_y.squeeze()) - - loss.backward() - optimizer.step() - - train_loss += loss.item() - train_batch_progress.set_postfix({"Loss": f"{loss.item():.4f}"}) - - # Validation phase - model.eval() - val_loss = 0.0 - - val_batch_progress = tqdm(val_loader, desc=f"Epoch {epoch+1} Validation", - leave=False, unit="batch") - - with torch.no_grad(): - for batch_x, batch_y in val_batch_progress: - outputs = model(batch_x) - loss = criterion(outputs.squeeze(), batch_y.squeeze()) - val_loss += loss.item() - val_batch_progress.set_postfix({"Loss": f"{loss.item():.4f}"}) - - # Calculate average losses - train_loss /= len(train_loader) - val_loss /= len(val_loader) - - epoch_progress.set_postfix({ - "Train Loss": f"{train_loss:.4f}", - "Val Loss": f"{val_loss:.4f}", - "Best": f"{best_val_loss:.4f}" - }) - - # Save best model based on validation loss - if val_loss < best_val_loss: - best_val_loss = val_loss - best_model_state = model.state_dict().copy() - epoch_progress.write(f'New best model saved with val_loss: {val_loss:.4f}') - - # Save model checkpoint - filepath = f"{self.file_prefix}-{appliance_name.replace(' ', '_')}-epoch{current_epoch}.pth" - torch.save(best_model_state, filepath) - - # Load the best model weights - if best_model_state is not None: - model.load_state_dict(best_model_state) - print(f"\nLoaded best model for {appliance_name} with validation loss: {best_val_loss:.4f}") - + for epoch in range(self.n_epochs): + # --- Training Phase --- + model.train() + train_loss = 0.0 + + for batch_x, batch_y in train_loader: + optimizer.zero_grad() + outputs = model(batch_x).squeeze(-1) + loss = criterion(outputs, batch_y) + loss.backward() + optimizer.step() + train_loss += loss.item() + + train_loss /= len(train_loader) + + # --- Validation Phase --- + model.eval() + with torch.no_grad(): + val_outputs = model(val_x).squeeze(-1) + val_loss = criterion(val_outputs, val_y).item() + + # Save the best model based on validation loss + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f'Epoch {epoch+1}/{self.n_epochs} - loss: {train_loss:.4f} - val_loss: {val_loss:.4f}') + + # Load the best performing model + model.load_state_dict(torch.load(filepath)) + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): - """Disaggregate power consumption for each appliance from aggregate mains data""" - + """Disaggregates a chunk of mains data.""" if model is not None: self.models = model - - # Preprocess test data similar to training data + if do_preprocessing: - print("Preprocessing test data...") - test_main_list = preprocess( - sequence_length=self.sequence_length, - mains_lst=test_main_list, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - submeters_lst=None, - method="test", - appliance_params=self.appliance_params, - windowing=False - ) - + test_main_list = self.call_preprocessing( + test_main_list, submeters_lst=None, method='test') + test_predictions = [] - - chunk_progress = tqdm(test_main_list, desc="Processing test chunks", unit="chunk") - - # Process each chunk of test data - for test_main in chunk_progress: - test_main = test_main.values - test_main = test_main.reshape((-1, self.sequence_length, 1)) - test_main_tensor = torch.FloatTensor(test_main).to(self.device) - + for test_mains_df in test_main_list: + test_main_array = test_mains_df.values.reshape((-1, self.sequence_length, 1)) disggregation_dict = {} - appliance_progress = tqdm(self.models.items(), desc="Disaggregating appliances", - leave=False, unit="appliance") - - # Get predictions from each appliance model - for appliance, model in appliance_progress: - appliance_progress.set_postfix({"Current": appliance}) + for appliance, model in self.models.items(): + test_tensor = torch.tensor(test_main_array, dtype=torch.float32).to(self.device) model.eval() - - # Create DataLoader for batched inference - test_dataset = TensorDataset(test_main_tensor) - test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False) - - predictions = [] - - pred_progress = tqdm(test_loader, desc=f"Predicting {appliance}", - leave=False, unit="batch") - - # Generate predictions with torch.no_grad(): - for batch_x, in pred_progress: - batch_pred = model(batch_x) - predictions.append(batch_pred.cpu().numpy()) + # Process in batches to manage memory + predictions = [] + for i in range(0, len(test_tensor), self.batch_size): + batch = test_tensor[i:i + self.batch_size] + batch_pred = model(batch).cpu().numpy() + predictions.append(batch_pred) + prediction = np.concatenate(predictions, axis=0) - prediction = np.concatenate(predictions, axis=0) + # Denormalize the prediction + app_mean = self.appliance_params[appliance]['mean'] + app_std = self.appliance_params[appliance]['std'] + denormalized_prediction = app_mean + (prediction * app_std) - # Denormalize predictions back to original power scale - prediction = (self.appliance_params[appliance]['mean'] + - prediction * self.appliance_params[appliance]['std']) - - # Ensure non-negative power values - valid_predictions = prediction.flatten() - valid_predictions = np.where(valid_predictions > 0, valid_predictions, 0) - df = pd.Series(valid_predictions) + # Set negative values to zero + denormalized_prediction[denormalized_prediction < 0] = 0 + df = pd.Series(denormalized_prediction.flatten()) disggregation_dict[appliance] = df - - # Combine all appliance predictions for this chunk + results = pd.DataFrame(disggregation_dict, dtype='float32') test_predictions.append(results) - return test_predictions - + def return_network(self): - """Factory method to create a new RNN model instance""" + """Returns a new, initialized RNNModel instance.""" model = RNNModel(self.sequence_length).to(self.device) return model - + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data by windowing and normalizing, mirroring the + original TensorFlow implementation. + """ + if method == 'train': + # Preprocess mains + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + # Preprocess appliances + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name not in self.appliance_params: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") + + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.reshape((-1, 1)) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + appliance_list.append((app_name, processed_app_dfs)) + return processed_mains_lst, appliance_list + + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst def set_appliance_params(self, train_appliances): - """Compute normalization statistics (mean, std) for each appliance""" - print("Setting appliance parameters...") - - param_progress = tqdm(train_appliances, desc="Computing appliance stats", unit="appliance") - - for (app_name, df_list) in param_progress: - param_progress.set_postfix({"Current": app_name}) - - # Concatenate all data for this appliance and compute statistics - l = np.array(pd.concat(df_list, axis=0)) + """Computes and sets normalization parameters for each appliance.""" + for (app_name, df_list) in train_appliances: + l = np.concatenate([df.values for df in df_list]) app_mean = np.mean(l) app_std = np.std(l) - - # Prevent division by zero in normalization if app_std < 1: - app_std = 100 - self.appliance_params.update({app_name: {'mean': app_mean, 'std': app_std}}) - - print(self.appliance_params) \ No newline at end of file + app_std = 100 # Avoid division by zero for flat signals + self.appliance_params[app_name] = {'mean': app_mean, 'std': app_std} + print("Appliance parameters set:", self.appliance_params) \ No newline at end of file diff --git a/nilmtk_contrib/torch/rnn_attention.py b/nilmtk_contrib/torch/rnn_attention.py index 53d8b08..1c85c0a 100644 --- a/nilmtk_contrib/torch/rnn_attention.py +++ b/nilmtk_contrib/torch/rnn_attention.py @@ -16,15 +16,6 @@ from tqdm import tqdm import random import sys -from nilmtk_contrib.torch.preprocessing import preprocess - -# Set random seeds for reproducibility across runs -random.seed(10) -np.random.seed(10) -torch.manual_seed(10) -if torch.cuda.is_available(): - torch.cuda.manual_seed(10) - torch.cuda.manual_seed_all(10) # Use GPU if available, otherwise fall back to CPU device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -37,318 +28,255 @@ class ApplianceNotFoundError(Exception): class AttentionLayer(nn.Module): """ - Attention mechanism to focus on relevant parts of the input sequence. - Inspired from: https://github.com/antoniosudoso/attention-nilm + An attention mechanism that computes a context-aware representation of the input sequence. + This implementation is designed to mirror the original TensorFlow version. """ def __init__(self, units): super(AttentionLayer, self).__init__() self.units = units - # Linear layers for attention computation - self.W = nn.Linear(512, units) # 512 = bidirectional LSTM output (256*2) + # Linear layers for computing attention scores + self.W = nn.Linear(512, units) # Input is from a bidirectional LSTM (256*2) self.V = nn.Linear(units, 1) - # Initialize weights using He normal initialization + # Initialize weights with He normal to match TensorFlow's 'he_normal' nn.init.kaiming_normal_(self.W.weight, mode='fan_in', nonlinearity='relu') nn.init.kaiming_normal_(self.V.weight, mode='fan_in', nonlinearity='relu') nn.init.zeros_(self.W.bias) nn.init.zeros_(self.V.bias) def forward(self, encoder_output): - # encoder_output shape: (batch_size, sequence_length, hidden_size) - - # Compute attention scores - score = self.V(torch.tanh(self.W(encoder_output))) # (batch_size, seq_len, 1) - - # Convert scores to probabilities - attention_weights = F.softmax(score, dim=1) # (batch_size, seq_len, 1) - - # Compute weighted context vector - context_vector = attention_weights * encoder_output # (batch_size, seq_len, hidden_size) - context_vector = torch.sum(context_vector, dim=1) # (batch_size, hidden_size) + """ + Args: + encoder_output: The output from the LSTM layer, shape (batch, seq_len, hidden_size). + Returns: + context_vector: The weighted sum of encoder outputs, shape (batch, hidden_size). + """ + # Calculate alignment scores + score = self.V(torch.tanh(self.W(encoder_output))) # (batch, seq_len, 1) + + # Convert scores to weights using softmax + attention_weights = F.softmax(score, dim=1) + + # Compute the context vector + context_vector = attention_weights * encoder_output + context_vector = torch.sum(context_vector, dim=1) return context_vector class RNNAttentionModel(nn.Module): """ - Neural network combining CNN feature extraction, bidirectional LSTMs, - and attention mechanism for NILM energy disaggregation. + An RNN-based model with an attention mechanism for NILM, designed to + mirror the original TensorFlow implementation. """ def __init__(self, sequence_length): super(RNNAttentionModel, self).__init__() self.sequence_length = sequence_length - # 1D CNN for initial feature extraction from raw power sequence - self.conv1d = nn.Conv1d( - in_channels=1, - out_channels=16, - kernel_size=4, - stride=1, - padding=2 # Maintain sequence length - ) - - # First bidirectional LSTM layer - self.lstm1 = nn.LSTM( - input_size=16, - hidden_size=128, - num_layers=1, - batch_first=True, - bidirectional=True - ) - - # Second bidirectional LSTM layer for deeper feature learning - self.lstm2 = nn.LSTM( - input_size=256, # 128 * 2 (bidirectional) - hidden_size=256, - num_layers=1, - batch_first=True, - bidirectional=True - ) - - # Attention mechanism to focus on important time steps + # Layers are defined to match the TensorFlow architecture + self.conv1d = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=4, + stride=1, padding=2) # 'same' padding + self.lstm1 = nn.LSTM(input_size=16, hidden_size=128, batch_first=True, bidirectional=True) + self.lstm2 = nn.LSTM(input_size=256, hidden_size=256, batch_first=True, bidirectional=True) self.attention = AttentionLayer(units=128) + self.fc1 = nn.Linear(512, 128) + self.fc2 = nn.Linear(128, 1) - # Final fully connected layers for prediction - self.fc1 = nn.Linear(512, 128) # 256 * 2 (bidirectional) - self.fc2 = nn.Linear(128, 1) # Output single power value - - # Dropout for regularization - self.dropout = nn.Dropout(0.1) + self._initialize_weights() + + def _initialize_weights(self): + """Initializes weights to match TensorFlow's default initializations.""" + # Use Xavier uniform for Conv, LSTM, and Linear layers by default + for m in self.modules(): + if isinstance(m, (nn.Conv1d, nn.Linear)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.LSTM): + for name, param in m.named_parameters(): + if 'weight' in name: + nn.init.xavier_uniform_(param) + elif 'bias' in name: + nn.init.zeros_(param) def forward(self, x): - # Input shape: (batch_size, sequence_length, 1) - # Rearrange for Conv1D: (batch_size, channels, sequence_length) + # Input shape: (batch, seq_len, 1) -> permute for Conv1D x = x.permute(0, 2, 1) - # Extract features using 1D convolution - x = self.conv1d(x) # (batch_size, 16, sequence_length) + # Feature extraction + x = self.conv1d(x) - # Rearrange back for LSTM: (batch_size, sequence_length, features) + # Permute for LSTM layers x = x.permute(0, 2, 1) - # Process through bidirectional LSTM layers - x, _ = self.lstm1(x) # (batch_size, sequence_length, 256) - x = self.dropout(x) - - x, _ = self.lstm2(x) # (batch_size, sequence_length, 512) - - # Apply attention to get context-aware representation - x = self.attention(x) # (batch_size, 512) + # Sequence processing + x, _ = self.lstm1(x) + x, _ = self.lstm2(x) - # Final prediction layers - x = torch.tanh(self.fc1(x)) # (batch_size, 128) - x = self.dropout(x) - x = self.fc2(x) # (batch_size, 1) + # Attention and final prediction + x = self.attention(x) + x = torch.tanh(self.fc1(x)) + x = self.fc2(x) return x class RNN_attention(Disaggregator): """ - NILM disaggregator using RNN with attention mechanism. - Inherits from NILMTK's Disaggregator base class. - """ + RNN with attention mechanism for non-intrusive load monitoring. + + This implementation is based on the paper: + "ResNet-based Multi-output Regression for NILM: Towards Enhanced Appliance State Detection" + https://arxiv.org/abs/2411.15805v1 + + The model uses bidirectional LSTM layers with attention mechanism for learning + temporal dependencies and focusing on relevant time steps in energy + disaggregation tasks. + Architecture Overview: + - Bidirectional LSTM layers for sequence modeling + - Attention mechanism for learning relevant temporal features + - Dense layers for final power consumption prediction + - Sequence-to-point prediction for energy disaggregation + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 19) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + - appliance_params (dict): Appliance-specific normalization parameters + """ def __init__(self, params): - """Initialize the disaggregator with hyperparameters""" + """Initializes the disaggregator and its hyperparameters.""" self.MODEL_NAME = "RNN_attention" - self.models = OrderedDict() # Store separate models for each appliance + self.models = OrderedDict() - # Extract hyperparameters from params dict self.chunk_wise_training = params.get('chunk_wise_training', False) self.sequence_length = params.get('sequence_length', 19) self.n_epochs = params.get('n_epochs', 10) self.batch_size = params.get('batch_size', 512) self.load_model_path = params.get('load_model_path', None) - self.appliance_params = params.get('appliance_params', {}) # Normalization stats + self.appliance_params = params.get('appliance_params', {}) self.mains_mean = params.get('mains_mean', 1800) self.mains_std = params.get('mains_std', 600) self.device = device - # Sequence length must be odd for proper windowing if self.sequence_length % 2 == 0: - print("Sequence length should be odd!") - raise SequenceLengthError + raise SequenceLengthError("Sequence length must be odd for proper windowing.") def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **load_kwargs): - """Train models on a chunk of data (supports incremental learning)""" - - # Compute appliance-specific normalization parameters if not provided - if len(self.appliance_params) == 0: + """Trains the model on a chunk of data.""" + if not self.appliance_params: self.set_appliance_params(train_appliances) print("...............RNN_attention partial_fit running...............") - # Preprocess data: windowing, normalization, etc. if do_preprocessing: - print("Preprocessing data...") - train_main, train_appliances = preprocess( - sequence_length=self.sequence_length, - mains_mean = self.mains_mean, - mains_std=self.mains_std, - mains_lst=train_main, - submeters_lst=train_appliances, - method="train", - appliance_params=self.appliance_params, - windowing=False - ) + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') - # Prepare main power data for training - train_main = pd.concat(train_main, axis=0) - train_main = train_main.values.reshape((-1, self.sequence_length, 1)) + # Prepare data for training + train_main = pd.concat(train_main, axis=0).values.reshape((-1, self.sequence_length, 1)) - # Prepare appliance power data new_train_appliances = [] - for app_name, app_df in train_appliances: - app_df = pd.concat(app_df, axis=0) - app_df_values = app_df.values.reshape((-1, 1)) + for app_name, app_dfs in train_appliances: + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, 1)) new_train_appliances.append((app_name, app_df_values)) train_appliances = new_train_appliances - print(f"Training data shape: {train_main.shape}") - - # Train a separate model for each appliance - appliance_progress = tqdm(train_appliances, desc="Training appliances", unit="appliance") - - for appliance_name, power in appliance_progress: - appliance_progress.set_postfix({"Current": appliance_name}) - - # Create new model if this appliance hasn't been seen before + # Train a model for each appliance + for appliance_name, power in train_appliances: if appliance_name not in self.models: - print(f"\nFirst model training for {appliance_name}") + print(f"First time training for {appliance_name}") self.models[appliance_name] = self.return_network() else: - print(f"\nStarted Retraining model for {appliance_name}") + print(f"Retraining model for {appliance_name}") model = self.models[appliance_name] - # Train only if we have sufficient data - if train_main.size > 0 and len(train_main) > 10: - # Split data into training and validation sets + if train_main.size > 10: + # Create training and validation sets train_x, v_x, train_y, v_y = train_test_split( - train_main, power, test_size=.15, random_state=10) + train_main, power, test_size=0.15, random_state=10) - # Convert to PyTorch tensors and move to device + # Convert to PyTorch Tensors train_x = torch.FloatTensor(train_x).to(self.device) v_x = torch.FloatTensor(v_x).to(self.device) train_y = torch.FloatTensor(train_y).to(self.device) v_y = torch.FloatTensor(v_y).to(self.device) - # Create PyTorch DataLoaders for batch processing + # Create DataLoaders train_dataset = TensorDataset(train_x, train_y) val_dataset = TensorDataset(v_x, v_y) train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False) - # Train the model self.train_model(model, train_loader, val_loader, appliance_name) def train_model(self, model, train_loader, val_loader, appliance_name): - """Train a single appliance model with early stopping based on validation loss""" - optimizer = optim.Adam(model.parameters(), lr=0.001) + """Handles the training and validation loop for a single appliance model.""" + optimizer = optim.Adam(model.parameters()) criterion = nn.MSELoss() best_val_loss = float('inf') best_model_state = None - epoch_progress = tqdm(range(self.n_epochs), desc=f"Training {appliance_name}", unit="epoch") - - for epoch in epoch_progress: - # Training phase + for epoch in range(self.n_epochs): + # --- Training Phase --- model.train() train_loss = 0.0 - train_batch_progress = tqdm(train_loader, desc=f"Epoch {epoch+1} Training", - leave=False, unit="batch") - - for batch_x, batch_y in train_batch_progress: + for batch_x, batch_y in train_loader: optimizer.zero_grad() - outputs = model(batch_x) loss = criterion(outputs.squeeze(), batch_y.squeeze()) - loss.backward() optimizer.step() - train_loss += loss.item() - train_batch_progress.set_postfix({"Loss": f"{loss.item():.4f}"}) - # Validation phase + # --- Validation Phase --- model.eval() val_loss = 0.0 - val_batch_progress = tqdm(val_loader, desc=f"Epoch {epoch+1} Validation", - leave=False, unit="batch") - with torch.no_grad(): - for batch_x, batch_y in val_batch_progress: + for batch_x, batch_y in val_loader: outputs = model(batch_x) loss = criterion(outputs.squeeze(), batch_y.squeeze()) val_loss += loss.item() - val_batch_progress.set_postfix({"Loss": f"{loss.item():.4f}"}) - # Calculate average losses train_loss /= len(train_loader) val_loss /= len(val_loader) - epoch_progress.set_postfix({ - "Train Loss": f"{train_loss:.4f}", - "Val Loss": f"{val_loss:.4f}", - "Best": f"{best_val_loss:.4f}" - }) - - # Save best model based on validation loss + # Save the best model based on validation loss if val_loss < best_val_loss: best_val_loss = val_loss best_model_state = model.state_dict().copy() - epoch_progress.write(f'New best model saved with val_loss: {val_loss:.4f}') - # Save model checkpoint - filepath = f'RNN_attention-temp-weights-{appliance_name.replace(" ", "_")}-{random.randint(0,100000)}.pth' + filepath = f'RNN_attention-temp-weights-{random.randint(0,100000)}.pth' torch.save(best_model_state, filepath) + print(f'Epoch {epoch+1}: val_loss improved to {val_loss:.6f}, saving model to {filepath}') - # Load the best model weights + # Load the best performing model if best_model_state is not None: model.load_state_dict(best_model_state) - print(f"\nLoaded best model for {appliance_name} with validation loss: {best_val_loss:.4f}") def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): - """Disaggregate power consumption for each appliance from aggregate mains data""" - + """Disaggregates a chunk of mains data.""" if model is not None: self.models = model - # Preprocess test data similar to training data if do_preprocessing: - print("Preprocessing test data...") - test_main_list = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=test_main_list, - submeters_lst=None, - method="test", - appliance_params=self.appliance_params, - windowing=False - ) + test_main_list = self.call_preprocessing( + test_main_list, submeters_lst=None, method='test') test_predictions = [] - chunk_progress = tqdm(test_main_list, desc="Processing test chunks", unit="chunk") - - # Process each chunk of test data - for test_main in chunk_progress: - test_main = test_main.values - test_main = test_main.reshape((-1, self.sequence_length, 1)) - test_main_tensor = torch.FloatTensor(test_main).to(self.device) + for test_mains_df in test_main_list: + test_main_array = test_mains_df.values.reshape((-1, self.sequence_length, 1)) + test_main_tensor = torch.FloatTensor(test_main_array).to(self.device) disggregation_dict = {} - appliance_progress = tqdm(self.models.items(), desc="Disaggregating appliances", - leave=False, unit="appliance") - - # Get predictions from each appliance model - for appliance, model in appliance_progress: - appliance_progress.set_postfix({"Current": appliance}) - + for appliance, model in self.models.items(): model.eval() # Create DataLoader for batched inference @@ -356,57 +284,86 @@ def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False) predictions = [] - - pred_progress = tqdm(test_loader, desc=f"Predicting {appliance}", - leave=False, unit="batch") - - # Generate predictions with torch.no_grad(): - for batch_x, in pred_progress: + for batch_x, in test_loader: batch_pred = model(batch_x) predictions.append(batch_pred.cpu().numpy()) prediction = np.concatenate(predictions, axis=0) - # Denormalize predictions back to original power scale - prediction = (self.appliance_params[appliance]['mean'] + - prediction * self.appliance_params[appliance]['std']) + # Denormalize predictions + app_mean = self.appliance_params[appliance]['mean'] + app_std = self.appliance_params[appliance]['std'] + denormalized_prediction = app_mean + (prediction * app_std) - # Ensure non-negative power values - valid_predictions = prediction.flatten() - valid_predictions = np.where(valid_predictions > 0, valid_predictions, 0) - df = pd.Series(valid_predictions) + # Set negative values to zero + denormalized_prediction[denormalized_prediction < 0] = 0 + df = pd.Series(denormalized_prediction.flatten()) disggregation_dict[appliance] = df - # Combine all appliance predictions for this chunk results = pd.DataFrame(disggregation_dict, dtype='float32') test_predictions.append(results) return test_predictions def return_network(self): - """Factory method to create a new RNN_Attention model instance""" + """Returns a new, initialized RNNAttentionModel instance.""" model = RNNAttentionModel(self.sequence_length).to(self.device) return model + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data by windowing and normalizing, mirroring the + original TensorFlow implementation. + """ + if method == 'train': + # Preprocess mains + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + # Preprocess appliances + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name not in self.appliance_params: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") + + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.reshape((-1, 1)) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + appliance_list.append((app_name, processed_app_dfs)) + return processed_mains_lst, appliance_list + + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst def set_appliance_params(self, train_appliances): - """Compute normalization statistics (mean, std) for each appliance""" - print("Setting appliance parameters...") - - param_progress = tqdm(train_appliances, desc="Computing appliance stats", unit="appliance") - - for (app_name, df_list) in param_progress: - param_progress.set_postfix({"Current": app_name}) - - # Concatenate all data for this appliance and compute statistics - l = np.array(pd.concat(df_list, axis=0)) + """Computes and sets normalization parameters for each appliance.""" + for (app_name, df_list) in train_appliances: + l = np.concatenate([df.values for df in df_list]) app_mean = np.mean(l) app_std = np.std(l) - - # Prevent division by zero in normalization if app_std < 1: - app_std = 100 - - self.appliance_params.update({app_name: {'mean': app_mean, 'std': app_std}}) - - print(self.appliance_params) \ No newline at end of file + app_std = 100 # Avoid division by zero for flat signals + self.appliance_params[app_name] = {'mean': app_mean, 'std': app_std} + print("Appliance parameters set:", self.appliance_params) diff --git a/nilmtk_contrib/torch/rnn_attention_classification.py b/nilmtk_contrib/torch/rnn_attention_classification.py index 6b70791..9fd5e5a 100644 --- a/nilmtk_contrib/torch/rnn_attention_classification.py +++ b/nilmtk_contrib/torch/rnn_attention_classification.py @@ -1,310 +1,485 @@ -from __future__ import annotations -import copy, numpy as np, pandas as pd -from collections import OrderedDict -from typing import Dict, Any, List, Tuple - +from __future__ import print_function, division +from warnings import warn +from nilmtk.disaggregate import Disaggregator import torch import torch.nn as nn import torch.nn.functional as F -from torch.utils.data import TensorDataset, DataLoader -from tqdm import tqdm - -from nilmtk.disaggregate import Disaggregator -from nilmtk_contrib.torch.preprocessing import preprocess +import torch.optim as optim +from torch.utils.data import Dataset, DataLoader, TensorDataset +import os +import pandas as pd +import numpy as np +import pickle +from collections import OrderedDict +import matplotlib.pyplot as plt +from sklearn.model_selection import train_test_split +from tqdm import tqdm +import random +import copy +# Set device +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') class SequenceLengthError(Exception): pass - class ApplianceNotFoundError(Exception): pass - -class IdentityBlock(nn.Module): - def __init__(self, ch: int, k: int): - super().__init__() - self.c1 = nn.Conv1d(ch, ch, k, padding="same") - self.c2 = nn.Conv1d(ch, ch, k, padding="same") - self.c3 = nn.Conv1d(ch, ch, k, padding="same") - self.act = nn.ReLU() - - def forward(self, x): - s = x - x = self.act(self.c1(x)) - x = self.act(self.c2(x)) - x = self.c3(x) - return self.act(x + s) - - -class ConvBlock(nn.Module): - def __init__(self, ch_in: int, ch_mid: int, ch_out: int, k: int): - super().__init__() - self.c1 = nn.Conv1d(ch_in, ch_mid, k, padding="same") - self.c2 = nn.Conv1d(ch_mid, ch_mid, k, padding="same") - self.c3 = nn.Conv1d(ch_mid, ch_out, k, padding="same") - self.proj = nn.Conv1d(ch_in, ch_out, 1) - self.act = nn.ReLU() - - def forward(self, x): - s = self.proj(x) - x = self.act(self.c1(x)) - x = self.act(self.c2(x)) - x = self.c3(x) - return self.act(x + s) - - class AttentionLayer(nn.Module): - """Additive (Bahdanau) attention over the Bi-LSTM outputs.""" - def __init__(self, units: int): - super().__init__() - self.W = nn.Linear(units * 2, units) # *2 : bidirectional + """ + An attention layer that computes a context vector from encoder outputs. + This implementation is designed to mirror the original TensorFlow version. + """ + def __init__(self, units): + super(AttentionLayer, self).__init__() + # Layers to compute attention scores + self.W = nn.Linear(units * 2, units) # Input is bidirectional, hence *2 self.V = nn.Linear(units, 1) - - def forward(self, enc_out): # (B, T, 2H) - score = self.V(torch.tanh(self.W(enc_out))) # (B,T,1) - weights = torch.softmax(score, dim=1) # (B,T,1) - ctx = torch.sum(weights * enc_out, dim=1) # (B,2H) - return ctx, weights.squeeze(-1) # (B,2H), (B,T) - - -class _RNNAttNet(nn.Module): - def __init__(self, seq_len: int): - super().__init__() - self.seq_len = seq_len - - self.cls_feat = nn.Sequential( - nn.Conv1d(1, 30, 10), nn.ReLU(), - nn.Conv1d(30, 30, 8), nn.ReLU(), - nn.Conv1d(30, 40, 6), nn.ReLU(), - nn.Conv1d(40, 50, 5), nn.ReLU(), - nn.Conv1d(50, 50, 5), nn.ReLU(), - nn.Conv1d(50, 50, 5), nn.ReLU(), - nn.Flatten(), - nn.LazyLinear(1024), nn.ReLU() - ) - self.cls_head = nn.Sequential( - nn.Linear(1024, seq_len), - nn.Sigmoid() - ) - - self.conv_reg = nn.Conv1d(1, 16, 4, padding="same") - self.bi1 = nn.LSTM(16, 128, batch_first=True, bidirectional=True) - self.bi2 = nn.LSTM(256, 256, batch_first=True, bidirectional=True) - self.att = AttentionLayer(256) - self.reg_dense = nn.Sequential( - nn.Linear(512, 128), nn.Tanh(), - nn.Linear(128, seq_len) - ) - - def forward(self, x): # x (B,1,L) - cls = self.cls_head(self.cls_feat(x)) # (B,L) - - y = self.conv_reg(x).permute(0, 2, 1) # (B,L,16) - y,_ = self.bi1(y) - y,_ = self.bi2(y) - ctx, att = self.att(y) # (B,512) - reg = self.reg_dense(ctx) # (B,L) - - return reg * cls, cls, att # masked power, on/off, att - + + # Initialize weights with He normal to match TensorFlow's default + nn.init.kaiming_normal_(self.W.weight, nonlinearity='relu') + nn.init.kaiming_normal_(self.V.weight, nonlinearity='relu') + nn.init.zeros_(self.W.bias) + nn.init.zeros_(self.V.bias) + + def forward(self, encoder_output): + """ + Args: + encoder_output: The output from the LSTM layer, shape (batch, seq_len, hidden_size*2). + Returns: + context_vector: The weighted sum of encoder outputs, shape (batch, hidden_size*2). + attention_weights: The computed attention weights, shape (batch, seq_len). + """ + # Calculate alignment scores + score = self.V(torch.tanh(self.W(encoder_output))) # (batch, seq_len, 1) + + # Convert scores to weights using softmax + attention_weights = F.softmax(score, dim=1) # (batch, seq_len, 1) + + # Compute the context vector + context_vector = attention_weights * encoder_output + context_vector = torch.sum(context_vector, dim=1) + + return context_vector, attention_weights.squeeze(-1) + +class RNNAttentionClassificationNet(nn.Module): + """ + A dual-subnetwork model for NILM, combining a CNN-based classification + network and an RNN-with-attention regression network. The architecture + is designed to mirror the original TensorFlow implementation. + """ + def __init__(self, sequence_length): + super(RNNAttentionClassificationNet, self).__init__() + self.sequence_length = sequence_length + + # --- CLASSIFICATION SUBNETWORK (CNN) --- + self.cls_conv1 = nn.Conv1d(1, 30, kernel_size=10, padding='valid') + self.cls_conv2 = nn.Conv1d(30, 30, kernel_size=8, padding='valid') + self.cls_conv3 = nn.Conv1d(30, 40, kernel_size=6, padding='valid') + self.cls_conv4 = nn.Conv1d(40, 50, kernel_size=5, padding='valid') + self.cls_conv5 = nn.Conv1d(50, 50, kernel_size=5, padding='valid') + self.cls_conv6 = nn.Conv1d(50, 50, kernel_size=5, padding='valid') + + # Calculate the flattened size dynamically after convolutions + self._calculate_cls_flatten_size(sequence_length) + + self.cls_dense1 = nn.Linear(self.cls_flatten_size, 1024) + self.cls_dense2 = nn.Linear(1024, sequence_length) + + # --- REGRESSION SUBNETWORK (RNN with Attention) --- + self.reg_conv = nn.Conv1d(1, 16, kernel_size=4, stride=1, padding='same') + self.bi_lstm1 = nn.LSTM(16, 128, batch_first=True, bidirectional=True) + self.bi_lstm2 = nn.LSTM(256, 256, batch_first=True, bidirectional=True) + self.attention = AttentionLayer(256) + self.reg_dense1 = nn.Linear(512, 128) # 512 = 256 * 2 (bidirectional) + self.reg_dense2 = nn.Linear(128, sequence_length) + + self._initialize_weights() + + def _calculate_cls_flatten_size(self, seq_len): + """Calculates the input size for the classification FC layer.""" + # Each conv layer reduces length by (kernel_size - 1) + conv_output_length = seq_len - (10-1) - (8-1) - (6-1) - (5-1) - (5-1) - (5-1) + self.cls_flatten_size = 50 * conv_output_length + + def _initialize_weights(self): + """Initializes weights to match TensorFlow's default initializations.""" + for m in self.modules(): + if isinstance(m, (nn.Conv1d, nn.Linear)): + # Use Xavier uniform for Conv and Linear layers by default + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.LSTM): + # Initialize LSTM weights and biases + for name, param in m.named_parameters(): + if 'weight' in name: + nn.init.xavier_uniform_(param) + elif 'bias' in name: + nn.init.zeros_(param) + + def forward(self, x): + """ + Performs the forward pass, combining classification and regression outputs. + + Args: + x: Input tensor of shape (batch_size, 1, sequence_length). + Returns: + output: The final disaggregated power, shape (batch, seq_len). + classification_output: The appliance status prediction, shape (batch, seq_len). + attention_weights: The attention weights from the regression subnetwork, shape (batch, seq_len). + """ + # --- CLASSIFICATION SUBNETWORK --- + cls_x = F.relu(self.cls_conv1(x)) + cls_x = F.relu(self.cls_conv2(cls_x)) + cls_x = F.relu(self.cls_conv3(cls_x)) + cls_x = F.relu(self.cls_conv4(cls_x)) + cls_x = F.relu(self.cls_conv5(cls_x)) + cls_x = F.relu(self.cls_conv6(cls_x)) + cls_x = cls_x.flatten(1) + cls_x = F.relu(self.cls_dense1(cls_x)) + classification_output = torch.sigmoid(self.cls_dense2(cls_x)) + + # --- REGRESSION SUBNETWORK --- + reg_x = self.reg_conv(x).permute(0, 2, 1) # (batch, seq_len, 16) + reg_x, _ = self.bi_lstm1(reg_x) + reg_x, _ = self.bi_lstm2(reg_x) + context_vector, attention_weights = self.attention(reg_x) + reg_x = torch.tanh(self.reg_dense1(context_vector)) + regression_output = self.reg_dense2(reg_x) + + # Final output is the element-wise product of the two subnetworks + output = regression_output * classification_output + + return output, classification_output, attention_weights class RNN_attention_classification(Disaggregator): """ - RNN-based disaggregator with attention mechanism for classification. - This model uses a combination of convolutional layers, LSTM layers, - and attention mechanisms to disaggregate mains electricity data into - appliance-level data. + RNN with attention and classification for non-intrusive load monitoring. + + This implementation is based on the paper: + "ResNet-based Multi-output Regression for NILM: Towards Enhanced Appliance State Detection" + https://arxiv.org/abs/2411.15805v1 + + The model combines RNN with attention mechanism and CNN-based classification for + enhanced appliance state detection and power consumption prediction in energy + disaggregation tasks. + + Architecture Overview: + - Classification subnetwork with 1D convolutions for appliance state detection + - Regression subnetwork with bidirectional LSTM and attention mechanism + - Attention layer for learning relevant temporal features + - Element-wise multiplication of classification and regression outputs + - Multi-output learning for enhanced appliance state detection + + Parameters: + params (dict): Configuration parameters including: + - sequence_length (int): Length of input sequences (default: 99) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) + - appliance_params (dict): Appliance-specific normalization parameters + - mains_params (dict): Mains-specific normalization parameters """ - def __init__(self, params: Dict[str, Any]): - super().__init__() + def __init__(self, params): self.MODEL_NAME = "RNN_attention_classification" - self.chunk_wise_training = params.get("chunk_wise_training", True) - self.sequence_length = params.get("sequence_length", 99) + self.chunk_wise_training = params.get('chunk_wise_training', False) + self.sequence_length = params.get('sequence_length', 99) + self.n_epochs = params.get('n_epochs', 10) + self.models = OrderedDict() + self.att_models = OrderedDict() # Store attention models separately like TensorFlow + self.mains_mean = 1800 + self.mains_std = 600 + self.batch_size = params.get('batch_size', 512) + self.appliance_params = params.get('appliance_params', {}) + self.mains_params = params.get('mains_params', {}) + self.device = device + if self.sequence_length % 2 == 0: - raise SequenceLengthError("Sequence length must be odd") - - self.n_epochs = params.get("n_epochs", 10) - self.batch_size = params.get("batch_size", 512) - - self.appliance_params: Dict[str, Dict[str, float]] = {} - self.mains_mean, self.mains_std = 1800, 600 - - self.models: "OrderedDict[str,_RNNAttNet]" = OrderedDict() - self.best: Dict[str, float] = {} - - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - def _fresh_network(self): - return _RNNAttNet(self.sequence_length).to(self.device) - - def set_mains_params(self, mains_list): - data = np.concatenate([m.values.flatten() for m in mains_list]) - self.mains_mean = data.mean() - self.mains_std = max(data.std(), 1.0) - - def set_appliance_params(self, train_apps): - for app, dfs in train_apps: - data = np.concatenate([d.values.flatten() for d in dfs]) - self.appliance_params[app] = { - "mean": data.mean(), - "std" : max(data.std(), 1.0), - "min" : data.min(), - "max" : data.max() + raise SequenceLengthError("Sequence length must be odd!") + + def return_network(self): + """Returns a new model and a corresponding attention model wrapper.""" + model = RNNAttentionClassificationNet(self.sequence_length).to(self.device) + + # Wrapper to extract attention weights, for compatibility with TF version + class AttentionWrapper(nn.Module): + def __init__(self, full_model): + super().__init__() + self.full_model = full_model + + def forward(self, x): + _, _, attention_weights = self.full_model(x) + return attention_weights + + attention_model = AttentionWrapper(model).to(self.device) + return model, attention_model + + def classify(self, classify_appliance): + """ + Generates binary on/off classification targets from appliance data. + This preprocessing mirrors the original TensorFlow implementation. + """ + appliance_on_off = [] + THRESHOLD = 15 # Power threshold to consider an appliance 'on' + + for app_index, (appliance_name, on_off_list) in enumerate(classify_appliance): + classification_appliance_dfs = [] + for appliance in on_off_list: + n = self.sequence_length + units_to_pad = n // 2 + + # Apply thresholding + appliance_copy = appliance.copy() + appliance_copy[appliance_copy <= THRESHOLD] = 0 + appliance_copy[appliance_copy > THRESHOLD] = 1 + + # Create sequences + new_app_readings = appliance_copy.values.flatten() + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_app_readings = np.array([new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1)]) + classification_appliance_dfs.append(pd.DataFrame(new_app_readings)) + + appliance_on_off.append((appliance_name, classification_appliance_dfs)) + return appliance_on_off + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data by windowing and normalizing, mirroring the + original TensorFlow implementation. + """ + if method == 'train': + # Preprocess mains + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + # Preprocess appliances + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name in self.appliance_params: + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + app_min = self.appliance_params[app_name]['min'] + app_max = self.appliance_params[app_name]['max'] + else: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.flatten() + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_app_readings = np.array([new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1)]) + # Normalize with min-max scaling, matching TensorFlow + new_app_readings = (new_app_readings - app_min) / (app_max - app_min) + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + + appliance_list.append((app_name, processed_app_dfs)) + + return processed_mains_lst, appliance_list + + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + new_mains = new_mains.reshape((-1, self.sequence_length)) + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst + + def set_mains_params(self, train_main): + """Computes and sets normalization parameters for the mains data.""" + all_mains_data = np.concatenate([mains.values.flatten() for mains in train_main]) + self.mains_params = { + 'mean': np.mean(all_mains_data), + 'std': np.std(all_mains_data), + 'min': np.min(all_mains_data), + 'max': np.max(all_mains_data) + } + + def set_appliance_params(self, train_appliances): + """Computes and sets normalization parameters for each appliance.""" + for (app_name, df_list) in train_appliances: + app_data = np.concatenate([df.values for df in df_list]) + app_mean = np.mean(app_data) + app_std = np.std(app_data) + if app_std < 1: + app_std = 100 # Avoid division by zero for flat signals + self.appliance_params[app_name] = { + 'mean': app_mean, + 'std': app_std, + 'min': np.min(app_data), + 'max': np.max(app_data) } - def classify(self, apps, threshold: float = 15.0): - L, pad = self.sequence_length, self.sequence_length // 2 - out = [] - for app, dfs in apps: - proc = [] - for df in dfs: - v = df.values.flatten() # Flatten the DataFrame to 1D array - v[v <= threshold] = 0 - v[v > threshold] = 1 - v = np.pad(v, (pad, pad)) - w = np.array([v[i:i+L] for i in range(len(v)-L+1)], np.float32) # Overlapping windows - proc.append(pd.DataFrame(w)) - out.append((app, proc)) - return out - - def partial_fit(self, mains, apps, do_preprocessing=True, **_): - + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, **load_kwargs): + """Trains the model on a chunk of data.""" + print("...............RNN_attention_classification partial_fit running...............") + if not self.appliance_params: - self.set_appliance_params(apps) - self.set_mains_params(mains) + self.set_appliance_params(train_appliances) + if not self.mains_params: + self.set_mains_params(train_main) if do_preprocessing: - cls_targets = self.classify(copy.deepcopy(apps)) - mains, apps = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=mains, - submeters_lst=apps, - method="train", - appliance_params=self.appliance_params, - windowing=False - ) - - X = torch.tensor(pd.concat(mains).values, - dtype=torch.float32).unsqueeze(1) # (N,1,L) - N = X.size(0) # Number of samples - perm = torch.randperm(N) - split = int(0.15 * N) - val_idx, tr_idx = perm[:split], perm[split:] - X_tr, X_val = X[tr_idx].to(self.device), X[val_idx].to(self.device) - - y_reg, y_cls = {}, {} - for app, dfs in apps: - y_reg[app] = torch.tensor(pd.concat(dfs).values, dtype=torch.float32) - for app, dfs in cls_targets: - y_cls[app] = torch.tensor(pd.concat(dfs).values, dtype=torch.float32) - - mse, bce = nn.MSELoss(), nn.BCELoss() - - for app in y_reg: - y_tr = y_reg[app][tr_idx].to(self.device) - y_val = y_reg[app][val_idx].to(self.device) - c_tr = y_cls[app][tr_idx].to(self.device) - c_val = y_cls[app][val_idx].to(self.device) - - if app not in self.models: - self.models[app] = self._fresh_network() - self.best[app] = np.inf - - net = self.models[app] - optim = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9) - - loader = DataLoader( - TensorDataset(X_tr, y_tr, c_tr), - batch_size=self.batch_size, shuffle=True - ) - - # Training loop - for ep in range(self.n_epochs): - net.train() - run_loss = 0.0 - bar = tqdm(loader, - desc=f"{app} ▏epoch {ep+1}/{self.n_epochs}", - leave=False, unit="batch") - for xb, yb, cb in bar: - optim.zero_grad() - pr, pc, _ = net(xb) - loss = mse(pr, yb) + bce(pc, cb) - loss.backward() - optim.step() - run_loss += loss.item() - bar.set_postfix(loss=f"{loss.item():.4f}") - - avg_loss = run_loss / len(loader) - - # Validation - net.eval() - with torch.no_grad(): - vr, vc, _ = net(X_val) - v_loss = mse(vr, y_val).item() + bce(vc, c_val).item() - - tqdm.write( - f"[{app}] Epoch {ep+1}/{self.n_epochs} | " - f"Train Loss: {avg_loss:.4f} | Val Loss: {v_loss:.4f}" - ) - - if v_loss < self.best[app]: - self.best[app] = v_loss - torch.save(net.state_dict(), f"rnn_att-{app}.pth") - - net.load_state_dict(torch.load(f"rnn_att-{app}.pth", - map_location=self.device)) - - def disaggregate_chunk(self, mains, model=None, do_preprocessing=True): + # Create classification targets before normalizing appliance data + classify_appliance = copy.deepcopy(train_appliances) + classification = self.classify(classify_appliance) + + # Normalize mains and appliance data + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + # Reshape all data into sequences + train_main = pd.concat(train_main, axis=0).values.reshape((-1, self.sequence_length, 1)) + + # Process appliance power data + new_train_appliances = [] + for app_name, app_dfs in train_appliances: + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, self.sequence_length)) + new_train_appliances.append((app_name, app_df_values)) + train_appliances = new_train_appliances + + # Process classification target data + new_train_appliances_classification = {} + for app_name, app_dfs in classification: + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, self.sequence_length)) + new_train_appliances_classification[app_name] = app_df_values + + self.att_models = {} + for appliance_name, power in train_appliances: + if appliance_name not in self.models: + print(f"First time training for {appliance_name}") + self.models[appliance_name], self.att_models[appliance_name] = self.return_network() + else: + print(f"Retraining model for {appliance_name}") + + model = self.models[appliance_name] + if train_main.size > 10: + # Combine power and classification targets for splitting + power_classification_target = np.concatenate( + (power, new_train_appliances_classification[appliance_name]), axis=1) + + # Create training and validation sets + train_x, v_x, train_y_combined, v_y_combined = train_test_split( + train_main, power_classification_target, test_size=0.15, random_state=10) + + # Separate power and classification targets after splitting + train_y = train_y_combined[:, :self.sequence_length] + v_y = v_y_combined[:, :self.sequence_length] + train_c = train_y_combined[:, self.sequence_length:] + v_c = v_y_combined[:, self.sequence_length:] + + # Convert to PyTorch Tensors + train_x = torch.tensor(train_x, dtype=torch.float32).permute(0, 2, 1).to(self.device) + v_x = torch.tensor(v_x, dtype=torch.float32).permute(0, 2, 1).to(self.device) + train_y = torch.tensor(train_y, dtype=torch.float32).to(self.device) + v_y = torch.tensor(v_y, dtype=torch.float32).to(self.device) + train_c = torch.tensor(train_c, dtype=torch.float32).to(self.device) + v_c = torch.tensor(v_c, dtype=torch.float32).to(self.device) + + # Optimizer and loss functions, matching TensorFlow + optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) + mse_loss = nn.MSELoss() + bce_loss = nn.BCELoss() + + best_val_loss = float('inf') + filepath = f'RNN_attention_classification-temp-weights-{random.randint(0, 100000)}.pth' + + # Training loop + for epoch in range(self.n_epochs): + model.train() + train_dataset = TensorDataset(train_x, train_y, train_c) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + epoch_losses = [] + for batch_x, batch_y, batch_c in train_loader: + optimizer.zero_grad() + output, classification_output, _ = model(batch_x) + + # Combined loss (regression + classification) + loss = mse_loss(output, batch_y) + bce_loss(classification_output, batch_c) + + loss.backward() + optimizer.step() + epoch_losses.append(loss.item()) + + # Validation + model.eval() + with torch.no_grad(): + val_output, val_classification, _ = model(v_x) + val_loss = mse_loss(val_output, v_y) + bce_loss(val_classification, v_c) + + avg_train_loss = np.mean(epoch_losses) + print(f"Epoch {epoch+1}/{self.n_epochs} - loss: {avg_train_loss:.4f} - val_loss: {val_loss:.4f}") + + # Save the best model based on validation loss + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f"Validation loss improved, saving model to {filepath}") + + # Load the best performing model + model.load_state_dict(torch.load(filepath, map_location=self.device)) + + def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): + """Disaggregates a chunk of mains data.""" if model is not None: self.models = model + if do_preprocessing: - mains = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=mains, - submeters_lst=None, - method="test", - appliance_params=self.appliance_params, - windowing=False - ) - - L = self.sequence_length - out = [] - for m in mains: - X = torch.tensor(m.values, dtype=torch.float32 - ).unsqueeze(1).to(self.device) - disc = {} - for app, net in self.models.items(): - net.eval() + test_main_list = self.call_preprocessing( + test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_mains_df in test_main_list: + disggregation_dict = {} + test_main_array = test_mains_df.values.reshape((-1, self.sequence_length, 1)) + test_main_tensor = torch.tensor(test_main_array, dtype=torch.float32).permute(0, 2, 1).to(self.device) + + for appliance in self.models: + model = self.models[appliance] + model.eval() + with torch.no_grad(): - pr, _, _ = net(X) - pr = pr.cpu().numpy() - - # overlap-mean - def ov(a): - s, c = np.zeros(len(a)+L-1), np.zeros(len(a)+L-1) # sums, counts - for i,row in enumerate(a): - s[i:i+L] += row - c[i:i+L] += 1 - return s/c - - power = ov(pr) - p = self.appliance_params[app] - power = np.clip(p["min"] + power*(p["max"]-p["min"]), 0, None) - disc[app] = pd.Series(power, dtype="float32") - out.append(pd.DataFrame(disc, dtype="float32")) - return out - - # NILMTK shortcut wrappers - def train(self, mains, apps, **kw): - return self.partial_fit(mains, apps, **kw) - - def disaggregate(self, mains, store): - preds = self.disaggregate_chunk(mains) - for i, df in enumerate(preds): - for col in df.columns: - store.put(f"/building1/elec/meter{i+1}/{col}", df[col]) + prediction_output, _, _ = model(test_main_tensor) + prediction_output = prediction_output.cpu().numpy() + + # Average predictions over overlapping windows to get a single series + l = self.sequence_length + n = len(prediction_output) + l - 1 + sum_arr = np.zeros(n) + counts_arr = np.zeros(n) + + for i, p in enumerate(prediction_output): + sum_arr[i:i+l] += p.flatten() + counts_arr[i:i+l] += 1 + + # Avoid division by zero + counts_arr[counts_arr == 0] = 1 + averaged_prediction = sum_arr / counts_arr + + # Denormalize the prediction + app_min = self.appliance_params[appliance]['min'] + app_max = self.appliance_params[appliance]['max'] + denormalized_prediction = app_min + (averaged_prediction * (app_max - app_min)) + + # Set negative values to zero + denormalized_prediction[denormalized_prediction < 0] = 0 + df = pd.Series(denormalized_prediction) + disggregation_dict[appliance] = df + + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + + return test_predictions diff --git a/nilmtk_contrib/torch/seq2point.py b/nilmtk_contrib/torch/seq2point.py index ee5ee89..89df45b 100644 --- a/nilmtk_contrib/torch/seq2point.py +++ b/nilmtk_contrib/torch/seq2point.py @@ -7,229 +7,292 @@ from torch.utils.data import TensorDataset, DataLoader from tqdm import tqdm from nilmtk.disaggregate import Disaggregator -from nilmtk_contrib.torch.preprocessing import preprocess class SequenceLengthError(Exception): pass - class ApplianceNotFoundError(Exception): pass - class Seq2PointTorch(Disaggregator): """ - Sequence-to-Point NILM disaggregator using PyTorch. - Uses 1D CNN to map power sequences to single appliance power values. + Sequence-to-Point neural network for Non-Intrusive Load Monitoring (NILM). + + Based on "Sequence-to-Point Learning With Neural Networks for Non-Intrusive Load Monitoring" + by Zhang et al., published in Proceedings of the AAAI Conference on Artificial Intelligence, 2018. + DOI: https://doi.org/10.1609/aaai.v32i1.11873 + + This model uses a sequence-to-point learning approach where the input is a window + of mains power consumption and the output is a single point prediction of the target + appliance power. The architecture uses convolutional neural networks that can inherently + learn appliance signatures to reduce the identifiability problem in energy disaggregation. + + Architecture Overview: + - Multiple 1D convolutional layers for feature extraction from power sequences + - Dropout layer for regularization + - Fully connected layers for final power prediction + - Single point output from sequence input (sequence-to-point learning) + + Args: + params (dict): Dictionary containing model hyperparameters: + - sequence_length (int): Length of input sequences (default: 99, must be odd) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - appliance_params (dict): Appliance-specific normalization parameters + - mains_mean (float): Mean normalization for mains power (default: 1800) + - mains_std (float): Standard deviation for mains power (default: 600) + - chunk_wise_training (bool): Enable chunk-wise training (default: False) """ def __init__(self, params): + """Initializes the disaggregator and its hyperparameters.""" super().__init__() self.MODEL_NAME = "Seq2PointTorch" - self.models = OrderedDict() # Store separate models for each appliance + self.models = OrderedDict() self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" - # Extract hyperparameters from params dict self.chunk_wise_training = params.get("chunk_wise_training", False) self.sequence_length = params.get("sequence_length", 99) self.n_epochs = params.get("n_epochs", 10) self.batch_size = params.get("batch_size", 512) - self.appliance_params = params.get("appliance_params", {}) # Normalization stats + self.appliance_params = params.get("appliance_params", {}) self.mains_mean = params.get("mains_mean", 1800) self.mains_std = params.get("mains_std", 600) self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - - # Sequence length must be odd for proper windowing + if self.sequence_length % 2 == 0: - raise SequenceLengthError("Sequence length should be odd!") + raise SequenceLengthError("Sequence length must be odd for proper windowing.") - def _build_network(self): - """Build the 1D CNN network architecture for sequence-to-point mapping""" - seq_len = self.sequence_length - # Calculate reduction in sequence length after all conv layers - conv_reduction = (10-1) + (8-1) + (6-1) + (5-1) + (5-1) # = 29 - - model = nn.Sequential( - # Feature extraction layers with 1D convolutions - nn.Conv1d(1, 30, kernel_size=10, stride=1), nn.ReLU(), - nn.Conv1d(30, 30, kernel_size=8, stride=1), nn.ReLU(), - nn.Conv1d(30, 40, kernel_size=6, stride=1), nn.ReLU(), - nn.Conv1d(40, 50, kernel_size=5, stride=1), nn.ReLU(), - nn.Dropout(0.2), - nn.Conv1d(50, 50, kernel_size=5, stride=1), nn.ReLU(), - nn.Dropout(0.2), + def return_network(self): + """Builds the 1D CNN model, mirroring the original TensorFlow architecture.""" + class Seq2PointNet(nn.Module): + """The Seq2Point neural network architecture.""" + def __init__(self, sequence_length): + super().__init__() + # Layer definitions to match the original TensorFlow model + self.conv1 = nn.Conv1d(1, 30, kernel_size=10, stride=1) + self.conv2 = nn.Conv1d(30, 30, kernel_size=8, stride=1) + self.conv3 = nn.Conv1d(30, 40, kernel_size=6, stride=1) + self.conv4 = nn.Conv1d(40, 50, kernel_size=5, stride=1) + self.conv5 = nn.Conv1d(50, 50, kernel_size=5, stride=1) + self.dropout = nn.Dropout(0.2) + + # Calculate the flattened size dynamically after convolutions + self._calculate_flatten_size(sequence_length) + + self.fc1 = nn.Linear(self.flatten_size, 1024) + self.fc2 = nn.Linear(1024, 1) + + self._initialize_weights() + + def _calculate_flatten_size(self, seq_len): + """Calculates the input size for the fully connected layer.""" + # Each conv layer reduces length by (kernel_size - 1) + conv_output_length = seq_len - (10-1) - (8-1) - (6-1) - (5-1) - (5-1) + self.flatten_size = 50 * conv_output_length - # Flatten for fully connected layers - nn.Flatten(), + def _initialize_weights(self): + """Initializes weights to match TensorFlow's default (glorot_uniform).""" + for m in self.modules(): + if isinstance(m, (nn.Conv1d, nn.Linear)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) - # Dense layers for final prediction - nn.Linear(50 * (seq_len - conv_reduction), 1024), nn.ReLU(), - nn.Dropout(0.2), - nn.Linear(1024, 1) # Output single power value - ) - return model.to(self.device) - - def partial_fit(self, train_main, train_appliances, do_preprocessing=True, - current_epoch=0, **load_kwargs): - """Train models on a chunk of data (supports incremental learning)""" + def forward(self, x): + # Forward pass through the network + x = torch.relu(self.conv1(x)) + x = torch.relu(self.conv2(x)) + x = torch.relu(self.conv3(x)) + x = torch.relu(self.conv4(x)) + x = self.dropout(x) + x = torch.relu(self.conv5(x)) + x = self.dropout(x) + x = x.flatten(1) # Flatten the output for the dense layers + x = torch.relu(self.fc1(x)) + x = self.dropout(x) + x = self.fc2(x) + return x + + model = Seq2PointNet(self.sequence_length).to(self.device) + return model + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data by windowing and normalizing, mirroring the + original TensorFlow implementation. + """ + if method == 'train': + # Preprocess mains + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + # Preprocess appliances + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name not in self.appliance_params: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") + + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.reshape((-1, 1)) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + appliance_list.append((app_name, processed_app_dfs)) + return processed_mains_lst, appliance_list - # Compute appliance-specific normalization parameters if not provided + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst + + def set_appliance_params(self, train_appliances): + """Computes and sets normalization parameters for each appliance.""" + for app_name, df_list in train_appliances: + l = np.concatenate([df.values for df in df_list]) + app_mean = np.mean(l) + app_std = np.std(l) + if app_std < 1: + app_std = 100 # Avoid division by zero for flat signals + self.appliance_params[app_name] = {'mean': app_mean, 'std': app_std} + print("Appliance parameters set:", self.appliance_params) + + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **load_kwargs): + """Trains the model on a chunk of data.""" if not self.appliance_params: self.set_appliance_params(train_appliances) - # Preprocess data: windowing, normalization, etc. + print("...............Seq2Point partial_fit running...............") + if do_preprocessing: - train_main, train_appliances = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=train_main, - submeters_lst=train_appliances, - method="train", - appliance_params=self.appliance_params, - windowing=False - ) - - # Prepare main power data for CNN input (batch_size, channels, sequence_length) - train_main = pd.concat(train_main, axis=0).values.reshape( - -1, self.sequence_length, 1 - ) - train_main = torch.tensor(train_main, dtype=torch.float32).permute(0, 2, 1) - - # Prepare appliance power data - new_train_apps = [] - for app_name, app_df_list in train_appliances: - app_df = pd.concat(app_df_list, axis=0).values.reshape(-1, 1) - new_train_apps.append( - (app_name, torch.tensor(app_df, dtype=torch.float32)) - ) - train_appliances = new_train_apps - - # Split data into training and validation sets - n_total = train_main.size(0) - val_split = int(0.15 * n_total) - idx = torch.randperm(n_total) - tr_idx, val_idx = idx[val_split:], idx[:val_split] - - mains_train = train_main[tr_idx].to(self.device) - mains_val = train_main[val_idx].to(self.device) - - # Train a separate model for each appliance - for appliance, power_tensor in train_appliances: - power_tensor = power_tensor.to(self.device) - power_train = power_tensor[tr_idx] - power_val = power_tensor[val_idx] - - # Create new model if this appliance hasn't been seen before - if appliance not in self.models: - print("First model training for", appliance) - self.models[appliance] = self._build_network() - else: - print("Started Retraining model for", appliance) - - model = self.models[appliance] - optimiser = torch.optim.Adam(model.parameters()) - loss_fn = nn.MSELoss() - - best_val = np.inf - best_file = f"{self.file_prefix}-{appliance.replace(' ', '_')}-epoch{current_epoch}.pth" - - # Create DataLoader for batch processing - dataset = TensorDataset(mains_train, power_train) - loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True) - - # Training loop - for epoch in range(self.n_epochs): - model.train() - epoch_losses = [] - - # Training phase - for x_batch, y_batch in loader: - x_batch, y_batch = x_batch.to(self.device), y_batch.to(self.device) - optimiser.zero_grad() - preds = model(x_batch).squeeze(1) - loss = loss_fn(preds, y_batch) - loss.backward() - optimiser.step() - epoch_losses.append(loss.item()) - - # Validation phase - model.eval() - with torch.no_grad(): - val_preds = model(mains_val).squeeze(1) - val_loss = loss_fn(val_preds, power_val).item() + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') - avg_loss = np.mean(epoch_losses) - tqdm.write(f"[{appliance}] Epoch {epoch+1}/{self.n_epochs} | Train Loss: {avg_loss:.4f} | Val Loss: {val_loss:.4f}") + # Prepare data for training + train_main = pd.concat(train_main, axis=0).values.reshape((-1, self.sequence_length, 1)) + + new_train_appliances = [] + for app_name, app_dfs in train_appliances: + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, 1)) + new_train_appliances.append((app_name, app_df_values)) + train_appliances = new_train_appliances - # Save best model based on validation loss - if val_loss < best_val: - best_val = val_loss - torch.save(model.state_dict(), best_file) + for appliance_name, power in train_appliances: + if appliance_name not in self.models: + print(f"First time training for {appliance_name}") + self.models[appliance_name] = self.return_network() + else: + print(f"Retraining model for {appliance_name}") - # Load the best model weights - model.load_state_dict(torch.load(best_file, map_location=self.device)) + model = self.models[appliance_name] + if train_main.size > 10: + # PyTorch Conv1d expects (batch, channels, length) + train_main_tensor = torch.tensor(train_main, dtype=torch.float32).permute(0, 2, 1).to(self.device) + power_tensor = torch.tensor(power, dtype=torch.float32).squeeze().to(self.device) + + # Create validation split + n_samples = train_main_tensor.size(0) + val_size = int(0.15 * n_samples) + indices = torch.randperm(n_samples) + train_idx, val_idx = indices[val_size:], indices[:val_size] + + train_X = train_main_tensor[train_idx] + train_y = power_tensor[train_idx] + val_X = train_main_tensor[val_idx] + val_y = power_tensor[val_idx] + + # Optimizer and loss function + optimizer = torch.optim.Adam(model.parameters()) + criterion = nn.MSELoss() + + best_val_loss = float('inf') + filepath = f"{self.file_prefix}-{'_'.join(appliance_name.split())}-epoch{current_epoch}.pth" + + # Training loop + for epoch in range(self.n_epochs): + model.train() + + train_dataset = TensorDataset(train_X, train_y) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + epoch_losses = [] + for batch_X, batch_y in train_loader: + optimizer.zero_grad() + predictions = model(batch_X).squeeze() + loss = criterion(predictions, batch_y) + loss.backward() + + # Gradient clipping for stability + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + + optimizer.step() + epoch_losses.append(loss.item()) + + # Validation + model.eval() + with torch.no_grad(): + val_predictions = model(val_X).squeeze() + val_loss = criterion(val_predictions, val_y).item() + + avg_train_loss = np.mean(epoch_losses) + print(f"Epoch {epoch+1}/{self.n_epochs} - loss: {avg_train_loss:.4f} - val_loss: {val_loss:.4f}") + + # Save the best model based on validation loss + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f"Validation loss improved, saving model to {filepath}") + + # Load the best performing model + model.load_state_dict(torch.load(filepath, map_location=self.device)) def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): - """Disaggregate power consumption for each appliance from aggregate mains data""" - + """Disaggregates a chunk of mains data.""" if model is not None: self.models = model - # Preprocess test data similar to training data if do_preprocessing: - test_main_list = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=test_main_list, - submeters_lst=None, - method="test", - appliance_params=self.appliance_params, - windowing=False - ) - - results = [] - - # Process each chunk of test data - for mains_df in test_main_list: - # Prepare data for CNN input (batch_size, channels, sequence_length) - mains_np = mains_df.values.reshape(-1, self.sequence_length, 1) - mains_tensor = ( - torch.tensor(mains_np, dtype=torch.float32) - .permute(0, 2, 1) - .to(self.device) - ) - - disagg = {} - - # Get predictions from each appliance model - for appliance, net in self.models.items(): - net.eval() - with torch.no_grad(): - # Generate predictions and denormalize back to original power scale - preds = ( - net(mains_tensor).cpu().numpy().flatten() - * self.appliance_params[appliance]["std"] - + self.appliance_params[appliance]["mean"] - ) - # Ensure non-negative power values - preds = np.clip(preds, 0, None) - disagg[appliance] = pd.Series(preds, dtype="float32") - - # Combine all appliance predictions for this chunk - results.append(pd.DataFrame(disagg, dtype="float32")) - return results + test_main_list = self.call_preprocessing(test_main_list, submeters_lst=None, method='test') - def set_appliance_params(self, train_appliances): - """Compute normalization statistics (mean, std) for each appliance""" - for app_name, df_list in train_appliances: - # Concatenate all data for this appliance and compute statistics - data = np.concatenate([df.values.flatten() for df in df_list]) - mean, std = data.mean(), data.std() + test_predictions = [] + for test_mains_df in test_main_list: + test_main_array = test_mains_df.values.reshape((-1, self.sequence_length, 1)) - # Prevent division by zero in normalization - if std < 1: - std = 100 - self.appliance_params[app_name] = {"mean": mean, "std": std} + # PyTorch Conv1d expects (batch, channels, length) + test_main_tensor = torch.tensor(test_main_array, dtype=torch.float32).permute(0, 2, 1).to(self.device) - print(self.appliance_params) \ No newline at end of file + disggregation_dict = {} + for appliance, model in self.models.items(): + model.eval() + with torch.no_grad(): + prediction = model(test_main_tensor).cpu().numpy() + + # Denormalize the prediction + app_mean = self.appliance_params[appliance]['mean'] + app_std = self.appliance_params[appliance]['std'] + denormalized_prediction = app_mean + (prediction * app_std) + + # Set negative values to zero + denormalized_prediction[denormalized_prediction < 0] = 0 + df = pd.Series(denormalized_prediction.flatten()) + disggregation_dict[appliance] = df + + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + return test_predictions \ No newline at end of file diff --git a/nilmtk_contrib/torch/seq2seq.py b/nilmtk_contrib/torch/seq2seq.py index d9c1a6f..a8e2287 100644 --- a/nilmtk_contrib/torch/seq2seq.py +++ b/nilmtk_contrib/torch/seq2seq.py @@ -1,50 +1,70 @@ import os, json, numpy as np, pandas as pd import torch, torch.nn as nn, torch.optim as optim +import random from tqdm import tqdm from collections import OrderedDict from torch.utils.data import TensorDataset, DataLoader from nilmtk.disaggregate import Disaggregator -from nilmtk_contrib.torch.preprocessing import preprocess + +class SequenceLengthError(Exception): + pass + +class ApplianceNotFoundError(Exception): + pass class Seq2SeqModel(nn.Module): """ - Sequence-to-Sequence CNN model that maps input power sequences - to output appliance power sequences of the same length. + A Sequence-to-Sequence (Seq2Seq) CNN model for NILM, with an architecture + designed to mirror the original TensorFlow implementation. """ - def __init__(self, seq_len): + def __init__(self, sequence_length): super().__init__() + self.sequence_length = sequence_length + + # --- Encoder Layers --- + self.conv1 = nn.Conv1d(1, 30, kernel_size=10, stride=2, padding=0) + self.conv2 = nn.Conv1d(30, 30, kernel_size=8, stride=2, padding=0) + self.conv3 = nn.Conv1d(30, 40, kernel_size=6, stride=1, padding=0) + self.conv4 = nn.Conv1d(40, 50, kernel_size=5, stride=1, padding=0) + self.dropout1 = nn.Dropout(0.2) + self.conv5 = nn.Conv1d(50, 50, kernel_size=5, stride=1, padding=0) + self.dropout2 = nn.Dropout(0.2) + + # Calculate the flattened size dynamically after convolutions + self._calculate_flatten_size(sequence_length) - self.seq_len = seq_len + # --- Decoder Layers --- + self.flatten = nn.Flatten() + self.fc1 = nn.Linear(self.flat_size, 1024) + self.dropout3 = nn.Dropout(0.2) + self.fc2 = nn.Linear(1024, sequence_length) - # Encoder: 1D CNN layers with different strides for feature extraction - self.conv1 = nn.Conv1d(1, 30, 10, stride=2) - self.conv2 = nn.Conv1d(30,30, 8, stride=2) - self.conv3 = nn.Conv1d(30,40, 6, stride=1) - self.conv4 = nn.Conv1d(40,50, 5, stride=1) - self.dropout1 = nn.Dropout(.2) - self.conv5 = nn.Conv1d(50,50, 5, stride=1) - self.dropout2 = nn.Dropout(.2) - - # Calculate the flattened size after all convolutions + self._init_weights() + + def _calculate_flatten_size(self, seq_len): + """Calculates the input size for the decoder's fully connected layer.""" + # Simulate the sequence length reduction through the encoder L = seq_len - L = (L - 10)//2 + 1 - L = (L - 8)//2 + 1 + L = (L - 10) // 2 + 1 + L = (L - 8) // 2 + 1 L = L - 6 + 1 L = L - 5 + 1 L = L - 5 + 1 - flat_size = 50 * L - - # Decoder: Fully connected layers to reconstruct sequence - self.flatten = nn.Flatten() - self.fc1 = nn.Linear(flat_size, 1024) - self.dropout3 = nn.Dropout(.2) - self.fc2 = nn.Linear(1024, seq_len) # Output same length as input + self.flat_size = 50 * L + + def _init_weights(self): + """Initializes weights to match TensorFlow's default (glorot_uniform).""" + for m in self.modules(): + if isinstance(m, (nn.Conv1d, nn.Linear)): + nn.init.xavier_uniform_(m.weight) + if m.bias is not None: + nn.init.zeros_(m.bias) def forward(self, x): - # Input: [B, seq_len, 1] → rearrange for Conv1d: [B, 1, seq_len] - x = x.permute(0,2,1) + # Input shape: (batch, seq_len, 1) -> permute for Conv1D + x = x.permute(0, 2, 1) - # Encoder: feature extraction through conv layers + # --- Encoder --- x = torch.relu(self.conv1(x)) x = torch.relu(self.conv2(x)) x = torch.relu(self.conv3(x)) @@ -53,189 +73,258 @@ def forward(self, x): x = torch.relu(self.conv5(x)) x = self.dropout2(x) - # Decoder: reconstruct to original sequence length + # --- Decoder --- x = self.flatten(x) x = torch.relu(self.fc1(x)) x = self.dropout3(x) - x = self.fc2(x) # [B, seq_len] + x = self.fc2(x) # Linear activation return x class Seq2Seq(Disaggregator): """ - NILM disaggregator using sequence-to-sequence learning. - Maps input power sequences to appliance power sequences of the same length. + Sequence-to-Sequence CNN for Non-Intrusive Load Monitoring (NILM). + + Based on the foundational sequence-to-sequence learning approach from: + "Sequence to Sequence Learning with Neural Networks" by Sutskever et al. + https://arxiv.org/abs/1409.3215 + + This implementation adapts the sequence-to-sequence paradigm for energy disaggregation, + using a CNN-based encoder-decoder architecture instead of the original LSTM approach. + The model learns to map input sequences of aggregate power consumption to output + sequences of individual appliance power consumption. + + Architecture Overview: + - Encoder: Multiple 1D convolutional layers with decreasing stride for feature extraction + - Decoder: Fully connected layers that reconstruct the sequence from encoded features + - Dropout layers for regularization throughout the network + - Sequence-to-sequence learning for temporal power disaggregation + + Args: + params (dict): Dictionary containing model hyperparameters: + - sequence_length (int): Length of input/output sequences (default: 99, must be odd) + - n_epochs (int): Number of training epochs (default: 10) + - batch_size (int): Training batch size (default: 512) + - appliance_params (dict): Appliance-specific normalization parameters + - chunk_wise_training (bool): Enable chunk-wise training (default: False) """ def __init__(self, params): - super().__init__() - + """Initializes the disaggregator and its hyperparameters.""" self.MODEL_NAME = "Seq2Seq" self.file_prefix = f"{self.MODEL_NAME.lower()}-temp-weights" + self.chunk_wise_training = params.get('chunk_wise_training', False) + self.sequence_length = params.get('sequence_length', 99) + self.n_epochs = params.get('n_epochs', 10) + self.models = OrderedDict() + self.mains_mean = 1800 + self.mains_std = 600 + self.batch_size = params.get('batch_size', 512) + self.appliance_params = params.get('appliance_params', {}) + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - # Extract hyperparameters - self.sequence_length = params.get('sequence_length', 99) if self.sequence_length % 2 == 0: - raise ValueError("sequence_length must be odd") - self.n_epochs = params.get('n_epochs', 10) - self.batch_size = params.get('batch_size', 512) - self.mains_mean = 1800 - self.mains_std = 600 - self.appliance_params = params.get('appliance_params', {}) # Normalization stats - self.models = OrderedDict() # Store separate models for each appliance - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + raise SequenceLengthError("Sequence length must be odd!") def return_network(self): - """Factory method to create a new Seq2Seq model instance""" + """Returns a new, initialized Seq2SeqModel instance.""" return Seq2SeqModel(self.sequence_length).to(self.device) def set_appliance_params(self, train_appliances): - """Compute normalization statistics (mean, std) for each appliance""" - for name, lst in train_appliances: - arr = pd.concat(lst, axis=0).values.flatten() - m, s = arr.mean(), arr.std() - # Prevent division by zero in normalization - if s < 1: s = 100 - self.appliance_params[name] = {'mean':m, 'std':s} - - def partial_fit(self, train_main, train_appliances, - do_preprocessing=True, current_epoch=0, **_): - """Train models on a chunk of data (supports incremental learning)""" - - # Compute appliance-specific normalization parameters if not provided + """Computes and sets normalization parameters for each appliance.""" + for (app_name, df_list) in train_appliances: + l = np.concatenate([df.values for df in df_list]) + app_mean = np.mean(l) + app_std = np.std(l) + if app_std < 1: + app_std = 100 # Avoid division by zero for flat signals + self.appliance_params[app_name] = {'mean': app_mean, 'std': app_std} + + def partial_fit(self, train_main, train_appliances, do_preprocessing=True, current_epoch=0, **load_kwargs): + """Trains the model on a chunk of data.""" + print("...............Seq2Seq partial_fit running...............") if not self.appliance_params: self.set_appliance_params(train_appliances) - # Preprocess data: windowing, normalization, etc. if do_preprocessing: - train_main, train_appliances = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=train_main, - submeters_lst=train_appliances, - method="train", - appliance_params=self.appliance_params, - windowing=True - ) - - # Prepare main power data for training - mains_arr = pd.concat(train_main,axis=0).values \ - .reshape(-1, self.sequence_length, 1) - - # Train a separate model for each appliance - for name, dfs in train_appliances: - # Prepare appliance power sequences (targets) - arr = pd.concat(dfs,axis=0).values \ - .reshape(-1, self.sequence_length) - - # Create new model if this appliance hasn't been seen before - if name not in self.models: - self.models[name] = self.return_network() - model = self.models[name] - - # Convert to tensors - X = torch.tensor(mains_arr, dtype=torch.float32) - Y = torch.tensor(arr, dtype=torch.float32) - - # Split into training and validation sets - split = int(0.85*len(X)) - - tr_ds = TensorDataset(X[:split], Y[:split]) - va_ds = TensorDataset(X[split:], Y[split:]) - tr = DataLoader(tr_ds, batch_size=self.batch_size, shuffle=True) - va = DataLoader(va_ds, batch_size=self.batch_size) - - # Setup training components - opt = optim.Adam(model.parameters()) - loss_fn = nn.MSELoss() - best = float('inf') - ckpt = f"{self.file_prefix}-{name}-epoch{current_epoch}.pt" - - # Training loop - for epoch in tqdm(range(self.n_epochs), desc=f"Train {name}"): - # Training phase - model.train() - for xb, yb in tr: - xb, yb = xb.to(self.device), yb.to(self.device) - opt.zero_grad() - out = model(xb) # [B, seq_len] - loss_fn(out, yb).backward() - opt.step() - - # Validation phase - model.eval() - val_losses = [] - with torch.no_grad(): - for xb, yb in va: - xb, yb = xb.to(self.device), yb.to(self.device) - val_losses.append(loss_fn(model(xb), yb).item()) - val_loss = sum(val_losses)/len(val_losses) - - # Save best model based on validation loss - if val_loss < best: - best = val_loss - torch.save(model.state_dict(), ckpt) + train_main, train_appliances = self.call_preprocessing( + train_main, train_appliances, 'train') + + # Prepare data for training + train_main = pd.concat(train_main, axis=0).values.reshape((-1, self.sequence_length, 1)) + + new_train_appliances = [] + for app_name, app_dfs in train_appliances: + app_df_values = pd.concat(app_dfs, axis=0).values.reshape((-1, self.sequence_length)) + new_train_appliances.append((app_name, app_df_values)) + train_appliances = new_train_appliances + + for appliance_name, power in train_appliances: + if appliance_name not in self.models: + print(f"First time training for {appliance_name}") + self.models[appliance_name] = self.return_network() + else: + print(f"Retraining model for {appliance_name}") - # Load the best model weights - model.load_state_dict(torch.load(ckpt, map_location=self.device)) + model = self.models[appliance_name] + if train_main.size > 10: + filepath = f"{self.file_prefix}-{'_'.join(appliance_name.split())}-epoch{current_epoch}.pt" + + # Convert to PyTorch Tensors + train_main_tensor = torch.tensor(train_main, dtype=torch.float32) + power_tensor = torch.tensor(power, dtype=torch.float32) + + # Use the last 15% of data for validation to mirror TensorFlow's behavior + n_total = len(train_main_tensor) + val_size = int(0.15 * n_total) + + train_x = train_main_tensor[:-val_size].to(self.device) + val_x = train_main_tensor[-val_size:].to(self.device) + train_y = power_tensor[:-val_size].to(self.device) + val_y = power_tensor[-val_size:].to(self.device) + + # Optimizer and loss function, with parameters matching TensorFlow + optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-07) + criterion = nn.MSELoss() + + best_val_loss = float('inf') + + # Create DataLoader for batching + train_dataset = TensorDataset(train_x, train_y) + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True) + + for epoch in range(self.n_epochs): + # --- Training Phase --- + model.train() + train_loss = 0.0 + + for batch_x, batch_y in train_loader: + optimizer.zero_grad() + outputs = model(batch_x) + loss = criterion(outputs, batch_y) + loss.backward() + optimizer.step() + train_loss += loss.item() + + train_loss /= len(train_loader) + + # --- Validation Phase --- + model.eval() + with torch.no_grad(): + val_outputs = model(val_x) + val_loss = criterion(val_outputs, val_y).item() + + # Save the best model based on validation loss + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), filepath) + print(f'Epoch {epoch+1}/{self.n_epochs} - loss: {train_loss:.4f} - val_loss: {val_loss:.4f}') + + # Load the best performing model + model.load_state_dict(torch.load(filepath)) def disaggregate_chunk(self, test_main_list, model=None, do_preprocessing=True): - """Disaggregate power consumption using overlapping windows and averaging""" - - if model: self.models = model - - # Preprocess test data similar to training data + """Disaggregates a chunk of mains data.""" + if model is not None: + self.models = model + if do_preprocessing: - test_main_list = preprocess( - sequence_length=self.sequence_length, - mains_mean=self.mains_mean, - mains_std=self.mains_std, - mains_lst=test_main_list, - submeters_lst=None, - method="test", - appliance_params=self.appliance_params, - windowing=True - ) - - results = [] - n = self.sequence_length - - # Process each chunk of test data - for tm in test_main_list: - arr = tm.values.reshape(-1, n) - ds = DataLoader(TensorDataset(torch.tensor(arr, dtype=torch.float32)), - batch_size=self.batch_size) - outd = {} - - # Get predictions from each appliance model - for name, m in self.models.items(): - preds = [] - m.eval() + test_main_list = self.call_preprocessing( + test_main_list, submeters_lst=None, method='test') + + test_predictions = [] + for test_mains_df in test_main_list: + disggregation_dict = {} + test_main_array = test_mains_df.values.reshape((-1, self.sequence_length, 1)) + + for appliance, model in self.models.items(): + test_tensor = torch.tensor(test_main_array, dtype=torch.float32).to(self.device) + + model.eval() with torch.no_grad(): - for (xb_cpu,) in ds: - # Unsqueeze back to [B, seq_len, 1] for model input - xb = xb_cpu.unsqueeze(-1).to(self.device) - p = m(xb).cpu().numpy() # [B, seq_len] - preds.append(p) + # Process in batches to manage memory + predictions = [] + for i in range(0, len(test_tensor), self.batch_size): + batch = test_tensor[i:i + self.batch_size] + batch_pred = model(batch).cpu().numpy() + predictions.append(batch_pred) + prediction = np.concatenate(predictions, axis=0) + + # Average predictions over overlapping windows + l = self.sequence_length + n = len(prediction) + l - 1 + sum_arr = np.zeros(n) + counts_arr = np.zeros(n) - # Concatenate all predictions - P = np.concatenate(preds, axis=0) + for i, p in enumerate(prediction): + sum_arr[i:i+l] += p.flatten() + counts_arr[i:i+l] += 1 - # Reconstruct full sequence by averaging overlapping windows - total = P.shape[0] + n - 1 - sum_arr = np.zeros(total) - counts_arr = np.zeros(total) - for i in range(P.shape[0]): - sum_arr[i:i+n] += P[i] - counts_arr[i:i+n] += 1 - avg = sum_arr/counts_arr + # Avoid division by zero + counts_arr[counts_arr == 0] = 1 + averaged_prediction = sum_arr / counts_arr + + # Denormalize the prediction + app_mean = self.appliance_params[appliance]['mean'] + app_std = self.appliance_params[appliance]['std'] + denormalized_prediction = app_mean + (averaged_prediction * app_std) - # Denormalize predictions back to original power scale - mpar = self.appliance_params[name] - out = mpar['mean'] + avg * mpar['std'] + # Set negative values to zero + denormalized_prediction[denormalized_prediction < 0] = 0 + df = pd.Series(denormalized_prediction) + disggregation_dict[appliance] = df - # Ensure non-negative power values - outd[name] = pd.Series(np.clip(out, 0, None)) + results = pd.DataFrame(disggregation_dict, dtype='float32') + test_predictions.append(results) + + return test_predictions + + def call_preprocessing(self, mains_lst, submeters_lst, method): + """ + Preprocesses data by windowing and normalizing, mirroring the + original TensorFlow implementation. + """ + if method == 'train': + # Preprocess mains + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + units_to_pad = n // 2 + new_mains = np.pad(new_mains, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + processed_mains_lst.append(pd.DataFrame(new_mains)) + + # Preprocess appliances + appliance_list = [] + for app_index, (app_name, app_df_lst) in enumerate(submeters_lst): + if app_name not in self.appliance_params: + raise ApplianceNotFoundError(f"Parameters for appliance '{app_name}' not found!") - # Combine all appliance predictions for this chunk - results.append(pd.DataFrame(outd, dtype='float32')) - return results \ No newline at end of file + app_mean = self.appliance_params[app_name]['mean'] + app_std = self.appliance_params[app_name]['std'] + + processed_app_dfs = [] + for app_df in app_df_lst: + new_app_readings = app_df.values.flatten() + new_app_readings = np.pad(new_app_readings, (units_to_pad, units_to_pad), 'constant', constant_values=(0, 0)) + new_app_readings = np.array([new_app_readings[i:i + n] for i in range(len(new_app_readings) - n + 1)]) + new_app_readings = (new_app_readings - app_mean) / app_std + processed_app_dfs.append(pd.DataFrame(new_app_readings)) + + appliance_list.append((app_name, processed_app_dfs)) + + return processed_mains_lst, appliance_list + + else: # method == 'test' + processed_mains_lst = [] + for mains in mains_lst: + new_mains = mains.values.flatten() + n = self.sequence_length + # The original TF implementation did not pad test data, so we omit it here. + # units_to_pad = n // 2 + # new_mains = np.pad(new_mains, (units_to_pad,units_to_pad),'constant',constant_values = (0,0)) + new_mains = np.array([new_mains[i:i + n] for i in range(len(new_mains) - n + 1)]) + new_mains = (new_mains - self.mains_mean) / self.mains_std + new_mains = new_mains.reshape((-1, self.sequence_length)) + processed_mains_lst.append(pd.DataFrame(new_mains)) + return processed_mains_lst \ No newline at end of file