diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2b77530..44cb603 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -60,14 +60,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+        python: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         platform: [ubuntu-latest, macos-latest, windows-latest]
-        exclude: # Python < v3.8 does not support Apple Silicon ARM64.
-          - python: "3.7"
-            platform: macos-latest
-        include: # So run those legacy versions on Intel CPUs.
-          - python: "3.7"
-            platform: macos-13
     runs-on: ${{ matrix.platform }}
     steps:
       - uses: actions/checkout@v3
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9a01735..38b3757 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -40,7 +40,7 @@ repos:
   - id: isort

 - repo: https://github.com/psf/black
-  rev: 24.4.2
+  rev: 24.10.0
   hooks:
   - id: black
     language_version: python3
@@ -53,7 +53,7 @@
 #     additional_dependencies: [black]

 - repo: https://github.com/PyCQA/flake8
-  rev: 7.0.0
+  rev: 7.1.1
   hooks:
   - id: flake8
     additional_dependencies: [flake8-docstrings]
@@ -66,7 +66,7 @@

 # Check for type errors with mypy:
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: 'v1.10.0'
+  rev: 'v1.13.0'
   hooks:
   - id: mypy
     args: [--disallow-untyped-defs, --ignore-missing-imports]
diff --git a/AUTHORS.md b/AUTHORS.md
index 35386b1..8c3133f 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -2,6 +2,6 @@

 * Arnab Mondal [arnab.mondal@mila.quebec](mailto:arnab.mondal@mila.quebec)
 * [Siba Smarak Panigrahi](https://sibasmarak.github.io/) [siba-smarak.panigrahi@mila.quebec](mailto:siba-smarak.panigrahi@mila.quebec)
-* [Danielle Benesch](https://github.com/danibene) [daniellerbenesch@gmail.com](mailto:daniellerbenesch@gmail.com)
+* [Danielle Benesch](https://github.com/danibene) [dbenesch@uni-osnabrueck.de](mailto:dbenesch@uni-osnabrueck.de)
 * [Jikael Gagnon](https://github.com/jikaelgagnon) [jikael.gagnon@mila.quebec](mailto:jikael.gagnon@mila.quebec)
 * [Sékou-Oumar Kaba](https://oumarkaba.github.io) [kabaseko@mila.quebec](mailto:kabaseko@mila.quebec)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba5a48f..e85a21d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [Unreleased]
+
+### Added
+
+### Fixed
+- Initialization of padding parameters in `DiscreteGroupImageCanonicalization` class, allowing for multiple types of `resize_shape`.
+
+### Changed
+- Increased minimum Python version to 3.8.
+- Specified maximum NumPy version as <2.0.
+
+### Removed
+
 ## [0.1.2] - 2024-05-29

 ### Added
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 245d2c9..a92fa01 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -224,7 +224,7 @@ package:
    ```

 3. Make sure to have a reliable [tox] installation that uses the correct
-   Python version (e.g., 3.7+). When in doubt you can run:
+   Python version (e.g., 3.8+). When in doubt you can run:

    ```
    tox --version
diff --git a/equiadapt/common/basecanonicalization.py b/equiadapt/common/basecanonicalization.py
index a82ce1a..1a3f62a 100644
--- a/equiadapt/common/basecanonicalization.py
+++ b/equiadapt/common/basecanonicalization.py
@@ -300,8 +300,10 @@ def invert_canonicalization(
     #         self.device
     #     )
     #     return torch.nn.CrossEntropyLoss()(group_activations, dataset_prior)
-
-    def get_prior_regularization_loss(self, dataset_prior: Optional[torch.Tensor] = None) -> torch.Tensor:
+
+    def get_prior_regularization_loss(
+        self, dataset_prior: Optional[torch.Tensor] = None
+    ) -> torch.Tensor:
         """
         Gets the prior regularization loss.

@@ -322,7 +324,7 @@ def get_prior_regularization_loss(self, dataset_prior: Optional[torch.Tensor] =
         log_group_activations = F.log_softmax(group_activations, dim=1)

         # KL Divergence
-        return F.kl_div(log_group_activations, dataset_prior, reduction='batchmean')
+        return F.kl_div(log_group_activations, dataset_prior, reduction="batchmean")

     def get_identity_metric(self) -> torch.Tensor:
         """
@@ -430,7 +432,6 @@ def get_prior_regularization_loss(self) -> torch.Tensor:
             .to(self.device)
         )
         return torch.nn.MSELoss()(group_elements_rep, dataset_prior)
-

     def get_identity_metric(self) -> torch.Tensor:
         """
diff --git a/equiadapt/images/canonicalization/discrete_group.py b/equiadapt/images/canonicalization/discrete_group.py
index 061c3a5..af17261 100644
--- a/equiadapt/images/canonicalization/discrete_group.py
+++ b/equiadapt/images/canonicalization/discrete_group.py
@@ -3,7 +3,7 @@

 import kornia as K
 import torch
-from omegaconf import DictConfig
+from omegaconf import DictConfig, ListConfig
 from torch.nn import functional as F
 from torchvision import transforms

@@ -90,7 +90,7 @@ def __init__(
             if is_grayscale
             else transforms.Resize(size=canonicalization_hyperparams.resize_shape)
         )
-
+
         # group augment specific cropping and padding (required for group_augment())
         group_augment_in_shape = canonicalization_hyperparams.resize_shape
         self.crop_group_augment = (
@@ -98,14 +98,23 @@
             torch.nn.Identity()
             if in_shape[0] == 1
             else transforms.CenterCrop(group_augment_in_shape)
         )
-        self.pad_group_augment = (
-            torch.nn.Identity()
-            if in_shape[0] == 1
-            else transforms.Pad(
-                math.ceil(group_augment_in_shape * 0.5), padding_mode="edge"
-            )
-        )
-
+        self._set_pad_group_augment(in_shape, group_augment_in_shape)
+
+    def _set_pad_group_augment(
+        self, in_shape: tuple, group_augment_in_shape: Union[ListConfig, float]
+    ) -> None:
+        if in_shape[0] == 1:
+            self.pad_group_augment = torch.nn.Identity()
+        else:
+            padding = []
+            if isinstance(group_augment_in_shape, ListConfig):
+                for i in range(len(group_augment_in_shape)):
+                    padding.append(math.ceil(group_augment_in_shape[i] * 0.5))
+            else:
+                padding.append(math.ceil(group_augment_in_shape * 0.5))
+
+            self.pad_group_augment = transforms.Pad(padding, padding_mode="edge")
+
     def rotate_and_maybe_reflect(
         self, x: torch.Tensor, degrees: torch.Tensor, reflect: bool = False
     ) -> List[torch.Tensor]:
@@ -133,7 +142,8 @@
     def group_augment(self, x: torch.Tensor) -> torch.Tensor:
         """
         Augment the input images by applying group transformations (rotations and reflections).
-        This function is used both for the energy based optimization method for the discrete rotation
+
+        This function is used both for the energy based optimization method for the discrete rotation.

         Args:
             x (torch.Tensor): The input image.
@@ -315,15 +325,15 @@ def invert_canonicalization(
             group_element_dict=self.canonicalization_info_dict["group_element"],  # type: ignore
             induced_rep_type=induced_rep_type,
         )
-
+
     def get_prior(
-        self, 
-        x: torch.Tensor, 
+        self,
+        x: torch.Tensor,
         model: torch.nn.Module,
         targets: torch.Tensor,
         metric_function: torch.nn.Module,
         tau: float = 1.0,
-    ) -> torch.Tensor: 
+    ) -> torch.Tensor:
         """
         Get the prior for the input images.

@@ -339,30 +349,36 @@ def get_prior(
         """
         with torch.no_grad():
             batch_size = x.shape[0]
-            x_augmented = self.group_augment(x) # size (group_size * batch_size, in_channels, height, width)
+            x_augmented = self.group_augment(
+                x
+            )  # size (group_size * batch_size, in_channels, height, width)

             # If a self.group_augment_target is defined, apply the same transformation to the targets
             # Or else just repeat the targets for each group element in the first dimension
             if hasattr(self, "group_augment_target"):
                 targets_augmented = self.group_augment_target(targets)
             else:
-                targets_augmented = targets.repeat(self.num_group, 1).flatten() # size (group_size * batch_size)
-
+                targets_augmented = targets.repeat(
+                    self.num_group, 1
+                ).flatten()  # size (group_size * batch_size)
+
             # Get the output of the model for the augmented images
-            model_output = model(x_augmented) # size eg (group_size * batch_size, num_classes)
-
+            model_output = model(
+                x_augmented
+            )  # size eg (group_size * batch_size, num_classes)
+
             # Get the unnormalized probability masses for each group element
-            unnormalized_prob_masses = metric_function(
-                model_output, targets_augmented
-            ).reshape(self.num_group, batch_size).transpose(0, 1) # size (batch_size, group_size)
-
+            unnormalized_prob_masses = (
+                metric_function(model_output, targets_augmented)
+                .reshape(self.num_group, batch_size)
+                .transpose(0, 1)
+            )  # size (batch_size, group_size)
+
             # Get the prior for the input images
-            prior = F.softmax(unnormalized_prob_masses / tau, dim=-1) # size (batch_size, group_size)
-
+            prior = F.softmax(
+                unnormalized_prob_masses / tau, dim=-1
+            )  # size (batch_size, group_size)
+
             return prior
-
-
-
-

 class GroupEquivariantImageCanonicalization(DiscreteGroupImageCanonicalization):
@@ -486,8 +502,12 @@ def get_group_activations(self, x: torch.Tensor) -> torch.Tensor:
             torch.Tensor: The group activations.
         """
         x = self.transformations_before_canonicalization_network_forward(x)
-        x_augmented = self.group_augment(x) # size (batch_size * group_size, in_channels, height, width)
-        vector_out = self.canonicalization_network(x_augmented) # size (batch_size * group_size, reference_vector_size)
+        x_augmented = self.group_augment(
+            x
+        )  # size (batch_size * group_size, in_channels, height, width)
+        vector_out = self.canonicalization_network(
+            x_augmented
+        )  # size (batch_size * group_size, reference_vector_size)

         self.canonicalization_info_dict = {"vector_out": vector_out}

         if self.artifact_err_wt:
diff --git a/equiadapt/nbody/canonicalization_networks/custom_equivariant_networks.py b/equiadapt/nbody/canonicalization_networks/custom_equivariant_networks.py
index dd983d8..252fd65 100644
--- a/equiadapt/nbody/canonicalization_networks/custom_equivariant_networks.py
+++ b/equiadapt/nbody/canonicalization_networks/custom_equivariant_networks.py
@@ -1,4 +1,4 @@
-from typing import Any, Tuple
+from typing import Any, Optional, Tuple

 import torch
 import torch.nn as nn
@@ -51,8 +51,10 @@
     ) -> None:
         super().__init__()
         self.device: str = device
-        self.learning_rate: float = (
-            hyperparams.learning_rate if hasattr(hyperparams, "learning_rate") else None
+        self.learning_rate: Optional[float] = (
+            float(hyperparams.learning_rate)
+            if hasattr(hyperparams, "learning_rate")
+            else None
         )
         self.weight_decay: float = (
             hyperparams.weight_decay if hasattr(hyperparams, "weight_decay") else 0.0
diff --git a/examples/images/classification/model.py b/examples/images/classification/model.py
index d758091..95baaad 100644
--- a/examples/images/classification/model.py
+++ b/examples/images/classification/model.py
@@ -1,11 +1,12 @@
 from math import tau
+
 import pytorch_lightning as pl
 import torch
 from inference_utils import get_inference_method
 from model_utils import get_dataset_specific_info, get_prediction_network
 from omegaconf import DictConfig
-from torch.optim.lr_scheduler import MultiStepLR
 from torch.nn import functional as F
+from torch.optim.lr_scheduler import MultiStepLR

 from examples.images.common.utils import get_canonicalization_network, get_canonicalizer

@@ -104,13 +105,19 @@ def training_step(self, batch: torch.Tensor):
         # Add prior regularization loss if the prior weight is non-zero
         if self.hyperparams.experiment.training.loss.prior_weight:
             if self.hyperparams.experiment.training.loss.automated_prior:
+
                 def metric_function(model_predictions, targets):
-                    return -F.cross_entropy(model_predictions, targets, reduction='none')
-                prior = self.canonicalizer.get_prior(x, self.prediction_network, y, metric_function, tau=0.01)
-                prior_loss = self.canonicalizer.get_prior_regularization_loss(prior) # type: ignore
+                    return -F.cross_entropy(
+                        model_predictions, targets, reduction="none"
+                    )
+
+                prior = self.canonicalizer.get_prior(
+                    x, self.prediction_network, y, metric_function, tau=0.01
+                )
+                prior_loss = self.canonicalizer.get_prior_regularization_loss(prior)  # type: ignore
             else:
                 prior_loss = self.canonicalizer.get_prior_regularization_loss()
-
+
             loss += prior_loss * self.hyperparams.experiment.training.loss.prior_weight
             metric_identity = self.canonicalizer.get_identity_metric()
             training_metrics.update(
diff --git a/examples/images/reinforcementlearning/configs/canonicalization/group_equivariant.yaml b/examples/images/reinforcementlearning/configs/canonicalization/group_equivariant.yaml
index eb2b11d..6ca2f55 100644
--- a/examples/images/reinforcementlearning/configs/canonicalization/group_equivariant.yaml
+++ b/examples/images/reinforcementlearning/configs/canonicalization/group_equivariant.yaml
@@ -8,4 +8,4 @@ network_hyperparams:
   num_rotations: 4 # Number of rotations for the canonization network
 beta: 1.0 # Beta parameter for the canonization network
 input_crop_ratio: 0.8 # Ratio at which we crop the input to the canonicalization
-resize_shape: 64 # Resize shape for the input
\ No newline at end of file
+resize_shape: 64 # Resize shape for the input
diff --git a/examples/images/reinforcementlearning/configs/canonicalization/identity.yaml b/examples/images/reinforcementlearning/configs/canonicalization/identity.yaml
index 1598d17..513e776 100644
--- a/examples/images/reinforcementlearning/configs/canonicalization/identity.yaml
+++ b/examples/images/reinforcementlearning/configs/canonicalization/identity.yaml
@@ -1 +1 @@
-canonicalization_type: identity
\ No newline at end of file
+canonicalization_type: identity
diff --git a/examples/images/reinforcementlearning/configs/default.yaml b/examples/images/reinforcementlearning/configs/default.yaml
index 95b8d43..3718239 100644
--- a/examples/images/reinforcementlearning/configs/default.yaml
+++ b/examples/images/reinforcementlearning/configs/default.yaml
@@ -20,4 +20,4 @@ defaults:
   - env: default
   - experiment: default
   - canonicalization: identity
-  - wandb: default
\ No newline at end of file
+  - wandb: default
diff --git a/examples/images/reinforcementlearning/configs/experiment/default.yaml b/examples/images/reinforcementlearning/configs/experiment/default.yaml
index 909e87b..e883a87 100644
--- a/examples/images/reinforcementlearning/configs/experiment/default.yaml
+++ b/examples/images/reinforcementlearning/configs/experiment/default.yaml
@@ -9,4 +9,4 @@ replay_memory_size: 100000
 end_score: 200
 training_stop: 142
 num_episodes: 50000
-last_episodes_num: 20
\ No newline at end of file
+last_episodes_num: 20
diff --git a/examples/images/reinforcementlearning/network.py b/examples/images/reinforcementlearning/network.py
index 6c5729c..bc06214 100644
--- a/examples/images/reinforcementlearning/network.py
+++ b/examples/images/reinforcementlearning/network.py
@@ -1,7 +1,9 @@
+import random
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import random
+

 class DQN(nn.Module):
     def __init__(self, input_shape, num_actions, dueling_DQN=False):
@@ -20,7 +22,7 @@ def __init__(self, input_shape, num_actions, dueling_DQN=False):
             nn.ReLU(),
             nn.Conv2d(64, 64, kernel_size=5, stride=2),
             nn.BatchNorm2d(64),
-            nn.ReLU()
+            nn.ReLU(),
         )

         feature_size = self._get_feature_size()
@@ -30,20 +32,20 @@ def __init__(self, input_shape, num_actions, dueling_DQN=False):
                 nn.Linear(feature_size, 512),
                 nn.BatchNorm1d(512),
                 nn.ReLU(),
-                nn.Linear(512, self.num_actions)
+                nn.Linear(512, self.num_actions),
             )
             self.value = nn.Sequential(
                 nn.Linear(feature_size, 512),
                 nn.BatchNorm1d(512),
                 nn.ReLU(),
-                nn.Linear(512, 1)
+                nn.Linear(512, 1),
             )
         else:
             self.action_value = nn.Sequential(
                 nn.Linear(feature_size, 512),
                 nn.BatchNorm1d(512),
                 nn.ReLU(),
-                nn.Linear(512, self.num_actions)
+                nn.Linear(512, self.num_actions),
             )

     def forward(self, x):
@@ -57,11 +59,10 @@ def forward(self, x):
             q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
         else:
             q_values = self.action_value(x)
-
+
         return q_values

     def _get_feature_size(self):
         self.features.eval()
         with torch.no_grad():
             return self.features(torch.zeros(1, *self.input_shape)).view(1, -1).size(1)
-
diff --git a/examples/images/reinforcementlearning/prepare/gym_cartpole.py b/examples/images/reinforcementlearning/prepare/gym_cartpole.py
index cf68696..6863302 100644
--- a/examples/images/reinforcementlearning/prepare/gym_cartpole.py
+++ b/examples/images/reinforcementlearning/prepare/gym_cartpole.py
@@ -1,13 +1,14 @@
+import gym
 import numpy as np
 import torch
 import torchvision.transforms as T
 from PIL import Image
-import gym
+

 class CartpoleWrapper(gym.Wrapper):
     def __init__(self, env, env_hyperparams):
         """Initialize the wrapper for the CartPole environment to preprocess images.
-
+
         Args:
             env (gym.Env): The Gym environment to wrap.
             env_hyperparams (dict): Dictionary containing settings for image preprocessing,
@@ -17,12 +18,11 @@ def __init__(self, env, env_hyperparams):
         self.env = env
         self.num_actions = env.action_space.n
         self.state_shape = env.observation_space.shape
-
+
         # Base transformations that are always applied
         transformations = [
             T.ToPILImage(),
-            T.Resize(env_hyperparams["resize_pixels"],
-                     interpolation=Image.BICUBIC),
+            T.Resize(env_hyperparams["resize_pixels"], interpolation=Image.BICUBIC),
         ]

         # Conditional grayscale transformation
@@ -34,10 +34,10 @@ def __init__(self, env, env_hyperparams):

         # Compose all transformations into a single callable object
         self.resize = T.Compose(transformations)
-
+
     def get_cart_location(self, screen_width):
         """Calculate the cart's location on the screen for cropping.
-
+
         Args:
             screen_width (int): The width of the screen from the environment.
@@ -62,7 +62,7 @@ def get_screen(self):
         _, screen_height, screen_width = screen.shape

         # Crop the vertical dimension to focus on the main area of interest
-        screen = screen[:, int(screen_height * 0.4):int(screen_height * 0.8)]
+        screen = screen[:, int(screen_height * 0.4) : int(screen_height * 0.8)]

         # Define the width of the cropped area around the cart
         view_width = int(screen_width * 0.6)
@@ -74,16 +74,18 @@ def get_screen(self):
         elif cart_location > (screen_width - view_width // 2):
             slice_range = slice(-view_width, None)
         else:
-            slice_range = slice(cart_location - view_width // 2, cart_location + view_width // 2)
+            slice_range = slice(
+                cart_location - view_width // 2, cart_location + view_width // 2
+            )

         # Apply the calculated slice to crop horizontally
         screen = screen[:, :, slice_range]

         # Normalize, convert to tensor, resize, and add a batch dimension
-        screen = np.ascontiguousarray(screen, dtype=np.float32) / 255.
+        screen = np.ascontiguousarray(screen, dtype=np.float32) / 255.0
         screen = torch.from_numpy(screen)
         return self.resize(screen).unsqueeze(0)
-
+
     def step(self, action):
         """Apply an action to the environment, returning the processed screen, reward, done, and info."""
         return self.env.step(action)
@@ -91,4 +93,3 @@ def step(self, action):
     def reset(self):
         """Reset the environment and return the initial processed screen."""
         self.env.reset()
-
diff --git a/examples/images/reinforcementlearning/train.py b/examples/images/reinforcementlearning/train.py
index 4958c30..9320084 100644
--- a/examples/images/reinforcementlearning/train.py
+++ b/examples/images/reinforcementlearning/train.py
@@ -1,44 +1,43 @@
+import math
+import os
+import random
+from collections import deque
+from itertools import count
 from re import A
 from tracemalloc import stop
+from typing import List, Tuple
+
 import gym
-import os
 import hydra
 import omegaconf
-import wandb
-import random
-import math
-
-from itertools import count
 import torch
-import torch.optim as optim
 import torch.nn.functional as F
+import torch.optim as optim
+import wandb
+from network import DQN
 from omegaconf import DictConfig, OmegaConf
-from zmq import device
-
 from prepare.gym_cartpole import CartpoleWrapper
-from collections import deque
+from tqdm import tqdm
+from zmq import device

-from typing import List, Tuple
-from omegaconf import DictConfig
-from network import DQN
 from utils import ReplayMemory, Transition, load_envs
-from tqdm import tqdm
-

 # Setup the environment using a wrapper
 def setup_environment(env_hyperparams):
     if env_hyperparams["name"] == "cartpole":
-        env = gym.make('CartPole-v1', render_mode='rgb_array')
+        env = gym.make("CartPole-v1", render_mode="rgb_array")
         env = CartpoleWrapper(env, env_hyperparams)
     return env

+
 # Action selection , if stop training == True, only exploitation
 def select_action(dqn, state, steps_done, exp_hyperparams, stop_training):
     dqn.eval()
     sample = random.random()
-    eps_threshold = exp_hyperparams["eps_end"] + (exp_hyperparams["eps_start"]- exp_hyperparams["eps_end"]) * \
-        math.exp(-1. * steps_done / exp_hyperparams["eps_decay"])
+    eps_threshold = exp_hyperparams["eps_end"] + (
+        exp_hyperparams["eps_start"] - exp_hyperparams["eps_end"]
+    ) * math.exp(-1.0 * steps_done / exp_hyperparams["eps_decay"])
     # print('Epsilon = ', eps_threshold, end='\n')
     if sample > eps_threshold or stop_training:
         with torch.no_grad():
@@ -52,13 +51,15 @@ def select_action(dqn, state, steps_done, exp_hyperparams, stop_training):
     dqn.train()
     return action

+
 def optimize_model(
-        memory: ReplayMemory,
-        dqn: DQN,
-        optimizer: optim.Optimizer,
-        target_dqn: DQN,
-        batch_size: int,
-        gamma: float) -> None:
+    memory: ReplayMemory,
+    dqn: DQN,
+    optimizer: optim.Optimizer,
+    target_dqn: DQN,
+    batch_size: int,
+    gamma: float,
+) -> None:
     """
     Optimize the DQN model using the given memory replay buffer.
@@ -77,35 +78,37 @@ def optimize_model(
     state_batch = torch.cat(batch.state)
     action_batch = torch.cat(batch.action).unsqueeze(1)
-
+
     device = state_batch.device
-
+
     non_final_mask = torch.tensor(
-        tuple(map(lambda s: s is not None, batch.next_state)),
-        device=device, dtype=torch.bool
-    )
-    non_final_next_states = torch.cat(
-        [s for s in batch.next_state if s is not None]
+        tuple(map(lambda s: s is not None, batch.next_state)),
+        device=device,
+        dtype=torch.bool,
     )
-
+    non_final_next_states = torch.cat([s for s in batch.next_state if s is not None])
+
     reward_batch = torch.cat(batch.reward).type(torch.FloatTensor).to(device)
-
+
     state_action_values = dqn(state_batch).gather(1, action_batch)
     next_state_values = torch.zeros(batch_size, device=action_batch.device)
-    next_state_values[non_final_mask] = target_dqn(non_final_next_states).max(1)[0].detach()
+    next_state_values[non_final_mask] = (
+        target_dqn(non_final_next_states).max(1)[0].detach()
+    )
     expected_state_action_values = (next_state_values * gamma) + reward_batch

-    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
-    wandb.log({'Loss:': loss.item()})
+    loss = F.smooth_l1_loss(
+        state_action_values, expected_state_action_values.unsqueeze(1)
+    )
+    wandb.log({"Loss:": loss.item()})
     optimizer.zero_grad()
     loss.backward()
     for param in dqn.parameters():
         param.grad.data.clamp_(-1, 1)
     optimizer.step()
-
-
+

 def train_rl(hyperparams: DictConfig) -> None:
@@ -117,7 +120,7 @@ def train_rl(hyperparams: DictConfig) -> None:
         os.environ["WANDB_MODE"] = "disabled"
     os.environ["WANDB_DIR"] = hyperparams["wandb"]["wandb_dir"]
     os.environ["WANDB_CACHE_DIR"] = hyperparams["wandb"]["wandb_cache_dir"]
-
+
     # initialize wandb
     wandb.init(
         config=OmegaConf.to_container(hyperparams, resolve=True),
@@ -126,21 +129,21 @@ def train_rl(hyperparams: DictConfig) -> None:
         dir=hyperparams["wandb"]["wandb_dir"],
     )
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
+
     env = setup_environment(hyperparams["env"])
     env.reset()
     init_screen = env.get_screen().to(device)
-
+
     _, inchannels, screen_height, screen_width = init_screen.shape
     FRAMES = hyperparams["env"]["frames"]
     input_shape = (FRAMES * inchannels, screen_height, screen_width)
-    print("Screen height: ", screen_height," | Width: ", screen_width)
+    print("Screen height: ", screen_height, " | Width: ", screen_width)

     # Get number of actions from gym action space
     num_actions = env.num_actions
-
+
     exp_hyperparams = hyperparams["experiment"]
-
+
     dqn = DQN(input_shape, num_actions).to(device)
     target_dqn = DQN(input_shape, num_actions).to(device)
     target_dqn.load_state_dict(dqn.state_dict())
@@ -149,17 +152,19 @@ def train_rl(hyperparams: DictConfig) -> None:
     optimizer = optim.RMSprop(dqn.parameters())
     # optimizer = optim.Adam(dqn.parameters(), lr=exp_hyperparams["learning_rate"])
     memory = ReplayMemory(exp_hyperparams["replay_memory_size"])
-    mean_last = deque([0] * exp_hyperparams['last_episodes_num'], exp_hyperparams['last_episodes_num'])
-
+    mean_last = deque(
+        [0] * exp_hyperparams["last_episodes_num"], exp_hyperparams["last_episodes_num"]
+    )
+
     stop_training = False
-    count_final = 0
+    count_final = 0
     steps_done = 0

    episode_durations = []
    num_episodes = exp_hyperparams["num_episodes"]
    # Wrap your range function with tqdm for a progress bar
    for i_episode in tqdm(range(num_episodes), desc="Training Episodes"):
-        # for i_episode in range(exp_hyperparams["num_episodes"]):
         # Initialize the environment and state
         env.reset()
         init_screen = env.get_screen().to(device)
@@ -169,7 +174,9 @@ def train_rl(hyperparams: DictConfig) -> None:

         for t in count():
             # Select and perform an action
-            action = select_action(dqn, state, steps_done, exp_hyperparams, stop_training)
+            action = select_action(
+                dqn, state, steps_done, exp_hyperparams, stop_training
+            )
             state_variables, _, done, _, _ = env.step(action)
             steps_done += 1
@@ -181,15 +188,17 @@ def train_rl(hyperparams: DictConfig) -> None:
             # Reward modification for better stability
             x, x_dot, theta, theta_dot = state_variables
             r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
-            r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
+            r2 = (
+                env.theta_threshold_radians - abs(theta)
+            ) / env.theta_threshold_radians - 0.5
             reward = r1 + r2
             reward = torch.tensor([reward], device=device)
             if t >= exp_hyperparams["end_score"] - 1:
                 reward = reward + 20
                 done = 1
-            else:
+            else:
                 if done:
-                    reward = reward - 20
+                    reward = reward - 20

             # Store the transition in memory
             action = torch.tensor([action], device=device)
@@ -203,35 +212,43 @@ def train_rl(hyperparams: DictConfig) -> None:
                 episode_durations.append(t + 1)
                 mean_last.append(t + 1)
                 mean = 0
-                wandb.log({'Episode duration': t+1 , 'Episode number': i_episode})
-                for i in range(exp_hyperparams['last_episodes_num']):
+                wandb.log({"Episode duration": t + 1, "Episode number": i_episode})
+                for i in range(exp_hyperparams["last_episodes_num"]):
                     mean = mean_last[i] + mean
-                mean = mean/exp_hyperparams['last_episodes_num']
-                if mean < exp_hyperparams['training_stop'] and stop_training == False:
-                    optimize_model(memory, dqn, optimizer, target_dqn,
-                                   exp_hyperparams["batch_size"],
-                                   exp_hyperparams["gamma"])
+                mean = mean / exp_hyperparams["last_episodes_num"]
+                if mean < exp_hyperparams["training_stop"] and stop_training == False:
+                    optimize_model(
+                        memory,
+                        dqn,
+                        optimizer,
+                        target_dqn,
+                        exp_hyperparams["batch_size"],
+                        exp_hyperparams["gamma"],
+                    )
                 else:
                     stop_training = True
                 break

         # Update the target network, copying all weights and biases in DQN
-        if i_episode % exp_hyperparams['target_update'] == 0:
+        if i_episode % exp_hyperparams["target_update"] == 0:
             target_dqn.load_state_dict(dqn.state_dict())
         if stop_training == True:
             count_final += 1
             if count_final >= 100:
                 break
-
-    print('Training Complete')
+
+    print("Training Complete")
     env.close()
-
+
+
 # load the variables from .env file
 load_envs()
-
+
+
 @hydra.main(config_path=str("./configs/"), config_name="default")
 def main(cfg: omegaconf.DictConfig) -> None:
     train_rl(cfg)

-if __name__ == '__main__':
+
+if __name__ == "__main__":
     main()
diff --git a/examples/images/reinforcementlearning/utils.py b/examples/images/reinforcementlearning/utils.py
index d7c1335..2685f33 100644
--- a/examples/images/reinforcementlearning/utils.py
+++ b/examples/images/reinforcementlearning/utils.py
@@ -1,15 +1,17 @@
 import random
 from collections import namedtuple
-import dotenv
 from typing import Optional
+
+import dotenv
+
 # Define Transition as a namedtuple for better structure and readability
-Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))
+Transition = namedtuple("Transition", ("state", "action", "next_state", "reward"))
+

 class ReplayMemory:
     def __init__(self, capacity):
         """Initialize the ReplayMemory with a fixed capacity.
-
+
         Args:
             capacity (int): The maximum size of the memory.
         """
@@ -19,7 +21,7 @@ def __init__(self, capacity):

     def push(self, state, action, next_state, reward):
         """Saves a transition into memory.
-
+
         Overwrites the oldest transition if memory is at capacity.
         Args:
             state: The state of the environment before taking the action.
@@ -29,7 +31,7 @@ def push(self, state, action, next_state, reward):
         """
         # Create a Transition from the given arguments
         transition = Transition(state, action, next_state, reward)
-
+
         # Check if there is still room to append a new transition
         if len(self.memory) < self.capacity:
             self.memory.append(None)
@@ -40,10 +42,10 @@ def push(self, state, action, next_state, reward):

     def sample(self, batch_size):
         """Samples a batch of transitions from memory.
-
+
         Args:
             batch_size (int): Number of transitions to sample.
-
+
         Returns:
             list: A list of randomly sampled transitions.
         """
@@ -53,6 +55,7 @@ def __len__(self):
         """Return the current size of internal memory."""
         return len(self.memory)

+
 def load_envs(env_file: Optional[str] = None) -> None:
     """
     Load all the environment variables defined in the `env_file`.
@@ -63,4 +66,4 @@ def load_envs(env_file: Optional[str] = None) -> None:
     :param env_file: the file that defines the environment variables to use.
     If None it searches for a `.env` file in the project.
     """
-    dotenv.load_dotenv(dotenv_path=env_file, override=True)
\ No newline at end of file
+    dotenv.load_dotenv(dotenv_path=env_file, override=True)
diff --git a/setup.cfg b/setup.cfg
index 8b1ddc0..2bc0190 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,15 +35,16 @@ packages = find_namespace:
 include_package_data = True

 # Require a min/specific Python version (comma-separated conditions)
-python_requires = >=3.7
+python_requires = >=3.8

 # Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
 # Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
 # new major versions. This works if the required packages follow Semantic Versioning.
 # For more information, check out https://semver.org/.
 install_requires =
+    setuptools
     torch
-    numpy
+    numpy<2.0
     torchvision
     kornia
     e2cnn