schneimo
diff --git a/‎examples/wrapper_example.py ‎examples/gym_wrapper_example.py
+9-8 b/‎examples/wrapper_example.py ‎examples/gym_wrapper_example.py
+9-8
diff --git a/‎setup.py
+1-1 b/‎setup.py
+1-1
diff --git a/‎simmod/algorithms/base.py
+3-3 b/‎simmod/algorithms/base.py
+3-3
diff --git a/‎simmod/algorithms/udr.py
+18-13 b/‎simmod/algorithms/udr.py
+18-13
diff --git a/‎simmod/common/noise_distributions.py ‎simmod/common/distributions.py
+145-29 b/‎simmod/common/noise_distributions.py ‎simmod/common/distributions.py
+145-29
diff --git a/‎simmod/common/parametrization.py
+15-8 b/‎simmod/common/parametrization.py
+15-8
@@ -1,19 +1,20 @@
 import gym
+import pybullet_envs
 from simmod.wrappers import UDRMujocoWrapper
 from simmod.modification.mujoco import MujocoTextureModifier, MujocoMaterialModifier, MujocoLightModifier
 
 if __name__ == '__main__':
     # Create the environment as you would normally do
-    env = gym.make('HandReach-v0')
+    env = gym.make('MinitaurBulletEnv-v0')
+
+    env.unwrapped._pybullet_client.getDynamicsInfo()
 
     # Define modifier and algorithm for randomization
     # env.sim is the Mujoco simulation in the environment class
-    #mod_tex = MujocoTextureModifier(sim=env.sim)
-    mod_mat = MujocoMaterialModifier(sim=env.sim)
-    env = UDRMujocoWrapper(env, mod_mat)
 
     # Run algorithm and simulation
-    env.reset()
-    for _ in range(100):
-        env.step(0)
-        env.render()
+    for _ in range(3):
+        env.reset()
+        for _ in range(100):
+            env.step(0)
+            env.render()
@@ -29,6 +29,6 @@
     python_requires='>=3.7',
     classifiers=[
         "Programming Language :: Python :: 3",
-        'Programming Language :: Python :: 3.7',
+        "Programming Language :: Python :: 3.8",
     ],
 )
@@ -35,9 +35,9 @@ def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametriza
         the given modifier.
 
         Args:
-            modifier:            Modifier to change the parameter defined in the instrumentation
-            instrumentation:    Configuration of the parameter we want  change
-            **kwargs:           Additional arguments for the setter function of the modifier
+            modifier: Modifier to change the parameter defined in the instrumentation
+            instrumentation: Configuration of the parameter we want to change
+            **kwargs: Additional arguments for the setter function of the modifier
 
         Returns:
             Return of the setter function
 
@@ -1,12 +1,11 @@
-"""
-Copyright (c) 2020, Moritz Schneider
-@Author: Moritz Schneider
-"""
+"""Uniform Domain Randomization (UDR) algorithm"""
 from inspect import signature
 from typing import Any, Union
 
 import numpy as np
+from numpy.random import default_rng, Generator
 
+from simmod.utils.typings_ import *
 from simmod.algorithms.base import BaseAlgorithm
 from simmod.common.parametrization import Parametrization
 from simmod.modification.base_modifier import BaseModifier
@@ -22,14 +21,17 @@ class UniformDomainRandomization(BaseAlgorithm):
     via the modifiers.
     """
 
-    def __init__(self, *modifiers: BaseModifier, random_state=None, **kwargs: Any) -> None:
+    def __init__(self, *modifiers: BaseModifier, random_state: Optional[Union[Generator, int]] = None,
+                 **kwargs: Any) -> None:
         if random_state is None:
-            self.random_state = np.random.RandomState()
+            self.random_state = default_rng()
         elif isinstance(random_state, int):
             # random_state assumed to be an int
-            self.random_state = np.random.RandomState(random_state)
-        else:
+            self.random_state = default_rng(random_state)
+        elif isinstance(random_state, Generator):
             self.random_state = random_state
+        else:
+            raise ValueError(f"random_state argument must be either None, int or np.random.Generator, not {random_state}")
         super().__init__(*modifiers, **kwargs)
 
     def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametrization, **kwargs) -> None:
@@ -40,11 +42,12 @@ def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametriza
         the instrumentation variable.
 
         Args:
-            modifier:           Modifier to change the parameter defined in the instrumentation
-            instrumentation:    Configuration of the parameter we want  change
-            **kwargs:           Additional arguments for the setter function of the modifier
+            modifier: Modifier to change the parameter defined in the instrumentation
+            instrumentation: Configuration of the parameter we want to change
+            **kwargs: Additional arguments for the setter function of the modifier
 
-        Returns:                Return of the setter function of the modifier
+        Returns:
+            Return of the setter function of the modifier
         """
         object_name = instrumentation.object_name
         setter_func = modifier.standard_setters[instrumentation.setter]
@@ -66,10 +69,12 @@ def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametriza
         for _ in range(n_params):
             values = np.array([self.random_state.uniform(lower_bound[i], upper_bound[i]) for i in range(n)])
             new_values.append(values)
-        self._record_new_val(modifier, instrumentation, new_values)
+        #self._record_new_val(modifier, instrumentation, new_values)
+        instrumentation.update(new_values)
         return setter_func(object_name, *new_values, **kwargs)
 
     def step(self, execution: EXECUTION_POINTS = 'RESET', **kwargs) -> None:
         for modifier in self.modifiers:
             for instrumentation in modifier.instrumentation:
                 self._randomize_object(modifier, instrumentation)
+            modifier.update()
@@ -5,51 +5,122 @@
 """
 
 from abc import ABC, abstractmethod
+from simmod.utils.typings_ import *
 
 import numpy as np
 
 
-class AdaptiveParamNoiseSpec(object):
-    """Adaptive parameter noise"""
+class Distribution(ABC):
+    """Abstract base class for distributions."""
 
-    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
+    def __init__(self):
+        super(Distribution, self).__init__()
+
+    @abstractmethod
+    def proba_distribution_net(self, *args, **kwargs) -> Union[NDarray, Tuple[NDarray, NDarray]]:
+        """Create the layers and parameters that represent the distribution.
+
+        Subclasses must define this, but the arguments and return type vary between concrete classes.
         """
 
+    @abstractmethod
+    def proba_distribution(self, *args, **kwargs) -> "Distribution":
+        """Set parameters of the distribution.
+
         Args:
-            initial_stddev: (float) the initial value for the standard deviation of the noise
-            desired_action_stddev: (float) the desired value for the standard deviation of the noise
-            adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
+            *args:
+            **kwargs:
+
+        Returns:
+            self
         """
-        self.initial_stddev = initial_stddev
-        self.desired_action_stddev = desired_action_stddev
-        self.adoption_coefficient = adoption_coefficient
 
-        self.current_stddev = initial_stddev
+    @abstractmethod
+    def log_prob(self, x: NDarray) -> NDarray:
+        """Returns the log likelihood
 
-    def adapt(self, distance):
-        """Update the standard deviation for the parameter noise
+        Args:
+            x: The taken action
+
+        Returns:
+            The log likelihood of the distribution
+        """
+
+    @abstractmethod
+    def entropy(self) -> Optional[NDarray]:
+        """Shannon's entropy of the probability
+
+        Returns:
+            the entropy, or None if no analytical form is known
+        """
+
+    @abstractmethod
+    def sample(self) -> NDarray:
+        """Returns a sample from the probability distribution
+
+        Returns:
+            the stochastic action
+        """
+
+    @abstractmethod
+    def mode(self) -> NDarray:
+        """Returns the most likely action (deterministic output) from the probability distribution
+
+        Returns:
+            the deterministic action
+        """
+
+    def get_actions(self, deterministic: bool = False) -> NDarray:
+        """Return actions according to the probability distribution.
 
         Args:
-            distance: (float) the noise distance applied to the parameters
+            deterministic:
+
+        Returns:
+
         """
-        if distance > self.desired_action_stddev:
-            # Decrease stddev.
-            self.current_stddev /= self.adoption_coefficient
-        else:
-            # Increase stddev.
-            self.current_stddev *= self.adoption_coefficient
+        if deterministic:
+            return self.mode()
+        return self.sample()
 
-    def get_stats(self):
-        """Return the standard deviation for the parameter noise
+    @abstractmethod
+    def actions_from_params(self, *args, **kwargs) -> NDarray:
+        """Returns samples from the probability distribution given its parameters.
+
+        Args:
+            *args:
+            **kwargs:
 
         Returns:
-            (dict) the stats of the noise
+
         """
-        return {'param_noise_stddev': self.current_stddev}
 
-    def __repr__(self):
-        fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
-        return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
+    @abstractmethod
+    def log_prob_from_params(self, *args, **kwargs) -> Tuple[NDarray, NDarray]:
+        """Returns samples and the associated log probabilities from the probability distribution given its parameters.
+
+        Args:
+            *args:
+            **kwargs:
+
+        Returns:
+            actions and log prob
+        """
+
+
+class UniformDistribution(Distribution):
+    pass
+
+
+
+class NormalDistribution(Distribution):
+    pass
+
+
+class BernoulliDistribution(Distribution):
+    pass
+
+
 
 
 class Noise(ABC):
@@ -91,12 +162,13 @@ def __repr__(self) -> str:
 
 
 class OrnsteinUhlenbeckNoise(Noise):
-    """A Ornstein Uhlenbeck action noise, this is designed to approximate brownian motion with friction.
+    """Ornstein Uhlenbeck noise. Designed to approximate brownian motion with friction.
 
     Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
     """
 
-    def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
+    def __init__(self, mean: float, sigma: float, theta: float = .15, dt: float = 1e-2,
+                 initial_noise: Optional[float] = None):
         """
 
         Args:
@@ -126,4 +198,48 @@ def reset(self) -> None:
         self.noise_prev = self.initial_noise if self.initial_noise is not None else np.zeros_like(self._mu)
 
     def __repr__(self) -> str:
-        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
+        return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
+
+
+class AdaptiveNoise(Noise):
+    """Adaptive parameter noise"""
+
+    def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
+        """
+
+        Args:
+            initial_stddev: (float) the initial value for the standard deviation of the noise
+            desired_action_stddev: (float) the desired value for the standard deviation of the noise
+            adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
+        """
+        super().__init__()
+        self.initial_stddev = initial_stddev
+        self.desired_action_stddev = desired_action_stddev
+        self.adoption_coefficient = adoption_coefficient
+
+        self.current_stddev = initial_stddev
+
+    def adapt(self, distance):
+        """Update the standard deviation for the parameter noise
+
+        Args:
+            distance: (float) the noise distance applied to the parameters
+        """
+        if distance > self.desired_action_stddev:
+            # Decrease stddev.
+            self.current_stddev /= self.adoption_coefficient
+        else:
+            # Increase stddev.
+            self.current_stddev *= self.adoption_coefficient
+
+    def get_stats(self):
+        """Return the standard deviation for the parameter noise
+
+        Returns:
+            (dict) the stats of the noise
+        """
+        return {'param_noise_stddev': self.current_stddev}
+
+    def __repr__(self):
+        fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
+        return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
@@ -1,8 +1,5 @@
 """The parameterization is the connection between the setter functions, the simulation objects and their respective
 value ranges. All those variables are stored in an individual parameterization instance.
-
-Copyright (c) 2020, Moritz Schneider
-@Author: Moritz Schneider
 """
 from abc import ABC
 from typing import Union, List, Callable, AnyStr, Tuple, Any, Dict, Optional
@@ -48,15 +45,25 @@ def __init__(
         self.lower_bound = parameter_range.T[0]
         self.upper_bound = parameter_range.T[1]
         self.name = name
+        self.history = []
+        self.current_val = None
+
+    def __str__(self):
+        return f'{self.setter}:{self.object_name}={self.current_val}'
 
     @property
     def parameter_range(self):
         return (self.lower_bound, self.upper_bound)
 
+    def update(self, new_values, **kwargs):
+        if self.current_val is not None:
+            self.history.append(self.current_val)
+        self.current_val = new_values
+
     def get_json(self) -> Dict:
         result = {
             self.setter: {
-                self.object_name: 0
+                self.object_name: self.current_val
             }
         }
         return result
@@ -78,9 +85,9 @@ class Array(Parameter):
     def __init__(
             self,
             mod_func: Callable,
-            init: ArrayOrNum = None,
-            lower: ArrayOrNum = None,
-            upper: ArrayOrNum = None
+            init: Optional[ArrayOrNum] = None,
+            lower: Optional[ArrayOrNum] = None,
+            upper: Optional[ArrayOrNum] = None
     ) -> None:
         self.lower = lower
         self.upper = upper
@@ -92,7 +99,7 @@ class Scalar(Array):
     def __init__(
             self,
             mod_func: Callable,
-            init: Num = None,
+            init: Optional[Num] = None,
             lower: Num = 0,
             upper: Num = 1
     ) -> None:
Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,6 @@`
`29`	`29`	`python_requires='>=3.7',`
`30`	`30`	`classifiers=[`
`31`	`31`	`"Programming Language :: Python :: 3",`
`32`		`- 'Programming Language :: Python :: 3.7',`
	`32`	`+ "Programming Language :: Python :: 3.8",`
`33`	`33`	`],`
`34`	`34`	`)`