Skip to content

Commit b0b1b05

Browse files
author
Moritz
committed
Long-time update
- Mujoco color randomization not only for textures but also for rgba attributes of materials and geometries - First version of PyBullet integration - Better logging of parameter history - Bugfixing
1 parent a4f3310 commit b0b1b05

13 files changed

+472
-109
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
import gym
2+
import pybullet_envs
23
from simmod.wrappers import UDRMujocoWrapper
34
from simmod.modification.mujoco import MujocoTextureModifier, MujocoMaterialModifier, MujocoLightModifier
45

56
if __name__ == '__main__':
67
# Create the environment as you would normally do
7-
env = gym.make('HandReach-v0')
8+
env = gym.make('MinitaurBulletEnv-v0')
9+
10+
env.unwrapped._pybullet_client.getDynamicsInfo()
811

912
# Define modifier and algorithm for randomization
1013
# env.sim is the Mujoco simulation in the environment class
11-
#mod_tex = MujocoTextureModifier(sim=env.sim)
12-
mod_mat = MujocoMaterialModifier(sim=env.sim)
13-
env = UDRMujocoWrapper(env, mod_mat)
1414

1515
# Run algorithm and simulation
16-
env.reset()
17-
for _ in range(100):
18-
env.step(0)
19-
env.render()
16+
for _ in range(3):
17+
env.reset()
18+
for _ in range(100):
19+
env.step(0)
20+
env.render()

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,6 @@
2929
python_requires='>=3.7',
3030
classifiers=[
3131
"Programming Language :: Python :: 3",
32-
'Programming Language :: Python :: 3.7',
32+
"Programming Language :: Python :: 3.8",
3333
],
3434
)

simmod/algorithms/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@ def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametriza
3535
the given modifier.
3636
3737
Args:
38-
modifier: Modifier to change the parameter defined in the instrumentation
39-
instrumentation: Configuration of the parameter we want change
40-
**kwargs: Additional arguments for the setter function of the modifier
38+
modifier: Modifier to change the parameter defined in the instrumentation
39+
instrumentation: Configuration of the parameter we want to change
40+
**kwargs: Additional arguments for the setter function of the modifier
4141
4242
Returns:
4343
Return of the setter function

simmod/algorithms/udr.py

+18-13
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
"""
2-
Copyright (c) 2020, Moritz Schneider
3-
@Author: Moritz Schneider
4-
"""
1+
"""Uniform Domain Randomization (UDR) algorithm"""
52
from inspect import signature
63
from typing import Any, Union
74

85
import numpy as np
6+
from numpy.random import default_rng, Generator
97

8+
from simmod.utils.typings_ import *
109
from simmod.algorithms.base import BaseAlgorithm
1110
from simmod.common.parametrization import Parametrization
1211
from simmod.modification.base_modifier import BaseModifier
@@ -22,14 +21,17 @@ class UniformDomainRandomization(BaseAlgorithm):
2221
via the modifiers.
2322
"""
2423

25-
def __init__(self, *modifiers: BaseModifier, random_state=None, **kwargs: Any) -> None:
24+
def __init__(self, *modifiers: BaseModifier, random_state: Optional[Union[Generator, int]] = None,
25+
**kwargs: Any) -> None:
2626
if random_state is None:
27-
self.random_state = np.random.RandomState()
27+
self.random_state = default_rng()
2828
elif isinstance(random_state, int):
2929
# random_state assumed to be an int
30-
self.random_state = np.random.RandomState(random_state)
31-
else:
30+
self.random_state = default_rng(random_state)
31+
elif isinstance(random_state, Generator):
3232
self.random_state = random_state
33+
else:
34+
raise ValueError(f"random_state argument must be either None, int or np.random.Generator, not {random_state}")
3335
super().__init__(*modifiers, **kwargs)
3436

3537
def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametrization, **kwargs) -> None:
@@ -40,11 +42,12 @@ def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametriza
4042
the instrumentation variable.
4143
4244
Args:
43-
modifier: Modifier to change the parameter defined in the instrumentation
44-
instrumentation: Configuration of the parameter we want change
45-
**kwargs: Additional arguments for the setter function of the modifier
45+
modifier: Modifier to change the parameter defined in the instrumentation
46+
instrumentation: Configuration of the parameter we want to change
47+
**kwargs: Additional arguments for the setter function of the modifier
4648
47-
Returns: Return of the setter function of the modifier
49+
Returns:
50+
Return of the setter function of the modifier
4851
"""
4952
object_name = instrumentation.object_name
5053
setter_func = modifier.standard_setters[instrumentation.setter]
@@ -66,10 +69,12 @@ def _randomize_object(self, modifier: BaseModifier, instrumentation: Parametriza
6669
for _ in range(n_params):
6770
values = np.array([self.random_state.uniform(lower_bound[i], upper_bound[i]) for i in range(n)])
6871
new_values.append(values)
69-
self._record_new_val(modifier, instrumentation, new_values)
72+
#self._record_new_val(modifier, instrumentation, new_values)
73+
instrumentation.update(new_values)
7074
return setter_func(object_name, *new_values, **kwargs)
7175

7276
def step(self, execution: EXECUTION_POINTS = 'RESET', **kwargs) -> None:
7377
for modifier in self.modifiers:
7478
for instrumentation in modifier.instrumentation:
7579
self._randomize_object(modifier, instrumentation)
80+
modifier.update()

simmod/common/noise_distributions.py simmod/common/distributions.py

+145-29
Original file line numberDiff line numberDiff line change
@@ -5,51 +5,122 @@
55
"""
66

77
from abc import ABC, abstractmethod
8+
from simmod.utils.typings_ import *
89

910
import numpy as np
1011

1112

12-
class AdaptiveParamNoiseSpec(object):
13-
"""Adaptive parameter noise"""
13+
class Distribution(ABC):
14+
"""Abstract base class for distributions."""
1415

15-
def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
16+
def __init__(self):
17+
super(Distribution, self).__init__()
18+
19+
@abstractmethod
20+
def proba_distribution_net(self, *args, **kwargs) -> Union[NDarray, Tuple[NDarray, NDarray]]:
21+
"""Create the layers and parameters that represent the distribution.
22+
23+
Subclasses must define this, but the arguments and return type vary between concrete classes.
1624
"""
1725

26+
@abstractmethod
27+
def proba_distribution(self, *args, **kwargs) -> "Distribution":
28+
"""Set parameters of the distribution.
29+
1830
Args:
19-
initial_stddev: (float) the initial value for the standard deviation of the noise
20-
desired_action_stddev: (float) the desired value for the standard deviation of the noise
21-
adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
31+
*args:
32+
**kwargs:
33+
34+
Returns:
35+
self
2236
"""
23-
self.initial_stddev = initial_stddev
24-
self.desired_action_stddev = desired_action_stddev
25-
self.adoption_coefficient = adoption_coefficient
2637

27-
self.current_stddev = initial_stddev
38+
@abstractmethod
39+
def log_prob(self, x: NDarray) -> NDarray:
40+
"""Returns the log likelihood
2841
29-
def adapt(self, distance):
30-
"""Update the standard deviation for the parameter noise
42+
Args:
43+
x: The taken action
44+
45+
Returns:
46+
The log likelihood of the distribution
47+
"""
48+
49+
@abstractmethod
50+
def entropy(self) -> Optional[NDarray]:
51+
"""Shannon's entropy of the probability
52+
53+
Returns:
54+
the entropy, or None if no analytical form is known
55+
"""
56+
57+
@abstractmethod
58+
def sample(self) -> NDarray:
59+
"""Returns a sample from the probability distribution
60+
61+
Returns:
62+
the stochastic action
63+
"""
64+
65+
@abstractmethod
66+
def mode(self) -> NDarray:
67+
"""Returns the most likely action (deterministic output) from the probability distribution
68+
69+
Returns:
70+
the deterministic action
71+
"""
72+
73+
def get_actions(self, deterministic: bool = False) -> NDarray:
74+
"""Return actions according to the probability distribution.
3175
3276
Args:
33-
distance: (float) the noise distance applied to the parameters
77+
deterministic:
78+
79+
Returns:
80+
3481
"""
35-
if distance > self.desired_action_stddev:
36-
# Decrease stddev.
37-
self.current_stddev /= self.adoption_coefficient
38-
else:
39-
# Increase stddev.
40-
self.current_stddev *= self.adoption_coefficient
82+
if deterministic:
83+
return self.mode()
84+
return self.sample()
4185

42-
def get_stats(self):
43-
"""Return the standard deviation for the parameter noise
86+
@abstractmethod
87+
def actions_from_params(self, *args, **kwargs) -> NDarray:
88+
"""Returns samples from the probability distribution given its parameters.
89+
90+
Args:
91+
*args:
92+
**kwargs:
4493
4594
Returns:
46-
(dict) the stats of the noise
95+
4796
"""
48-
return {'param_noise_stddev': self.current_stddev}
4997

50-
def __repr__(self):
51-
fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
52-
return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)
98+
@abstractmethod
99+
def log_prob_from_params(self, *args, **kwargs) -> Tuple[NDarray, NDarray]:
100+
"""Returns samples and the associated log probabilities from the probability distribution given its parameters.
101+
102+
Args:
103+
*args:
104+
**kwargs:
105+
106+
Returns:
107+
actions and log prob
108+
"""
109+
110+
111+
class UniformDistribution(Distribution):
112+
pass
113+
114+
115+
116+
class NormalDistribution(Distribution):
117+
pass
118+
119+
120+
class BernoulliDistribution(Distribution):
121+
pass
122+
123+
53124

54125

55126
class Noise(ABC):
@@ -91,12 +162,13 @@ def __repr__(self) -> str:
91162

92163

93164
class OrnsteinUhlenbeckNoise(Noise):
94-
"""A Ornstein Uhlenbeck action noise, this is designed to approximate brownian motion with friction.
165+
"""Ornstein Uhlenbeck noise. Designed to approximate brownian motion with friction.
95166
96167
Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab
97168
"""
98169

99-
def __init__(self, mean, sigma, theta=.15, dt=1e-2, initial_noise=None):
170+
def __init__(self, mean: float, sigma: float, theta: float = .15, dt: float = 1e-2,
171+
initial_noise: Optional[float] = None):
100172
"""
101173
102174
Args:
@@ -126,4 +198,48 @@ def reset(self) -> None:
126198
self.noise_prev = self.initial_noise if self.initial_noise is not None else np.zeros_like(self._mu)
127199

128200
def __repr__(self) -> str:
129-
return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
201+
return 'OrnsteinUhlenbeckActionNoise(mu={}, sigma={})'.format(self._mu, self._sigma)
202+
203+
204+
class AdaptiveNoise(Noise):
205+
"""Adaptive parameter noise"""
206+
207+
def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adoption_coefficient=1.01):
208+
"""
209+
210+
Args:
211+
initial_stddev: (float) the initial value for the standard deviation of the noise
212+
desired_action_stddev: (float) the desired value for the standard deviation of the noise
213+
adoption_coefficient: (float) the update coefficient for the standard deviation of the noise
214+
"""
215+
super().__init__()
216+
self.initial_stddev = initial_stddev
217+
self.desired_action_stddev = desired_action_stddev
218+
self.adoption_coefficient = adoption_coefficient
219+
220+
self.current_stddev = initial_stddev
221+
222+
def adapt(self, distance):
223+
"""Update the standard deviation for the parameter noise
224+
225+
Args:
226+
distance: (float) the noise distance applied to the parameters
227+
"""
228+
if distance > self.desired_action_stddev:
229+
# Decrease stddev.
230+
self.current_stddev /= self.adoption_coefficient
231+
else:
232+
# Increase stddev.
233+
self.current_stddev *= self.adoption_coefficient
234+
235+
def get_stats(self):
236+
"""Return the standard deviation for the parameter noise
237+
238+
Returns:
239+
(dict) the stats of the noise
240+
"""
241+
return {'param_noise_stddev': self.current_stddev}
242+
243+
def __repr__(self):
244+
fmt = 'AdaptiveParamNoiseSpec(initial_stddev={}, desired_action_stddev={}, adoption_coefficient={})'
245+
return fmt.format(self.initial_stddev, self.desired_action_stddev, self.adoption_coefficient)

simmod/common/parametrization.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
"""The parameterization is the connection between the setter functions, the simulation objects and their respective
22
value ranges. All those variables are stored in an individual parameterization instance.
3-
4-
Copyright (c) 2020, Moritz Schneider
5-
@Author: Moritz Schneider
63
"""
74
from abc import ABC
85
from typing import Union, List, Callable, AnyStr, Tuple, Any, Dict, Optional
@@ -48,15 +45,25 @@ def __init__(
4845
self.lower_bound = parameter_range.T[0]
4946
self.upper_bound = parameter_range.T[1]
5047
self.name = name
48+
self.history = []
49+
self.current_val = None
50+
51+
def __str__(self):
52+
return f'{self.setter}:{self.object_name}={self.current_val}'
5153

5254
@property
5355
def parameter_range(self):
5456
return (self.lower_bound, self.upper_bound)
5557

58+
def update(self, new_values, **kwargs):
59+
if self.current_val is not None:
60+
self.history.append(self.current_val)
61+
self.current_val = new_values
62+
5663
def get_json(self) -> Dict:
5764
result = {
5865
self.setter: {
59-
self.object_name: 0
66+
self.object_name: self.current_val
6067
}
6168
}
6269
return result
@@ -78,9 +85,9 @@ class Array(Parameter):
7885
def __init__(
7986
self,
8087
mod_func: Callable,
81-
init: ArrayOrNum = None,
82-
lower: ArrayOrNum = None,
83-
upper: ArrayOrNum = None
88+
init: Optional[ArrayOrNum] = None,
89+
lower: Optional[ArrayOrNum] = None,
90+
upper: Optional[ArrayOrNum] = None
8491
) -> None:
8592
self.lower = lower
8693
self.upper = upper
@@ -92,7 +99,7 @@ class Scalar(Array):
9299
def __init__(
93100
self,
94101
mod_func: Callable,
95-
init: Num = None,
102+
init: Optional[Num] = None,
96103
lower: Num = 0,
97104
upper: Num = 1
98105
) -> None:

0 commit comments

Comments
 (0)