diff --git a/theta/costfunctions.py b/theta/costfunctions.py
index 279a8e3..3b66b25 100644
--- a/theta/costfunctions.py
+++ b/theta/costfunctions.py
@@ -15,14 +15,27 @@ def cost(self, X, *Y):
     @abstractmethod
     def gradient(self, X):
         pass
+
+
+class kullbackLeibler(costfunction):
+
+    @staticmethod
+    def cost(x, y):
+        if (x < 0).any():
+            raise ValueError("kullbackLeibler cost received negative probabilities")
+        return -np.sum(y*np.log(x))  # + np.sum(y*np.log(y))
+
+    @staticmethod
+    def gradient(x, y):
+        # Elementwise gradient of -sum(y*log(x)) with respect to x
+        return -y/x
+

 class mse(costfunction):
     """ Mean squared error """

     @staticmethod
     def cost(x, y):
-        return np.sum(np.mean((x-y)**2,axis=1))
+        return np.sum(np.mean((x-y)**2,axis=-1))

     @staticmethod
     def gradient(x, y):
@@ -49,7 +62,7 @@ def cost(x, *y):
         return -np.sum(x)

     @staticmethod
-    def gradient(x):
+    def gradient(x, y):
         sys.exit("Gradient not implemented!")


@@ -61,7 +74,7 @@ def cost(x, y):
         return np.sqrt(np.sum(np.mean((y-x)**2,axis=1)))

     @staticmethod
-    def gradient(x):
+    def gradient(x, y):
         return 1.0/np.sqrt(np.sum(np.mean((y-x)**2,axis=1)))*(x-y)


@@ -74,6 +87,5 @@ def cost(x, y):
         return -np.sum(np.mean(np.multiply(y,lx),axis=1))

     @staticmethod
-    def gradient(x):
+    def gradient(x, y):
         return -1.0/y.shape[1]*y/x
-
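A quick sanity check of the new `kullbackLeibler` cost: since the constant target term is dropped (the commented-out `+ np.sum(y*np.log(y))`), the returned value is the cross-entropy, which differs from the true KL divergence only by a term independent of the model output. A minimal sketch with made-up normalized arrays:

```python
import numpy as np
from theta.costfunctions import kullbackLeibler, mse

# Two toy normalized distributions over four bins
x = np.array([0.2, 0.3, 0.4, 0.1])      # model output, must be strictly positive
y = np.array([0.25, 0.25, 0.25, 0.25])  # target distribution

ce = kullbackLeibler.cost(x, y)   # -sum(y*log(x)), i.e. the cross-entropy
kl = ce + np.sum(y * np.log(y))   # adding back the constant gives the true KL
print(ce, kl, mse.cost(x, y))
```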
""" - def __init__(self, parallel=False, ncores=0): + def __init__(self, parallel=False, ncores=0, verbose=True): + self._verbose = verbose super(CMA, self).__init__() if parallel: if(ncores==0): @@ -77,11 +81,20 @@ def train(self, cost, model, x_data, y_data=None, tolfun=1e-11, popsize=None, ma Note: The parameters of the model are changed by this algorithm. """ + if self._verbose: + vb = 0 + else: + vb = -9 initsol = np.real(model.get_parameters()) - args = {'bounds': model.get_bounds(), + + # Prepare the bounds + bounds = model.get_bounds() + + args = {'bounds': bounds, 'tolfun': tolfun, - 'verb_log': 0} + 'verbose': vb, + 'verb_log': vb} sigma = np.max(model.get_bounds()[1])*0.1 if popsize is not None: @@ -112,18 +125,12 @@ def train(self, cost, model, x_data, y_data=None, tolfun=1e-11, popsize=None, ma pool.terminate() else: worker_initialize(cost, model, x_data, y_data) - while not es.stop(): - f_values, solutions = [], [] - while len(solutions) < es.popsize: - curr_fit = x = np.NaN - while np.isnan(curr_fit): - x = es.ask(1, gradf=grad)[0] - curr_fit = worker_compute(x) - solutions.append(x) - f_values.append(curr_fit) - es.tell(solutions, f_values) - es.disp() - print(es.result) + # By using ask_and_eval we let the cma decide what to do with NaNs + solutions, f_values = es.ask_and_eval(worker_compute) + es.tell(solutions, f_values) + es.disp() + if self._verbose: + print(es.result) model.set_parameters(es.result[0]) return es.result[0] diff --git a/theta/rtbm.py b/theta/rtbm.py index f5b9534..31b3e42 100644 --- a/theta/rtbm.py +++ b/theta/rtbm.py @@ -1,13 +1,64 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import import numpy as np -from theta.mathtools import rtbm_probability, hidden_expectations, rtbm_log_probability, \ - check_normalization_consistency, check_pos_def, rtbm_ph, RTBM_precision +from scipy.special import expit, logit +from theta.mathtools import ( + rtbm_probability, + hidden_expectations, + rtbm_log_probability, + check_normalization_consistency, + check_pos_def, + rtbm_ph, + RTBM_precision, +) from theta.riemann_theta.riemann_theta import RiemannTheta, radius, integer_points_python +#### Activations +class Nope: + def __init__(self): + print("WARNING: using no sampling") + def __call__(self, x): + return x + def grad(self, x): + return np.ones_like(x) + def inv(self, y): + return y + +class Tanh: + # The image of tanh is (-1, 1) + # factor of 2 to move it to (0, 1) + def __call__(self, x): + return (np.tanh(x)+1.0)/2.0 + def grad(self, x): + return 2.0*np.cosh(x)**2 + def inv(self, y): + return np.arctanh(2.0*y - 1.0) + +class Sigmoid: + def __call__(self, x): + return expit(x) + def grad(self, x): + return 1.0/(self(x)-self(x)**2) + def inv(self, y): + return logit(y) + +class Softsign: + def __call__(self, x): + return (x/(1+np.abs(x))+1.0)/2.0 + def grad(self, x): + return 2.0*np.abs(x)**2 + def inv(self, y): + ay = np.where(y > 0.0, y, -y) + return y/(1 - ay) + +activation_dict = { + "tanh": Tanh, + "sigmoid": Sigmoid, + "softsign": Softsign + } class AssignError(Exception): """Custom exception for Shur complement test""" + pass @@ -19,7 +70,7 @@ class RTBM(object): hidden_units (int): number of hidden units. mode (theta.rtbm.RTBM.Mode): set the working mode among: `probability mode` (``Mode.Probability``), `log of probability` (``Mode.LogProbability``) and expectation (``Mode.Expectation``), see :class:`theta.rtbm.RTBM.Mode`. - init_max_param_bound (float): maximum value allowed for all parameters during the CMA-ES minimization. 
diff --git a/theta/rtbm.py b/theta/rtbm.py
index f5b9534..31b3e42 100644
--- a/theta/rtbm.py
+++ b/theta/rtbm.py
@@ -1,13 +1,64 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import
 import numpy as np
-from theta.mathtools import rtbm_probability, hidden_expectations, rtbm_log_probability, \
-    check_normalization_consistency, check_pos_def, rtbm_ph, RTBM_precision
+from scipy.special import expit, logit
+from theta.mathtools import (
+    rtbm_probability,
+    hidden_expectations,
+    rtbm_log_probability,
+    check_normalization_consistency,
+    check_pos_def,
+    rtbm_ph,
+    RTBM_precision,
+)
 from theta.riemann_theta.riemann_theta import RiemannTheta, radius, integer_points_python


+#### Activations
+# NB: `grad` returns 1/f'(x), i.e. the Jacobian |dx/dr| by which
+# `get_transformation` multiplies the probability density
+class Nope:
+    def __init__(self):
+        print("WARNING: using no sampling activation")
+    def __call__(self, x):
+        return x
+    def grad(self, x):
+        return np.ones_like(x)
+    def inv(self, y):
+        return y
+
+class Tanh:
+    # The image of tanh is (-1, 1), shifted and rescaled to (0, 1)
+    def __call__(self, x):
+        return (np.tanh(x)+1.0)/2.0
+    def grad(self, x):
+        return 2.0*np.cosh(x)**2
+    def inv(self, y):
+        return np.arctanh(2.0*y - 1.0)
+
+class Sigmoid:
+    def __call__(self, x):
+        return expit(x)
+    def grad(self, x):
+        s = self(x)
+        return 1.0/(s - s**2)
+    def inv(self, y):
+        return logit(y)
+
+class Softsign:
+    def __call__(self, x):
+        return (x/(1+np.abs(x))+1.0)/2.0
+    def grad(self, x):
+        return 2.0*(1.0 + np.abs(x))**2
+    def inv(self, y):
+        # Undo the (0, 1) shift before inverting the softsign itself
+        s = 2.0*y - 1.0
+        return s/(1.0 - np.abs(s))
+
+activation_dict = {
+    "tanh": Tanh,
+    "sigmoid": Sigmoid,
+    "softsign": Softsign
+}


 class AssignError(Exception):
     """Custom exception for Schur complement test"""
+    pass
@@ -19,7 +70,7 @@ class RTBM(object):
         hidden_units (int): number of hidden units.
         mode (theta.rtbm.RTBM.Mode): set the working mode among: `probability mode` (``Mode.Probability``), `log of probability` (``Mode.LogProbability``) and expectation (``Mode.Expectation``), see :class:`theta.rtbm.RTBM.Mode`.
-        init_max_param_bound (float): maximum value allowed for all parameters during the CMA-ES minimization.
+        minimization_bound (float): maximum value allowed for all parameters during the CMA-ES minimization.
         random_bound (float): selects the maximum random value for the Schur complement initialization.
         phase (complex): number which multiplies w and bh ``phase=1`` for Phase I and ``phase=1j`` for Phase II.
         diagonal_T (bool): force T diagonal, by default T is symmetric.
+        positive_T (bool): force T positive definite by optimizing its Cholesky factor.
+        positive_Q (bool): force Q positive definite by optimizing its Cholesky factor.
+        gaussian_init (bool): initialize the model from the moments of a multivariate Gaussian.
+        gaussian_parameters (dict): ``mean`` and ``std`` of the Gaussian used when ``gaussian_init=True``.
+        sampling_activations (str or list): activation used by the sampling transformation, one per visible unit.
@@ -51,12 +102,26 @@ class Mode:
         * ``Mode.LogProbability``: set the output to log probability mode.
         * ``Mode.Expectation``: set the output to expectation mode.
         """
+
         Probability = 0
         LogProbability = 1
         Expectation = 2

-    def __init__(self, visible_units, hidden_units, mode=Mode.Probability,
-                 init_max_param_bound=2, random_bound=1, phase=1, diagonal_T=False):
+    def __init__(
+        self,
+        visible_units,
+        hidden_units,
+        mode=Mode.Probability,
+        minimization_bound=2,
+        random_bound=1,
+        phase=1,
+        diagonal_T=False,
+        positive_T=False,
+        positive_Q=False,
+        gaussian_init=False,
+        gaussian_parameters=None,
+        sampling_activations="tanh"
+    ):
         self._Nv = visible_units
         self._Nh = hidden_units
         self._bv = np.zeros([visible_units, 1])
@@ -64,27 +129,52 @@ def __init__(self, visible_units, hidden_units, mode=Mode.Probability,
         self._bh = np.zeros([hidden_units, 1])
         self._w = np.zeros([visible_units, hidden_units])
         self._q = np.zeros([hidden_units, hidden_units])
-        self._diagonal_T = diagonal_T
+        self._diagonal_T = diagonal_T or (visible_units == 1 and not gaussian_init)
         self._mode = None
         self._call = None
         self._parameters = None
         self._X = None
         self._phase = phase
         self._check_positivity = True
+        # Get the activation per dimension (TODO: validate the input list)
+        if isinstance(sampling_activations, str):
+            sampling_activations = visible_units*[sampling_activations]
+        self._activations = [activation_dict.get(i.lower(), Nope)() for i in sampling_activations]
+
         if diagonal_T:
-            self._size = 2 * self._Nv + self._Nh + (self._Nh**2+self._Nh)//2 + self._Nv*self._Nh
+            if gaussian_init:
+                raise ValueError("Gaussian initialization doesn't allow for diagonal_T")
+            self._size = (
+                2 * self._Nv + self._Nh + (self._Nh ** 2 + self._Nh) // 2 + self._Nv * self._Nh
+            )
         else:
-            self._size = self._Nv + self._Nh + (self._Nv**2+self._Nv+self._Nh**2+self._Nh)//2+self._Nv*self._Nh
+            self._size = (
+                self._Nv
+                + self._Nh
+                + (self._Nv ** 2 + self._Nv + self._Nh ** 2 + self._Nh) // 2
+                + self._Nv * self._Nh
+            )
+
+        # Note that the list of parameters always keeps the following structure:
+        # [biases_visible, biases_hidden, W, upper_triangular_T (or diagonal), upper_triangular_Q]
+        # therefore, the indices for T start at: (nv + nh + nv*nh)

         # set operation mode
         self.mode = mode

         # set boundaries
-        self.set_bounds(init_max_param_bound)
-
-        # Populate with random parameters using Schur complement
-        # This guarantees an acceptable and instantaneous initial solution
-        self.random_init(random_bound)
+        self._positive_T = positive_T
+        self._positive_Q = positive_Q
+        self.set_bounds(minimization_bound)
+
+        if gaussian_init:
+            if gaussian_parameters is None:
+                gaussian_parameters = {"mean": 0.0, "std": 2.0}
+            self.gaussian_initialize(**gaussian_parameters)
+        else:
+            # Populate with random parameters using Schur complement
+            # This guarantees an acceptable and instantaneous initial solution
+            self.random_init(random_bound)

         # Generate vector for gradient calc call
         self._D1 = []
@@ -100,15 +190,20 @@ def __init__(self, visible_units, hidden_units, mode=Mode.Probability,
         if hidden_units > 1:
             for i in range(0, hidden_units):
                 for j in range(0, hidden_units):
-                    tmp = [0] * hidden_units**2
+                    tmp = [0] * hidden_units ** 2
                     tmp[i] = 1
-                    tmp[j+hidden_units] = 1
+                    tmp[j + hidden_units] = 1

-                    self._D2.append([tmp[k:k+hidden_units] for k in range(0, hidden_units+1, hidden_units)])
+                    self._D2.append(
+                        [
+                            tmp[k : k + hidden_units]
+                            for k in range(0, hidden_units + 1, hidden_units)
+                        ]
+                    )

             self._D2 = np.array(self._D2)
         else:
-            self._D2.append([1,1])
+            self._D2.append([1, 1])

     def __call__(self, data, grad_calc=False):
         """Evaluates the RTBM for a given data array"""
@@ -122,7 +217,6 @@ def __call__(self, data, grad_calc=False):
             self._check_positivity = False
         else:
             self._check_positivity = True
-
         return P

     def feed_through(self, X, grad_calc=False):
@@ -155,36 +249,101 @@ def random_init(self, bound):
         """Random initializer which satisfies the Schur complement positivity condition.
         If ``diagonal_T=True`` the initial Q and T are diagonal and W is set to zero.

+        Note that if T or Q are forced to be positive, the parameters array holds
+        their Cholesky factors instead of the matrices themselves.
+
        Args:
            bound (float): the maximum value for the random matrix X used by the Schur complement.
        """
-        a_shape = ((self._Nv+self._Nh), (self._Nv+self._Nh))
-        a_size = (self._Nv+self._Nh)**2
+        a_shape = ((self._Nv + self._Nh), (self._Nv + self._Nh))
+        a_size = (self._Nv + self._Nh) ** 2
+
+        params = np.random.uniform(-bound, bound, a_size + self._Nv + self._Nh)

-        params = np.random.uniform(-bound, bound, a_size+self._Nv+self._Nh)
         if self._diagonal_T:
             x = np.eye(a_shape[0])
-            np.fill_diagonal(x, params[:self._Nv+self._Nh])
+            np.fill_diagonal(x, params[: self._Nv + self._Nh])
         else:
             x = params[:a_size].reshape(a_shape)

         a = np.transpose(x).dot(x)

-        self._q = a[:self._Nh, :self._Nh]
-        self._t = a[self._Nh:self._Nh + self._Nv, self._Nh:]
-        self._w = self._phase * a[self._Nh:, :self._Nh]
-        self._bv = params[a_size:a_size + self._Nv].reshape(self._bv.shape)
-        self._bh = self._phase * params[-self._Nh:].reshape(self._bh.shape)
+        self._q = a[: self._Nh, : self._Nh]
+        self._t = a[self._Nh : self._Nh + self._Nv, self._Nh :]
+        self._w = self._phase * a[self._Nh :, : self._Nh]
+        self._bv = params[a_size : a_size + self._Nv].reshape(self._bv.shape)
+        self._bh = self._phase * params[-self._Nh :].reshape(self._bh.shape)
+
+        self._store_parameters()

-        # store parameters having in mind that Q and T are symmetric.
+    def gaussian_initialize(self, mean=0, std=2.0):
+        """Reset the parameters of the RTBM to a multivariate Gaussian
+        centered around ``mean`` with covariance ``std * eye(Nv)``
+        """
+        multi_mean = np.ones(self._Nv) * mean
+        covmat = np.eye(self._Nv)
+        np.fill_diagonal(covmat, std)
+        n = min(pow(10, self._Nv + 2), 1e7)
+        fake_data = np.random.multivariate_normal(multi_mean, covmat, size=int(n))

+        # Get the bounds for the matrix q, which correspond to the last value in the bounds
+        q_min = self._bounds[0][-1]
+        q_max = self._bounds[1][-1]
+        # The biases instead all have the same bounds and are set at the beginning
+        q_bias_min = self._bounds[0][0]
+        q_bias_max = self._bounds[1][0]
+
+        # Create a positive Q
+        self._bh = np.random.uniform(q_bias_min, q_bias_max, self._Nh)
+        params_q = np.random.uniform(q_min, q_max, (self._Nh ** 2 + self._Nh) // 2)
+        if self._Nh == 1:
+            self._q = params_q.reshape((1, 1))
+        else:
+            chol_q = np.zeros((self._Nh, self._Nh))
+            chol_q[np.triu_indices(self._Nh)] = params_q
+            self._q = chol_q.T.dot(chol_q)
+
+        self._gaussian_init(fake_data.T)
+
+        # TODO: should the bounds of T be adapted to the gaussian initialization?
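The moment matching performed by `_gaussian_init` just below follows from the fact that with `w = 0` and `bh = 0` the RTBM density reduces to a Gaussian P(v) ∝ exp(-½vᵀTv - bvᵀv), so T is the inverse covariance and bv = -T·mean. A standalone numpy check of that relation (the data values are arbitrary):

```python
import numpy as np

# With w = 0 and bh = 0 the RTBM density is Gaussian:
#   P(v) ~ exp(-0.5 v^T T v - bv^T v)  =>  Cov = T^{-1},  mean = -T^{-1} bv
data = np.random.multivariate_normal([1.0, -2.0], np.diag([2.0, 0.5]), size=100000).T

vi = np.mean(data, axis=1, keepdims=True)              # sample mean, shape (Nv, 1)
cov = data.dot(data.T) / data.shape[1] - vi.dot(vi.T)  # sample covariance
t = np.linalg.inv(cov)                                 # same formula as _gaussian_init
bv = -t.dot(vi)

# The model mean -T^{-1} bv recovers the sample mean
print(np.allclose(-np.linalg.inv(t).dot(bv), vi))
```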
+
+    def _gaussian_init(self, data):
+        """Reset the parametrization with a gaussian on top of the data"""
+        # Set W and the biases to 0
+        self._w = np.zeros_like(self._w)
+        self._bh = np.zeros_like(self._bh)
+
+        # Solve the moment equations for the gaussian
+        vi = np.mean(data, axis=1, keepdims=True)
+        vivj = data.dot(data.T) / data.shape[1]
+        invT = vivj - vi.dot(vi.T)
+        self._t = np.linalg.inv(invT)
+        self._bv = -1.0 * np.dot(vi.T, self._t).T
+
+        self._store_parameters()
+
+    def _store_parameters(self):
+        """Store the parameters in the all_parameters array"""
+        all_parameters = [self._bv.flatten(), self._bh.flatten(), self._w.flatten()]
+
+        # store parameters having in mind that Q and T are symmetric
         if self._diagonal_T:
-            self._parameters = np.concatenate([self._bv.flatten(), self._bh.flatten(),
-                                               self._w.flatten(), self._t.diagonal(),
-                                               self._q[np.triu_indices(self._Nh)]])
+            all_parameters.append(self._t.diagonal())
+        else:
+            if self._positive_T and self._Nv > 1:
+                t_target = np.linalg.cholesky(self._t).T
+            else:
+                t_target = self._t
+            all_parameters.append(t_target[np.triu_indices(self._Nv)])
+
+        if self._positive_Q and self._Nh > 1:
+            q_target = np.linalg.cholesky(self._q).T
         else:
-            self._parameters = np.concatenate([self._bv.flatten(), self._bh.flatten(),
-                                               self._w.flatten(), self._t[np.triu_indices(self._Nv)],
-                                               self._q[np.triu_indices(self._Nh)]])
+            q_target = self._q
+
+        all_parameters.append(q_target[np.triu_indices(self._Nh)])
+
+        self._parameters = np.concatenate(all_parameters)

     def mean(self):
         """Computes the first moment estimator (mean).
@@ -202,11 +361,22 @@ def mean(self):
             BhT = self._bh.T
             BtiTW = np.dot(np.dot(BvT, invT), self._w)
             WtiTW = np.dot(np.dot(self._w.T, invT), self._w)
-            return np.real(-np.dot(invT, self._bv) + 1.0 / (2j * np.pi) *
-                           np.dot(np.dot(invT, self._w), RiemannTheta.normalized_eval((BhT - BtiTW) / (2.0j * np.pi),
-                                                                                      (-self._q + WtiTW) / (2.0j * np.pi), mode=self._mode, derivs=self._D1)))
+            return np.real(
+                -np.dot(invT, self._bv)
+                + 1.0
+                / (2j * np.pi)
+                * np.dot(
+                    np.dot(invT, self._w),
+                    RiemannTheta.normalized_eval(
+                        (BhT - BtiTW) / (2.0j * np.pi),
+                        (-self._q + WtiTW) / (2.0j * np.pi),
+                        mode=self._mode,
+                        derivs=self._D1,
+                    ),
+                )
+            )
         else:
-            assert AssertionError('Mean for mode %s not implemented' % self._mode)
+            raise AssertionError("Mean for mode %s not implemented" % self._mode)

     def backprop(self, E):
         """Evaluates and stores the gradients for backpropagation.
@@ -248,27 +418,35 @@ def backprop(self, E):
             np.fill_diagonal(Hb, Hb.diagonal() * 0.5)

             # Grad Bv
-            self._gradBv = np.mean(E * (self._P * (-self._X - 2.0 * iT.dot(self._bv) + iTW.dot(Db))), axis=1)
+            self._gradBv = np.mean(
+                E * (self._P * (-self._X - 2.0 * iT.dot(self._bv) + iTW.dot(Db))), axis=1
+            )

             # Grad Bh
             self._gradBh = np.mean(E * self._P * (Da - Db), axis=1)

             # Grad W
-            self._gradW = (E * self._P * self._X).dot(Da.T) / self._X.shape[1] + np.mean(E * self._P, axis=1) * (
-                self._bv.T.dot(iT).T.dot(Db.T) - 2 * iTW.dot(Hb))
+            self._gradW = (E * self._P * self._X).dot(Da.T) / self._X.shape[1] + np.mean(
+                E * self._P, axis=1
+            ) * (self._bv.T.dot(iT).T.dot(Db.T) - 2 * iTW.dot(Hb))

             # Grad T
             iT2 = np.square(iT)
-            self._gradT = np.diag(np.mean(-0.5 * self._P * self._X ** 2 * E, axis=1)) + np.mean(E * self._P, axis=1) * (
-                0.5 * iT + self._bv ** 2 * iT2 - self._bv * iT2 * self._w.dot(Db) + iT2 * self._w.dot(Hb).dot(
-                    self._w.T))
+            self._gradT = np.diag(np.mean(-0.5 * self._P * self._X ** 2 * E, axis=1)) + np.mean(
+                E * self._P, axis=1
+            ) * (
+                0.5 * iT
+                + self._bv ** 2 * iT2
+                - self._bv * iT2 * self._w.dot(Db)
+                + iT2 * self._w.dot(Hb).dot(self._w.T)
+            )

             # Grad Q
             self._gradQ = np.mean(-self._P * (DDa - DDb) * E, axis=1).reshape(self._q.shape)
             np.fill_diagonal(self._gradQ, self._gradQ.diagonal() * 0.5)

         else:
-            raise AssertionError('Gradients for non-diagonal T not implemented.')
+            raise AssertionError("Gradients for non-diagonal T not implemented.")

     def size(self):
         """
@@ -286,37 +464,57 @@ def set_parameters(self, params):
         Returns:
             bool: True if Q, T and Q-WtTW are positive, False otherwise.
         """
-
         if len(params) != self._size:
-            raise Exception('Size does no match.')
+            raise Exception("Size does not match.")

         self._parameters = params

-        self._bv = params[:self._Nv].reshape(self._bv.shape)
+        self._bv = params[: self._Nv].reshape(self._bv.shape)
         index = self._Nv

-        self._bh = self._phase*params[index:index+self._Nh].reshape(self._bh.shape)
+        self._bh = self._phase * params[index : index + self._Nh].reshape(self._bh.shape)
         index += self._Nh

-        self._w = self._phase*params[index:index+self._Nv*self._Nh].reshape(self._w.shape)
+        self._w = self._phase * params[index : index + self._Nv * self._Nh].reshape(self._w.shape)
         index += self._w.size

         if self._diagonal_T:
-            np.fill_diagonal(self._t, params[index:index+self._Nv])
+            np.fill_diagonal(self._t, params[index : index + self._Nv])
             index += self._Nv
         else:
             inds = np.triu_indices_from(self._t)
-            self._t[inds] = params[index:index+(self._Nv**2+self._Nv)//2]
-            self._t[(inds[1], inds[0])] = params[index:index+(self._Nv**2+self._Nv)//2]
-            index += (self._Nv**2+self._Nv)//2
+            matrix_t = np.zeros((self._Nv, self._Nv))
+            matrix_t[inds] = params[index : index + (self._Nv ** 2 + self._Nv) // 2]
+            if self._positive_T:
+                # The parameters hold the Cholesky factor of T
+                self._t = matrix_t.T.dot(matrix_t)
+            else:
+                matrix_t[(inds[1], inds[0])] = params[
+                    index : index + (self._Nv ** 2 + self._Nv) // 2
+                ]
+                self._t = matrix_t
+            index += (self._Nv ** 2 + self._Nv) // 2

         inds = np.triu_indices_from(self._q)
-        self._q[inds] = params[index:index+(self._Nh**2+self._Nh)//2]
-        self._q[(inds[1], inds[0])] = params[index:index+(self._Nh**2+self._Nh)//2]
+        # Note: filling a fresh matrix before assigning it is purely aesthetic
+        matrix_q = np.zeros((self._Nh, self._Nh))
+        matrix_q[inds] = params[index : index + (self._Nh ** 2 + self._Nh) // 2]
+        if self._positive_Q:
+            if self._Nh == 1:
+                self._q = matrix_q
+            else:
+                self._q = matrix_q.T.dot(matrix_q)
+        else:
+            matrix_q[(inds[1], inds[0])] = params[index : index + (self._Nh ** 2 + self._Nh) // 2]
+            self._q = matrix_q

+        # At this point we can already check the positivity
         if self._check_positivity:
-            if not check_normalization_consistency(self._t, self._q, self._w) or \
-                    not check_pos_def(self._q) or not check_pos_def(self._t):
+            if not check_normalization_consistency(self._t, self._q, self._w):
+                return False
+            if not self._positive_T and not check_pos_def(self._t):
+                return False
+            if not self._positive_Q and not check_pos_def(self._q):
                 return False

         return True
@@ -336,18 +534,68 @@ def get_gradients(self):
         inds = np.triu_indices_from(self._gradQ)

         if self._diagonal_T:
-            return np.real(np.concatenate((self._gradBv.flatten(),self._gradBh.flatten(),self._gradW.flatten(), self._gradT.diagonal(), self._gradQ[inds].flatten())))
+            return np.real(
+                np.concatenate(
+                    (
+                        self._gradBv.flatten(),
+                        self._gradBh.flatten(),
+                        self._gradW.flatten(),
+                        self._gradT.diagonal(),
+                        self._gradQ[inds].flatten(),
+                    )
+                )
+            )
         else:
-            return np.real(np.concatenate((self._gradBv.flatten(),self._gradBh.flatten(),self._gradW.flatten(), self._gradT.flatten(), self._gradQ[inds].flatten())))
+            return np.real(
+                np.concatenate(
+                    (
+                        self._gradBv.flatten(),
+                        self._gradBh.flatten(),
+                        self._gradW.flatten(),
+                        self._gradT.flatten(),
+                        self._gradQ[inds].flatten(),
+                    )
+                )
+            )
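The positivity guarantee behind `positive_T`/`positive_Q` rests on the fact that CᵀC is symmetric positive semi-definite for any upper-triangular factor C, so the optimizer can roam freely over the factor entries. A self-contained check of the idea (the bound value is illustrative, mirroring the limits set by `set_bounds` below):

```python
import numpy as np

nh, bound = 3, 2.0
# Draw the upper-triangular factor inside (-sqrt(bound), sqrt(bound))
c = np.zeros((nh, nh))
c[np.triu_indices(nh)] = np.random.uniform(-np.sqrt(bound), np.sqrt(bound), (nh * nh + nh) // 2)

q = c.T.dot(c)                                  # reconstruction used by set_parameters
print(np.all(np.linalg.eigvalsh(q) >= -1e-12))  # positive semi-definite up to round-off
```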

     def set_bounds(self, param_bound):
-        """Sets the parameter bound for each parameter.
+        """Sets the parameter bounds for each parameter as (-param_bound, param_bound).
+        If T and/or Q are forced positive, the bounds depend on their shape: when T is
+        diagonal (or 1x1) the bounds are (1/param_bound, param_bound); otherwise, since
+        we optimize the Cholesky factor, the bounds are (-sqrt(param_bound), sqrt(param_bound)).

         Args:
             param_bound (float): the maximum absolute value for parameter variation.
         """
-        upper_bounds = [param_bound] * self._size
-        lower_bounds = [-param_bound] * self._size
+        upper_bounds = np.ones(self._size) * param_bound
+        lower_bounds = -upper_bounds
+
+        # T parameters start right after the biases and W (see the layout note in __init__)
+        t_idx = self._Nv + self._Nh + self._Nv * self._Nh
+        t_size = self._Nv if self._diagonal_T else (self._Nv ** 2 + self._Nv) // 2
+
+        if self._positive_T:
+            if self._Nv == 1 or self._diagonal_T:
+                lower_bounds[t_idx : t_idx + t_size] = 1.0 / param_bound
+                upper_bounds[t_idx : t_idx + t_size] = param_bound
+            else:
+                sqr_bound = np.sqrt(param_bound)
+                lower_bounds[t_idx : t_idx + t_size] = -sqr_bound
+                upper_bounds[t_idx : t_idx + t_size] = sqr_bound
+
+        # We do the same for Q, only Q is not allowed to be diagonal
+        q_idx = t_idx + t_size
+        if self._positive_Q:
+            # if nh = 1 the parameter is q itself, so keep it positive
+            if self._Nh == 1:
+                lower_bounds[q_idx] = 1.0 / param_bound
+                upper_bounds[q_idx] = param_bound
+            else:
+                # positive but not diagonal: constrain the Cholesky factor instead
+                sqr_bound = np.sqrt(param_bound)
+                lower_bounds[q_idx:] = -sqr_bound
+                upper_bounds[q_idx:] = sqr_bound
+
         self._bounds = [lower_bounds, upper_bounds]

     def get_bounds(self):
@@ -357,6 +605,45 @@ def get_bounds(self):
         """
         return self._bounds

+    def get_transformation(self, xarr):
+        """Receives an array (nevents, ndim) and returns the transformed
+        array together with the probability density transformed accordingly"""
+        # Get first the density from the raw rtbm
+        px = self(xarr.T)[0]
+        # Then transform the input array and the density according to the activation
+        r_list = []
+        for x, act in zip(xarr.T, self._activations):
+            r_list.append(act(x))
+            px *= act.grad(x)
+        r = np.stack(r_list, axis=-1)
+        return r, px
+
+    def undo_transformation(self, rand):
+        """Receives an array of random numbers (the product of `get_transformation`)
+        and inverts the transformation, returning `xarr`"""
+        x_list = []
+        for r, act in zip(rand.T, self._activations):
+            x_list.append(act.inv(r))
+        return np.stack(x_list, axis=-1)
+
+    def make_sample_rho(self, size):
+        r"""Produces a probability density between 0 and 1 such that
+
+        \int_0^1 p(x) dx = 1
+
+        Returns:
+            r: np.array (nevents, ndim)
+                sampling of P(v) mapped to (0, 1)
+            p(x): np.array (nevents,)
+                the transformed density evaluated at the sampled points
+            r_raw: np.array (nevents, ndim)
+                original sampling of P(v) on (-inf, inf)
+        """
+        r_raw, _ = self.make_sample(size)
+        r, px = self.get_transformation(r_raw)
+        return r, px, r_raw
+
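The activation machinery used by `get_transformation` can be checked in isolation. Note the convention stated at the top of this file: `grad` returns 1/f'(x), i.e. the Jacobian |dx/dr| by which the density is multiplied. A small sketch with the `Tanh` activation from this diff:

```python
import numpy as np
from theta.rtbm import Tanh

act = Tanh()
x = np.linspace(-3.0, 3.0, 7)

r = act(x)                          # maps (-inf, inf) onto (0, 1)
print(np.allclose(act.inv(r), x))   # the inverse recovers x

# p_r(r) = p_x(x) * |dx/dr| and grad(x) is exactly 1/f'(x)
fprime = 0.5 * (1.0 - np.tanh(x) ** 2)
print(np.allclose(act.grad(x), 1.0 / fprime))
```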
     def make_sample(self, size, epsilon=RTBM_precision):
         """Produces P(v) and P(h) samples for the current RTBM architecture.

@@ -369,19 +656,19 @@ def make_sample(self, size, epsilon=RTBM_precision):
             list of numpy.array: sampling of P(h)
         """
         invT = np.linalg.inv(self._t)
-        WTiW  = self._w.T.dot(invT.dot(self._w))
+        WTiW = self._w.T.dot(invT.dot(self._w))
         BvTiW = self._bv.T.dot(invT.dot(self._w))

-        O = (self._q - WTiW)
-        Z = (self._bh.T - BvTiW)
+        O = self._q - WTiW
+        Z = self._bh.T - BvTiW
         Z = Z.flatten()

-        Omega = np.array(-O/(2.0j*np.pi), dtype=np.complex)
+        Omega = np.array(-O / (2.0j * np.pi), dtype=complex)
         Y = Omega.imag

-        RT = RiemannTheta.eval(Z/(2.0j*np.pi),-O/(2.0j*np.pi) )
+        RT = RiemannTheta.eval(Z / (2.0j * np.pi), -O / (2.0j * np.pi))

-        if(Y.shape[0]!=1):
+        if Y.shape[0] != 1:
             _T = np.linalg.cholesky(Y).T
         else:
             _T = np.sqrt(Y)

@@ -389,66 +676,67 @@ def make_sample(self, size, epsilon=RTBM_precision):
         T = np.ascontiguousarray(_T)
         g = len(Z)
-        R = radius(epsilon, _T, derivs=[], accuracy_radius=5.)
-        S = np.ascontiguousarray(integer_points_python(g,R,_T))
+        R = radius(epsilon, _T, derivs=[], accuracy_radius=5.0)
+        S = np.ascontiguousarray(integer_points_python(g, R, _T))

         pmax = 0
         for s in S:
             v = rtbm_ph(self, s)
-            if v > pmax: pmax = v
+            if v > pmax:
+                pmax = v

         # Rejection sampling
         ph = []
         while len(ph) < size:
-            U = np.random.randint(0,len(S))
-            X = (np.exp(-0.5*S[U].T.dot(O).dot(S[U])-Z.dot(S[U]))/RT).real
+            U = np.random.randint(0, len(S))
+            X = (np.exp(-0.5 * S[U].T.dot(O).dot(S[U]) - Z.dot(S[U])) / RT).real
             J = np.random.uniform()

-            if(X/pmax > J):
+            if X / pmax > J:
                 ph.append(S[U])

         # Draw samples from P(v|h)
         pv = np.zeros(shape=(len(ph), self._bv.shape[0]))

-        for i in range(0,len(ph)):
-            muh = -np.dot(invT, np.dot(self._w, ph[i].reshape(g,1))+ self._bv)
-            pv[i] = np.random.multivariate_normal(mean=muh.flatten(),cov=invT, size=1).flatten()
+        for i in range(0, len(ph)):
+            muh = -np.dot(invT, np.dot(self._w, ph[i].reshape(g, 1)) + self._bv)
+            pv[i] = np.random.multivariate_normal(mean=muh.flatten(), cov=invT, size=1).flatten()

         return pv, ph

     def conditional(self, d):
-        """Generates the conditional RTBM. 
-
+        """Generates the conditional RTBM.
+
         Args:
             d (numpy.array): column vector containing the values for the conditional

         Returns:
             theta.rtbm.RTBM: RTBM modelling the conditional probability P(y|d)
         """
-        assert (self._Nv > 1), "cannot do the conditional probability of a 1d distribution"
-
+        assert self._Nv > 1, "cannot do the conditional probability of a 1d distribution"
+
         nh = self._Nh
-        nv = self._Nv 
+        nv = self._Nv

-        assert (d.size < nv), "d larger than Nv"
+        assert d.size < nv, "d larger than Nv"
+
+        k = int(nv - d.size)

-        k = int(nv-d.size)
-
         cmodel = RTBM(k, nh)

         # Matrix A
-        t = self.t[:k,:k]
+        t = self.t[:k, :k]
         t = t[np.triu_indices(k)]
         q = self.q[np.triu_indices(nh)]
         w = self.w[:k]

         # Biases
         bh = self.bh + np.dot(self.w[k:].T, d)
-        bv = self.bv[:k] + np.dot(self.t[:k,k:], d)
+        bv = self.bv[:k] + np.dot(self.t[:k, k:], d)

-        cparams = np.concatenate((bv, bh, w, t, q), axis = None)
+        cparams = np.concatenate((bv, bh, w, t, q), axis=None)
         cmodel.set_parameters(cparams)

         return cmodel
-
+
     @property
     def mode(self):
         """Sets and returns the RTBM mode."""
@@ -464,14 +752,18 @@ def mode(self, value):

         if value is self.Mode.Probability:
             self._call = lambda data: np.real(
-                rtbm_probability(data, self._bv, self._bh, self._t, self._w, self._q, mode))
+                rtbm_probability(data, self._bv, self._bh, self._t, self._w, self._q, mode)
+            )
         elif value is self.Mode.LogProbability:
             self._call = lambda data: np.real(
-                rtbm_log_probability(data, self._bv, self._bh, self._t, self._w, self._q, mode))
+                rtbm_log_probability(data, self._bv, self._bh, self._t, self._w, self._q, mode)
+            )
         elif value is self.Mode.Expectation:
-            self._call = lambda data: np.real(self._phase * hidden_expectations(data, self._bh, self._w, self._q))
+            self._call = lambda data: np.real(
+                self._phase * hidden_expectations(data, self._bh, self._w, self._q)
+            )
         else:
-            raise AssertionError('Mode %s not implemented.' % value)
+            raise AssertionError("Mode %s not implemented." % value)

         self._mode = value

@@ -483,7 +775,7 @@ def bv(self):
     @bv.setter
     def bv(self, value):
         if value.shape != self._bv.shape:
-            raise AssertionError('Setting bv with wrong shape.')
+            raise AssertionError("Setting bv with wrong shape.")
         self._bv = value

     @property
@@ -494,7 +786,7 @@ def t(self):
     @t.setter
     def t(self, value):
         if value.shape != self._t.shape:
-            raise AssertionError('Setting t with wrong shape.')
+            raise AssertionError("Setting t with wrong shape.")
         self._t = value

     @property
@@ -505,7 +797,7 @@ def bh(self):
     @bh.setter
     def bh(self, value):
         if value.shape != self._bh.shape:
-            raise AssertionError('Setting bh with wrong shape.')
+            raise AssertionError("Setting bh with wrong shape.")
         self._bh = value

     @property
@@ -516,7 +808,7 @@ def w(self):
     @w.setter
     def w(self, value):
         if value.shape != self._w.shape:
-            raise AssertionError('Setting w with wrong shape.')
+            raise AssertionError("Setting w with wrong shape.")
         self._w = value

     @property
@@ -527,5 +819,5 @@ def q(self):
     @q.setter
     def q(self, value):
         if value.shape != self._q.shape:
-            raise AssertionError('Setting q with wrong shape.')
+            raise AssertionError("Setting q with wrong shape.")
         self._q = value
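Putting the new pieces together, a hedged end-to-end sketch; all parameter values are illustrative, and `make_sample_rho` can be slow for large sizes since it relies on rejection sampling:

```python
import numpy as np
from theta.rtbm import RTBM

# Positive-definite T and Q via the Cholesky parametrization, Gaussian start,
# and samples mapped to the unit interval through the sigmoid activation
model = RTBM(2, 1,
             positive_T=True, positive_Q=True,
             gaussian_init=True, gaussian_parameters={"mean": 0.0, "std": 1.0},
             sampling_activations="sigmoid")

r, px, r_raw = model.make_sample_rho(100)
print(r.shape, px.shape, r_raw.shape)  # (100, 2), (100,), (100, 2)
print(np.allclose(model.undo_transformation(r), r_raw))
```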