Draft pull request with 38 commits, all by scarlehoff:
f1e9977 (Feb 5, 2021): add kullbackleibler
78fad20 (Feb 5, 2021): add a positive_bound control to the diagonal of the matrix
38eced2 (Feb 5, 2021): use internal cma stuff
ec3baed (Feb 8, 2021): small changes
101021a (Feb 9, 2021): added an option to force positivity
baf3e4e (Feb 9, 2021): use cholesky decomposition when forcing positivity
be21468 (Feb 10, 2021): add gaussian init
19ea636 (Feb 10, 2021): add notebook
f0dd7c4 (Feb 10, 2021): add automatic gaussian initialization
107c471 (Feb 10, 2021): some improvements to the gaussian init
17b8666 (Feb 11, 2021): set the bounds according to the gaussian initialization
74e1291 (Feb 11, 2021): ask the rtbm directly for rho
5616d2b (Feb 11, 2021): bugfix
4f21294 (Feb 11, 2021): update nb
9816854 (Feb 11, 2021): update nb
c950ee7 (Feb 12, 2021): test
68752fd (Mar 1, 2021): try other scaling
09c082b (Mar 2, 2021): styled
5153c5e (Mar 2, 2021): add samplings
db5216f (Mar 2, 2021): new nb
0de9051 (Mar 3, 2021): parallel
e81136c (Mar 3, 2021): training
34f9b98 (Mar 3, 2021): training with nhid=3
de849d1 (Mar 4, 2021): training with minmax
9c06afe (Mar 4, 2021): minmax_nhid=3
5955a91 (Mar 5, 2021): incremental
f55bd41 (Mar 5, 2021): perfect training
110d33d (Mar 5, 2021): perfect training
bb4cf19 (Mar 6, 2021): good training with tanh, nh=3
06129b0 (Mar 6, 2021): add more activations
dbdacb7 (Mar 6, 2021): incremental
a555616 (Mar 9, 2021): good fit even for nh=3, nv=2
01476ba (Mar 18, 2021): some more changes
032d84f (Mar 18, 2021): tanh between 0 and 1
bd262a5 (Mar 30, 2021): fix comments
27246ad (Apr 8, 2021): allow for different activations with differently configured gaussians
bc5597b (Apr 13, 2021): add inverse transformation
15844a9 (Apr 13, 2021): remove notebooks
22 changes: 17 additions & 5 deletions theta/costfunctions.py
@@ -15,14 +15,27 @@ def cost(self, X, *Y):
@abstractmethod
def gradient(self, X):
pass


+class kullbackLeibler(costfunction):
+    """ Kullback-Leibler divergence, up to the constant entropy of the target """
+
+    @staticmethod
+    def cost(x, y):
+        # Guard against invalid (negative) probabilities
+        if (x < 0).any():
+            raise ValueError("kullbackLeibler.cost received negative probabilities")
+        return -np.sum(y * np.log(x))  # + np.sum(y*np.log(y)) is constant and omitted
+
+    @staticmethod
+    def gradient(x, y):
+        # Elementwise gradient of -sum(y * log(x)) with respect to x
+        return -y / x


class mse(costfunction):
""" Mean squared error """

@staticmethod
def cost(x, y):
-        return np.sum(np.mean((x-y)**2,axis=1))
+        return np.sum(np.mean((x-y)**2,axis=-1))

@staticmethod
def gradient(x, y):
@@ -49,7 +62,7 @@ def cost(x, *y):
return -np.sum(x)

@staticmethod
-    def gradient(x):
+    def gradient(x, y):
sys.exit("Gradient not implemented!")


@@ -61,7 +74,7 @@ def cost(x, y):
return np.sqrt(np.sum(np.mean((y-x)**2,axis=1)))

@staticmethod
-    def gradient(x):
+    def gradient(x, y):
return 1.0/np.sqrt(np.sum(np.mean((y-x)**2,axis=1)))*(x-y)


@@ -74,6 +87,5 @@ def cost(x, y):
return -np.sum(np.mean(np.multiply(y,lx),axis=1))

@staticmethod
-    def gradient(x):
+    def gradient(x, y):
return -1.0/y.shape[1]*y/x

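The np.sum(y*np.log(y)) term left commented out in kullbackLeibler.cost is constant with respect to the model output, so omitting it shifts the cost by a constant without moving the optimum. A minimal sketch of the relation (the arrays below are made up for illustration):

```python
import numpy as np

# Hypothetical normalized densities, for illustration only
x = np.array([0.2, 0.5, 0.3])     # model prediction, strictly positive
y = np.array([0.25, 0.45, 0.30])  # target density

cross_entropy = -np.sum(y * np.log(x))   # what kullbackLeibler.cost returns
entropy = -np.sum(y * np.log(y))         # constant in x, hence dropped
kl_divergence = cross_entropy - entropy  # full KL(y || x), always >= 0

print(cross_entropy, kl_divergence)
```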
15 changes: 11 additions & 4 deletions theta/mathtools.py
@@ -6,9 +6,13 @@
RTBM_precision= 1e-8


-def check_normalization_consistency(t, q, w):
+def normalization_consistency(t, q, w):
     c = q - np.transpose(w).dot(np.linalg.inv(t).dot(w))
-    return np.all(np.linalg.eigvals(c) > 0)
+    return np.linalg.eigvals(c)
+
+
+def check_normalization_consistency(t, q, w):
+    return np.all(normalization_consistency(t, q, w) > 0)


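The refactor above separates the eigenvalue computation from the boolean check, so a caller can inspect how far the model is from the positivity condition rather than only whether it holds. A short sketch of the quantity involved, with hypothetical parameter matrices:

```python
import numpy as np

# Hypothetical RTBM-like parameters, shapes chosen for illustration
t = np.array([[2.0, 0.1], [0.1, 1.5]])  # visible-sector matrix
q = np.array([[3.0, 0.0], [0.0, 2.5]])  # hidden-sector matrix
w = np.array([[0.3, 0.1], [0.2, 0.4]])  # coupling matrix

# Schur complement computed by normalization_consistency
c = q - w.T @ np.linalg.inv(t) @ w
eigenvalues = np.linalg.eigvals(c)

# check_normalization_consistency reduces this to a single boolean
print(eigenvalues, np.all(eigenvalues > 0))
```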
def check_pos_def(x):
@@ -57,7 +61,10 @@ def rtbm_probability(v, bv, bh, t, w, q, mode=1):
uR1, vR1 = RiemannTheta.parts_eval((vT.dot(w) + BhT) / (2.0j * np.pi), -q / (2.0j * np.pi), mode, epsilon=RTBM_precision)
uR2, vR2 = RiemannTheta.parts_eval((BhT - BtiTW) / (2.0j * np.pi), (-q + WtiTW) / (2.0j * np.pi), mode, epsilon=RTBM_precision)

-    return np.sqrt(detT / (2.0 * np.pi) ** (v.shape[0])) * ExpF * vR1 / vR2 * np.exp(uR1-uR2)
+    # To avoid problems with multiprocessing, cap the exponent at a maximum
+    # value and pad the denominator to avoid division by zero
+    res = np.sqrt(detT / (2.0 * np.pi) ** (v.shape[0])) * ExpF * vR1 / (vR2 + RTBM_precision) * np.exp(np.minimum(uR1 - uR2, 250))
+    return res


def rtbm_log_probability(v, bv, bh, t, w, q, mode=1):
@@ -263,4 +270,4 @@ def rtbm_ph(model, h):
BBTWh = np.dot(BBTW ,h)
ExpF = np.exp(-0.5*hTQWTWh-BBTWh)
u, v = RiemannTheta.parts_eval((BhT-BtiTW)/(2j*np.pi),(-model.q+WtiTW)/(2j*np.pi), mode=1, epsilon=RTBM_precision)
    return ExpF / v * np.exp(-u)
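The guard added to rtbm_probability caps the exponent and pads the denominator so that worker processes never receive inf or NaN. A small sketch of the idea with hypothetical values for the Riemann-Theta parts:

```python
import numpy as np

RTBM_precision = 1e-8  # same constant as in theta/mathtools.py

# Hypothetical values for the evaluation parts
uR1, uR2 = 800.0, 20.0  # np.exp(780.0) alone would overflow to inf
vR1, vR2 = 0.5, 0.0     # a vanishing vR2 alone would divide by zero

# Clamped form used in the new return value: large but finite
safe = vR1 / (vR2 + RTBM_precision) * np.exp(np.minimum(uR1 - uR2, 250))
print(safe)
```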
39 changes: 23 additions & 16 deletions theta/minimizer.py
@@ -28,7 +28,10 @@ def worker_initialize(cost, model, x_data, y_data):
def worker_compute(params):
if not resource.model.set_parameters(params):
return np.NaN
-    res = resource.cost_function.cost(np.real(resource.model(resource.x_data)), resource.y_data)
+    prob = np.real(resource.model(resource.x_data))
+    if (prob < 0.0).any():
+        # A negative probability marks an invalid parameter point: return NaN
+        # so the minimizer discards this candidate
+        return np.NaN
+    res = resource.cost_function.cost(prob, resource.y_data)
return res


@@ -47,7 +50,8 @@ class CMA(object):
parallel (bool): if set to True the algorithm uses multi-processing.
ncores (int): limit the number of cores when ``parallel=True``.
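        verbose (bool): if set to False, suppresses the CMA console output.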
"""
-    def __init__(self, parallel=False, ncores=0):
+    def __init__(self, parallel=False, ncores=0, verbose=True):
+        self._verbose = verbose
super(CMA, self).__init__()
if parallel:
if(ncores==0):
@@ -77,11 +81,20 @@ def train(self, cost, model, x_data, y_data=None, tolfun=1e-11, popsize=None, ma
Note:
The parameters of the model are changed by this algorithm.
"""
if self._verbose:
vb = 0
else:
vb = -9

initsol = np.real(model.get_parameters())
-        args = {'bounds': model.get_bounds(),
+        # Prepare the bounds
+        bounds = model.get_bounds()
+
+        args = {'bounds': bounds,
                 'tolfun': tolfun,
-                'verb_log': 0}
+                'verbose': vb,
+                'verb_log': vb}
sigma = np.max(model.get_bounds()[1])*0.1

if popsize is not None:
@@ -112,18 +125,12 @@
pool.terminate()
else:
worker_initialize(cost, model, x_data, y_data)
         while not es.stop():
-            f_values, solutions = [], []
-            while len(solutions) < es.popsize:
-                curr_fit = x = np.NaN
-                while np.isnan(curr_fit):
-                    x = es.ask(1, gradf=grad)[0]
-                    curr_fit = worker_compute(x)
-                solutions.append(x)
-                f_values.append(curr_fit)
-            es.tell(solutions, f_values)
-            es.disp()
-        print(es.result)
+            # ask_and_eval lets cma decide how to handle NaN evaluations
+            solutions, f_values = es.ask_and_eval(worker_compute)
+            es.tell(solutions, f_values)
+            es.disp()
+        if self._verbose:
+            print(es.result)

model.set_parameters(es.result[0])
return es.result[0]
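The rewritten loop leans on pycma's ask_and_eval, whose default feasibility test treats a NaN fitness as infeasible and resamples the candidate, which is the behavior the deleted manual loop implemented by hand. A self-contained sketch with a toy objective standing in for worker_compute:

```python
import numpy as np
import cma

def objective(x):
    # Mimic worker_compute: NaN marks an invalid parameter point
    if np.any(np.abs(x) > 5.0):
        return np.nan
    return float(np.sum(x ** 2))

es = cma.CMAEvolutionStrategy(3 * [1.0], 0.5, {'verbose': -9})
while not es.stop():
    # Candidates evaluating to NaN are resampled internally
    solutions, f_values = es.ask_and_eval(objective)
    es.tell(solutions, f_values)

print(es.result[0])  # best solution found
```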