libAtoms · bernstei · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026 · Jan 2, 2026
diff --git a/pymatnext/analysis/utils.py b/pymatnext/analysis/utils.py
@@ -1,7 +1,28 @@
 import numpy as np
 
-def calc_log_a(iters, n_walkers, n_cull, each_cull=False):
-    if each_cull:
+def calc_log_a(iters, n_walkers, n_cull, discrete=False):
+    if discrete:
+        iters = np.asarray(iters)
+        n_cull = np.asarray(n_cull)
+        # need every iter, and the number culled for each iter
+        assert len(iters) == len(n_cull)
+        assert np.all(iters[1:] - iters[:-1] == 1)
+        # fraction remaining after each iteration
+        fracs = (n_walkers - n_cull) / n_walkers
+        # volume remaining after iteration i
+        # vol_i = \prod_{j=0..i} frac_i
+        # weight of configs culled in iteration i
+        # a_i = vol_{i-1} - vol_{i}
+        #     = \prod_{j=0..i-1} frac_j - \prod_{j=0..i} frac_j
+        #     = (\prod_{j=0..i-1} frac_j) (1 - frac_i)
+        # log(a_i) = (\sum_{j=0..i-1} log(frac_j)) + log(1 - frac_i)
+        frac_log_sums = np.append([0], np.cumsum(np.log(fracs)))
+        log_a = frac_log_sums[:-1] + np.log(1.0 - fracs)
+
+    else:
+
+        """
+        # UNSUPPORTED MULTIPLE CULLS
         # assume that for multiple culls, every energy is reported, use formula from
         #     SENS paper PRX v. 4 p 031034 (2014) Eq. 3
         # also assume that iters array increments by one for each cull (i.e. not exactly NS iters)
@@ -15,8 +36,9 @@ def calc_log_a(iters, n_walkers, n_cull, each_cull=False):
         #     = prod(0..iter[i-1]) (N-i%P)/(N+1-i%P) - prod(0..iter[i]) (N-i%P)/(N+1-i%P)
         #     = [ prod(0..iter[i-1]) (N-i%P)/(N+1-i%P) ] * (1 - prod(iter[i-1]+1..iter[i]) (N-i%P)/(N+1-i%P))
         #     = [ prod(0..iter[i-1]) (N-i%P)/(N+1-i%P) ] * (1 - prod(iter[i-1]+1..iter[i]) (N-i%P)/(N+1-i%P))
-        raise RuntimeError('calc_log_a for each_cull not yet implemented')
-    else:
+        """
+        if n_cull != 1:
+            raise RuntimeError(f'calc_log_a for n_cull = {n_cull} != 1 not yet implemented')
         log_a = iters * np.log((n_walkers - n_cull + 1) / (n_walkers + 1))
 
     return log_a
@@ -178,11 +200,12 @@ def analyse_T(T, Es, E_shift, Vs, extra_vals, log_a, flat_V_prior, N_atoms, kB,
     problem = False
     # one way to get bad sampling is to be too dominated by a few configurations
     problem |= p_entropy < p_entropy_min
-    # another is to clip the top (high iteration #) of the distribution
-    low_percentile_mean = np.mean(Z_term[low_percentile_config:low_percentile_config + 1000] / Z_term_sum)
-    high_percentile_mean = np.mean(Z_term[high_percentile_config - 1000:high_percentile_config] / Z_term_sum)
+    # another is to clip the top (high iteration #) of the distribution, and therefore be very asymmetric
+    n_avg = (high_percentile_config - low_percentile_config) // 10
+    low_percentile_mean = np.mean(Z_term[low_percentile_config:low_percentile_config + n_avg] / Z_term_sum)
+    high_percentile_mean = np.mean(Z_term[high_percentile_config - n_avg:high_percentile_config] / Z_term_sum)
     problem |= high_percentile_mean / low_percentile_mean > 2.0
 
-    results_dict['problem'] = 'true' if problem else 'false'
+    results_dict['problem'] = f'{problem}'.lower()
 
     return results_dict
diff --git a/pymatnext/cli/ns_analyse.py b/pymatnext/cli/ns_analyse.py
@@ -92,12 +92,6 @@ def colname(colname_str):
             return colname_str
 
 
-    # warn about P = 0
-    if comm_rank == 0:
-        if args.delta_P is None or args.delta_P == 0.0 or args.delta_P_GPa is None or args.delta_P_GPa == 0.0:
-            logging.warning("Analysis at P=0 with variable cell is ill defined, and we got --delta_P None or 0.0, "
-                          "so be careful if run had cell moves and _sampling_ P was 0.0")
-
     for infile_i, infile in enumerate(args.infile):
         iters = []
         Es = []
@@ -156,6 +150,16 @@ def colname(colname_str):
             Es = np.asarray(Es)
             vals = np.asarray(vals)
 
+            def _get_and_remove_from_extras(key, vals):
+                key_ind = header['extras'].index(key)
+                vals_of_key = vals[:, key_ind].copy()
+                inds = list(range(vals.shape[1]))
+                del inds[key_ind]
+                vals = vals[:, inds]
+                header['extras'].remove(key)
+
+                return vals_of_key, vals
+
             # pointer to natoms
             try:
                 natoms_ind = header['extras'].index('natoms')
@@ -164,30 +168,37 @@ def colname(colname_str):
                 natoms = None
             # pull out Vs
             try:
-                vol_ind = header['extras'].index('volume')
-                Vs = vals[:, vol_ind]
-                inds = list(range(vals.shape[1]))
-                del inds[vol_ind]
-                vals = vals[:, inds]
-                header['extras'].remove('volume')
+                Vs, vals = _get_and_remove_from_extras('volume', vals)
             except (KeyError, ValueError):
                 Vs = None
 
-            # make into list of ndarrays, each of shape (Nsamples,)
-            vals = list(vals.T)
+            # warn about P = 0
+            if comm_rank == 0:
+                if Vs is not None and (args.delta_P is None or args.delta_P == 0.0 or args.delta_P_GPa is None or args.delta_P_GPa == 0.0):
+                    logging.warning("Analysis at P=0 with variable cell is ill defined, and we got --delta_P None or 0.0, "
+                                    "so be careful if run had cell moves and _sampling_ P was 0.0")
 
+            # enthalpy if needed
             if args.delta_P is not None and args.delta_P != 0.0:
                 if Vs is None:
                     raise RuntimeError('--delta_P != 0 requires volumes')
                 Es += args.delta_P*Vs
 
+            # shift energy 0
             E_min = Es[-1]
             Es -= E_min
 
-            # main
+            # header
             n_walkers = header['n_walkers']
-            n_cull = header.get('n_cull', 1)
-            log_a = utils.calc_log_a(iters, n_walkers, n_cull)
+            discrete = header.get('discrete', False)
+            if discrete:
+                n_cull, vals = _get_and_remove_from_extras('n_cull', vals)
+            else:
+                n_cull = header.get('n_cull', 1)
+            log_a = utils.calc_log_a(iters, n_walkers, n_cull, discrete=discrete)
+
+            # make into list of ndarrays, each of shape (Nsamples,)
+            vals = list(vals.T)
 
             flat_V_prior = False
             if Vs is not None:
@@ -276,9 +287,9 @@ def str_format(fmt):
                    'U' : ('U', '{:11g}'),
                    'Cvp' : ('Cv or Cp', '{:11g}'),
                    'S' : ('S', '{:11g}'),
-                   'low_percentile_config' : ('low % i', '{:10.0f}'),
-                   'mode_config' : ('mode i', '{:10.0f}'),
-                   'high_percentile_config' : ('high % i', '{:10.0f}'),
+                   'low_percentile_config' : ('low % i', '{:10d}'),
+                   'mode_config' : ('mode i', '{:10d}'),
+                   'high_percentile_config' : ('high % i', '{:10d}'),
                    'p_entropy' : ('ent(p)', '{:6.3f}'),
                    'V' : ('V', '{:8g}'),
                    'thermal_exp' : ('alpha', '{:9.3g}'),
@@ -291,7 +302,7 @@ def str_format(fmt):
 
         extensive_fields = ['log_Z', 'FG', 'U', 'Cvp', 'S', 'V', 'thermal_exp']
         if comm_rank == 0:
-            print("# ", infile, "n_walkers", n_walkers, "n_cull", n_cull)
+            print("# ", infile, "n_walkers", n_walkers, "n_cull", n_cull if isinstance(n_cull, int) else "VARIABLE")
 
             header_format = '# ' + T_format_s  + ' ' + ' '.join([str_format(formats.get(k, default_format)[1]) for k in item_keys])
             line_format =          T_format[1] + ' ' + ' '.join([formats.get(k, default_format)[1] for k in item_keys])