Hi all,
First, thanks to all for compiling a very useful toolbox.
I was going to add an alternative solver to PELT for the jump penalized problem with the L1Cost,
which I described in a paper from some years ago: Storath et al., Jump-penalized least absolute values estimation of scalar or circle-valued signals, Information and Inference, 2017. In my experiments I found this to provide a significant speedup.
However, my results are not always consistent with the results of ruptures.
Below is an example comparing the two algorithms.
(Note that L1Cost had to be adapted to allow minimum segment sizes of one.)
For the signal below, the functional value (pen * n_breaks + L1Error) seems to be too large when using the Pelt breakpoints of ruptures.
The optimal functional value has to be smaller than or equal to pen * (N-1), which is 0.4 in this case,
and this is what my method returns. Pelt gives 0.6569190820008954 which may be attributed to the fact that it misses the breakpoint at index 4.
Can anyone confirm this observation, or am I getting something wrong about the Pelt implementation?
Best
Martin
*Edit: corrected an error in the reported numbers.*
import ruptures as rpt
import numpy as np
import time
from ruptures.base import BaseCost
# algorithm of the paper Storath et al., Jump-penalized least absolute values estimation of scalar or circle-valued signals, Information and Inference, 2017
def pl_min_l1_potts_nk(f, gamma, weights=None):
    """Exact solver for the 1D L1-Potts (jump-penalized least absolute
    deviations) problem via dynamic programming over candidate levels.

    Implements the algorithm of Storath et al., "Jump-penalized least
    absolute values estimation of scalar or circle-valued signals",
    Information and Inference, 2017. Runs in O(N * K) where K is the
    number of distinct values in ``f``.

    Args:
        f (array): 1D signal of length N (an (N, 1) array also works
            through broadcasting).
        gamma (float): jump penalty (>= 0).
        weights (array, optional): per-sample weights, same shape as ``f``;
            defaults to all ones.

    Returns:
        list: breakpoints in the ruptures convention — the index right
        after each jump, with the signal length N appended last.
    """
    if weights is None:
        weights = np.ones_like(f)
    N = len(f)
    # Candidate segment levels: an L1-optimal constant lies on a data value.
    v = np.unique(f)
    d = lambda x, y: np.abs(x - y)
    K = len(v)
    # B[k, n]: minimal penalized cost on f[0:n+1] given the last segment
    # has level v[k].
    B = np.zeros((K, N))
    # Tabulation (forward pass).
    B[:, 0] = d(v, f[0]) * weights[0]
    for n in range(1, N):
        z = np.min(B[:, n - 1])
        # Either keep the current level (no penalty) or jump from the best
        # previous level at cost gamma.
        B[:, n] = d(v, f[n]) * weights[n] + np.minimum(z + gamma, B[:, n - 1])
    # Backtracking (backward pass): recover the minimizing level sequence.
    u = np.zeros(N)
    level = np.argmin(B[:, N - 1])
    u[N - 1] = v[level]
    for n in range(N - 2, -1, -1):
        # Continuing at the current level pays no jump penalty, so discount
        # its entry by gamma before taking the argmin.
        B[level, n] -= gamma
        level = np.argmin(B[:, n])
        u[n] = v[level]
    brkps_np = np.where(np.diff(u) != 0)[0] + 1
    # Convert to the format ruptures uses (append the end index N).
    brkps = brkps_np.tolist() + [N]
    return brkps
def compute_functional_value(signal, bkps, gamma):
    """Evaluate the L1-Potts objective for a given segmentation:
    gamma * (number of jumps) + sum of per-segment L1 errors.

    Args:
        signal (array): signal, shape (n_samples,) or (n_samples, n_features).
        bkps (list): breakpoints in the ruptures convention (each entry is
            the index right after a segment; the last entry is len(signal)).
        gamma (float): jump penalty per breakpoint.

    Returns:
        float: the penalized functional value.
    """
    # The trailing len(signal) entry is not a jump, hence len(bkps) - 1.
    functional_value = gamma * (len(bkps) - 1)
    # Prepend the start index so consecutive pairs delimit the segments.
    bkps = [0] + bkps
    cost = CostL1_Custom()
    cost.fit(signal)
    for i in range(len(bkps) - 1):
        # L1 segment error: sum of absolute deviations from the segment median.
        functional_value += cost.error(bkps[i], bkps[i + 1])
    return functional_value
# adapted base costs from ruptures, to make min_size equal to 1, consistent with L1Potts algorithm
class CostL1_Custom(BaseCost):
    r"""Least absolute deviation cost, adapted from ruptures' L1 cost.

    Identical to the built-in cost except that ``min_size`` is 1 and the
    short-segment guard is omitted, so single-sample segments are allowed,
    consistent with the L1-Potts solver.
    """

    model = "l1_custom"

    def __init__(self) -> None:
        """Initialize the object."""
        self.signal = None
        # Allow segments of a single sample (ruptures' stock L1 cost uses 2).
        self.min_size = 1

    def fit(self, signal) -> "CostL1_Custom":
        """Set parameters of the instance.

        Args:
            signal (array): signal. Shape (n_samples,) or (n_samples, n_features).

        Returns:
            self
        """
        # Normalize 1D input to a column vector so error() can take a
        # column-wise median uniformly.
        if signal.ndim == 1:
            self.signal = signal.reshape(-1, 1)
        else:
            self.signal = signal
        return self

    def error(self, start, end) -> float:
        """Return the approximation cost on the segment [start:end].

        Args:
            start (int): start of the segment.
            end (int): end of the segment.

        Returns:
            float: segment cost — sum of absolute deviations from the
            column-wise median.
        """
        # NOTE: the NotEnoughPoints guard from ruptures' L1 cost is
        # intentionally omitted so that end - start == 1 is valid.
        sub = self.signal[start:end]
        med = np.median(sub, axis=0)
        return abs(sub - med).sum()
# compare the two methods
# creation of data
n, dim = 5, 1  # number of samples, signal dimensionality
n_bkps, sigma = 1, 1  # number of true breakpoints, noise standard deviation
signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma, seed=0)
pen = 0.1 # penalty value
# Pelt method
time_start = time.time()
# NOTE(review): custom_cost is passed as the class, not an instance
# (CostL1_Custom()); ruptures' examples pass an instance — confirm intended.
algo = rpt.Pelt(custom_cost=CostL1_Custom, min_size=1, jump=1).fit(signal)
pelt_bkps = algo.predict(pen=pen)
time_pelt = time.time() - time_start
# L1Potts (algorithm of the paper Jump-penalized least absolute values estimation of scalar or circle-valued signals)
time_start = time.time()
# signal is (n, 1); the Potts solver handles this shape via broadcasting
potts_brkps = pl_min_l1_potts_nk(signal, pen)
time_potts = time.time() - time_start
print("Signal: ", signal)
print("Execution time Pelt/Ruptures: ", time_pelt)
print("Execution time L1Potts: ", time_potts)
print("Breakpoints for Pelt/Ruptures:: ", pelt_bkps)
print("Breakpoints for L1Potts: ", potts_brkps)
# Same objective evaluated on both segmentations for a fair comparison.
print("Functional value for Pelt/Ruptures:: ", compute_functional_value(signal, pelt_bkps, gamma=pen))
print("Functional value for L1Potts: ", compute_functional_value(signal, potts_brkps, gamma=pen))
Hi all,
First, thanks to all for compiling a very useful toolbox.
I was going to add an alternative solver to PELT for the jump penalized problem with the L1Cost,
which I described in a paper from some years ago: Storath et al., Jump-penalized least absolute values estimation of scalar or circle-valued signals, Information and Inference, 2017. In my experiments I found this to provide a significant speedup.
However, my results are not always consistent with the results of ruptures.
Below is an example comparing the two algorithms.
(Note that L1Cost had to be adapted to allow minimum segment sizes of one.)
For the signal below, the functional value (pen * n_breaks + L1Error) seems to be too large when using the Pelt breakpoints of ruptures.
The optimal functional value has to be smaller than or equal to pen * (N-1), which is 0.4 in this case,
and this is what my method returns. Pelt gives 0.6569190820008954 which may be attributed to the fact that it misses the breakpoint at index 4.
Can anyone confirm this observation, or am I getting something wrong about the Pelt implementation?
Best
Martin