|
| 1 | +import matplotlib.pyplot as plt |
| 2 | +import numpy as np |
| 3 | +import pandas as pd |
| 4 | +import scipy.stats |
| 5 | + |
| 6 | +from .utils_complexity_embedding import complexity_embedding |
| 7 | + |
| 8 | + |
| 9 | +def entropy_angular(signal, delay=1, dimension=2, show=False, **kwargs): |
| 10 | + """**Angular entropy (AngEn)** |
| 11 | +
|
| 12 | + The Angular Entropy (AngEn) is the name that we use in NeuroKit to refer to the complexity |
| 13 | + method described in Nardelli et al. (2022), referred as comEDA due to its application to EDA |
| 14 | + signal. The method comprises the following steps: 1) Phase space reconstruction, 2) Calculation |
| 15 | + of the angular distances between all the pairs of points in the phase space; 3) Computation of |
| 16 | + the probability density function (PDF) of the distances; 4) Quadratic Rényi entropy of the PDF. |
| 17 | +
|
| 18 | + Parameters |
| 19 | + ---------- |
| 20 | + signal : Union[list, np.array, pd.Series] |
| 21 | + The signal (i.e., a time series) in the form of a vector of values. |
| 22 | + delay : int |
| 23 | + Time delay (often denoted *Tau* :math:`\\tau`, sometimes referred to as *lag*) in samples. |
| 24 | + See :func:`complexity_delay` to estimate the optimal value for this parameter. |
| 25 | + dimension : int |
| 26 | + Embedding Dimension (*m*, sometimes referred to as *d* or *order*). See |
| 27 | + :func:`complexity_dimension` to estimate the optimal value for this parameter. |
| 28 | + **kwargs : optional |
| 29 | + Other arguments. |
| 30 | +
|
| 31 | + Returns |
| 32 | + -------- |
| 33 | + angen : float |
| 34 | + The Angular Entropy (AngEn) of the signal. |
| 35 | + info : dict |
| 36 | + A dictionary containing additional information regarding the parameters used |
| 37 | + to compute the index. |
| 38 | +
|
| 39 | + See Also |
| 40 | + -------- |
| 41 | + entropy_renyi |
| 42 | +
|
| 43 | + Examples |
| 44 | + ---------- |
| 45 | + .. ipython:: python |
| 46 | +
|
| 47 | + import neurokit2 as nk |
| 48 | +
|
| 49 | + # Simulate a Signal with Laplace Noise |
| 50 | + signal = nk.signal_simulate(duration=2, frequency=[5, 3], noise=0.1) |
| 51 | +
|
| 52 | + # Compute Angular Entropy |
| 53 | + @savefig p_entropy_angular1.png scale=100% |
| 54 | + angen, info = nk.entropy_angular(signal, delay=1, dimension=3, show=True) |
| 55 | + @suppress |
| 56 | + plt.close() |
| 57 | +
|
| 58 | +
|
| 59 | + References |
| 60 | + ----------- |
| 61 | + * Nardelli, M., Greco, A., Sebastiani, L., & Scilingo, E. P. (2022). ComEDA: A new tool for |
| 62 | + stress assessment based on electrodermal activity. Computers in Biology and Medicine, 150, |
| 63 | + 106144. |
| 64 | +
|
| 65 | + """ |
| 66 | + # Sanity checks |
| 67 | + if isinstance(signal, (np.ndarray, pd.DataFrame)) and signal.ndim > 1: |
| 68 | + raise ValueError( |
| 69 | + "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet." |
| 70 | + ) |
| 71 | + |
| 72 | + # 1. Phase space reconstruction (time-delay embeddings) |
| 73 | + embedded = complexity_embedding(signal, delay=delay, dimension=dimension) |
| 74 | + |
| 75 | + # 2. Angular distances between all the pairs of points in the phase space |
| 76 | + angles = _angular_distance(embedded) |
| 77 | + |
| 78 | + # 3. Compute the probability density function (PDF) of the upper triangular matrix |
| 79 | + bins, pdf = _kde_sturges(angles) |
| 80 | + |
| 81 | + # 4. Apply the quadratic Rényi entropy to the PDF |
| 82 | + angen = -np.log2(np.sum(pdf**2)) |
| 83 | + |
| 84 | + # Normalize to the range [0, 1] by the log of the number of bins |
| 85 | + |
| 86 | + # Note that in the paper (eq. 4 page 4) there is a minus sign, but adding it would give |
| 87 | + # negative values, plus the linked code does not seem to do that |
| 88 | + # https://github.com/NardelliM/ComEDA/blob/main/comEDA.m#L103 |
| 89 | + angen = angen / np.log2(len(bins)) |
| 90 | + |
| 91 | + if show is True: |
| 92 | + # Plot the PDF as a bar chart |
| 93 | + plt.bar(bins[:-1], pdf, width=bins[1] - bins[0], align="edge", alpha=0.5) |
| 94 | + # Set the x-axis limits to the range of the data |
| 95 | + plt.xlim([np.min(angles), np.max(angles)]) |
| 96 | + # Print titles |
| 97 | + plt.suptitle(f"Angular Entropy (AngEn) = {angen:.3f}") |
| 98 | + plt.title("Distribution of Angular Distances:") |
| 99 | + |
| 100 | + return angen, {"bins": bins, "pdf": pdf} |
| 101 | + |
| 102 | + |
| 103 | +def _angular_distance(m): |
| 104 | + """ |
| 105 | + Compute angular distances between all the pairs of points. |
| 106 | + """ |
| 107 | + # Get index of upper triangular to avoid double counting |
| 108 | + idx = np.triu_indices(m.shape[0], k=1) |
| 109 | + |
| 110 | + # compute the magnitude of each vector |
| 111 | + magnitudes = np.linalg.norm(m, axis=1) |
| 112 | + |
| 113 | + # compute the dot product between all pairs of vectors using np.matmul function, which is |
| 114 | + # more efficient than np.dot for large matrices; and divide the dot product matrix by the |
| 115 | + # product of the magnitudes to get the cosine of the angle |
| 116 | + cos_angles = np.matmul(m, m.T)[idx] / np.outer(magnitudes, magnitudes)[idx] |
| 117 | + |
| 118 | + # clip the cosine values to the range [-1, 1] to avoid any numerical errors and compute angles |
| 119 | + return np.arccos(np.clip(cos_angles, -1, 1)) |
| 120 | + |
| 121 | + |
| 122 | +def _kde_sturges(x): |
| 123 | + """ |
| 124 | + Computes the PDF of a vector x using a kernel density estimator based on linear diffusion |
| 125 | + processes with a Gaussian kernel. The number of bins of the PDF is chosen applying the Sturges |
| 126 | + method. |
| 127 | + """ |
| 128 | + # Estimate the bandwidth |
| 129 | + iqr = np.percentile(x, 75) - np.percentile(x, 25) |
| 130 | + bandwidth = 0.9 * iqr / (len(x) ** 0.2) |
| 131 | + |
| 132 | + # Compute the number of bins using the Sturges method |
| 133 | + nbins = int(np.ceil(np.log2(len(x)) + 1)) |
| 134 | + |
| 135 | + # Compute the bin edges |
| 136 | + bins = np.linspace(np.min(x), np.max(x), nbins + 1) |
| 137 | + |
| 138 | + # Compute the kernel density estimate |
| 139 | + xi = (bins[:-1] + bins[1:]) / 2 |
| 140 | + pdf = np.sum( |
| 141 | + scipy.stats.norm.pdf((xi.reshape(-1, 1) - x.reshape(1, -1)) / bandwidth), axis=1 |
| 142 | + ) / (len(x) * bandwidth) |
| 143 | + |
| 144 | + # Normalize the PDF |
| 145 | + pdf = pdf / np.sum(pdf) |
| 146 | + |
| 147 | + return bins, pdf |
0 commit comments