diff --git a/.vscode/extensions.json b/.vscode/extensions.json index cbb3cc6e..cc96e289 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -4,11 +4,9 @@ "codezombiech.gitignore", "davidanson.vscode-markdownlint", "editorconfig.editorconfig", - "elagil.pre-commit-helper", + "eeyore.yapf", "ms-python.python", "ms-python.vscode-pylance", - "serhioromano.vscode-gitflow", - "yzhang.markdown-all-in-one", - "zeshuaro.vscode-python-poetry" + "yzhang.markdown-all-in-one" ] } diff --git a/lmo/__init__.py b/lmo/__init__.py index f077c696..e0de49e3 100644 --- a/lmo/__init__.py +++ b/lmo/__init__.py @@ -1,38 +1,4 @@ """Lmo: Robust statistics with trimmed L-moments and L-comoments.""" - -__all__ = ( - '__version__', - - 'l_loc', - 'l_scale', - 'l_variation', - 'l_skew', - 'l_kurtosis', - - 'l_moment', - 'l_ratio', - 'l_stats', - - 'l_moment_cov', - 'l_ratio_se', - 'l_stats_se', - - 'l_moment_influence', - 'l_ratio_influence', - - 'l_weights', - - 'l_coloc', - 'l_coscale', - 'l_corr', - 'l_coskew', - 'l_cokurtosis', - - 'l_comoment', - 'l_coratio', - 'l_costats', -) - import sys from typing import TYPE_CHECKING, Final @@ -80,10 +46,41 @@ del np - __version__: Final[str] = _get_version() __author__: Final[str] = 'Joren Hammdugolu' __email__: Final[str] = 'jhammudoglu@gmail.com' __description__: Final[str] = ( 'Robust statistics with trimmed L-moments and L-comoments.' ) +__all__ = ( + '__version__', + + 'l_loc', + 'l_scale', + 'l_variation', + 'l_skew', + 'l_kurtosis', + + 'l_moment', + 'l_ratio', + 'l_stats', + + 'l_moment_cov', + 'l_ratio_se', + 'l_stats_se', + + 'l_moment_influence', + 'l_ratio_influence', + + 'l_weights', + + 'l_coloc', + 'l_coscale', + 'l_corr', + 'l_coskew', + 'l_cokurtosis', + + 'l_comoment', + 'l_coratio', + 'l_costats', +) diff --git a/lmo/_lm.py b/lmo/_lm.py index 8e25d4c5..e28dab8e 100644 --- a/lmo/_lm.py +++ b/lmo/_lm.py @@ -1,29 +1,7 @@ """Unbiased sample estimators of the generalized trimmed L-moments.""" +from __future__ import annotations -__all__ = ( - 'l_weights', - - 'l_loc', - 'l_scale', - 'l_variation', - 'l_skew', - 'l_kurtosis', - - 'l_moment', - 'l_ratio', - 'l_stats', - - 'l_moment_cov', - 'l_ratio_se', - 'l_stats_se', - - 'l_moment_influence', - 'l_ratio_influence', -) - -import sys -from collections.abc import Callable -from typing import Any, Final, SupportsIndex, TypeVar, cast, overload +from typing import TYPE_CHECKING, Any, Final, TypeAlias, cast, overload import numpy as np import numpy.typing as npt @@ -39,89 +17,122 @@ round0, ) from .linalg import ir_pascal, sandwich, sh_legendre, trim_matrix -from .typing import AnyInt, AnyTrim, IntVector, LMomentOptions, SortKind +from .typing.compat import TypeVar + + +if TYPE_CHECKING: + from collections.abc import Callable + + from .typing import ( + AnyAWeights, + AnyFWeights, + AnyOrder, + AnyOrderND, + AnyTrim, + LMomentOptions, + np as lnpt, + ) + from .typing.compat import Unpack + + +__all__ = ( + 'l_weights', + + 'l_moment', + 'l_ratio', + 'l_stats', + + 'l_loc', + 'l_scale', + 'l_variation', + 'l_skew', + 'l_kurtosis', + + 'l_moment_cov', + 'l_ratio_se', + 'l_stats_se', + 'l_moment_influence', + 'l_ratio_influence', +) -if sys.version_info < (3, 11): - from typing_extensions import Unpack -else: - from typing import Unpack -T = TypeVar('T', bound=np.floating[Any]) -V = TypeVar('V', bound=float | npt.NDArray[np.floating[Any]]) +_T_order = TypeVar('_T_order', bound=int) +_T_size = TypeVar('_T_size', bound=int) +_T_float = TypeVar('_T_float', bound=np.floating[Any], default=np.float64) -# 
Low-level weight methods +_DType: TypeAlias = np.dtype[_T_float] | type[_T_float] +_Vectorized: TypeAlias = _T_float | npt.NDArray[_T_float] +_Floating: TypeAlias = np.floating[Any] _L_WEIGHTS_CACHE: Final[ dict[ - tuple[int, int | float, int | float], # (n, s, t) - npt.NDArray[np.floating[Any]], + # (n, s, t) + tuple[int, int, int] | tuple[int, float, float], + lnpt.Array[tuple[int, int], _Floating], ] ] = {} def _l_weights_pwm( - r: int, - n: int, + r: _T_order, + n: _T_size, /, trim: tuple[int, int], - dtype: np.dtype[T] | type[T] = np.float64, -) -> npt.NDArray[T]: + *, + dtype: _DType[_T_float], +) -> lnpt.Array[tuple[_T_order, _T_size], _T_float]: s, t = trim r0 = r + s + t - p0 = sh_legendre(r0, dtype=np.int64 if r0 < 29 else dtype) - w0 = p0 @ pwm_beta.weights(r0, n, dtype=dtype) - out = trim_matrix(r, trim, dtype=dtype) @ w0 if s or t else w0 - return cast(npt.NDArray[T], out) - - # remove numerical noise from the trimmings, and correct for potential - # shifts in means - # p_r[:, :t1] = p_r[:, n - t2:] = 0 - # p_r[1:, t1:n - t2] -= p_r[1:, t1:n - t2].mean(1, keepdims=True) - - # return p_r + # `__matmul__` annotations are lacking (`np.matmul` is equivalent to it) + w0 = np.matmul( + sh_legendre(r0, dtype=np.int64 if r0 < 29 else dtype), + pwm_beta.weights(r0, n, dtype=dtype), + ) + return np.matmul(trim_matrix(r, trim, dtype=dtype), w0) if s or t else w0 def _l_weights_ostat( - r: int, - N: int, # noqa: N803 + r: _T_order, + n: _T_size, /, - trim: tuple[float, float], - dtype: np.dtype[T] | type[T] = np.float64, -) -> npt.NDArray[T]: - s, t = trim - - assert 0 < r + s + t <= N, (r, N, trim) + trim: tuple[int, int] | tuple[float, float], + *, + dtype: _DType[_T_float], +) -> lnpt.Array[tuple[_T_order, _T_size], _T_float]: assert r >= 1, r + + s, t = trim + assert 0 < r + s + t <= n, (r, n, trim) assert s >= 0, trim assert t >= 0, trim c = ir_pascal(r, dtype=dtype) - jnj = np.arange(N, dtype=dtype) - jnj /= N - jnj - - out = np.zeros((r, N), dtype=dtype) - for n in range(r): - w0 = ostats.weights(s, s + t + n + 1, N) - out[n] = c[n, 0] * w0 - for k in range(1, n + 1): + jnj = np.arange(n, dtype=dtype) + jnj /= n - jnj + + out = np.zeros((r, n), dtype=dtype) + for j in range(r): + w0 = ostats.weights(s, s + t + j + 1, n) + out[j] = c[j, 0] * w0 + for k in range(1, j + 1): # order statistic recurrence relation - w0 = np.roll(w0, 1) * jnj * ((t + n - k + 1) / (s + k)) - out[n] += c[n, k] * w0 + w0 = np.roll(w0, 1) * jnj * ((t + j - k + 1) / (s + k)) + out[j] += c[j, k] * w0 return out def l_weights( - r: int, - n: int, + r: _T_order, + n: _T_size, /, - trim: AnyTrim = (0, 0), - dtype: np.dtype[T] | type[T] = np.float64, + trim: AnyTrim = 0, *, + dtype: _DType[_T_float] = np.float64, cache: bool = False, -) -> npt.NDArray[T]: +) -> lnpt.Array[tuple[_T_order, _T_size], _T_float]: r""" Projection matrix of the first $r$ (T)L-moments for $n$ samples. 
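Since `l_weights(r, n, trim)` is just an `(r, n)` projection matrix, applying it to a sorted sample reproduces `l_moment` directly, regardless of which of the two backends above computed it. A minimal doctest-style sketch (it reuses the seeded Student-t sample from the `l_moment` example below; the equivalence, not the exact values, is the point):

>>> import numpy as np
>>> import lmo
>>> x = np.random.default_rng(12345).standard_t(2, 99)
>>> P = lmo.l_weights(2, len(x), (1, 1))
>>> P.shape
(2, 99)
>>> np.allclose(P @ np.sort(x), lmo.l_moment(x, [1, 2], trim=(1, 1)))
True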
@@ -207,12 +218,8 @@ def l_weights( assert w.shape == (r, n) return w.astype(dtype) - if ( - r + s + t <= 24 - and isinstance(s, int | np.integer) - and isinstance(t, int | np.integer) - ): - w = _l_weights_pwm(r, n, trim=(int(s), int(t)), dtype=dtype) + if r + s + t <= 24 and isinstance(s, int) and isinstance(t, int): + w = _l_weights_pwm(r, n, (s, t), dtype=dtype or np.float64) # ensure that the trimmed ends are 0 if s: @@ -220,76 +227,74 @@ def l_weights( if t: w[:, -t:] = 0 else: - w = _l_weights_ostat(r, n, trim=(float(s), float(t)), dtype=dtype) + w = _l_weights_ostat(r, n, (s, t), dtype=dtype or np.float64) if cache: - # memoize - _L_WEIGHTS_CACHE[cache_key] = w + # the pyright error here is due to the fact that the first type param + # of `np.ndarray` is invariant (which is incorrect), instead of + # being covariant + _L_WEIGHTS_CACHE[cache_key] = w # pyright: ignore[reportArgumentType] return w -# Summary statistics - @overload def l_moment( - a: npt.ArrayLike, - r: IntVector, + a: lnpt.AnyArrayFloat, + r: AnyOrder, /, trim: AnyTrim = ..., *, - axis: int | None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - fweights: IntVector | None = ..., - aweights: npt.ArrayLike | None = ..., - sort: SortKind | None = ..., - cache: bool = ..., -) -> npt.NDArray[T]: ... - + axis: None = ..., + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... @overload def l_moment( - a: npt.ArrayLike, - r: AnyInt, + a: lnpt.AnyMatrixFloat | lnpt.AnyTensorFloat, + r: AnyOrder | AnyOrderND, /, trim: AnyTrim = ..., *, - axis: None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - fweights: IntVector | None = ..., - aweights: npt.ArrayLike | None = ..., - sort: SortKind | None = ..., - cache: bool = ..., -) -> T: ... - + axis: int, + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... @overload def l_moment( - a: npt.ArrayLike, - r: AnyInt, + a: lnpt.AnyVectorFloat, + r: AnyOrder, + /, + trim: AnyTrim = ..., + *, + axis: int, + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... +@overload +def l_moment( + a: lnpt.AnyArrayFloat, + r: AnyOrderND, /, trim: AnyTrim = ..., *, axis: int | None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - fweights: IntVector | None = ..., - aweights: npt.ArrayLike | None = ..., - sort: SortKind | None = ..., - cache: bool = ..., -) -> npt.NDArray[T] | T: ... - - + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... def l_moment( - a: npt.ArrayLike, - r: IntVector | AnyInt, + a: lnpt.AnyArrayFloat, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - fweights: IntVector | None = None, - aweights: npt.ArrayLike | None = None, - sort: SortKind | None = None, + dtype: _DType[_T_float] = np.float64, + fweights: AnyFWeights | None = None, + aweights: AnyAWeights | None = None, + sort: lnpt.SortKind | None = None, cache: bool = False, -) -> npt.NDArray[T] | T: +) -> _Vectorized[_T_float]: r""" Estimates the generalized trimmed L-moment $\lambda^{(s, t)}_r$ from the samples along the specified axis. By default, this will be the regular @@ -309,19 +314,19 @@ def l_moment( Some special cases include: - - $(0, 0)$: The original **L**-moment, introduced by Hosking - in 1990. + - $(0, 0)$ or $(0)$: The original **L**-moment, introduced by + Hosking in 1990. 
+ - $(t, t)$ or $(t)$: **TL**-moment (**T**rimmed L-moment) + $\\lambda_r^t$, with symmetric trimming. First introduced by + Elamir & Seheult in 2003, and refined by Hosking in 2007. + Generally more robust than L-moments. Useful for fitting + pathological distributions, such as the Cauchy distribution. - $(0, t)$: **LL**-moment (**L**inear combination of **L**owest order statistics), introduced by Bayazit & Onoz in 2002. Assigns more weight to smaller observations. - $(s, 0)$: **LH**-moment (**L**inear combination of **H**igher order statistics), as described by Wang in 1997. Assigns more weight to larger observations. - - $(t, t)$: **TL**-moment (**T**rimmed L-moment) $\\lambda_r^t$, - with symmetric trimming. First introduced by Elamir & Seheult - in 2003, and refined by Hosking in 2007. Generally more robust - than L-moments. Useful for fitting pathological distributions, - such as the Cauchy distribution. axis: Axis along which to calculate the moments. If `None` (default), all samples in the array will be used. @@ -343,7 +348,7 @@ def l_moment( All `aweights` must be `>=0`, and the sum must be nonzero. The algorithm is similar to that for weighted quantiles. - sort ('quick' | 'stable' | 'heap'): + sort ('quicksort' | 'heapsort' | 'stable'): Sorting algorithm, see [`numpy.sort`][numpy.sort]. cache: Set to `True` to speed up future L-moment calculations that have @@ -361,17 +366,20 @@ def l_moment( Calculate the L-location and L-scale from student-T(2) samples, for different (symmetric) trim-lengths. - >>> import lmo, numpy as np - >>> x = np.random.default_rng(12345).standard_t(2, 99) - >>> lmo.l_moment(x, [1, 2], trim=(0, 0)) + >>> import lmo + >>> import numpy as np + >>> rng = np.random.default_rng(12345) + >>> x = rng.standard_t(2, 99) + + >>> lmo.l_moment(x, [1, 2]) array([-0.01412282, 0.94063132]) - >>> lmo.l_moment(x, [1, 2], trim=(1/2, 1/2)) - array([-0.02158858, 0.5796519 ]) + >>> lmo.l_moment(x, [1, 2], trim=1) + array([-0.0124483 , 0.40120115]) >>> lmo.l_moment(x, [1, 2], trim=(1, 1)) array([-0.0124483 , 0.40120115]) - The theoretical L-locations are all 0, and the the L-scale are - `1.1107`, `0.6002` and `0.4165`, respectively. + The theoretical L- and TL-location is `0`, the L-scale is `1.1107`, + and the TL-scale is `0.4165`, respectively. See Also: - [L-moment - Wikipedia](https://wikipedia.org/wiki/L-moment) @@ -401,87 +409,96 @@ def l_moment( # TODO @jorenham: nan handling, see: # https://github.com/jorenham/Lmo/issues/70 + (s, t) = st = clean_trim(trim) + # ensure that any inf's (not nan's) are properly trimmed - s, t = clean_trim(trim) - if s and isinstance(s, int | np.integer): - x_k[..., :s] = np.nan_to_num(x_k[..., :s], nan=np.nan) - if t and isinstance(t, int | np.integer): - x_k[..., -t:] = np.nan_to_num(x_k[..., -t:], nan=np.nan) + if (s or t) and isinstance(s, int): + if s: + x_k[..., :s] = np.nan_to_num(x_k[..., :s], nan=np.nan) + if t: + x_k[..., -t:] = np.nan_to_num(x_k[..., -t:], nan=np.nan) - l_r = np.inner(l_weights(r_max, n, (s, t), cache=cache, dtype=dtype), x_k) + l_r = np.inner(l_weights(r_max, n, st, dtype=dtype, cache=cache), x_k) # we like 0-based indexing; so if P_r starts at r=1, prepend all 1's # for r=0 (any zeroth moment is defined to be 1) l_r = np.r_[np.ones((1, *l_r.shape[1:]), dtype=l_r.dtype), l_r] # l[r] fails when r is e.g. a tuple (valid sequence). 
- return cast(npt.NDArray[T] | T, l_r.take(_r, 0)) + return l_r.take(_r, 0) @overload def l_ratio( - a: npt.ArrayLike, - r: IntVector, - s: AnyInt | IntVector, + a: lnpt.AnyVectorFloat, + r: AnyOrder, + s: AnyOrder, /, trim: AnyTrim = ..., *, - axis: int | None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T]: ... - + axis: int | None, + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... @overload def l_ratio( - a: npt.ArrayLike, - r: AnyInt | IntVector, - s: IntVector, + a: lnpt.AnyMatrixFloat | lnpt.AnyTensorFloat, + r: AnyOrder, + s: AnyOrder, /, trim: AnyTrim = ..., *, - axis: int | None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T]: ... - + axis: int, + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... @overload def l_ratio( - a: npt.ArrayLike, - r: AnyInt, - s: AnyInt, + a: lnpt.AnyArrayFloat, + r: AnyOrder, + s: AnyOrder, /, trim: AnyTrim = ..., *, axis: None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> T: ... - + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... @overload def l_ratio( - a: npt.ArrayLike, - r: AnyInt, - s: AnyInt, + a: lnpt.AnyArrayFloat, + r: AnyOrder, + s: AnyOrderND, /, trim: AnyTrim = ..., *, axis: int | None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: ... - - + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... +@overload def l_ratio( - a: npt.ArrayLike, - r: AnyInt | IntVector, - s: AnyInt | IntVector, + a: lnpt.AnyArrayFloat, + r: AnyOrderND, + s: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = ..., + *, + axis: int | None = ..., + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... +def l_ratio( + a: lnpt.AnyArrayFloat, + r: AnyOrder | AnyOrderND, + s: AnyOrder | AnyOrderND, + /, + trim: AnyTrim = 0, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _Vectorized[_T_float]: r""" Estimates the generalized L-moment ratio. @@ -525,21 +542,20 @@ def l_ratio( - [`lmo.l_moment`][lmo.l_moment] """ rs = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(s))) - l_rs = l_moment(a, rs, trim, axis=axis, dtype=dtype, **kwargs) - + l_rs = l_moment(a, rs, trim=trim, axis=axis, dtype=dtype, **kwds) return moments_to_ratio(rs, l_rs) def l_stats( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, num: int = 4, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: """ Calculates the L-loc(ation), L-scale, L-skew(ness) and L-kurtosis. 
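`l_stats` below is a thin wrapper: `l_stats_orders(num)` yields the orders `r = [1, 2, ..., num]` and denominators `s = [0, 0, 2, 2, ...]`, which it hands to `l_ratio`; a denominator order of `0` returns the L-moment itself, because the zeroth L-moment is defined to be `1`. A small sketch of that equivalence for the default `num=4`:

>>> import numpy as np
>>> import lmo
>>> x = np.random.default_rng(12345).standard_t(2, 99)
>>> np.allclose(
...     lmo.l_stats(x, trim=1),
...     lmo.l_ratio(x, [1, 2, 3, 4], [0, 0, 2, 2], trim=1),
... )
True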
@@ -561,41 +577,48 @@ def l_stats( - [`lmo.l_costats`][lmo.l_costats] """ r, s = l_stats_orders(num) - return l_ratio(a, r, s, trim=trim, axis=axis, dtype=dtype, **kwargs) + return l_ratio(a, r, s, trim=trim, axis=axis, dtype=dtype, **kwds) @overload def l_loc( - a: npt.ArrayLike, + a: lnpt.AnyMatrixFloat | lnpt.AnyTensorFloat, /, trim: AnyTrim = ..., *, - axis: None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> T: ... - + axis: int, + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... @overload def l_loc( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat, /, trim: AnyTrim = ..., *, axis: int, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: ... - - + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... +@overload +def l_loc( + a: lnpt.AnyArrayFloat, + /, + trim: AnyTrim = ..., + *, + axis: None = ..., + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... def l_loc( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _Vectorized[_T_float]: r""" *L-location* (or *L-loc*): unbiased estimator of the first L-moment, $\lambda^{(s, t)}_1$. @@ -668,41 +691,48 @@ def l_loc( - [`lmo.l_moment`][lmo.l_moment] - [`numpy.average`][numpy.average] """ - return l_moment(a, 1, trim=trim, axis=axis, dtype=dtype, **kwargs) + return l_moment(a, 1, trim=trim, axis=axis, dtype=dtype, **kwds) @overload def l_scale( - a: npt.ArrayLike, + a: lnpt.AnyMatrixFloat | lnpt.AnyTensorFloat, /, trim: AnyTrim = ..., *, - axis: None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> T: ... - + axis: int, + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... @overload def l_scale( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat, /, trim: AnyTrim = ..., *, axis: int, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: ... - - + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... +@overload def l_scale( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = ..., + *, + axis: None = ..., + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... +def l_scale( + a: lnpt.AnyArrayFloat, + /, + trim: AnyTrim = 0, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _Vectorized[_T_float]: r""" *L-scale* unbiased estimator for the second L-moment, $\lambda^{(s, t)}_2$. @@ -728,44 +758,48 @@ def l_scale( - [`lmo.l_moment`][lmo.l_moment] - [`numpy.std`][numpy.std] """ - return l_moment(a, 2, trim, axis=axis, dtype=dtype, **kwargs) + return l_moment(a, 2, trim=trim, axis=axis, dtype=dtype, **kwds) @overload def l_variation( - a: npt.ArrayLike, + a: lnpt.AnyMatrixFloat | lnpt.AnyTensorFloat, /, trim: AnyTrim = ..., *, - axis: None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> T: - ... 
- - + axis: int, + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> lnpt.Array[Any, _T_float]: ... @overload def l_variation( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat, /, trim: AnyTrim = ..., *, axis: int, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: - ... - - + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... +@overload +def l_variation( + a: lnpt.AnyArrayFloat, + /, + trim: AnyTrim = ..., + *, + axis: None = ..., + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _T_float: ... def l_variation( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _Vectorized[_T_float]: r""" The *coefficient of L-variation* (or *L-CV*) unbiased sample estimator: @@ -798,41 +832,18 @@ def l_variation( - [`lmo.l_ratio`][lmo.l_ratio] - [`scipy.stats.variation.l_ratio`][scipy.stats.variation] """ # noqa: D415 - return l_ratio(a, 2, 1, trim, axis=axis, dtype=dtype, **kwargs) - - -@overload -def l_skew( - a: npt.ArrayLike, - /, - trim: AnyTrim = ..., - *, - axis: None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> T: ... - -@overload -def l_skew( - a: npt.ArrayLike, - /, - trim: AnyTrim = ..., - *, - axis: int, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: ... + return l_ratio(a, 2, 1, trim=trim, axis=axis, dtype=dtype, **kwds) def l_skew( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _Vectorized[_T_float]: r""" Unbiased sample estimator for the *L-skewness* coefficient. @@ -856,41 +867,18 @@ def l_skew( - [`lmo.l_ratio`][lmo.l_ratio] - [`scipy.stats.skew`][scipy.stats.skew] """ - return l_ratio(a, 3, 2, trim, axis=axis, dtype=dtype, **kwargs) - - -@overload -def l_kurtosis( - a: npt.ArrayLike, - /, - trim: AnyTrim = ..., - *, - axis: None = ..., - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> T: ... - -@overload -def l_kurtosis( - a: npt.ArrayLike, - /, - trim: AnyTrim = ..., - *, - axis: int, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: ... + return l_ratio(a, 3, 2, trim=trim, axis=axis, dtype=dtype, **kwds) def l_kurtosis( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LMomentOptions], -) -> npt.NDArray[T] | T: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LMomentOptions], +) -> _Vectorized[_T_float]: r""" L-kurtosis coefficient; the 4th sample L-moment ratio. 
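These convenience functions delegate to `l_ratio` with fixed orders: the L-CV is $\tau = \lambda_2 / \lambda_1$, the L-skewness $\tau_3 = \lambda_3 / \lambda_2$, and the L-kurtosis $\tau_4 = \lambda_4 / \lambda_2$. Because the delegation is literal, the results match exactly:

>>> import numpy as np
>>> import lmo
>>> x = np.random.default_rng(12345).standard_t(2, 99)
>>> lmo.l_skew(x, trim=1) == lmo.l_ratio(x, 3, 2, trim=1)
True
>>> lmo.l_kurtosis(x, trim=1) == lmo.l_ratio(x, 4, 2, trim=1)
True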
@@ -919,19 +907,19 @@
     - [`lmo.l_ratio`][lmo.l_ratio]
     - [`scipy.stats.kurtosis`][scipy.stats.kurtosis]
     """
-    return l_ratio(a, 4, 2, trim, axis=axis, dtype=dtype, **kwargs)
+    return l_ratio(a, 4, 2, trim=trim, axis=axis, dtype=dtype, **kwds)
 
 
 def l_moment_cov(
-    a: npt.ArrayLike,
-    r_max: SupportsIndex,
+    a: lnpt.AnyArrayFloat,
+    r_max: AnyOrder,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
     axis: int | None = None,
-    dtype: np.dtype[T] | type[T] = np.float64,
-    **kwargs: Unpack[LMomentOptions],
-) -> npt.NDArray[T]:
+    dtype: _DType[_T_float] = np.float64,
+    **kwds: Any,
+) -> npt.NDArray[_T_float]:
     """
     Non-parametric auto-covariance matrix of the generalized trimmed
     L-moment point estimates with orders `r = 1, ..., r_max`.
@@ -993,23 +981,23 @@
     # p_l = np.round(p_l, 12) + 0.
 
     # PWM covariance matrix
-    s_b = pwm_beta.cov(a, ks, axis=axis, dtype=dtype, **kwargs)
+    s_b = pwm_beta.cov(a, ks, axis=axis, dtype=dtype, **kwds)
 
     # tasty, eh?
     return sandwich(p_l, s_b, dtype=dtype)
 
 
 def l_ratio_se(
-    a: npt.ArrayLike,
-    r: AnyInt | IntVector,
-    s: AnyInt | IntVector,
+    a: lnpt.AnyArrayFloat,
+    r: AnyOrder | AnyOrderND,
+    s: AnyOrder | AnyOrderND,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
     axis: int | None = None,
-    dtype: np.dtype[T] | type[T] = np.float64,
-    **kwargs: Unpack[LMomentOptions],
-) -> npt.NDArray[T]:
+    dtype: _DType[_T_float] = np.float64,
+    **kwds: Unpack[LMomentOptions],
+) -> _Vectorized[_T_float]:
     """
     Non-parametric estimates of the Standard Error (SE) in the L-ratio
     estimates from [`lmo.l_ratio`][lmo.l_ratio].
@@ -1045,11 +1033,11 @@
     r_max = np.amax(np.r_[_r, _s].ravel())
 
     # L-moments
-    l_rs = l_moment(a, _rs, trim, axis=axis, dtype=dtype, **kwargs)
+    l_rs = l_moment(a, _rs, trim, axis=axis, dtype=dtype, **kwds)
     l_r, l_s = l_rs[0], l_rs[1]
 
     # L-moment auto-covariance matrix
-    k_l = l_moment_cov(a, r_max, trim, axis=axis, dtype=dtype, **kwargs)
+    k_l = l_moment_cov(a, r_max, trim, axis=axis, dtype=dtype, **kwds)
 
     # prepend the "zeroth" moment, which has 0 (co)variance
     k_l = np.pad(k_l, (1, 0), constant_values=0)
@@ -1071,15 +1059,15 @@
 
 
 def l_stats_se(
-    a: npt.ArrayLike,
+    a: lnpt.AnyArrayFloat,
     /,
+    trim: AnyTrim = 0,
     num: int = 4,
-    trim: AnyTrim = (0, 0),
     *,
     axis: int | None = None,
-    dtype: np.dtype[T] | type[T] = np.float64,
-    **kwargs: Unpack[LMomentOptions],
-) -> npt.NDArray[T]:
+    dtype: _DType[_T_float] = np.float64,
+    **kwds: Unpack[LMomentOptions],
+) -> _Vectorized[_T_float]:
     """
     Calculates the standard errors (SE's) of the [`L-stats`][lmo.l_stats].
@@ -1103,18 +1091,21 @@
     r, s = l_stats_orders(num)
-    return l_ratio_se(a, r, s, trim=trim, axis=axis, dtype=dtype, **kwargs)
+    return l_ratio_se(a, r, s, trim=trim, axis=axis, dtype=dtype, **kwds)
+
+
+_T_x = TypeVar('_T_x', float, npt.NDArray[_Floating])
 
 
 def l_moment_influence(
-    a: npt.ArrayLike,
-    r: SupportsIndex,
+    a: lnpt.AnyVectorFloat,
+    r: AnyOrder,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    sort: SortKind | None = None,
+    sort: lnpt.SortKind | None = 'quicksort',
     tol: float = 1e-8,
-) -> Callable[[V], V]:
+) -> Callable[[_T_x], _T_x]:
     r"""
     Empirical Influence Function (EIF) of a sample L-moment.
@@ -1129,7 +1120,7 @@
             non-negative int or float.
 
     Other parameters:
-        sort ('quick' | 'stable' | 'heap'):
+        sort ('quicksort' | 'heapsort' | 'stable'):
             Sorting algorithm, see [`numpy.sort`][numpy.sort].
         tol: Zero-roundoff absolute threshold.
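The callable returned by `l_moment_influence` maps observation values to their influence on the point estimate: $\psi(x_{k:n}) = n \, w_k \, x_{k:n} - \hat\lambda_r$. One consequence, and a cheap sanity check, is that the EIF averages to numerically zero over the sample itself, since the weights reproduce $\hat\lambda_r$. A quick sketch:

>>> import numpy as np
>>> import lmo
>>> x = np.random.default_rng(12345).standard_t(2, 99)
>>> psi = lmo.l_moment_influence(x, 2, trim=(1, 1))
>>> psi(x).shape  # vectorized over the input
(99,)
>>> np.isclose(psi(x).mean(), 0.0)
True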
@@ -1141,18 +1132,21 @@
     _r = clean_order(r)
     s, t = clean_trim(trim)
 
-    x_k = np.sort(a, kind=sort)
+    x_k = np.array(a, copy=True)
+    if sort:
+        x_k.sort(kind=sort)
+
     n = len(x_k)
 
     w_k = l_weights(_r, n, (s, t))[-1]
     l_r = np.inner(w_k, x_k)
 
-    def influence_function(x: V, /) -> V:
+    def influence_function(x: _T_x, /) -> _T_x:
         _x = np.asarray(x)
 
         # ECDF
         # k = np.maximum(np.searchsorted(x_k, _x, side='right') - 1, 0)
 
-        w = cast(V, np.interp(
+        w = cast(_T_x, np.interp(
             _x,
             x_k,
             w_k,
@@ -1161,26 +1155,26 @@
         ))
         alpha = n * w * np.where(w, _x, 0)
-        return cast(V, round0(alpha - l_r, tol=tol)[()])
+        return cast(_T_x, round0(alpha - l_r, tol=tol)[()])
 
     influence_function.__doc__ = (
         f'Empirical L-moment influence function for {r=}, {trim=}, and {n=}.'
     )
     # piggyback the L-moment, to avoid recomputing it in l_ratio_influence
-    influence_function.l = l_r  # type: ignore
+    influence_function.l = l_r  # pyright: ignore[reportFunctionMemberAccess]
     return influence_function
 
 
 def l_ratio_influence(
-    a: npt.ArrayLike,
-    r: SupportsIndex,
-    k: SupportsIndex = 2,
+    a: lnpt.AnyVectorFloat,
+    r: AnyOrder,
+    s: AnyOrder = 2,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    sort: SortKind | None = None,
+    sort: lnpt.SortKind = 'quicksort',
     tol: float = 1e-8,
-) -> Callable[[V], V]:
+) -> Callable[[_T_x], _T_x]:
     r"""
     Empirical Influence Function (EIF) of a sample L-moment ratio.
@@ -1190,13 +1184,13 @@
     Args:
         a: 1-D array-like containing observed samples.
         r: L-moment ratio order. Must be a non-negative integer.
-        k: Denominator L-moment order, defaults to 2.
+        s: Denominator L-moment order, defaults to 2.
         trim: Left- and right- trim. Can be scalar or 2-tuple of
             non-negative int or float.
 
     Other parameters:
-        sort ('quick' | 'stable' | 'heap'):
+        sort ('quicksort' | 'heapsort' | 'stable'):
             Sorting algorithm, see [`numpy.sort`][numpy.sort].
         tol: Zero-roundoff absolute threshold.
@@ -1205,29 +1199,34 @@
 
     Returns:
         The (vectorized) empirical influence function.
     """
-    _x = np.sort(a, kind=sort)
-    _r, _k = clean_order(r), clean_order(k)
+    _r, _s = clean_order(r), clean_order(s, name='s')
+
+    _x = np.array(a, copy=True)
+    _x.sort(kind=sort)
     n = len(_x)
 
-    eif_r = l_moment_influence(_x, _r, trim, sort='stable', tol=0)
-    eif_k = l_moment_influence(_x, _k, trim, sort='stable', tol=0)
+    eif_r = l_moment_influence(_x, _r, trim, sort=None, tol=0)
+    eif_k = l_moment_influence(_x, _s, trim, sort=None, tol=0)
 
-    l_r, l_k = cast(tuple[float, float], (eif_r.l, eif_k.l))  # type: ignore
+    l_r, l_k = cast(
+        tuple[float, float],
+        (eif_r.l, eif_k.l),  # pyright: ignore[reportFunctionMemberAccess]
+    )
 
     if abs(l_k) <= tol * abs(l_r):
-        msg = f'L-ratio ({r=}, {k=}) denominator is approximately zero.'
+        msg = f'L-ratio ({r=}, {s=}) denominator is approximately zero.'
raise ZeroDivisionError(msg) t_r = l_r / l_k - def influence_function(x: V, /) -> V: + def influence_function(x: _T_x, /) -> _T_x: psi_r = eif_r(x) # cheat a bit to avoid `inf - inf = nan` situations psi_k = np.where(np.isinf(psi_r), 0, eif_k(x)) - return cast(V, round0((psi_r - t_r * psi_k) / l_k, tol=tol)[()]) + return cast(_T_x, round0((psi_r - t_r * psi_k) / l_k, tol=tol)[()]) influence_function.__doc__ = ( f'Theoretical influence function for L-moment ratio with r={_r}, ' - f'k={_k}, {trim=}, and {n=}' + f'k={_s}, {trim=}, and {n=}' ) return influence_function diff --git a/lmo/_lm_co.py b/lmo/_lm_co.py index 3d7fc9a1..ba91101f 100644 --- a/lmo/_lm_co.py +++ b/lmo/_lm_co.py @@ -1,3 +1,23 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal, TypeAlias, cast + +import numpy as np + +from ._lm import l_weights +from ._utils import clean_order, clean_orders, ordered +from .typing import AnyOrder, AnyOrderND +from .typing.compat import TypeVar, Unpack + + +if TYPE_CHECKING: + from .typing import ( + AnyTrim, + LComomentOptions, + np as lnpt, + ) + + __all__ = ( 'l_comoment', 'l_coratio', @@ -9,55 +29,48 @@ 'l_cokurtosis', ) -import sys -from typing import Any, TypeVar, cast - -import numpy as np -from numpy import typing as npt - -from ._lm import l_weights -from ._utils import broadstack, clean_order, ordered -from .typing import AnyInt, AnyTrim, IntVector, LComomentOptions, SortKind - -if sys.version_info < (3, 11): - from typing_extensions import Unpack -else: - from typing import Unpack +_T_scalar = TypeVar('_T_scalar', bound=np.generic) +_T_float = TypeVar('_T_float', bound=np.floating[Any], default=np.float64) +_DType: TypeAlias = np.dtype[_T_scalar] | type[_T_scalar] -T = TypeVar('T', bound=np.floating[Any]) +_N0 = TypeVar('_N0', bound=int) +_N1 = TypeVar('_N1', bound=int) +_N2 = TypeVar('_N2', bound=int) +_Array2D: TypeAlias = np.ndarray[tuple[_N0, _N1], np.dtype[_T_scalar]] +_Array3D: TypeAlias = np.ndarray[tuple[_N0, _N1, _N2], np.dtype[_T_scalar]] def l_comoment( - a: npt.ArrayLike, - r: AnyInt | IntVector, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, + dtype: _DType[_T_float] = np.float64, rowvar: bool = True, - sort: SortKind | None = None, + sort: lnpt.SortKind | None = None, cache: bool = False, -) -> npt.NDArray[T]: +) -> lnpt.Array[Any, _T_float]: r""" Multivariate extension of [`lmo.l_moment`][lmo.l_moment]. Estimates the L-comoment matrix: $$ - \Lambda_{r}^{(t_1, t_2)} = + \Lambda_{r}^{(s, t)} = \left[ - \lambda_{r [ij]}^{(t_1, t_2)} + \lambda_{r [ij]}^{(s, t)} \right]_{m \times m} $$ Whereas the L-moments are calculated using the order statistics of the observations, i.e. by sorting, the L-comoment sorts $x_i$ using the order of $x_j$. This means that in general, - $\lambda_{r [ij]}^{(t_1, t_2)} \neq \lambda_{r [ji]}^{(t_1, t_2)}$, i.e. - $\Lambda_{r}^{(t_1, t_2)}$ is not symmetric. + $\lambda_{r [ij]}^{(s, t)} \neq \lambda_{r [ji]}^{(s, t)}$, i.e. + $\Lambda_{r}^{(s, t)}$ is not symmetric. - The $r$-th L-comoment $\lambda_{r [ij]}^{(t_1, t_2)}$ reduces to the + The $r$-th L-comoment $\lambda_{r [ij]}^{(s, t)}$ reduces to the L-moment if $i=j$, and can therefore be seen as a generalization of the (univariate) L-moments. Similar to how the diagonal of a covariance matrix contains the variances, the diagonal of the L-comoment matrix contains the @@ -72,13 +85,11 @@ def l_comoment( observations. 
Each row of `a` represents a variable, and each column a single observation of all those variables. Also see `rowvar` below. If `a` is not an array, a conversion is attempted. - r: The L-moment order(s), non-negative integer or array. - trim: - Left- and right-trim orders $(t_1, t_2)$, non-negative ints or - floats that are bound by $t_1 + t_2 < n - r$. + Left- and right-trim orders $(s, t)$, non-negative ints or + floats that are bound by $s + t < n - r$. Some special cases include: @@ -97,20 +108,17 @@ def l_comoment( Generally more robust than L-moments. Useful for fitting heavy-tailed distributions, such as the Cauchy distribution. - rowvar: If `rowvar` is True (default), then each row (axis 0) represents a variable, with observations in the columns (axis 1). Otherwise, the relationship is transposed: each column represents a variable, while the rows contain observations. - dtype: Floating type to use in computing the L-moments. Default is [`numpy.float64`][numpy.float64]. sort ('quick' | 'stable' | 'heap'): Sorting algorithm, see [`numpy.sort`][numpy.sort]. - cache: Set to `True` to speed up future L-moment calculations that have the same number of observations in `a`, equal `trim`, and equal or @@ -136,63 +144,76 @@ def l_comoment( array([1.2766793 , 1.05990727]) References: - * [R. Serfling & P. Xiao (2007) - A Contribution to Multivariate + - [R. Serfling & P. Xiao (2007) - A Contribution to Multivariate L-Moments: L-Comoment Matrices](https://doi.org/10.1016/j.jmva.2007.01.008) """ - - def _clean_array(arr: npt.ArrayLike) -> npt.NDArray[T]: - out = np.asanyarray(arr, dtype=dtype) - return out if rowvar else out.T - - x = np.atleast_2d(_clean_array(a)) + x = np.array( + a, + order='C' if rowvar else 'F', + subok=True, + ndmin=2, + ) if x.ndim != 2: - msg = f'sample array must be 2-D, got {x.ndim}' + msg = f'sample array must be 2-D, got shape {x.shape}' raise ValueError(msg) - - _r = np.asarray(r) - r_max = clean_order(cast(int, np.max(_r))) + if not rowvar: + x = x.T m, n = x.shape + if np.isscalar(r): + _r = np.array(clean_order(cast(AnyOrder, r))) + else: + _r = clean_orders(cast(AnyOrderND, r)) + + r_min = int(np.min(_r)) + r_max = int(np.max(_r)) + + if r_min == r_max == 0 and _r.ndim == 0: + return np.identity(m, dtype=dtype) + if not m: return np.empty((*np.shape(_r), 0, 0), dtype=dtype) - # projection matrix of shape (r, n) - p_r = l_weights(r_max, n, trim, cache=cache) - - # L-comoment matrices for r = 0, ..., r_max - l_ij = np.empty((r_max + 1, m, m), dtype=dtype) + # projection/hat matrix of shape (r_max - r_min, n) + p_k = l_weights(r_max, n, trim=trim, dtype=dtype, cache=cache) + if r_min > 1: + p_k = p_k[r_min - 1:] - # the zeroth L-comoment is the delta function, so the L-comoment - # matrix is the identity matrix - l_ij[0] = np.eye(m, dtype=dtype) + # L-comoment matrices for k = r_min, ..., r_max + l_kij = np.empty((p_k.shape[0], m, m), dtype=dtype, order='F') for j in range(m): - # concomitants of x[i] w.r.t. x[j] for all i - x_k_ij = ordered(x, x[j], axis=-1, dtype=dtype, sort=sort) + # *concomitants* of x[i] w.r.t. 
x[j] for all i + x_kij = ordered(x, x[j], axis=-1, sort=sort) + l_kij[:, :, j] = np.inner(p_k, x_kij) - l_ij[1:, :, j] = np.inner(p_r, x_k_ij) + if r_min == 0: + # the zeroth L-comoment is the delta function, so the L-comoment + # matrix is the identity matrix + l_0ij = np.identity(m, dtype=dtype)[None, :] + return np.concat((l_0ij, l_kij)).take(_r, 0) - return l_ij.take(_r, 0) + return l_kij.take(_r - r_min, 0) def l_coratio( - a: npt.ArrayLike, - r: AnyInt | IntVector, - s: AnyInt | IntVector, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, + r: AnyOrder | AnyOrderND, + s: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LComomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LComomentOptions], +) -> lnpt.Array[Any, _T_float]: r""" Estimate the generalized matrix of L-comoment ratio's. $$ - \tilde \Lambda_{rs}^{(t_1, t_2)} = + \tilde \Lambda_{rk}^{(s, t)} = \left[ - \left. \lambda_{r [ij]}^{(t_1, t_2)} \right/ - \lambda_{s [ii]}^{(t_1, t_2)} + \left. \lambda_{r [ij]}^{(s, t)} \right/ + \lambda_{k [ii]}^{(s, t)} \right]_{m \times m} $$ @@ -200,18 +221,19 @@ def l_coratio( - [`lmo.l_comoment`][lmo.l_comoment] - [`lmo.l_ratio`][lmo.l_ratio] """ - l_r, l_s = l_comoment(a, broadstack(r, s), trim, dtype=dtype, **kwargs) + rs = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(s))) + l_r, l_s = l_comoment(a, rs, trim=trim, dtype=dtype, **kwds) return l_r / np.expand_dims(np.diagonal(l_s, axis1=-2, axis2=-1), -1) def l_costats( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LComomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LComomentOptions], +) -> _Array3D[Literal[4], Any, Any, _T_float]: """ Calculates the L-*co*scale, L-corr(elation), L-*co*skew(ness) and L-*co*kurtosis. @@ -223,19 +245,19 @@ def l_costats( - [`lmo.l_coratio`][lmo.l_coratio] """ r, s = [2, 2, 3, 4], [0, 2, 2, 2] - return l_coratio(a, r, s, trim=trim, dtype=dtype, **kwargs) + return l_coratio(a, r, s, trim=trim, dtype=dtype, **kwds) def l_coloc( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LComomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LComomentOptions], +) -> _Array2D[Any, Any, _T_float]: r""" - L-colocation matrix of 1st L-comoment estimates, $\Lambda^{(t_1, t_2)}_1$. + L-colocation matrix of 1st L-comoment estimates, $\Lambda^{(s, t)}_1$. Alias for [`lmo.l_comoment(a, 1, *, **)`][lmo.l_comoment]. @@ -272,19 +294,19 @@ def l_coloc( - [`lmo.l_loc`][lmo.l_loc] - [`numpy.mean`][numpy.mean] """ - return l_comoment(a, 1, trim, dtype=dtype, **kwargs) + return l_comoment(a, 1, trim=trim, dtype=dtype, **kwds) def l_coscale( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LComomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LComomentOptions], +) -> _Array2D[Any, Any, _T_float]: r""" - L-coscale matrix of 2nd L-comoment estimates, $\Lambda^{(t_1, t_2)}_2$. + L-coscale matrix of 2nd L-comoment estimates, $\Lambda^{(s, t)}_2$. 
Alias for [`lmo.l_comoment(a, 2, *, **)`][lmo.l_comoment]. @@ -308,19 +330,19 @@ def l_coscale( - [`lmo.l_scale`][lmo.l_scale] - [`numpy.cov`][numpy.cov] """ - return l_comoment(a, 2, trim, dtype=dtype, **kwargs) + return l_comoment(a, 2, trim=trim, dtype=dtype, **kwds) def l_corr( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LComomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LComomentOptions], +) -> _Array2D[Any, Any, _T_float]: r""" - Sample L-correlation coefficient matrix $\tilde\Lambda^{(t_1, t_2)}_2$; + Sample L-correlation coefficient matrix $\tilde\Lambda^{(s, t)}_2$; the ratio of the L-coscale matrix over the L-scale **column**-vectors. Alias for [`lmo.l_coratio(a, 2, 2, *, **)`][lmo.l_coratio]. @@ -354,19 +376,19 @@ def l_corr( - [`lmo.l_coratio`][lmo.l_coratio] - [`numpy.corrcoef`][numpy.corrcoef] """ - return l_coratio(a, 2, 2, trim, dtype=dtype, **kwargs) + return l_coratio(a, 2, 2, trim=trim, dtype=dtype, **kwds) def l_coskew( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LComomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LComomentOptions], +) -> _Array2D[Any, Any, _T_float]: r""" - Sample L-coskewness coefficient matrix $\tilde\Lambda^{(t_1, t_2)}_3$. + Sample L-coskewness coefficient matrix $\tilde\Lambda^{(s, t)}_3$. Alias for [`lmo.l_coratio(a, 3, 2, *, **)`][lmo.l_coratio]. @@ -374,19 +396,19 @@ def l_coskew( - [`lmo.l_coratio`][lmo.l_coratio] - [`lmo.l_skew`][lmo.l_skew] """ - return l_coratio(a, 3, 2, trim, dtype=dtype, **kwargs) + return l_coratio(a, 3, 2, trim=trim, dtype=dtype, **kwds) def l_cokurtosis( - a: npt.ArrayLike, + a: lnpt.AnyVectorFloat | lnpt.AnyMatrixFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Unpack[LComomentOptions], -) -> npt.NDArray[T]: + dtype: _DType[_T_float] = np.float64, + **kwds: Unpack[LComomentOptions], +) -> _Array2D[Any, Any, _T_float]: r""" - Sample L-cokurtosis coefficient matrix $\tilde\Lambda^{(t_1, t_2)}_4$. + Sample L-cokurtosis coefficient matrix $\tilde\Lambda^{(s, t)}_4$. Alias for [`lmo.l_coratio(a, 4, 2, *, **)`][lmo.l_coratio]. @@ -394,4 +416,4 @@ def l_cokurtosis( - [`lmo.l_coratio`][lmo.l_coratio] - [`lmo.l_kurtosis`][lmo.l_kurtosis] """ - return l_coratio(a, 4, 2, trim, dtype=dtype, **kwargs) + return l_coratio(a, 4, 2, trim=trim, dtype=dtype, **kwds) diff --git a/lmo/_poly.py b/lmo/_poly.py index e651da6c..412f0ec1 100644 --- a/lmo/_poly.py +++ b/lmo/_poly.py @@ -8,6 +8,7 @@ """ __all__ = ( + 'PolySeries', 'eval_sh_jacobi', 'peaks_jacobi', 'arg_extrema_jacobi', @@ -17,40 +18,42 @@ 'roots', ) -from typing import TypeVar, cast, overload +from typing import TypeAlias, TypeVar, cast, overload import numpy as np import numpy.polynomial as npp -import numpy.typing as npt import scipy.special as scs -from .typing import FloatVector, PolySeries +from .typing import np as lnpt -P = TypeVar('P', bound=PolySeries) - +PolySeries: TypeAlias = ( + npp.Polynomial + | npp.Chebyshev + | npp.Hermite + | npp.HermiteE + | npp.Legendre +) -@overload -def eval_sh_jacobi(n: int, a: float, b: float, x: float) -> float: - ... 
+_T_shape = TypeVar('_T_shape', bound=lnpt.AtLeast1D)
+_T_poly = TypeVar('_T_poly', bound=PolySeries)
+
+
+@overload
+def eval_sh_jacobi(n: int, a: float, b: float, x: float) -> float: ...
 @overload
 def eval_sh_jacobi(
     n: int,
     a: float,
     b: float,
-    x: npt.NDArray[np.float64],
-) -> npt.NDArray[np.float64]:
-    ...
-
-
+    x: lnpt.Array[_T_shape, lnpt.Float],
+) -> lnpt.Array[_T_shape, np.float64]: ...
 def eval_sh_jacobi(
     n: int,
     a: float,
     b: float,
-    x: float | npt.NDArray[np.float64],
-) -> float | npt.NDArray[np.float64]:
+    x: float | lnpt.Array[_T_shape, lnpt.Float],
+) -> float | lnpt.Array[_T_shape, np.float64]:
     """
     Fast evaluation of the n-th shifted Jacobi polynomial.
     Faster than pre-computing using np.Polynomial, and than
@@ -102,7 +105,11 @@
     return scs.eval_jacobi(n, a, b, u)
 
 
-def peaks_jacobi(n: int, a: float, b: float) -> npt.NDArray[np.float64]:
+def peaks_jacobi(
+    n: int,
+    a: float,
+    b: float,
+) -> lnpt.Array[tuple[int], np.float64]:
     r"""
     Finds the \( x \in [-1, 1] \) s.t.
     \( \frac{\dd{\shjacobi{n}{a}{b}{x}}}{\dd{x}} = 0 \) of a Jacobi polynomial,
@@ -155,7 +162,7 @@
     # otherwise, peaks are at the ends, and at the roots of the derivative
     x = np.empty(n + 1)
     x[0] = -1
-    x[1:-1] = scs.roots_jacobi(n - 1, a + 1, b + 1)[0]  # type: ignore
+    x[1:-1] = scs.roots_jacobi(n - 1, a + 1, b + 1)[0]  # pyright: ignore[reportUnknownMemberType]
     x[-1] = 1
 
     return np.round(x, 15) + 0.0  # cleanup of numerical noise
@@ -288,7 +295,11 @@
     return cast(float, np.min(p)), cast(float, np.max(p))
 
 
-def _jacobi_coefs(n: int, a: float, b: float) -> npt.NDArray[np.float64]:
+def _jacobi_coefs(
+    n: int,
+    a: float,
+    b: float,
+) -> lnpt.Array[tuple[int], np.float64]:
     p_n: np.poly1d
     p_n = scs.jacobi(n, a, b)  # pyright: ignore[reportUnknownMemberType]
     return p_n.coef[::-1]
@@ -299,24 +310,24 @@ def jacobi(
     /,
     a: float,
     b: float,
-    domain: FloatVector = (-1, 1),
-    window: FloatVector = (-1, 1),
+    domain: tuple[float, float] = (-1, 1),
+    window: tuple[float, float] = (-1, 1),
     symbol: str = 'x',
 ) -> npp.Polynomial:
     return npp.Polynomial(_jacobi_coefs(n, a, b), domain, window, symbol)
 
 
 def jacobi_series(
-    coef: npt.ArrayLike,
+    coef: lnpt.AnyArrayFloat,
     /,
     a: float,
     b: float,
     *,
-    domain: FloatVector = (-1, 1),
-    kind: type[P] | None = None,
-    window: FloatVector = (-1, 1),
+    domain: tuple[float, float] = (-1, 1),
+    kind: type[_T_poly] | None = None,
+    window: tuple[float, float] = (-1, 1),
     symbol: str = 'x',
-) -> P:
+) -> _T_poly:
     r"""
     Construct a polynomial from the weighted sum of shifted Jacobi
     polynomials.
@@ -334,32 +345,42 @@
     n = len(w)
 
     p = cast(
-        PolySeries,
+        npp.Polynomial,
         sum(
             w[r] * jacobi(r, a, b, domain=domain, symbol=symbol, window=window)
             for r in range(n)
         ),
     )
-    return cast(P, p.convert(domain=domain, kind=kind, window=window))
+    return cast(
+        _T_poly,
+        p.convert(  # pyright: ignore[reportUnknownMemberType]
+            domain=domain,
+            kind=kind,
+            window=window,
+        ),
+    )
 
 
 def roots(
     p: PolySeries,
     /,
     outside: bool = False,
-) -> npt.NDArray[np.float64]:
+) -> lnpt.Array[tuple[int], np.float64]:
     """
     Return the $x$ in the domain of $p$, where $p(x) = 0$.
 
     If outside=False (default), the values that fall outside of the domain
     interval will not be included.
""" - z = p.roots() + z = cast( + lnpt.Array[tuple[int], np.float64], + p.roots(), # pyright: ignore[reportUnknownMemberType] + ) if not np.isrealobj(z) and np.isrealobj(p.domain): x = z[np.isreal(z)].real else: - x = cast(npt.NDArray[np.float64], z) + x = z if not outside and len(x): a, b = np.sort(p.domain) diff --git a/lmo/_utils.py b/lmo/_utils.py index 3d42ccbe..54d2cd06 100644 --- a/lmo/_utils.py +++ b/lmo/_utils.py @@ -1,72 +1,62 @@ -__all__ = ( - 'as_float_array', - 'broadstack', - 'ensure_axis_at', - 'plotting_positions', - 'round0', - 'ordered', - - 'clean_order', - 'clean_orders', - 'clean_trim', - - 'moments_to_ratio', - 'moments_to_stats_cov', - 'l_stats_orders', -) +from __future__ import annotations -from typing import Any, SupportsIndex, TypeVar, cast +import math +from typing import TYPE_CHECKING, Any, Final, TypeAlias import numpy as np import numpy.typing as npt -from .typing import AnyInt, AnyTrim, IndexOrder, IntVector, SortKind +from .typing import np as lnpt +from .typing.compat import TypeVar -T = TypeVar('T', bound=np.generic) -FT = TypeVar('FT', bound=np.floating[Any]) +if TYPE_CHECKING: + from .typing import AnyAWeights, AnyFWeights, AnyOrder, AnyOrderND, AnyTrim +__all__ = ( + 'clean_order', + 'clean_orders', + 'clean_trim', + 'ensure_axis_at', + 'l_stats_orders', + 'moments_to_ratio', + 'moments_to_stats_cov', + 'ordered', + 'plotting_positions', + 'round0', +) -def as_float_array( - a: npt.ArrayLike, - /, - dtype: npt.DTypeLike = None, - order: IndexOrder | None = None, - *, - check_finite: bool = False, - flat: bool = False, -) -> npt.NDArray[np.floating[Any]]: - """ - Convert to array if needed, and only cast to float64 dtype if not a - floating type already. Similar as in e.g. `numpy.mean`. - """ - asarray = np.asarray_chkfinite if check_finite else np.asarray - x = asarray(a, dtype=dtype, order=order) - out = x if isinstance(x.dtype.type, np.floating) else x.astype(np.float64) +_T_scalar = TypeVar('_T_scalar', bound=np.generic) +_T_number = TypeVar('_T_number', bound=np.number[Any]) +_T_int = TypeVar('_T_int', bound=np.integer[Any], default=np.intp) +_T_float = TypeVar('_T_float', bound=np.floating[Any], default=np.float64) - # the `_[()]` ensures that 0-d arrays become scalars - return (out.reshape(-1) if flat and out.ndim != 1 else out)[()] +_T_size = TypeVar('_T_size', bound=int) +_T_shape0 = TypeVar('_T_shape0', bound=lnpt.AtLeast0D) +_T_shape1 = TypeVar('_T_shape1', bound=lnpt.AtLeast1D) +_T_shape2 = TypeVar('_T_shape2', bound=lnpt.AtLeast2D) -def broadstack( - r: AnyInt | IntVector, - s: AnyInt | IntVector, -) -> npt.NDArray[np.int64]: - return np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(s))) +_DType: TypeAlias = np.dtype[_T_scalar] | type[_T_scalar] def ensure_axis_at( - a: npt.NDArray[T], + a: npt.NDArray[_T_scalar], /, source: int | None, destination: int, - order: IndexOrder = 'C', -) -> npt.NDArray[T]: + *, + order: lnpt.OrderReshape = 'C', +) -> npt.NDArray[_T_scalar]: + """ + Moves the from `source` to `destination` if needed, or returns a flattened + array is `source` is set to `None`. 
+ """ if a.ndim <= 1 or source == destination: return a if source is None: - return a.ravel(order) + return a.reshape(-1, order=order) source = source + a.ndim if source < 0 else source destination = destination + a.ndim if destination < 0 else destination @@ -92,16 +82,22 @@ def plotting_positions( return np.linspace(x0 / xn, (x0 + n - 1) / xn, n, dtype=dtype) -def round0(a: npt.NDArray[T], /, tol: float = 1e-8) -> npt.NDArray[T]: - """Round values close to zero.""" - return np.where(np.abs(a) <= abs(tol), 0, a) if tol else a +def round0( + a: lnpt.CanArray[_T_shape0, _T_float], + /, + tol: float | None = None, +) -> lnpt.Array[_T_shape0, _T_float]: + """Replace all values `<= tol` with `0`.""" + _a = np.asarray(a) + _tol = np.finfo(_a.dtype).resolution * 2 if tol is None else abs(tol) + return np.where(np.abs(a) <= _tol, 0, a) def _apply_aweights( - x: npt.NDArray[np.floating[Any]], - v: npt.NDArray[np.floating[Any]], + x: lnpt.Array[_T_shape1, _T_float], + v: lnpt.Array[_T_shape1 | lnpt.AtLeast1D, _T_float | lnpt.Float], axis: int, -) -> npt.NDArray[np.float64]: +) -> lnpt.Array[_T_shape1, _T_float]: # interpret the weights as horizontal coordinates using cumsum vv = np.cumsum(v, axis=axis) assert vv.shape == x.shape, (vv.shape, x.shape) @@ -112,11 +108,8 @@ def _apply_aweights( # cannot use np.apply_along_axis here, since both x_k and w_k need to be # applied simultaneously - out = np.empty(x.shape, dtype=np.float64) + out = np.empty_like(x) - x_jk: npt.NDArray[np.floating[Any]] - w_jk: npt.NDArray[np.floating[Any]] - v_jk: npt.NDArray[np.float64] for j in np.ndindex(out.shape[:-1]): x_jk, w_jk = x[j], vv[j] if w_jk[-1] <= 0: @@ -133,11 +126,11 @@ def _apply_aweights( def _sort_like( - a: npt.NDArray[T], - i: npt.NDArray[np.int_], + a: lnpt.Array[_T_shape1, _T_number], + i: lnpt.Array[tuple[int], np.integer[Any]], /, axis: int | None, -) -> npt.NDArray[T]: +) -> lnpt.Array[_T_shape1, _T_number]: return ( np.take(a, i, axis=None if a.ndim == i.ndim else axis) if min(a.ndim, i.ndim) <= 1 @@ -146,16 +139,16 @@ def _sort_like( def ordered( # noqa: C901 - x: npt.ArrayLike, - y: npt.ArrayLike | None = None, + x: lnpt.AnyArrayFloat, + y: lnpt.AnyArrayFloat | None = None, /, axis: int | None = None, - dtype: npt.DTypeLike = None, + dtype: _DType[np.floating[Any]] | None = None, *, - fweights: IntVector | None = None, - aweights: npt.ArrayLike | None = None, - sort: SortKind | None = None, -) -> npt.NDArray[np.floating[Any]]: + fweights: AnyFWeights | None = None, + aweights: AnyAWeights | None = None, + sort: lnpt.SortKind | None = None, +) -> lnpt.Array[lnpt.AtLeast1D, lnpt.Float]: """ Calculate `n = len(x)` order stats of `x`, optionally weighted. If `y` is provided, the order of `y` is used instead. 
@@ -177,10 +170,7 @@ def ordered(  # noqa: C901
         _z = _y + 1j * _x
     else:
         assert axis is not None
-        _z = cast(
-            npt.NDArray[Any],
-            np.apply_along_axis(np.add, axis, 1j * _x, _y),  # type: ignore
-        )
+        _z = np.apply_along_axis(np.add, axis, 1j * _x, _y)
 
     # apply the ordering
     i_kk = np.argsort(_z, axis=axis, kind=sort)
@@ -189,12 +179,11 @@
     # prepare observation weights
     w_kk = None
     if aweights is not None:
-        w = np.asanyarray(aweights)
-        w_kk = _sort_like(w, i_kk, axis=axis)
+        w_kk = _sort_like(np.asanyarray(aweights), i_kk, axis=axis)
 
     # apply the frequency weights to x, and (optionally) to aweights
     if fweights is not None:
-        r = np.asanyarray(fweights, np.int64)
+        r = np.asanyarray(fweights, int)
         r_kk = _sort_like(r, i_kk, axis=axis)
 
         # avoid unnecessary repeats by normalizing by the GCD
@@ -217,12 +206,13 @@
 def clean_order(
-    r: SupportsIndex,
+    r: AnyOrder,
     /,
     name: str = 'r',
     rmin: int = 0,
 ) -> int:
-    if (_r := r.__index__()) < rmin:
+    """Validates and cleans a single (L-)moment order."""
+    if (_r := int(r)) < rmin:
         msg = f'expected {name} >= {rmin}, got {_r}'
         raise TypeError(msg)
 
     return _r
@@ -230,12 +220,14 @@
 def clean_orders(
-    r: IntVector | AnyInt,
+    r: AnyOrderND,
     /,
     name: str = 'r',
     rmin: int = 0,
-) -> npt.NDArray[np.int64]:
-    _r = np.asarray_chkfinite(r, np.int64)
+    dtype: _DType[_T_int] = np.intp,
+) -> lnpt.Array[Any, _T_int]:
+    """Validates and cleans an array-like of (L-)moment orders."""
+    _r = np.asarray_chkfinite(r, dtype=dtype)
 
     if np.any(invalid := _r < rmin):
         i = np.argmax(invalid)
@@ -245,45 +237,60 @@
-def clean_trim(trim: AnyTrim) -> tuple[int, int] | tuple[float, float]:
-    _trim = np.asarray_chkfinite(trim)
-
-    if not np.isrealobj(_trim):
-        msg = 'trim must be real'
-        raise TypeError(msg)
-
-    if _trim.ndim > 1:
-        msg = 'trim cannot be vectorized'
-        raise TypeError(trim)
-
-    n = _trim.size
-    if n == 0:
-        _trim = np.array([0, 0])
-    if n == 1:
-        _trim = np.repeat(_trim, 2)
-    elif n > 2:
-        msg = f'expected two trim values, got {n} instead'
-        raise TypeError(msg)
-
-    s, t = _trim
+_COMMON_TRIM1: Final[frozenset[int]] = frozenset({0, 1, 2})
+_COMMON_TRIM2: Final[frozenset[tuple[int, int]]] = frozenset(
+    {(0, 0), (1, 1), (2, 2), (0, 1), (0, 2), (1, 0), (2, 0)},
+)
 
-    if s <= -1 / 2 or t <= -1 / 2:
-        msg = f'trim must both be >-1/2, got {(s, t)}'
-        raise ValueError(msg)
 
-    if s.is_integer() and t.is_integer():
-        return int(s), int(t)
+def clean_trim(trim: AnyTrim, /) -> tuple[int, int] | tuple[float, float]:
+    """
+    Validates and cleans the passed trim, and returns a 2-tuple of either
+    ints or floats.
+
+    Notes:
+        - This uses `.is_integer()`, instead of an `isinstance(..., int)`
+          check. So e.g. `clean_trim(1.0)` will return `tuple[int, int]`.
+        - Although not allowed by typecheckers, numpy integer or floating
+          scalars are also accepted, and will be converted to `int` or
+          `float`.
+ """ + # fast pass-through for the common cases + if trim in _COMMON_TRIM1: + return trim, trim + if trim in _COMMON_TRIM2: + return trim + + match trim: + case s, t: + pass + case st: + s = t = st + + fractional = False + for f in map(float, (s, t)): + if not math.isfinite(f): + msg = 'trim orders must be finite' + raise ValueError(msg) + if f <= -1 / 2: + msg = 'trim orders must be greater than -1/2' + raise ValueError(msg) + if not f.is_integer(): + fractional = True - return float(s), float(t) + return (float(s), float(t)) if fractional else (int(s), int(t)) def moments_to_ratio( - rs: npt.NDArray[np.integer[Any]], - l_rs: npt.NDArray[FT], + rs: lnpt.Array[Any, np.integer[Any]], + l_rs: lnpt.Array[lnpt.AtLeast1D, _T_float], /, -) -> FT | npt.NDArray[FT]: - assert rs.shape[:l_rs.ndim] == l_rs.shape[:rs.ndim], [rs.shape, l_rs.shape] +) -> _T_float | npt.NDArray[_T_float]: + """ + Using stacked order of shape (2, ...), and an L-moments array, returns + the L-moment ratio's. + """ assert len(rs) == 2 + assert rs.shape[:l_rs.ndim] == l_rs.shape[:rs.ndim], [rs.shape, l_rs.shape] r_eq_s = rs[0] == rs[1] if r_eq_s.ndim < l_rs.ndim - 1: @@ -300,9 +307,9 @@ def moments_to_ratio( def moments_to_stats_cov( - t_0r: npt.NDArray[np.float64], - ll_kr: npt.NDArray[np.float64], -) -> npt.NDArray[np.float64]: + t_0r: lnpt.Array[tuple[int], np.floating[Any]], + ll_kr: lnpt.Array[_T_shape2, _T_float], +) -> lnpt.Array[_T_shape2, _T_float]: # t_0r are L-ratio's for r = 0, 1, ..., R (t_0r[0] == 1 / L-scale) # t_0r[1] isn't used, and can be set to anything # ll_kr is the L-moment cov of size R**2 (orders start at 1 here) @@ -330,10 +337,17 @@ def moments_to_stats_cov( def l_stats_orders( - num: int, + num: _T_size, /, -) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.int64]]: - return ( - np.arange(1, num + 1), - np.array([0] * min(2, num) + [2] * (num - 2)), - ) + dtype: _DType[_T_int] = np.intp, +) -> tuple[ + lnpt.Array[tuple[_T_size], _T_int], + lnpt.Array[tuple[_T_size], _T_int], +]: + """ + Create the L-moment order array `[1, 2, ..., r]` and corresponding + ratio array `[0, 0, 2, ...]` of same size. + """ + r = np.arange(1, num + 1, dtype=dtype) + s = np.array([0] * min(2, num) + [2] * (num - 2), dtype=dtype) + return r, s diff --git a/lmo/contrib/__init__.py b/lmo/contrib/__init__.py index a8fe2f59..0053d049 100644 --- a/lmo/contrib/__init__.py +++ b/lmo/contrib/__init__.py @@ -1,14 +1,14 @@ """Integrations and extensions for 3rd party packages.""" -__all__ = ('install',) - try: import pandas as pd except ImportError: pd = None +__all__ = ('install',) + -def install(): +def install() -> None: """ Install the extensions for all available 3rd party packages. diff --git a/lmo/contrib/pandas.py b/lmo/contrib/pandas.py index ff6d0452..0111030e 100644 --- a/lmo/contrib/pandas.py +++ b/lmo/contrib/pandas.py @@ -1,3 +1,4 @@ +# pyright: reportUnknownMemberType=false """ Extension methods for `pandas.Series` and `pandas.DataFrame`. 
@@ -40,15 +41,8 @@ ``` """ -__all__ = ( - 'Series', - 'DataFrame', - 'install', -) - -import sys from collections.abc import Callable -from typing import Any, Literal, Protocol, TypeAlias, Union, cast, final +from typing import Any, Literal, Protocol, TypeAlias, cast, final import numpy as np import numpy.typing as npt @@ -63,32 +57,49 @@ l_comoment as _l_comoment, l_coratio as _l_coratio, ) -from lmo._utils import broadstack, clean_trim, moments_to_ratio +from lmo._utils import clean_trim, moments_to_ratio from lmo.typing import ( - AnyInt, + AnyOrder, + AnyOrderND, AnyTrim, - IntVector, LComomentOptions, LMomentOptions, ) +from lmo.typing.compat import TypeVar, Unpack + + +__all__ = ( + 'DataFrame', + 'Series', + 'install', +) + + +_T = TypeVar('_T', bound=object) + +_Axis: TypeAlias = Literal[0, 'index', 1, 'columns'] -if sys.version_info < (3, 11): - from typing_extensions import Unpack -else: - from typing import Unpack +# `from __future__ import annotations` won't solve this; +# see https://github.com/pandas-dev/pandas-stubs/discussions/308 +def __ensure_generic(tp: type[_T]): + if hasattr(tp, '__class_getitem__'): + return -_FloatOrSeries: TypeAlias = Union[float, 'pd.Series[float]'] -_SeriesOrFrame: TypeAlias = Union['pd.Series[float]', pd.DataFrame] -_FloatOrFrame: TypeAlias = _FloatOrSeries | pd.DataFrame + def __class_getitem__(cls: _T, _: Any, /) -> _T: # noqa: N807 + return cls -AxisDF: TypeAlias = Literal[0, 'index', 1, 'columns'] + tp.__class_getitem__ = classmethod(__class_getitem__) # pyright: ignore[reportArgumentType,reportAttributeAccessIssue] + + +__ensure_generic(pd.Series) +__ensure_generic(pd.Index) def _setindex( df: pd.DataFrame, - axis: AxisDF, - index: 'pd.Index[Any]', + axis: _Axis, + index: pd.Index[Any], ) -> None: if axis in {0, 'index'}: df.index = index @@ -100,11 +111,11 @@ def _setindex( def _ratio_index(rk: npt.NDArray[np.int64]) -> pd.MultiIndex: - return pd.MultiIndex.from_arrays(rk, names=('r', 'k')) # type: ignore + return pd.MultiIndex.from_arrays(rk, names=('r', 'k')) @final -class Series(pd.Series): # type: ignore [missingTypeArguments] +class Series(pd.Series): # pyright: ignore[reportMissingTypeArgument] """ Extension methods for [`pandas.Series`][pandas.Series]. @@ -116,20 +127,20 @@ class Series(pd.Series): # type: ignore [missingTypeArguments] def __lmo_register__( # noqa: D105 cls, name: str, - method: Callable[..., _FloatOrSeries | pd.DataFrame], + method: Callable[..., pd.Series[float]], ) -> None: - def fn(obj: 'pd.Series[Any]') -> Callable[..., _FloatOrSeries]: + def fn(obj: pd.Series[Any]) -> Callable[..., float | pd.Series[float]]: return method.__get__(obj, Series) - pd.api.extensions.register_series_accessor(name)(fn) # type: ignore + pd.api.extensions.register_series_accessor(name)(fn) def l_moment( self, - r: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LMomentOptions], - ) -> _FloatOrSeries: + ) -> float | pd.Series[float]: """ See [`lmo.l_moment`][lmo.l_moment]. @@ -150,12 +161,12 @@ def l_moment( def l_ratio( self, - r: AnyInt | IntVector, - k: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, + k: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LMomentOptions], - ) -> _FloatOrSeries: + ) -> float | pd.Series[float]: """ See [`lmo.l_ratio`][lmo.l_ratio]. @@ -163,7 +174,7 @@ def l_ratio( out: A scalar, or [`pd.Series[float]`][pandas.Series], with a [`MultiIndex`][pandas.MultiIndex] of `r` and `k`. 
""" - rk = broadstack(r, k) + rk = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(k))) out = moments_to_ratio(rk, _l_moment(self, rk, trim=trim, **kwargs)) if rk.ndim == 1: return cast(float, out) @@ -177,10 +188,10 @@ def l_ratio( def l_stats( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, num: int = 4, **kwargs: Unpack[LMomentOptions], - ) -> 'pd.Series[float]': + ) -> pd.Series[float]: """ See [`lmo.l_stats`][lmo.l_stats]. @@ -196,7 +207,7 @@ def l_stats( def l_loc( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LMomentOptions], ) -> float: """ @@ -209,7 +220,7 @@ def l_loc( def l_scale( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LMomentOptions], ) -> float: """ @@ -222,7 +233,7 @@ def l_scale( def l_variation( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LMomentOptions], ) -> float: """ @@ -235,7 +246,7 @@ def l_variation( def l_skew( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LMomentOptions], ) -> float: """ @@ -248,7 +259,7 @@ def l_skew( def l_kurtosis( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LMomentOptions], ) -> float: """ @@ -275,9 +286,12 @@ class DataFrame(pd.DataFrame): def __lmo_register__( # noqa: D105 cls, name: str, - method: Callable[..., _FloatOrFrame], + method: Callable[..., float | pd.Series[float] | pd.DataFrame], ) -> None: - def fn(obj: pd.DataFrame) -> Callable[..., _FloatOrFrame]: + def fn( + obj: pd.DataFrame, + /, + ) -> Callable[..., float | pd.Series[float] | pd.DataFrame]: # return functools.partial(method, obj) return method.__get__(obj, cls) @@ -285,29 +299,27 @@ def fn(obj: pd.DataFrame) -> Callable[..., _FloatOrFrame]: def l_moment( self, - r: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], - ) -> _SeriesOrFrame: + ) -> pd.Series[float] | pd.DataFrame: """ See [`lmo.l_moment`][lmo.l_moment]. Returns: - out: A [`Series[float]`][pandas.Series], or + out: + A [`Series[float]`][pandas.Series], or a [`DataFrame`][pandas.DataFrame] with `r` as index along the specified axis. """ - out = cast( - _SeriesOrFrame, - self.apply( # type: ignore - _l_moment, - axis=axis, - result_type='expand', - args=(r, trim), - **kwargs, - ), + out = self.apply( + _l_moment, + axis=axis, + result_type='expand', + args=(r, trim), + **kwargs, ) if isinstance(out, pd.DataFrame): _setindex(out, axis, pd.Index(np.asarray(r), name='r')) @@ -317,13 +329,13 @@ def l_moment( def l_ratio( self, - r: AnyInt | IntVector, - k: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, + k: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], - ) -> _SeriesOrFrame: + ) -> pd.Series[float] | pd.DataFrame: """ See [`lmo.l_ratio`][lmo.l_ratio]. @@ -333,19 +345,16 @@ def l_ratio( [`MultiIndex`][pandas.MultiIndex] of `r` and `k` along the specified axis. 
""" - rk = broadstack(r, k) + rk = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(k))) if rk.ndim > 2: rk = np.r_[rk[0].reshape(-1), rk[1].reshape(-1)] - out = cast( - _SeriesOrFrame, - self.apply( # type: ignore - _l_ratio, - axis=axis, - result_type='expand', - args=(rk[0], rk[1], trim), - **kwargs, - ), + out = self.apply( + _l_ratio, + axis=axis, + result_type='expand', + args=(rk[0], rk[1], trim), + **kwargs, ) if isinstance(out, pd.DataFrame): assert rk.ndim > 1 @@ -356,9 +365,9 @@ def l_ratio( def l_stats( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, num: int = 4, - axis: AxisDF = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], ) -> pd.DataFrame: """ @@ -368,15 +377,12 @@ def l_stats( out: A [`DataFrame`][pandas.DataFrame] with `r = 1, ..., num` as index along the specified axis. """ - out = cast( - pd.DataFrame, - self.apply( # type: ignore - _l_stats, - axis=axis, - result_type='expand', - args=(trim, num), - **kwargs, - ), + out = self.apply( + _l_stats, + axis=axis, + result_type='expand', + args=(trim, num), + **kwargs, ) _setindex(out, axis, pd.RangeIndex(1, num + 1, name='r')) out.attrs['l_kind'] = 'stat' @@ -385,8 +391,8 @@ def l_stats( def l_loc( self, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], ) -> 'pd.Series[float]': """ @@ -394,7 +400,7 @@ def l_loc( [`l_moment(1, ...)`][lmo.contrib.pandas.DataFrame.l_moment]. See [`lmo.l_loc`][lmo.l_loc] for details. """ - return self.apply( # type: ignore + return self.apply( # pyright: ignore[reportReturnType] _l_moment, axis=axis, args=(1, trim), @@ -403,16 +409,16 @@ def l_loc( def l_scale( self, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], - ) -> 'pd.Series[float]': + ) -> pd.Series[float]: """ Alias for [`l_moment(2, ...)`][lmo.contrib.pandas.DataFrame.l_moment]. See [`lmo.l_scale`][lmo.l_scale] for details. """ - return self.apply( # type: ignore + return self.apply( # pyright: ignore[reportReturnType] _l_moment, axis=axis, args=(2, trim), @@ -421,16 +427,16 @@ def l_scale( def l_variation( self, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], - ) -> 'pd.Series[float]': + ) -> pd.Series[float]: """ Alias for [`l_ratio(2, 1, ...)`][lmo.contrib.pandas.DataFrame.l_ratio]. See [`lmo.l_variation`][lmo.l_variation] for details. """ - return self.apply( # type: ignore + return self.apply( # pyright: ignore[reportReturnType] _l_ratio, axis=axis, args=(2, 1, trim), @@ -439,16 +445,16 @@ def l_variation( def l_skew( self, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], - ) -> 'pd.Series[float]': + ) -> pd.Series[float]: """ Alias for [`l_ratio(3, 2, ...)`][lmo.contrib.pandas.DataFrame.l_ratio]. See [`lmo.l_skew`][lmo.l_skew] for details. """ - return self.apply( # type: ignore + return self.apply( # pyright: ignore[reportReturnType] _l_ratio, axis=axis, args=(3, 2, trim), @@ -457,16 +463,16 @@ def l_skew( def l_kurtosis( self, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], - ) -> 'pd.Series[float]': + ) -> pd.Series[float]: """ Alias for [`l_ratio(4, 2, ...)`][lmo.contrib.pandas.DataFrame.l_ratio]. See [`lmo.l_kurtosis`][lmo.l_kurtosis] for details. 
""" - return self.apply( # type: ignore + return self.apply( # pyright: ignore[reportReturnType] _l_ratio, axis=axis, args=(4, 2, trim), @@ -475,10 +481,10 @@ def l_kurtosis( def l_kurt( self, - trim: AnyTrim = (0, 0), - axis: AxisDF = 0, + trim: AnyTrim = 0, + axis: _Axis = 0, **kwargs: Unpack[LMomentOptions], - ) -> 'pd.Series[float]': + ) -> pd.Series[float]: """ Alias for [`l_kurtosis`][lmo.contrib.pandas.DataFrame.l_kurtosis]. @@ -487,9 +493,9 @@ def l_kurt( def l_comoment( self, - r: AnyInt, + r: AnyOrder, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ @@ -526,10 +532,10 @@ def l_comoment( def l_coratio( self, - r: AnyInt, - k: AnyInt = 2, + r: AnyOrder, + k: AnyOrder = 2, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ @@ -569,7 +575,7 @@ def l_coratio( def l_coloc( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ @@ -581,7 +587,7 @@ def l_coloc( def l_coscale( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ @@ -593,7 +599,7 @@ def l_coscale( def l_corr( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ @@ -605,7 +611,7 @@ def l_corr( def l_coskew( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ @@ -617,7 +623,7 @@ def l_coskew( def l_cokurtosis( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ @@ -629,7 +635,7 @@ def l_cokurtosis( def l_cokurt( self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Unpack[LComomentOptions], ) -> pd.DataFrame: """ diff --git a/lmo/contrib/scipy_stats.py b/lmo/contrib/scipy_stats.py index bf2ec6c5..6b78d3e5 100644 --- a/lmo/contrib/scipy_stats.py +++ b/lmo/contrib/scipy_stats.py @@ -1,11 +1,13 @@ -"""Extensions for `scipy.stats` distributions.""" - -__all__ = ('l_rv_generic', 'l_rv_frozen', 'install') - # pyright: reportUninitializedInstanceVariable=false +""" +Extension methods for the (univariate) distributions in +[`scipy.stats`][scipy.stats]. 
+""" +from __future__ import annotations from collections.abc import Callable, Mapping, Sequence from typing import ( + TYPE_CHECKING, Any, ClassVar, Literal, @@ -19,7 +21,9 @@ import numpy as np import numpy.typing as npt -from scipy.stats import fit as scipy_fit # type: ignore +from scipy.stats import ( + fit as scipy_fit, # pyright: ignore[reportUnknownVariableType] +) from scipy.stats.distributions import rv_continuous, rv_frozen from lmo import ( @@ -27,7 +31,7 @@ l_moment as l_moment_est, ) from lmo._utils import ( - broadstack, + clean_order, clean_orders, clean_trim, l_stats_orders, @@ -42,31 +46,45 @@ l_stats_cov_from_cdf, ) from lmo.typing import ( - AnyInt, - AnyTrim, - DistributionFunction, - IntVector, - QuadOptions, + AnyOrder, + AnyOrderND, + scipy as lsct, ) -T = TypeVar('T') -V = TypeVar('V', bound=float | npt.NDArray[np.float64]) +if TYPE_CHECKING: + from lmo.typing import ( + AnyTrim, + np as lnpt, + ) + + +__all__ = ( + 'install', + 'l_rv_frozen', + 'l_rv_generic', +) -_Tuple4: TypeAlias = tuple[T, T, T, T] + +_T = TypeVar('_T') +_T_x = TypeVar('_T_x', bound=float | npt.NDArray[np.float64]) + +_Tuple2: TypeAlias = tuple[_T, _T] +_Tuple4: TypeAlias = tuple[_T, _T, _T, _T] +_ArrF8: TypeAlias = npt.NDArray[np.float64] class _ShapeInfo(Protocol): """Stub for `scipy.stats._distn_infrastructure._ShapeInfo`.""" name: str integrality: bool - domain: Sequence[float] # in practice a list of size 2 + domain: Sequence[float] # in practice a list of size 2 (y no tuple?) def __init__( self, name: str, integrality: bool = ..., - domain: tuple[float, float] = ..., - inclusive: tuple[bool, bool] = ..., + domain: _Tuple2[float] = ..., + inclusive: _Tuple2[bool] = ..., ) -> None: ... @@ -114,29 +132,31 @@ class l_rv_generic(PatchClass): shapes: str _argcheck: Callable[..., int] - _logpxf: DistributionFunction[...] - _cdf: DistributionFunction[...] + _logpxf: lsct.RVFunction[...] + _cdf: lsct.RVFunction[...] _fitstart: Callable[..., tuple[float, ...]] - _get_support: Callable[..., tuple[float, float]] + _get_support: Callable[..., _Tuple2[float]] _param_info: Callable[[], list[_ShapeInfo]] _parse_args: Callable[..., tuple[tuple[Any, ...], float, float]] - _ppf: DistributionFunction[...] + _ppf: lsct.RVFunction[...] _shape_info: Callable[[], list[_ShapeInfo]] _stats: Callable[..., _Tuple4[float | None]] _unpack_loc_scale: Callable[ - [npt.ArrayLike], + [lnpt.AnyVector], tuple[float, float, tuple[float, ...]], ] - cdf: DistributionFunction[...] + cdf: lsct.RVFunction[...] fit: Callable[..., tuple[float, ...]] mean: Callable[..., float] - ppf: DistributionFunction[...] + ppf: lsct.RVFunction[...] std: Callable[..., float] - def _get_xxf(self, *args: Any, loc: float = 0, scale: float = 1) -> tuple[ - Callable[[float], float], - Callable[[float], float], - ]: + def _get_xxf( + self, + *args: Any, + loc: float = 0, + scale: float = 1, + ) -> _Tuple2[Callable[[float], float]]: assert scale > 0 _cdf, _ppf = self._cdf, self._ppf @@ -153,9 +173,9 @@ def _l_moment( self, r: npt.NDArray[np.int64], *args: Any, - trim: tuple[int, int] | tuple[float, float] = (0, 0), - quad_opts: QuadOptions | None = None, - ) -> npt.NDArray[np.float64]: + trim: _Tuple2[int] | _Tuple2[float] = (0, 0), + quad_opts: lsct.QuadOptions | None = None, + ) -> _ArrF8: """ Population L-moments of the standard distribution (i.e. assuming `loc=0` and `scale=1`). 
@@ -183,18 +203,18 @@ def _l_moment( def _logqdf( self, - u: npt.NDArray[np.float64], + u: _ArrF8, *args: Any, - ) -> npt.NDArray[np.float64]: + ) -> _ArrF8: """Overridable log quantile distribution function (QDF).""" with np.errstate(divide='ignore'): return -self._logpxf(self._ppf(u, *args), *args) def _qdf( self, - u: npt.NDArray[np.float64], + u: _ArrF8, *args: Any, - ) -> npt.NDArray[np.float64]: + ) -> _ArrF8: r""" Overridable quantile distribution function (QDF). @@ -206,34 +226,32 @@ def _qdf( @overload def l_moment( self, - r: IntVector, + r: AnyOrderND, /, *args: Any, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., + quad_opts: lsct.QuadOptions | None = ..., **kwds: Any, - ) -> npt.NDArray[np.float64]: ... - + ) -> _ArrF8: ... @overload def l_moment( self, - r: AnyInt, + r: AnyOrder, /, *args: Any, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., + quad_opts: lsct.QuadOptions | None = ..., **kwds: Any, ) -> np.float64: ... - def l_moment( self, - r: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, /, *args: Any, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, **kwds: Any, - ) -> np.float64 | npt.NDArray[np.float64]: + ) -> np.float64 | _ArrF8: r""" Population L-moment(s) $\lambda^{(s,t)}_r$. @@ -303,28 +321,27 @@ def l_moment( See Also: - [`lmo.l_moment`][lmo.l_moment]: sample L-moment """ - _r = clean_orders(r) - _trim = clean_trim(trim) - - if ( - _trim[0] == _trim[1] == 0 - and not np.isfinite(self.mean(*args, **kwds)) - ): - # first moment condition not met - return np.full(_r.shape, np.nan)[()] + if np.isscalar(r): + _r = np.asarray(clean_order(cast(AnyOrder, r))) + else: + _r = clean_orders(cast(AnyOrderND, r)) + (s, t) = _trim = clean_trim(trim) - args, loc, scale = self._parse_args(*args, **kwds) - if not self._argcheck(*args): - return np.full(_r.shape, np.nan)[()] + shapes, loc, scale = self._parse_args(*args, **kwds) - if _trim[0] <= 0 and _trim[1] <= 0: - mu1 = self._stats(*args)[0] - if mu1 is not None and np.isnan(mu1): - # undefined mean -> distr is "pathological" (e.g. cauchy) + if s <= 0 and t <= 0: + _mean = self._stats(*shapes)[0] + if _mean is None: + _mean = self.mean(*shapes) + if not np.isfinite(_mean): + # first moment condition not met return np.full(_r.shape, np.nan)[()] + if not self._argcheck(*shapes): + return np.full(_r.shape, np.nan)[()] + # L-moments of the standard distribution (loc=0, scale=scale0) - l0_r = self._l_moment(_r, *args, trim=_trim, quad_opts=quad_opts) + l0_r = self._l_moment(_r, *shapes, trim=_trim, quad_opts=quad_opts) # shift (by loc) and scale shift_r = loc * (_r == 1) @@ -340,37 +357,35 @@ def l_moment( @overload def l_ratio( self, - order: IntVector, - order_denom: AnyInt | IntVector, + order: AnyOrderND, + order_denom: AnyOrder | AnyOrderND, /, *args: Any, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., + quad_opts: lsct.QuadOptions | None = ..., **kwds: Any, - ) -> npt.NDArray[np.float64]: ... - + ) -> _ArrF8: ... @overload def l_ratio( self, - order: AnyInt, - order_denom: AnyInt | IntVector, + order: AnyOrder, + order_denom: AnyOrder | AnyOrderND, /, *args: Any, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., + quad_opts: lsct.QuadOptions | None = ..., **kwds: Any, ) -> np.float64: ... 
- def l_ratio( self, - r: AnyInt | IntVector, - k: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, + k: AnyOrder | AnyOrderND, /, *args: Any, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, **kwds: Any, - ) -> np.float64 | npt.NDArray[np.float64]: + ) -> np.float64 | _ArrF8: r""" L-moment ratio('s) $\tau^{(s,t)}_{r,k}$. @@ -438,24 +453,24 @@ def l_ratio( ][lmo.contrib.scipy_stats.l_rv_generic.l_moment] - [`lmo.l_ratio`][lmo.l_ratio] - Sample L-moment ratio estimator """ - rs = broadstack(r, k) + rk = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(k))) lms = self.l_moment( - rs, + rk, *args, trim=trim, quad_opts=quad_opts, **kwds, ) - return moments_to_ratio(rs, lms) + return moments_to_ratio(rk, lms) def l_stats( self, *args: Any, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, moments: int = 4, - quad_opts: QuadOptions | None = None, + quad_opts: lsct.QuadOptions | None = None, **kwds: Any, - ) -> npt.NDArray[np.float64]: + ) -> _ArrF8: r""" The L-moments (for $r \le 2$) and L-ratio's (for $r > 2$). @@ -524,7 +539,7 @@ def l_stats( def l_loc( self, *args: Any, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwds: Any, ) -> float: """ @@ -540,7 +555,7 @@ def l_loc( def l_scale( self, *args: Any, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwds: Any, ) -> float: """ @@ -553,7 +568,7 @@ def l_scale( def l_skew( self, *args: Any, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwds: Any, ) -> float: """L-skewness coefficient of the distribution; the 3rd L-moment ratio. @@ -565,7 +580,7 @@ def l_skew( def l_kurtosis( self, *args: Any, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwds: Any, ) -> float: """L-kurtosis coefficient of the distribution; the 4th L-moment ratio. @@ -579,10 +594,10 @@ def l_moments_cov( r_max: int, /, *args: Any, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, **kwds: Any, - ) -> npt.NDArray[np.float64]: + ) -> _ArrF8: r""" Variance/covariance matrix of the L-moment estimators. @@ -710,10 +725,10 @@ def l_stats_cov( self, *args: Any, moments: int = 4, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, **kwds: Any, - ) -> npt.NDArray[np.float64]: + ) -> _ArrF8: r""" Similar to [`l_moments_cov` ][lmo.contrib.scipy_stats.l_rv_generic.l_moments_cov], but for the @@ -820,14 +835,14 @@ def l_stats_cov( def l_moment_influence( self, - r: AnyInt, + r: AnyOrder, /, *args: Any, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, tol: float = 1e-8, **kwds: Any, - ) -> Callable[[V], V]: + ) -> Callable[[_T_x], _T_x]: r""" Returns the influence function (IF) of an L-moment. @@ -897,7 +912,7 @@ def l_moment_influence( args, loc, scale = self._parse_args(*args, **kwds) cdf = cast( - Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]], + Callable[[_ArrF8], _ArrF8], self._get_xxf(*args, loc=loc, scale=scale)[0], ) @@ -912,15 +927,15 @@ def l_moment_influence( def l_ratio_influence( self, - r: AnyInt, - k: AnyInt, + r: AnyOrder, + k: AnyOrder, /, *args: Any, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, tol: float = 1e-8, **kwds: Any, - ) -> Callable[[V], V]: + ) -> Callable[[_T_x], _T_x]: r""" Returns the influence function (IF) of an L-moment ratio. 
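The `l_ratio` method above mirrors the sample estimator: both reduce to a quotient of two L-moments via `moments_to_ratio`. A quick sketch of that identity with the public sample-side API (random data, so the values themselves are only illustrative):

```python
import numpy as np

import lmo

rng = np.random.default_rng(12345)
x = rng.standard_normal(1_000)

# tau_{4,2} = lambda_4 / lambda_2, here with trim (s, t) = (1, 1)
l4, l2 = lmo.l_moment(x, [4, 2], trim=(1, 1))
assert np.isclose(lmo.l_ratio(x, 4, 2, trim=(1, 1)), l4 / l2)
```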
@@ -983,8 +998,9 @@ def l_ratio_influence( Robust Estimation](https://doi.org/10.2307/2285666) """ + rk = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(k))) lmr, lmk = self.l_moment( - [r, k], + rk, *args, trim=trim, quad_opts=quad_opts, @@ -993,7 +1009,7 @@ def l_ratio_influence( args, loc, scale = self._parse_args(*args, **kwds) cdf = cast( - Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]], + Callable[[_ArrF8], _ArrF8], self._get_xxf(*args, loc=loc, scale=scale)[0], ) @@ -1010,8 +1026,8 @@ def l_ratio_influence( def _reduce_param_bounds( self, - **kwds: dict[str, Any], - ) -> tuple[dict[str, Any], list[tuple[float | None, float | None]]]: + **kwds: Any, + ) -> tuple[dict[str, Any], list[_Tuple2[float | None]]]: """ Based on `scipy.stats.rv_continuous._reduce_func`. @@ -1053,10 +1069,10 @@ def _reduce_param_bounds( def _l_gmm_error( self, - theta: npt.NDArray[np.float64], - trim: tuple[float, float], - l_data: npt.NDArray[np.float64], - weights: npt.NDArray[np.float64], + theta: _ArrF8, + trim: _Tuple2[float], + l_data: _ArrF8, + weights: _ArrF8, ) -> float: """L-GMM objective function.""" loc, scale, args = self._unpack_loc_scale(theta) @@ -1083,10 +1099,10 @@ def _l_gmm_error( @overload def l_fit( self, - data: npt.ArrayLike, + data: lnpt.AnyVectorInt | lnpt.AnyVectorFloat, *args: float, n_extra: int = 0, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, full_output: Literal[True], fit_kwargs: Mapping[str, Any] | None = None, **kwds: Any, @@ -1096,10 +1112,10 @@ def l_fit( @overload def l_fit( self, - data: npt.ArrayLike, + data: lnpt.AnyVectorInt | lnpt.AnyVectorFloat, *args: float, n_extra: int = 0, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, full_output: bool = ..., fit_kwargs: Mapping[str, Any] | None = None, **kwds: Any, @@ -1108,10 +1124,10 @@ def l_fit( def l_fit( self, - data: npt.ArrayLike, + data: lnpt.AnyVectorInt | lnpt.AnyVectorFloat, *args: float, n_extra: int = 0, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, full_output: bool = False, fit_kwargs: Mapping[str, Any] | None = None, random_state: int | np.random.Generator | None = None, @@ -1223,14 +1239,9 @@ def l_fit( # almost never works without custom (finite and tight) bounds... # ... and otherwise it'll runs for +-17 exa-eons args0 = cast( - tuple[float | int, ...], - scipy_fit( - self, - data, - bounds=bounds, - guess=args or None, - ).params, # type: ignore - ) + lsct.FitResult, + scipy_fit(self, data, bounds=bounds, guess=args or None), + ).params _lmo_cache = {} _lmo_fn = self._l_moment @@ -1241,7 +1252,7 @@ def lmo_fn( r: npt.NDArray[np.int64], *args: float, trim: tuple[int, int] | tuple[float, float] = (0, 0), - ) -> npt.NDArray[np.float64]: + ) -> _ArrF8: shapes, loc, scale = args[:-2], args[-2], args[-1] # r and trim will be the same within inference.fit; safe to ignore @@ -1291,9 +1302,9 @@ def lmo_fn( def l_fit_loc_scale( self, - data: npt.ArrayLike, + data: lnpt.AnyArrayFloat, *args: Any, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwds: Any, ) -> tuple[float, float]: """ @@ -1344,28 +1355,28 @@ class l_rv_frozen(PatchClass): # noqa: D101 @overload def l_moment( self, - order: IntVector, + order: AnyOrderND, /, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... + quad_opts: lsct.QuadOptions | None = ..., + ) -> _ArrF8: ... @overload def l_moment( self, - order: AnyInt, + order: AnyOrder, /, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., + quad_opts: lsct.QuadOptions | None = ..., ) -> np.float64: ... 
def l_moment( # noqa: D102 self, - order: AnyInt | IntVector, + order: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, - ) -> np.float64 | npt.NDArray[np.float64]: + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, + ) -> np.float64 | _ArrF8: return self.dist.l_moment( order, *self.args, @@ -1377,31 +1388,31 @@ def l_moment( # noqa: D102 @overload def l_ratio( self, - order: IntVector, - order_denom: AnyInt | IntVector, + order: AnyOrderND, + order_denom: AnyOrder | AnyOrderND, /, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... + quad_opts: lsct.QuadOptions | None = ..., + ) -> _ArrF8: ... @overload def l_ratio( self, - order: AnyInt, - order_denom: AnyInt | IntVector, + order: AnyOrder, + order_denom: AnyOrder | AnyOrderND, /, trim: AnyTrim = ..., - quad_opts: QuadOptions | None = ..., + quad_opts: lsct.QuadOptions | None = ..., ) -> np.float64: ... def l_ratio( # noqa: D102 self, - order: AnyInt | IntVector, - order_denom: AnyInt | IntVector, + order: AnyOrder | AnyOrderND, + order_denom: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, - ) -> np.float64 | npt.NDArray[np.float64]: + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, + ) -> np.float64 | _ArrF8: return self.dist.l_ratio( order, order_denom, @@ -1413,10 +1424,10 @@ def l_ratio( # noqa: D102 def l_stats( # noqa: D102 self, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, moments: int = 4, - quad_opts: QuadOptions | None = None, - ) -> np.float64 | npt.NDArray[np.float64]: + quad_opts: lsct.QuadOptions | None = None, + ) -> np.float64 | _ArrF8: return self.dist.l_stats( *self.args, trim=trim, @@ -1425,25 +1436,25 @@ def l_stats( # noqa: D102 **self.kwds, ) - def l_loc(self, trim: AnyTrim = (0, 0)) -> float: # noqa: D102 + def l_loc(self, trim: AnyTrim = 0) -> float: # noqa: D102 return self.dist.l_loc(*self.args, trim=trim, **self.kwds) - def l_scale(self, trim: AnyTrim = (0, 0)) -> float: # noqa: D102 + def l_scale(self, trim: AnyTrim = 0) -> float: # noqa: D102 return self.dist.l_scale(*self.args, trim=trim, **self.kwds) - def l_skew(self, trim: AnyTrim = (0, 0)) -> float: # noqa: D102 + def l_skew(self, trim: AnyTrim = 0) -> float: # noqa: D102 return self.dist.l_skew(*self.args, trim=trim, **self.kwds) - def l_kurtosis(self, trim: AnyTrim = (0, 0)) -> float: # noqa: D102 + def l_kurtosis(self, trim: AnyTrim = 0) -> float: # noqa: D102 return self.dist.l_kurtosis(*self.args, trim=trim, **self.kwds) def l_moments_cov( # noqa: D102 self, r_max: int, /, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, - ) -> npt.NDArray[np.float64]: + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, + ) -> _ArrF8: return self.dist.l_moments_cov( r_max, *self.args, @@ -1455,9 +1466,9 @@ def l_moments_cov( # noqa: D102 def l_stats_cov( # noqa: D102 self, moments: int = 4, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, - ) -> npt.NDArray[np.float64]: + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, + ) -> _ArrF8: return self.dist.l_stats_cov( *self.args, moments=moments, @@ -1468,12 +1479,12 @@ def l_stats_cov( # noqa: D102 def l_moment_influence( # noqa: D102 self, - r: AnyInt, + r: AnyOrder, /, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, tol: float = 1e-8, - ) -> Callable[[V], V]: + ) -> Callable[[_T_x], _T_x]: return 
self.dist.l_moment_influence( r, *self.args, @@ -1485,13 +1496,13 @@ def l_moment_influence( # noqa: D102 def l_ratio_influence( # noqa: D102 self, - r: AnyInt, - k: AnyInt, + r: AnyOrder, + k: AnyOrder, /, - trim: AnyTrim = (0, 0), - quad_opts: QuadOptions | None = None, + trim: AnyTrim = 0, + quad_opts: lsct.QuadOptions | None = None, tol: float = 1e-8, - ) -> Callable[[V], V]: + ) -> Callable[[_T_x], _T_x]: return self.dist.l_ratio_influence( r, k, diff --git a/lmo/diagnostic.py b/lmo/diagnostic.py index 6b32cf38..e3d36e7c 100644 --- a/lmo/diagnostic.py +++ b/lmo/diagnostic.py @@ -1,17 +1,5 @@ """Hypothesis tests, estimator properties, and performance metrics.""" - -__all__ = ( - 'normaltest', - 'l_moment_gof', - 'l_stats_gof', - - 'l_moment_bounds', - 'l_ratio_bounds', - - 'rejection_point', - 'error_sensitivity', - 'shift_sensitivity', -) +from __future__ import annotations import math import warnings @@ -20,6 +8,7 @@ from typing import ( TYPE_CHECKING, Any, + Final, NamedTuple, TypeAlias, TypeVar, @@ -31,29 +20,53 @@ import numpy.typing as npt from scipy.integrate import quad # pyright: ignore[reportUnknownVariableType] from scipy.optimize import ( - OptimizeResult, OptimizeWarning, - minimize, # type: ignore + minimize, # pyright: ignore[reportUnknownVariableType] ) from scipy.special import chdtrc from scipy.stats.distributions import rv_continuous, rv_discrete, rv_frozen +from . import constants from ._lm import l_ratio from ._poly import extrema_jacobi from ._utils import clean_orders, clean_trim from .special import fpow -from .typing import AnyInt, AnyTrim, IntVector +from .typing import ( + AnyOrder, + AnyOrderND, + scipy as lsct, +) if TYPE_CHECKING: from .contrib.scipy_stats import l_rv_generic + from .typing import ( + AnyTrim, + np as lnpt, + ) + + +__all__ = ( + 'normaltest', + 'l_moment_gof', + 'l_stats_gof', + + 'l_moment_bounds', + 'l_ratio_bounds', + + 'rejection_point', + 'error_sensitivity', + 'shift_sensitivity', +) -T = TypeVar('T', bound=np.floating[Any]) -AnyRV: TypeAlias = rv_continuous | rv_discrete +_T = TypeVar('_T') + +_Tuple2: TypeAlias = tuple[_T, _T] +_AnyRV: TypeAlias = rv_continuous | rv_discrete _ArrF8: TypeAlias = npt.NDArray[np.float64] -_MIN_RHO = 1e-5 +_MIN_RHO: Final[float] = 1e-5 class HypothesisTestResult(NamedTuple): @@ -73,15 +86,15 @@ class HypothesisTestResult(NamedTuple): pvalue: float | _ArrF8 @property - def is_valid(self) -> bool | npt.NDArray[np.bool_]: + def is_valid(self) -> np.bool_ | npt.NDArray[np.bool_]: """Check if the statistic is finite and not `nan`.""" return np.isfinite(self.statistic) def is_significant( self, - level: float = 0.05, + level: float | np.floating[Any] = 0.05, /, - ) -> bool | npt.NDArray[np.bool_]: + ) -> np.bool_ | npt.NDArray[np.bool_]: """ Whether or not the null hypothesis can be rejected, with a certain confidence level (5% by default). 
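A short usage sketch of the `HypothesisTestResult` API above, applied to the `normaltest` function that follows below. The p-values depend on the random draws, so the comments only describe the typical outcome.

```python
import numpy as np

from lmo.diagnostic import normaltest

rng = np.random.default_rng(12345)

res = normaltest(rng.standard_normal(1_000))
print(res.statistic, res.pvalue)  # large p-value: normality not rejected
print(res.is_significant(0.05))   # typically False for normal data

res = normaltest(rng.uniform(size=1_000))
print(res.is_significant(0.05))   # typically True: uniform data is not normal
```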
@@ -89,11 +102,11 @@ def is_significant( if not (0 < level < 1): msg = 'significance level must lie between 0 and 1' raise ValueError(msg) - return self.pvalue < level + return self.pvalue < np.float64(level) def normaltest( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, /, *, axis: int | None = None, @@ -147,11 +160,11 @@ def normaltest( n = x.size if axis is None else x.shape[axis] # L-skew and L-kurtosis - t3, t4 = l_ratio(a, [3, 4], [2, 2], axis=axis) + t3, t4 = l_ratio(a, [3, 4], 2, axis=axis) # theoretical L-skew and L-kurtosis of the normal distribution (for all # loc/mu and scale/sigma) - tau3, tau4 = 0.0, 30 / np.pi * np.arctan(np.sqrt(2)) - 9 + tau3, tau4 = .0, 60 * constants.theta_m_bar - 9 z3 = (t3 - tau3) / np.sqrt( 0.1866 / n + (np.sqrt(0.8000) / n) ** 2, @@ -168,10 +181,10 @@ def normaltest( return HypothesisTestResult(k2, p_value) -def _gof_stat_single(l_obs: _ArrF8, l_exp: _ArrF8, cov: _ArrF8) -> float: +def _gof_stat_single(l_obs: _ArrF8, l_exp: _ArrF8, cov: _ArrF8) -> np.float64: err = l_obs - l_exp prec = np.linalg.inv(cov) # precision matrix - return cast(float, err.T @ prec @ err) + return cast(np.float64, err.T @ prec @ err) _gof_stat = cast( @@ -186,11 +199,11 @@ def _gof_stat_single(l_obs: _ArrF8, l_exp: _ArrF8, cov: _ArrF8) -> float: def l_moment_gof( - rv_or_cdf: AnyRV | Callable[[float], float], + rv_or_cdf: _AnyRV | Callable[[float], float], l_moments: _ArrF8, n_obs: int, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Any, ) -> HypothesisTestResult: r""" @@ -282,11 +295,11 @@ def l_moment_gof( def l_stats_gof( - rv_or_cdf: AnyRV | Callable[[float], float], + rv_or_cdf: _AnyRV | Callable[[float], float], l_stats: _ArrF8, n_obs: int, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, **kwargs: Any, ) -> HypothesisTestResult: """ @@ -315,7 +328,7 @@ def l_stats_gof( return HypothesisTestResult(stat, pval) -def _lm2_bounds_single(r: int, trim: tuple[float, float]) -> float: +def _lm2_bounds_single(r: int, trim: _Tuple2[float]) -> float: if r == 1: return float('inf') @@ -343,7 +356,7 @@ def _lm2_bounds_single(r: int, trim: tuple[float, float]) -> float: _lm2_bounds = cast( - Callable[[IntVector, tuple[float, float]], _ArrF8], + Callable[[AnyOrderND, _Tuple2[float]], _ArrF8], np.vectorize( _lm2_bounds_single, otypes=[float], @@ -355,25 +368,20 @@ def _lm2_bounds_single(r: int, trim: tuple[float, float]) -> float: @overload def l_moment_bounds( - r: IntVector, - /, + r: AnyOrderND, /, trim: AnyTrim = ..., scale: float = ..., ) -> _ArrF8: ... - @overload def l_moment_bounds( - r: AnyInt, - /, + r: AnyOrder, /, trim: AnyTrim = ..., scale: float = ..., ) -> float: ... - - def l_moment_bounds( - r: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, scale: float = 1.0, ) -> float | _ArrF8: r""" @@ -468,37 +476,34 @@ def l_moment_bounds( - [`lmo.l_moment`][lmo.l_moment] """ - _r = clean_orders(r, rmin=1) + _r = clean_orders(np.asarray(r), rmin=1) _trim = clean_trim(trim) return scale * np.sqrt(_lm2_bounds(_r, _trim))[()] @overload def l_ratio_bounds( - r: IntVector, + r: AnyOrderND, /, trim: AnyTrim = ..., *, legacy: bool = ..., -) -> tuple[_ArrF8, _ArrF8]: ... - +) -> _Tuple2[_ArrF8]: ... @overload def l_ratio_bounds( - r: AnyInt, + r: AnyOrder, /, trim: AnyTrim = ..., *, legacy: bool = ..., -) -> tuple[float, float]: ... - - +) -> _Tuple2[float]: ... 
def l_ratio_bounds( - r: IntVector | AnyInt, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, legacy: bool = False, -) -> tuple[float | _ArrF8, float | _ArrF8]: +) -> _Tuple2[float | _ArrF8]: r""" Unlike the standardized product-moments, the L-moment ratio's with \( r \ge 2 \) are bounded above and below. @@ -594,13 +599,13 @@ def l_ratio_bounds( L-moments](https://doi.org/10.1016/j.jspi.2006.12.002) """ - _r = clean_orders(r) + _r = clean_orders(np.asarray(r)) s, t = clean_trim(trim) t_min = np.empty(_r.shape) t_max = np.empty(_r.shape) - _cache: dict[int, tuple[float, float]] = {} + _cache: dict[int, _Tuple2[float]] = {} for i, ri in np.ndenumerate(_r): _ri = cast(int, ri) if _ri in _cache: @@ -729,10 +734,10 @@ def integrand(x: float) -> float: return max(abs(influence_fn(-x)), abs(influence_fn(x))) def obj(r: _ArrF8) -> float: - return quad(integrand, r[0], np.inf)[0] # type: ignore + return quad(integrand, r[0], np.inf)[0] # pyright: ignore[reportUnknownVariableType] res = cast( - OptimizeResult, + lsct.OptimizeResult, minimize( obj, bounds=[(rho_min, rho_max)], @@ -741,7 +746,7 @@ def obj(r: _ArrF8) -> float: ), ) - rho = cast(float, res.x[0]) # type: ignore + rho = cast(float, res.x[0]) if rho <= _MIN_RHO or influence_fn(-rho) or influence_fn(rho): return np.nan @@ -751,7 +756,7 @@ def obj(r: _ArrF8) -> float: def error_sensitivity( influence_fn: Callable[[float], float], /, - domain: tuple[float, float] = (-math.inf, math.inf), + domain: _Tuple2[float] = (-math.inf, math.inf), ) -> float: r""" Evaluate the *gross-error sensitivity* of an influence function @@ -800,7 +805,7 @@ def obj(xs: _ArrF8) -> float: bounds = None if np.isneginf(a) and np.isposinf(b) else [(a, b)] res = cast( - OptimizeResult, + lsct.OptimizeResult, minimize( obj, bounds=bounds, @@ -808,20 +813,20 @@ def obj(xs: _ArrF8) -> float: method='COBYLA', ), ) - if not res.success: # type: ignore + if not res.success: warnings.warn( - cast(str, res.message), # type: ignore + res.message, OptimizeWarning, stacklevel=1, ) - return -cast(float, res.fun) # type: ignore + return -res.fun def shift_sensitivity( influence_fn: Callable[[float], float], /, - domain: tuple[float, float] = (-math.inf, math.inf), + domain: _Tuple2[float] = (-math.inf, math.inf), ) -> float: r""" Evaluate the *local-shift sensitivity* of an influence function @@ -887,7 +892,7 @@ def obj(xs: _ArrF8) -> float: bounds = None if np.isneginf(a) and np.isposinf(b) else [(a, b)] res = cast( - OptimizeResult, + lsct.OptimizeResult, minimize( obj, bounds=bounds, @@ -895,11 +900,11 @@ def obj(xs: _ArrF8) -> float: method='COBYLA', ), ) - if not res.success: # type: ignore + if not res.success: warnings.warn( - cast(str, res.message), # type: ignore + cast(str, res.message), OptimizeWarning, stacklevel=1, ) - return -cast(float, res.fun) # type: ignore + return -res.fun diff --git a/lmo/distributions.py b/lmo/distributions.py index 169ca8e7..2526ce29 100644 --- a/lmo/distributions.py +++ b/lmo/distributions.py @@ -1,25 +1,23 @@ -"""Probability distributions, compatible with [`scipy.stats`][scipy.stats].""" -__all__ = ( - 'l_poly', - 'l_rv_nonparametric', - 'kumaraswamy', - 'wakeby', - 'genlambda', -) - # pyright: reportIncompatibleMethodOverride=false -# ruff: noqa: PLR2004 +# ruff: noqa: N801, PLR2004 + +""" +Probability distributions, compatible with [`scipy.stats`][scipy.stats]. 
+""" + +from __future__ import annotations import functools import math -import sys import warnings -from collections.abc import Callable, Mapping, Sequence +from collections.abc import Callable from typing import ( + TYPE_CHECKING, Any, + ClassVar, Final, Literal, - SupportsIndex, + Protocol, TypeAlias, TypeVar, cast, @@ -27,27 +25,17 @@ ) import numpy as np -import numpy.polynomial as npp import numpy.typing as npt import scipy.special as sc +from scipy.integrate import quad # pyright: ignore[reportUnknownVariableType] from scipy.stats._distn_infrastructure import ( - _ShapeInfo, # type: ignore # noqa: PLC2701 + _ShapeInfo, # noqa: PLC2701 # pyright: ignore[reportPrivateUsage] ) from scipy.stats.distributions import rv_continuous as _rv_continuous -from ._poly import jacobi_series, roots -from ._utils import ( - broadstack, - clean_order, - clean_trim, - l_stats_orders, - moments_to_ratio, - round0, -) -from .diagnostic import l_ratio_bounds +from ._utils import clean_trim, l_stats_orders, moments_to_ratio, round0 from .special import harmonic from .theoretical import ( - _VectorizedPPF, # type: ignore [reportPrivateUsage] cdf_from_ppf, entropy_from_qdf, l_moment_from_ppf, @@ -55,69 +43,92 @@ qdf_from_l_moments, ) from .typing import ( - AnyInt, - AnyNDArray, - AnyScalar, + AnyOrder, + AnyOrderND, AnyTrim, - FloatVector, - IntVector, - PolySeries, - QuadOptions, - RVContinuous, + np as lnpt, + scipy as lsct, ) -if sys.version_info < (3, 11): - from typing_extensions import Self -else: - from typing import Self +if TYPE_CHECKING: + from collections.abc import Sequence -T = TypeVar('T') -X = TypeVar('X', bound='l_rv_nonparametric') -F = TypeVar('F', bound=np.floating[Any]) -M = TypeVar('M', bound=Callable[..., Any]) -V = TypeVar('V', bound=float | npt.NDArray[np.float64]) + from .typing.compat import LiteralString, Self -_ArrF8: TypeAlias = npt.NDArray[np.float64] -_STATS0: TypeAlias = Literal[''] -_STATS1: TypeAlias = Literal['m', 'v', 's', 'k'] -_STATS2: TypeAlias = Literal['mv', 'ms', 'mk', 'vs', 'vk', 'sk'] -_STATS3: TypeAlias = Literal['mvs', 'mvk', 'msk', 'vsk'] -_STATS4: TypeAlias = Literal['mvsk'] -_STATS: TypeAlias = _STATS0 | _STATS1 | _STATS2 | _STATS3 | _STATS4 +__all__ = ( + 'l_poly', + 'kumaraswamy', + 'wakeby', + 'genlambda', +) + + +_T_x = TypeVar('_T_x', bound=float | npt.NDArray[np.float64]) + +_ArrF8: TypeAlias = npt.NDArray[np.float64] +_AnyReal: TypeAlias = lnpt.AnyScalarInt | lnpt.AnyScalarFloat +_AnyReal1D: TypeAlias = lnpt.AnyVectorInt | lnpt.AnyVectorFloat +_AnyReal2D: TypeAlias = lnpt.AnyMatrixInt | lnpt.AnyMatrixFloat +_AnyRealND: TypeAlias = lnpt.AnyArrayInt | lnpt.AnyArrayFloat +_AnyReal3DPlus: TypeAlias = lnpt.AnyTensorInt | lnpt.AnyTensorFloat +_AnyReal2DPlus: TypeAlias = _AnyReal2D | _AnyReal3DPlus + +_Stats0: TypeAlias = Literal[''] +_Stats1: TypeAlias = Literal['m', 'v', 's', 'k'] +_Stats2: TypeAlias = Literal['mv', 'ms', 'mk', 'vs', 'vk', 'sk'] +_Stats3: TypeAlias = Literal['mvs', 'mvk', 'msk', 'vsk'] +_Stats4: TypeAlias = Literal['mvsk'] +_Stats: TypeAlias = _Stats0 | _Stats1 | _Stats2 | _Stats3 | _Stats4 + + +class _VectorizedCDF(Protocol): + @overload + def __call__(self, x: _AnyRealND, /) -> _ArrF8: ... + @overload + def __call__(self, x: _AnyReal, /) -> float: ... 
-_F_EPS: Final[np.float64] = np.finfo(float).eps # Non-parametric +def _get_rng(seed: lnpt.Seed | None = None) -> np.random.Generator: + if isinstance(seed, np.random.Generator): + return seed + return np.random.default_rng(seed) + + +_LPolyParams: TypeAlias = ( + tuple[lnpt.AnyVectorFloat] + | tuple[lnpt.AnyVectorFloat, AnyTrim] +) + -class l_poly: # noqa: N801 +class l_poly: """ Polynomial quantile distribution with (only) the given L-moments. - - Todo: - - Examples - - `stats(moments='mv')` """ + name: ClassVar[LiteralString] = 'l_poly' + badvalue: ClassVar[float] = np.nan + moment_type: ClassVar[Literal[0, 1]] = 1 + numargs: ClassVar[int] = 2 + shapes: ClassVar[LiteralString | None] = 'lmbda, trim' _l_moments: Final[_ArrF8] _trim: Final[tuple[float, float] | tuple[int, int]] _support: Final[tuple[float, float]] - _ppf: Final[_VectorizedPPF] - _qdf: Final[_VectorizedPPF] - _cdf: Final[_VectorizedPPF] + _cdf: Final[_VectorizedCDF] _random_state: np.random.Generator def __init__( self, - lmbda: npt.ArrayLike, + lmbda: lnpt.AnyVectorFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - seed: np.random.Generator | AnyInt | None = None, + seed: lnpt.Seed | None = None, ) -> None: r""" Create a new `l_poly` instance. @@ -149,7 +160,17 @@ def __init__( self._cdf_single = cdf_from_ppf(self._ppf) self._cdf = np.vectorize(self._cdf_single, [float]) - self._random_state = np.random.default_rng(seed) + self._random_state = _get_rng(seed) + + @property + def a(self) -> float: + """Lower bound of the support.""" + return self._support[0] + + @property + def b(self) -> float: + """Upper bound of the support.""" + return self._support[1] @property def random_state(self) -> np.random.Generator: @@ -159,16 +180,16 @@ def random_state(self) -> np.random.Generator: @random_state.setter def random_state( self, - seed: int | np.random.Generator, # pyright: ignore[reportPropertyTypeMismatch] + seed: lnpt.Seed, # pyright: ignore[reportPropertyTypeMismatch] ): - self._random_state = np.random.default_rng(seed) + self._random_state = _get_rng(seed) @classmethod def fit( cls, - data: npt.ArrayLike, + data: _AnyRealND, moments: int | None = None, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, ) -> Self: r""" Fit distribution using the (trimmed) L-moment estimates of the given @@ -224,20 +245,18 @@ def fit( def rvs( self, size: Literal[1] | None = ..., - random_state: np.random.Generator | AnyInt | None = ..., + random_state: lnpt.Seed | None = ..., ) -> float: ... - @overload def rvs( self, size: int | tuple[int, ...], - random_state: np.random.Generator | AnyInt | None = ..., + random_state: lnpt.Seed | None = ..., ) -> _ArrF8: ... - def rvs( self, size: int | tuple[int, ...] | None = None, - random_state: np.random.Generator | AnyInt | None = None, + random_state: lnpt.Seed | None = None, ) -> float | _ArrF8: """ Draw random variates from the relevant distribution. @@ -261,11 +280,10 @@ def rvs( return self._ppf(rng.uniform(size=size)) @overload - def ppf(self, p: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def ppf(self, p: _AnyRealND) -> _ArrF8: ... @overload - def ppf(self, p: AnyScalar) -> float: ... - - def ppf(self, p: npt.ArrayLike) -> float | _ArrF8: + def ppf(self, p: _AnyReal) -> float: ... + def ppf(self, p: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" [Percent point function](https://w.wiki/8cQU) \( Q(p) \) (inverse of [CDF][lmo.distributions.l_poly.cdf], a.k.a. 
the quantile function) at @@ -282,11 +300,10 @@ def ppf(self, p: npt.ArrayLike) -> float | _ArrF8: return self._ppf(p) @overload - def isf(self, q: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def isf(self, q: _AnyRealND) -> _ArrF8: ... @overload - def isf(self, q: AnyScalar) -> float: ... - - def isf(self, q: npt.ArrayLike) -> float | _ArrF8: + def isf(self, q: _AnyReal) -> float: ... + def isf(self, q: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" Inverse survival function \( \bar{Q}(q) = Q(1 - q) \) (inverse of [`sf`][lmo.distributions.l_poly.sf]) at \( q \). @@ -300,11 +317,10 @@ def isf(self, q: npt.ArrayLike) -> float | _ArrF8: return self._ppf(p[()] if np.isscalar(q) else p) @overload - def qdf(self, p: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def qdf(self, p: _AnyRealND) -> _ArrF8: ... @overload - def qdf(self, p: AnyScalar) -> float: ... - - def qdf(self, p: npt.ArrayLike) -> float | _ArrF8: + def qdf(self, p: _AnyReal) -> float: ... + def qdf(self, p: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" Quantile density function \( q \equiv \frac{\dd{Q}}{\dd{p}} \) ( derivative of the [PPF][lmo.distributions.l_poly.ppf]) at \( p \) of @@ -321,11 +337,10 @@ def qdf(self, p: npt.ArrayLike) -> float | _ArrF8: return self._qdf(p) @overload - def cdf(self, x: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def cdf(self, x: _AnyRealND) -> _ArrF8: ... @overload - def cdf(self, x: AnyScalar) -> float: ... - - def cdf(self, x: npt.ArrayLike) -> float | _ArrF8: + def cdf(self, x: _AnyReal) -> float: ... + def cdf(self, x: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" [Cumulative distribution function](https://w.wiki/3ota) \( F(x) = \mathrm{P}(X \le x) \) at \( x \) of the given distribution. @@ -340,11 +355,11 @@ def cdf(self, x: npt.ArrayLike) -> float | _ArrF8: return self._cdf(x) @overload - def logcdf(self, x: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def logcdf(self, x: _AnyRealND) -> _ArrF8: ... @overload - def logcdf(self, x: AnyScalar) -> float: ... + def logcdf(self, x: _AnyReal) -> float: ... @np.errstate(divide='ignore') - def logcdf(self, x: npt.ArrayLike) -> float | _ArrF8: + def logcdf(self, x: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" Logarithm of the cumulative distribution function (CDF) at \( x \), i.e. \( \ln F(x) \). @@ -355,11 +370,11 @@ def logcdf(self, x: npt.ArrayLike) -> float | _ArrF8: return np.log(self._cdf(x)) @overload - def sf(self, x: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def sf(self, x: _AnyRealND) -> _ArrF8: ... @overload - def sf(self, x: AnyScalar) -> float: ... + def sf(self, x: _AnyReal) -> float: ... - def sf(self, x: npt.ArrayLike) -> float | _ArrF8: + def sf(self, x: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" Survival function \(S(x) = \mathrm{P}(X > x) = 1 - \mathrm{P}(X \le x) = 1 - F(x) \) (the complement of the @@ -371,11 +386,11 @@ def sf(self, x: npt.ArrayLike) -> float | _ArrF8: return 1 - self._cdf(x) @overload - def logsf(self, x: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def logsf(self, x: _AnyRealND) -> _ArrF8: ... @overload - def logsf(self, x: AnyScalar) -> float: ... + def logsf(self, x: _AnyReal) -> float: ... @np.errstate(divide='ignore') - def logsf(self, x: npt.ArrayLike) -> float | _ArrF8: + def logsf(self, x: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" Logarithm of the survical function (SF) at \( x \), i.e. \( \ln \left( S(x) \right) \). 
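Since the QDF above is the exact derivative of the polynomial PPF, it can be sanity-checked against a central finite difference. A sketch with a hypothetical L-moment vector `[0, 1, 0.1, 0.12]` (chosen so that the fitted quantile polynomial is strictly increasing):

```python
import numpy as np

from lmo.distributions import l_poly

X = l_poly([0.0, 1.0, 0.1, 0.12])  # hypothetical lambda_1 ... lambda_4

p = np.linspace(0.1, 0.9, 9)
eps = 1e-6

# a central difference of the PPF approximates the QDF
dq = (X.ppf(p + eps) - X.ppf(p - eps)) / (2 * eps)
assert np.allclose(X.qdf(p), dq, rtol=1e-4)
```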
@@ -386,11 +401,10 @@ def logsf(self, x: npt.ArrayLike) -> float | _ArrF8: return np.log(self._cdf(x)) @overload - def pdf(self, x: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def pdf(self, x: _AnyRealND) -> _ArrF8: ... @overload - def pdf(self, x: AnyScalar) -> float: ... - - def pdf(self, x: npt.ArrayLike) -> float | _ArrF8: + def pdf(self, x: _AnyReal) -> float: ... + def pdf(self, x: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" Probability density function \( f \equiv \frac{\dd{F}}{\dd{x}} \) (derivative of the [CDF][lmo.distributions.l_poly.cdf]) at \( x \). @@ -405,11 +419,18 @@ def pdf(self, x: npt.ArrayLike) -> float | _ArrF8: return 1 / self._qdf(self._cdf(x)) @overload - def hf(self, x: AnyNDArray[Any] | Sequence[Any]) -> _ArrF8: ... + def logpdf(self, x: _AnyRealND) -> _ArrF8: ... @overload - def hf(self, x: AnyScalar) -> float: ... + def logpdf(self, x: _AnyReal) -> float: ... + def logpdf(self, x: _AnyReal | _AnyRealND) -> float | _ArrF8: + """Logarithm of the PDF.""" + return -np.log(self._qdf(self._cdf(x))) - def hf(self, x: npt.ArrayLike) -> float | _ArrF8: + @overload + def hf(self, x: _AnyRealND) -> _ArrF8: ... + @overload + def hf(self, x: _AnyReal) -> float: ... + def hf(self, x: _AnyReal | _AnyRealND) -> float | _ArrF8: r""" [Hazard function ](https://w.wiki/8cWL#Failure_rate_in_the_continuous_sense) @@ -431,7 +452,7 @@ def median(self) -> float: See Also: - [`l_poly.ppf`][lmo.distributions.l_poly.ppf] """ - return self._ppf(.5) + return float(self._ppf(.5)) @functools.cached_property def _mean(self) -> float: @@ -552,17 +573,12 @@ def support(self) -> tuple[float, float]: return self._support @overload - def interval( - self, - confidence: AnyNDArray[Any] | Sequence[Any], - /, - ) -> tuple[_ArrF8, _ArrF8]: ... + def interval(self, confidence: _AnyRealND, /) -> tuple[_ArrF8, _ArrF8]: ... @overload - def interval(self, confidence: AnyScalar, /) -> tuple[float, float]: ... - + def interval(self, confidence: _AnyReal, /) -> tuple[float, float]: ... def interval( self, - confidence: npt.ArrayLike, + confidence: _AnyReal | _AnyRealND, /, ) -> tuple[float, float] | tuple[_ArrF8, _ArrF8]: r""" @@ -594,7 +610,7 @@ def interval( return self._ppf((1 - alpha) / 2), self._ppf((1 + alpha) / 2) - def moment(self, n: float, /) -> float: + def moment(self, n: int | np.integer[Any], /) -> float: r""" Non-central product moment \( \E[X^n] \) of \( X \) of specified order \( n \). @@ -624,22 +640,19 @@ def moment(self, n: float, /) -> float: def _integrand(u: float) -> float: return self._ppf(u)**n - from scipy.integrate import quad # type: ignore - return cast(float, quad(_integrand, 0, 1)[0]) @overload - def stats(self, moments: _STATS0) -> tuple[()]: ... + def stats(self, moments: _Stats0) -> tuple[()]: ... @overload - def stats(self, moments: _STATS1) -> tuple[float]: ... + def stats(self, moments: _Stats1) -> tuple[float]: ... @overload - def stats(self, moments: _STATS2 = ...) -> tuple[float, float]: ... + def stats(self, moments: _Stats2 = ...) -> tuple[float, float]: ... @overload - def stats(self, moments: _STATS3) -> tuple[float, float, float]: ... + def stats(self, moments: _Stats3) -> tuple[float, float, float]: ... @overload - def stats(self, moments: _STATS4) -> tuple[float, float, float, float]: ... - - def stats(self, moments: _STATS = 'mv') -> tuple[float, ...]: + def stats(self, moments: _Stats4) -> tuple[float, float, float, float]: ... + def stats(self, moments: _Stats = 'mv') -> tuple[float, ...]: r""" Some product-moment statistics of the given distribution. 
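The summary methods above are thin wrappers around the PPF (with quadrature for the product-moment `stats` being defined here), which makes them easy to sanity-check. A sketch reusing the same hypothetical L-moment vector:

```python
from lmo.distributions import l_poly

X = l_poly([0.0, 1.0, 0.1, 0.12])  # hypothetical lambda_1 ... lambda_4

# the median and the central 50% interval are direct PPF evaluations:
# interval(alpha) = (Q((1 - alpha) / 2), Q((1 + alpha) / 2))
assert X.median() == X.ppf(0.5)
assert X.interval(0.5) == (X.ppf(0.25), X.ppf(0.75))

# 'mv': product-moment mean and variance, derived from the PPF
m, v = X.stats('mv')
print(m, v)
```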
@@ -661,13 +674,15 @@ def stats(self, moments: _STATS = 'mv') -> tuple[float, ...]: : Ex. Kurtosis \( \E[(X - \mu)^4] / \sigma^4 - 3 \) """ out: list[float] = [] - if 'm' in moments: + + _moments = set(moments) + if 'm' in _moments: out.append(self._mean) - if 'v' in moments: + if 'v' in _moments: out.append(self._var) - if 's' in moments: + if 's' in _moments: out.append(self._skew) - if 'k' in moments: + if 'k' in _moments: out.append(self._kurtosis) return tuple(round0(np.array(out), 1e-15)) @@ -699,8 +714,6 @@ def expect(self, g: Callable[[float], float], /) -> float: def i(u: float) -> float: return g(ppf(u)) - from scipy.integrate import quad # type: ignore - a = 0 b = 0.05 c = 1 - b @@ -713,22 +726,20 @@ def i(u: float) -> float: @overload def l_moment( self, - r: IntVector, + r: AnyOrderND, /, trim: AnyTrim | None = ..., ) -> _ArrF8: ... - @overload def l_moment( self, - r: AnyInt, + r: AnyOrder, /, trim: AnyTrim | None = ..., ) -> np.float64: ... - def l_moment( self, - r: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, /, trim: AnyTrim | None = None, ) -> np.float64 | _ArrF8: @@ -749,34 +760,31 @@ def l_moment( @overload def l_ratio( self, - r: IntVector, - k: AnyInt | IntVector, + r: AnyOrderND, + k: AnyOrder | AnyOrderND, /, trim: AnyTrim | None = ..., ) -> _ArrF8: ... - @overload def l_ratio( self, - r: AnyInt | IntVector, - k: IntVector, + r: AnyOrder | AnyOrderND, + k: AnyOrderND, /, trim: AnyTrim | None = ..., ) -> _ArrF8: ... - @overload def l_ratio( self, - r: AnyInt, - k: AnyInt, + r: AnyOrder, + k: AnyOrder, /, trim: AnyTrim | None = ..., ) -> np.float64: ... - def l_ratio( self, - r: AnyInt | IntVector, - k: AnyInt | IntVector, + r: AnyOrder | AnyOrderND, + k: AnyOrder | AnyOrderND, /, trim: AnyTrim | None = None, ) -> np.float64 | _ArrF8: @@ -793,7 +801,7 @@ def l_ratio( Left- and right- trim. Can be scalar or 2-tuple of non-negative int or float. """ - rs = broadstack(r, k) + rs = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(k))) lms = self.l_moment(rs, trim=trim) return moments_to_ratio(rs, lms) @@ -853,368 +861,68 @@ def l_kurtosis(self, trim: AnyTrim | None = None) -> float: """ return float(self.l_ratio(4, 2, trim=trim)) - -def _check_lmoments( - l_r: npt.NDArray[np.floating[Any]], - trim: AnyTrim = (0, 0), - name: str = 'lmbda', -): - if (n := len(l_r)) < 2: - msg = f'at least 2 L-moments required, got {n}' - raise ValueError(msg) - if l_r[1] <= 0: - msg = f'L-scale must be positive, got {name}[1] = {l_r[1]}' - if n == 2: - return - - r = np.arange(1, n + 1) - t_r = l_r[2:] / l_r[1] - t_r_max = l_ratio_bounds(r[2:], trim, legacy=True)[1] - if np.any(rs0_oob := np.abs(t_r) > t_r_max): - r_oob = np.argwhere(rs0_oob)[0] + 3 - t_oob = t_r[rs0_oob][0] - t_max = t_r_max[rs0_oob][0] - msg = ( - f'invalid L-moment ratio for r={list(r_oob)}: ' - f'|{t_oob}| <= {t_max} does not hold' - ) - raise ArithmeticError(msg) - - -def _ppf_poly_series( - l_r: npt.NDArray[np.floating[Any]], - s: float, - t: float, -) -> PolySeries: - # Corrected version of Theorem 3. from Hosking (2007). - # - r = np.arange(1, len(l_r) + 1) - c = (s + t - 1 + 2 * r) * r / (s + t + r) - - return jacobi_series( - c * l_r, - t, - s, - domain=[0, 1], - # convert to Legendre, even if trimmed; this avoids huge coefficient - kind=npp.Legendre, - symbol='q', - ) - - -class l_rv_nonparametric(_rv_continuous): - r""" - Warning: - `l_rv_nonparametric` is deprecated, and will be removed in version - `0.13`. Use `l_poly` instead. - - Estimate a distribution using the given L-moments. 
- See [`scipy.stats.rv_continuous`][scipy.stats.rv_continuous] for the - available method. - - The PPF (quantile function) is estimated using generalized Fourier series, - with the (shifted) Jacobi orthogonal polynomials as basis, and the (scaled) - L-moments as coefficients. - - The *corrected* version of theorem 3 from Hosking (2007) states that - - $$ - \widehat{Q}(u) = \sum_{r=1}^{R} - \frac{r}{r + s + t} (2r + s + t - 1) - \lambda^{(s, t)}_r - \shjacobi{r - 1}{t}{s}{2u - 1} \ , - $$ - - converges almost everywhere as \( R \rightarrow \infty \), for any - sufficiently smooth quantile function (PPF) \( Q(u) \) on - \( u \in (0, 1) \). - Here, \( \shjacobi n \alpha \beta x = \jacobi{n}{\alpha}{\beta}{2x - 1} \) - is a shifted Jacobi polynomial. - - References: - - [J.R.M. Hosking (2007) - Some theory and practical uses of trimmed - L-moments](https://doi.org/10.1016/j.jspi.2006.12.002) - - [Wolfram Research - Jacobi polynomial Fourier Expansion]( - http://functions.wolfram.com/05.06.25.0007.01) - - See Also: - - [Jacobi Polynomials - Wikipedia]( - https://wikipedia.org/wiki/Jacobi_polynomials) - - [Generalized Fourier series - Wikipedia]( - https://wikipedia.org/wiki/Generalized_Fourier_series) - """ - - _lm: Final[npt.NDArray[np.floating[Any]]] - _trim: Final[tuple[int, int] | tuple[float, float]] - - _ppf_poly: Final[PolySeries] - _isf_poly: Final[PolySeries] - - a: float - b: float - badvalue: float = np.nan - - def __init__( - self, - l_moments: FloatVector, - trim: AnyTrim = (0, 0), - a: float | None = None, - b: float | None = None, - **kwargs: Any, - ) -> None: - r""" - Args: - l_moments: - Vector containing the first $R$ consecutive L-moments - $\left[ - \lambda^{(s, t)}_1 \; - \lambda^{(s, t)}_2 \; - \dots \; - \lambda^{(s, t)}_R - \right]$, where $R \ge 2$. - - Sample L-moments can be estimated using e.g. - `lmo.l_moment(x, np.mgrid[:R] + 1, trim=(s, t))`. - - The trim-lengths $(s, t)$ should be the same for all - L-moments. - trim: - The left and right trim-lengths $(s, t)$, that correspond - to the provided `l_moments`. - a: - Lower bound of the support of the distribution. - By default it is estimated from the L-moments. - b: - Upper bound of the support of the distribution. - By default it is estimated from the L-moments. - **kwargs: - Optional params for `scipy.stats.rv_continuous`. - - Raises: - ValueError: If `len(l_moments) < 2`, `l_moments.ndim != 1`, or - there are invalid L-moments / trim-lengths. 
- """ - l_r = np.asarray_chkfinite(l_moments) - l_r.setflags(write=False) - - self._trim = _trim = (s, t) = clean_trim(trim) - - _check_lmoments(l_r, _trim) - self._lm = l_r - - # quantile function (inverse of cdf) - self._ppf_poly = ppf = _ppf_poly_series(l_r, s, t).trim(_F_EPS) - - # inverse survival function - self._isf_poly = ppf(1 - ppf.identity(domain=[0, 1])).trim(_F_EPS) - - # empirical support - self._a0, self._b0 = (q0, q1) = ppf(np.array([0, 1])) - if q0 >= q1: - msg = 'invalid l_rv_nonparametric: ppf(0) >= ppf(1)' - raise ArithmeticError(msg) - - kwargs.setdefault('momtype', 1) - super().__init__( # type: ignore [reportUnknownMemberType] - a=q0 if a is None else a, - b=q1 if b is None else b, - **kwargs, - ) + # `rv_continuous` and `rv_frozen` compatibility @property - def l_moments(self) -> npt.NDArray[np.float64]: - r"""Initial L-moments, for orders $r = 1, 2, \dots, R$.""" - return self._lm + def dist(self) -> type[Self]: # noqa: D102 + return type(self) @property - def trim(self) -> tuple[int, int] | tuple[float, float]: - """The provided trim-lengths $(s, t)$.""" - return self._trim + def args(self) -> _LPolyParams: # noqa: D102 + return (self._l_moments, self._trim) @property - def ppf_poly(self) -> PolySeries: - r""" - Polynomial estimate of the percent point function (PPF), a.k.a. - the quantile function (QF), or the inverse cumulative distribution - function (ICDF). - - Note: - Converges to the "true" PPF in the mean-squared sense, with - weight function $q^s (1 - q)^t$ of quantile $q \in [0, 1]$, - and trim-lengths $(t_1, t_2) \in \mathbb{R^+} \times \mathbb{R^+}$. - - Returns: - A [`numpy.polynomial.Legendre`][numpy.polynomial.legendre.Legendre] - orthogonal polynomial series instance. - """ - return self._ppf_poly - - @functools.cached_property - def cdf_poly(self) -> PolySeries: - """ - Polynomial least-squares interpolation of the CDF. - - Returns: - A [`numpy.polynomial.Legendre`][numpy.polynomial.legendre.Legendre] - orthogonal polynomial series instance. - """ - ppf = self._ppf_poly - # number of variables of the PPF poly - k0 = ppf.degree() + 1 - assert k0 > 1 - - n = max(100, k0 * 10) - x = np.linspace(self.a, self.b, n) - q = cast(npt.NDArray[np.float64], self.cdf(x)) # type: ignore - y = ppf.deriv()(q) - w = np.sqrt(self._weights(q) + 0.01) - - # choose the polynomial that minimizes the BIC - bic_min = np.inf - cdf_best = None - for k in range(max(k0 // 2, 2), k0 + max(k0 // 2, 8)): - # fit - cdf = ppf.fit(x, q, k - 1).trim(_F_EPS) - _k = cdf.degree() + 1 - - # according to the inverse function theorem, this should be 0 - eps = 1 / cdf.deriv()(x) - y - - # Bayesian information criterion (BIC) - bic = (_k - 1) * np.log(n) + n * np.log( - np.average(eps**2, weights=w), - ) - - # minimize the BIC - if bic < bic_min: - bic_min = bic - cdf_best = cdf - - assert cdf_best is not None - return cdf_best - - @functools.cached_property - def pdf_poly(self) -> PolySeries: - """ - Derivative of the polynomial interpolation of the CDF, i.e. the - polynomial estimate of the PDF. - - Returns: - A [`numpy.polynomial.Legendre`][numpy.polynomial.legendre.Legendre] - orthogonal polynomial series instance. 
-        """
-        return self.cdf_poly.deriv()
-
-    def _weights(self, q: npt.ArrayLike) -> npt.NDArray[np.float64]:
-        _q = np.asarray(q, np.float64)
-        s, t = self._trim
-        return np.where(
-            (_q >= 0) & (_q <= 1),
-            _q**s * (1 - _q) ** t,
-            cast(float, getattr(self, 'badvalue', np.nan)),
-        )
-
-    def _ppf(self, q: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
-        return cast(npt.NDArray[np.float64], self._ppf_poly(q))
-
-    def _isf(self, q: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
-        return cast(npt.NDArray[np.float64], self._isf_poly(q))
-
-    def _cdf_single(self, x: float) -> float:
-        # find all q where Q(q) == x
-        q0 = roots(self._ppf_poly - x)
-
-        if (n := len(q0)) == 0:
-            return self.badvalue
-        if n > 1:
-            warnings.warn(
-                f'multiple fixed points at {x = :.6f}: '  # noqa: E203
-                f'{list(np.round(q0, 6))}',
-                stacklevel=3,
-            )
-
-            if cast(float, np.ptp(q0)) <= 1 / 4:
-                # "close enough" if within the same quartile;
-                # probability-weighted interpolation
-                return np.average(q0, weights=q0 * (1 - q0))  # type: ignore
-
-            return self.badvalue
-
-        return q0[0]
-
-    def _pdf(self, x: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
-        return np.clip(cast(npt.NDArray[np.float64], self.pdf_poly(x)), 0, 1)
-
-    def _munp(self, n: int) -> float:
-        # non-central product-moment $E[X^n]$
-        return cast(float, (self._ppf_poly**n).integ(lbnd=0)(1))
-
-    def _updated_ctor_param(self) -> Mapping[str, Any]:
-        return cast(
-            Mapping[str, Any],
-            super()._updated_ctor_param()
-            | {
-                'l_moments': self._lm,
-                'trim': self._trim,
-            },
-        )
+    def kwds(self) -> dict[str, Any]:  # noqa: D102
+        return {}

     @classmethod
-    def fit(
+    def freeze(  # noqa: D102
         cls,
-        data: npt.ArrayLike,
+        lmbda: lnpt.AnyVectorFloat,
         /,
-        rmax: SupportsIndex | None = None,
-        trim: AnyTrim = (0, 0),
-    ) -> 'l_rv_nonparametric':
-        r"""
-        Estimate L-moments from the samples, and return a new
-        `l_rv_nonparametric` instance.
-
-        Args:
-            data:
-                1d array-like with univariate sample observations.
-            rmax:
-                The (maximum) number of L-moment orders to use.
-                Defaults to $\lceil 4 \log_{10} N \rceil$.
-                The quantile polynomial will be of degree `rmax - 1`.
-            trim:
-                The left and right trim-lengths $(s, t)$, that correspond
-                to the provided `l_moments`.
-
-        Returns:
-            A fitted
-            [`l_rv_nonparametric`][lmo.distributions.l_rv_nonparametric]
-            instance.
+        trim: AnyTrim = 0,
+        **kwds: Any,
+    ) -> Self:
+        return cls(lmbda, trim, **kwds)

-        Todo:
-            - Optimal `rmax` selection (the error appears to be periodic..?)
-            - Optimal `trim` selection
+    @overload
+    @classmethod
+    def nnlf(cls, theta: _LPolyParams, x: _AnyReal1D) -> float: ...
+    @overload
+    @classmethod
+    def nnlf(cls, theta: _LPolyParams, x: _AnyReal2DPlus) -> _ArrF8: ...
+    @classmethod
+    def nnlf(cls, theta: _LPolyParams, x: _AnyRealND) -> float | _ArrF8:
         """
-        # avoid circular imports
-        from ._lm import l_moment
+        Negative loglikelihood function.

-        # x needs to be sorted anyway
-        x: npt.NDArray[np.floating[Any]] = np.sort(data)
+        This is calculated as `-sum(log pdf(x, *theta), axis=0)`,
+        where `theta` is the vector of L-moments, and optionally the trim.

-        a, b = x[[0, -1]]
+        Notes:
+            This is mostly for compatibility with `rv_generic`, and is
+            impractically slow (due to the numerical inversion of the ppf).

-        if rmax is None:
-            _rmax = math.ceil(np.log10(x.size) * 4)
-        else:
-            _rmax = clean_order(rmax, name='rmax', rmin=2)
+        Args:
+            theta:
+                Tuple of size 1 or 2, with the L-moments vector, and optionally
+                the trim (defaults to 0).
+            x:
+                Array-like with observations of shape `(n,)` or `(n, *ks)`.
- _trim = clean_trim(trim) + Returns: + Scalar or array of shape `(*ks)` with negative loglikelihoods. + """ + match theta: + case (lmbda,): + rv = cls(lmbda) + case (lmbda, trim): + rv = cls(lmbda, trim) + case _ as huh: # pyright: ignore[reportUnnecessaryComparison] + raise TypeError(huh) - # sort kind 'stable' if already sorted - l_r = l_moment( - x, - np.arange(1, _rmax + 1), - trim=_trim, - sort='stable', # stable sort if fastest if already sorted - ) + return -np.log(rv.pdf(x)).sum(axis=0) - return cls(l_r, trim=_trim, a=a, b=b) # Parametric @@ -1232,8 +940,8 @@ def _kumaraswamy_lmo0( k = np.arange(t + 1, r + s + t + 1) return ( (-1)**(k - 1) - * cast(_ArrF8, sc.comb(r + k - 2, r + t - 1)) # type: ignore - * cast(_ArrF8, sc.comb(r + s + t, k)) # type: ignore + * cast(_ArrF8, sc.comb(r + k - 2, r + t - 1)) # pyright: ignore[reportUnknownMemberType] + * cast(_ArrF8, sc.comb(r + s + t, k)) # pyright: ignore[reportUnknownMemberType] * cast(_ArrF8, sc.beta(1 / a, 1 + k * b)) / a ).sum() / r @@ -1332,13 +1040,13 @@ def _l_moment( a: float, b: float, trim: tuple[int, int] | tuple[float, float], - quad_opts: QuadOptions | None = None, + quad_opts: lsct.QuadOptions | None = None, ) -> _ArrF8: s, t = trim if quad_opts is not None or isinstance(s, float): return cast( _ArrF8, - super()._l_moment( # type: ignore + super()._l_moment( # pyright: ignore[reportUnknownMemberType,reportAttributeAccessIssue] r, a, b, @@ -1350,11 +1058,11 @@ def _l_moment( return np.atleast_1d(cast(_ArrF8, _kumaraswamy_lmo(r, s, t, a, b))) -kumaraswamy: RVContinuous[float, float] = kumaraswamy_gen( +kumaraswamy: Final[lsct.RVContinuous] = kumaraswamy_gen( a=0.0, b=1.0, name='kumaraswamy', -) # type: ignore +) # pyright: ignore[reportAssignmentType] r""" A Kumaraswamy random variable, similar to [`scipy.stats.beta`][scipy.stats.beta]. 
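+
+Example (an illustrative sketch; `kumaraswamy` follows the regular
+`scipy.stats` frozen-distribution API, with shape parameters `a` and `b`):
+
+    >>> X = kumaraswamy(2.0, 5.0)
+    >>> round(float(X.ppf(0.5)), 4)  # median: (1 - 0.5**(1 / 5))**(1 / 2)
+    0.3598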
@@ -1372,7 +1080,6 @@ def _l_moment( See Also: - [Theoretical L-moments - Kumaraswamy](distributions.md#kumaraswamy) - """ @@ -1470,7 +1177,9 @@ def _wakeby_sf0( # noqa: C901 # it's easy to show that this is valid for all x, f, and d w = (1 - f) / f return ( - w / sc.lambertw(w * math.exp((1 + d * x) / f - 1)) # type: ignore + w / sc.lambertw( # pyright: ignore[reportUnknownMemberType] + w * math.exp((1 + d * x) / f - 1), + ) )**(1 / d) if x < _wakeby_isf0(.9, b, d, f): @@ -1546,7 +1255,7 @@ def _lmo0_partial(theta: float, scale: float) -> float: if scale == 0: return 0 if r == 1 and theta == 0: - return cast(float, harmonic(s + t + 1) - harmonic(t)) + return harmonic(s + t + 1) - harmonic(t) return scale * ( sc.poch(r + t, s + 1) @@ -1599,7 +1308,10 @@ def _fitstart( args: tuple[float, float, float] | None = None, ) -> tuple[float, float, float, float, float]: # Arbitrary, but the default f=1 is a bad start - return super()._fitstart(data, args or (1., 1., .5)) # type: ignore + return cast( + tuple[float, float, float, float, float], + super()._fitstart(data, args or (1., 1., .5)), # pyright: ignore[reportUnknownMemberType] + ) def _pdf( self, @@ -1689,7 +1401,7 @@ def _l_moment( d: float, f: float, trim: tuple[int, int] | tuple[float, float], - quad_opts: QuadOptions | None = None, + quad_opts: lsct.QuadOptions | None = None, ) -> _ArrF8: s, t = trim @@ -1698,12 +1410,10 @@ def _l_moment( lmbda_r = cast( float | npt.NDArray[np.float64], l_moment_from_ppf( - functools.partial( - self._ppf, - b=b, - d=d, - f=f, - ), # type: ignore + cast( + Callable[[float], float], + functools.partial(self._ppf, b=b, d=d, f=f), + ), r, trim=trim, quad_opts=quad_opts, @@ -1739,10 +1449,7 @@ def _entropy(self, b: float, d: float, f: float) -> float: ) -wakeby: RVContinuous[float, float, float] = wakeby_gen( - a=0.0, - name='wakeby', -) # type: ignore +wakeby: Final[lsct.RVContinuous] = wakeby_gen(a=0.0, name='wakeby') # pyright: ignore[reportAssignmentType] r"""A Wakeby random variable, a generalization of [`scipy.stats.genpareto`][scipy.stats.genpareto]. 
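+
+Example (a sketch only; `wakeby` takes `b`, `d` and `f` as shape parameters,
+mirroring the private methods above, and no output is asserted here):
+
+    >>> X = wakeby(0.9, 0.1, 0.5)
+    >>> X.ppf([0.25, 0.5, 0.75])  # doctest: +SKIP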
@@ -1777,8 +1484,8 @@ def _genlambda_ppf0(q: float, b: float, d: float, f: float) -> float: @np.errstate(divide='ignore') -def _genlambda_qdf(q: V, b: float, d: float, f: float) -> V: - return cast(V, (1 + f) * q**(b - 1) + (1 - f) * (1 - q)**(d - 1)) +def _genlambda_qdf(q: _T_x, b: float, d: float, f: float) -> _T_x: + return cast(_T_x, (1 + f) * q**(b - 1) + (1 - f) * (1 - q)**(d - 1)) def _genlambda_cdf0( # noqa: C901 @@ -1862,7 +1569,7 @@ def _genlambda_lmo0( def _lmo0_partial(trim: float, theta: float) -> float: if r == 1 and theta == 0: - return cast(float, harmonic(trim) - harmonic(s + t + 1)) + return harmonic(trim) - harmonic(s + t + 1) return ( (-1)**r * @@ -1905,7 +1612,10 @@ def _fitstart( args: tuple[float, float, float] | None = None, ) -> tuple[float, float, float, float, float]: # Arbitrary, but the default f=1 is a bad start - return super()._fitstart(data, args or (1., 1., 0.)) # type: ignore + return cast( + tuple[float, float, float, float, float], + super()._fitstart(data, args or (1., 1., 0.)), # pyright: ignore[reportUnknownMemberType] + ) def _pdf( self, @@ -1967,13 +1677,13 @@ def _stats(self, b: float, d: float, f: float) -> tuple[ m2 = ( a**2 + (c / d1)**2 / (d1 + d) - + 2 * a * c / (d * d1) * (1 - cast(float, harmonic(1 + d))) + + 2 * a * c / (d * d1) * (1 - harmonic(1 + d)) ) elif d == 0: m2 = ( c**2 + (a / b1)**2 / (b1 + b) - + 2 * a * c / (b * b1) * (1 - cast(float, harmonic(1 + b))) + + 2 * a * c / (b * b1) * (1 - harmonic(1 + b)) ) else: m2 = ( @@ -2006,7 +1716,7 @@ def _l_moment( d: float, f: float, trim: tuple[int, int] | tuple[float, float], - quad_opts: QuadOptions | None = None, + quad_opts: lsct.QuadOptions | None = None, ) -> _ArrF8: s, t = trim @@ -2015,12 +1725,10 @@ def _l_moment( lmbda_r = cast( float | npt.NDArray[np.float64], l_moment_from_ppf( - functools.partial( - self._ppf, - b=b, - d=d, - f=f, - ), # type: ignore + cast( + Callable[[float], float], + functools.partial(self._ppf, b=b, d=d, f=f), + ), r, trim=trim, quad_opts=quad_opts, @@ -2033,9 +1741,7 @@ def _l_moment( ) -genlambda: RVContinuous[float, float, float] = genlambda_gen( - name='genlambda', -) # type: ignore +genlambda: lsct.RVContinuous = genlambda_gen(name='genlambda') # pyright: ignore[reportAssignmentType] r"""A generalized Tukey-Lambda random variable. `genlambda` takes `b`, `d` and `f` as shape parameters. diff --git a/lmo/inference.py b/lmo/inference.py index 28221899..6da1cb19 100644 --- a/lmo/inference.py +++ b/lmo/inference.py @@ -1,9 +1,9 @@ -"""Parametric inference.""" +""" +Parametric inference using the (Generalized) Method of L-Moments, L-(G)MM. +""" +from __future__ import annotations -__all__ = 'GMMResult', 'fit' - -from collections.abc import Callable -from typing import Any, NamedTuple, cast +from typing import TYPE_CHECKING, Any, NamedTuple, TypeAlias, cast import numpy as np import numpy.typing as npt @@ -14,7 +14,26 @@ from ._utils import clean_orders, clean_trim from .diagnostic import HypothesisTestResult, l_moment_bounds from .theoretical import l_moment_from_ppf -from .typing import AnyTrim, DistributionFunction, IntVector, OptimizeResult +from .typing import scipy as lsct + + +if TYPE_CHECKING: + from collections.abc import Callable + + from .typing import ( + AnyOrderND, + AnyTrim, + np as lnpt, + ) + + +__all__ = ( + 'GMMResult', + 'fit', +) + + +_ArrF8: TypeAlias = npt.NDArray[np.float64] class GMMResult(NamedTuple): @@ -51,9 +70,9 @@ class GMMResult(NamedTuple): args: tuple[float | int, ...] 
success: bool statistic: float - eps: npt.NDArray[np.float64] + eps: _ArrF8 - weights: npt.NDArray[np.float64] + weights: _ArrF8 @property def n_arg(self) -> int: @@ -134,12 +153,12 @@ def AICc(self) -> float: # noqa: N802 def _loss_step( - args: npt.NDArray[np.float64], - l_fn: Callable[..., npt.NDArray[np.float64]], + args: _ArrF8, + l_fn: Callable[..., _ArrF8], r: npt.NDArray[np.int64], - l_r: npt.NDArray[np.float64], + l_r: _ArrF8, trim: AnyTrim, - w_rr: npt.NDArray[np.float64], + w_rr: _ArrF8, ) -> float: lmbda_r = l_fn(r, *args, trim=trim) @@ -148,27 +167,25 @@ def _loss_step( raise ValueError(msg) g_r = lmbda_r - l_r - return np.sqrt(g_r.T @ w_rr @ g_r) # type: ignore + return cast(float, np.sqrt(g_r.T @ w_rr @ g_r)) -def _get_l_moment_fn( - ppf: DistributionFunction[...], -) -> Callable[..., npt.NDArray[np.float64]]: +def _get_l_moment_fn(ppf: lsct.RVFunction[...]) -> Callable[..., _ArrF8]: def l_moment_fn( - r: IntVector, + r: AnyOrderND, *args: Any, - trim: AnyTrim = (0, 0), - ) -> npt.NDArray[np.float64]: + trim: AnyTrim = 0, + ) -> _ArrF8: return l_moment_from_ppf(lambda q: ppf(q, *args), r, trim=trim) return l_moment_fn def _get_weights_mc( - y: npt.NDArray[np.float64], + y: _ArrF8, r: npt.NDArray[np.int64], trim: tuple[int, int] | tuple[float, float] = (0, 0), -) -> npt.NDArray[np.float64]: +) -> _ArrF8: l_r = l_moment_est( y, r, @@ -197,18 +214,18 @@ def _get_weights_mc( def fit( # noqa: C901 - ppf: DistributionFunction[...], - args0: npt.ArrayLike, + ppf: lsct.RVFunction[...], + args0: lnpt.AnyVectorFloat, n_obs: int, - l_moments: npt.ArrayLike, - r: IntVector | None = None, - trim: AnyTrim = (0, 0), + l_moments: lnpt.AnyVectorFloat, + r: AnyOrderND | None = None, + trim: AnyTrim = 0, *, k: int | None = None, k_max: int = 50, l_tol: float = 1e-4, - l_moment_fn: Callable[..., npt.NDArray[np.float64]] | None = None, + l_moment_fn: Callable[..., _ArrF8] | None = None, n_mc_samples: int = 9999, random_state: ( int @@ -346,7 +363,7 @@ def fit( # noqa: C901 # order- and trim- agnostic (used in convergence criterion) l_r_ub = np.r_[1, l_moment_bounds(_r[1:], trim=_trim)] l_2c = l_r[1] / l_r_ub[1] - scale_r = cast(npt.NDArray[np.float64], 1 / (l_2c * l_r_ub)) + scale_r = cast(_ArrF8, 1 / (l_2c * l_r_ub)) # Initial parametric population L-moments _l_moment_fn = l_moment_fn or _get_l_moment_fn(ppf) @@ -390,15 +407,15 @@ def fit( # noqa: C901 # calculate the weight matrix if n_con > n_par: w_rr = _get_weights_mc( - ppf(cast(npt.NDArray[np.float64], qs), *theta), + ppf(cast(_ArrF8, qs), *theta), _r, trim=_trim, ) # run the optimizer res = cast( - OptimizeResult, - optimize.minimize( # type: ignore + lsct.OptimizeResult, + optimize.minimize( # pyright: ignore[reportUnknownMemberType] _loss_step, theta, args=(_l_moment_fn, _r, l_r, _trim, w_rr), diff --git a/lmo/linalg.py b/lmo/linalg.py index ab5c0f2d..3ee38f6e 100644 --- a/lmo/linalg.py +++ b/lmo/linalg.py @@ -1,5 +1,16 @@ # ruff: noqa: N803 """Linear algebra and linearized orthogonal polynomials.""" +from __future__ import annotations + +from math import comb, lgamma +from typing import Any, TypeAlias, cast + +import numpy as np +import numpy.typing as npt + +from .typing import np as lnpt +from .typing.compat import TypeVar, Unpack, assert_never + __all__ = ( 'sandwich', @@ -11,58 +22,50 @@ 'trim_matrix', ) -import sys -from math import comb, lgamma -from typing import Any, TypeVar, cast - +_T = TypeVar('_T', bound=np.generic) +_TF = TypeVar('_TF', bound=np.floating[Any], default=np.float64) +_TI = TypeVar('_TI', bound=lnpt.Real | 
np.object_, default=np.int64)

-if sys.version_info < (3, 11):
-    from typing_extensions import assert_never
-else:
-    from typing import assert_never
-
-import numpy as np
-import numpy.typing as npt
+_K = TypeVar('_K', bound=int)
+_R = TypeVar('_R', bound=int)

-from .typing import AnyFloat, AnyInt
-
-
-T = TypeVar('T', bound=np.object_ | np.integer[Any] | np.floating[Any])
+_DType: TypeAlias = np.dtype[_T] | type[_T]
+_Square: TypeAlias = lnpt.Array[tuple[_K, _K], _T]


 def sandwich(
-    A: npt.NDArray[np.number[Any]],
-    X: npt.NDArray[T | np.number[Any]],
+    A: lnpt.Array[tuple[_K, _R], lnpt.Real],
+    X: lnpt.Array[tuple[_R, Unpack[tuple[_R, ...]]], lnpt.Real],
     /,
-    dtype: np.dtype[T] | type[T] = np.float64,
-) -> npt.NDArray[T]:
+    dtype: _DType[_TF] = np.float64,
+) -> lnpt.Array[tuple[_K, Unpack[tuple[_K, ...]]], _TF]:
     """
     Calculates the "sandwich" matrix product (`A @ X @ A.T`) along the
     specified `X` axis.

     Args:
-        A: 2-D array of shape `(s, r)`, the "bread".
-        dtype: The data type of the result.
+        A: 2-D array of shape `(k, r)`, the "bread".
         X: Array of shape `(r, r, ...)`.
+        dtype: The data type of the result.

     Returns:
-        C: Array of shape `(s, s, ...)`.
+        C: Array of shape `(k, k, ...)`.

     See Also:
         - https://wikipedia.org/wiki/Covariance_matrix
     """
     # if X is 2-d, this is equivalent to: A @ X @ A.T
     spec = 'ui, ij..., vj -> uv...'
-    return np.einsum(spec, A, X, A, dtype=dtype)  # pyright: ignore
+    return np.einsum(spec, A, X, A, dtype=dtype)  # pyright: ignore[reportUnknownMemberType]


 def pascal(
-    k: int,
+    k: _K,
     /,
-    dtype: np.dtype[T] | type[T] = np.int64,
+    dtype: _DType[_TI] = np.int64,
     *,
     inv: bool = False,
-) -> npt.NDArray[T]:
+) -> _Square[_K, _TI]:
     r"""
     Construct the lower-diagonal Pascal matrix $L_{k \times k}$, or its
     matrix inverse $L^{-1}$.
@@ -93,7 +96,6 @@ def pascal(

     Examples:
         >>> import numpy as np
-        >>> from lmo.linalg import pascal
         >>> pascal(4, dtype=np.int_)
         array([[1, 0, 0, 0],
                [1, 1, 0, 0],
                [1, 2, 1, 0],
                [1, 3, 3, 1]])
-        >>> np.rint(np.linalg.inv(pascal(4))).astype(int)
+        >>> np.rint(np.linalg.inv(pascal(4))).astype(np.int_)
         array([[ 1, 0, 0, 0],
                [-1, 1, 0, 0],
                [ 1, -2, 1, 0],
                [-1, 3, -3, 1]])

     Now, let's compare with scipy:

-        >>> from scipy.linalg import invpascal
-        >>> invpascal(4, kind='lower').astype(int)
+        >>> import scipy.linalg
+        >>> scipy.linalg.invpascal(4, kind='lower').astype(np.int_)
         array([[ 1, 0, 0, 0],
                [-1, 1, 0, 0],
                [ 1, -2, 1, 0],
                [-1, 3, -3, 1]])
@@ -136,34 +138,26 @@ def pascal(
     return out


-def ir_pascal(
-    k: int,
-    /,
-    dtype: np.dtype[T] | type[T] = np.float64,
-) -> npt.NDArray[np.float64]:
+def ir_pascal(k: _K, /, dtype: _DType[_TF]) -> _Square[_K, _TF]:
     r"""
     Inverse regularized lower-diagonal Pascal matrix,
     $\bar{L}_{ij} = L^{-1}_{ij} / i$.

     Used to linearly combine order statistics into
     L-moments.
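+
+    For example, for $k = 3$ (a worked instance of the definition above,
+    added for illustration):
+
+    $$
+    \bar{L} = \begin{bmatrix}
+        1 & 0 & 0 \\
+        -1/2 & 1/2 & 0 \\
+        1/3 & -2/3 & 1/3
+    \end{bmatrix}
+    $$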
""" - # use native ints to reduce the effect of over-/underflows - dtype_native = k > 62 - _dtype = np.object_ if dtype_native else np.int64 - - p = pascal(k, dtype=_dtype, inv=True) - out = p / np.arange(1, k + 1, dtype=_dtype)[:, None] # type: ignore + p = pascal(k, dtype=dtype, inv=True) + out = p / np.arange(1, k + 1, dtype=dtype)[:, None] return np.asarray(out, dtype) def _sh_jacobi_i( - k: int, + k: _K, a: int, b: int, /, - dtype: np.dtype[T] | type[T], -) -> npt.NDArray[T]: + dtype: _DType[_TI], +) -> _Square[_K, _TI]: out = np.zeros((k, k), dtype=dtype) for r in range(k): for j in range(r + 1): @@ -174,12 +168,12 @@ def _sh_jacobi_i( def _sh_jacobi_f( - k: int, + k: _K, a: float, b: float, /, - dtype: np.dtype[T] | type[T], -) -> npt.NDArray[T]: + dtype: _DType[_TI], +) -> _Square[_K, _TI]: out = np.zeros((k, k), dtype=dtype) # semi dynamic programming @@ -202,11 +196,7 @@ def _sh_jacobi_f( return out -def sh_legendre( - k: int, - /, - dtype: np.dtype[T] | type[T] = np.int64, -) -> npt.NDArray[T]: +def sh_legendre(k: _K, /, dtype: _DType[_TI] = np.int64) -> _Square[_K, _TI]: r""" Shifted Legendre polynomial coefficient matrix $\widetilde{P}$ of shape `(k, k)`. @@ -263,12 +253,12 @@ def sh_legendre( def sh_jacobi( - k: AnyInt, - a: AnyFloat, - b: AnyFloat, + k: _K, + a: float, + b: float, /, - dtype: np.dtype[T] | type[T] | None = None, -) -> npt.NDArray[T | np.int64]: + dtype: _DType[_TF] = np.float64, +) -> _Square[_K, _TF]: r""" Shifted Jacobi polynomial coefficient matrix $\widetilde{P}^{(a,b)}$ of shape `(k, k)`. @@ -324,19 +314,20 @@ def sh_jacobi( - https://mathworld.wolfram.com/JacobiPolynomial.html - [`scipy.special.jacobi`][scipy.special.jacobi] """ - _k, _a, _b = int(k), float(a), float(b) - if _k < 0 or _a < 0 or _b < 0: + if k < 0 or a < 0 or b < 0: msg = 'k, a, and b must be >= 0' raise ValueError(msg) - _dtype = dtype or np.asarray([a, b]).dtype.type - if np.issubdtype(_dtype, np.integer) or np.issubdtype(_dtype, np.bool_): - return _sh_jacobi_i(_k, int(a), int(b), dtype=_dtype) + _sctype = dtype or np.array([a, b]).dtype.type + if np.issubdtype(_sctype, np.integer) or np.issubdtype(_sctype, np.bool_): + return _sh_jacobi_i(k, int(a), int(b), dtype=_sctype) + return _sh_jacobi_f(k, float(a), float(b), dtype=_sctype) - return _sh_jacobi_f(_k, float(a), float(b), dtype=_dtype) - -def succession_matrix(c: npt.NDArray[T], /) -> npt.NDArray[T]: +def succession_matrix( + c: lnpt.Array[tuple[_K, int], _T], + /, +) -> lnpt.Array[tuple[_K, int], _T]: r""" A toeplitz-like transformation matrix construction, that prepends $i$ zeroes to $i$-th row, so that the input shape is mapped from `(n, k)` @@ -376,11 +367,11 @@ def succession_matrix(c: npt.NDArray[T], /) -> npt.NDArray[T]: def trim_matrix( - r: int, + r: _R, /, trim: tuple[int, int], - dtype: np.dtype[T] | type[T] = np.float64, -) -> npt.NDArray[T]: + dtype: _DType[_TF] = np.float64, +) -> lnpt.Array[tuple[_R, int], _TF]: r""" Linearization of the trimmed L-moment recurrence relations, following the (corrected) derivation by Hosking (2007) from the (shifted) Jacobi @@ -470,4 +461,4 @@ def trim_matrix( case _ as wtf: # type: ignore [reportUnnecessaryComparison] assert_never(wtf) - return cast(npt.NDArray[T], out) + return cast(npt.NDArray[_TF], out) diff --git a/lmo/ostats.py b/lmo/ostats.py index e46e79f8..f8660274 100644 --- a/lmo/ostats.py +++ b/lmo/ostats.py @@ -1,3 +1,4 @@ +# ruff: noqa: N803 r""" Order statistics $X_{i:n}$, with $i \in [0, n)$. 
@@ -8,38 +9,45 @@
 ](https://books.google.com/books?id=bdhzFXg6xFkC)
 """

+from __future__ import annotations
+
 import functools
-from collections.abc import Sequence
 from math import floor
-from typing import Any, cast, overload
+from typing import TYPE_CHECKING, TypeVar, overload

 import numpy as np
-import numpy.typing as npt
 from scipy.special import betainc, betaln

-from .typing import AnyNDArray
+
+if TYPE_CHECKING:
+    from .typing import np as lnpt
+
+
+__all__ = (
+    'weights',
+    'from_cdf',
+)
+
+
+_T_size = TypeVar('_T_size', bound=int)


 def _weights(
-    i: float,
-    n: float,
-    N: int,  # noqa: N803
+    i: float | lnpt.Float,
+    n: float | lnpt.Float,
+    N: _T_size,
     /,
-) -> npt.NDArray[np.float64]:
+) -> lnpt.Array[tuple[_T_size], np.float64]:
     assert 0 <= i < n <= N

     j = np.arange(floor(i), N)
-
     return np.r_[
         np.zeros(j[0]),
         np.exp(
-            cast(
-                float,
-                betaln(j + 1, N - j)
-                - betaln(i + 1, n - i)
-                - betaln(j - i + 1, N - j - (n - i) + 1)
-                - np.log(N - n + 1),
-            ),
+            betaln(j + 1, N - j)
+            - betaln(i + 1, n - i)
+            - betaln(j - i + 1, N - j - (n - i) + 1)
+            - np.log(N - n + 1),
         ),
     ]

@@ -48,13 +56,13 @@ def _weights(


 def weights(
-    i: float,
-    n: float,
-    N: int,  # noqa: N803
+    i: float | lnpt.Float,
+    n: float | lnpt.Float,
+    N: _T_size,
     /,
     *,
     cached: bool = False,
-) -> npt.NDArray[np.float64]:
+) -> lnpt.Array[tuple[_T_size], np.float64]:
     r"""
     Compute the linear weights $w_{i:n|j:N}$ for $j = 0, \dots, N-1$.

@@ -88,7 +96,8 @@ def weights(
     literature.

     Args:
-        i: 0-indexed sample (fractional) index, $0 \le i \lt n$. Negative
+        i:
+            0-indexed sample (fractional) index, $0 \le i \lt n$. Negative
             indexing is allowed.
         n: Subsample size, optionally fractional, $0 \lt n \le N$.
         N: Sample size, i.e. the observation count.
@@ -109,34 +118,34 @@ def weights(
         # impossible case
         return np.full(N, np.nan)

-    return (_weights_cached if cached else _weights)(i, n, N)
+    _fn = _weights_cached if cached else _weights
+    # this return type inconsistency is due to the first `np.ndarray` type
+    # parameter not being covariant, which is incorrect, but is being worked on
+    return _fn(i, n, N)  # pyright: ignore[reportReturnType]


 @overload
-def from_cdf(F: float, i: float, n: float) -> float:  # noqa: N803
-    ...
-
-
+def from_cdf(F: lnpt.AnyScalarFloat, i: float, n: float) -> np.float64: ...
 @overload
 def from_cdf(
-    F: AnyNDArray[np.floating[Any]] | Sequence[float],  # noqa: N803
+    F: lnpt.AnyArrayFloat,
     i: float,
     n: float,
-) -> npt.NDArray[np.float64]:
-    ...
+) -> lnpt.Array[lnpt.AtLeast1D, np.float64]: ...


 def from_cdf(
-    F: npt.ArrayLike,  # noqa: N803
+    F: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat,
     i: float,
     n: float,
-) -> float | npt.NDArray[np.float64]:
+) -> np.float64 | lnpt.Array[lnpt.AtLeast1D, np.float64]:
     r"""
     Transform $F(X)$ to $F_{i:n}(X)$, of the $i$th variate within subsamples
     of size $n$, i.e. $0 \le i \le n - 1$.

     Args:
-        F: Scalar or array-like with the returned value of some cdf, i.e.
+        F:
+            Scalar or array-like with the returned value of some cdf, i.e.
            $F_X(x) = P(X \le x)$. Must be between 0 and 1.
         i: 0-indexed sample (fractional) index, $0 \le i < n$.
         n: Subsample size, optionally fractional, $0 \lt n$.
diff --git a/lmo/pwm_beta.py b/lmo/pwm_beta.py
index 3a78b767..ef606c51 100644
--- a/lmo/pwm_beta.py
+++ b/lmo/pwm_beta.py
@@ -4,25 +4,41 @@
 Primarily used as an intermediate step for L-moment estimation.
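+
+Example (a sketch: `weights(r, n)` is the `(r, n)` projection matrix that
+maps the sorted sample to the first `r` PWM estimates):
+
+    >>> import numpy as np
+    >>> from lmo import pwm_beta
+    >>> x = np.random.default_rng(0).uniform(size=20)
+    >>> b = pwm_beta.weights(3, 20) @ np.sort(x)  # beta_0, beta_1, beta_2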
""" -__all__ = 'weights', 'cov' +from __future__ import annotations -from typing import Any, TypeVar, cast +from typing import TYPE_CHECKING, Any, TypeAlias, cast, overload import numpy as np import numpy.typing as npt from ._utils import ordered +from .typing.compat import TypeVar -T = TypeVar('T', bound=np.floating[Any]) +if TYPE_CHECKING: + from .typing import np as lnpt + from .typing.compat import Unpack + + +__all__ = ( + 'weights', + 'cov', +) + + +_F = TypeVar('_F', bound=np.floating[Any], default=np.float64) +_R = TypeVar('_R', bound=int) +_N = TypeVar('_N', bound=int) + +_DType: TypeAlias = np.dtype[_F] | type[_F] def weights( - r: int, - n: int, + r: _R, + n: _N, /, - dtype: np.dtype[T] | type[T] = np.float64, -) -> npt.NDArray[T]: + dtype: _DType[_F] = np.float64, +) -> lnpt.Array[tuple[_R, _N], _F]: r""" Probability Weighted moment (PWM) projection matrix $B$ of the unbiased estimator for $\beta_k = M_{1,k,0}$ for $k = 0, \dots, r - 1$. @@ -63,31 +79,50 @@ def weights( w_r[k, k:] = w_r[k - 1, k:] * i1[:-k] / (n - k) # the + 0. eliminates negative zeros - return cast(npt.NDArray[T], w_r + 0.0) + return cast(npt.NDArray[_F], w_r + 0.0) +@overload +def cov( + a: lnpt.AnyArrayFloat, + r: _R, + /, + axis: None = ..., + *, + dtype: _DType[_F] = np.float64, + **kwds: Any, +) -> lnpt.Array[tuple[_R, _R], _F]: ... +@overload +def cov( + a: lnpt.AnyArrayFloat, + r: _R, + /, + axis: int, + dtype: _DType[_F] = np.float64, + **kwds: Any, +) -> lnpt.Array[tuple[_R, _R, Unpack[tuple[int, ...]]], _F]: ... def cov( - a: npt.ArrayLike, + a: lnpt.AnyArrayFloat, r: int, /, axis: int | None = None, - dtype: np.dtype[T] | type[T] = np.float64, - **kwargs: Any, -) -> npt.NDArray[T]: + dtype: _DType[_F] = np.float64, + **kwds: Any, +) -> lnpt.Array[Any, _F]: r""" Distribution-free variance-covariance matrix of the probability weighted moment (PWM) point estimates $\beta_k = M_{1,k,0}$, with orders $k = 0, \dots, r - 1$. Parameters: - a: Array-like with observations. + a: 1-D or 2-D array-like with observations. r: The amount of orders to evaluate, i.e. $k = 0, \dots, r - 1$. axis: The axis along which to calculate the covariance matrices. dtype: Desired output floating data type. **kwargs: Additional keywords to pass to `lmo.stats.ordered`. Returns: - S_b: Variance-covariance matrix/tensor of shape `(r, ...)` + S_b: Variance-covariance matrix/tensor of shape `(r, r)` or (r, r, n) See Also: - https://wikipedia.org/wiki/Covariance_matrix @@ -96,7 +131,7 @@ def cov( - [E. Elmamir & A. Seheult (2004) - Exact variance structure of sample L-moments](https://doi.org/10.1016/S0378-3758(03)00213-1) """ - x = ordered(a, axis=axis, dtype=dtype, **kwargs) + x = ordered(a, axis=axis, dtype=dtype, **kwds) # ensure the samples are "in front" (along axis=0) if axis and x.ndim > 1: @@ -121,7 +156,7 @@ def cov( # ensure that at most ffact[..., -k_max] will give 0 ffact = np.c_[ffact, np.zeros((r, r))] - spec = 'i..., i...' + spec: str = 'i..., i...' 
# for k == l (variances on the diagonal): # sum( @@ -140,7 +175,7 @@ def cov( # (n-k-1)^(k+1) denom = n * (n - 2 * k - 1) * ffact[k, n - k - 1] - m_bb = np.einsum(spec, v_ki, x) / denom # pyright: ignore + m_bb = np.einsum(spec, v_ki, x) / denom # pyright: ignore[reportUnknownMemberType] s_b[k, k] = b[k] ** 2 - m_bb # for k != l (actually k > l since symmetric) @@ -162,7 +197,7 @@ def cov( # `(n-k-1)^(l+1)` denom = n * (n - k - m - 1) * ffact[m, n - k - 1] - m_bb = np.einsum(spec, v_ki, x) / denom # pyright: ignore + m_bb = np.einsum(spec, v_ki, x) / denom # pyright: ignore[reportUnknownMemberType] # because s_bb.T == s_bb s_b[k, m] = s_b[m, k] = b[k] * b[m] - m_bb diff --git a/lmo/special.py b/lmo/special.py index 8d9e0984..6bcebadd 100644 --- a/lmo/special.py +++ b/lmo/special.py @@ -1,4 +1,19 @@ """Mathematical "special" functions, extending `scipy.special`.""" +from __future__ import annotations + +from typing import Any, Final, TypeVar, cast, overload + +import numpy as np +import numpy.typing as npt +import scipy.special as sc + +from ._utils import clean_orders +from .typing import ( + AnyOrder, + AnyOrderND, + np as lnpt, +) + __all__ = ( 'fpow', @@ -8,56 +23,60 @@ 'fourier_jacobi', ) -from typing import Any, cast, overload - -import numpy as np -import numpy.typing as npt -import scipy.special as sc - -from ._utils import clean_orders -from .typing import AnyNDArray, AnyScalar, IntVector +_DTYPE_CHARS: Final[str] = '?bBhHiIlLqQpP' -_DTYPE_CHARS = '?bBhHiIlLqQpP' +_T_shape = TypeVar('_T_shape', bound=lnpt.AtLeast1D) +_T_float = TypeVar('_T_float', bound=lnpt.Float) @overload -def fpow(x: AnyScalar, n: AnyScalar, out: None = ...) -> float: ... - +def fpow( + x: lnpt.AnyScalarFloat, + n: lnpt.AnyScalarFloat, + /, + out: None = ..., +) -> np.float64: ... @overload def fpow( - x: AnyNDArray[np.generic], - n: npt.ArrayLike, - out: npt.NDArray[np.float64] | None = ..., -) -> npt.NDArray[np.float64]: ... - + x: lnpt.AnyArrayFloat, + n: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat, + /, + out: None = ..., +) -> lnpt.Array[lnpt.AtLeast1D, np.float64]: ... @overload def fpow( - x: npt.ArrayLike, - n: AnyNDArray[np.generic], - out: npt.NDArray[np.float64] | None = ..., -) -> npt.NDArray[np.float64]: ... - + x: lnpt.AnyScalarFloat, + n: lnpt.AnyArrayFloat, + /, + out: None = ..., +) -> lnpt.Array[lnpt.AtLeast1D, np.float64]: ... @overload def fpow( - x: npt.ArrayLike, - n: npt.ArrayLike, - out: npt.NDArray[np.float64], -) -> npt.NDArray[np.float64]: ... - + x: lnpt.AnyArrayFloat, + n: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat, + /, + out: lnpt.Array[_T_shape, _T_float], +) -> lnpt.Array[_T_shape, _T_float]: ... @overload def fpow( - x: npt.ArrayLike, - n: npt.ArrayLike, - out: npt.NDArray[np.float64] | None = ..., -) -> float | npt.NDArray[np.float64]: ... + x: lnpt.AnyScalarFloat, + n: lnpt.AnyArrayFloat, + /, + out: lnpt.Array[_T_shape, _T_float], +) -> lnpt.Array[_T_shape, _T_float]: ... def fpow( - x: npt.ArrayLike, - n: npt.ArrayLike, - out: npt.NDArray[np.float64] | None = None, -) -> float | npt.NDArray[np.float64]: + x: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat, + n: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat, + /, + out: lnpt.Array[_T_shape, _T_float] | None = None, +) -> ( + np.float64 + | lnpt.Array[_T_shape, _T_float] + | lnpt.Array[lnpt.AtLeast1D, np.float64] +): r""" Factorial power, or falling factorial. @@ -87,38 +106,37 @@ def fpow( @overload def gamma2( - a: float, - x: AnyScalar, + a: lnpt.AnyScalarFloat, + x: lnpt.AnyScalarFloat, + /, out: None = ..., -) -> float: ... 
-
-@overload
-def gamma2(
-    a: float,
-    x: AnyNDArray[np.generic],
-    out: npt.NDArray[np.float64] | None = ...,
-) -> npt.NDArray[np.float64]: ...
-
+) -> np.float64: ...
 @overload
 def gamma2(
-    a: float,
-    x: npt.ArrayLike,
-    out: npt.NDArray[np.float64],
-) -> npt.NDArray[np.float64]: ...
-
+    a: lnpt.AnyScalarFloat,
+    x: lnpt.AnyArrayFloat,
+    /,
+    out: None = ...,
+) -> lnpt.Array[lnpt.AtLeast1D, np.float64]: ...
 @overload
 def gamma2(
-    a: float,
-    x: npt.ArrayLike,
-    out: npt.NDArray[np.float64] | None = ...,
-) -> float | npt.NDArray[np.float64]: ...
+    a: lnpt.AnyScalarFloat,
+    x: lnpt.AnyArrayFloat,
+    /,
+    out: lnpt.Array[_T_shape, _T_float],
+) -> lnpt.Array[_T_shape, _T_float]: ...
 def gamma2(
-    a: float,
-    x: npt.ArrayLike,
-    out: npt.NDArray[np.float64] | None = None,
-) -> float | npt.NDArray[np.float64]:
+    a: lnpt.AnyScalarFloat,
+    x: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat,
+    /,
+    out: lnpt.Array[_T_shape, _T_float] | None = None,
+) -> (
+    np.float64
+    | lnpt.Array[_T_shape, _T_float]
+    | lnpt.Array[lnpt.AtLeast1D, np.float64]
+):
     r"""
     Incomplete (upper) gamma function.

@@ -131,8 +149,8 @@ def gamma2(
     for \( a \ge 0 \) and \( x \ge 0 \).

     Args:
-        a: Non-negative scalar.
-        x: Non-negative array-like.
+        a: Real-valued non-negative scalar.
+        x: Real-valued non-negative array-like.
         out: Optional output array for the results.

     Returns:
@@ -143,18 +161,33 @@ def gamma2(
         regularized gamma function \( Q(a,\ x) \).
     """
     if a == 0:
-        return cast(float | npt.NDArray[np.float64], sc.exp1(x, out=out))
-
-    res = cast(float | npt.NDArray[np.float64], sc.gammaincc(a, x, out=out))
-    res *= cast(float, sc.gamma(a))
-    return res
+        return sc.exp1(x, out=out)
+    # scale in place, so that a provided `out` also holds the final result
+    res = sc.gammaincc(a, x, out=out)
+    res *= sc.gamma(a)
+    return res


+@overload
+def harmonic(n: lnpt.AnyScalarFloat, /, out: None = ...) -> float: ...
+@overload
+def harmonic(
+    n: lnpt.AnyArrayFloat,
+    /,
+    out: None = ...,
+) -> lnpt.Array[lnpt.AtLeast1D, np.float64]: ...
+@overload
 def harmonic(
-    n: npt.ArrayLike,
+    n: lnpt.AnyArrayFloat,
     /,
-    out: npt.NDArray[np.float64 | np.complex128] | None = None,
-) -> float | complex | npt.NDArray[np.float64 | np.complex128]:
+    out: lnpt.Array[_T_shape, _T_float],
+) -> lnpt.Array[_T_shape, _T_float]: ...
+def harmonic(
+    n: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat,
+    /,
+    out: lnpt.Array[_T_shape, _T_float] | None = None,
+) -> (
+    float
+    | lnpt.Array[_T_shape, _T_float]
+    | lnpt.Array[lnpt.AtLeast1D, np.float64]
+):
     r"""
     Harmonic number \( H_n = \sum_{k=1}^{n} 1 / k \), extended for real
     and complex argument via analytic continuation.
@@ -172,8 +205,6 @@ def harmonic(
         1.87274
         >>> harmonic(-1 / 12)
         -0.146106
-        >>> harmonic(1 - 1j)  # doctest: -FLOAT_CMP, +ELLIPSIS
-        (1.1718...-0.5766...j)

     Args:
         n: Real- or complex- valued parameter, as array-like or scalar.
@@ -187,31 +218,25 @@ def harmonic(
     """
     _n = np.asanyarray(n)

-    _out = cast(
-        npt.NDArray[np.float64] | npt.NDArray[np.complex128],
-        sc.digamma(_n + 1, out),
-    )
-    _out += np.euler_gamma
-
-    return _out[()] if np.isscalar(n) else _out
+    # add in place, so that a provided `out` also holds the final result
+    _out = sc.digamma(_n + 1, out)
+    _out += np.euler_gamma
+    return _out[()] if _n.ndim == 0 and np.isscalar(n) else _out


 @overload
-def norm_sh_jacobi(n: int, alpha: float, beta: float) -> float: ...
-
+def norm_sh_jacobi(n: AnyOrder, alpha: float, beta: float) -> np.float64: ...
 @overload
 def norm_sh_jacobi(
-    n: IntVector,
+    n: AnyOrderND,
     alpha: float,
     beta: float,
-) -> npt.NDArray[np.float64]: ...
+) -> lnpt.Array[lnpt.AtLeast1D, np.float64]: ...
def norm_sh_jacobi( - n: int | IntVector, + n: AnyOrder | AnyOrderND, alpha: float, beta: float, -) -> float | npt.NDArray[np.float64]: +) -> np.float64 | lnpt.Array[lnpt.AtLeast1D, np.float64]: r""" Evaluate the (weighted) \( L^2 \)-norm of a shifted Jacobi polynomial. @@ -244,7 +269,7 @@ def norm_sh_jacobi( msg = f'beta must be > -1, got {beta}' raise ValueError(msg) - r = clean_orders(n, 'n') + 1 + r = clean_orders(np.asanyarray(n), 'n') + 1 if alpha == beta == 0: # shifted Legendre @@ -259,32 +284,29 @@ def norm_sh_jacobi( p, q = r + alpha, r + beta c = np.exp(sc.betaln(p, q) - sc.betaln(r, p + beta)) / (p + q - 1) - return c[()] if np.isscalar(n) else c + return c[()] if r.ndim == 0 and np.isscalar(n) else c @overload def fourier_jacobi( - x: AnyScalar, - c: npt.ArrayLike, + x: lnpt.AnyArrayFloat, + c: lnpt.AnyArrayFloat, a: float, b: float, -) -> float: ... - +) -> lnpt.Array[Any, np.float64]: ... @overload def fourier_jacobi( - x: AnyNDArray[np.generic], - c: npt.ArrayLike, + x: lnpt.AnyScalarFloat, + c: lnpt.AnyArrayFloat, a: float, b: float, -) -> npt.NDArray[np.float64]: ... - - +) -> np.float64: ... def fourier_jacobi( - x: npt.ArrayLike, - c: npt.ArrayLike, + x: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat, + c: lnpt.AnyArrayFloat, a: float, b: float, -) -> float | npt.NDArray[np.float64]: +) -> np.float64 | lnpt.Array[lnpt.AtLeast1D, np.float64]: r""" Evaluate the Fourier-Jacobi series, using the Clenshaw summation algorithm. diff --git a/lmo/theoretical.py b/lmo/theoretical.py index 1c6ce7e7..8d40ab17 100644 --- a/lmo/theoretical.py +++ b/lmo/theoretical.py @@ -2,36 +2,13 @@ Theoretical (population) L-moments of known univariate probability distributions. """ - -__all__ = ( - 'l_moment_from_cdf', - 'l_moment_from_ppf', - 'l_moment_from_qdf', - 'l_ratio_from_cdf', - 'l_ratio_from_ppf', - 'l_stats_from_cdf', - 'l_stats_from_ppf', - - 'l_moment_cov_from_cdf', - 'l_stats_cov_from_cdf', - - 'l_moment_influence_from_cdf', - 'l_ratio_influence_from_cdf', - - 'l_comoment_from_pdf', - 'l_coratio_from_pdf', - - 'ppf_from_l_moments', - 'qdf_from_l_moments', - - 'cdf_from_ppf', - 'entropy_from_qdf', -) +from __future__ import annotations import functools from collections.abc import Callable, Sequence from math import exp, factorial, gamma, lgamma, log from typing import ( + TYPE_CHECKING, Any, Concatenate, Final, @@ -46,11 +23,9 @@ import numpy as np import numpy.typing as npt import scipy.integrate as sci -from scipy.stats.distributions import rv_continuous, rv_discrete, rv_frozen from ._poly import eval_sh_jacobi from ._utils import ( - broadstack, clean_order, clean_orders, clean_trim, @@ -61,27 +36,50 @@ round0, ) from .special import fourier_jacobi, fpow -from .typing import ( - AnyFloat, - AnyInt, - AnyNDArray, - AnyScalar, - AnyTrim, - IntVector, - QuadOptions, -) -T = TypeVar('T') -V = TypeVar('V', bound=float | npt.NDArray[np.float64]) -Theta = ParamSpec('Theta') +if TYPE_CHECKING: + from .typing import ( + AnyOrder, + AnyOrderND, + AnyTrim, + np as lnpt, + ) + from .typing._scipy import QuadOptions -Pair: TypeAlias = tuple[T, T] -UnivariateCDF: TypeAlias = Callable[[float], float] -UnivariatePPF: TypeAlias = UnivariateCDF -UnivariateQDF: TypeAlias = UnivariatePPF -UnivariateRV: TypeAlias = rv_continuous | rv_discrete | rv_frozen +__all__ = ( + 'l_moment_from_cdf', + 'l_moment_from_ppf', + 'l_moment_from_qdf', + 'l_ratio_from_cdf', + 'l_ratio_from_ppf', + 'l_stats_from_cdf', + 'l_stats_from_ppf', + + 'l_comoment_from_pdf', + 'l_coratio_from_pdf', + + 'l_moment_cov_from_cdf', + 
'l_stats_cov_from_cdf', + + 'l_moment_influence_from_cdf', + 'l_ratio_influence_from_cdf', + + 'entropy_from_qdf', + + 'ppf_from_l_moments', + 'qdf_from_l_moments', + 'cdf_from_ppf', +) + +_T = TypeVar('_T') +_T_x = TypeVar('_T_x', bound=float | npt.NDArray[np.float64]) +_Tss = ParamSpec('_Tss') + +_Pair: TypeAlias = tuple[_T, _T] +_Fn1: TypeAlias = Callable[[float], float] +_ArrF8: TypeAlias = npt.NDArray[np.float64] ALPHA: Final[float] = 0.1 QUAD_LIMIT: Final[int] = 100 @@ -90,11 +88,11 @@ def _nquad( - integrand: Callable[Concatenate[float, float, Theta], float], - domains: Sequence[Pair[AnyFloat] | Callable[..., Pair[AnyFloat]]], + integrand: Callable[Concatenate[float, float, _Tss], float], + domains: Sequence[_Pair[float] | Callable[..., _Pair[float]]], opts: QuadOptions | None = None, - *args: Theta.args, - **kwds: Theta.kwargs, + *args: _Tss.args, + **kwds: _Tss.kwargs, ) -> float: # nquad only has an `args` param for some invalid reason fn = functools.partial(integrand, **kwds) if kwds else integrand @@ -129,16 +127,16 @@ def _l_moment_const(r: int, s: float, t: float, k: int = 0) -> float: - lgamma(r + s) - lgamma(r + t) + lgamma(r - k) - - log(r) # noqa: COM812 + - log(r), ) return factorial(r - 1 - k) / r * v def _tighten_cdf_support( - cdf: UnivariateCDF, - support: Pair[float] | None = None, -) -> Pair[float]: + cdf: _Fn1, + support: _Pair[float] | None = None, +) -> _Pair[float]: """Attempt to tighten the support by checking some common bounds.""" a, b = (-np.inf, np.inf) if support is None else map(float, support) @@ -163,42 +161,42 @@ def _tighten_cdf_support( @overload def l_moment_from_cdf( - cdf: UnivariateCDF, - r: IntVector, + cdf: _Fn1, + r: AnyOrderND, /, trim: AnyTrim = ..., *, - support: Pair[float] | None = ..., + support: _Pair[float] | None = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., - ppf: UnivariatePPF | None = ..., -) -> npt.NDArray[np.float64]: ... + ppf: _Fn1 | None = ..., +) -> _ArrF8: ... @overload def l_moment_from_cdf( - cdf: UnivariateCDF, - r: AnyInt, + cdf: _Fn1, + r: AnyOrder, /, trim: AnyTrim = ..., *, - support: Pair[float] | None = ..., + support: _Pair[float] | None = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., - ppf: UnivariatePPF | None = ..., + ppf: _Fn1 | None = ..., ) -> np.float64: ... def l_moment_from_cdf( - cdf: UnivariateCDF, - r: AnyInt | IntVector, + cdf: _Fn1, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - support: Pair[float] | None = None, + support: _Pair[float] | None = None, quad_opts: QuadOptions | None = None, alpha: float = ALPHA, - ppf: UnivariatePPF | None = None, -) -> np.float64 | npt.NDArray[np.float64]: + ppf: _Fn1 | None = None, +) -> np.float64 | _ArrF8: r""" Evaluate the population L-moment of a continuous probability distribution, using its Cumulative Distribution Function (CDF) $F_X(x) = P(X \le x)$. @@ -365,39 +363,39 @@ def _l_moment_single(_r: int) -> float: @overload def l_moment_from_ppf( - ppf: UnivariatePPF, - r: IntVector, + ppf: _Fn1, + r: AnyOrderND, /, trim: AnyTrim = ..., *, - support: Pair[float] = ..., + support: _Pair[float] = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., -) -> npt.NDArray[np.float64]: ... +) -> _ArrF8: ... @overload def l_moment_from_ppf( - ppf: UnivariatePPF, - r: AnyInt, + ppf: _Fn1, + r: AnyOrder, /, trim: AnyTrim = ..., *, - support: Pair[float] = ..., + support: _Pair[float] = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., ) -> np.float64: ... 
def l_moment_from_ppf( - ppf: UnivariatePPF, - r: AnyInt | IntVector, + ppf: _Fn1, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - support: Pair[float] = (0, 1), + support: _Pair[float] = (0, 1), quad_opts: QuadOptions | None = None, alpha: float = ALPHA, -) -> np.float64 | npt.NDArray[np.float64]: +) -> np.float64 | _ArrF8: r""" Evaluate the population L-moment of a univariate probability distribution, using its Percentile Function (PPF), $x(F)$, also commonly known as the @@ -489,7 +487,6 @@ def l_moment_from_ppf( - [`theoretical.l_moment_from_cdf`][lmo.theoretical.l_moment_from_cdf]: population L-moment, using the CDF (i.e. the inverse PPF) - [`l_moment`][lmo.l_moment]: sample L-moment - """ rs = clean_orders(np.asanyarray(r)) s, t = clean_trim(trim) @@ -526,39 +523,39 @@ def _l_moment_single(_r: int) -> float: @overload def l_moment_from_qdf( - qdf: UnivariateQDF, - r: IntVector, + qdf: _Fn1, + r: AnyOrderND, /, trim: AnyTrim = ..., *, - support: Pair[float] = ..., + support: _Pair[float] = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., -) -> npt.NDArray[np.float64]: ... +) -> _ArrF8: ... @overload def l_moment_from_qdf( - qdf: UnivariateQDF, - r: AnyInt, + qdf: _Fn1, + r: AnyOrder, /, trim: AnyTrim = ..., *, - support: Pair[float] = ..., + support: _Pair[float] = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., ) -> np.float64: ... def l_moment_from_qdf( - qdf: UnivariateQDF, - r: AnyInt | IntVector, + qdf: _Fn1, + r: AnyOrder | AnyOrderND, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - support: Pair[float] = (0, 1), + support: _Pair[float] = (0, 1), quad_opts: QuadOptions | None = None, alpha: float = ALPHA, -) -> np.float64 | npt.NDArray[np.float64]: +) -> np.float64 | _ArrF8: r""" Evaluate the population L-moments \( \tlmoment{s, t}{r} \) for \( r > 1 \) from the quantile distribution function (QDF), which is the derivative of @@ -588,58 +585,58 @@ def l_moment_from_qdf( @overload def l_ratio_from_cdf( - cdf: UnivariateCDF, - r: IntVector, - s: AnyInt | IntVector, + cdf: _Fn1, + r: AnyOrderND, + s: AnyOrder | AnyOrderND, /, trim: AnyTrim = ..., *, - support: Pair[float] | None = ..., + support: _Pair[float] | None = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., - ppf: UnivariatePPF | None = ..., -) -> npt.NDArray[np.float64]: ... + ppf: _Fn1 | None = ..., +) -> _ArrF8: ... @overload def l_ratio_from_cdf( - cdf: UnivariateCDF, - r: AnyInt | IntVector, - s: IntVector, + cdf: _Fn1, + r: AnyOrder | AnyOrderND, + s: AnyOrderND, /, trim: AnyTrim = ..., *, - support: Pair[float] | None = ..., + support: _Pair[float] | None = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., - ppf: UnivariatePPF | None = ..., -) -> npt.NDArray[np.float64]: ... + ppf: _Fn1 | None = ..., +) -> _ArrF8: ... @overload def l_ratio_from_cdf( - cdf: UnivariateCDF, - r: AnyInt, - s: AnyInt, + cdf: _Fn1, + r: AnyOrder, + s: AnyOrder, /, trim: AnyTrim = ..., *, - support: Pair[float] | None = ..., + support: _Pair[float] | None = ..., quad_opts: QuadOptions | None = ..., alpha: float = ..., ) -> np.float64: ... 
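+# Worked example (illustration only): for the standard exponential CDF,
+# `lambda x: 1 - math.exp(-x)`, the L-moments are lambda_1 = 1 and
+# lambda_2 = 1/2, so the L-CV `l_ratio_from_cdf(cdf, 2, 1)` equals 1/2.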
 def l_ratio_from_cdf(
-    cdf: UnivariateCDF,
-    r: AnyInt | IntVector,
-    s: AnyInt | IntVector,
+    cdf: _Fn1,
+    r: AnyOrder | AnyOrderND,
+    s: AnyOrder | AnyOrderND,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    support: Pair[float] | None = None,
+    support: _Pair[float] | None = None,
     quad_opts: QuadOptions | None = None,
     alpha: float = ALPHA,
-    ppf: UnivariatePPF | None = None,
-) -> np.float64 | npt.NDArray[np.float64]:
+    ppf: _Fn1 | None = None,
+) -> np.float64 | _ArrF8:
     """
     Population L-ratios from a CDF.

@@ -647,7 +644,7 @@ def l_ratio_from_cdf(
         - [`l_ratio_from_ppf`][lmo.theoretical.l_ratio_from_ppf]
         - [`lmo.l_ratio`][lmo.l_ratio]
     """
-    rs = broadstack(r, s)
+    rs = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(s)))
     l_rs = l_moment_from_cdf(
         cdf,
         rs,
@@ -662,39 +659,39 @@ def l_ratio_from_cdf(

 @overload
 def l_ratio_from_ppf(
-    ppf: UnivariatePPF,
-    r: IntVector,
-    s: AnyInt | IntVector,
+    ppf: _Fn1,
+    r: AnyOrderND,
+    s: AnyOrder | AnyOrderND,
     /,
     trim: AnyTrim = ...,
     *,
-    support: Pair[float] = ...,
+    support: _Pair[float] = ...,
     quad_opts: QuadOptions | None = ...,
     alpha: float = ...,
-) -> npt.NDArray[np.float64]: ...
+) -> _ArrF8: ...
 @overload
 def l_ratio_from_ppf(
-    ppf: UnivariatePPF,
-    r: AnyInt | IntVector,
-    s: IntVector,
+    ppf: _Fn1,
+    r: AnyOrder | AnyOrderND,
+    s: AnyOrderND,
     /,
     trim: AnyTrim = ...,
     *,
-    support: Pair[float] = ...,
+    support: _Pair[float] = ...,
     quad_opts: QuadOptions | None = ...,
     alpha: float = ...,
-) -> npt.NDArray[np.float64]: ...
+) -> _ArrF8: ...
 @overload
 def l_ratio_from_ppf(
-    ppf: UnivariatePPF,
-    r: AnyInt,
-    s: AnyInt,
+    ppf: _Fn1,
+    r: AnyOrder,
+    s: AnyOrder,
     /,
     trim: AnyTrim = ...,
     *,
-    support: Pair[float] = ...,
+    support: _Pair[float] = ...,
     quad_opts: QuadOptions | None = ...,
     alpha: float = ...,
 ) -> np.float64:
@@ -702,16 +699,16 @@ def l_ratio_from_ppf(


 def l_ratio_from_ppf(
-    ppf: UnivariatePPF,
-    r: AnyInt | IntVector,
-    s: AnyInt | IntVector,
+    ppf: _Fn1,
+    r: AnyOrder | AnyOrderND,
+    s: AnyOrder | AnyOrderND,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    support: Pair[float] = (0, 1),
+    support: _Pair[float] = (0, 1),
     quad_opts: QuadOptions | None = None,
     alpha: float = ALPHA,
-) -> np.float64 | npt.NDArray[np.float64]:
+) -> np.float64 | _ArrF8:
     """
     Population L-ratios from a PPF.

@@ -719,7 +716,7 @@ def l_ratio_from_ppf(
         - [`l_ratio_from_cdf`][lmo.theoretical.l_ratio_from_cdf]
         - [`lmo.l_ratio`][lmo.l_ratio]
     """
-    rs = broadstack(r, s)
+    rs = np.stack(np.broadcast_arrays(np.asarray(r), np.asarray(s)))
     l_rs = l_moment_from_ppf(
         ppf,
         rs,
@@ -732,16 +729,16 @@ def l_ratio_from_ppf(


 def l_stats_from_cdf(
-    cdf: UnivariateCDF,
+    cdf: _Fn1,
     num: int = 4,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    support: Pair[float] | None = None,
+    support: _Pair[float] | None = None,
     quad_opts: QuadOptions | None = None,
     alpha: float = ALPHA,
-    ppf: UnivariatePPF | None = None,
-) -> npt.NDArray[np.float64]:
+    ppf: _Fn1 | None = None,
+) -> _ArrF8:
     r"""
     Calculates the theoretical / population L-moments (for $r \le 2$)
     and L-ratios (for $r > 2$) of a distribution, from its CDF.
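+
+    Example (a sketch; for the standard exponential distribution the first
+    four L-stats are $\lambda_1 = 1$, $\lambda_2 = 1/2$, $\tau_3 = 1/3$ and
+    $\tau_4 = 1/6$):
+
+        >>> import math
+        >>> l_stats_from_cdf(lambda x: 1 - math.exp(-x))  # doctest: +SKIP
+        array([1.        , 0.5       , 0.33333333, 0.16666667])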
@@ -780,15 +777,15 @@ def l_stats_from_cdf(


 def l_stats_from_ppf(
-    ppf: UnivariatePPF,
+    ppf: _Fn1,
     num: int = 4,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    support: Pair[float] = (0, 1),
+    support: _Pair[float] = (0, 1),
     quad_opts: QuadOptions | None = None,
     alpha: float = ALPHA,
-) -> npt.NDArray[np.float64]:
+) -> _ArrF8:
     r"""
     Calculates the theoretical / population L-moments (for $r \le 2$)
     and L-ratios (for $r > 2$) of a distribution, from its quantile function.
@@ -825,14 +822,14 @@ def l_stats_from_ppf(


 def l_moment_cov_from_cdf(
-    cdf: UnivariateCDF,
-    r_max: int,
+    cdf: _Fn1,
+    r_max: AnyOrder,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    support: Pair[float] | None = None,
+    support: _Pair[float] | None = None,
     quad_opts: QuadOptions | None = None,
-) -> npt.NDArray[np.float64]:
+) -> _ArrF8:
     r"""
     L-moments that are estimated from $n$ samples of a distribution with CDF
     $F$ converge to the multivariate normal distribution as the sample size
@@ -1004,16 +1001,16 @@ def range_x(y: float, *_: int) -> tuple[float, float]:


 def l_stats_cov_from_cdf(
-    cdf: UnivariateCDF,
-    num: int = 4,
+    cdf: _Fn1,
     /,
-    trim: AnyTrim = (0, 0),
+    num: AnyOrder = 4,
+    trim: AnyTrim = 0,
     *,
-    support: Pair[float] | None = None,
+    support: _Pair[float] | None = None,
     quad_opts: QuadOptions | None = None,
     alpha: float = ALPHA,
-    ppf: UnivariatePPF | None = None,
-) -> npt.NDArray[np.float64]:
+    ppf: _Fn1 | None = None,
+) -> _ArrF8:
     r"""
     Similar to [`l_moment_cov_from_cdf`][lmo.theoretical.l_moment_cov_from_cdf],
     but for the [`lmo.l_stats`][lmo.l_stats].
@@ -1108,17 +1105,17 @@ def l_stats_cov_from_cdf(


 def l_moment_influence_from_cdf(
-    cdf: Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]],
-    r: AnyInt,
+    cdf: Callable[[_ArrF8], _ArrF8],
+    r: AnyOrder,
     /,
-    trim: AnyTrim = (0, 0),
+    trim: AnyTrim = 0,
     *,
-    support: Pair[float] | None = None,
+    support: _Pair[float] | None = None,
     l_moment: float | np.float64 | None = None,
     quad_opts: QuadOptions | None = None,
     alpha: float = ALPHA,
     tol: float = 1e-8,
-) -> Callable[[V], V]:
+) -> Callable[[_T_x], _T_x]:
     r"""
     Influence Function (IF) of a theoretical L-moment.

@@ -1178,7 +1175,7 @@ def l_moment_influence_from_cdf(
     """
     _r = clean_order(int(r))
     if _r == 0:
-        def influence0(x: V, /) -> V:
+        def influence0(x: _T_x, /) -> _T_x:
             """
             L-moment Influence Function for `r=0`.

@@ -1189,7 +1186,7 @@ def influence0(x: V, /) -> V:
                 out
             """
             _x = np.asanyarray(x, np.float64)[()]
-            return cast(V, _x * 0. + .0)  # :+)
+            return cast(_T_x, _x * 0. + .0)  # :+)

         return influence0

@@ -1207,10 +1204,10 @@ def influence0(x: V, /) -> V:
     else:
         lm = l_moment

-    a, b = support or _tighten_cdf_support(cast(UnivariateCDF, cdf), support)
+    a, b = support or _tighten_cdf_support(cast(_Fn1, cdf), support)
     c = _l_moment_const(_r, s, t)

-    def influence(x: V, /) -> V:
+    def influence(x: _T_x, /) -> _T_x:
         _x = np.asanyarray(x, np.float64)
         q = np.piecewise(
             _x,
@@ -1222,7 +1219,7 @@ def influence(x: V, /) -> V:
         # cheat a bit and replace 0 * inf by 0, ensuring convergence if s or t
         alpha = w * eval_sh_jacobi(_r - 1, t, s, q) * np.where(w, _x, 0)

-        return cast(V, round0(alpha - lm, tol)[()])
+        return cast(_T_x, round0(alpha - lm, tol)[()])

     influence.__doc__ = (
         f'Theoretical influence function for L-moment with {r=} and {trim=}.'
@@ -1232,18 +1229,18 @@ def influence(x: V, /) -> V: def l_ratio_influence_from_cdf( - cdf: Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]], - r: AnyInt, - k: AnyInt = 2, + cdf: Callable[[_ArrF8], _ArrF8], + r: AnyOrder, + k: AnyOrder = 2, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - support: Pair[float] | None = None, - l_moments: Pair[float] | None = None, + support: _Pair[float] | None = None, + l_moments: _Pair[float] | None = None, quad_opts: QuadOptions | None = None, alpha: float = ALPHA, tol: float = 1e-8, -) -> Callable[[V], V]: +) -> Callable[[_T_x], _T_x]: r""" Construct the influence function of a theoretical L-moment ratio. @@ -1335,12 +1332,12 @@ def l_ratio_influence_from_cdf( raise ZeroDivisionError(msg) t_r = l_r / l_k - def influence_function(x: V, /) -> V: + def influence_function(x: _T_x, /) -> _T_x: psi_r = if_r(x) # cheat a bit to avoid `inf - inf = nan` situations psi_k = np.where(np.isinf(psi_r), 0, if_k(x)) - return cast(V, round0((psi_r - t_r * psi_k) / l_k, tol=tol)[()]) + return cast(_T_x, round0((psi_r - t_r * psi_k) / l_k, tol=tol)[()]) influence_function.__doc__ = ( f'Theoretical influence function for L-moment ratio with r={_r}, ' @@ -1353,15 +1350,15 @@ def influence_function(x: V, /) -> V: # Multivariate def l_comoment_from_pdf( - pdf: Callable[[npt.NDArray[np.float64]], float], + pdf: Callable[[_ArrF8], float], cdfs: Sequence[Callable[[float], float]], - r: AnyInt, + r: AnyOrder, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - supports: Sequence[Pair[float]] | None = None, + supports: Sequence[_Pair[float]] | None = None, quad_opts: QuadOptions | None = None, -) -> npt.NDArray[np.float64]: +) -> _ArrF8: r""" Evaluate the theoretical L-*co*moment matrix of a multivariate probability distribution, using the joint PDF @@ -1567,16 +1564,16 @@ def integrand(i: int, j: int, *xs: float) -> float: def l_coratio_from_pdf( - pdf: Callable[[npt.NDArray[np.float64]], float], + pdf: Callable[[_ArrF8], float], cdfs: Sequence[Callable[[float], float]], - r: AnyInt, - r0: AnyInt = 2, + r: AnyOrder, + r0: AnyOrder = 2, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - supports: Sequence[Pair[float]] | None = None, + supports: Sequence[_Pair[float]] | None = None, quad_opts: QuadOptions | None = None, -) -> npt.NDArray[np.float64]: +) -> _ArrF8: r""" Evaluate the theoretical L-*co*moment ratio matrix of a multivariate probability distribution, using the joint PDF $f_{\vec{X}}(\vec{x})$ and @@ -1618,25 +1615,21 @@ class _VectorizedPPF(Protocol): @overload def __call__( self, - __u: AnyNDArray[Any] | Sequence[Any], + __u: lnpt.AnyArrayInt | lnpt.AnyArrayFloat, *, r_max: int = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def __call__(self, __u: AnyScalar, *, r_max: int = ...) -> float: ... - + ) -> _ArrF8: ... @overload def __call__( self, - __u: npt.ArrayLike, + __u: lnpt.AnyScalarInt | lnpt.AnyScalarFloat, *, r_max: int = ..., - ) -> npt.NDArray[np.float64] | float: ... + ) -> float: ... 
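+
+# Note: any callable with this call shape satisfies `_VectorizedPPF`; the
+# `ppf` and `qdf` closures built below are cast to it. `r_max` truncates the
+# coefficient vector, and the default `r_max=-1` keeps all terms.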
def _validate_l_bounds( - l_r: npt.NDArray[np.float64], + l_r: _ArrF8, s: float, t: float, ) -> None: @@ -1688,7 +1681,7 @@ def _validate_l_bounds( def _monotonic( - f: Callable[[npt.NDArray[np.float64]], npt.NDArray[np.float64]], + f: Callable[[_ArrF8], _ArrF8], a: float, b: float, n: int = 100, @@ -1704,11 +1697,11 @@ def _monotonic( def ppf_from_l_moments( - lmbda: npt.ArrayLike, + lmbda: lnpt.AnyVectorFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, - support: Pair[float] = (-np.inf, np.inf), + support: _Pair[float] = (-np.inf, np.inf), validate: bool = True, extrapolate: bool = False, ) -> _VectorizedPPF: @@ -1796,7 +1789,7 @@ def ppf( u: npt.ArrayLike, *, r_max: int = -1, - ) -> float | npt.NDArray[np.float64]: + ) -> float | _ArrF8: y = np.asarray(u) y = np.where((y < 0) | (y > 1), np.nan, 2 * y - 1) @@ -1808,20 +1801,20 @@ def ppf( return np.clip(x, *support)[()] - if validate and not _monotonic(ppf, 0, 1): # type: ignore + if validate and not _monotonic(cast(_VectorizedPPF, ppf), 0, 1): msg = ( 'PPF is not monotonically increasing (not invertable); ' 'consider increasing the trim' ) raise ValueError(msg) - return ppf # type: ignore + return cast(_VectorizedPPF, ppf) def qdf_from_l_moments( - lmbda: npt.ArrayLike, + lmbda: lnpt.AnyVectorFloat, /, - trim: AnyTrim = (0, 0), + trim: AnyTrim = 0, *, validate: bool = True, extrapolate: bool = False, @@ -1871,11 +1864,11 @@ def qdf_from_l_moments( alpha, beta = t + 1, s + 1 def qdf( - u: npt.ArrayLike, + u: lnpt.AnyScalarFloat | lnpt.AnyArrayFloat, *, r_max: int = -1, - ) -> float | npt.NDArray[np.float64]: - y = np.asarray(u) + ) -> float | _ArrF8: + y = np.asarray(u, np.float64) y = np.where((y < 0) | (y > 1), np.nan, 2 * y - 1) _c = c[:r_max] if 0 < r_max < len(c) else c @@ -1890,17 +1883,19 @@ def qdf( msg = 'QDF is not positive; consider increasing the trim' raise ValueError(msg) - return qdf # type: ignore + return cast(_VectorizedPPF, qdf) def cdf_from_ppf( - ppf: Callable[Concatenate[float, Theta], float], + ppf: Callable[Concatenate[float, _Tss], float], /, -) -> Callable[Concatenate[float, Theta], float]: +) -> Callable[Concatenate[float, _Tss], float]: """Numerical inversion of the PPF.""" - from scipy.optimize import root_scalar # type: ignore + from scipy.optimize import ( + root_scalar, # pyright: ignore[reportUnknownVariableType] + ) - def cdf(x: float, *args: Theta.args, **kwds: Theta.kwargs) -> float: + def cdf(x: float, /, *args: _Tss.args, **kwds: _Tss.kwargs) -> float: if np.isnan(x): return np.nan if x <= ppf(0, *args, **kwds): @@ -1920,10 +1915,10 @@ def _ppf_to_solve(p: float) -> float: def entropy_from_qdf( - qdf: Callable[Concatenate[float, Theta], float], + qdf: Callable[Concatenate[float, _Tss], float], /, - *args: Theta.args, - **kwds: Theta.kwargs, + *args: _Tss.args, + **kwds: _Tss.kwargs, ) -> float: r""" Evaluate the (differential / continuous) entropy \( H(X) \) of a diff --git a/lmo/typing.py b/lmo/typing.py deleted file mode 100644 index 1eb722eb..00000000 --- a/lmo/typing.py +++ /dev/null @@ -1,981 +0,0 @@ -# ruff: noqa: D102,D105,D107 - -"""Numpy-related type aliases for internal use.""" - -__all__ = ( - 'SupportsArray', - - 'AnyScalar', - 'AnyNDArray', - - 'AnyBool', - 'AnyInt', - 'AnyFloat', - - 'IntVector', - 'IntMatrix', - 'IntTensor', - - 'FloatVector', - 'FloatMatrix', - 'FloatTensor', - - 'SortKind', - 'IndexOrder', - - 'PolySeries', - - 'LMomentOptions', - 'LComomentOptions', - - 'QuadOptions', - 'OptimizeResult', - - 'RVContinuousBase', - 'RVContinuous', - 'RVContinuousFrozen', - - 
'AnyTrim', - - 'DistributionFunction', -) - -import sys -from collections.abc import Callable, Iterator, Sequence -from typing import ( - Any, - ClassVar, - Final, - Literal, - ParamSpec, - Protocol, - SupportsInt, - TypeAlias, - TypeGuard, - TypeVar, - TypedDict, - overload, - runtime_checkable, -) - -import numpy as np -import numpy.typing as npt - - -if sys.version_info < (3, 11): - from typing_extensions import Self, TypeVarTuple, Unpack -else: - from typing import Self, TypeVarTuple, Unpack - -T = TypeVar('T', bound=np.generic) -T_co = TypeVar('T_co', covariant=True, bound=np.generic) - - -NP_V2: Final[bool] = np.__version__.startswith('2.') - - -@runtime_checkable -class SupportsArray(Protocol[T_co]): - """ - Custom numpy array containers. - - See Also: - - https://numpy.org/doc/stable/user/basics.dispatch.html - """ - def __array__(self) -> npt.NDArray[T_co]: ... - - -# scalar types -if NP_V2: - _NpBool: TypeAlias = np.bool # noqa: NPY001 -else: - _NpBool: TypeAlias = np.bool_ -_NpNumber: TypeAlias = np.number[Any] | _NpBool -_NpInt: TypeAlias = np.integer[Any] -_NpFloat: TypeAlias = np.floating[Any] -_NpComplex: TypeAlias = np.complexfloating[Any, Any] -_NpScalar: TypeAlias = np.generic - -AnyBool: TypeAlias = bool | _NpBool -AnyInt: TypeAlias = int | _NpInt | _NpBool -AnyFloat: TypeAlias = float | _NpFloat | AnyInt -AnyComplex: TypeAlias = complex | _NpComplex | AnyFloat # no float -AnyNumber: TypeAlias = int | float | complex | _NpNumber -AnyScalar: TypeAlias = int | float | complex | str | bytes | _NpScalar - -# array-like flavours (still waiting on numpy's shape typing) -# - `{}Vector`: ndim == 1 -# - `{}Matrix`: ndim == 2 -# - `{}Tensor`: ndim >= 3 -AnyNDArray: TypeAlias = npt.NDArray[T] | SupportsArray[T] - -_ArrayZ: TypeAlias = AnyNDArray[_NpInt] | AnyNDArray[_NpBool] -IntVector: TypeAlias = _ArrayZ | Sequence[AnyInt] -IntMatrix: TypeAlias = _ArrayZ | Sequence[Sequence[AnyInt]] -IntTensor: TypeAlias = _ArrayZ | Sequence['IntMatrix | IntTensor'] - -_ArrayR: TypeAlias = AnyNDArray[_NpFloat] | _ArrayZ -FloatVector: TypeAlias = _ArrayR | Sequence[AnyFloat] -FloatMatrix: TypeAlias = _ArrayR | Sequence[Sequence[AnyFloat]] -FloatTensor: TypeAlias = _ArrayR | Sequence['FloatMatrix | FloatTensor'] - -_ArrayC: TypeAlias = AnyNDArray[_NpComplex] | _ArrayR -ComplexVector: TypeAlias = _ArrayC | Sequence[AnyComplex] -ComplexMatrix: TypeAlias = _ArrayC | Sequence[Sequence[AnyComplex]] -ComplexTensor: TypeAlias = _ArrayC | Sequence['ComplexMatrix | ComplexTensor'] - -# for numpy.sort -SortKind: TypeAlias = Literal['quicksort', 'heapsort', 'stable'] -IndexOrder: TypeAlias = Literal['C', 'F', 'A', 'K'] - -# numpy.polynomial - - -@runtime_checkable -class _SupportsCoef(Protocol): - coef: npt.NDArray[Any] | SupportsArray[Any] - - -@runtime_checkable -class _SupportsDomain(Protocol): - domain: npt.NDArray[Any] | SupportsArray[Any] - - -@runtime_checkable -class _SupportsWindow(Protocol): - window: npt.NDArray[Any] | SupportsArray[Any] - - -@runtime_checkable -class _SupportsLessThanInt(Protocol): - def __lt__(self, __other: int) -> bool: ... - - -_P = TypeVar('_P', bound='PolySeries') - - -@runtime_checkable -class PolySeries(Protocol): # noqa: PLW1641 - """ - Annotations for the (private) `numpy.polynomial._polybase.ABCPolyBase` - subtypes, e.g. [`numpy.polynomial.Legendre`][numpy.polynomial.Legendre]. 
- """ - __hash__: ClassVar[None] # type: ignore[assignment] - __array_ufunc__: ClassVar[None] - maxpower: ClassVar[int] - - basis_name: str | None - - coef: npt.NDArray[_NpFloat | _NpComplex] - domain: npt.NDArray[_NpInt | _NpFloat | _NpComplex] - window: npt.NDArray[_NpInt | _NpFloat | _NpComplex] - - @property - def symbol(self) -> str: ... - - def has_samecoef(self, __other: _SupportsCoef) -> bool: ... - - def has_samedomain(self, __other: _SupportsDomain) -> bool: ... - - def has_samewindow(self, __other: _SupportsWindow) -> bool: ... - - def has_sametype(self, __other: type[Any]) -> TypeGuard[type[Self]]: ... - - def __init__( - self, - coef: npt.ArrayLike, - domain: ComplexVector | None = ..., - window: ComplexVector | None = ..., - symbol: str = ..., - ) -> None: ... - - def __format__(self, __fmt_str: str) -> str: ... - @overload - def __call__(self, __arg: _P) -> _P: ... - @overload - def __call__(self, __arg: complex | _NpComplex) -> _NpComplex: ... - @overload - def __call__( - self, - __arg: AnyNDArray[_NpNumber], - ) -> npt.NDArray[_NpFloat] | npt.NDArray[_NpComplex]: ... - @overload - def __call__(self, __arg: AnyNumber) -> _NpFloat | _NpComplex: ... - - def __iter__(self) -> Iterator[_NpFloat | _NpComplex]: ... - - def __len__(self) -> int: ... - - def __neg__(self) -> Self: ... - - def __pos__(self) -> Self: ... - - def __add__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __sub__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __mul__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __truediv__(self, __other: AnyNumber) -> Self: ... - - def __floordiv__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __mod__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __divmod__( - self, - __other: npt.ArrayLike | Self, - ) -> tuple[Self, Self]: ... - - def __radd__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __rsub__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __rmul__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __rtruediv__(self, __other: AnyNumber) -> Self: ... - - def __rfloordiv__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __rmod__(self, __other: npt.ArrayLike | Self) -> Self: ... - - def __rdivmod__( - self, - __other: npt.ArrayLike | Self, - ) -> tuple[Self, Self]: ... - - def __pow__(self, __other: AnyInt) -> Self: ... - - def __eq__(self, __other: object) -> bool: ... - - def __ne__(self, __other: object) -> bool: ... - - def copy(self) -> Self: ... - - def degree(self) -> int: ... - - def cutdeg(self, deg: SupportsInt) -> Self: ... - - def trim(self, tol: AnyFloat | _SupportsLessThanInt = ...) -> Self: ... - - def truncate(self, size: AnyInt) -> Self: ... - @overload - def convert( - self, - domain: ComplexVector | None = ..., - *, - kind: type[_P], - window: ComplexVector = ..., - ) -> _P: ... - @overload - def convert( - self, - domain: ComplexVector, - kind: type[_P], - window: ComplexVector = ..., - ) -> _P: ... - @overload - def convert( - self, - domain: ComplexVector = ..., - kind: type[Self] | None = ..., - window: ComplexVector = ..., - ) -> Self: ... - - def mapparms(self) -> tuple[_NpFloat, _NpFloat]: ... - - def integ( - self, - m: AnyInt = ..., - k: npt.ArrayLike = ..., - lbnd: AnyNumber | None = ..., - ) -> Self: ... - - def deriv(self, m: AnyInt = ...) -> Self: ... - - def roots(self) -> npt.NDArray[_NpFloat | _NpComplex]: ... 
- - def linspace( - self, - n: AnyInt = ..., - domain: npt.ArrayLike | None = ..., - ) -> tuple[ - npt.NDArray[_NpFloat | _NpComplex], - npt.NDArray[_NpFloat | _NpComplex], - ]: ... - @overload - @classmethod - def fit( - cls, - x: npt.ArrayLike, - y: npt.ArrayLike, - deg: AnyInt | IntVector, - domain: ComplexVector | None = ..., - rcond: AnyFloat | None = ..., - *, - full: Literal[False], - w: FloatVector | None = ..., - window: ComplexVector | None = ..., - # symbol: str = ..., - ) -> Self: ... - @overload - @classmethod - def fit( - cls, - x: npt.ArrayLike, - y: npt.ArrayLike, - deg: AnyInt | IntVector, - domain: ComplexVector | None = ..., - rcond: AnyFloat | None = ..., - *, - full: Literal[True], - w: FloatVector | None = ..., - window: ComplexVector | None = ..., - # symbol: str = ..., - ) -> tuple[Self, list[Any]]: ... - @overload - @classmethod - def fit( - cls, - x: npt.ArrayLike, - y: npt.ArrayLike, - deg: AnyInt | IntVector, - domain: ComplexVector | None = ..., - rcond: AnyFloat | None = ..., - full: bool = ..., - w: FloatVector | None = ..., - window: ComplexVector | None = ..., - # symbol: str = ..., - ) -> Self: ... - @classmethod - def fromroots( - cls, - roots: npt.ArrayLike, - domain: ComplexVector | None = ..., - window: ComplexVector | None = ..., - # symbol: str = ..., - ) -> Self: ... - @classmethod - def identity( - cls, - domain: ComplexVector | None = ..., - window: ComplexVector | None = ..., - # symbol: str = ..., - ) -> Self: ... - @classmethod - def basis( - cls, - deg: AnyInt, - domain: ComplexVector | None = ..., - window: ComplexVector | None = ..., - # symbol: str = ..., - ) -> Self: ... - @classmethod - def cast( - cls, - series: 'PolySeries', - domain: ComplexVector | None = ..., - window: ComplexVector | None = ..., - ) -> Self: ... - - -# PEP 692 precise **kwargs typing - -class _LOptions(TypedDict, total=False): - sort: SortKind | None - cache: bool - - -class LMomentOptions(_LOptions, total=False): - """Use like `def spam(**kwargs: Unpack[LMomentOptions]): ...`.""" - fweights: IntVector | None - aweights: npt.ArrayLike | None - - -class LComomentOptions(_LOptions, total=False): - """Use like `def spam(**kwargs: Unpack[LComomentOptions]): ...`.""" - rowvar: bool - - -# scipy - -class QuadOptions(TypedDict, total=False): - """ - Optional quadrature options to be passed to - [`scipy.integrate.quad`][scipy.integrate.quad]. - """ - epsabs: float - epsrel: float - limit: int - maxp1: int - limlst: int - points: Sequence[float] | npt.NDArray[np.floating[Any]] - weight: Literal[ - 'cos', - 'sin', - 'alg', - 'alg-loga', - 'alg-logb', - 'alg-log', - 'cauchy', - ] - wvar: float | tuple[float, float] - wopts: tuple[int, npt.NDArray[np.float64]] - - -class OptimizeResult(Protocol): - """ - Type stub for the most generally available attributes of - [`scipy.optimize.OptimizeResult`][scipy.optimize.OptimizeResult]. - - Note that `OptimizeResult` is actually subclasses dict, whose attributes - are keys in disguise. - """ - x: npt.NDArray[np.float64] - success: bool - status: int - message: int - fun: float - nfev: int - nit: int - - -V = TypeVar('V', bound=float | npt.NDArray[np.float64]) -Ps = TypeVarTuple('Ps') - -RandomState: TypeAlias = np.random.RandomState | np.random.Generator - - -class RVContinuousBase(Protocol[Unpack[Ps]]): - """ - Generic type stub for both [`rv_continuous`][scipy.stats.rv_continuous] - and `rv_continuous_frozen`. 
- """ - a: float - b: float - - random_state: RandomState - - @overload - def pdf( - self, - x: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def pdf( - self, - x: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def logpdf( - self, - x: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def logpdf( - self, - x: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def cdf( - self, - x: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def cdf( - self, - x: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def logcdf( - self, - x: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def logcdf( - self, - x: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def sf( - self, - x: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def sf( - self, - x: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def logsf( - self, - x: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def logsf( - self, - x: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def ppf( - self, - q: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def ppf( - self, - q: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def isf( - self, - q: _ArrayR, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def isf( - self, - q: AnyFloat, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - def fit( - self, - data: npt.ArrayLike, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - floc: float = ..., - fscale: float = ..., - optimizer: str | Callable[ - [ - Callable[[float, Unpack[Ps]], float], - Sequence[float], - tuple[Unpack[Ps]], - ], - float, - ] = ..., - method: Literal['MLE', 'MM'] = ..., - **__kwds: float, - ) -> tuple[Unpack[Ps], float, float]: ... - - def fit_loc_scale( - self, - data: npt.ArrayLike, - *__args: Unpack[Ps], - ) -> tuple[float, float]: ... - - def expect( - self, - func: Callable[[float], float], - args: tuple[Unpack[Ps]] = ..., - loc: float = ..., - scale: float = ..., - lb: float | None = ..., - ub: float | None = ..., - conditional: bool = ..., - **kwds: Unpack[QuadOptions], - ) -> float: ... - - @overload - def rvs( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - size: int = ..., - random_state: RandomState = ..., - ) -> float: ... - - @overload - def rvs( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - size: tuple[int, ...], - random_state: RandomState = ..., - ) -> npt.NDArray[np.float64]: ... 
- - def stats( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - moments: str = ..., - ) -> tuple[float]: ... - - def moment( - self, - order: int, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - def entropy( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - def median( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - def mean( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - def var( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - def std( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> float: ... - - @overload - def interval( - self, - confidence: float, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> tuple[float, float]: ... - - @overload - def interval( - self, - confidence: npt.NDArray[np.floating[Any]], - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: ... - - def support( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> tuple[float, float]: ... - - @overload - def l_moment( - self, - r: IntVector, - /, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - **kwds: Any, - ) -> npt.NDArray[np.float64]: ... - - @overload - def l_moment( - self, - r: AnyInt, - /, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - ) -> np.float64: ... - - @overload - def l_ratio( - self, - order: IntVector, - order_denom: AnyInt | IntVector, - /, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... - - @overload - def l_ratio( - self, - order: AnyInt, - order_denom: AnyInt | IntVector, - /, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - ) -> np.float64: ... - - def l_stats( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - moments: int = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... - - def l_loc( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... - - def l_scale( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... - - def l_skew( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... - - def l_kurtosis( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - ) -> npt.NDArray[np.float64]: ... - - def l_moments_cov( - self, - r_max: int, - /, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - **kwds: Any, - ) -> npt.NDArray[np.float64]: ... 
- - def l_stats_cov( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - moments: int = 4, - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - **kwds: Any, - ) -> npt.NDArray[np.float64]: ... - - def l_moment_influence( - self, - r: AnyInt, - /, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - tol: float = ..., - **kwds: Any, - ) -> Callable[[V], V]: ... - - def l_ratio_influence( - self, - r: AnyInt, - k: AnyInt, - /, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - trim: 'AnyTrim' = ..., - quad_opts: QuadOptions | None = ..., - tol: float = ..., - **kwds: Any, - ) -> Callable[[V], V]: ... - - -class RVContinuous(RVContinuousBase[Unpack[Ps]], Protocol[Unpack[Ps]]): - """Generic type stub for [`rv_continuous`][scipy.stats.rv_continuous].""" - badvalue: float - name: str - xtol: float - # moment_type: Literal[0, 1] - moment_type: int - shapes: str | None - - def __init__( - self, - momtype: Literal[0, 1] = ..., - a: float | None = ..., - b: float | None = ..., - xtol: float = ..., - badvalue: float | None = ..., - name: str | None = ..., - longname: str | None = ..., - shapes: str | None = ..., - seed: int | RandomState | None = ..., - ) -> None: ... - - def __call__( - self, - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - ) -> 'RVContinuousFrozen[Unpack[Ps]]': ... - - def nnlf( - self, - theta: tuple[Unpack[Ps]], - x: npt.ArrayLike, - ) -> float: ... - - -class RVContinuousFrozen( - # somehow RVContinuousBase[()] fails on (only) Python 3.10. - RVContinuousBase[Unpack[tuple[()]]], - Protocol[Unpack[Ps]], -): - """Generic type stub for [`rv_continuous_frozen`].""" - args: ( - tuple[Unpack[Ps]] - | tuple[Unpack[Ps], float] - | tuple[Unpack[Ps], float, float] - ) - kwargs: dict[str, Any] - dist: RVContinuous[Unpack[Ps]] - - def __init__( - self, - dist: RVContinuous[Unpack[Ps]], - *__args: Unpack[Ps], - loc: float = ..., - scale: float = ..., - **__kwds: Any, - ) -> None: ... - - -# Lmo specific aliases - -AnyTrim: TypeAlias = ( - tuple[AnyFloat, AnyFloat] - | Sequence[AnyFloat] - | SupportsArray[_NpInt | _NpFloat] - | AnyFloat -) - - -# Callable protocols for vectorized functions - - -Theta = ParamSpec('Theta') - - -class DistributionFunction(Protocol[Theta]): - """ - Callable protocol for a vectorized distribution function. E.g. for - the `cdf` and `ppf` methods of `scipy,stats.rv_generic`. In practice, - the returned dtype is always `float64` (even `rv_discrete.ppf`). - """ - @overload - def __call__( - self, - __arg: _ArrayR, - *__args: Theta.args, - **__kwds: Theta.kwargs, - ) -> npt.NDArray[np.float64]: ... - - @overload - def __call__( - self, - __arg: AnyFloat, - *__args: Theta.args, - **__kwds: Theta.kwargs, - ) -> float: ... diff --git a/lmo/typing/__init__.py b/lmo/typing/__init__.py new file mode 100644 index 00000000..f8936150 --- /dev/null +++ b/lmo/typing/__init__.py @@ -0,0 +1,27 @@ +"""Typing utilities, mostly meant for internal usage.""" +from . 
import ( + _scipy as scipy, + compat, +) +from ._core import ( + AnyAWeights, + AnyFWeights, + AnyOrder, + AnyOrderND, + AnyTrim, + LComomentOptions, + LMomentOptions, +) + + +__all__ = ( + 'AnyAWeights', + 'AnyFWeights', + 'AnyOrder', + 'AnyOrderND', + 'AnyTrim', + 'LComomentOptions', + 'LMomentOptions', + 'compat', + 'scipy', +) diff --git a/lmo/typing/_core.py b/lmo/typing/_core.py new file mode 100644 index 00000000..9fb26919 --- /dev/null +++ b/lmo/typing/_core.py @@ -0,0 +1,38 @@ +from typing import Any, TypeAlias, TypedDict + +import numpy as np +import optype as opt + +from . import np as lnpt + + +_AnyTrimI: TypeAlias = int | tuple[int, int] +_AnyTrimF: TypeAlias = float | tuple[float, float] +AnyTrim: TypeAlias = _AnyTrimI | _AnyTrimF + +AnyOrder: TypeAlias = int | np.integer[Any] +AnyOrderND: TypeAlias = opt.CanSequence[int, int] | lnpt.AnyArrayInt + +AnyFWeights: TypeAlias = lnpt.Array[tuple[int], np.integer[Any]] +AnyAWeights: TypeAlias = lnpt.Array[lnpt.AtLeast1D, np.floating[Any]] + + +class LMomentOptions(TypedDict, total=False): + """ + Use as e.g. `**kwds: Unpack[LMomentOptions]` (on `python<3.11`) or + `**kwds: *LMomentOptions` (on `python>=3.11`). + """ + sort: lnpt.SortKind + cache: bool + fweights: AnyFWeights + aweights: AnyAWeights + + +class LComomentOptions(TypedDict, total=False): + """ + Use as e.g. `**kwds: Unpack[LComomentOptions]` (on `python<3.11`) or + `**kwds: *LComomentOptions` (on `python>=3.11`). + """ + sort: lnpt.SortKind + cache: bool + rowvar: bool diff --git a/lmo/typing/_scipy.py b/lmo/typing/_scipy.py new file mode 100644 index 00000000..7ab9dde4 --- /dev/null +++ b/lmo/typing/_scipy.py @@ -0,0 +1,837 @@ +# pyright: reportPropertyTypeMismatch=false +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Literal, + ParamSpec, + Protocol, + TypeAlias, + TypedDict, + overload, + runtime_checkable, +) + +import numpy as np +import numpy.typing as npt + +from . import np as lnpt +from .compat import TypeVar, Unpack + + +if TYPE_CHECKING: + from collections.abc import ( + Callable, + ItemsView, + Iterator, + KeysView, + Sequence, + ValuesView, + ) + + from .compat import LiteralString, Self + + +__all__ = ( + 'RV', + 'FitResult', + 'OptimizeResult', + 'QuadWeights', + 'QuadOptions', + 'RVContinuous', + 'RVContinuousFrozen', + 'RVDiscrete', + 'RVDiscreteFrozen', + 'RVFrozen', + 'RVFunction', +) + +_T = TypeVar('_T') +_Tss = ParamSpec('_Tss') + +_ND0 = TypeVar('_ND0', bound=lnpt.AtLeast0D) +_ND1 = TypeVar('_ND1', bound=lnpt.AtLeast1D) + +_Tuple2: TypeAlias = tuple[_T, _T] + +_RNG: TypeAlias = np.random.Generator | np.random.RandomState +_Seed: TypeAlias = _RNG | int | None + +_Moments1: TypeAlias = Literal['m', 'v', 's', 'k'] +_Moments2: TypeAlias = Literal['mv', 'ms', 'mk', 'vs', 'vk', 'sk'] +_Moments3: TypeAlias = Literal['mvs', 'mvk', 'msk', 'vsk'] +_Moments4: TypeAlias = Literal['mvsk'] + +_F8: TypeAlias = np.float64 +_Real0D: TypeAlias = lnpt.AnyScalarInt | lnpt.AnyScalarFloat +_Real1D: TypeAlias = lnpt.AnyVectorInt | lnpt.AnyVectorFloat +_Real2D: TypeAlias = lnpt.AnyMatrixInt | lnpt.AnyMatrixFloat +_Real3ND: TypeAlias = lnpt.AnyTensorInt | lnpt.AnyTensorFloat + + +# scipy.integrate + +QuadWeights: TypeAlias = Literal[ + 'cos', 'sin', 'alg', 'alg-loga', 'alg-logb', 'alg-log', 'cauchy', +] + + +class QuadOptions(TypedDict, total=False): + """ + Optional quadrature options to be passed to + [`scipy.integrate.quad`][scipy.integrate.quad]. 
+    """
+    epsabs: float
+    epsrel: float
+    limit: int
+    limlst: int
+    points: lnpt.AnyVectorFloat
+    weight: QuadWeights
+    wvar: float | tuple[float, float]
+    wopts: tuple[
+        int,
+        lnpt.Array[tuple[Literal[25], int], np.floating[Any]],
+    ]
+    maxp1: int
+
+
+# scipy.optimize
+
+
+@runtime_checkable
+class OptimizeResult(Protocol):
+    """
+    Type stub for the most generally available attributes of
+    [`scipy.optimize.OptimizeResult`][scipy.optimize.OptimizeResult].
+
+    Note that other attributes might be present as well, e.g. `jac` or `hess`.
+    But these are currently impossible to type, as there's no way to define
+    optional attributes in protocols.
+
+    Note that `OptimizeResult` actually subclasses `dict`, so its attributes
+    are keys in disguise. But because the `collections.abc.(Mutable)Mapping`
+    ABCs aren't pure protocols (which makes no sense from a theoretical
+    standpoint), they cannot be used as superclasses of another protocol.
+    Basically, this means that nothing in `collections.abc` can be used when
+    writing type stubs...
+    """
+    x: lnpt.Array[tuple[int], np.float64]
+    success: bool
+    status: int
+    fun: float
+    message: LiteralString
+    nfev: int
+    nit: int
+
+    def __getitem__(self, k: str, /) -> object: ...
+    def __setitem__(self, k: str, v: object, /) -> None: ...
+    def __delitem__(self, k: str, /) -> None: ...
+    def __contains__(self, k: object, /) -> bool: ...
+    def __len__(self) -> int: ...
+    def __iter__(self) -> Iterator[str]: ...
+    def __reversed__(self) -> Iterator[str]: ...
+    def keys(self) -> KeysView[str]: ...
+    def values(self) -> ValuesView[object]: ...
+    def items(self) -> ItemsView[str, object]: ...
+    @overload
+    def get(self, k: str, /) -> object: ...
+    @overload
+    def get(self, k: str, d: object, /) -> object: ...
+    @overload
+    def pop(self, k: str, /) -> object: ...
+    @overload
+    def pop(self, k: str, d: object, /) -> object: ...
+    def copy(self) -> dict[str, object]: ...
+
+
+# scipy.stats
+
+_T_params_co = TypeVar(
+    '_T_params_co',
+    bound=tuple[np.float64, ...],  # usually a namedtuple
+    covariant=True,
+    default=tuple[np.float64, np.float64],  # loc, scale
+)
+
+# placeholder for `matplotlib.axes.Axes`.
+_PlotAxes: TypeAlias = Any
+_PlotType: TypeAlias = Literal['hist', 'qq', 'pp', 'cdf']
+
+
+@runtime_checkable
+class FitResult(Protocol[_T_params_co]):
+    """
+    Type stub for the `scipy.stats.fit` result.
+
+    Examples:
+        Create a dummy fit result instance
+
+        >>> import numpy as np
+        >>> from scipy.stats import fit, norm, bernoulli
+        >>> data = [0]
+        >>> isinstance(fit(norm, data), FitResult)
+        True
+        >>> isinstance(fit(bernoulli, data, [(0, 1)]), FitResult)
+        True
+    """
+    discrete: bool
+    success: bool
+    message: str | None
+
+    @property
+    def params(self) -> _T_params_co: ...
+
+    def plot(
+        self,
+        ax: _PlotAxes | None = ...,
+        *,
+        plot_type: _PlotType = ...,
+    ) -> _PlotAxes: ...
+
+
+class RVFunction(Protocol[_Tss]):
+    """
+    Callable protocol for a vectorized distribution function, e.g. the
+    `cdf` and `ppf` methods of `scipy.stats.rv_generic`. In practice,
+    the returned dtype is always `float64` (even for `rv_discrete.ppf`).
+    """
+    @overload
+    def __call__(
+        self,
+        x: lnpt.AnyArrayFloat,
+        /,
+        *args: _Tss.args,
+        **kwds: _Tss.kwargs,
+    ) -> lnpt.Array[Any, np.float64]: ...
+    @overload
+    def __call__(
+        self,
+        x: lnpt.AnyScalarFloat,
+        /,
+        *args: _Tss.args,
+        **kwds: _Tss.kwargs,
+    ) -> float: ...
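A small illustration of what the `RVFunction` protocol buys downstream; `quantile_spread` is a hypothetical helper, not part of Lmo, and since `RVFunction` is not `@runtime_checkable`, it only constrains static type checking:

```python
# Sketch: RVFunction as a structural type for any vectorized distribution
# function. `norm.ppf` fits the scalar overload here; the import follows
# the private-module path used by the tests later in this diff.
from scipy.stats import norm

from lmo.typing._scipy import RVFunction

def quantile_spread(ppf: RVFunction[...], p: float) -> float:
    """Distance between the upper and lower p-th quantiles."""
    return ppf(1 - p) - ppf(p)

print(quantile_spread(norm.ppf, 0.25))  # IQR of N(0, 1): ~1.349
```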
+ + +@runtime_checkable +class RV(Protocol): + """Runtime-checkable interface for `scipy.stats.rv_generic`.""" + a: float + b: float + name: LiteralString + badvalue: float + numargs: int + shapes: LiteralString | None + # moment_type: Literal[0, 1] + # xtol: float + + @property + def random_state(self) -> _RNG: ... + @random_state.setter + def random_state(self, seed: _Seed) -> None: ... + + def __init__(self, seed: _Seed = ...) -> None: ... + + def freeze(self, /, *args: _Real0D, **kwds: _Real0D) -> RVFrozen[Self]: ... + def __call__(self, *args: _Real0D, **kwds: _Real0D) -> RVFrozen[Self]: ... + + @overload + def rvs( + self, + *args: _Real0D, + size: None = ..., + random_state: _Seed = ..., + **kwds: _Real0D, + ) -> _F8: ... + @overload + def rvs( + self, + *args: _Real0D, + size: int, + random_state: _Seed = ..., + **kwds: _Real0D, + ) -> lnpt.Array[tuple[int], _F8]: ... + @overload + def rvs( + self, + *args: _Real0D, + size: _ND0, + random_state: _Seed = ..., + **kwds: _Real0D, + ) -> lnpt.Array[_ND0, _F8]: ... + + @overload + def stats( + self, + *args: _Real0D, + moments: _Moments1, + **kwds: _Real0D, + ) -> _F8: ... + @overload + def stats( + self, + *args: _Real0D, + moments: _Moments2 = ..., + **kwds: _Real0D, + ) -> tuple[_F8, _F8]: ... + @overload + def stats( + self, + *args: _Real0D, + moments: _Moments3 = ..., + **kwds: _Real0D, + ) -> tuple[_F8, _F8, _F8]: ... + @overload + def stats( + self, + *args: _Real0D, + moments: _Moments4 = ..., + **kwds: _Real0D, + ) -> tuple[_F8, _F8, _F8, _F8]: ... + + def moment( + self, + order: int | np.integer[Any], + *args: _Real0D, + **kwds: _Real0D, + ) -> _F8: ... + + def entropy(self, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + def median(self, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + def mean(self, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + def var(self, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + def std(self, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + + @overload + def interval( + self, + confidence: _Real0D, + *args: _Real0D, + **kwds: _Real0D, + ) -> tuple[_F8, _F8]: ... + @overload + def interval( + self, + confidence: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> tuple[lnpt.CanArray[_ND1, _F8], lnpt.CanArray[_ND1, _F8]]: ... + @overload + def interval( + self, + confidence: Sequence[npt.ArrayLike], + *args: _Real0D, + **kwds: _Real0D, + ) -> _Tuple2[lnpt.Array[Any, _F8]]: ... + + def support(self, *args: _Real0D, **kwds: _Real0D) -> tuple[_F8, _F8]: ... + + @overload + def nnlf(self, /, __theta: _Real1D, __x: _Real1D) -> _F8: ... + @overload + def nnlf( + self, + /, + __theta: _Real1D, + __x: _Real2D | _Real3ND, + ) -> lnpt.Array[Any, _F8]: ... + + +@runtime_checkable +class RVDiscrete(RV, Protocol): + """ + Runtime-checkable interface for discrete probability distributions, + like [`scipy.stats.rv_discrete`][scipy.stats.rv_discrete] subtype + instances. + + Examples: + >>> import numpy as np + >>> from scipy.stats import distributions as distrs + >>> isinstance(distrs.binom, RVDiscrete) + True + + Continuous distributions aren't included: + + >>> isinstance(distrs.norm, RVDiscrete) + False + + Note that for "frozen" distributions (a.k.a. random variables), + this is not the case: + + >>> isinstance(distrs.binom(5, .42), RVDiscrete) + False + """ + + @property + def inc(self) -> int: ... 
+ + def __init__( + self, + a: float | None = ..., + b: float | None = ..., + name: LiteralString | None = ..., + badvalue: float | None = ..., + moment_tol: float = ..., + values: tuple[npt.ArrayLike, npt.ArrayLike] | None = ..., + inc: int = ..., + longname: LiteralString | None = ..., + shapes: LiteralString | None = ..., + seed: _Seed = ..., + ) -> None: ... + + @overload + def pmf(self, k: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def pmf( + self, + k: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logpmf(self, k: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def logpmf( + self, + k: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def cdf(self, k: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def cdf( + self, + k: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logcdf(self, k: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def logcdf( + self, + k: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def sf(self, k: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def sf( + self, + k: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logsf(self, k: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def logsf( + self, + k: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def ppf(self, q: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def ppf( + self, + q: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def isf(self, q: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def isf( + self, + q: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + def expect( + self, + func: Callable[[_F8], float] | None = ..., + args: tuple[_Real0D, ...] = ..., + loc: _Real0D = ..., + lb: _Real0D | None = ..., + ub: _Real0D | None = ..., + conditional: bool = ..., + maxcount: int = ..., + tolerance: float = ..., + chunksize: int = ..., + ) -> _F8: ... + + +@runtime_checkable +class RVContinuous(RV, Protocol): + """ + Runtime-checkable interface for continuous probability distributions, + like [`scipy.stats.rv_continuous`][scipy.stats.rv_continuous] subtype + instances. + + Examples: + >>> import numpy as np + >>> from scipy.stats import distributions as distrs + >>> from lmo import distributions as l_distrs + >>> isinstance(distrs.norm, RVContinuous) + True + >>> isinstance(l_distrs.wakeby, RVContinuous) + True + + This also works if `rv_continuous` isn't a base class, but it + looks and quacks like one, e.g. [`l_poly`][lmo.distributions.l_poly]. + + >>> isinstance(l_distrs.l_poly, RVContinuous) + True + + Discrete distributions aren't included: + + >>> isinstance(distrs.binom, RVContinuous) + False + + Note that for "frozen" distributions (a.k.a. 
random variables), + this is not the case: + + >>> isinstance(distrs.norm(), RVContinuous) + False + >>> isinstance(l_distrs.wakeby(5, 1, .5), RVContinuous) + False + """ + def __init__( + self, + momtype: Literal[0, 1] = ..., + a: float | None = ..., + b: float | None = ..., + xtol: float = ..., + badvalue: float | None = ..., + name: LiteralString | None = ..., + longname: LiteralString | None = ..., + shapes: LiteralString | None = ..., + seed: _Seed = ..., + ) -> None: ... + + @overload + def pdf(self, x: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def pdf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logpdf(self, x: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def logpdf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def cdf(self, x: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def cdf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logcdf(self, x: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def logcdf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def sf(self, x: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def sf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logsf(self, x: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def logsf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def ppf(self, q: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def ppf( + self, + q: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def isf(self, q: _Real0D, *args: _Real0D, **kwds: _Real0D) -> _F8: ... + @overload + def isf( + self, + q: lnpt.CanArray[_ND1, lnpt.Real], + *args: _Real0D, + **kwds: _Real0D, + ) -> lnpt.Array[_ND1, _F8]: ... + + def fit( + self, + data: lnpt.AnyArray, + *args: _Real0D, + optimizer: Callable[..., _Real1D] = ..., + method: Literal['MLE', 'MM'] = ..., + floc: _Real0D = ..., + fscale: _Real0D = ..., + **kwds: _Real0D, + ) -> tuple[float | lnpt.Real, ...]: ... + + # def fit_loc_scale( + # self, + # data: npt.ArrayLike, + # *args: _Real0D, + # ) -> tuple[_F8, _F8]: ... + + def expect( + self, + func: Callable[[_F8], float] | None = ..., + args: tuple[_Real0D, ...] = ..., + loc: _Real0D = ..., + scale: _Real0D = ..., + lb: _Real0D | None = ..., + ub: _Real0D | None = ..., + conditional: bool = ..., + **kwds: Unpack[QuadOptions], + ) -> _F8: ... + + +_RV_co = TypeVar('_RV_co', bound=RV, covariant=True) + + +@runtime_checkable +class RVFrozen(Protocol[_RV_co]): + """Currently limited to scalar arguments.""" + + def __init__( + self, + __dist: _RV_co, + *args: _Real0D, + **kwds: _Real0D, + ) -> None: ... + + @property + def args(self) -> tuple[_Real0D, ...]: ... + @property + def kwds(self) -> dict[str, _Real0D]: ... + @property + def dist(self) -> _RV_co: ... + @property + def a(self) -> float: ... + @property + def b(self) -> float: ... + + @property + def random_state(self) -> _RNG: ... 
+ @random_state.setter + def random_state(self, __seed: _Seed) -> None: ... + + @overload + def rvs(self, size: None = ..., random_state: _Seed = ...) -> _F8: ... + @overload + def rvs( + self, + size: int, + random_state: _Seed = ..., + ) -> lnpt.Array[tuple[int], _F8]: ... + @overload + def rvs( + self, + size: _ND0, + random_state: _Seed = ..., + ) -> lnpt.Array[_ND0, _F8]: ... + + @overload + def cdf(self, x: _Real0D) -> _F8: ... + @overload + def cdf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logcdf(self, __x: _Real0D) -> _F8: ... + @overload + def logcdf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def sf(self, x: _Real0D) -> _F8: ... + @overload + def sf( + self, + x: lnpt.CanArray[_ND1, lnpt.Real], + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def ppf(self, q: _Real0D) -> _F8: ... + @overload + def ppf( + self, + q: lnpt.CanArray[_ND1, lnpt.Real], + ) -> lnpt.CanArray[_ND1, _F8]: ... + + @overload + def isf(self, q: _Real0D) -> _F8: ... + @overload + def isf( + self, + q: lnpt.CanArray[_ND1, lnpt.Real], + ) -> lnpt.CanArray[_ND1, _F8]: ... + + @overload + def stats(self, moments: _Moments1) -> _F8: ... + @overload + def stats(self, moments: _Moments2 = ...) -> tuple[_F8, _F8]: ... + @overload + def stats(self, moments: _Moments3) -> tuple[_F8, _F8, _F8]: ... + @overload + def stats(self, moments: _Moments4) -> tuple[_F8, _F8, _F8, _F8]: ... + + def moment(self, order: int | np.integer[Any]) -> _F8: ... + + def median(self) -> _F8: ... + def mean(self) -> _F8: ... + def var(self) -> _F8: ... + def std(self) -> _F8: ... + def entropy(self) -> _F8: ... + + @overload + def interval(self, confidence: _Real0D) -> tuple[_F8, _F8]: ... + @overload + def interval( + self, + confidence: lnpt.CanArray[_ND1, lnpt.Real], + ) -> _Tuple2[lnpt.CanArray[_ND1, _F8]]: ... + @overload + def interval( + self, + confidence: Sequence[npt.ArrayLike], + ) -> _Tuple2[lnpt.CanArray[lnpt.AtLeast1D, _F8]]: ... + + def support(self) -> tuple[_F8, _F8]: ... + + def expect( + self, + func: Callable[[_F8], float] | None = ..., + lb: _Real0D | None = ..., + ub: _Real0D | None = ..., + conditional: bool = ..., + **kwds: Any, + ) -> _F8: ... + + +_RV_D_co = TypeVar('_RV_D_co', bound=RVDiscrete, covariant=True) + + +@runtime_checkable +class RVDiscreteFrozen(RVFrozen[_RV_D_co], Protocol[_RV_D_co]): + """ + Runtime-checkable interface for discrete probability distributions, + like [`scipy.stats.rv_discrete`][scipy.stats.rv_discrete] subtype + instances. + + Examples: + >>> import numpy as np + >>> from scipy.stats import distributions as distrs + >>> isinstance(distrs.bernoulli, RVDiscreteFrozen) + False + >>> isinstance(distrs.bernoulli(.42), RVDiscreteFrozen) + True + >>> isinstance(distrs.uniform(), RVDiscreteFrozen) + False + """ + @overload + def pmf(self, k: _Real0D) -> _F8: ... + @overload + def pmf( + self, + k: lnpt.CanArray[_ND1, lnpt.Real], + ) -> lnpt.Array[_ND1, _F8]: ... + + @overload + def logpmf(self, k: _Real0D) -> _F8: ... + @overload + def logpmf(self, k: lnpt.AnyArrayFloat) -> lnpt.Array[_ND1, _F8]: ... + + +_RV_C_co = TypeVar('_RV_C_co', bound=RVContinuous, covariant=True) + + +@runtime_checkable +class RVContinuousFrozen(RVFrozen[_RV_C_co], Protocol[_RV_C_co]): + """ + Runtime-checkable interface for discrete probability distributions, + like [`scipy.stats.rv_discrete`][scipy.stats.rv_discrete] subtype + instances. 
+
+    Examples:
+        >>> import numpy as np
+        >>> from scipy.stats import distributions as distrs
+        >>> isinstance(distrs.uniform, RVContinuousFrozen)
+        False
+        >>> isinstance(distrs.uniform(), RVContinuousFrozen)
+        True
+        >>> isinstance(distrs.bernoulli(.5), RVContinuousFrozen)
+        False
+
+        >>> from lmo.distributions import l_poly
+        >>> isinstance(l_poly([0, 1/6]), RVContinuousFrozen)
+        True
+        >>> isinstance(l_poly, RVContinuousFrozen)
+        True
+    """
+
+    @overload
+    def pdf(self, x: _Real0D) -> _F8: ...
+    @overload
+    def pdf(
+        self,
+        x: lnpt.CanArray[_ND1, lnpt.Real],
+    ) -> lnpt.Array[_ND1, _F8]: ...
+
+    @overload
+    def logpdf(self, x: _Real0D) -> _F8: ...
+    @overload
+    def logpdf(
+        self,
+        x: lnpt.CanArray[_ND1, lnpt.Real],
+    ) -> lnpt.Array[_ND1, _F8]: ...
diff --git a/lmo/typing/compat.py b/lmo/typing/compat.py
new file mode 100644
index 00000000..8da9b15b
--- /dev/null
+++ b/lmo/typing/compat.py
@@ -0,0 +1,38 @@
+"""Typing compatibility for Python <3.13."""
+import sys
+
+
+if sys.version_info < (3, 13):
+    from typing_extensions import (
+        LiteralString,
+        ParamSpec,
+        Self,
+        TypeVar,
+        TypeVarTuple,
+        Unpack,
+        assert_never,
+        assert_type,
+    )
+else:
+    from typing import (
+        LiteralString,
+        ParamSpec,
+        Self,
+        TypeVar,
+        TypeVarTuple,
+        Unpack,
+        assert_never,
+        assert_type,
+    )
+
+
+__all__ = (
+    'LiteralString',
+    'ParamSpec',
+    'Self',
+    'TypeVar',
+    'TypeVarTuple',
+    'Unpack',
+    'assert_never',
+    'assert_type',
+)
diff --git a/lmo/typing/np.py b/lmo/typing/np.py
new file mode 100644
index 00000000..edf4cf1d
--- /dev/null
+++ b/lmo/typing/np.py
@@ -0,0 +1,261 @@
+# ruff: noqa: D105
+"""Numpy-related type aliases for internal use."""
+from collections.abc import Sequence
+from typing import (
+    Any,
+    Final,
+    Literal,
+    Protocol,
+    TypeAlias,
+    TypeVar,
+    runtime_checkable,
+)
+
+import numpy as np
+
+from .compat import Unpack
+
+
+__all__ = (
+    'NP_VERSION', 'NP_V2',
+    'Bool', 'Int', 'Float', 'Natural', 'Integer', 'Real',
+    'AtLeast0D', 'AtLeast1D', 'AtLeast2D', 'AtLeast3D',
+    'Array', 'CanArray',
+    'AnyScalar', 'AnyScalarBool', 'AnyScalarInt', 'AnyScalarFloat',
+    'AnyVector', 'AnyVectorBool', 'AnyVectorInt', 'AnyVectorFloat',
+    'AnyMatrix', 'AnyMatrixBool', 'AnyMatrixInt', 'AnyMatrixFloat',
+    'AnyTensor', 'AnyTensorBool', 'AnyTensorInt', 'AnyTensorFloat',
+    'AnyArray', 'AnyArrayBool', 'AnyArrayInt', 'AnyArrayFloat',
+    'AnyObjectDType', 'AnyBoolDType',
+    'AnyUIntDType', 'AnyIntDType', 'AnyFloatDType',
+    'SortKind',
+    'Order', 'OrderReshape', 'OrderCopy',
+    'RandomState',
+    'Casting',
+)
+
+_NP_MAJOR: Final[int] = int(np.__version__.split('.', 1)[0])
+_NP_MINOR: Final[int] = int(np.__version__.split('.', 2)[1])
+NP_VERSION: Final[tuple[int, int]] = _NP_MAJOR, _NP_MINOR
+NP_V2: Final[bool] = _NP_MAJOR == 2
+
+
+# Some handy scalar type aliases
+
+if NP_V2:
+    Bool: TypeAlias = np.bool  # noqa: NPY001
+else:
+    Bool: TypeAlias = np.bool_
+UInt: TypeAlias = np.unsignedinteger[Any]
+Int: TypeAlias = np.signedinteger[Any]
+Float: TypeAlias = np.floating[Any]
+
+Natural: TypeAlias = UInt | Bool
+Integer: TypeAlias = Int | Natural
+Real: TypeAlias = Float | Integer
+
+
+# Shapes
+
+AtLeast0D: TypeAlias = tuple[int, ...]
+AtLeast1D: TypeAlias = tuple[int, Unpack[AtLeast0D]]
+AtLeast2D: TypeAlias = tuple[int, Unpack[AtLeast1D]]
+AtLeast3D: TypeAlias = tuple[int, Unpack[AtLeast2D]]
+
+
+# Array and array-likes, with generic shape
+
+_DN = TypeVar('_DN', bound=tuple[()] | tuple[int, ...])
+_DN_co = TypeVar('_DN_co', bound=tuple[()] | tuple[int, ...], covariant=True)
+_ST = TypeVar('_ST', bound=np.generic)
+_ST_co = TypeVar('_ST_co', bound=np.generic, covariant=True)
+
+Array: TypeAlias = np.ndarray[_DN, np.dtype[_ST]]
+
+
+@runtime_checkable
+class CanArray(Protocol[_DN_co, _ST_co]):  # pyright: ignore[reportInvalidTypeVarUse]
+    """
+    Anything that can be converted to a (numpy) array, e.g. with `np.asarray`,
+    similarly to `collections.abc.Sequence`.
+
+    Specifically, this includes instances of types that implement the
+    `__array__` method (which would return a `np.ndarray`).
+
+    Note that `isinstance` can also be used, but due to the performance-first
+    implementation of `typing.runtime_checkable`, it can easily lead to false
+    positives. At runtime, `isinstance(x, CanArray)` is the (cached)
+    equivalent of
+    `inspect.getattr_static(x, '__array__', nil := object()) is not nil`.
+
+
+    Examples:
+        >>> isinstance([1, 2, 3], CanArray)
+        False
+        >>> isinstance(np.array([1, 2, 3]), CanArray)
+        True
+        >>> isinstance(np.ma.array([1, 2, 3]), CanArray)
+        True
+
+        Note that `numpy.generic` instances (which numpy calls "scalars",
+        a name that anyone who knows a bit of linear algebra, i.e. the
+        entirety of numpy's audience, will find very confusing) also
+        implement the `__array__` method, returning a 0-dimensional
+        `np.ndarray`, i.e. `__array__: (T: np.generic) -> Array[tuple[()], T]`:
+
+        >>> isinstance(np.uint(42), CanArray)
+        True
+    """
+    def __array__(self) -> Array[_DN_co, _ST_co]: ...
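Because `CanArray` is purely structural, any class with a conforming `__array__` method passes the (runtime) check. A minimal sketch with a hypothetical `Ramp` container, illustrative only:

```python
# Sketch: a tiny container that satisfies `CanArray` structurally.
import numpy as np

from lmo.typing.np import Array, CanArray

class Ramp:
    """Lazily materializes 0, 1, ..., n - 1 as a 1-d float64 array."""
    def __init__(self, n: int) -> None:
        self._n = n

    def __array__(self) -> Array[tuple[int], np.float64]:
        return np.arange(self._n, dtype=np.float64)

assert isinstance(Ramp(3), CanArray)  # only checks that `__array__` exists
assert np.asarray(Ramp(3)).tolist() == [0.0, 1.0, 2.0]
```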
+
+
+_PyScalar: TypeAlias = bool | int | float | complex | str | bytes
+# _PyScalar: TypeAlias = bool | int | float | complex
+_ST_py = TypeVar('_ST_py', bound=_PyScalar)
+
+_T = TypeVar('_T')
+_PyVector: TypeAlias = Sequence[_T]
+
+
+_AnyScalar: TypeAlias = _ST | _ST_py | CanArray[tuple[()], _ST]
+_AnyVector: TypeAlias = (
+    CanArray[tuple[int], _ST]
+    | _PyVector[_AnyScalar[_ST, _ST_py]]
+)
+_AnyMatrix: TypeAlias = (
+    CanArray[tuple[int, int], _ST]
+    | _PyVector[_AnyVector[_ST, _ST_py]]
+)
+
+# these will result in {0,1,2,N}-D arrays when passed to `np.array` (no need
+# for a broken "nested sequence" type)
+
+AnyScalar: TypeAlias = _AnyScalar[np.generic, _PyScalar]
+AnyVector: TypeAlias = _AnyVector[np.generic, _PyScalar]
+AnyMatrix: TypeAlias = _AnyMatrix[np.generic, _PyScalar]
+AnyTensor: TypeAlias = (
+    CanArray[AtLeast3D, np.generic]
+    | _PyVector[AnyMatrix]
+    | _PyVector['AnyTensor']
+)
+AnyArray: TypeAlias = AnyScalar | AnyVector | AnyMatrix | AnyTensor
+
+AnyScalarBool: TypeAlias = _AnyScalar[Bool, bool]
+AnyVectorBool: TypeAlias = _AnyVector[Bool, bool]
+AnyMatrixBool: TypeAlias = _AnyMatrix[Bool, bool]
+AnyTensorBool: TypeAlias = (
+    CanArray[AtLeast3D, Bool]
+    | _PyVector[AnyMatrixBool]
+    | _PyVector['AnyTensorBool']
+)
+AnyArrayBool: TypeAlias = AnyVectorBool | AnyMatrixBool | AnyTensorBool
+
+AnyScalarInt: TypeAlias = _AnyScalar[Integer, int]
+AnyVectorInt: TypeAlias = _AnyVector[Integer, int]
+AnyMatrixInt: TypeAlias = _AnyMatrix[Integer, int]
+AnyTensorInt: TypeAlias = (
+    CanArray[AtLeast3D, Integer]
+    | _PyVector[AnyMatrixInt]
+    | _PyVector['AnyTensorInt']
+)
+AnyArrayInt: TypeAlias = AnyVectorInt | AnyMatrixInt | AnyTensorInt
+
+AnyScalarFloat: TypeAlias = _AnyScalar[Real, float]
+AnyVectorFloat: TypeAlias = _AnyVector[Real, float]
+AnyMatrixFloat: TypeAlias = _AnyMatrix[Real, float]
+AnyTensorFloat: TypeAlias = (
+    CanArray[AtLeast3D, Real]
+    | _PyVector[AnyMatrixFloat]
+    | _PyVector['AnyTensorFloat']
+)
+AnyArrayFloat: TypeAlias = AnyVectorFloat | AnyMatrixFloat | AnyTensorFloat
+
+# some of the allowed `np.dtype` arguments
+
+_AnyDType: TypeAlias = np.dtype[_ST] | type[_ST]
+AnyObjectDType: TypeAlias = (
+    _AnyDType[np.object_]
+    | Literal['object', 'object_', 'O', '=O', '<O', '>O']
+)
+AnyBoolDType: TypeAlias = (
+    _AnyDType[np.bool_]
+    | type[bool]
+    | Literal['bool', 'bool_', '?', '=?', '<?', '>?']
+)
+AnyUIntDType: TypeAlias = _AnyDType[UInt] | Literal[
+    'uint8', 'u1', '=u1', '<u1', '>u1',
+    'uint16', 'u2', '=u2', '<u2', '>u2',
+    'uint32', 'u4', '=u4', '<u4', '>u4',
+    'uint64', 'u8', '=u8', '<u8', '>u8',
+    'ubyte', 'B', '=B', '<B', '>B',
+    'ushort', 'H', '=H', '<H', '>H',
+    'uintc', 'I', '=I', '<I', '>I',
+    'uintp', 'P', '=P', '<P', '>P',
+    'uint', 'N', '=N', '<N', '>N',
+    'ulong', 'L', '=L', '<L', '>L',
+    'ulonglong', 'Q', '=Q', '<Q', '>Q',
+]
+AnyIntDType: TypeAlias = _AnyDType[Int] | Literal[
+    'int8', 'i1', '=i1', '<i1', '>i1',
+    'int16', 'i2', '=i2', '<i2', '>i2',
+    'int32', 'i4', '=i4', '<i4', '>i4',
+    'int64', 'i8', '=i8', '<i8', '>i8',
+    'byte', 'b', '=b', '<b', '>b',
+    'short', 'h', '=h', '<h', '>h',
+    'intc', 'i', '=i', '<i', '>i',
+    'intp', 'p', '=p', '<p', '>p',
+    'int', 'int_', 'n', '=n', '<n', '>n',
+    'long', 'l', '=l', '<l', '>l',
+    'longlong', 'q', '=q', '<q', '>q',
+]
+AnyFloatDType: TypeAlias = _AnyDType[Float] | Literal[
+    'float16', 'f2', '=f2', '<f2', '>f2',
+    'float32', 'f4', '=f4', '<f4', '>f4',
+    'float64', 'f8', '=f8', '<f8', '>f8',
+    'half', 'e', '=e', '<e', '>e',
+    'single', 'f', '=f', '<f', '>f',
+    'double', 'float', 'float_', 'd', '=d', '<d', '>d',
+    'longdouble', 'g', '=g', '<g', '>g',
+]
+
+
+# Various type aliases
+
+
+Order: TypeAlias = Literal['C', 'F']
+"""Type of the `order` parameter of e.g.
[`np.empty`][numpy.empty].""" +OrderReshape: TypeAlias = Order | Literal['A'] +"""Type of the `order` parameter of e.g. [`np.reshape`][numpy.array].""" +OrderCopy: TypeAlias = OrderReshape | Literal['K'] +"""Type of the `order` parameter of e.g. [`np.array`][numpy.array].""" + +SortKind: TypeAlias = Literal['quicksort', 'heapsort', 'stable'] +""" +Type of the `kind` parameter of e.g. [`np.sort`][numpy.sort], as +allowed by numpy's own stubs. +Note that the actual implementation just looks at `kind[0].lower() == 'q'`. +This means that it's possible to select stable-sort by passing +`kind='SnailSort'` instead of `kind='stable'` (although your typechecker might +ruin the fun). +""" + +RandomState: TypeAlias = np.random.Generator | np.random.RandomState +""" +Union of the [`numpy.random.Generator`][numpy.random.Generator] and the +(legacy) [`numpy.random.RandomState`][numpy.random.RandomState] "RNG" types, +that are mostly compatible. +""" + +Seed: TypeAlias = ( + int + | np.random.SeedSequence + | np.random.BitGenerator + | np.random.Generator +) +""" +Any acceptable "seed" type that can be passed to +[`numpy.random.default_rng`][numpy.random.default_rng]. +""" + +Casting: TypeAlias = Literal['no', 'equiv', 'safe', 'same_kind', 'unsafe'] +"""See [`numpy.can_cast`][numpy.can_cast].""" diff --git a/poetry.lock b/poetry.lock index 868a5d77..9ec41661 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1418,6 +1418,20 @@ files = [ {file = "numpy-2.0.0rc2.tar.gz", hash = "sha256:868e9edbee689d6fdb7957c0b790de2b2123e6feff5d66045d10760c521f2c00"}, ] +[[package]] +name = "optype" +version = "0.4.0" +description = "Building blocks for precise & flexible type hints" +optional = false +python-versions = "<4.0,>=3.10" +files = [ + {file = "optype-0.4.0-py3-none-any.whl", hash = "sha256:1548654f9b063e199a46a0429fa143d1b2c4b0f3dc66e9d051f5d85c1426a9b5"}, + {file = "optype-0.4.0.tar.gz", hash = "sha256:f0cc8931e14017394e667c1691f3a1ee6fcec5ccc53b956c0dd1b7798c1bbe53"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.5", markers = "python_version < \"3.12\""} + [[package]] name = "packaging" version = "24.0" @@ -2550,4 +2564,4 @@ pandas = ["pandas"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.14" -content-hash = "5d3a96652533e491d6ed373a9a99f38f21b1c634c05863023f14424eefef3d48" +content-hash = "711845b916c770c2006ec8cd5cbf09c4c9ba80a083f635706d0a620b50e9148a" diff --git a/pyproject.toml b/pyproject.toml index a32aa726..e40d4c8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,12 +37,13 @@ Documentation = "https://jorenham.github.io/Lmo/" [tool.poetry.dependencies] python = ">=3.10,<3.14" -typing_extensions = {version = "^4.1", python = "<3.11"} +typing_extensions = {version = "^4.5", python = "<3.13"} # https://scientific-python.org/specs/spec-0000/ numpy = ">=1.23" scipy = ">=1.9" pandas = {version = ">=1.5", optional = true} +optype = "^0.4.0" [tool.poetry.group.dev.dependencies] codespell = "^2.2.6" @@ -127,7 +128,6 @@ exclude = [ "tests", ] ignore = [ - # TODO: figure out how to do this conditionally "lmo/contrib/pandas.py", ] defineConstant = { NP_V2 = true } @@ -137,30 +137,19 @@ venv = ".venv" pythonVersion = "3.10" pythonPlatform = "All" typeCheckingMode = "strict" + useLibraryCodeForTypes = true +reportMissingTypeStubs = false -deprecateTypingAliases = true -disableBytesTypePromotions = true -reportPropertyTypeMismatch = "warning" -reportMissingTypeStubs = "none" -reportUnusedImport = "warning" -reportUnusedClass = "warning" -reportUnusedFunction = "warning" 
-reportUnusedVariable = "warning" -reportConstantRedefinition = "warning" -reportInconsistentConstructor = "warning" -reportMissingTypeArgument = "warning" -reportUninitializedInstanceVariable = "warning" -reportCallInDefaultInitializer = "warning" -reportUnnecessaryIsInstance = "warning" -reportUnnecessaryCast = "warning" -reportUnnecessaryComparison = "warning" -reportUnnecessaryContains = "warning" -reportUnusedCallResult = "warning" -reportUnusedExpression = "warning" -reportUnnecessaryTypeIgnoreComment = "warning" -reportMatchNotExhaustive = "error" -reportShadowedImports = "warning" +reportCallInDefaultInitializer = true +reportImplicitOverride = false +reportImplicitStringConcatenation = true +reportImportCycles = false +reportPropertyTypeMismatch = true +reportShadowedImports = true +reportUninitializedInstanceVariable = true +reportUnnecessaryTypeIgnoreComment = true +reportUnusedCallResult = true [tool.ruff] @@ -201,12 +190,15 @@ select = [ "A", # flake8-builtins "COM", # flake8-commas "C4", # flake8-comprehensions + "DTZ", # flake8-datetimez "T10", # flake8-debugger "EXE", # flake8-executable "FA", # flake8-future-annotations "ISC", # flake8-implicit-str-concat "ICN", # flake8-import-conventions "INP", # flake8-no-pep420 + "LOG", # flake8-logging + "G", # flake8-logging-format "PIE", # flake8-pie "T20", # flake8-print "PYI", # flake8-pyi @@ -241,12 +233,7 @@ ignore = [ # flake8-annotations "ANN101", # missing-type-self (deprecated) "ANN102", # missing-type-cls (deprecated) - - # flake8-bugbear - "B023", # function-uses-loop-variable - - # pygrep-hooks - "PGH003", # blanket-type-ignore + "ANN401", # any-type # pylint "PLC0415", # import-outside-top-level @@ -271,6 +258,7 @@ typing-modules = [ [tool.ruff.lint.per-file-ignores] "tests/*" = ["D", "N806", "ANN", "PLC2701"] +"typetests/*" = ["D", "INP001"] [tool.ruff.lint.pep8-naming] extend-ignore-names = ["*_gen", "l_rv_*"] @@ -343,3 +331,25 @@ skip = """\ *.ipynb\ """ context = 2 + + +[tool.yapf] +based_on_style = 'pep8' + +align_closing_bracket_with_visual_indent = false +blank_lines_around_top_level_definition = 1 +blank_line_before_nested_class_or_def = false +blank_lines_between_top_level_imports_and_variables = 2 +coalesce_brackets = true +continuation_align_style = 'space' +dedent_closing_brackets = true +force_multiline_dict = true +indent_dictionary_value = true +spaces_around_power_operator = true +split_all_top_level_comma_separated_values = true +split_arguments_when_comma_terminated = true +split_before_arithmetic_operator = true +split_before_dot = true +split_before_expression_after_opening_paren = true +split_before_first_argument = true +split_complex_comprehension = true diff --git a/tests/test_distributions.py b/tests/test_distributions.py index 24fe3a17..0c68e6f1 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -1,17 +1,20 @@ +import functools from typing import cast import numpy as np import pytest -from numpy.testing import assert_allclose -from scipy.stats.distributions import tukeylambda, uniform # type: ignore +from numpy.testing import assert_allclose as _assert_allclose +from scipy.stats.distributions import tukeylambda, uniform from lmo.distributions import genlambda, l_poly, wakeby -from lmo.typing import AnyTrim, RVContinuous +from lmo.typing import AnyTrim +from lmo.typing._scipy import RVContinuous -ATOL = 1e-10 Q = np.linspace(1 / 100, 1, 99, endpoint=False) +assert_allclose = functools.partial(_assert_allclose, atol=1e-9) + @pytest.mark.parametrize( 'trim', @@ -20,24 
+23,24 @@ def test_l_poly_eq_uniform(trim: AnyTrim): p0 = x0 = np.linspace(0, 1) - X = cast('RVContinuous[()]', uniform()) + X = cast('RVContinuous', uniform()) X_hat = l_poly(X.l_moment([1, 2], trim=trim), trim=trim) t4 = X.l_stats(trim=trim) t4_hat = X_hat.l_stats(trim=trim) - assert_allclose(t4_hat, t4, atol=ATOL) + assert_allclose(t4_hat, t4) mvsk = X.stats(moments='mvsk') mvsk_hat = X_hat.stats(moments='mvsk') - assert_allclose(mvsk_hat, mvsk, atol=ATOL) + assert_allclose(mvsk_hat, mvsk) x = X.ppf(p0) x_hat = X_hat.ppf(p0) - assert_allclose(x_hat, x, atol=ATOL) + assert_allclose(x_hat, x) F = X.cdf(p0) F_hat = X_hat.cdf(p0) - assert_allclose(F_hat, F, atol=ATOL) + assert_allclose(F_hat, F) f = X.pdf(x0) f_hat = X_hat.pdf(x0) @@ -45,7 +48,7 @@ def test_l_poly_eq_uniform(trim: AnyTrim): H = X.entropy() H_hat = X_hat.entropy() - assert_allclose(H_hat, H, atol=ATOL) + assert_allclose(H_hat, H) @pytest.mark.parametrize('scale', [1, .5, 2]) @@ -78,24 +81,24 @@ def test_wakeby(b: float, d: float, f: float, loc: float, scale: float): # quad_opts={} forces numerical evaluation l_stats_quad = X.l_stats(quad_opts={}) l_stats_theo = X.l_stats() - assert_allclose(l_stats_theo, l_stats_quad, atol=ATOL, equal_nan=d >= 1) + assert_allclose(l_stats_theo, l_stats_quad, equal_nan=d >= 1) ll_stats_quad = X.l_stats(quad_opts={}, trim=(0, 1)) ll_stats_theo = X.l_stats(trim=(0, 1)) - assert_allclose(ll_stats_theo, ll_stats_quad, atol=ATOL) + assert_allclose(ll_stats_theo, ll_stats_quad) tl_stats_quad = X.l_stats(quad_opts={}, trim=1) tl_stats_theo = X.l_stats(trim=1) - assert_allclose(tl_stats_theo, tl_stats_quad, atol=ATOL) + assert_allclose(tl_stats_theo, tl_stats_quad) tll_stats_quad = X.l_stats(quad_opts={}, trim=(1, 2)) tll_stats_theo = X.l_stats(trim=(1, 2)) - assert_allclose(tll_stats_theo, tll_stats_quad, atol=ATOL) + assert_allclose(tll_stats_theo, tll_stats_quad) @pytest.mark.parametrize('lam', [0, 0.14, 1, -1]) def test_genlambda_tukeylamba(lam: float): - X0 = cast(RVContinuous[float], tukeylambda(lam)) + X0 = cast(RVContinuous, tukeylambda(lam)) X = genlambda(lam, lam, 0) x0 = X0.ppf(Q) @@ -126,19 +129,16 @@ def test_genlambda_tukeylamba(lam: float): tl_tau0 = X0.l_stats(trim=1) tl_tau = X.l_stats(trim=1) - assert_allclose(tl_tau, tl_tau0, atol=ATOL) + assert_allclose(tl_tau, tl_tau0) # @pytest.mark.parametrize('scale', [1, .5, 2]) # @pytest.mark.parametrize('loc', [0, 1, -1]) -# @pytest.mark.parametrize('f', [0, .5, 1, -.5, -1]) -@pytest.mark.parametrize('scale', [1]) -@pytest.mark.parametrize('loc', [0]) @pytest.mark.parametrize('f', [0, 1, -1]) @pytest.mark.parametrize('d', [0, .5, 2, -0.9, -1.95]) @pytest.mark.parametrize('b', [0, .5, 1, -0.9, -1.95]) -def test_genlambda(b: float, d: float, f: float, loc: float, scale: float): - X = genlambda(b, d, f, loc, scale) +def test_genlambda(b: float, d: float, f: float): + X = genlambda(b, d, f) assert X.cdf(X.support()[0]) == 0 assert X.ppf(0) == X.support()[0] @@ -150,7 +150,7 @@ def test_genlambda(b: float, d: float, f: float, loc: float, scale: float): # m_x1 = X.expect(lambda x: x) if min(b, d) > -1 else np.nan # mean = X.mean() - # assert_allclose(mean, m_x1, equal_nan=True, atol=ATOL) + # assert_allclose(mean, m_x1, equal_nan=True) # m_x2 = X.expect(lambda x: (x - m_x1)**2) if min(b, d) > -.5 else np.nan # var = X.var() @@ -159,22 +159,22 @@ def test_genlambda(b: float, d: float, f: float, loc: float, scale: float): # quad_opts={} forces numerical evaluation if b > -1 and d > -1: l_tau_quad = X.l_stats(quad_opts={}) - 
assert_allclose(l_tau_quad[0], X.mean(), atol=ATOL) + assert_allclose(l_tau_quad[0], X.mean(), atol=1e-8) assert l_tau_quad[1] > 0 or np.isnan(l_tau_quad[1]) l_tau_theo = X.l_stats() - assert_allclose(l_tau_theo, l_tau_quad, atol=ATOL) + assert_allclose(l_tau_theo, l_tau_quad, atol=1e-8) if b > -1 and d > -2: ll_tau_quad = X.l_stats(quad_opts={}, trim=(0, 1)) assert ll_tau_quad[1] > 0 or np.isnan(ll_tau_quad[1]) ll_tau_theo = X.l_stats(trim=(0, 1)) - assert_allclose(ll_tau_theo, ll_tau_quad, atol=ATOL) + assert_allclose(ll_tau_theo, ll_tau_quad) if b > -2 and d > -1: lh_tau_quad = X.l_stats(quad_opts={}, trim=(1, 0)) assert lh_tau_quad[1] > 0 or np.isnan(lh_tau_quad[1]) lh_tau_theo = X.l_stats(trim=(1, 0)) - assert_allclose(lh_tau_theo, lh_tau_quad, atol=ATOL) + assert_allclose(lh_tau_theo, lh_tau_quad) tl_tau_quad = X.l_stats(quad_opts={}, trim=1) assert tl_tau_quad[1] > 0 or np.isnan(tl_tau_quad[1]) diff --git a/tests/test_theoretical.py b/tests/test_theoretical.py index ed226057..29094074 100644 --- a/tests/test_theoretical.py +++ b/tests/test_theoretical.py @@ -12,7 +12,7 @@ settings, strategies as st, ) -from numpy.testing import assert_allclose +from numpy.testing import assert_allclose as _assert_allclose from scipy.special import ndtr, ndtri, zeta from lmo import constants @@ -27,6 +27,8 @@ ) +assert_allclose = functools.partial(_assert_allclose, atol=1e-12) + norm_cdf = cast(Callable[[float], float], ndtr) norm_ppf = cast(Callable[[float], float], ndtri) @@ -116,7 +118,7 @@ def test_lm_expon(a: float): l_cdf = l_moment_from_cdf(cdf, [0, 1, 2, 3, 4]) l_stats_cdf = l_cdf[1:] / l_cdf[[0, 0, 2, 2]] - assert_allclose(l_stats_cdf, l_stats) + assert_allclose(l_stats_cdf, l_stats, rtol=5e-7) def test_lm_normal(): @@ -184,7 +186,7 @@ def test_tlm_cauchy(): @given(a=st.floats(0.1, 10)) -def test_lhm_expon(a: float): +def test_llm_expon(a: float): r = [1, 2, 3, 4] lr = a * np.array([1, 1 / 2, 1 / 9, 1 / 24]) / 2 @@ -196,7 +198,7 @@ def test_lhm_expon(a: float): assert_allclose(l_ppf, lr) l_cdf = l_moment_from_cdf(cdf, r, trim=(0, 1)) - assert_allclose(l_cdf, lr) + assert_allclose(l_cdf, lr, rtol=5e-7) l_qdf = l_moment_from_qdf(qdf, r[1:], trim=(0, 1)) assert_allclose(l_qdf, lr[1:]) @@ -225,7 +227,7 @@ def test_lm_cov_expon(): assert_allclose(k3, k3_hat) -def test_lhm_cov_expon(): +def test_llm_cov_expon(): k3 = np.array([ [1 / 3, 1 / 8, 0], [1 / 8, 3 / 40, 1 / 60], diff --git a/tests/test_univariate.py b/tests/test_univariate.py index b4b88fb4..c3168176 100644 --- a/tests/test_univariate.py +++ b/tests/test_univariate.py @@ -131,10 +131,10 @@ def test_l_loc_linearity( assert np.isscalar(l1) l1_add = lmo.l_loc(x + dloc, trim) - assert l1_add == pytest.approx(l1 + dloc, rel=1e-5, abs=1e-8) # type: ignore + assert l1_add == pytest.approx(l1 + dloc, rel=1e-5, abs=1e-8) l1_mul = lmo.l_loc(x * dscale, trim) - assert l1_mul == pytest.approx(l1 * dscale, rel=1e-5, abs=1e-8) # type: ignore + assert l1_mul == pytest.approx(l1 * dscale, rel=1e-5, abs=1e-8) @given(a=st_a1) @@ -170,7 +170,7 @@ def test_l_scale_invariant_loc( l2 = lmo.l_scale(x, trim) assert np.isfinite(l2) assert np.isscalar(l2) - assert round(l2, 8) >= 0 # type: ignore + assert round(l2, 8) >= 0 l2_add = lmo.l_scale(x + dloc, trim) assert l2_add == pytest.approx(l2, rel=1e-5, abs=1e-8) @@ -189,13 +189,13 @@ def test_l_scale_linear_scale( l2 = lmo.l_scale(x, trim) assert np.isfinite(l2) assert np.isscalar(l2) - assert round(l2, 8) >= 0 # type: ignore + assert round(l2, 8) >= 0 # asymmetric trimming flips under sign change itrim = 
trim[::-1] if dscale < 0 else trim l2_mul = lmo.l_scale(x * dscale, itrim) - assert l2_mul == pytest.approx(abs(l2 * dscale), abs=1e-8) # type: ignore + assert l2_mul == pytest.approx(abs(l2 * dscale), abs=1e-8) def test_ll_trim_ev(): diff --git a/tests/test_utils.py b/tests/test_utils.py index ef1ce96d..0cb594d9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,3 @@ -# type: ignore - import numpy as np from hypothesis import ( given, diff --git a/tests/test_weights.py b/tests/test_weights.py index 69b4cc36..aa0929bd 100644 --- a/tests/test_weights.py +++ b/tests/test_weights.py @@ -1,5 +1,3 @@ -# type: ignore - import numpy as np from hypothesis import ( given, diff --git a/typetests/l_comoment.py b/typetests/l_comoment.py new file mode 100644 index 00000000..ab163acf --- /dev/null +++ b/typetests/l_comoment.py @@ -0,0 +1,22 @@ +# pyright: reportUnusedCallResult=false +from typing import Any, TypeAlias + +import numpy as np + +import lmo +from lmo.typing import np as lnpt +from lmo.typing.compat import assert_type + + +_ArrayF8: TypeAlias = lnpt.Array[Any, np.float64] + +X = [ + [1.9517689, -0.39353141, -0.46680832, -0.43176034, 0.03754792, -0.2559433], + [-0.18679035, -0.30584785, -1.32954, 0.27871746, -0.19124341, -2.1717801], +] + + +assert_type(lmo.l_comoment(X, 2), _ArrayF8) +assert_type(lmo.l_comoment(np.array(X), 2), _ArrayF8) +assert_type(lmo.l_comoment(np.array(X).T, 2, rowvar=False), _ArrayF8) +assert_type(lmo.l_comoment(X, 2, dtype=np.half), lnpt.Array[Any, np.half]) diff --git a/typetests/l_moment.py b/typetests/l_moment.py new file mode 100644 index 00000000..43a91555 --- /dev/null +++ b/typetests/l_moment.py @@ -0,0 +1,50 @@ +# pyright: reportUnusedCallResult=false +from typing import Any, TypeAlias + +import numpy as np +import numpy.typing as npt + +import lmo +from lmo.typing import np as lnpt +from lmo.typing.compat import assert_type + + +_ArrayF8: TypeAlias = lnpt.Array[Any, np.float64] + +X = [0.14543334, 2.17509751, 0.60844233, 1.47809552, -1.32510269, 1.0979731] +XX = [X, X] + +# default +assert_type(lmo.l_moment(X, 2), np.float64) +assert_type(lmo.l_moment(np.array(X), 2), np.float64) +assert_type(lmo.l_moment(np.array(X, dtype=np.float32), 2), np.float64) +assert_type(lmo.l_moment(np.array(X, dtype=np.int32), 2), np.float64) +assert_type(lmo.l_moment(X, np.intp(2)), np.float64) +assert_type(lmo.l_moment(X, np.uint8(2)), np.float64) +assert_type(lmo.l_moment(XX, np.uint8(2)), np.float64) +assert_type(lmo.l_moment(np.array(XX), np.uint8(2)), np.float64) + +# trim +assert_type(lmo.l_moment(X, 2, 0), np.float64) +assert_type(lmo.l_moment(X, 2, 1), np.float64) +assert_type(lmo.l_moment(X, 2, trim=1), np.float64) +assert_type(lmo.l_moment(X, 2, trim=(1, 1)), np.float64) +assert_type(lmo.l_moment(X, 2, trim=(.5, .5)), np.float64) +assert_type(lmo.l_moment(X, 2, trim=(1, .5)), np.float64) +assert_type(lmo.l_moment(X, 2, trim=(.5, 1)), np.float64) + +# vectorized r +assert_type(lmo.l_moment(X, [1, 2, 3, 4]), _ArrayF8) +assert_type(lmo.l_moment(X, (1, 2, 3, 4)), _ArrayF8) +assert_type(lmo.l_moment(X, np.arange(1, 5)), _ArrayF8) + +# sctype +assert_type(lmo.l_moment(X, 2, dtype=np.float32), np.float32) +assert_type(lmo.l_moment(X, 2, dtype=np.longdouble), np.longdouble) +assert_type(lmo.l_moment(X, 2, dtype=np.dtype(np.float16)), np.float16) +assert_type(lmo.l_moment(X, [1, 2, 3, 4], dtype=np.half), npt.NDArray[np.half]) + +# axis +assert_type(lmo.l_moment(XX, 2, axis=0), _ArrayF8) +assert_type(lmo.l_moment(np.array(XX), 2, axis=0), _ArrayF8) 
+assert_type(lmo.l_moment(XX, 2, axis=0, dtype=np.half), npt.NDArray[np.half]) diff --git a/typetests/l_ratio.py b/typetests/l_ratio.py new file mode 100644 index 00000000..9c422022 --- /dev/null +++ b/typetests/l_ratio.py @@ -0,0 +1,45 @@ +# pyright: reportUnusedCallResult=false +from typing import TypeAlias + +import numpy as np +import numpy.typing as npt + +import lmo +from lmo.typing.compat import assert_type + + +X = [0.14543334, 2.17509751, 0.60844233, 1.47809552, -1.32510269, 1.0979731] + +_ArrF8: TypeAlias = npt.NDArray[np.float64] + +# default +assert_type(lmo.l_ratio(X, 4, 2), np.float64) +assert_type(lmo.l_ratio(np.array(X), 4, 2), np.float64) +assert_type(lmo.l_ratio(np.array(X, dtype=np.float32), 4, 2), np.float64) +assert_type(lmo.l_ratio(np.array(X, dtype=np.int32), 4, 2), np.float64) +assert_type(lmo.l_ratio(X, np.int16(4), 2), np.float64) +assert_type(lmo.l_ratio(X, 4, np.uint8(2)), np.float64) +assert_type(lmo.l_ratio(X, np.int16(4), np.uint8(2)), np.float64) + +# trim +assert_type(lmo.l_ratio(X, 4, 2, 0), np.float64) +assert_type(lmo.l_ratio(X, 4, 2, 1), np.float64) +assert_type(lmo.l_ratio(X, 4, 2, trim=1), np.float64) +assert_type(lmo.l_ratio(X, 4, 2, trim=(1, 1)), np.float64) +assert_type(lmo.l_ratio(X, 4, 2, trim=(.5, .5)), np.float64) +assert_type(lmo.l_ratio(X, 4, 2, trim=(1, .5)), np.float64) +assert_type(lmo.l_ratio(X, 4, 2, trim=(.5, 1)), np.float64) + +# sctype +assert_type(lmo.l_ratio(X, 4, 2, dtype=np.float32), np.float32) +assert_type(lmo.l_ratio(X, 4, 2, dtype=np.longdouble), np.longdouble) +assert_type(lmo.l_ratio(X, 4, 2, dtype=np.dtype(np.float16)), np.float16) + +# vectorized r +assert_type(lmo.l_ratio(X, [3, 4], 2), _ArrF8) +assert_type(lmo.l_ratio(X, np.array([3, 4]), 2), _ArrF8) +assert_type(lmo.l_ratio(X, [1, 2, 3, 4], [0, 0, 2, 2]), _ArrF8) +assert_type(lmo.l_ratio(X, np.array([1, 2, 3, 4]), [0, 0, 2, 2]), _ArrF8) +assert_type(lmo.l_ratio(X, [1, 2, 3, 4], np.array([0, 0, 2, 2])), _ArrF8) +assert_type(lmo.l_ratio(X, 3, [0, 2]), _ArrF8) +assert_type(lmo.l_ratio(X, 3, np.array([0, 2])), _ArrF8) diff --git a/typetests/l_stats.py b/typetests/l_stats.py new file mode 100644 index 00000000..c49e6961 --- /dev/null +++ b/typetests/l_stats.py @@ -0,0 +1,29 @@ +# pyright: reportUnusedCallResult=false +from typing import Any, TypeAlias + +import numpy as np + +import lmo +from lmo.typing import np as lnpt +from lmo.typing.compat import assert_type + + +X = [0.14543334, 2.17509751, 0.60844233, 1.47809552, -1.32510269, 1.0979731] +XX = [X, X] + +_ArrF8: TypeAlias = lnpt.Array[Any, np.float64] + +# default +assert_type(lmo.l_stats(X), _ArrF8) +assert_type(lmo.l_stats(np.array(X, dtype=np.float32)), _ArrF8) +assert_type(lmo.l_stats(np.array(X, dtype=np.int32)), _ArrF8) +assert_type(lmo.l_stats(XX), _ArrF8) +assert_type(lmo.l_stats(np.array(XX)), _ArrF8) + +# num +assert_type(lmo.l_stats(X, num=3), _ArrF8) +assert_type(lmo.l_stats(X, 0, 3), _ArrF8) + +# axis +assert_type(lmo.l_stats(XX, axis=0), _ArrF8) +assert_type(lmo.l_stats(np.array(XX), axis=0), _ArrF8)
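A side note on the two trickier aliases added in `lmo/typing/np.py` above. The sketch below is plain numpy, independent of Lmo, and the variable names are illustrative only; it exercises each member of the `Seed` union against `numpy.random.default_rng`, and the first-character `kind` quirk that the `SortKind` docstring describes:

    import numpy as np

    # Each member of the `Seed` union is a valid argument to `default_rng`:
    rng = np.random.default_rng(42)                          # int
    rng = np.random.default_rng(np.random.SeedSequence(42))  # SeedSequence
    rng = np.random.default_rng(np.random.PCG64(42))         # BitGenerator
    rng = np.random.default_rng(rng)                         # Generator (returned unaltered)

    # `SortKind` only lists the values that numpy's own stubs accept:
    a = rng.standard_normal(8)
    a.sort(kind='stable')
    # As the `SortKind` docstring notes, the implementation inspects only the
    # first character of `kind`, so the commented-out call below would also
    # run a stable sort at runtime (a typechecker will rightly reject it):
    # a.sort(kind='SnailSort')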