diff --git a/docs/Project.toml b/docs/Project.toml
index 4fc67780b..9641ba043 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -2,7 +2,6 @@
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
ChaosTools = "608a59af-f2a3-5ad4-90b4-758bdf3122a7"
CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298"
-DelayEmbeddings = "5732040d-69e3-5649-938a-b6b4f237613f"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
diff --git a/docs/make.jl b/docs/make.jl
index eca857b90..ac802111b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -2,10 +2,10 @@ cd(@__DIR__)
using Pkg
CI = get(ENV, "CI", nothing) == "true" || get(ENV, "GITHUB_TOKEN", nothing) !== nothing
using Entropies
-using DelayEmbeddings
using Documenter
using DocumenterTools: Themes
using CairoMakie
+using Entropies.DelayEmbeddings
import Entropies.Wavelets
# %% JuliaDynamics theme
@@ -35,6 +35,7 @@ ENV["JULIA_DEBUG"] = "Documenter"
ENTROPIES_PAGES = [
"index.md",
"probabilities.md",
+ "encodings.md",
"entropies.md",
"complexity.md",
"multiscale.md",
diff --git a/docs/src/devdocs.md b/docs/src/devdocs.md
index d2a1569d3..003d4d3bc 100644
--- a/docs/src/devdocs.md
+++ b/docs/src/devdocs.md
@@ -11,7 +11,7 @@ Good practices in developing a code base apply in every Pull Request. The [Good
5. If suitable, the estimator may be able to operate based on [`Encoding`]s. If so, it is preferred to implement an `Encoding` subtype and extend the methods [`encode`](@ref) and [`decode`](@ref). This will allow your probabilities estimator to be used with a larger span of entropy and complexity methods without additional effort.
6. Implement dispatch for [`probabilities_and_outcomes`](@ref) and your probabilities estimator type.
7. Implement dispatch for [`outcome_space`](@ref) and your probabilities estimator type.
-8. Add your probabilities estimator type to the list in the docstring of [`ProbabilitiyEstimator`](@ref), and if you also made an encoding, add it to the [`Encoding`](@ref) docstring.
+8. Add your probabilities estimator type to the table in the probabilities page of the documentation. If you also made an encoding, add it to the corresponding table in the encodings page. A minimal skeleton of steps 6 and 7 is sketched below.
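+
+A hypothetical minimal skeleton of steps 6 and 7 (`MyEstimator` and its two-element
+outcome space are purely illustrative):
+
+```julia
+using Entropies
+import Entropies: probabilities_and_outcomes, outcome_space
+
+struct MyEstimator <: ProbabilitiesEstimator end
+
+# Step 7: the set of all possible outcomes for this estimator.
+outcome_space(::MyEstimator) = [1, 2]
+
+# Step 6: estimate probabilities from input data `x` and return them
+# together with the corresponding outcomes.
+function probabilities_and_outcomes(est::MyEstimator, x)
+    # Replace with a real estimation procedure that uses `x`.
+    return Probabilities([0.5, 0.5]), outcome_space(est)
+end
+```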
### Optional steps
You may extend any of the following functions if there are potential performance benefits in doing so:
diff --git a/docs/src/encodings.md b/docs/src/encodings.md
new file mode 100644
index 000000000..b267cbe89
--- /dev/null
+++ b/docs/src/encodings.md
@@ -0,0 +1,20 @@
+# Encodings
+
+## Encoding API
+
+Some probability estimators first "encode" input data into an intermediate representation indexed by the positive integers. This intermediate representation is called an "encoding" and its API is defined by the following:
+
+```@docs
+Encoding
+encode
+decode
+```
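+
+For example, here is a round trip through [`OrdinalPatternEncoding`](@ref)
+(a minimal sketch; see the docstrings below for details):
+
+```julia
+using Entropies
+c = OrdinalPatternEncoding(3)
+i = encode(c, [4.0, 1.0, 9.0]) # the vector's ordinal pattern, as a positive integer
+decode(c, i)                   # recovers the permutation pattern [2, 1, 3]
+```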
+
+## Available encodings
+
+```@docs
+OrdinalPatternEncoding
+GaussianCDFEncoding
+RectangularBinEncoding
+```
+
diff --git a/docs/src/examples.md b/docs/src/examples.md
index 1e8dbada4..c127fb161 100644
--- a/docs/src/examples.md
+++ b/docs/src/examples.md
@@ -233,7 +233,7 @@ fig
### Kaniadakis entropy
-Here, we show how [`Kaniadakis`](@ref) entropy changes as function of the parameter `a` for
+Here, we show how [`Kaniadakis`](@ref) entropy changes as function of the parameter `a` for
a range of two-element probability distributions given by
`Probabilities([p, 1 - p])` for `p in 0.0:0.01:1.0`.
@@ -370,11 +370,41 @@ end
You see that while the direct entropy values of the chaotic and noisy signals change massively with `N`, they are almost the same for the normalized version.
For the regular signals, the entropy decreases nevertheless because the noise contribution of the Fourier computation becomes less significant.
+## Spatiotemporal permutation entropy
+
+Usage of a [`SpatialSymbolicPermutation`](@ref) estimator is straightforward.
+Here we get the spatial permutation entropy of a 2D array (e.g., an image):
+
+```@example MAIN
+using Entropies
+x = rand(50, 50) # some image
+stencil = [1 1; 0 1] # or one of the other ways of specifying stencils
+est = SpatialSymbolicPermutation(stencil, x)
+h = entropy(est, x)
+```
+
+To apply this to timeseries of spatial data, simply loop over the call, e.g.:
+
+```@example MAIN
+data = [rand(50, 50) for i in 1:10] # e.g., evolution of a 2D field of a PDE
+est = SpatialSymbolicPermutation(stencil, first(data))
+h_vs_t = map(d -> entropy(est, d), data)
+```
+
+Computing any other generalized spatiotemporal permutation entropy is trivial, e.g. with [`Renyi`](@ref):
+
+```@example MAIN
+x = reshape(repeat(1:5, 500) .+ 0.1*rand(500*5), 50, 50)
+est = SpatialSymbolicPermutation(stencil, x)
+entropy(Renyi(q = 2), est, x)
+```
+
+
## Spatial discrete entropy: Fabio
Let's see how the normalized permutation and dispersion entropies increase for an image that gets progressively more noise added to it.
-```@example
+```@example MAIN
using Entropies
using Distributions
using CairoMakie
@@ -386,11 +416,11 @@ rot = warp(img, recenter(RotMatrix(-3pi/2), center(img));)
original = Float32.(rot)
noise_levels = collect(0.0:0.25:1.0) .* std(original) * 5 # % of 1 standard deviation
-noisy_imgs = [i == 1 ? original : original .+ rand(Uniform(0, nL), size(original))
+noisy_imgs = [i == 1 ? original : original .+ rand(Uniform(0, nL), size(original))
for (i, nL) in enumerate(noise_levels)]
# a 2x2 stencil (i.e. dispersion/permutation patterns of length 4)
-stencil = ((2, 2), (1, 1))
+stencil = ((2, 2), (1, 1))
est_disp = SpatialDispersion(stencil, original; c = 5, periodic = false)
est_perm = SpatialSymbolicPermutation(stencil, original; periodic = false)
@@ -399,8 +429,8 @@ hs_perm = [entropy_normalized(est_perm, img) for img in noisy_imgs]
# Plot the results
fig = Figure(size = (800, 1000))
-ax = Axis(fig[1, 1:length(noise_levels)],
- xlabel = "Noise level",
+ax = Axis(fig[1, 1:length(noise_levels)],
+ xlabel = "Noise level",
ylabel = "Normalized entropy")
scatterlines!(ax, noise_levels, hs_disp, label = "Dispersion")
scatterlines!(ax, noise_levels, hs_perm, label = "Permutation")
diff --git a/docs/src/index.md b/docs/src/index.md
index c4012d8f5..d55ab8f25 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -8,7 +8,7 @@ Entropies
You are reading the development version of the documentation of Entropies.jl,
that will become version 2.0.
-## API & terminology
+## Terminology
!!! note
The documentation here follows (loosely) chapter 5 of
@@ -16,15 +16,14 @@ Entropies
Datseris & Parlitz, Springer 2022.
In the literature, the term "entropy" is used (and abused) in multiple contexts.
-The API and documentation of Entropies.jl aim to clarify some aspects of its usage, and
-to provide a simple way to obtain probabilities, entropies, or other complexity measures.
+The API and documentation of Entropies.jl aim to clarify some aspects of its usage, and to provide a simple way to obtain probabilities, entropies, or other complexity measures.
### Probabilities
Entropies and other complexity measures are typically computed based on _probability distributions_.
-These are obtained from [Input data for Entropies.jl](@ref) in a plethora of different ways.
-The central API function that returns a probability distribution (in fact, just a vector of probabilities) is [`probabilities`](@ref), which takes in a subtype of [`ProbabilitiesEstimator`](@ref) to specify how the probabilities are computed.
-All estimators available in Entropies.jl can be found in the [estimators page](@ref probabilities_estimators).
+These can be obtained from input data in a plethora of different ways.
+The central API function that returns a probability distribution (or more precisely a probability mass function) is [`probabilities`](@ref), which takes in a subtype of [`ProbabilitiesEstimator`](@ref) to specify how the probabilities are computed.
+All available estimators can be found in the [estimators page](@ref probabilities_estimators).
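+
+For example, a minimal sketch using [`CountOccurrences`](@ref) on integer-valued data:
+
+```julia
+using Entropies
+x = rand(1:3, 1000)
+p = probabilities(CountOccurrences(x), x) # each p[i] ≈ 1/3
+```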
### Entropies
@@ -40,24 +39,28 @@ Thus, any of the implemented [probabilities estimators](@ref probabilities_estim
These names are commonplace, and so in Entropies.jl we provide convenience functions like [`entropy_wavelet`](@ref). However, it should be noted that these functions really aren't anything more than 2-lines-of-code wrappers that call [`entropy`](@ref) with the appropriate [`ProbabilitiesEstimator`](@ref).
- In addition to `ProbabilitiesEstimators`, we also provide [`EntropyEstimator`](@ref)s,
- which compute entropies via alternate means, without explicitly computing some
+ In addition to `ProbabilitiesEstimators`, we also provide [`EntropyEstimator`](@ref)s,
+ which compute entropies via alternate means, without explicitly computing some
probability distribution. Differential/continuous entropy, for example, is computed
- using a dedicated [`EntropyEstimator`](@ref). For example, the [`Kraskov`](@ref)
- estimator computes Shannon differential entropy via a nearest neighbor algorithm, while
- the [`Zhu`](@ref) estimator computes Shannon differential entropy using order statistics.
+ using a dedicated [`EntropyEstimator`](@ref). For example, the [`Kraskov`](@ref)
+ estimator computes Shannon differential entropy via a nearest neighbor algorithm, while
+ the [`Correa`](@ref) estimator computes Shannon differential entropy using order statistics.
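+
+For example (a sketch; `Kraskov(k = 5)` estimates Shannon differential entropy from
+distances to the 5th nearest neighbor):
+
+```julia
+using Entropies
+x = Dataset(randn(10_000, 3))
+h = entropy(Shannon(), Kraskov(k = 5), x)
+```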
### Other complexity measures
-Other complexity measures, which strictly speaking don't compute entropies, and may or may
-not explicitly compute probability distributions, are found in
-[Complexity.jl](https://github.com/JuliaDynamics/Complexity.jl) package. This includes
-measures like sample entropy and approximate entropy.
+Other complexity measures, which strictly speaking don't compute entropies, and may or may not explicitly compute probability distributions, are found in
+[Complexity measures](@ref) page.
+This includes measures like sample entropy and approximate entropy.
## [Input data for Entropies.jl](@id input_data)
-The input data type typically depend on the probability estimator chosen. In general though, the standard DynamicalSystems.jl approach is taken and as such we have three types of input data:
+The input data type typically depends on the probability estimator chosen.
+In general though, the standard DynamicalSystems.jl approach is taken and as such we have three types of input data:
- _Timeseries_, which are `AbstractVector{<:Real}`, used in e.g. with [`WaveletOverlap`](@ref).
- _Multi-dimensional timeseries, or datasets, or state space sets_, which are [`Dataset`](@ref), used e.g. with [`NaiveKernel`](@ref).
- _Spatial data_, which are higher dimensional standard `Array`s, used e.g. with [`SpatialSymbolicPermutation`](@ref).
+
+```@docs
+Dataset
+```
\ No newline at end of file
diff --git a/docs/src/probabilities.md b/docs/src/probabilities.md
index 52c8f1ad6..6e2e4d7a0 100644
--- a/docs/src/probabilities.md
+++ b/docs/src/probabilities.md
@@ -1,4 +1,4 @@
-# [Probabilities](@id probabilities_estimators)
+# Probabilities
## Probabilities API
@@ -8,11 +8,20 @@ The probabilities API is defined by
- [`probabilities`](@ref)
- [`probabilities_and_outcomes`](@ref)
+and related functions that you will find in the following documentation blocks:
+
+### Probabilities
+
```@docs
ProbabilitiesEstimator
probabilities
probabilities!
Probabilities
+```
+
+### Outcomes
+
+```@docs
probabilities_and_outcomes
outcomes
outcome_space
@@ -20,31 +29,34 @@ total_outcomes
missing_outcomes
```
-## Overview
+## [Overview of probabilities estimators](@id probabilities_estimators)
-Any of the following estimators can be used with [`probabilities`](@ref).
+Any of the following estimators can be used with [`probabilities`](@ref)
+(in the column "input data" it is assumed that the `eltype` of the input is `<: Real`). A minimal usage sketch follows the table.
| Estimator | Principle | Input data |
-| ------------------------------------------- | --------------------------- | ------------------- |
-| [`CountOccurrences`](@ref) | Frequencies | `Vector`, `Dataset` |
+|:--------------------------------------------|:----------------------------|:--------------------|
+| [`CountOccurrences`](@ref)                  | Frequencies of unique elements | `Any`          |
| [`ValueHistogram`](@ref) | Binning (histogram) | `Vector`, `Dataset` |
| [`TransferOperator`](@ref) | Binning (transfer operator) | `Vector`, `Dataset` |
| [`NaiveKernel`](@ref) | Kernel density estimation | `Dataset` |
-| [`SymbolicPermutation`](@ref) | Ordinal patterns | `Vector` |
-| [`SymbolicWeightedPermutation`](@ref) | Ordinal patterns | `Vector` |
-| [`SymbolicAmplitudeAwarePermutation`](@ref) | Ordinal patterns | `Vector` |
+| [`SymbolicPermutation`](@ref) | Ordinal patterns | `Vector`, `Dataset` |
+| [`SymbolicWeightedPermutation`](@ref) | Ordinal patterns | `Vector`, `Dataset` |
+| [`SymbolicAmplitudeAwarePermutation`](@ref) | Ordinal patterns | `Vector`, `Dataset` |
+| [`SpatialSymbolicPermutation`](@ref) | Ordinal patterns in space | `Array` |
| [`Dispersion`](@ref) | Dispersion patterns | `Vector` |
+| [`SpatialDispersion`](@ref) | Dispersion patterns in space | `Array` |
| [`Diversity`](@ref) | Cosine similarity | `Vector` |
| [`WaveletOverlap`](@ref) | Wavelet transform | `Vector` |
-| [`PowerSpectrum`](@ref) | Fourier spectra | `Vector`, `Dataset` |
+| [`PowerSpectrum`](@ref) | Fourier transform | `Vector` |
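+
+For example, a minimal sketch using the [`ValueHistogram`](@ref) row (constructor
+signature as given in its docstring):
+
+```julia
+using Entropies
+x = rand(1000)
+est = ValueHistogram(x, RectangularBinning(10))
+p = probabilities(est, x)
+```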
-## Count occurrences (counting)
+## Count occurrences
```@docs
CountOccurrences
```
-## Visitation frequency (histograms)
+## Histograms
```@docs
ValueHistogram
@@ -52,23 +64,21 @@ RectangularBinning
FixedRectangularBinning
```
-## Permutation (symbolic)
+## Symbolic permutations
```@docs
SymbolicPermutation
SymbolicWeightedPermutation
SymbolicAmplitudeAwarePermutation
-SpatialSymbolicPermutation
```
-## Dispersion (symbolic)
+## Dispersion patterns
```@docs
Dispersion
-SpatialDispersion
```
-## Transfer operator (binning)
+## Transfer operator
```@docs
TransferOperator
@@ -100,3 +110,10 @@ PowerSpectrum
```@docs
Diversity
```
+
+## Spatial estimators
+
+```@docs
+SpatialSymbolicPermutation
+SpatialDispersion
+```
diff --git a/src/Entropies.jl b/src/Entropies.jl
index 27130d009..7e532831c 100644
--- a/src/Entropies.jl
+++ b/src/Entropies.jl
@@ -24,10 +24,10 @@ include("complexity.jl")
include("multiscale.jl")
# Library implementations (files include other files)
+include("encoding/all_encodings.jl") # other structs depend on these
include("probabilities_estimators/probabilities_estimators.jl")
include("entropies/entropies.jl")
-include("encoding/all_encodings.jl")
-include("complexity/complexity_measures.jl") # relies on encodings, so include after
+include("complexity/complexity_measures.jl")
include("deprecations.jl")
diff --git a/src/encoding/all_encodings.jl b/src/encoding/all_encodings.jl
index 773392745..63b168a35 100644
--- a/src/encoding/all_encodings.jl
+++ b/src/encoding/all_encodings.jl
@@ -1,2 +1,4 @@
+include("fasthist.jl")
+include("rectangular_binning.jl")
include("gaussian_cdf.jl")
include("ordinal_pattern.jl")
diff --git a/src/encoding/fasthist.jl b/src/encoding/fasthist.jl
new file mode 100644
index 000000000..daeb72a11
--- /dev/null
+++ b/src/encoding/fasthist.jl
@@ -0,0 +1,80 @@
+# This function is part of the DEV API and could be used downstream.
+# (documented and tested).
+"""
+ fasthist!(x) → c::Vector{Int}
+
+Count the occurrences `c` of the unique data values in `x`,
+so that `c[i]` is the number of times the value
+`sort!(unique(x))[i]` occurs. Hence, this method is useful mostly when
+`x` contains integer or categorical data.
+
+Prior to counting, `x` is sorted, so this function also mutates `x`.
+Therefore, it is called with `copy` in the higher-level API when necessary.
+This function works for any `x` for which `sort!(x)` works.
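+
+For example (a sketch; `fasthist!` is internal, so it is not exported):
+
+```julia
+x = [2, 1, 2, 3, 1, 2]
+Entropies.fasthist!(x) # returns [2, 3, 1]; `x` is now sorted
+```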
+"""
+function fasthist!(x)
+ L = length(x)
+ hist = Vector{Int}()
+ # Reserve enough space for histogram (Base suggests this improves performance):
+ sizehint!(hist, L)
+ # Fill the histogram by counting consecutive equal values:
+ sort!(x; alg = QuickSort)
+ prev_val, count = x[1], 0
+ for val in x
+ if val == prev_val
+ count += 1
+ else
+ push!(hist, count)
+ prev_val = val
+ count = 1
+ end
+ end
+ push!(hist, count)
+ # Shrink histogram capacity to fit its size:
+ sizehint!(hist, length(hist))
+ return hist
+end
+
+fasthist!(x, ::Nothing) = fasthist!(x)
+
+"""
+ fasthist!(x, weights) → c::Vector{Real}
+
+Similar to `fasthist!(x)`, but here the `weights` are summed up for each unique
+entry of `x`. `x` is sorted just like in `fasthist!(x)`.
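+
+For example (a sketch; the weights of equal values are summed):
+
+```julia
+x = [1, 2, 1]; w = [0.5, 1.0, 0.25]
+Entropies.fasthist!(x, w) # returns [0.75, 1.0]
+```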
+"""
+function fasthist!(x::AbstractVector, weights::AbstractVector{T}) where {T}
+ length(x) == length(weights) || error("Need length(x) == length(weights)")
+
+ idxs = sortperm(x)
+ x .= x[idxs] # sort in-place
+ # weights = weights[idxs] # we don't have to sort them
+ L = length(x)
+
+ i = 1
+ W = zero(T)
+ ps = Vector{T}()
+ sizehint!(ps, L)
+
+ prev_sym = first(x)
+
+ @inbounds while i <= L
+ symᵢ = x[i]
+ wtᵢ = weights[idxs[i]] # get weights at the sorted index
+ if symᵢ == prev_sym
+ W += wtᵢ
+ else
+ # Finished counting weights for the previous symbol, so push
+ # the summed weights (normalization happens later).
+ push!(ps, W)
+ # We are at a new symbol, so refresh sum with the first weight
+ # of the new symbol.
+ W = wtᵢ
+ end
+ prev_sym = symᵢ
+ i += 1
+ end
+ push!(ps, W) # last entry
+ sizehint!(ps, length(ps))
+ return ps
+end
diff --git a/src/encoding/gaussian_cdf.jl b/src/encoding/gaussian_cdf.jl
index bd42d8e51..30027a45c 100644
--- a/src/encoding/gaussian_cdf.jl
+++ b/src/encoding/gaussian_cdf.jl
@@ -10,7 +10,11 @@ An encoding scheme that [`encode`](@ref)s a scalar value into one of the integer
`sᵢ ∈ [1, 2, …, c]` based on the normal cumulative distribution function (NCDF),
and [`decode`](@ref)s the `sᵢ` into subintervals of `[0, 1]` (with some loss of information).
-## Algorithm
+Notice that the decoding step does not yield an element of any outcome space of the
+estimators that use `GaussianCDFEncoding` internally, such as [`Dispersion`](@ref).
+That is because these estimators additionally delay embed the encoded data.
+
+## Description
`GaussianCDFEncoding` first maps an input point ``x`` (scalar) to a new real number
``y \\in [0, 1]`` by using the normal cumulative distribution function (CDF) with the
@@ -21,41 +25,34 @@ x \\to y : y = \\dfrac{1}{ \\sigma
\\sqrt{2 \\pi}} \\int_{-\\infty}^{x} e^{(-(t - \\mu)^2)/(2 \\sigma^2)} dt.
```
-Next, the interval `[0, 1]` is equidistantly binned and enumerated ``1, 2, \\ldots, c``.
+Next, the interval `[0, 1]` is equidistantly binned and enumerated ``1, 2, \\ldots, c``,
and ``y`` is linearly mapped to one of these integers using the linear map
``y \\to z : z = \\text{floor}(y \\cdot c) + 1``.
-Because of the ceiling operation, some information is lost, so when used with
+Because of the floor operation, some information is lost, so when used with
[`decode`](@ref), each decoded `sᵢ` is mapped to a *subinterval* of `[0, 1]`.
-## Usage
-
-In the implementations of [`Dispersion`](@ref) and [`RelativeDispersion`](@ref),
-we use the empirical means `μ` and standard deviation `σ`, as determined from
-a univariate input timeseries ``X = \\{x_i\\}_{i=1}^N``, and after encoding, the input
-is thus transformed to a symbol time
-series ``S = \\{ s_i \\}_{i=1}^N``, where ``s_i \\in [1, 2, \\ldots, c]``.
-
## Examples
```jldoctest
julia> using Entropies, Statistics
-julia> x = [0.1, 0.4, 0.7, -2.1, 8.0, 0.9, -5.2];
+julia> x = [0.1, 0.4, 0.7, -2.1, 8.0];
julia> μ, σ = mean(x), std(x); encoding = GaussianCDFEncoding(; μ, σ, c = 5);
-julia> es = Entropies.encode.(Ref(encoding), x)
-7-element Vector{Int64}:
- 3
- 3
- 3
+julia> es = encode.(Ref(encoding), x)
+5-element Vector{Int64}:
+ 2
2
- 5
3
1
+ 5
-julia ds = Entropies.decode.(Ref(encoding), es)
+julia> decode(encoding, 3)
+2-element SVector{2, Float64} with indices SOneTo(2):
+ 0.4
+ 0.6
```
"""
struct GaussianCDFEncoding{T} <: Encoding
@@ -70,21 +67,19 @@ end
total_outcomes(encoding::GaussianCDFEncoding) = encoding.c
-g(xᵢ, μ, σ) = exp((-(xᵢ - μ)^2)/(2σ^2))
+gaussian(x, μ, σ) = exp((-(x - μ)^2)/(2σ^2))
-function encode(encoding::GaussianCDFEncoding, x)
+function encode(encoding::GaussianCDFEncoding, x::Real)
(; c, σ, μ) = encoding
# We only need the value of the integral (not the error), so
# index first element returned from quadgk
k = 1/(σ*sqrt(2π))
- y = k * first(quadgk(xᵢ -> g(xᵢ, μ, σ), -Inf, x))
- return floor.(Int, y ./ (1 / c)) + 1
+ y = k * first(quadgk(x -> gaussian(x, μ, σ), -Inf, x))
+ return floor(Int, y / (1 / c)) + 1
end
function decode(encoding::GaussianCDFEncoding, i::Int)
c = encoding.c
- V = SVector{2, eltype(1.0)}
lower_interval_bound = (i - 1)/(c)
-
- yⱼ = V(lower_interval_bound, lower_interval_bound + 1/c - eps())
+ return SVector(lower_interval_bound, prevfloat(lower_interval_bound + 1/c))
end
diff --git a/src/encoding/ordinal_pattern.jl b/src/encoding/ordinal_pattern.jl
index 2d160a8f8..0f974af61 100644
--- a/src/encoding/ordinal_pattern.jl
+++ b/src/encoding/ordinal_pattern.jl
@@ -1,74 +1,72 @@
using StaticArrays: MVector
-using StateSpaceSets: AbstractDataset
+using Combinatorics: permutations
export OrdinalPatternEncoding
-#TODO: The docstring here, and probably the source code, needs a full re-write
-# based on new `encode` interface.
"""
OrdinalPatternEncoding <: Encoding
- OrdinalPatternEncoding(; m::Int, lt = est.lt)
+ OrdinalPatternEncoding(m::Int, lt = Entropies.isless_rand)
-An encoding scheme that [`encode`](@ref)s `m`-dimensional permutation/ordinal patterns to
-integers and [`decode`](@ref)s these integers to permutation patterns based on the Lehmer
-code.
-
-## Usage
-
-Used in [`outcomes`](@ref) with probabilities estimators such as
-[`SymbolicPermutation`](@ref) to map state vectors `xᵢ ∈ x` into their
-integer symbol representations `πᵢ`.
+An encoding scheme that [`encode`](@ref)s length-`m` vectors into
+their permutation/ordinal patterns and then into the integers based on the Lehmer
+code. It is used by [`SymbolicPermutation`](@ref) and related estimators; see their
+documentation for a description of the outcome space.
## Description
-The Lehmer code, as implemented here, is a bijection between the set of `factorial(n)`
-possible permutations for a length-`n` sequence, and the integers `1, 2, …, n`.
-
-- *Encoding* converts an `m`-dimensional permutation pattern `p` into its unique integer
- symbol ``\\omega \\in \\{0, 1, \\ldots, m - 1 \\}``, using Algorithm 1 in Berger
- et al. (2019)[^Berger2019].
-- *Decoding* converts an ``\\omega_i \\in \\Omega` ` to its original permutation pattern.
-
-`OrdinalPatternEncoding` is thus meant to be applied on a *permutation*, i.e.
-a vector of indices that would sort some vector in ascending order (in practice: the
-result of calling `sortperm(x)` for some input vector `x`).
+The Lehmer code, as implemented here, is a bijection between the set of `factorial(m)`
+possible permutations for a length-`m` sequence, and the integers `1, 2, …, factorial(m)`.
+The encoding step uses algorithm 1 in Berger et al. (2019)[^Berger2019], which is
+highly optimized.
+The decoding step is much slower due to missing optimizations (pull requests welcome!).
## Example
```jldoctest
julia> using Entropies
-julia> x = [1.2, 5.4, 2.2, 1.1]; encoding = OrdinalPatternEncoding(m = length(x));
+julia> x = [4.0, 1.0, 9.0];
-julia> xs = sortperm(x)
-4-element Vector{Int64}:
- 4
- 1
- 3
- 2
+julia> c = OrdinalPatternEncoding(3);
-julia> s = encode(encoding, xs)
-20
+julia> encode(c, x)
+3
-julia> decode(encoding, s)
-4-element SVector{4, Int64} with indices SOneTo(4):
- 4
+julia> decode(c, 3)
+3-element SVector{3, Int64} with indices SOneTo(3):
+ 2
1
3
- 2
```
[^Berger2019]:
- Berger, Sebastian, et al. "Teaching Ordinal Patterns to a Computer: Efficient
+ Berger et al. "Teaching Ordinal Patterns to a Computer: Efficient
Encoding Algorithms Based on the Lehmer Code." Entropy 21.10 (2019): 1023.
"""
-Base.@kwdef struct OrdinalPatternEncoding{M <: Integer} <: Encoding
- m::M = 3
- lt::Function = isless_rand
+struct OrdinalPatternEncoding{M, F} <: Encoding
+ perm::MVector{M, Int}
+ lt::F
+end
+function OrdinalPatternEncoding{m}(lt::F) where {m,F}
+ OrdinalPatternEncoding{m, F}(zero(MVector{m, Int}), lt)
end
+function OrdinalPatternEncoding(m = 3, lt::F = isless_rand) where {F}
+ return OrdinalPatternEncoding{m, F}(zero(MVector{m, Int}), lt)
+end
+
+# So that SymbolicPermutation and related estimators fall back to these methods.
+total_outcomes(::OrdinalPatternEncoding{m}) where {m} = factorial(m)
+outcome_space(::OrdinalPatternEncoding{m}) where {m} = permutations(1:m) |> collect
+
-function encode(encoding::OrdinalPatternEncoding, perm)
- m = encoding.m
+# Notice that `χ` is an element of a `Dataset`, so it is practically always a static
+# vector in our code. However, we allow any `AbstractVector` in case a user wants to
+# call `encode` directly.
+function encode(encoding::OrdinalPatternEncoding{m}, χ::AbstractVector) where {m}
+ if m != length(χ)
+ throw(ArgumentError("Permutation order and length of input must match!"))
+ end
+ perm = sortperm!(encoding.perm, χ; lt = encoding.lt)
+ # Begin Lehmer code
n = 0
for i = 1:m-1
for j = i+1:m
@@ -77,18 +75,17 @@ function encode(encoding::OrdinalPatternEncoding, perm)
n = (m-i)*n
end
# The Lehmer code actually results in 0 being an encoded symbol. Shift by 1, so that
- # encodings are positive integers.
+ # encodings are the positive integers.
return n + 1
end
# I couldn't find any efficient algorithm in the literature for converting
# between factorial number system representations and Lehmer codes, so we'll just have to
# use this naive approach for now. It is probably possible to do this in a faster way.
-function decode(encoding::OrdinalPatternEncoding, s::Int)
- m = encoding.m
+function decode(::OrdinalPatternEncoding{m}, s::Int) where {m}
# Convert integer to its factorial number representation. Each factorial number
# corresponds to a unique permutation of the numbers `1, 2, ..., m`.
- f = base10_to_factorial(s - 1, m) # subtract 1 because we add 1 in `encode`
+ f::SVector{m, Int} = base10_to_factorial(s - 1, m) # subtract 1 because we add 1 in `encode`
# Reconstruct the permutation from the factorial representation
xs = 1:m |> collect
@@ -139,13 +136,12 @@ function ndigits_in_factorial_base(n::Int)
return k
end
-
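+# Tie-breaker for ordinal patterns: if `a == b`, one of the two is randomly
+# declared the smaller, so that repeated values do not bias the patterns.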
function isless_rand(a, b)
- if a == b
- rand(Bool)
- elseif a < b
+ if a < b
true
- else
+ elseif a > b
false
+ else
+ rand(Bool)
end
end
diff --git a/src/probabilities_estimators/histograms/rectangular_binning.jl b/src/encoding/rectangular_binning.jl
similarity index 98%
rename from src/probabilities_estimators/histograms/rectangular_binning.jl
rename to src/encoding/rectangular_binning.jl
index d58b2dc48..2f8a01589 100644
--- a/src/probabilities_estimators/histograms/rectangular_binning.jl
+++ b/src/encoding/rectangular_binning.jl
@@ -43,8 +43,8 @@ const ValidFixedBinInputs = Union{Number, NTuple}
Rectangular box partition of state space where the extent of the grid is explicitly
specified by `ϵmin` and `ϵmax`, and along each dimension, the grid is subdivided into `N`
-subintervals. Points falling outside the partition do not attribute to probabilities.
-This binning type leads to a well-defined outcome space without knowledge of input,
+subintervals. Points falling outside the partition do not contribute to probabilities.
+This binning type leads to a well-defined outcome space without knowledge of input data,
see [`ValueHistogram`](@ref).
`ϵmin`/`ϵmax` must be `NTuple{D, <:Real}` for input of `D`-dimensional data.
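+
+For example (a sketch; a positional constructor `FixedRectangularBinning(ϵmin, ϵmax, N)`
+is assumed here for illustration):
+
+```julia
+# A 10×10 grid covering the unit square [0, 1] × [0, 1]:
+b = FixedRectangularBinning((0.0, 0.0), (1.0, 1.0), 10)
+```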
@@ -187,6 +187,7 @@ end
# low level histogram call
##################################################################
# This method is called by `probabilities(est::ValueHistogram, x::Array_or_Dataset)`
+# `fasthist!` is defined in `src/encoding/fasthist.jl`.
"""
fasthist(c::RectangularBinEncoding, x::Vector_or_Dataset)
Intermediate method that runs `fasthist!` in the encoded space
diff --git a/src/entropy.jl b/src/entropy.jl
index 6edcc1b1c..9501d1117 100644
--- a/src/entropy.jl
+++ b/src/entropy.jl
@@ -1,5 +1,5 @@
export Entropy, EntropyEstimator
-export entropy, entropy_maximum, entropy_normalized, entropy!
+export entropy, entropy_maximum, entropy_normalized
abstract type AbstractEntropy end
@@ -120,25 +120,6 @@ end
entropy(est::ProbabilitiesEstimator, x::Array_or_Dataset) = entropy(Shannon(; base = 2), est, x)
entropy(probs::Probabilities) = entropy(Shannon(; base = 2), probs)
-"""
- entropy!(s, [e::Entropy,] x, est::ProbabilitiesEstimator)
-
-Similar to `probabilities!`, this is an in-place version of [`entropy`](@ref) that allows
-pre-allocation of temporarily used containers.
-
-The entropy (second argument) is optional: if not given, `Shannon()` is used instead.
-
-Only works for certain estimators. See for example [`SymbolicPermutation`](@ref).
-"""
-function entropy!(s::AbstractVector{Int}, e::Entropy, est::ProbabilitiesEstimator, x)
- probs = probabilities!(s, est, x)
- entropy(e, probs)
-end
-
-function entropy!(s::AbstractVector{Int}, est::ProbabilitiesEstimator, x)
- entropy!(s, Shannon(; base = 2), est, x)
-end
-
###########################################################################################
# API: entropy from entropy estimators
###########################################################################################
diff --git a/src/probabilities.jl b/src/probabilities.jl
index 37cd93744..08c252e78 100644
--- a/src/probabilities.jl
+++ b/src/probabilities.jl
@@ -12,8 +12,8 @@ export outcome_space
Probabilities <: AbstractVector
Probabilities(x) → p
-`Probabilities` is a simple wrapper around `AbstractVector` that ensures its values sum
-to 1, so that `p` can be interpreted as probability distribution.
+`Probabilities` is a simple wrapper around `AbstractVector{<:Real}` that ensures its values
+sum to 1, so that `p` can be interpreted as a probability mass function.
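+
+For example (a sketch; the constructor sum-normalizes its input):
+
+```julia
+Probabilities([1, 1, 2]) # stores [0.25, 0.25, 0.5]
+```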
"""
struct Probabilities{T} <: AbstractVector{T}
p::Vector{T}
@@ -49,18 +49,19 @@ The supertype for all probabilities estimators.
In Entropies.jl, probability distributions are estimated from data by defining a set of
possible outcomes ``\\Omega = \\{\\omega_1, \\omega_2, \\ldots, \\omega_L \\}``, and
assigning to each outcome ``\\omega_i`` a probability ``p(\\omega_i)``, such that
-``\\sum_{i=1}^N p(\\omega_i) = 1``. It is the role a [`ProbabilitiesEstimator`](@ref) to
+``\\sum_{i=1}^L p(\\omega_i) = 1``. It is the role of a [`ProbabilitiesEstimator`](@ref) to
1. Define ``\\Omega``, the "outcome space", which is the set of all possible outcomes over
which probabilities are estimated. The cardinality of this set can be obtained using
[`total_outcomes`](@ref).
-2. Define how probabilities ``p_i = p(\\omega_i)` are assigned to outcomes ``\\omega_i``.
+2. Define how probabilities ``p_i = p(\\omega_i)`` are assigned to outcomes ``\\omega_i``.
In practice, probability estimation is done by calling [`probabilities`](@ref) with some
input data and one of the following probabilities estimators. The result is a
[`Probabilities`](@ref) `p` (`Vector`-like), where each element `p[i]` is the probability of
the outcome `ω[i]`. Use [`probabilities_and_outcomes`](@ref) if you need
-both the probabilities and the outcomes and [`outcome_space`](@ref) to obtain ``\\Omega``.
+both the probabilities and the outcomes, and use [`outcome_space`](@ref) to obtain
+``\\Omega`` alone.
The element type of ``\\Omega`` varies between estimators, but it is guaranteed to be
_hashable_. This allows for conveniently tracking the probability of a specific event
across experimental realizations, by using the outcome as a dictionary key and the
@@ -69,23 +70,11 @@ and one has a vector of probabilities, one for each experimental realization).
We have made the design decision that all probabilities estimators have a well defined
outcome space when instantiated. For some estimators this means that the input data
-`x` must be provided both when instantiating the estimator, but also when computing
+`x` must not only be provided when instantiating the estimator, but also when computing
the probabilities.
-All currently implemented probability estimators are:
-
-- [`CountOccurrences`](@ref).
-- [`ValueHistogram`](@ref).
-- [`TransferOperator`](@ref).
-- [`Dispersion`](@ref).
-- [`SpatialDispersion`](@ref).
-- [`WaveletOverlap`](@ref).
-- [`PowerSpectrum`](@ref).
-- [`SymbolicPermutation`](@ref).
-- [`SymbolicWeightedPermutation`](@ref).
-- [`SymbolicAmplitudeAwarePermutation`](@ref).
-- [`SpatialSymbolicPermutation`](@ref).
-- [`NaiveKernel`](@ref).
+All currently implemented probability estimators are listed in a nice table in the
+[probabilities estimators](@ref probabilities_estimators) section of the online documentation.
"""
abstract type ProbabilitiesEstimator end
diff --git a/src/probabilities_estimators/counting/count_occurences.jl b/src/probabilities_estimators/counting/count_occurences.jl
index fd96a696c..da837f1f9 100644
--- a/src/probabilities_estimators/counting/count_occurences.jl
+++ b/src/probabilities_estimators/counting/count_occurences.jl
@@ -15,7 +15,7 @@ struct CountOccurrences{X} <: ProbabilitiesEstimator
x::X
end
-function probabilities_and_outcomes(::CountOccurrences, x::Array_or_Dataset)
+function probabilities_and_outcomes(::CountOccurrences, x)
z = copy(x)
probs = Probabilities(fasthist!(z))
# notice that `z` is now sorted within `fasthist!` so we can skip sorting
@@ -24,8 +24,8 @@ end
outcome_space(est::CountOccurrences) = sort!(unique(est.x))
-probabilities(::CountOccurrences, x::Array_or_Dataset) = probabilities(x)
-function probabilities(x::Array_or_Dataset)
+probabilities(::CountOccurrences, x) = probabilities(x)
+function probabilities(x)
    # Fast histogram code (`fasthist!`) is in `src/encoding/fasthist.jl`
return Probabilities(fasthist!(copy(x)))
end
diff --git a/src/probabilities_estimators/dispersion/dispersion.jl b/src/probabilities_estimators/dispersion/dispersion.jl
index 64dd80d04..2ebf38fdd 100644
--- a/src/probabilities_estimators/dispersion/dispersion.jl
+++ b/src/probabilities_estimators/dispersion/dispersion.jl
@@ -16,43 +16,23 @@ categories for the Gaussian symbol mapping.
## Description
Assume we have a univariate time series ``X = \\{x_i\\}_{i=1}^N``. First, this time series
-is discretized a "Gaussian encoding", which uses the normal cumulative distribution
-function (CDF) to encode a timeseries ``x`` as integers like so:
-
-- Each ``x_i`` is mapped to a new real number ``y_i \\in [0, 1]`` by using the normal
- cumulative distribution function (CDF), ``x_i \\to y_i : y_i = \\dfrac{1}{ \\sigma
- \\sqrt{2 \\pi}} \\int_{-\\infty}^{x_i} e^{(-(x_i - \\mu)^2)/(2 \\sigma^2)} dx``,
- where ``\\mu`` and ``\\sigma`` are the empirical mean and standard deviation of ``X``.
- Other choices of CDFs are also possible, but currently only Gaussian is implemented.
-- Next, each ``y_i`` is linearly mapped to an integer
- ``z_i \\in [1, 2, \\ldots, c]`` using the map
- ``y_i \\to z_i : z_i = \\text{floor}(y_i ./ (1 / c)) + 1`` (*note: this mapping differs
- slightly from the linear mapping in the original paper*). This procedure subdivides the
- interval ``[0, 1]`` into ``c``
- different subintervals that form a covering of ``[0, 1]``, and assigns each ``y_i`` to one
- of these subintervals. The original time series ``X`` is thus transformed to a symbol time
- series ``S = \\{ s_i \\}_{i=1}^N``, where ``s_i \\in [1, 2, \\ldots, c]``.
-- Next, the symbol time series ``S`` is embedded into an
- ``m``-dimensional time series, using an embedding lag of ``\\tau = 1``, which yields a
- total of ``N - (m - 1)\\tau`` points, or "dispersion patterns". Because each ``z_i`` can
- take on ``c`` different values, and each embedding point has ``m`` values, there
- are ``c^m`` possible dispersion patterns. This number is used for normalization when
- computing dispersion entropy.
-
-### Computing dispersion probabilities and entropy
-
-A probability distribution ``P = \\{p_i \\}_{i=1}^{c^m}``, where
-``\\sum_i^{c^m} p_i = 1``, can then be estimated by counting and sum-normalising
-the distribution of dispersion patterns among the embedding vectors.
-Note that dispersion patterns that are not present are not counted. Therefore, you'll
-always get non-zero probabilities using the `Dispersion` probability estimator.
+is encoded into a symbol timeseries ``S`` using the Gaussian encoding
+[`GaussianCDFEncoding`](@ref) with empirical mean `μ` and empirical standard deviation `σ`
+(both determined from ``X``), and `c` as given to `Dispersion`.
+
+Then, ``S`` is embedded into an ``m``-dimensional time series, using an embedding lag of
+``\\tau``, which yields a total of ``N - (m - 1)\\tau`` delay vectors ``z_i``,
+or "dispersion patterns". Since each element of ``z_i`` can take on `c` different values,
+and each delay vector has `m` entries, there are `c^m` possible dispersion patterns.
+This number is used for normalization when computing dispersion entropy.
+
+The returned probabilities are simply the frequencies of the unique dispersion patterns
+present in ``S`` (i.e., the [`CountOccurrences`](@ref) of ``S``).
## Outcome space
The outcome space for `Dispersion` is the set of unique delay vectors whose elements are
-the symbols (integers) encoded by the Gaussian CDF.
-Hence, the outcome space is all `m`-dimensional delay vectors whose elements
-are all possible values in `1:c`. There are `c ^ m` such vectors.
+the symbols (integers) encoded by the Gaussian CDF, i.e., the unique delay vectors
+observed after embedding ``S``.
## Data requirements and parameters
@@ -70,14 +50,23 @@ unique element, then a `InexactError` is thrown when trying to compute probabili
When ``c = 3``, values clustering far below mean are in one group, values clustered
around the mean are in one group, and values clustering far above the mean are in a
third group. Then the embedding vector ``[2, 2, 2, 2, 2]`` consists of values that are
- relatively close together (close to the mean), so it represents a set of numbers that
+ close together (close to the mean), so it represents a set of numbers that
are not very spread out (less dispersed). The embedding vector ``[1, 1, 2, 3, 3]``,
however, represents numbers that are much more spread out (more dispersed), because the
categories representing "outliers" both above and below the mean are represented,
not only values close to the mean.
-[^Rostaghi2016]: Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series analysis. IEEE Signal Processing Letters, 23(5), 610-614.
-[^Li2018]: Li, G., Guan, Q., & Yang, H. (2018). Noise reduction method of underwater acoustic signals based on CEEMDAN, effort-to-compress complexity, refined composite multiscale dispersion entropy and wavelet threshold denoising. Entropy, 21(1), 11.
+For a version of this estimator that can be used on high-dimensional arrays, see
+[`SpatialDispersion`](@ref).
+
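+A minimal usage sketch (assuming the keyword constructor `Dispersion(; c, m, τ)` with
+the parameters described above):
+
+```julia
+using Entropies
+x = rand(1000)
+est = Dispersion(c = 4, m = 3)
+p = probabilities(est, x) # frequencies of the observed dispersion patterns
+```
+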
+[^Rostaghi2016]:
+ Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series analysis.
+ IEEE Signal Processing Letters, 23(5), 610-614.
+
+[^Li2018]:
+ Li, G., Guan, Q., & Yang, H. (2018). Noise reduction method of underwater acoustic
+ signals based on CEEMDAN, effort-to-compress complexity, refined composite multiscale
+ dispersion entropy and wavelet threshold denoising. Entropy, 21(1), 11.
"""
Base.@kwdef struct Dispersion{S <: Encoding} <: ProbabilitiesEstimator
    encoding::Type{S} = GaussianCDFEncoding # any encoding that accepts the keyword `c`
diff --git a/src/probabilities_estimators/histograms/fasthist.jl b/src/probabilities_estimators/histograms/fasthist.jl
deleted file mode 100644
index 20e8cffae..000000000
--- a/src/probabilities_estimators/histograms/fasthist.jl
+++ /dev/null
@@ -1,36 +0,0 @@
-# This function is part of the DEV API and could be used downstream.
-# (documented and tested).
-"""
- fasthist!(x) → c::Vector{Int}
-
-Count the occurrences `c` of the unique data values in `x`,
-so that `c[i]` is the number of times the value
-`sort!(unique(x))[i]` occurs. Hence, this method is useful mostly when
-`x` contains integer or categorical data.
-
-Prior to counting, `x` is sorted, so this function also mutates `x`.
-Therefore, it is called with `copy` in higher level API when necessary.
-This function works for any `x` for which `sort!(x)` works.
-"""
-function fasthist!(x)
- L = length(x)
- hist = Vector{Int}()
- # Reserve enough space for histogram (Base suggests this improves performance):
- sizehint!(hist, L)
- # Fill the histogram by counting consecutive equal values:
- sort!(x; alg = QuickSort)
- prev_val, count = x[1], 0
- for val in x
- if val == prev_val
- count += 1
- else
- push!(hist, count)
- prev_val = val
- count = 1
- end
- end
- push!(hist, count)
- # Shrink histogram capacity to fit its size:
- sizehint!(hist, length(hist))
- return hist
-end
diff --git a/src/probabilities_estimators/histograms/value_histogram.jl b/src/probabilities_estimators/histograms/value_histogram.jl
index d9e79b024..dc8773a62 100644
--- a/src/probabilities_estimators/histograms/value_histogram.jl
+++ b/src/probabilities_estimators/histograms/value_histogram.jl
@@ -1,7 +1,5 @@
export ValueHistogram, VisitationFrequency
-# We need binning to be defined first to add it as a field to a struct
-include("rectangular_binning.jl")
-include("fasthist.jl")
+# Binnings are defined in the encoding folder!
"""
ValueHistogram(x, b::RectangularBinning) <: ProbabilitiesEstimator
diff --git a/src/probabilities_estimators/kernel/kernel_density.jl b/src/probabilities_estimators/kernel/kernel_density.jl
index cc7be4968..98e165ed3 100644
--- a/src/probabilities_estimators/kernel/kernel_density.jl
+++ b/src/probabilities_estimators/kernel/kernel_density.jl
@@ -44,7 +44,8 @@ struct NaiveKernel{KM, M <: Metric, I} <: ProbabilitiesEstimator
indices::I
end
function NaiveKernel(x, ϵ::Real; method = KDTree, w = 0, metric = Euclidean())
- ϵ > 0 || error("Radius ϵ must be larger than zero!")
+ x isa Vector && throw(ArgumentError("Input must be dataset, not vector."))
+ ϵ ≤ 0 && throw(ArgumentError("Radius ϵ must be larger than zero!"))
return NaiveKernel(ϵ, method, w, metric, eachindex(x))
end
diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl
deleted file mode 100644
index 4a14b70de..000000000
--- a/src/probabilities_estimators/permutation_ordinal/SymbolicAmplitudeAware.jl
+++ /dev/null
@@ -1,86 +0,0 @@
-export SymbolicAmplitudeAwarePermutation
-
-"""
- SymbolicAmplitudeAwarePermutation <: ProbabilitiesEstimator
- SymbolicAmplitudeAwarePermutation(; τ = 1, m = 3, A = 0.5, lt = Entropies.isless_rand)
-
-A variant of [`SymbolicPermutation`](@ref) that also incorporates amplitude information,
-based on the amplitude-aware permutation entropy (Azami & Escudero, 2016).
-
-## Outcome space
-
-Like for [`SymbolicPermutation`](@ref), the outcome space `Ω` for
-`SymbolicAmplitudeAwarePermutation` is the lexiographically ordered set of
-length-`m` ordinal patterns (i.e. permutations) that can be formed by the integers
-`1, 2, …, m`. There are `factorial(m)` such patterns.
-
-## Description
-
-Probabilities are computed as
-
-```math
-p(\\pi_i^{m, \\tau}) =
-\\dfrac{\\sum_{k=1}^N
-\\mathbf{1}_{u:S(u) = s_i} \\left( \\mathbf{x}_k^{m, \\tau} \\right) \\, a_k}{\\sum_{k=1}^N
-\\mathbf{1}_{u:S(u) \\in \\Pi} \\left( \\mathbf{x}_k^{m, \\tau} \\right) \\,a_k} =
-\\dfrac{\\sum_{k=1}^N \\mathbf{1}_{u:S(u) = s_i}
-\\left( \\mathbf{x}_k^{m, \\tau} \\right) \\, a_k}{\\sum_{k=1}^N a_k}.
-```
-
-The weights encoding amplitude information about state vector
-``\\mathbf{x}_i = (x_1^i, x_2^i, \\ldots, x_m^i)`` are
-
-```math
-a_i = \\dfrac{A}{m} \\sum_{k=1}^m |x_k^i | + \\dfrac{1-A}{d-1}
-\\sum_{k=2}^d |x_{k}^i - x_{k-1}^i|,
-```
-
-with ``0 \\leq A \\leq 1``. When ``A=0`` , only internal differences between the
-elements of
-``\\mathbf{x}_i`` are weighted. Only mean amplitude of the state vector
-elements are weighted when ``A=1``. With, ``0 collect
diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl
deleted file mode 100644
index b45f62446..000000000
--- a/src/probabilities_estimators/permutation_ordinal/SymbolicPermutation.jl
+++ /dev/null
@@ -1,149 +0,0 @@
-using Combinatorics: permutations
-
-export SymbolicPermutation
-"""
- SymbolicPermutation <: ProbabilitiesEstimator
- SymbolicPermutation(; m = 3, τ = 1, lt::Function = Entropies.isless_rand)
-
-A probabilities estimator based on ordinal permutation patterns.
-
-The quantity computed depends on the input data:
-
-- **Univariate data**. If applied to a univariate time series, then the time series
- is first embedded using embedding delay `τ` and dimension `m`, resulting in embedding
- vectors ``\\{ \\bf{x}_i \\}_{i=1}^{N-(m-1)\\tau}``. Then, for each ``\\bf{x}_i``,
- we find its permutation pattern ``\\pi_{i}``, which we internally encode a an integer
- ``s_i \\in \\mathbb{N}^+`` for efficient computation (integer symbols are obtained by
- using [`encode`](@ref) with [`OrdinalPatternEncoding`](@ref)).
- Probabilities are then
- estimated as naive frequencies over the encoded permutation symbols
- ``\\{ s_i \\}_{i=1}^{N-(m-1)\\tau}`` by using [`CountOccurrences`](@ref).
- The resulting probabilities can be used to compute permutation entropy (PE;
- Bandt & Pompe, 2002[^BandtPompe2002]).
-- **Multivariate data**. If applied to a an `D`-dimensional `Dataset`,
- then it is assumed that the input data represents ``N`` observations of a multivariate
- system ``\\{ \\bf{x}_i \\}_{i=1}^N``, and no embedding is constructed.
- For each ``\\bf{x}_i \\in \\mathbb{R}^D``, we direct find its permutation pattern
- ``\\pi_{i}`` and encode it as ``s_i \\in \\mathbb{N}^+`` (i.e. `est.τ` and `est.m` are
- ignored, and we set `m = D` instead). Finally, probabilities are estimated as relative
- frequencies of occurrences of the encoded permutation symbols.
- The resulting probabilities can be used to compute multivariate permutation
- entropy (MvPE; He et al., 2016[^He2016]), but here we don't perform any subdivision
- of the permutation patterns (see Figure 3 in He et al., 2016).
-
-## Outcome space
-
-The outcome space `Ω` for `SymbolicPermutation` is the set of length-`m` ordinal
-patterns (i.e. permutations) that can be formed by the integers `1, 2, …, m`,
-ordered lexicographically. There are `factorial(m)` such patterns.
-
-## In-place symbolization
-
-`SymbolicPermutation` also implements the in-place [`entropy!`](@ref) and
-[`probabilities!`](@ref). The length of the pre-allocated symbol vector must match the
-length of the embedding: `N - (m-1)τ` for univariate time series, and `M` for length-`M`
-`Dataset`s), i.e.
-
-```julia
-using DelayEmbeddings, Entropies
-m, τ, N = 2, 1, 100
-est = SymbolicPermutation(; m, τ)
-
-# For a time series
-x_ts = rand(N)
-πs_ts = zeros(Int, N - (m - 1)*τ)
-p = probabilities!(πs_ts, est, x_ts)
-h = entropy!(πs_ts, Renyi(), est, x_ts)
-
-# For a pre-discretized `Dataset`
-x_symb = outcomes(x_ts, OrdinalPatternEncoding(m = 2, τ = 1))
-x_d = genembed(x_symb, (0, -1, -2))
-πs_d = zeros(Int, length(x_d))
-p = probabilities!(πs_d, est, x_d)
-h = entropy!(πs_d, Renyi(), est, x_d)
-```
-
-See [`SymbolicWeightedPermutation`](@ref) and [`SymbolicAmplitudeAwarePermutation`](@ref)
-for estimators that not only consider ordinal (sorting) patterns, but also incorporate
-information about within-state-vector amplitudes.
-
-!!! note "Handling equal values in ordinal patterns"
- In Bandt & Pompe (2002), equal values are ordered after their order of appearance, but
- this can lead to erroneous temporal correlations, especially for data with
- low-amplitude resolution [^Zunino2017]. Here, by default, if two values are equal,
- then one of the is randomly assigned as "the largest", using
- `lt = Entropies.isless_rand`. To get the behaviour from Bandt and Pompe (2002), use
- `lt = Base.isless`).
-
-[^BandtPompe2002]: Bandt, Christoph, and Bernd Pompe. "Permutation entropy: a natural
- complexity measure for time series." Physical review letters 88.17 (2002): 174102.
-[^Zunino2017]: Zunino, L., Olivares, F., Scholkmann, F., & Rosso, O. A. (2017).
- Permutation entropy based time series analysis: Equalities in the input signal can
- lead to false conclusions. Physics Letters A, 381(22), 1883-1892.
-[^He2016]:
- He, S., Sun, K., & Wang, H. (2016). Multivariate permutation entropy and its
- application for complexity analysis of chaotic systems. Physica A: Statistical
- Mechanics and its Applications, 461, 812-823.
-"""
-struct SymbolicPermutation{F} <: PermutationProbabilitiesEstimator
- τ::Int
- m::Int
- lt::F
-end
-function SymbolicPermutation(; τ::Int = 1, m::Int = 3, lt::F=isless_rand) where {F <: Function}
- m >= 2 || throw(ArgumentError("Need m ≥ 2, otherwise no dynamical information is encoded in the symbols."))
- SymbolicPermutation{F}(τ, m, lt)
-end
-
-function probabilities(est::SymbolicPermutation, x::Vector_or_Dataset)
- probabilities(encodings_from_permutations(est, x))
-end
-
-function probabilities!(πs::AbstractVector{Int}, est::SymbolicPermutation, x::Vector_or_Dataset)
- encodings_from_permutations!(πs, est, x)
- probabilities(πs)
-end
-
-function probabilities_and_outcomes(est::SymbolicPermutation, x::Vector_or_Dataset)
- πs = encodings_from_permutations(est, x)
- return probabilities(πs), observed_outcomes(est, πs)
-end
-
-function probabilities_and_outcomes!(πs::AbstractVector{Int}, est::SymbolicPermutation, x::Vector_or_Dataset)
- encodings_from_permutations!(πs, est, x)
- return probabilities(πs), observed_outcomes(est, πs)
-end
-
-function entropy!(
- s::AbstractVector{Int},
- e::Entropy,
- est::SymbolicPermutation,
- x::AbstractDataset{m, T};
- ) where {m, T}
-
- length(s) == length(x) || error("Pre-allocated symbol vector s need the same number of elements as x. Got length(πs)=$(length(πs)) and length(x)=$(L).")
- ps = probabilities!(s, est, x)
-
- entropy(e, ps)
-end
-
-function entropy!(
- s::AbstractVector{Int},
- e::Entropy,
- est::SymbolicPermutation,
- x::AbstractVector{T},
- ) where {T<:Real}
-
- L = length(x)
- N = L - (est.m-1)*est.τ
- length(s) == N || error("Pre-allocated symbol vector `s` needs to have length `length(x) - (m-1)*τ` to match the number of state vectors after `x` has been embedded. Got length(s)=$(length(s)) and length(x)=$(L).")
-
- ps = probabilities!(s, est, x)
- entropy(e, ps)
-end
-
-entropy!(s::AbstractVector{Int}, est::SymbolicPermutation, x) =
- entropy!(s, Shannon(base = 2), est, x)
-
-total_outcomes(est::SymbolicPermutation)::Int = factorial(est.m)
-outcome_space(est::SymbolicPermutation) = permutations(1:est.m) |> collect
diff --git a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl b/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl
deleted file mode 100644
index 955016505..000000000
--- a/src/probabilities_estimators/permutation_ordinal/SymbolicWeightedPermutation.jl
+++ /dev/null
@@ -1,97 +0,0 @@
-import DelayEmbeddings: genembed, Dataset
-import Statistics: mean
-
-export SymbolicWeightedPermutation
-
-"""
- SymbolicWeightedPermutation <: ProbabilitiesEstimator
- SymbolicWeightedPermutation(; τ = 1, m = 3, lt = Entropies.isless_rand)
-
-A variant of [`SymbolicPermutation`](@ref) that also incorporates amplitude information,
-based on the weighted permutation entropy (Fadlallah et al., 2013).
-
-## Outcome space
-
-Like for [`SymbolicPermutation`](@ref), the outcome space `Ω` for
-`SymbolicWeightedPermutation` is the lexiographically ordered set of
-length-`m` ordinal patterns (i.e. permutations) that can be formed by the integers
-`1, 2, …, m`. There are `factorial(m)` such patterns.
-
-## Description
-
-Probabilities are computed as
-
-```math
-p(\\pi_i^{m, \\tau}) = \\dfrac{\\sum_{k=1}^N \\mathbf{1}_{u:S(u) = s_i}
-\\left( \\mathbf{x}_k^{m, \\tau} \\right)
-\\, w_k}{\\sum_{k=1}^N \\mathbf{1}_{u:S(u) \\in \\Pi}
-\\left( \\mathbf{x}_k^{m, \\tau} \\right) \\,w_k} = \\dfrac{\\sum_{k=1}^N
-\\mathbf{1}_{u:S(u) = s_i}
-\\left( \\mathbf{x}_k^{m, \\tau} \\right) \\, w_k}{\\sum_{k=1}^N w_k},
-```
-
-where weights are computed based on the variance of the state vectors as
-
-```math
-w_j = \\dfrac{1}{m}\\sum_{k=1}^m (x_{j+(k+1)\\tau} - \\mathbf{\\hat{x}}_j^{m, \\tau})^2,
-```
-
-and ``\\mathbf{x}_i`` is the aritmetic mean of state vector:
-
-```math
-\\mathbf{\\hat{x}}_j^{m, \\tau} = \\frac{1}{m} \\sum_{k=1}^m x_{j + (k+1)\\tau}.
-```
-
-The weighted permutation entropy is equivalent to regular permutation entropy when weights
-are positive and identical (``w_j = \\beta \\,\\,\\, \\forall \\,\\,\\, j \\leq N`` and
-``\\beta > 0)``.
-
-See [`SymbolicPermutation`](@ref) for an estimator that only incorporates ordinal/sorting
-information and disregards amplitudes, and [`SymbolicAmplitudeAwarePermutation`](@ref) for
-another estimator that incorporates amplitude information.
-
-!!! note "An implementation note"
- *Note: in equation 7, section III, of the original paper, the authors write*
-
- ```math
- w_j = \\dfrac{1}{m}\\sum_{k=1}^m (x_{j-(k-1)\\tau} - \\mathbf{\\hat{x}}_j^{m, \\tau})^2.
- ```
- *But given the formula they give for the arithmetic mean, this is **not** the variance
- of ``\\mathbf{x}_i``, because the indices are mixed: ``x_{j+(k-1)\\tau}`` in the weights
- formula, vs. ``x_{j+(k+1)\\tau}`` in the arithmetic mean formula. This seems to imply
- that amplitude information about previous delay vectors
- are mixed with mean amplitude information about current vectors. The authors also mix the
- terms "vector" and "neighboring vector" (but uses the same notation for both), making it
- hard to interpret whether the sign switch is a typo or intended. Here, we use the notation
- above, which actually computes the variance for ``\\mathbf{x}_i``*.
-
-
-
-[^Fadlallah2013]: Fadlallah, Bilal, et al. "Weighted-permutation entropy: A complexity
- measure for time series incorporating amplitude information." Physical Review E 87.2
- (2013): 022911.
-"""
-struct SymbolicWeightedPermutation{F} <: PermutationProbabilitiesEstimator
- τ::Int
- m::Int
- lt::F
-end
-function SymbolicWeightedPermutation(; τ::Int = 1, m::Int = 3, lt::F = isless_rand) where {F <: Function}
- m >= 2 || error("Need m ≥ 2, otherwise no dynamical information is encoded in the symbols.")
- SymbolicWeightedPermutation{F}(τ, m, lt)
-end
-
-function permutation_weights(est::SymbolicWeightedPermutation, x::AbstractDataset)
- weights_from_variance.(x.data, est.m)
-end
-weights_from_variance(x, m::Int) = sum((x .- mean(x)) .^ 2)/m
-
-probabilities(est::SymbolicWeightedPermutation, x) = encodings_and_probs(est, x)[2]
-function probabilities_and_outcomes(est::SymbolicWeightedPermutation, x)
- encodings, probs = encodings_and_probs(est, x)
- return probs, observed_outcomes(est, encodings)
-end
-
-
-total_outcomes(est::SymbolicWeightedPermutation)::Int = factorial(est.m)
-outcome_space(est::SymbolicWeightedPermutation) = permutations(1:est.m) |> collect
diff --git a/src/probabilities_estimators/permutation_ordinal/common.jl b/src/probabilities_estimators/permutation_ordinal/common.jl
deleted file mode 100644
index ee7ea067c..000000000
--- a/src/probabilities_estimators/permutation_ordinal/common.jl
+++ /dev/null
@@ -1,81 +0,0 @@
-using StateSpaceSets: AbstractDataset
-using StaticArrays: @MVector
-
-# ----------------------------------------------------------------------------------------
-# When given a dataset `X`, it is assumed that the dataset already represents an embedding.
-# Therefore, each `xᵢ ∈ X` is directly encoded as an integer.
-# ----------------------------------------------------------------------------------------
-function encodings_from_permutations(est::PermutationProbabilitiesEstimator, x::AbstractDataset{m, T}) where {m, T}
- πs = zeros(Int, length(x))
- return encodings_from_permutations!(πs, est, x)
-end
-
-function encodings_from_permutations!(πs::AbstractVector{Int}, est::PermutationProbabilitiesEstimator,
- x::AbstractDataset{m, T}) where {m, T}
-
- if length(πs) != length(x)
- throw(ArgumentError("Need length(πs) == length(x), got `length(πs)=$(length(πs))` and `length(x)==$(length(x))`."))
- end
-
- encoding = OrdinalPatternEncoding(; m, lt = est.lt)
- perm = @MVector zeros(Int, m)
- @inbounds for (i, xᵢ) in enumerate(x)
- sortperm!(perm, xᵢ, lt = est.lt)
- πs[i] = encode(encoding, perm)
- end
- return πs
-end
-
-# ----------------------------------------------------------------------------------------
-# Timeseries are first embedded, then encoded as integers.
-# ----------------------------------------------------------------------------------------
-function encodings_from_permutations(est::PermutationProbabilitiesEstimator, x::AbstractVector{T}) where {T}
- τs = tuple([est.τ*i for i = 0:est.m-1]...)
- x_emb = genembed(x, τs)
- N = length(x_emb)
- πs = zeros(Int, N)
- return encodings_from_permutations!(πs, est, x_emb)
-end
-
-function encodings_from_permutations!(πs::AbstractVector{Int}, est::PermutationProbabilitiesEstimator,
- x::AbstractVector{T}) where T
- N = length(x) - (est.m - 1)*est.τ
- length(πs) == N || throw(ArgumentError("Pre-allocated symbol vector `πs` needs to have length `length(x) - (m-1)*τ` to match the number of state vectors after `x` has been embedded. Got length(πs)=$(length(πs)) and length(x)=$(L)."))
- τs = tuple([est.τ*i for i = 0:est.m-1]...)
- x_emb = genembed(x, τs)
- encodings_from_permutations!(πs, est, x_emb)
-end
-
-# ----------------------------------------------------------------------------------------
-# `SymbolicPermutation`, `SymbolicWeightedPermutation` and `SymbolicPermutation` are
-# essentially identical, but weights are computed differently (and no weights are used for
-# `SymbolicPermutation`). Therefore, we define a common `permutation_weights` function,
-# so we don't need to duplicate code.
-# ----------------------------------------------------------------------------------------
-function permutation_weights end
-
-function encodings_and_probs(
- est::PermutationProbabilitiesEstimator,
- x::AbstractDataset
- )
- πs = encodings_from_permutations(est, x)
- wts = permutation_weights(est, x)
- return πs, Probabilities(symprobs(πs, wts, normalize = true))
-end
-
-function encodings_and_probs(
- est::PermutationProbabilitiesEstimator,
- x::AbstractVector{<:Real}
- )
- τs = tuple([est.τ*i for i = 0:est.m-1]...)
- emb = genembed(x, τs)
- πs = encodings_from_permutations(est, x)
- wts = permutation_weights(est, emb)
- return πs, Probabilities(symprobs(πs, wts, normalize = true))
-end
-
-function observed_outcomes(est::PermutationProbabilitiesEstimator, encodings)
- encoding = OrdinalPatternEncoding(m = est.m, lt = est.lt)
- observed_encodings = sort(unique(encodings))
- return decode.(Ref(encoding), observed_encodings) # int → permutation
-end
diff --git a/src/probabilities_estimators/permutation_ordinal/symbolic.jl b/src/probabilities_estimators/permutation_ordinal/symbolic.jl
deleted file mode 100644
index afe3ea861..000000000
--- a/src/probabilities_estimators/permutation_ordinal/symbolic.jl
+++ /dev/null
@@ -1,17 +0,0 @@
-
-"""
-The supertype for probability estimators based on permutation patterns.
-
-Subtypes must implement fields:
-
-- `m::Int` The dimension of the permutation patterns.
-- `lt::Function` A function determining how ties are to be broken when constructing
- permutation patterns from embedding vectors.
-"""
-abstract type PermutationProbabilitiesEstimator <: ProbabilitiesEstimator end
-
-include("common.jl")
-include("utils.jl")
-include("SymbolicPermutation.jl")
-include("SymbolicWeightedPermutation.jl")
-include("SymbolicAmplitudeAware.jl")
diff --git a/src/probabilities_estimators/permutation_ordinal/symbolic_permutation.jl b/src/probabilities_estimators/permutation_ordinal/symbolic_permutation.jl
new file mode 100644
index 000000000..1f2b2d160
--- /dev/null
+++ b/src/probabilities_estimators/permutation_ordinal/symbolic_permutation.jl
@@ -0,0 +1,286 @@
+export SymbolicPermutation
+export SymbolicWeightedPermutation
+export SymbolicAmplitudeAwarePermutation
+using DelayEmbeddings: embed
+
+"""
+The supertype for probability estimators based on permutation patterns.
+
+Subtypes must implement fields:
+
+- `m::Int`: The dimension of the permutation patterns.
+- `lt::Function`: A function determining how ties are to be broken when constructing
+ permutation patterns from embedding vectors.
+"""
+abstract type PermutationProbabilitiesEstimator{m} <: ProbabilitiesEstimator end
+const PermProbEst = PermutationProbabilitiesEstimator
+
+###########################################################################################
+# Types and docstrings
+###########################################################################################
+"""
+ SymbolicPermutation <: ProbabilitiesEstimator
+ SymbolicPermutation(; m = 3, τ = 1, lt::Function = Entropies.isless_rand)
+
+A probabilities estimator based on ordinal permutation patterns.
+
+When passed to [`probabilities`](@ref), the output depends on the input data type:
+
+- **Univariate data**. If applied to a univariate timeseries (`Vector`), then the timeseries
+ is first embedded using embedding delay `τ` and dimension `m`, resulting in embedding
+ vectors ``\\{ \\bf{x}_i \\}_{i=1}^{N-(m-1)\\tau}``. Then, for each ``\\bf{x}_i``,
+ we find its permutation pattern ``\\pi_{i}``. Probabilities are then
+ estimated as the frequencies of the encoded permutation symbols
+ by using [`CountOccurrences`](@ref). When giving the resulting probabilities to
+ [`entropy`](@ref), the original permutation entropy is computed [^BandtPompe2002].
+- **Multivariate data**. If applied to a `D`-dimensional `Dataset`,
+  then no embedding is constructed, and each vector ``\\bf{x}_i`` of the dataset
+  is mapped directly to its permutation pattern ``\\pi_{i}`` by comparing the
+  relative magnitudes of the elements of ``\\bf{x}_i``.
+  Like above, probabilities are estimated as the frequencies of the permutation symbols.
+  In this case, `τ` is ignored, but `m` must still match the dimension of the
+  dataset (this is required for performance reasons).
+  The resulting probabilities can be used to compute multivariate permutation
+  entropy[^He2016], although here we don't perform any further subdivision
+  of the permutation patterns (as in Figure 3 of [^He2016]).
+
+Internally, [`SymbolicPermutation`](@ref) uses the [`OrdinalPatternEncoding`](@ref)
+to represent ordinal patterns as integers for efficient computations.
+
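+For example, a minimal sketch of the underlying encoding (constructed here explicitly,
+with the default tie-breaking function):
+
+```julia
+using Entropies
+enc = OrdinalPatternEncoding{3}(Entropies.isless_rand)
+i = encode(enc, [2.1, 0.5, 1.3]) # an integer in 1:factorial(3)
+decode(enc, i)                   # the permutation pattern, here [2, 3, 1]
+```
+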
+## Outcome space
+
+The outcome space `Ω` for `SymbolicPermutation` is the set of length-`m` ordinal
+patterns (i.e. permutations) that can be formed by the integers `1, 2, …, m`,
+ordered lexicographically. There are `factorial(m)` such patterns.
+
+For example, the outcome `[3, 1, 2]` corresponds to the ordinal pattern for which the
+third element of the state vector is the smallest and the second element is the
+largest (outcomes are the permutations returned by `sortperm`).
+
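+For example (a quick sketch using this estimator's API):
+
+```julia
+est = SymbolicPermutation(m = 3)
+total_outcomes(est) # 6 == factorial(3)
+outcome_space(est)  # [1,2,3], [1,3,2], ..., [3,2,1] (lexicographic)
+```
+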
+## In-place symbolization
+
+`SymbolicPermutation` also implements the in-place [`probabilities!`](@ref)
+for `Dataset` input (i.e. data that already represent embedding vectors).
+The length of the pre-allocated symbol vector must match the length of the dataset.
+For example:
+
+```julia
+using DelayEmbeddings, Entropies
+m, N, τ = 2, 100, 1
+est = SymbolicPermutation(; m, τ)
+x = Dataset(rand(N, m)) # dataset input (already an "embedding")
+πs = zeros(Int, N)      # length must match `length(x)`
+p = probabilities!(πs, est, x)
+```
+
+See [`SymbolicWeightedPermutation`](@ref) and [`SymbolicAmplitudeAwarePermutation`](@ref)
+for estimators that not only consider ordinal (sorting) patterns, but also incorporate
+information about within-state-vector amplitudes.
+For a version of this estimator that can be used on high-dimensional arrays, see
+[`SpatialSymbolicPermutation`](@ref).
+
+!!! note "Handling equal values in ordinal patterns"
+ In Bandt & Pompe (2002), equal values are ordered after their order of appearance, but
+ this can lead to erroneous temporal correlations, especially for data with
+    low amplitude resolution [^Zunino2017]. Here, by default, if two values are equal,
+    then one of them is randomly assigned as "the largest", using
+    `lt = Entropies.isless_rand`. To get the behaviour from Bandt and Pompe (2002),
+    use `lt = Base.isless`.
+
+[^BandtPompe2002]: Bandt, Christoph, and Bernd Pompe. "Permutation entropy: a natural
+ complexity measure for timeseries." Physical review letters 88.17 (2002): 174102.
+[^Zunino2017]: Zunino, L., Olivares, F., Scholkmann, F., & Rosso, O. A. (2017).
+ Permutation entropy based timeseries analysis: Equalities in the input signal can
+ lead to false conclusions. Physics Letters A, 381(22), 1883-1892.
+[^He2016]:
+ He, S., Sun, K., & Wang, H. (2016). Multivariate permutation entropy and its
+ application for complexity analysis of chaotic systems. Physica A: Statistical
+ Mechanics and its Applications, 461, 812-823.
+"""
+struct SymbolicPermutation{M,F} <: PermutationProbabilitiesEstimator{M}
+ encoding::OrdinalPatternEncoding{M,F}
+ τ::Int
+end
+
+"""
+ SymbolicWeightedPermutation <: ProbabilitiesEstimator
+ SymbolicWeightedPermutation(; τ = 1, m = 3, lt::Function = Entropies.isless_rand)
+
+A variant of [`SymbolicPermutation`](@ref) that also incorporates amplitude information,
+based on the weighted permutation entropy[^Fadlallah2013]. The outcome space and keywords
+are the same as in [`SymbolicPermutation`](@ref).
+
+## Description
+
+For each ordinal pattern extracted from each state (or delay) vector, a weight is
+attached to it, equal to the variance of that vector. Probabilities are then estimated
+by summing the weights corresponding to the same pattern, instead of just counting
+occurrences of the same pattern.
+
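+As a minimal sketch of the weighting (mirroring `weights_from_variance` in the source),
+the weight of a single state vector is
+
+```julia
+using Statistics: mean
+χ = [1.0, 3.0, 2.0]                          # one state vector, m = 3
+w = sum(e -> (e - mean(χ))^2, χ) / length(χ) # its weight, here 2/3
+```
+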
+!!! note "An implementation note"
+ *Note: in equation 7, section III, of the original paper, the authors write*
+
+ ```math
+    w_j = \\dfrac{1}{m}\\sum_{k=1}^m (x_{j+(k-1)\\tau} - \\mathbf{\\hat{x}}_j^{m, \\tau})^2.
+ ```
+ *But given the formula they give for the arithmetic mean, this is **not** the variance
+ of the delay vector ``\\mathbf{x}_i``, because the indices are mixed:
+ ``x_{j+(k-1)\\tau}`` in the weights formula, vs. ``x_{j+(k+1)\\tau}`` in the arithmetic
+ mean formula. Here, delay embedding and computation of the patterns and their weights
+ are completely separated processes, ensuring that we compute the arithmetic mean
+ correctly for each vector of the input dataset (which may be a delay-embedded
+    timeseries).*
+
+[^Fadlallah2013]: Fadlallah, Bilal, et al. "Weighted-permutation entropy: A complexity
+ measure for time series incorporating amplitude information." Physical Review E 87.2
+ (2013): 022911.
+"""
+struct SymbolicWeightedPermutation{M,F} <: PermutationProbabilitiesEstimator{M}
+ encoding::OrdinalPatternEncoding{M,F}
+ τ::Int
+end
+
+"""
+ SymbolicAmplitudeAwarePermutation <: ProbabilitiesEstimator
+ SymbolicAmplitudeAwarePermutation(; τ = 1, m = 3, A = 0.5, lt = Entropies.isless_rand)
+
+A variant of [`SymbolicPermutation`](@ref) that also incorporates amplitude information,
+based on the amplitude-aware permutation entropy[^Azami2016]. The outcome space and keywords
+are the same as in [`SymbolicPermutation`](@ref).
+
+## Description
+
+Similarly to [`SymbolicWeightedPermutation`](@ref), a weight ``w_i`` is attached to each
+ordinal pattern extracted from each state (or delay) vector
+``\\mathbf{x}_i = (x_1^i, x_2^i, \\ldots, x_m^i)`` as
+
+```math
+w_i = \\dfrac{A}{m} \\sum_{k=1}^m |x_k^i| + \\dfrac{1-A}{m-1}
+\\sum_{k=2}^m |x_{k}^i - x_{k-1}^i|,
+```
+
+with ``0 \\leq A \\leq 1``. When ``A=0``, only internal differences between the
+elements of ``\\mathbf{x}_i`` are weighted. When ``A=1``, only the mean amplitude of
+the state vector elements is weighted. With ``0 < A < 1``, a combined weighting is
+used (see Azami & Escudero[^Azami2016] for details).
+
+[^Azami2016]: Azami, H., & Escudero, J. (2016). Amplitude-aware permutation entropy:
+    Illustration in spike detection and signal segmentation. Computer methods and
+    programs in biomedicine, 128, 40-51.
+"""
+struct SymbolicAmplitudeAwarePermutation{M,F} <: PermutationProbabilitiesEstimator{M}
+    encoding::OrdinalPatternEncoding{M,F}
+    τ::Int
+    A::Float64
+end
+
+# Initializations with a sanity check on the pattern order `m`.
+function SymbolicPermutation(; τ::Int = 1, m::Int = 3, lt::F = isless_rand) where {F}
+    m >= 2 || throw(ArgumentError("Need order m ≥ 2."))
+    return SymbolicPermutation{m, F}(OrdinalPatternEncoding{m}(lt), τ)
+end
+function SymbolicWeightedPermutation(; τ::Int = 1, m::Int = 3, lt::F=isless_rand) where {F}
+ m >= 2 || throw(ArgumentError("Need order m ≥ 2."))
+ return SymbolicWeightedPermutation{m, F}(OrdinalPatternEncoding{m}(lt), τ)
+end
+function SymbolicAmplitudeAwarePermutation(; A = 0.5, τ::Int = 1, m::Int = 3, lt::F=isless_rand) where {F}
+ m >= 2 || throw(ArgumentError("Need order m ≥ 2."))
+ return SymbolicAmplitudeAwarePermutation{m, F}(OrdinalPatternEncoding{m}(lt), τ, A)
+end
+
+###########################################################################################
+# Implementation of the whole `probabilities` API on abstract `PermProbEst`
+###########################################################################################
+# Probabilities etc. simply initialize the datasets and containers of the encodings
+# and just map everything using `encode`. The only difference between the three
+# types is whether they compute some additional weights that affect
+# how the probabilities are counted.
+
+function probabilities(est::PermProbEst{m}, x::Vector{T}) where {m, T<:Real}
+ dataset::Dataset{m,T} = embed(x, m, est.τ)
+ return probabilities(est, dataset)
+end
+
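+# For instance (a sketch with hypothetical data `x = rand(1000)`), these two calls
+# are equivalent, because timeseries input is embedded before symbolization:
+#     probabilities(SymbolicPermutation(m = 3, τ = 2), x)
+#     probabilities(SymbolicPermutation(m = 3), embed(x, 3, 2))
+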
+function probabilities(est::PermProbEst{m}, x::AbstractDataset{D}) where {m, D}
+ m != D && throw(ArgumentError(
+ "Order of ordinal patterns and dimension of `Dataset` must match!"
+ ))
+ πs = zeros(Int, length(x))
+ return probabilities!(πs, est, x)
+end
+
+function probabilities!(::Vector{Int}, ::PermProbEst, ::AbstractVector)
+ error("""
+    In-place `probabilities!` for `SymbolicPermutation` can only be used with
+    `Dataset` input, not timeseries. First embed the timeseries, or use the
+    regular version `probabilities`.
+ """)
+end
+
+function probabilities!(πs::Vector{Int}, est::PermProbEst{m}, x::AbstractDataset{m}) where {m}
+ # TODO: The following loop can probably be parallelized!
+ @inbounds for (i, χ) in enumerate(x)
+ πs[i] = encode(est.encoding, χ)
+ end
+ weights = permutation_weights(est, x)
+ probs = fasthist!(πs, weights)
+ return Probabilities(probs)
+end
+
+function probabilities_and_outcomes(est::PermProbEst{m}, x::Vector_or_Dataset) where {m}
+    # A bit of code duplication here, because we actually need the processed
+    # `πs` to invert them with `decode`. This can surely be optimized with some
+    # additional function that both maps to integers with `encode` and keeps track
+    # of the permutation pattern vectors. Anyway, we don't expect `outcomes` to be
+    # called often, so we can live with this as is.
+ if x isa Vector
+ dataset = embed(x, m, est.τ)
+ else
+ dataset = x
+ end
+ m != dimension(dataset) && throw(ArgumentError(
+ "Order of ordinal patterns and dimension of `Dataset` must match!"
+ ))
+ πs = zeros(Int, length(dataset))
+ ps = probabilities!(πs, est, dataset)
+ # Okay, now we compute the outcomes. (`πs` is already sorted in `fasthist!`)
+ outcomes = decode.(Ref(est.encoding), unique!(πs))
+ return ps, outcomes
+end
+
+# fallback
+total_outcomes(est::PermutationProbabilitiesEstimator) = total_outcomes(est.encoding)
+outcome_space(est::PermutationProbabilitiesEstimator) = outcome_space(est.encoding)
+
+###########################################################################################
+# Permutation weights definition
+###########################################################################################
+permutation_weights(::SymbolicPermutation, ::Any) = nothing
+
+function permutation_weights(::SymbolicWeightedPermutation{m}, x::AbstractDataset) where {m}
+ weights_from_variance.(vec(x), m)
+end
+
+function weights_from_variance(χ, m::Int)
+ z = mean(χ)
+ s = sum(e -> (e - z)^2, χ)
+ return s/m
+end
+
+function permutation_weights(est::SymbolicAmplitudeAwarePermutation{m}, x::AbstractDataset) where {m}
+ AAPE.(vec(x), est.A, m)
+end
+
+# TODO: This has absolutely terrible performance, allocating like 10 vectors for each
+# element of a dataset...
+"""
+ AAPE(x, A::Real = 0.5, m::Int = length(x))
+
+Encode relative amplitude information of the elements of `x`.
+- `A = 1` emphasizes only average values.
+- `A = 0` emphasizes changes in amplitude values.
+- `A = 0.5` equally emphasizes average values and changes in the amplitude values.
+"""
+function AAPE(x, A::Real = 0.5, m::Int = length(x))
+ (A/m)*sum(abs.(x)) + (1-A)/(m-1)*sum(abs.(diff(x)))
+end
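+
+# Worked example (a sketch, not part of the API): for x = [1.0, 3.0, 2.0] and A = 0.5,
+# AAPE(x) == (0.5/3)*(1 + 3 + 2) + (0.5/2)*(|3 - 1| + |2 - 3|) == 1.0 + 0.75 == 1.75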
diff --git a/src/probabilities_estimators/permutation_ordinal/utils.jl b/src/probabilities_estimators/permutation_ordinal/utils.jl
deleted file mode 100644
index 1405d850c..000000000
--- a/src/probabilities_estimators/permutation_ordinal/utils.jl
+++ /dev/null
@@ -1,41 +0,0 @@
-""" Compute probabilities of symbols `Π`, given weights `wts`. """
-function symprobs(Π::AbstractVector, wts::AbstractVector; normalize = true)
- length(Π) == length(wts) || error("Need length(Π) == length(wts)")
- N = length(Π)
- idxs = sortperm(Π)
- sΠ = Π[idxs] # sorted symbols
- sw = wts[idxs] # sorted weights
-
- i = 1 # symbol counter
- W = 0.0 # Initialize weight
- ps = Float64[]
-
- prev_sym = sΠ[1]
-
- while i <= length(sΠ)
- symᵢ = sΠ[i]
- wtᵢ = sw[i]
- if symᵢ == prev_sym
- W += wtᵢ
- else
- # Finished counting weights for the previous symbol, so push
- # the summed weights (normalization happens later).
- push!(ps, W)
-
- # We are at a new symbol, so refresh sum with the first weight
- # of the new symbol.
- W = wtᵢ
- end
- prev_sym = symᵢ
- i += 1
- end
- push!(ps, W) # last entry
-
- # Normalize
- Σ = sum(sw)
- if normalize
- return ps ./ Σ
- else
- return ps
- end
-end
diff --git a/src/probabilities_estimators/probabilities_estimators.jl b/src/probabilities_estimators/probabilities_estimators.jl
index 7160ffe72..4703ae515 100644
--- a/src/probabilities_estimators/probabilities_estimators.jl
+++ b/src/probabilities_estimators/probabilities_estimators.jl
@@ -1,6 +1,6 @@
include("counting/count_occurences.jl")
include("histograms/value_histogram.jl")
-include("permutation_ordinal/symbolic.jl")
+include("permutation_ordinal/symbolic_permutation.jl")
include("kernel/kernel_density.jl")
include("timescales/timescales.jl")
include("transfer_operator/transfer_operator.jl")
diff --git a/src/probabilities_estimators/spatial/spatial_permutation/SpatialSymbolicPermutation.jl b/src/probabilities_estimators/spatial/spatial_permutation/SpatialSymbolicPermutation.jl
index fad309527..acc9fe36f 100644
--- a/src/probabilities_estimators/spatial/spatial_permutation/SpatialSymbolicPermutation.jl
+++ b/src/probabilities_estimators/spatial/spatial_permutation/SpatialSymbolicPermutation.jl
@@ -11,96 +11,60 @@ export SpatialSymbolicPermutation
A symbolic, permutation-based probabilities estimator for spatiotemporal systems that
generalises [`SymbolicPermutation`](@ref) to high-dimensional arrays.
+The order `m` of the permutation pattern is extracted from the `stencil`, see below.
`SpatialSymbolicPermutation` is based on the 2D and 3D *spatiotemporal permutation entropy*
estimators by Ribeiro et al. (2012)[^Ribeiro2012] and Schlemmer et al.
(2018)[^Schlemmer2018], respectively, but is here implemented as a pure
-probabilities estimator that is generalized for `N`-dimensional input data `x`,
-with arbitrary neighborhood regions (stencils) and periodic boundary conditions.
+probabilities estimator that is generalized for `D`-dimensional input arrays `x`,
+with arbitrary regions (stencils) from which to extract patterns,
+and (possibly) periodic boundary conditions.
+
+See below for ways to specify the `stencil`. If `periodic = true`, then the stencil wraps
+around at the ends of the array. If `periodic = false`, then regions whose indices
+exceed the array bounds are skipped.
In combination with [`entropy`](@ref) and [`entropy_normalized`](@ref), this
-probabilities estimator can be used to compute (normalized) generalized spatiotemporal
+probabilities estimator can be used to compute generalized spatiotemporal
permutation [`Entropy`](@ref) of any type.
-## Arguments
-
-- `stencil`. Defines what local area (hyperrectangle), or which points within this area,
- to include around each hypervoxel (i.e. pixel in 2D). The examples below demonstrate
- different ways of specifying stencils. For details, see
- [`SpatialSymbolicPermutation`](@ref).
-- `x::AbstractArray`. The input data. Must be provided because we need to know its size
- for optimization and bound checking.
-
-## Keyword arguments
+## Outcome space
-- `periodic::Bool`. If `periodic == true`, then the stencil should wrap around at the
- end of the array. If `periodic = false`, then pixels whose stencil exceeds the array
- bounds are skipped.
+The outcome space `Ω` for `SpatialSymbolicPermutation` is the set of length-`m` ordinal
+patterns (i.e. permutations) that can be formed by the integers `1, 2, …, m`,
+ordered lexicographically. There are `factorial(m)` such patterns.
+Here `m` refers to the number of points included in `stencil`.
## Stencils
+The `stencil` defines what local area to use to group hypervoxels. Each grouping
+of hypervoxels is mapped to an order-`m` permutation pattern,
+which is then mapped to an integer as in [`SymbolicPermutation`](@ref).
+The `stencil` is moved around the input array, in a sense "scanning" the input array,
+to collect all possible groupings allowed by the boundary condition (periodic or not).
+
Stencils are passed in one of the following three ways:
-1. As vectors of `CartesianIndex` which encode the pixels to include in the
- stencil, with respect to the current pixel, or integer arrays of the same dimensionality
- as the data. For example `stencil = CartesianIndex.([(0,0), (0,1), (1,1), (1,0)])`.
- Don't forget to include the zero offset index if you want to include the point itself,
- which is almost always the case.
+1. As vectors of `CartesianIndex` which encode the offsets of the indices to include in the
+ stencil, with respect to the current array index when scanning over the array.
+ For example `stencil = CartesianIndex.([(0,0), (0,1), (1,1), (1,0)])`.
+ Don't forget to include the zero offset index if you want to include the hypervoxel
+ itself, which is almost always the case.
Here the stencil creates a 2x2 square extending to the bottom and right of the pixel
(directions here correspond to the way Julia prints matrices by default).
When passing a stencil as a vector of `CartesianIndex`, `m = length(stencil)`.
-
2. As a `D`-dimensional array (where `D` matches the dimensionality of the input data)
containing `0`s and `1`s, where if `stencil[index] == 1`, the corresponding pixel is
included, and if `stencil[index] == 0`, it is not included.
To generate the same estimator as in 1., use `stencil = [1 1; 1 1]`.
When passing a stencil as a `D`-dimensional array, `m = sum(stencil)`
-
3. As a `Tuple` containing two `Tuple`s, both of length `D`, for `D`-dimensional data.
The first tuple specifies the `extent` of the stencil, where `extent[i]`
- dictates the number of pixels to be included along the `i`th axis and `lag[i]`
- the separation of pixels along the same axis.
+ dictates the number of hypervoxels to be included along the `i`th axis and `lag[i]`
+ the separation of hypervoxels along the same axis.
This method can only generate (hyper)rectangular stencils. To create the same estimator as
in the previous examples, use here `stencil = ((2, 2), (1, 1))`.
- When passing a stencil using `extent` and `lag`, `m = prod(extent)!`.
-
-## Example: spatiotemporal entropy for time series
-
-Usage is simple. First, define a `SpatialSymbolicPermutation` estimator by specifying
-a stencil and giving some input data (a matrix with the same dimensions as the data
-as you're going to analyse). Then simply call [`entropy`](@ref) with the estimator.
-
-```julia
-using Entropies
-x = rand(50, 50) # first "time slice" of a spatial system evolution
-stencil = [1 1; 0 1] # or one of the other ways of specifying stencils
-est = SpatialSymbolicPermutation(stencil, x)
-h = entropy(est, x)
-```
-
-To apply this to timeseries of spatial data, simply loop over the call, e.g.:
-
-```julia
-data = [rand(50, 50) for i in 1:50]
-est = SpatialSymbolicPermutation(stencil, first(data))
-h_vs_t = [entropy(est, d) for d in data]
-```
-
-Computing generalized spatiotemporal permutation entropy is trivial, e.g. with
-[`Renyi`](@ref):
-
-```julia
-x = reshape(repeat(1:5, 500) .+ 0.1*rand(500*5), 50, 50)
-est = SpatialSymbolicPermutation(stencil, x)
-entropy(Renyi(q = 2), est, x)
-```
-
-## Outcome space
-
-The outcome space `Ω` for `SpatialSymbolicPermutation` is the set of length-`m` ordinal
-patterns (i.e. permutations) that can be formed by the integers `1, 2, …, m`,
-ordered lexicographically. There are `factorial(m)` such patterns.
-Here, `m` refers to the number of points included by `stencil`.
+ When passing a stencil using `extent` and `lag`, `m = prod(extent)`.
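+
+For example, a minimal usage sketch (random 2D input assumed):
+
+```julia
+using Entropies
+x = rand(50, 50)
+stencil = CartesianIndex.([(0,0), (1,0), (0,1), (1,1)]) # m = 4
+est = SpatialSymbolicPermutation(stencil, x)
+probabilities(est, x)
+```
+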
[^Ribeiro2012]:
Ribeiro et al. (2012). Complexity-entropy causality plane as a complexity measure
@@ -110,28 +74,25 @@ Here, `m` refers to the number of points included by `stencil`.
Schlemmer et al. (2018). Spatiotemporal Permutation Entropy as a Measure for
Complexity of Cardiac Arrhythmia. https://doi.org/10.3389/fphy.2018.00039
"""
-struct SpatialSymbolicPermutation{D,P,V} <: SpatialProbEst{D, P}
+struct SpatialSymbolicPermutation{D,P,V,M,F} <: SpatialProbEst{D, P}
stencil::Vector{CartesianIndex{D}}
viewer::Vector{CartesianIndex{D}}
arraysize::Dims{D}
valid::V
- lt::Function
- m::Int
+ encoding::OrdinalPatternEncoding{M,F}
end
function SpatialSymbolicPermutation(stencil, x::AbstractArray{T, D};
- periodic::Bool = true, lt = isless_rand) where {T, D}
+ periodic::Bool = true, lt::F = isless_rand) where {T, D, F}
stencil, arraysize, valid = preprocess_spatial(stencil, x, periodic)
m = stencil_length(stencil)
-
- SpatialSymbolicPermutation{D, periodic, typeof(valid)}(
- stencil, copy(stencil), arraysize, valid, lt, m
+ encoding = OrdinalPatternEncoding{m}(lt)
+ return SpatialSymbolicPermutation{D,periodic,typeof(valid),m,F}(
+ stencil, copy(stencil), arraysize, valid, encoding
)
end
function probabilities(est::SpatialSymbolicPermutation, x)
- # TODO: This can be literally a call to `symbolize` and then
- # calling probabilities on it. Should do once the `symbolize` refactoring is done.
s = zeros(Int, length(est.valid))
probabilities!(s, est, x)
end
@@ -149,38 +110,42 @@ function probabilities_and_outcomes(est::SpatialSymbolicPermutation, x)
end
function probabilities_and_outcomes!(s, est::SpatialSymbolicPermutation, x)
- m, lt = est.m, est.lt
- encoding = OrdinalPatternEncoding(; m, lt)
-
encodings_from_permutations!(s, est, x)
- observed_outcomes = decode.(Ref(encoding), s)
+ observed_outcomes = decode.(Ref(est.encoding), s)
return probabilities(s), observed_outcomes
end
# Pretty printing
-function Base.show(io::IO, est::SpatialSymbolicPermutation{D}) where {D}
- print(io, "Spatial permutation estimator for $D-dimensional data. Stencil:")
+function Base.show(io::IO, est::SpatialSymbolicPermutation{D,P,V,M}) where {D,P,V,M}
+ print(io, "Spatial symbolic permutation probabilities estimator"*
+ "of order $(M) and for $D-dimensional data. Periodic: $(P). Stencil:")
print(io, "\n")
show(io, MIME"text/plain"(), est.stencil)
end
-function outcome_space(est::SpatialSymbolicPermutation)
- encoding = OrdinalPatternEncoding(; est.m, est.lt)
- decode.(Ref(encoding), 1:factorial(est.m))
-end
+outcome_space(est::SpatialSymbolicPermutation) = outcome_space(est.encoding)
+total_outcomes(est::SpatialSymbolicPermutation) = total_outcomes(est.encoding)
-function total_outcomes(est::SpatialSymbolicPermutation)
- return factorial(est.m)
+function encodings_from_permutations!(πs, est::SpatialSymbolicPermutation, x::AbstractArray)
+ check_preallocated_length!(πs, est, x)
+ for (i, pixel) in enumerate(est.valid)
+ pixels = pixels_in_stencil(est, pixel)
+ selection = view(x, pixels)
+ πs[i] = encode(est.encoding, selection)
+ end
+ return πs
end
-function check_preallocated_length!(πs, est::SpatialSymbolicPermutation{D, periodic}, x::AbstractArray{T, N}) where {D, periodic, T, N}
+function check_preallocated_length!(
+ πs, est::SpatialSymbolicPermutation{D, periodic}, x::AbstractArray{T, N}
+ ) where {D, periodic, T, N}
if periodic
# If periodic boundary conditions, then each pixel has a well-defined neighborhood,
# and there are as many encoded symbols as there are pixels.
length(πs) == length(x) ||
throw(
ArgumentError(
- """Need length(πs) == length(x), got `length(πs)=$(length(πs))`\
+ """Need length(πs) == length(x), got `length(πs)=$(length(πs))`
and `length(x)==$(length(x))`."""
)
)
@@ -190,24 +155,9 @@ function check_preallocated_length!(πs, est::SpatialSymbolicPermutation{D, peri
length(πs) == length(est.valid) ||
throw(
ArgumentError(
- """Need length(πs) == length(est.valid), got `length(πs)=$(length(πs))`\
+ """Need length(πs) == length(est.valid), got `length(πs)=$(length(πs))`
and `length(est.valid)==$(length(est.valid))`."""
)
)
end
-end
-
-function encodings_from_permutations!(πs, est::SpatialSymbolicPermutation{D, periodic},
- x::AbstractArray{T, N}) where {T, N, D, periodic}
- m, lt = est.m, est.lt
- check_preallocated_length!(πs, est, x)
- encoding = OrdinalPatternEncoding(; m, lt)
-
- perm = @MVector zeros(Int, m)
- for (i, pixel) in enumerate(est.valid)
- pixels = pixels_in_stencil(est, pixel)
- sortperm!(perm, view(x, pixels)) # Find permutation for currently selected pixels.
- πs[i] = encode(encoding, perm) # Encode based on the permutation.
- end
- return πs
-end
+end
\ No newline at end of file
diff --git a/src/probabilities_estimators/spatial/utils.jl b/src/probabilities_estimators/spatial/utils.jl
index 2f41c2750..ad4c140dc 100644
--- a/src/probabilities_estimators/spatial/utils.jl
+++ b/src/probabilities_estimators/spatial/utils.jl
@@ -1,11 +1,12 @@
-# This file contains functions that are common to all spatial symbolic estimators.
+# This file contains functions that are common to all spatial estimators that "scan"
+# some stencil of pixels of higher-dimensional arrays and map this stencil of pixels
+# into an integer (e.g., a permutation pattern)
# --------------------------------------------------------------------------------
# get stencil in the form of vectors of cartesian indices from either input type
stencil_to_offsets(stencil::Vector{CartesianIndex{D}}) where D = stencil, D
function stencil_to_offsets(stencil::NTuple{2, NTuple{D, T}}) where {D, T}
- # get extent and lag from stencil
extent, lag = stencil
# generate a D-dimensional stencil
# start by generating a list of iterators for each dimension
diff --git a/test/probabilities/estimators/dispersion.jl b/test/probabilities/estimators/dispersion.jl
index 949d13d03..824c28e8b 100644
--- a/test/probabilities/estimators/dispersion.jl
+++ b/test/probabilities/estimators/dispersion.jl
@@ -1,7 +1,8 @@
using Entropies
using Test
using Statistics: mean, std
-using DelayEmbeddings
+using Entropies.DelayEmbeddings: embed
+
@testset "Dispersion" begin
@testset "Internals" begin
@@ -15,11 +16,11 @@ using DelayEmbeddings
encoding = GaussianCDFEncoding(; σ, μ, c)
# Encoded symbols should be in the set [1, 2, …, c].
- symbols = Entropies.encode.(Ref(encoding), x)
- @test all([s ∈ collect(1:c) for s in symbols])
+ symbols = encode.(Ref(encoding), x)
+ @test all(s -> s ∈ collect(1:c), symbols)
# Dispersion patterns should have a normalized histogram that sums to 1.0.
- dispersion_patterns = DelayEmbeddings.embed(symbols, m, τ)
+ dispersion_patterns = embed(symbols, m, τ)
hist = Entropies.dispersion_histogram(dispersion_patterns, length(x), m, τ)
@test sum(hist) ≈ 1.0
end
diff --git a/test/probabilities/estimators/naive_kernel.jl b/test/probabilities/estimators/naive_kernel.jl
index f495f7d55..d0339fda0 100644
--- a/test/probabilities/estimators/naive_kernel.jl
+++ b/test/probabilities/estimators/naive_kernel.jl
@@ -1,7 +1,8 @@
using Entropies.DelayEmbeddings.Neighborhood: KDTree
-@test_throws ArgumentError NaiveKernel(0.1) isa NaiveKernel
+@test_throws ArgumentError NaiveKernel(rand(10), 0.1)
@test_throws ArgumentError NaiveKernel(0.1, method = KDTree) isa NaiveKernel
+@test NaiveKernel(Dataset(rand(10,2)), 0.1; method = KDTree) isa ProbabilitiesEstimator
N = 1000
pts = Dataset([rand(2) for i = 1:N]);
diff --git a/test/probabilities/estimators/permutation.jl b/test/probabilities/estimators/permutation.jl
index 497dbadde..b4ac60b41 100644
--- a/test/probabilities/estimators/permutation.jl
+++ b/test/probabilities/estimators/permutation.jl
@@ -1,100 +1,79 @@
-using StateSpaceSets: Dataset
-using DelayEmbeddings: genembed
-using StaticArrays: SVector
-
-@test SymbolicPermutation() isa SymbolicPermutation
-@test SymbolicPermutation(lt = Base.isless) isa SymbolicPermutation
-@test SymbolicPermutation(lt = Entropies.isless_rand) isa SymbolicPermutation
-
-@test total_outcomes(SymbolicPermutation(m = 3)) == factorial(3)
+using Entropies, Test
+using Random
+using Entropies.DelayEmbeddings: embed
+using StaticArrays: SVector # needed for the outcome tests below
+
+@testset "API" begin
+ m = 3
+ # Since all these estimators initialize an encoding,
+    # many of the tests, such as whether the `lt` comparison is correctly applied,
+ # are actually done in the ordinal pattern encoding test suite
+ for S in (SymbolicPermutation, SymbolicWeightedPermutation, SymbolicAmplitudeAwarePermutation)
+ @test S() isa ProbabilitiesEstimator
+ @test S(lt = Base.isless) isa ProbabilitiesEstimator
+ @test total_outcomes(S(m = 3)) == factorial(3)
+ end
+end
-@testset "Pre-allocated" begin
- @testset "Probabilities" begin
- est = SymbolicPermutation(m = 5, τ = 1)
- N = 500
- s = zeros(Int, N);
- x = Dataset(repeat([1.1 2.2 3.3], N))
- y = Dataset(rand(N, 5))
- z = rand(N)
+@testset "Analytic, symbolic" begin
+ est = SymbolicPermutation(m = 3, τ = 1)
+ N = 1000
+ x = Dataset(repeat([1.1 2.2 3.3], 10))
+ y = Dataset(rand(Random.MersenneTwister(123), N, 3))
- # Probability distributions
- p1 = probabilities!(s, est, x)
- p2 = probabilities!(s, est, y)
+ @testset "direct" begin
+ p1 = probabilities(est, x)
+ p2 = probabilities(est, y)
@test sum(p1) ≈ 1.0
@test sum(p2) ≈ 1.0
+ @test length(p1) == 1 # analytic
+ @test length(p2) == 6 # analytic
end
-
- @testset "In-place permutation entropy" begin
- m, τ = 2, 1
- est = SymbolicPermutation(; m, τ)
-
- # For these two inputs, with m = 2, τ = 1, there should be two symbols (0 and 1)
- # with equal probabilities, so base-2 Shannon entropy should be
- # -(0.5 * log2(0.5) + 0.5 * log2(0.5)) = 1.0
- x_timeseries = [repeat([1, 2], 5); 1]
- x_dataset = Dataset(repeat([1 2; 2 1], 3))
-
- # Pre-allocated integer vectors
- s_timeseries = zeros(Int, length(x_timeseries) - (m - 1)*τ)
- s_dataset = zeros(Int, length(x_dataset))
-
- @test entropy!(s_timeseries, Shannon(base = 2), est, x_timeseries) ≈ 1.0
- @test entropy!(s_dataset, Shannon(base = 2), est, x_dataset) ≈ 1.0
-
- # Should default to Shannon base 2
- @test entropy!(s_timeseries, est, x_timeseries) ≈ 1.0
- @test entropy!(s_dataset, est, x_dataset) ≈ 1.0
+ @testset "pre-allocated" begin
+ s = zeros(Int, N)
+ p2 = probabilities!(s, est, y)
+ @test sum(p2) ≈ 1.0
end
-end
-
-@testset "Not pre-allocated" begin
- est = SymbolicPermutation(m = 3, τ = 1)
- N = 500
- x = Dataset(repeat([1.1 2.2 3.3], N))
- y = Dataset(rand(N, 5))
- # Probability distributions
- p1 = probabilities(est, x)
- p2 = probabilities(est, y)
- @test sum(p1) ≈ 1.0
- @test sum(p2) ≈ 1.0
-
- # Entropy
- @test entropy(Renyi(q = 1), est, x) ≈ 0 # Regular order-1 entropy
- @test entropy(Renyi(q = 2), est, y) >= 0 # Higher-order entropy
end
-@testset "Custom sorting" begin
+# TODO: would be nice to have analytic tests for amplitude aware
+@testset "Uniform distr." begin
m = 4
τ = 1
- τs = tuple([τ*i for i = 0:m-1]...)
- ts = rand(1:3, 100)
- D = genembed(ts, τs)
-
- est_isless = SymbolicPermutation(; m, τ = 1, lt = Base.isless)
- est_isless_rand = SymbolicPermutation(; m, τ = 1, lt = Entropies.isless_rand)
- @test probabilities(est_isless, D) isa Probabilities
- @test probabilities(est_isless_rand, D) isa Probabilities
+ x = rand(Random.MersenneTwister(1234), 100_000)
+ D = embed(x, m, τ)
+ @testset "$(S)" for S in (SymbolicPermutation,
+ SymbolicWeightedPermutation, SymbolicAmplitudeAwarePermutation)
+ est = S(m = m, τ = τ)
+ p1 = probabilities(est, x)
+ p2 = probabilities(est, D)
+ @test all(p1 .≈ p2)
+ @test all(p -> 0.03 < p < 0.05, p2)
+ end
end
-# With m = 2, ordinal patterns and frequencies are:
-# [1, 2] => 3
-# [2, 1] => 2
-x = [1, 2, 1, 2, 1, 2]
-# don't randomize in the case of equal values, so use Base.isless
-est = SymbolicPermutation(m = 2, lt = Base.isless)
-probs, πs = probabilities_and_outcomes(est, x)
-@test πs == SVector{2, Int}.([[1, 2], [2, 1]])
-@test probs == [3/5, 2/5]
-
-est3 = SymbolicPermutation(m = 3)
-@test outcome_space(est3) == [
- [1, 2, 3],
- [1, 3, 2],
- [2, 1, 3],
- [2, 3, 1],
- [3, 1, 2],
- [3, 2, 1],
-]
-@test total_outcomes(est3) == factorial(est3.m)
+@testset "outcomes" begin
+ # With m = 2, ordinal patterns and frequencies are:
+ # [1, 2] => 3
+ # [2, 1] => 2
+ x = [1, 2, 1, 2, 1, 2]
+ @testset "$(S)" for S in (SymbolicPermutation,
+ SymbolicWeightedPermutation, SymbolicAmplitudeAwarePermutation)
+ # don't randomize in the case of equal values, so use Base.isless
+ est = S(m = 2, lt = Base.isless)
+ probs, πs = probabilities_and_outcomes(est, x)
+ @test πs == SVector{2, Int}.([[1, 2], [2, 1]])
+ @test probs == [3/5, 2/5]
+ est3 = S(m = 3)
+ @test outcome_space(est3) == [
+ [1, 2, 3],
+ [1, 3, 2],
+ [2, 1, 3],
+ [2, 3, 1],
+ [3, 1, 2],
+ [3, 2, 1],
+ ]
+ @test total_outcomes(est3) == factorial(3)
+ end
+end
\ No newline at end of file
diff --git a/test/probabilities/estimators/permutation_amplitude_aware.jl b/test/probabilities/estimators/permutation_amplitude_aware.jl
deleted file mode 100644
index 844f16e92..000000000
--- a/test/probabilities/estimators/permutation_amplitude_aware.jl
+++ /dev/null
@@ -1,59 +0,0 @@
-@test SymbolicAmplitudeAwarePermutation() isa SymbolicAmplitudeAwarePermutation
-@test SymbolicAmplitudeAwarePermutation(lt = Base.isless) isa SymbolicAmplitudeAwarePermutation
-@test SymbolicAmplitudeAwarePermutation(lt = Entropies.isless_rand) isa SymbolicAmplitudeAwarePermutation
-
-@test total_outcomes(SymbolicAmplitudeAwarePermutation(m = 3)) == factorial(3)
-
-m = 4
-τ = 1
-τs = tuple([τ*i for i = 0:m-1]...)
-x = rand(25)
-D = genembed(x, τs)
-
-est = SymbolicAmplitudeAwarePermutation(m = m, τ = τ)
-# Probability distributions
-p1 = probabilities(est, x)
-p2 = probabilities(est, D)
-@test sum(p1) ≈ 1.0
-@test sum(p2) ≈ 1.0
-@test all(p1.p .≈ p2.p)
-
-# Entropy
-e1 = entropy(Renyi(), est, D)
-e2 = entropy(Renyi(), est, x)
-@test e1 ≈ e2
-
-
-@testset "Custom sorting" begin
- m = 4
- τ = 1
- τs = tuple([τ*i for i = 0:m-1]...)
- ts = rand(1:3, 100)
- D = genembed(ts, τs)
-
- est_isless = SymbolicAmplitudeAwarePermutation(m = 5, τ = 1, lt = Base.isless)
- est_isless_rand = SymbolicAmplitudeAwarePermutation(m = 5, τ = 1, lt = Entropies.isless_rand)
- @test probabilities(est_isless, ts) isa Probabilities
- @test probabilities(est_isless, D) isa Probabilities
-end
-
-# With m = 2, ordinal patterns and frequencies are:
-# [1, 2] => 3
-# [2, 1] => 2
-x = [1, 2, 1, 2, 1, 2]
-# don't randomize in the case of equal values, so use Base.isless
-est = SymbolicAmplitudeAwarePermutation(m = 2, lt = Base.isless)
-probs, πs = probabilities_and_outcomes(est, x)
-@test πs == SVector{2, Int}.([[1, 2], [2, 1]])
-
-# TODO: probabilities should be explicitly tested too.
-est3 = SymbolicAmplitudeAwarePermutation(m = 3)
-@test outcome_space(est3) == [
- [1, 2, 3],
- [1, 3, 2],
- [2, 1, 3],
- [2, 3, 1],
- [3, 1, 2],
- [3, 2, 1],
-]
-@test total_outcomes(est3) == factorial(est3.m)
diff --git a/test/probabilities/estimators/permutation_weighted.jl b/test/probabilities/estimators/permutation_weighted.jl
deleted file mode 100644
index ab120f206..000000000
--- a/test/probabilities/estimators/permutation_weighted.jl
+++ /dev/null
@@ -1,60 +0,0 @@
-@test SymbolicWeightedPermutation() isa SymbolicWeightedPermutation
-@test SymbolicWeightedPermutation(lt = Base.isless) isa SymbolicWeightedPermutation
-@test SymbolicWeightedPermutation(lt = Entropies.isless_rand) isa SymbolicWeightedPermutation
-
-@test total_outcomes(SymbolicWeightedPermutation(m = 3)) == factorial(3)
-
-m = 4
-τ = 1
-τs = tuple([τ*i for i = 0:m-1]...)
-x = rand(100)
-D = genembed(x, τs)
-
-# Probability distributions
-est = SymbolicWeightedPermutation(m = m, τ = τ)
-p1 = probabilities(est, x)
-p2 = probabilities(est, D)
-@test sum(p1) ≈ 1.0
-@test sum(p2) ≈ 1.0
-@test all(p1.p .≈ p2.p)
-
-# Entropy
-e1 = entropy(Renyi(), est, D)
-e2 = entropy(Renyi(), est, x)
-@test e1 ≈ e2
-
-
-@testset "Custom sorting" begin
- m = 4
- τ = 1
- τs = tuple([τ*i for i = 0:m-1]...)
- ts = rand(1:3, 100)
- D = genembed(ts, τs)
-
-
- est_isless = SymbolicWeightedPermutation(m = 5, τ = 1, lt = Base.isless)
- est_isless_rand = SymbolicWeightedPermutation(m = 5, τ = 1, lt = Entropies.isless_rand)
- @test probabilities(est_isless, ts) isa Probabilities
- @test probabilities(est_isless, D) isa Probabilities
-end
-
-# With m = 2, ordinal patterns and frequencies are:
-# [1, 2] => 3
-# [2, 1] => 2
-x = [1, 2, 1, 2, 1, 2]
-# don't randomize in the case of equal values, so use Base.isless
-est = SymbolicWeightedPermutation(m = 2, lt = Base.isless)
-probs, πs = probabilities_and_outcomes(est, x)
-@test πs == SVector{2, Int}.([[1, 2], [2, 1]])
-# TODO: probabilities should be explicitly tested too.
-
-est3 = SymbolicWeightedPermutation(m = 3)
-@test outcome_space(est3) == [
- [1, 2, 3],
- [1, 3, 2],
- [2, 1, 3],
- [2, 3, 1],
- [3, 1, 2],
- [3, 2, 1],
-]
-@test total_outcomes(est3) == factorial(est3.m)
diff --git a/test/probabilities/estimators/spatial/spatial_permutation.jl b/test/probabilities/estimators/spatial/spatial_permutation.jl
index fe731a4f6..abf8a4f85 100644
--- a/test/probabilities/estimators/spatial/spatial_permutation.jl
+++ b/test/probabilities/estimators/spatial/spatial_permutation.jl
@@ -81,4 +81,4 @@ est = SpatialSymbolicPermutation(stencil, x)
hsp = entropy_normalized(Renyi(), est, x)
@test round(hsp, digits = 2) == 1.00
-@test outcome_space(est) == outcome_space(SymbolicPermutation(m = est.m))
+@test outcome_space(est) == outcome_space(SymbolicPermutation(m = 4))
diff --git a/test/probabilities/probabilities.jl b/test/probabilities/probabilities.jl
index dee4eb94a..fdc0dbd30 100644
--- a/test/probabilities/probabilities.jl
+++ b/test/probabilities/probabilities.jl
@@ -7,8 +7,6 @@ testfile("estimators/value_histogram.jl")
testfile("estimators/transfer_operator.jl")
testfile("estimators/naive_kernel.jl")
testfile("estimators/permutation.jl")
-testfile("estimators/permutation_weighted.jl")
-testfile("estimators/permutation_amplitude_aware.jl")
testfile("estimators/timescales.jl")
testfile("estimators/dispersion.jl")
testfile("estimators/diversity.jl")