Fixed plotting bug when all samples' measurements have the same value #832

Open
wants to merge 11 commits into base: master
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]
- MSRV bumped to 1.70
- Fixed plotting bug when all samples' measurements have the same value. See [#720](https://github.com/bheisler/criterion.rs/issues/720).

### Changed
- The `real_blackbox` feature no longer has any impact. Criterion always uses `std::hint::black_box()` now.
79 changes: 46 additions & 33 deletions src/plot/gnuplot_backend/distributions.rs
@@ -21,41 +21,54 @@ fn abs_distribution(
let mut ci_values = [ci.lower_bound, ci.upper_bound, estimate.point_estimate];
let unit = formatter.scale_values(typical, &mut ci_values);
let (lb, ub, point) = (ci_values[0], ci_values[1], ci_values[2]);

let start = lb - (ub - lb) / 9.;
let end = ub + (ub - lb) / 9.;
let mut scaled_xs: Vec<f64> = distribution.iter().cloned().collect();
let _ = formatter.scale_values(typical, &mut scaled_xs);
let scaled_xs_sample = Sample::new(&scaled_xs);
let (kde_xs, ys) = kde::sweep(scaled_xs_sample, KDE_POINTS, Some((start, end)));

// interpolate between two points of the KDE sweep to find the Y position at the point estimate.
let n_point = kde_xs
.iter()
.position(|&x| x >= point)
.unwrap_or(kde_xs.len() - 1)
.max(1); // Must be at least the second element or this will panic
let slope = (ys[n_point] - ys[n_point - 1]) / (kde_xs[n_point] - kde_xs[n_point - 1]);
let y_point = ys[n_point - 1] + (slope * (point - kde_xs[n_point - 1]));

let zero = iter::repeat(0);

let start = kde_xs
.iter()
.enumerate()
.find(|&(_, &x)| x >= lb)
.unwrap()
.0;
let end = kde_xs
.iter()
.enumerate()
.rev()
.find(|&(_, &x)| x <= ub)
.unwrap()
.0;
let len = end - start;
let (kde_xs, ys, start, len, y_point) =
// If lower_bound == upper_bound, all samples have the same value.
// Build substitute coordinates so the charts still render something meaningful.
if lb == ub {
let delta = if lb == 0. { 0.01 } else { lb * 0.01 };
// The first two (x, y) pairs draw the line/bar of the distribution at `x`;
// the last two draw the rectangle of the confidence interval.
let kde_xs = Vec::from([lb, lb, lb - delta, lb + delta]).into_boxed_slice();
let ys = Vec::from([0_f64, point, point, point]).into_boxed_slice();
// `start` and `len` mark the index range of the confidence-interval rectangle coordinates.
let start = 2_usize;
let len = 2_usize;
(kde_xs, ys, start, len, point)
} else {
let start = lb - (ub - lb) / 9.;
let end = ub + (ub - lb) / 9.;
let mut scaled_xs: Vec<f64> = distribution.iter().cloned().collect();
let _ = formatter.scale_values(typical, &mut scaled_xs);
let scaled_xs_sample = Sample::new(&scaled_xs);
let (kde_xs, ys) = kde::sweep(scaled_xs_sample, KDE_POINTS, Some((start, end)));

let kde_xs_sample = Sample::new(&kde_xs);
// interpolate between two points of the KDE sweep to find the Y position at the point estimate.
let n_point = kde_xs
.iter()
.position(|&x| x >= point)
.unwrap_or(kde_xs.len() - 1)
.max(1); // Must be at least the second element or this will panic
let slope = (ys[n_point] - ys[n_point - 1]) / (kde_xs[n_point] - kde_xs[n_point - 1]);
let y_point = ys[n_point - 1] + (slope * (point - kde_xs[n_point - 1]));

let start = kde_xs
.iter()
.enumerate()
.find(|&(_, &x)| x >= lb)
.unwrap()
.0;
let end = kde_xs
.iter()
.enumerate()
.rev()
.find(|&(_, &x)| x <= ub)
.unwrap()
.0;
let len = end - start;
(kde_xs, ys, start, len, y_point)
};

let mut figure = Figure::new();
figure
@@ -68,7 +81,7 @@
)))
.configure(Axis::BottomX, |a| {
a.set(Label(format!("Average time ({})", unit)))
.set(Range::Limits(kde_xs_sample.min(), kde_xs_sample.max()))
.set(Range::Auto)
})
.configure(Axis::LeftY, |a| a.set(Label("Density (a.u.)")))
.configure(Key, |k| {
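To make the new `lb == ub` branch easier to follow, here is a minimal standalone sketch of the coordinates it builds. The function name and free-standing form are hypothetical; only the constants and layout mirror the hunk above.

```rust
/// Hypothetical standalone sketch of the coordinates built when the
/// confidence interval collapses to a single value (lb == ub == point).
fn degenerate_distribution_coords(lb: f64, point: f64) -> (Vec<f64>, Vec<f64>, usize, usize) {
    // Give the collapsed confidence interval a visible width: 1% of the
    // value, or an absolute 0.01 when the value itself is zero.
    let delta = if lb == 0.0 { 0.01 } else { lb * 0.01 };
    // The first two (x, y) pairs draw the vertical line/bar of the
    // distribution at `lb`; the last two are the corners of the
    // confidence-interval rectangle.
    let xs = vec![lb, lb, lb - delta, lb + delta];
    let ys = vec![0.0, point, point, point];
    // The rectangle coordinates start at index 2 and span 2 points.
    (xs, ys, 2, 2)
}
```

With lb == point == 5.0, for instance, this returns xs ≈ [5.0, 5.0, 4.95, 5.05] and ys = [0.0, 5.0, 5.0, 5.0]: a single bar at the measured value plus a narrow rectangle standing in for the collapsed confidence interval.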
3 changes: 1 addition & 2 deletions src/plot/gnuplot_backend/pdf.rs
@@ -237,7 +237,6 @@ pub(crate) fn pdf_small(
let mean = scaled_avg_times.mean();

let (xs, ys, mean_y) = kde::sweep_and_estimate(scaled_avg_times, KDE_POINTS, None, mean);
let xs_ = Sample::new(&xs);
let ys_ = Sample::new(&ys);

let y_limit = ys_.max() * 1.1;
@@ -249,7 +248,7 @@
.set(size.unwrap_or(SIZE))
.configure(Axis::BottomX, |a| {
a.set(Label(format!("Average time ({})", unit)))
.set(Range::Limits(xs_.min(), xs_.max()))
.set(Range::Auto)
})
.configure(Axis::LeftY, |a| {
a.set(Label("Density (a.u.)"))
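The switch from `Range::Limits` to `Range::Auto` in these plots sidesteps the degenerate axis range that appears when every measurement is identical, the same condition the old routine.rs workaround tried to avoid. The helper below is purely illustrative (it is not part of criterion-plot); it only shows the failure mode.

```rust
// Illustrative sketch only: the explicit axis limits the old code derived
// from the sample, and why they break for constant samples.
fn explicit_axis_limits(xs: &[f64]) -> Option<(f64, f64)> {
    let min = xs.iter().cloned().fold(f64::INFINITY, f64::min);
    let max = xs.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    // When every sample has the same value, min == max and the explicit
    // range has zero width. Returning None here stands in for letting the
    // backend pick and pad the range itself (Range::Auto).
    if min < max { Some((min, max)) } else { None }
}
```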
72 changes: 44 additions & 28 deletions src/plot/plotters_backend/distributions.rs
@@ -19,36 +19,52 @@ fn abs_distribution(
let unit = formatter.scale_values(typical, &mut ci_values);
let (lb, ub, point) = (ci_values[0], ci_values[1], ci_values[2]);

let start = lb - (ub - lb) / 9.;
let end = ub + (ub - lb) / 9.;
let mut scaled_xs: Vec<f64> = distribution.iter().cloned().collect();
let _ = formatter.scale_values(typical, &mut scaled_xs);
let scaled_xs_sample = Sample::new(&scaled_xs);
let (kde_xs, ys) = kde::sweep(scaled_xs_sample, KDE_POINTS, Some((start, end)));
let (kde_xs, ys, start, len, y_point) =
// If lower_bound == upper_bound, all samples have the same value.
// Build substitute coordinates so the charts still render something meaningful.
if lb == ub {
let delta = if lb == 0. { 0.01 } else { lb * 0.01 };
// The first two (x, y) pairs draw the line/bar of the distribution at `x`;
// the last two draw the rectangle of the confidence interval.
let kde_xs = Vec::from([lb, lb, lb - delta, lb + delta]).into_boxed_slice();
let ys = Vec::from([0_f64, point, point, point]).into_boxed_slice();
// `start` and `len` mark the index range of the confidence-interval rectangle coordinates.
let start = 2_usize;
let len = 2_usize;
(kde_xs, ys, start, len, point)
} else {
let start = lb - (ub - lb) / 9.;
let end = ub + (ub - lb) / 9.;
let mut scaled_xs: Vec<f64> = distribution.iter().cloned().collect();
let _ = formatter.scale_values(typical, &mut scaled_xs);
let scaled_xs_sample = Sample::new(&scaled_xs);
let (kde_xs, ys) = kde::sweep(scaled_xs_sample, KDE_POINTS, Some((start, end)));

// interpolate between two points of the KDE sweep to find the Y position at the point estimate.
let n_point = kde_xs
.iter()
.position(|&x| x >= point)
.unwrap_or(kde_xs.len() - 1)
.max(1); // Must be at least the second element or this will panic
let slope = (ys[n_point] - ys[n_point - 1]) / (kde_xs[n_point] - kde_xs[n_point - 1]);
let y_point = ys[n_point - 1] + (slope * (point - kde_xs[n_point - 1]));
// interpolate between two points of the KDE sweep to find the Y position at the point estimate.
let n_point = kde_xs
.iter()
.position(|&x| x >= point)
.unwrap_or(kde_xs.len() - 1)
.max(1); // Must be at least the second element or this will panic
let slope = (ys[n_point] - ys[n_point - 1]) / (kde_xs[n_point] - kde_xs[n_point - 1]);
let y_point = ys[n_point - 1] + (slope * (point - kde_xs[n_point - 1]));

let start = kde_xs
.iter()
.enumerate()
.find(|&(_, &x)| x >= lb)
.unwrap()
.0;
let end = kde_xs
.iter()
.enumerate()
.rev()
.find(|&(_, &x)| x <= ub)
.unwrap()
.0;
let len = end - start;
let start = kde_xs
.iter()
.enumerate()
.find(|&(_, &x)| x >= lb)
.unwrap()
.0;
let end = kde_xs
.iter()
.enumerate()
.rev()
.find(|&(_, &x)| x <= ub)
.unwrap()
.0;
let len = end - start;
(kde_xs, ys, start, len, y_point)
};

let kde_xs_sample = Sample::new(&kde_xs);

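The interpolation block duplicated in both backends can be read as the following free-standing sketch (hypothetical function name): it finds the y position on the KDE curve at the point estimate by linearly interpolating between the two surrounding sweep points.

```rust
// Hypothetical standalone version of the interpolation used above to place
// the point-estimate marker on the KDE curve. Assumes at least two sweep points.
fn y_at_point(kde_xs: &[f64], ys: &[f64], point: f64) -> f64 {
    // Index of the first sweep x at or beyond `point`, clamped to at least 1
    // so that `i - 1` is always a valid index.
    let i = kde_xs
        .iter()
        .position(|&x| x >= point)
        .unwrap_or(kde_xs.len() - 1)
        .max(1);
    let slope = (ys[i] - ys[i - 1]) / (kde_xs[i] - kde_xs[i - 1]);
    ys[i - 1] + slope * (point - kde_xs[i - 1])
}
```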
3 changes: 1 addition & 2 deletions src/routine.rs
@@ -104,8 +104,7 @@ pub(crate) trait Routine<M: Measurement, T: ?Sized> {
// Early exit for extremely long-running benchmarks:
if time_start.elapsed() > maximum_bench_duration {
let iters = vec![n as f64, n as f64].into_boxed_slice();
// prevent gnuplot bug when all values are equal
let elapsed = vec![t_prev, t_prev + 0.000001].into_boxed_slice();
let elapsed = vec![t_prev, t_prev].into_boxed_slice();
return (ActualSamplingMode::Flat, iters, elapsed);
}

72 changes: 61 additions & 11 deletions src/stats/univariate/kde/mod.rs
@@ -17,6 +17,7 @@ where
bandwidth: A,
kernel: K,
sample: &'a Sample<A>,
are_samples_the_same: bool,
}

impl<'a, A, K> Kde<'a, A, K>
@@ -27,10 +28,14 @@ where
/// Creates a new kernel density estimator from the `sample`, using a kernel and estimating
/// the bandwidth using the method `bw`
pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> {
// We check if all samples have the same value just once
// during initialization and store it in `are_samples_the_same`.
// This is used in the `estimate(x)` method.
Kde {
bandwidth: bw.estimate(sample),
kernel,
sample,
are_samples_the_same: sample.min() == sample.max(),
}
}

@@ -56,16 +61,22 @@

/// Estimates the probability density of `x`
pub fn estimate(&self, x: A) -> A {
let _0 = A::cast(0);
let slice = self.sample;
let h = self.bandwidth;
let n = A::cast(slice.len());

let sum = slice
.iter()
.fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h));

sum / (h * n)
if self.are_samples_the_same {
// The probability density at `x` is taken to be 1
// when all samples equal `x`.
A::cast(1)
} else {
let _0 = A::cast(0);
let slice = self.sample;
let h = self.bandwidth;
let n = A::cast(slice.len());

let sum = slice
.iter()
.fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h));

sum / (h * n)
}
}
}
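For context, `estimate()` computes the standard kernel density estimate f(x) = (1 / (n * h)) * Σ K((x - x_i) / h). A minimal sketch, as a hypothetical free function specialised to f64 and a Gaussian kernel:

```rust
// Minimal sketch of the kernel density estimate computed in the else branch
// above, specialised to f64 and a Gaussian kernel.
fn gaussian_kde_estimate(sample: &[f64], h: f64, x: f64) -> f64 {
    let n = sample.len() as f64;
    // Standard Gaussian kernel K(u) = exp(-u^2 / 2) / sqrt(2 * pi).
    let gaussian = |u: f64| (-0.5 * u * u).exp() / (2.0 * std::f64::consts::PI).sqrt();
    sample.iter().map(|&x_i| gaussian((x - x_i) / h)).sum::<f64>() / (h * n)
}
```

When every x_i equals x, each kernel term is K(0) = 1 / sqrt(2π) ≈ 0.399, so the estimate collapses to roughly 0.399 / h (about 399 for the 0.001 fallback bandwidth introduced below), which is presumably why the constant-sample case is short-circuited to a flat density of 1 for plotting.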

@@ -84,7 +95,19 @@ impl Bandwidth {
let n = A::cast(sample.len());
let sigma = sample.std_dev(None);

sigma * (factor / n).powf(exponent)
// When all samples have the same value (rare, but not impossible),
// the standard deviation is zero and Silverman's method would
// produce a zero bandwidth.
// The bandwidth must always be a positive (non-zero) number, so in
// that case we fall back to a very small bandwidth, which creates a
// sharp peak at the single data point and accurately reflects that
// all of the data is concentrated at that single value.
if sigma.is_zero() {
A::cast(0.001)
} else {
sigma * (factor / n).powf(exponent)
}
}
}
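Silverman's rule of thumb computes the bandwidth as h = σ · (4 / (3n))^(1/5). The sketch below is a hypothetical standalone version of the logic above with its new zero-variance fallback; the real `factor` and `exponent` constants are defined outside the visible hunk, so the 4/3 and 1/5 here are the textbook values.

```rust
// Hypothetical standalone sketch of Silverman's rule with the zero-variance
// fallback introduced in this PR; assumes at least two samples.
fn silverman_bandwidth(sample: &[f64]) -> f64 {
    let n = sample.len() as f64;
    let mean = sample.iter().sum::<f64>() / n;
    // Sample standard deviation (Bessel-corrected).
    let sigma =
        (sample.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0)).sqrt();
    if sigma == 0.0 {
        // All samples identical: clamp to a small positive bandwidth so the
        // KDE keeps a sharp peak at the single value instead of dividing by zero.
        0.001
    } else {
        // Textbook Silverman constants; the crate keeps them in `factor` / `exponent`.
        sigma * (4.0 / (3.0 * n)).powf(0.2)
    }
}
```

For a constant sample such as `&[1.0, 1.0]` this returns 0.001, matching the new `positive_bandwidth` test further down.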
}
Expand All @@ -102,6 +125,33 @@ macro_rules! test {
use crate::stats::univariate::kde::{Bandwidth, Kde};
use crate::stats::univariate::Sample;

// When every sample value equals some X, the estimated probability
// density at X should be 1.
#[test]
fn constant_sample_measurements() {
const CONSTANT_MEASUREMENT: $ty = 1.;
let measurements = &[CONSTANT_MEASUREMENT, CONSTANT_MEASUREMENT];
let sample = Sample::new(measurements);
let kde = Kde::new(sample, Gaussian, Bandwidth::Silverman);
for x in measurements {
let prob_density = kde.estimate(*x);
assert_eq!(prob_density, 1., "The probability density of X when all samples are X should be 1.");
}
}

// The bandwidth should be a positive (non-zero) number, even when
// the samples are all the same (constant measurements).
// It's very unlikely that all samples will have the same value, but
// not impossible, so we should handle that case gracefully.
#[test]
fn positive_bandwidth() {
const CONSTANT_MEASUREMENT: $ty = 1.;
let sample = Sample::new(&[CONSTANT_MEASUREMENT, CONSTANT_MEASUREMENT]);
let bandwidth_estimator = Bandwidth::Silverman;
let h = bandwidth_estimator.estimate(&sample);
assert!(h > 0., "The bandwidth should be > 0, even when all samples have the same value, but it was 0.");
}

// The [-inf inf] integral of the estimated PDF should be one
quickcheck! {
fn integral(size: u8, start: u8) -> TestResult {