Improve dudect test with cropped time analysis

BennyWang1007 · BennyWang1007 · commit 0c920f9532b7 · 2025-03-23T01:42:41.000+08:00
The original dudect collects all execution times and performs t-tests on
them, so the results may be affected by outliers. Outliers can be caused
by context switches, interrupts, or other system activity. This patch
introduces percentile-based cropping to remove them.

The patch adds a new function "prepare_percentiles()" to compute
thresholds using a complementary exponential decay scale. The function
is called in "doit()" right before "update_statistics()".

The patch modifies "update_statistics()" to perform t-tests on cropped
execution times by filtering out the outliers.
diff --git a/dudect/fixture.c b/dudect/fixture.c
@@ -42,6 +42,7 @@
 
 #define ENOUGH_MEASURE 10000
 #define TEST_TRIES 10
+#define NUM_PERCENTILES (100)
 
 static t_context_t *t;
 
@@ -56,6 +57,36 @@ static void __attribute__((noreturn)) die(void)
     exit(111);
 }
 
+static int64_t percentile(const int64_t *a_sorted, double which, size_t size)
+{
+    assert(size > 0 && which >= 0 && which <= 1.0); /* a_sorted: ascending */
+    size_t pos = (size_t) (which * (double) size);  /* == size if which == 1 */
+    return a_sorted[pos < size ? pos : size - 1];   /* clamp to last element */
+}
+
+static int cmp(const int64_t *a, const int64_t *b)
+{
+    return (*a > *b) - (*a < *b); /* sign only: no overflow or truncation */
+}
+
+/* Fill percentiles[i] with the 1 - 0.5^(10 * (i + 1) / NUM_PERCENTILES)
+ * quantile of the N_MEASURES exec_times (crop thresholds); die()s on OOM. */
+static void prepare_percentiles(int64_t *exec_times, int64_t *percentiles)
+{
+    int64_t *sorted = malloc(N_MEASURES * sizeof(*sorted));
+    if (!sorted)
+        die();
+    for (size_t i = 0; i < N_MEASURES; i++)
+        sorted[i] = exec_times[i]; /* keep input order for classes[i] */
+    qsort(sorted, N_MEASURES, sizeof(*sorted),
+          (int (*)(const void *, const void *)) cmp);
+    for (size_t i = 0; i < NUM_PERCENTILES; i++) {
+        double q = 1 - pow(0.5, 10 * (double) (i + 1) / NUM_PERCENTILES);
+        percentiles[i] = percentile(sorted, q, N_MEASURES);
+    }
+    free(sorted);
+}
+
 static void differentiate(int64_t *exec_times,
                           const int64_t *before_ticks,
                           const int64_t *after_ticks)
@@ -64,7 +95,9 @@ static void differentiate(int64_t *exec_times,
         exec_times[i] = after_ticks[i] - before_ticks[i];
 }
 
-static void update_statistics(const int64_t *exec_times, uint8_t *classes)
+static void update_statistics(const int64_t *exec_times,
+                              uint8_t *classes,
+                              int64_t *percentiles)
 {
     for (size_t i = 0; i < N_MEASURES; i++) {
         int64_t difference = exec_times[i];
@@ -74,6 +107,13 @@ static void update_statistics(const int64_t *exec_times, uint8_t *classes)
 
         /* do a t-test on the execution time */
         t_push(t, difference, classes[i]);
+
+        /* t-test on cropped execution times, for several cropping thresholds. */
+        for (size_t j = 0; j < NUM_PERCENTILES; j++) {
+            if (difference < percentiles[j]) {
+                t_push(t, difference, classes[i]);
+            }
+        }
     }
 }
 
@@ -123,6 +163,7 @@ static bool doit(int mode)
     int64_t *exec_times = calloc(N_MEASURES, sizeof(int64_t));
     uint8_t *classes = calloc(N_MEASURES, sizeof(uint8_t));
     uint8_t *input_data = calloc(N_MEASURES * CHUNK_SIZE, sizeof(uint8_t));
+    int64_t *percentiles = calloc(NUM_PERCENTILES, sizeof(int64_t));
 
     if (!before_ticks || !after_ticks || !exec_times || !classes ||
         !input_data) {
@@ -133,14 +174,16 @@ static bool doit(int mode)
 
     bool ret = measure(before_ticks, after_ticks, input_data, mode);
     differentiate(exec_times, before_ticks, after_ticks);
-    update_statistics(exec_times, classes);
+    prepare_percentiles(exec_times, percentiles);
+    update_statistics(exec_times, classes, percentiles);
     ret &= report();
 
     free(before_ticks);
     free(after_ticks);
     free(exec_times);
     free(classes);
     free(input_data);
+    free(percentiles);
 
     return ret;
 }