
Commit 2c8c1ee

Upgrade datasketches lib from 3.3.0 to 4.1.0 (#684)
* Upgrade datasketches lib from 3.3.0 to 4.1.0
  - quantiles are inclusive by default now (the pace workaround is no longer required)
  - fix implicit float conversions and some compiler warnings
* Update CMakeLists.txt

Co-authored-by: Leonardo Parente <[email protected]>
1 parent 22bd64d · commit 2c8c1ee

33 files changed (+1263 -1180 lines)

3rd/datasketches/common/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
@@ -37,4 +37,9 @@ target_sources(common
     ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
-)
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/kolmogorov_smirnov.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/kolmogorov_smirnov_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/quantiles_sorted_view.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/quantiles_sorted_view_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/version.hpp.in
+)

3rd/datasketches/common/include/MurmurHash3.h

Lines changed: 25 additions & 27 deletions
@@ -29,37 +29,30 @@ typedef unsigned char uint8_t;
 typedef unsigned int uint32_t;
 typedef unsigned __int64 uint64_t;

-#define FORCE_INLINE __forceinline
+#define MURMUR3_FORCE_INLINE __forceinline

 #include <stdlib.h>

-#define ROTL32(x,y) _rotl(x,y)
-#define ROTL64(x,y) _rotl64(x,y)
+#define MURMUR3_ROTL64(x,y) _rotl64(x,y)

-#define BIG_CONSTANT(x) (x)
+#define MURMUR3_BIG_CONSTANT(x) (x)

 // Other compilers

 #else // defined(_MSC_VER)

 #include <stdint.h>

-#define FORCE_INLINE inline __attribute__((always_inline))
-
-inline uint32_t rotl32 ( uint32_t x, int8_t r )
-{
-  return (x << r) | (x >> (32 - r));
-}
+#define MURMUR3_FORCE_INLINE inline __attribute__((always_inline))

 inline uint64_t rotl64 ( uint64_t x, int8_t r )
 {
   return (x << r) | (x >> (64 - r));
 }

-#define ROTL32(x,y) rotl32(x,y)
-#define ROTL64(x,y) rotl64(x,y)
+#define MURMUR3_ROTL64(x,y) rotl64(x,y)

-#define BIG_CONSTANT(x) (x##LLU)
+#define MURMUR3_BIG_CONSTANT(x) (x##LLU)

 #endif // !defined(_MSC_VER)

@@ -78,7 +71,7 @@ typedef struct {
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here

-FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
+MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
 {
   uint64_t res;
   memcpy(&res, p + i, sizeof(res));
@@ -88,20 +81,21 @@ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
 //-----------------------------------------------------------------------------
 // Finalization mix - force all bits of a hash block to avalanche

-FORCE_INLINE uint64_t fmix64 ( uint64_t k )
+MURMUR3_FORCE_INLINE uint64_t fmix64 ( uint64_t k )
 {
   k ^= k >> 33;
-  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+  k *= MURMUR3_BIG_CONSTANT(0xff51afd7ed558ccd);
   k ^= k >> 33;
-  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  k *= MURMUR3_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
   k ^= k >> 33;

   return k;
 }

-FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
-  static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
-  static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
+    uint64_t seed, HashState& out) {
+  static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5);
+  static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f);

   const uint8_t* data = (const uint8_t*)key;

@@ -118,13 +112,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
     uint64_t k1 = getblock64(blocks, i * 2 + 0);
     uint64_t k2 = getblock64(blocks, i * 2 + 1);

-    k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
-    out.h1 = ROTL64(out.h1,27);
+    k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
+    out.h1 = MURMUR3_ROTL64(out.h1,27);
     out.h1 += out.h2;
     out.h1 = out.h1*5+0x52dce729;

-    k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
-    out.h2 = ROTL64(out.h2,31);
+    k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
+    out.h2 = MURMUR3_ROTL64(out.h2,31);
     out.h2 += out.h1;
     out.h2 = out.h2*5+0x38495ab5;
   }
@@ -144,7 +138,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
     case 11: k2 ^= ((uint64_t)tail[10]) << 16; // falls through
     case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;  // falls through
     case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
-             k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
+             k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
              // falls through
     case  8: k1 ^= ((uint64_t)tail[ 7]) << 56; // falls through
     case  7: k1 ^= ((uint64_t)tail[ 6]) << 48; // falls through
@@ -154,7 +148,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
     case  3: k1 ^= ((uint64_t)tail[ 2]) << 16; // falls through
     case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;  // falls through
     case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
-             k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
+             k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
   };

   //----------
@@ -175,10 +169,14 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t

 //-----------------------------------------------------------------------------

-FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
+MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
   HashState hashes;
   MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
   return static_cast<uint16_t>(hashes.h1 & 0xffff);
 }

+#undef MURMUR3_FORCE_INLINE
+#undef MURMUR3_ROTL64
+#undef MURMUR3_BIG_CONSTANT
+
 #endif // _MURMURHASH3_H_
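The rename plus the trailing #undef block is the usual macro-hygiene pattern for vendored header-only code: prefix each macro with the library name so it cannot collide with another header's FORCE_INLINE or ROTL64, then remove it once the header no longer needs it. A minimal self-contained sketch of the pattern, with illustrative names not taken from the library:

// macro_hygiene.cpp -- illustrative sketch of the prefix-and-#undef pattern (hypothetical names)
#include <cstdint>

// Prefixed macro: cannot collide with another header's ROTL64 or FORCE_INLINE.
#define MYLIB_ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))

inline uint64_t mylib_mix(uint64_t v) {
  return MYLIB_ROTL64(v, 31) * 0x87c37b91114253d5ULL;
}

// Undefine once the header's own code no longer needs it, so nothing leaks to includers.
#undef MYLIB_ROTL64

int main() {
  return mylib_mix(42) != 0 ? 0 : 1;
}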

3rd/datasketches/common/include/binomial_bounds.hpp

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@

 #include <algorithm>
 #include <cmath>
+#include <stdexcept>

 /*
  * This class enables the estimation of error bounds given a sample set size, the sampling

3rd/datasketches/common/include/common_defs.hpp

Lines changed: 27 additions & 1 deletion
@@ -24,6 +24,9 @@
 #include <string>
 #include <memory>
 #include <iostream>
+#include <random>
+#include <chrono>
+#include <thread>

 namespace datasketches {

@@ -34,6 +37,19 @@ enum resize_factor { X1 = 0, X2, X4, X8 };
 template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
 template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;

+// thread-safe random bit
+static thread_local std::independent_bits_engine<std::mt19937, 1, uint32_t>
+  random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
+    + std::hash<std::thread::id>{}(std::this_thread::get_id())));
+
+// common random declarations
+namespace random_utils {
+  static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
+  static thread_local std::mt19937_64 rand(rd());
+  static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0);
+}
+
+
 // utility function to hide unused compiler warning
 // usually has no additional cost
 template<typename T> void unused(T&&...) {}
@@ -63,7 +79,7 @@ static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
 }

 template<typename T>
-static inline void write(std::ostream& os, T& value) {
+static inline void write(std::ostream& os, T value) {
   os.write(reinterpret_cast<const char*>(&value), sizeof(T));
 }

@@ -72,6 +88,16 @@ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
   os.write(reinterpret_cast<const char*>(ptr), size_bytes);
 }

+// wrapper for iterators to implement operator-> returning temporary value
+template<typename T>
+class return_value_holder {
+public:
+  return_value_holder(T value): value_(value) {}
+  const T* operator->() const { return std::addressof(value_); }
+private:
+  T value_;
+};
+
 } // namespace

 #endif // _COMMON_DEFS_HPP_
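Besides the seeded random_bit engine and the random_utils generators (presumably drawn as random_utils::next_double(random_utils::rand) elsewhere in the library), the notable addition here is the return_value_holder proxy. It exists because an iterator whose operator* builds its value on the fly cannot return a raw pointer from operator-> without dangling. A minimal sketch with a made-up iterator, not the library's real one:

#include <utility>
#include <memory>

// Simplified copy of the proxy from common_defs.hpp.
template<typename T>
class return_value_holder {
public:
  return_value_holder(T value): value_(value) {}
  const T* operator->() const { return std::addressof(value_); }
private:
  T value_;
};

// Hypothetical iterator whose operator* builds a temporary pair. operator->
// cannot return the address of that temporary, so it returns the holder by value.
struct toy_iterator {
  int i = 0;
  std::pair<int, int> operator*() const { return {i, i * i}; }
  return_value_holder<std::pair<int, int>> operator->() const { return **this; }
};

int main() {
  toy_iterator it{3};
  return it->second == 9 ? 0 : 1; // the holder keeps the temporary alive for the access
}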

3rd/datasketches/common/include/count_zeros.hpp

Lines changed: 11 additions & 0 deletions
@@ -91,6 +91,17 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
   return 56 + byte_leading_zeros_table[(input      ) & FCLZ_MASK_08];
 }

+static inline uint8_t count_leading_zeros_in_u32(uint32_t input) {
+  if (input > FCLZ_MASK_24)
+    return      byte_leading_zeros_table[(input >> 24) & FCLZ_MASK_08];
+  if (input > FCLZ_MASK_16)
+    return  8 + byte_leading_zeros_table[(input >> 16) & FCLZ_MASK_08];
+  if (input > FCLZ_MASK_08)
+    return 16 + byte_leading_zeros_table[(input >>  8) & FCLZ_MASK_08];
+  if (true)
+    return 24 + byte_leading_zeros_table[(input      ) & FCLZ_MASK_08];
+}
+
 static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
   for (int i = 0; i < 4; i++) {
     const int byte = input & 0xff;
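The new 32-bit variant mirrors the existing 64-bit routine: find the highest byte containing a set bit, then add a per-byte leading-zero count from the 256-entry lookup table (FCLZ_MASK_24/16/08 are the 0xFFFFFF/0xFFFF/0xFF masks; the trailing if (true) just keeps the cascade shape of the other branches). A self-contained sketch that checks the table approach against a naive reference, with the table built inline rather than taken from the header:

#include <cstdint>
#include <cassert>

// Reference: naive leading-zero count for 32-bit values.
static uint8_t clz32_naive(uint32_t x) {
  uint8_t n = 0;
  for (uint32_t m = 0x80000000u; m != 0 && !(x & m); m >>= 1) ++n;
  return n;
}

int main() {
  // Per-byte table the header relies on: leading zeros within one byte.
  uint8_t table[256];
  for (int b = 0; b < 256; ++b)
    table[b] = static_cast<uint8_t>(clz32_naive(static_cast<uint32_t>(b)) - 24);

  // Table-based count, mirroring count_leading_zeros_in_u32.
  auto clz32_table = [&](uint32_t x) -> uint8_t {
    if (x > 0xFFFFFFu) return      table[(x >> 24) & 0xFF];
    if (x > 0xFFFFu)   return  8 + table[(x >> 16) & 0xFF];
    if (x > 0xFFu)     return 16 + table[(x >>  8) & 0xFF];
    return 24 + table[x & 0xFF];
  };

  // Sparse sweep over the 32-bit range; both counts must agree everywhere.
  for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 0x10001ull)
    assert(clz32_table(static_cast<uint32_t>(x)) == clz32_naive(static_cast<uint32_t>(x)));
  return 0;
}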

3rd/datasketches/kll/include/kolmogorov_smirnov.hpp renamed to 3rd/datasketches/common/include/kolmogorov_smirnov.hpp

Lines changed: 5 additions & 3 deletions
@@ -25,7 +25,8 @@ namespace datasketches {
 class kolmogorov_smirnov {
 public:
   /**
-   * Computes the raw delta area between two KLL quantile sketches for the Kolmogorov-Smirnov Test.
+   * Computes the raw delta area between two quantile sketches for the Kolmogorov-Smirnov Test.
+   * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
    * @param sketch1 KLL sketch 1
    * @param sketch2 KLL sketch 2
    * @return the raw delta between two KLL quantile sketches
@@ -37,6 +38,7 @@ class kolmogorov_smirnov {
    * Computes the adjusted delta area threshold for the Kolmogorov-Smirnov Test.
    * Adjusts the computed threshold by the error epsilons of the two given sketches.
    * See <a href="https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov–Smirnov Test</a>
+   * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
    * @param sketch1 KLL sketch 1
    * @param sketch2 KLL sketch 2
    * @param p Target p-value. Typically .001 to .1, e.g., .05.
@@ -46,7 +48,8 @@ class kolmogorov_smirnov {
   static double threshold(const Sketch& sketch1, const Sketch& sketch2, double p);

   /**
-   * Performs the Kolmogorov-Smirnov Test between two KLL quantiles sketches.
+   * Performs the Kolmogorov-Smirnov Test between two quantile sketches.
+   * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
    * Note: if the given sketches have insufficient data or if the sketch sizes are too small,
    * this will return false.
    * @param sketch1 KLL sketch 1
@@ -57,7 +60,6 @@ class kolmogorov_smirnov {
    */
   template<typename Sketch>
   static bool test(const Sketch& sketch1, const Sketch& sketch2, double p);
-
 };

 } /* namespace datasketches */

3rd/datasketches/kll/include/kolmogorov_smirnov_impl.hpp renamed to 3rd/datasketches/common/include/kolmogorov_smirnov_impl.hpp

Lines changed: 15 additions & 18 deletions
@@ -20,39 +20,36 @@
 #ifndef KOLMOGOROV_SMIRNOV_IMPL_HPP_
 #define KOLMOGOROV_SMIRNOV_IMPL_HPP_

-namespace datasketches {
+#include <cmath>
+#include <algorithm>

-// type resolver
-template<typename T, typename C, typename S, typename A>
-kll_quantile_calculator<T, C, A> make_quantile_calculator(const kll_sketch<T, C, S, A>& sketch) {
-  return kll_quantile_calculator<T, C, A>(sketch);
-}
+namespace datasketches {

 template<typename Sketch>
 double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
-  using Comparator = typename Sketch::comparator;
-  auto calc1 = make_quantile_calculator(sketch1);
-  auto calc2 = make_quantile_calculator(sketch2);
-  auto it1 = calc1.begin();
-  auto it2 = calc2.begin();
+  auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
+  auto view1 = sketch1.get_sorted_view();
+  auto view2 = sketch2.get_sorted_view();
+  auto it1 = view1.begin();
+  auto it2 = view2.begin();
   const auto n1 = sketch1.get_n();
   const auto n2 = sketch2.get_n();
   double delta = 0;
-  while (it1 != calc1.end() && it2 != calc2.end()) {
-    const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
-    const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
+  while (it1 != view1.end() && it2 != view2.end()) {
+    const double norm_cum_wt1 = static_cast<double>(it1.get_cumulative_weight(false)) / n1;
+    const double norm_cum_wt2 = static_cast<double>(it2.get_cumulative_weight(false)) / n2;
     delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
-    if (Comparator()((*it1).first, (*it2).first)) {
+    if (comparator((*it1).first, (*it2).first)) {
       ++it1;
-    } else if (Comparator()((*it2).first, (*it1).first)) {
+    } else if (comparator((*it2).first, (*it1).first)) {
       ++it2;
     } else {
       ++it1;
       ++it2;
     }
   }
-  const double norm_cum_wt1 = it1 == calc1.end() ? 1 : static_cast<double>((*it1).second) / n1;
-  const double norm_cum_wt2 = it2 == calc2.end() ? 1 : static_cast<double>((*it2).second) / n2;
+  const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>(it1.get_cumulative_weight(false)) / n1;
+  const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>(it2.get_cumulative_weight(false)) / n2;
   delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
   return delta;
 }
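With sorted views in place of the removed kll_quantile_calculator, delta() above is the classic two-sample KS statistic: the maximum gap between the two empirical CDFs, advancing through both sorted views in merge order. A usage sketch against the 4.1.0 API shown in this commit (the include paths are assumptions based on this vendored layout):

#include <random>
#include <iostream>
#include "kll_sketch.hpp"
#include "kolmogorov_smirnov.hpp"

int main() {
  datasketches::kll_sketch<double> s1, s2;
  std::mt19937_64 gen(1);
  std::normal_distribution<double> a(0.0, 1.0), b(0.5, 1.0);
  for (int i = 0; i < 10000; ++i) { s1.update(a(gen)); s2.update(b(gen)); }

  const double d = datasketches::kolmogorov_smirnov::delta(s1, s2);
  // true if the delta exceeds the adjusted threshold at p = 0.05,
  // i.e. "same distribution" can be rejected at that significance
  const bool reject = datasketches::kolmogorov_smirnov::test(s1, s2, 0.05);
  std::cout << "delta=" << d << " reject=" << std::boolalpha << reject << '\n';
}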

3rd/datasketches/common/include/memory_operations.hpp

Lines changed: 6 additions & 4 deletions
@@ -23,6 +23,8 @@
 #include <memory>
 #include <exception>
 #include <iostream>
+#include <string>
+#include <cstring>

 namespace datasketches {

@@ -53,14 +55,14 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
 }

 template<typename T>
-static inline size_t copy_to_mem(const T& item, void* dst) {
-  memcpy(dst, &item, sizeof(T));
+static inline size_t copy_from_mem(const void* src, T& item) {
+  memcpy(&item, src, sizeof(T));
   return sizeof(T);
 }

 template<typename T>
-static inline size_t copy_from_mem(const void* src, T& item) {
-  memcpy(&item, src, sizeof(T));
+static inline size_t copy_to_mem(T item, void* dst) {
+  memcpy(dst, &item, sizeof(T));
   return sizeof(T);
 }
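Note the signature change on copy_to_mem from const T& to by-value T, mirroring write() in common_defs.hpp, which went from T& to T. Passing by value accepts temporaries such as casts of a wider type at the call site, which a non-const reference rejects outright; this appears to be part of the implicit-conversion cleanup mentioned in the commit message. A minimal sketch of the difference:

#include <cstdint>
#include <cstring>
#include <cstddef>

// Old style: a non-const reference cannot bind to a temporary.
template<typename T> size_t write_ref(T& value, void* dst) {
  std::memcpy(dst, &value, sizeof(T)); return sizeof(T);
}

// New style: by value accepts temporaries and literals.
template<typename T> size_t write_val(T value, void* dst) {
  std::memcpy(dst, &value, sizeof(T)); return sizeof(T);
}

int main() {
  unsigned char buf[8];
  uint64_t n = 42;
  // write_ref(static_cast<uint32_t>(n), buf);  // would not compile: T& cannot bind a temporary
  return write_val(static_cast<uint32_t>(n), buf) == 4 ? 0 : 1;
}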
