From 16712804c04c8e8ad57751815244a0c5fb37f208 Mon Sep 17 00:00:00 2001 From: Claude Warren Date: Wed, 21 Jun 2023 13:18:01 +0100 Subject: [PATCH] [COLLECTIONS-841] Open up bloom filter tests - test changes to support bit decay based Bloom filters (#400) * Adjusted tests to handle bloom filter implementations that utilized automatic decay. * fixed formatting issues * fixed indent * Format tweaks --------- Co-authored-by: Gary Gregory --- .../bloomfilter/AbstractBloomFilterTest.java | 32 +++++++++++-------- .../bloomfilter/TestingHashers.java | 28 ++++++++++++---- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java index 5e9d91a186..7e1666a074 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java @@ -24,6 +24,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.Arrays; import java.util.BitSet; import java.util.List; @@ -42,7 +43,7 @@ public abstract class AbstractBloomFilterTest { * * @return the testing shape. 
*/ - protected final Shape getTestShape() { + protected Shape getTestShape() { return Shape.fromKM(17, 72); } @@ -121,8 +122,9 @@ public void testMergeWithHasher() { @Test public void testMergeWithBitMapProducer() { + int bitMapCount = BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()); for (int i = 0; i < 5; i++) { - final long[] values = new long[2]; + final long[] values = new long[bitMapCount]; for (final int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) { BitMap.set(values, idx); } @@ -135,7 +137,9 @@ public void testMergeWithBitMapProducer() { assertTrue(lst.isEmpty()); } // values too large - final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE); + long[] values = new long[bitMapCount]; + Arrays.fill(values, Long.MAX_VALUE); + final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(values); final BloomFilter bf = createEmptyFilter(getTestShape()); assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer)); @@ -200,7 +204,8 @@ public final void testContains() { assertTrue(bf1.contains(bf3)); assertTrue(bf3.contains(bf1)); - final BloomFilter bf4 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1))); + final BloomFilter bf4 = TestingHashers.populateRange(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)), + 1, 11+getTestShape().getNumberOfHashFunctions()); assertFalse(bf1.contains(bf4)); assertTrue(bf4.contains(bf1)); @@ -247,10 +252,9 @@ public final void testEstimateIntersection() { assertEquals(0, bf.estimateIntersection(bf4)); assertEquals(0, bf4.estimateIntersection(bf)); - BloomFilter bf5 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(0, 1)/* 0-16 */, - new IncrementingHasher(17, 1)/* 17-33 */, new IncrementingHasher(33, 1)/* 33-49 */); - 
BloomFilter bf6 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(50, 1)/* 50-66 */, - new IncrementingHasher(67, 1)/* 67-83 */); + int midPoint = getTestShape().getNumberOfBits() / 2; + BloomFilter bf5 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), 0, midPoint); + BloomFilter bf6 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), midPoint+1, getTestShape().getNumberOfBits()-1); assertThrows(IllegalArgumentException.class, () -> bf5.estimateIntersection(bf6)); // infinite with infinite @@ -370,14 +374,14 @@ public final void testMerge() { assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1))); // test error when bloom filter returns values out of range - final BloomFilter bf5 = new SimpleBloomFilter( - Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE)); - bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1)); + Shape s = Shape.fromKM(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits() * 3); + Hasher h = new IncrementingHasher(getTestShape().getNumberOfBits() * 2, 1); + final BloomFilter bf5 = new SimpleBloomFilter(s); + bf5.merge(h); assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5)); - final BloomFilter bf6 = new SparseBloomFilter( - Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE)); - bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1)); + final BloomFilter bf6 = new SparseBloomFilter(s); + bf6.merge(h); assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6)); } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java b/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java index 27a13fd7c4..84b17f554c 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java @@ -15,6 +15,7 @@ * limitations under the 
License. */ package org.apache.commons.collections4.bloomfilter; + /** * A collection of methods and statics that represent standard hashers in testing. */ @@ -59,17 +60,32 @@ public static <T extends BloomFilter> T populateFromHashersFrom1AndFrom11(T filt } /** - * Create a hasher that fills the entire range. + * Enables all bits in the filter. * @param <T> the Bloom filter type. * @param filter the Bloom filter to populate * @return {@code filter} for chaining */ public static <T extends BloomFilter> T populateEntireFilter(T filter) { - int n = filter.getShape().getNumberOfBits(); - int k = filter.getShape().getNumberOfHashFunctions(); - for (int i = 0; i < n; i += k) { - filter.merge(new IncrementingHasher(i, 1)); - } + return populateRange(filter, 0, filter.getShape().getNumberOfBits() - 1); + } + + /** + * Enables all bits in a range (inclusive). + * @param <T> the Bloom filter type. + * @param filter the Bloom filter to populate + * @param start the starting bit to enable. + * @param end the last bit to enable. + * @return {@code filter} for chaining + */ + public static <T extends BloomFilter> T populateRange(T filter, int start, int end) { + filter.merge((IndexProducer) p -> { + for (int i = start; i <= end; i++) { + if (!p.test(i)) { + return false; + } + } + return true; + }); return filter; } }