Skip to content

Commit

Permalink
[COLLECTIONS-841] Open up bloom filter tests - test changes to suppor…
Browse files Browse the repository at this point in the history
…t bit decay based Bloom filters (#400)

* Adjusted tests to handle Bloom filter implementations that utilize
automatic decay.

* fixed formatting issues

* fixed indent

* Format tweaks

---------

Co-authored-by: Gary Gregory <[email protected]>
  • Loading branch information
Claudenw and garydgregory committed Jun 21, 2023
1 parent 916efdb commit 1671280
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

Expand All @@ -42,7 +43,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* </ul>
* @return the testing shape.
*/
protected final Shape getTestShape() {
protected Shape getTestShape() {
return Shape.fromKM(17, 72);
}

Expand Down Expand Up @@ -121,8 +122,9 @@ public void testMergeWithHasher() {

@Test
public void testMergeWithBitMapProducer() {
int bitMapCount = BitMap.numberOfBitMaps(getTestShape().getNumberOfBits());
for (int i = 0; i < 5; i++) {
final long[] values = new long[2];
final long[] values = new long[bitMapCount];
for (final int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
BitMap.set(values, idx);
}
Expand All @@ -135,7 +137,9 @@ public void testMergeWithBitMapProducer() {
assertTrue(lst.isEmpty());
}
// values too large
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
long[] values = new long[bitMapCount];
Arrays.fill(values, Long.MAX_VALUE);
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(values);
final BloomFilter bf = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));

Expand Down Expand Up @@ -200,7 +204,8 @@ public final void testContains() {
assertTrue(bf1.contains(bf3));
assertTrue(bf3.contains(bf1));

final BloomFilter bf4 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)));
final BloomFilter bf4 = TestingHashers.populateRange(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)),
1, 11+getTestShape().getNumberOfHashFunctions());

assertFalse(bf1.contains(bf4));
assertTrue(bf4.contains(bf1));
Expand Down Expand Up @@ -247,10 +252,9 @@ public final void testEstimateIntersection() {
assertEquals(0, bf.estimateIntersection(bf4));
assertEquals(0, bf4.estimateIntersection(bf));

BloomFilter bf5 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(0, 1)/* 0-16 */,
new IncrementingHasher(17, 1)/* 17-33 */, new IncrementingHasher(33, 1)/* 33-49 */);
BloomFilter bf6 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(50, 1)/* 50-66 */,
new IncrementingHasher(67, 1)/* 67-83 */);
int midPoint = getTestShape().getNumberOfBits() / 2;
BloomFilter bf5 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), 0, midPoint);
BloomFilter bf6 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), midPoint+1, getTestShape().getNumberOfBits()-1);
assertThrows(IllegalArgumentException.class, () -> bf5.estimateIntersection(bf6));

// infinite with infinite
Expand Down Expand Up @@ -370,14 +374,14 @@ public final void testMerge() {
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));

// test error when bloom filter returns values out of range
final BloomFilter bf5 = new SimpleBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1));
Shape s = Shape.fromKM(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits() * 3);
Hasher h = new IncrementingHasher(getTestShape().getNumberOfBits() * 2, 1);
final BloomFilter bf5 = new SimpleBloomFilter(s);
bf5.merge(h);
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));

final BloomFilter bf6 = new SparseBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1));
final BloomFilter bf6 = new SparseBloomFilter(s);
bf6.merge(h);
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;

/**
* A collection of methods and statics that represent standard hashers in testing.
*/
Expand Down Expand Up @@ -59,17 +60,32 @@ public static <T extends BloomFilter> T populateFromHashersFrom1AndFrom11(T filt
}

/**
 * Enables all bits in the filter.
 *
 * <p>Delegates to {@code populateRange} using the inclusive range {@code 0} through
 * {@code filter.getShape().getNumberOfBits() - 1}.</p>
 *
 * @param <T> the Bloom filter type.
 * @param filter the Bloom filter to populate
 * @return {@code filter} for chaining
 */
public static <T extends BloomFilter> T populateEntireFilter(T filter) {
    // A single ranged merge already covers every index, so no per-hasher merges are performed first.
    return populateRange(filter, 0, filter.getShape().getNumberOfBits() - 1);
}

/**
 * Enables every bit whose index lies between {@code start} and {@code end}, both inclusive.
 *
 * <p>The indices are handed to the filter through a single {@code merge} call with an
 * {@code IndexProducer} that walks the range in ascending order and stops early,
 * reporting {@code false}, as soon as the consuming predicate rejects an index.</p>
 *
 * @param <T> the Bloom filter type.
 * @param filter the Bloom filter to populate
 * @param start the starting bit to enable.
 * @param end the last bit to enable.
 * @return {@code filter} for chaining
 */
public static <T extends BloomFilter> T populateRange(T filter, int start, int end) {
    // Typed local keeps the lambda bound to IndexProducer rather than another merge overload.
    final IndexProducer rangeProducer = consumer -> {
        int index = start;
        while (index <= end) {
            if (!consumer.test(index)) {
                return false;
            }
            index++;
        }
        return true;
    };
    filter.merge(rangeProducer);
    return filter;
}
}

0 comments on commit 1671280

Please sign in to comment.