Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[COLLECTIONS-841] Open up bloom filter tests - test changes to support bit decay based Bloom filters #400

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

Expand All @@ -42,7 +43,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
* </ul>
* @return the testing shape.
*/
protected final Shape getTestShape() {
protected Shape getTestShape() {
return Shape.fromKM(17, 72);
}

Expand Down Expand Up @@ -121,8 +122,9 @@ public void testMergeWithHasher() {

@Test
public void testMergeWithBitMapProducer() {
int bitMapCount = BitMap.numberOfBitMaps(getTestShape().getNumberOfBits());
for (int i = 0; i < 5; i++) {
final long[] values = new long[2];
final long[] values = new long[bitMapCount];
for (final int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
BitMap.set(values, idx);
}
Expand All @@ -135,7 +137,9 @@ public void testMergeWithBitMapProducer() {
assertTrue(lst.isEmpty());
}
// values too large
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE);
long[] values = new long[bitMapCount];
Arrays.fill(values, Long.MAX_VALUE);
final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(values);
final BloomFilter bf = createEmptyFilter(getTestShape());
assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));

Expand Down Expand Up @@ -200,7 +204,8 @@ public final void testContains() {
assertTrue(bf1.contains(bf3));
assertTrue(bf3.contains(bf1));

final BloomFilter bf4 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)));
final BloomFilter bf4 = TestingHashers.populateRange(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)),
1, 11+getTestShape().getNumberOfHashFunctions());

assertFalse(bf1.contains(bf4));
assertTrue(bf4.contains(bf1));
Expand Down Expand Up @@ -247,10 +252,9 @@ public final void testEstimateIntersection() {
assertEquals(0, bf.estimateIntersection(bf4));
assertEquals(0, bf4.estimateIntersection(bf));

BloomFilter bf5 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(0, 1)/* 0-16 */,
new IncrementingHasher(17, 1)/* 17-33 */, new IncrementingHasher(33, 1)/* 33-49 */);
BloomFilter bf6 = TestingHashers.mergeHashers(createEmptyFilter(getTestShape()), new IncrementingHasher(50, 1)/* 50-66 */,
new IncrementingHasher(67, 1)/* 67-83 */);
int midPoint = getTestShape().getNumberOfBits() / 2;
BloomFilter bf5 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), 0, midPoint);
BloomFilter bf6 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), midPoint+1, getTestShape().getNumberOfBits()-1);
assertThrows(IllegalArgumentException.class, () -> bf5.estimateIntersection(bf6));

// infinite with infinite
Expand Down Expand Up @@ -370,14 +374,14 @@ public final void testMerge() {
assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));

// test error when bloom filter returns values out of range
final BloomFilter bf5 = new SimpleBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1));
Shape s = Shape.fromKM(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits() * 3);
Hasher h = new IncrementingHasher(getTestShape().getNumberOfBits() * 2, 1);
final BloomFilter bf5 = new SimpleBloomFilter(s);
bf5.merge(h);
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));

final BloomFilter bf6 = new SparseBloomFilter(
Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE));
bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1));
final BloomFilter bf6 = new SparseBloomFilter(s);
bf6.merge(h);
assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;

/**
* A collection of methods and statics that represent standard hashers in testing.
*/
Expand Down Expand Up @@ -59,17 +60,32 @@ public static <T extends BloomFilter> T populateFromHashersFrom1AndFrom11(T filt
}

/**
 * Enables all bits in the filter.
 * @param <T> the Bloom filter type.
 * @param filter the Bloom filter to populate
 * @return {@code filter} for chaining
 */
public static <T extends BloomFilter> T populateEntireFilter(T filter) {
    // A single merge over [0, numberOfBits - 1] touches every bit index exactly once,
    // so no additional hasher-based merges are needed before it.
    return populateRange(filter, 0, filter.getShape().getNumberOfBits() - 1);
}

/**
 * Merges every bit index from {@code start} through {@code end} (both inclusive) into the filter.
 * The indices are offered in ascending order; production stops as soon as the receiving
 * predicate rejects an index. If {@code start} is greater than {@code end} no index is offered.
 * @param <T> the Bloom filter type.
 * @param filter the Bloom filter to populate
 * @param start the first bit index to enable.
 * @param end the last bit index to enable.
 * @return {@code filter} for chaining
 */
public static <T extends BloomFilter> T populateRange(T filter, int start, int end) {
    // Typing the lambda as IndexProducer selects the intended merge overload.
    final IndexProducer range = consumer -> {
        int bit = start;
        while (bit <= end) {
            if (!consumer.test(bit)) {
                return false;
            }
            bit++;
        }
        return true;
    };
    filter.merge(range);
    return filter;
}
}