Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[COLLECTIONS-843] Implement Layered Bloom filter #402

Merged
merged 54 commits into from
Dec 22, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
36beec4
Adjusted tests to handle bloom filter implementations that utilized
Claudenw Jun 21, 2023
5be49b7
cleaned up spacing
Claudenw Jun 27, 2023
3a1a8c8
fixed indent
Claudenw Jun 21, 2023
a6b5e81
updated for layered testing
Claudenw Jun 27, 2023
301041b
removed spaces
Claudenw Jun 27, 2023
7631528
fixed merge issue
Claudenw Jun 27, 2023
680d2bb
initial checkin
Claudenw Jun 27, 2023
ee2cdff
cleaned up tests
Claudenw Jun 29, 2023
d581e99
fixed timing on test
Claudenw Jun 29, 2023
0dddfa2
fixed formatting
Claudenw Jun 29, 2023
756e88f
added javadoc
Claudenw Jun 29, 2023
ccf430f
fixed typos
Claudenw Jun 29, 2023
b8fe880
removed blank lines
Claudenw Jun 29, 2023
a6d4f46
fixed javadocs
Claudenw Jun 30, 2023
4ab19a6
Fix Javadoc
garydgregory Jun 30, 2023
ad257e1
Add Javadoc @since 4.5
garydgregory Jun 30, 2023
d5e17e8
Add Javadoc @since 4.5
garydgregory Jun 30, 2023
97ca57e
updated tests and added BloomFilterProducer code
Claudenw Jun 30, 2023
cb09fbe
Merge branch 'apache:master' into layered_filter
Claudenw Jun 30, 2023
0123c3f
Merge branch 'layered_filter' of github.com:Claudenw/commons-collecti…
Claudenw Jun 30, 2023
1a647d5
Cleaned up javadoc and BiPredicate<BloomFilter,BloomFilter> processing
Claudenw Jun 30, 2023
62eba66
fixed javadoc issues
Claudenw Jun 30, 2023
4e7ab0b
fixed typography issue
Claudenw Jun 30, 2023
a1749d7
Fixed a documentation error
Claudenw Jul 4, 2023
8eda0ba
code format cleanup
Claudenw Jul 6, 2023
7bd8d33
code simplification and documentation
Claudenw Jul 6, 2023
63bfe90
added isEmpty and associated tests
Claudenw Jul 6, 2023
8f22d46
Changes as requested by review
Claudenw Jul 6, 2023
1addfe7
cleaned up formatting errors
Claudenw Jul 6, 2023
ac92c7d
fixed javadoc issues
Claudenw Jul 6, 2023
6188866
added LayeredBloomFilter to overview.
Claudenw Jul 6, 2023
2967b1e
added coco driven test cases.
Claudenw Jul 6, 2023
6517fed
attempt to fix formatting
Claudenw Jul 7, 2023
c3fbcf5
cleaned up javadoc differences
Claudenw Jul 7, 2023
b8e4850
cleaned up javadoc
Claudenw Jul 7, 2023
7732ede
Made flatten() part of BloomFilterProducer
Claudenw Jul 7, 2023
9d8282c
fixed since tag.
Claudenw Jul 7, 2023
c18e7da
changed X() methods to setX()
Claudenw Jul 21, 2023
f24bfad
updated javadoc
Claudenw Jul 21, 2023
f0eb919
fixed javadoc errors
Claudenw Jul 21, 2023
e193143
Merge branch 'master' into layered_filter
Claudenw Aug 15, 2023
75b9a6d
merged changes from master
Claudenw Aug 15, 2023
e3ac952
renamed to Test to CellProducerFromLayeredBloomFilterTest
Claudenw Aug 15, 2023
508eec3
changed to jupiter from junit.
Claudenw Aug 25, 2023
c2ecf7e
added override for uniqueIndices as optimization.
Claudenw Aug 25, 2023
9769046
fixed checkstyle issue
Claudenw Aug 27, 2023
b39a472
modified as per review
Claudenw Oct 1, 2023
195f987
Merge branch 'layered_filter' of github.com:Claudenw/commons-collecti…
Claudenw Oct 1, 2023
a71b55e
Updated tests as per review
Claudenw Oct 27, 2023
4babacf
fixed variable initialization issues
Claudenw Oct 28, 2023
95df800
Merge branch 'master' into layered_filter
Claudenw Nov 24, 2023
68201e5
made suggested test changes
Claudenw Nov 27, 2023
d806d6e
fixed broken test
Claudenw Nov 27, 2023
500a252
Remove dead comments per code reviews
garydgregory Dec 22, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
* @since 4.5
*/
class CountingPredicate<T> implements Predicate<T> {
private int idx = 0;
private int idx;
private final T[] ary;
private final BiPredicate<T, T> func;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public static Predicate<LayerManager> advanceOnCount(int breakAt) {
throw new IllegalArgumentException("'breakAt' must be greater than 0");
}
return new Predicate<LayerManager>() {
int count = 0;
int count;

@Override
public boolean test(LayerManager filter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,8 @@ public void next() {
*/
private class Finder implements Predicate<BloomFilter> {
int[] result = new int[layerManager.getDepth()];
int bfIdx = 0;
int resultIdx = 0;
int bfIdx;
int resultIdx;
BloomFilter bf;

Finder(BloomFilter bf) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ public abstract class AbstractBloomFilterProducerTest {
return true;
};

/**
* The shape of the Bloom filters for testing.
* <ul>
* <li>Hash functions (k) = 17
* <li>Number of bits (m) = 72
* </ul>
* @return the testing shape.
*/
protected Shape getTestShape() {
return shape;
}

@BeforeEach
public void setup() {
one.clear();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,15 @@ public void testCardinalityAndIsEmpty() {
testCardinalityAndIsEmpty(createEmptyFilter(getTestShape()));
}

@Test
public void testEmptyAfterMergeWithNothing() {
// test the case where is empty after merge
// in this case the internal cardinality == -1
BloomFilter bf = createEmptyFilter(getTestShape());
bf.merge(IndexProducer.fromIndexArray());
assertTrue(bf.isEmpty());
}

/**
* Testing class returns the value as the only value.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,103 @@
*/
package org.apache.commons.collections4.bloomfilter;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.ArrayList;
import java.util.List;
import java.util.function.BiPredicate;

import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.api.Test;

public class CountingPredicateTest {
private List<Pair<Integer, Integer>> expected = new ArrayList<>();
private List<Pair<Integer, Integer>> result = new ArrayList<>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be an argument to makeFunc. The expected can be created for each test that uses it.

Currently using a class level object works as JUnit is only executing one test at a time on an instance. But it is cleaner and more thread safe to pass in result to the makeFunc helper (which can be static).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aherbert, thanks for the suggestion. I made the changes.

private Integer[] ary = {Integer.valueOf(1), Integer.valueOf(2)};

private BiPredicate<Integer, Integer> makeFunc(BiPredicate<Integer, Integer> inner) {
return (x, y) -> {
if (inner.test(x, y)) {
result.add(Pair.of(x, y));
return true;
}
return false;
};
}

/**
* Test when the predicate array is shorter than other array as determined by the number
* of times cp.test() is called and all other values result in a true statement.
*/
@Test
public void testPredicateShorter() {
Integer[] shortAry = {Integer.valueOf(3)};
expected.clear();
expected.add(Pair.of(3, 1));
expected.add(Pair.of(null, 2));
result.clear();
CountingPredicate<Integer> cp = new CountingPredicate<>(shortAry, makeFunc((x, y) -> true));
for (Integer i : ary) {
assertTrue(cp.test(i));
}
assertEquals(expected, result);
assertTrue(cp.forEachRemaining());
assertEquals(expected, result);
}

/**
* Test when the predicate array is shorter than other array as determined by the number
* of times cp.test() is called and all other values result in a true statement.
*/
@Test
public void testAryShort() {
CountingPredicate<Integer> cp = new CountingPredicate<>(new Integer[0], (x, y) -> x == null);
assertTrue(cp.test(Integer.valueOf(1)));
public void testPredicateSameLength() {
expected.clear();
expected.add( Pair.of(1, 3));
expected.add( Pair.of(2, 3));
result.clear();
CountingPredicate<Integer> cp = new CountingPredicate<>(ary, makeFunc((x, y) -> true));
assertTrue(cp.test(3));
assertTrue(cp.test(3));
assertEquals(expected, result);
assertTrue(cp.forEachRemaining());
assertEquals(expected, result);
}

/**
* Test when the predicate array is longer than other array as determined by the number
* of times cp.test() is called and all other values result in a true statement.
*/
@Test
public void testAryLong() {
Integer[] ary = { Integer.valueOf(1), Integer.valueOf(2) };
CountingPredicate<Integer> cp = new CountingPredicate<>(ary, (x, y) -> y == null);
public void testPredicateLonger() {
expected.clear();
expected.add(Pair.of(1, 3));
result.clear();
CountingPredicate<Integer> cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x!=null));
assertTrue(cp.test(Integer.valueOf(3)));
assertEquals(expected, result);
expected.add(Pair.of(2, null));
assertTrue(cp.forEachRemaining());
assertEquals(expected, result);

// test last item not checked
cp = new CountingPredicate<>(ary, (x, y) -> y == Integer.valueOf(2));
assertFalse(cp.forEachRemaining());
// if the other array is zero length then cp.test() will not be called so
// we can just call cp.forEachRemaining() here.
expected.clear();
expected.add(Pair.of(1, null));
expected.add(Pair.of(2, null));
result.clear();
cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x!=null));
assertTrue(cp.forEachRemaining());
assertEquals( expected, result);

// test last item fails
cp = new CountingPredicate<>(ary, (x, y) -> y == Integer.valueOf(1));
// If a test fails then the result should be false and the rest of the list should
// not be processed.
expected.clear();
expected.add(Pair.of(1, null));
result.clear();
cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x == Integer.valueOf(1)));
assertFalse(cp.forEachRemaining());
assertEquals(expected, result);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import static org.junit.Assert.assertNotSame;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

Expand All @@ -34,6 +34,8 @@
import java.util.function.Predicate;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

public class LayerManagerTest {

Expand Down Expand Up @@ -63,7 +65,9 @@ public void testNeverAdvance() {
}
}

private void testAdvanceOnCount(int breakAt) {
@ParameterizedTest
@ValueSource(ints = {4, 10, 2, 1})
public void testAdvanceOnCount(int breakAt) {
Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnCount(breakAt);
LayerManager layerManager = testingBuilder().build();
for (int i = 0; i < breakAt - 1; i++) {
Expand All @@ -74,42 +78,30 @@ private void testAdvanceOnCount(int breakAt) {
}

@Test
public void testAdvanceOnCount() {
testAdvanceOnCount(4);
testAdvanceOnCount(10);
testAdvanceOnCount(2);
testAdvanceOnCount(1);
public void testAdvanceOnCountInvalidArguments() {
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(0));
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(-1));
}

@Test
public void testAdvanceOnCalculatedFull() {
Double maxN = shape.estimateMaxN();
Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnSaturation(shape.estimateMaxN());
LayerManager layerManager = testingBuilder().build();
while (layerManager.getTarget().getShape().estimateN(layerManager.getTarget().cardinality()) < maxN) {
assertFalse(underTest.test(layerManager));
layerManager.getTarget().merge(TestingHashers.randomHasher());
}
assertTrue(underTest.test(layerManager));
}

@Test
public void testAdvanceOnSaturation() {
Claudenw marked this conversation as resolved.
Show resolved Hide resolved
Double maxN = shape.estimateMaxN();
int hashStart = 0;
Predicate<LayerManager> underTest = LayerManager.ExtendCheck.advanceOnSaturation(maxN);
LayerManager layerManager = testingBuilder().build();
while (layerManager.getTarget().getShape().estimateN(layerManager.getTarget().cardinality()) < maxN) {
assertFalse(underTest.test(layerManager));
layerManager.getTarget().merge(TestingHashers.randomHasher());
layerManager.getTarget().merge(new IncrementingHasher(hashStart, shape.getNumberOfHashFunctions()));
hashStart+=shape.getNumberOfHashFunctions();
}
assertTrue(underTest.test(layerManager));
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnSaturation(0));
assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnSaturation(-1));
}

private void testOnMaxSize(int maxSize) {
@ParameterizedTest
@ValueSource(ints = {5, 100, 2, 1})
public void testOnMaxSize(int maxSize) {
Consumer<LinkedList<BloomFilter>> underTest = LayerManager.Cleanup.onMaxSize(maxSize);
LinkedList<BloomFilter> list = new LinkedList<>();
for (int i = 0; i < maxSize; i++) {
Expand All @@ -127,11 +119,7 @@ private void testOnMaxSize(int maxSize) {
}

@Test
public void testOnMaxSize() {
testOnMaxSize(5);
testOnMaxSize(100);
testOnMaxSize(2);
testOnMaxSize(1);
public void testOnMaxSizeIllegalValues() {
assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(0));
assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(-1));
}
Expand Down Expand Up @@ -251,9 +239,10 @@ public void testNextAndGetDepth() {

@Test
public void testGet() {
LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build();
SimpleBloomFilter f = new SimpleBloomFilter(shape);
LayerManager underTest = LayerManager.builder().setSupplier(() -> f).build();
assertEquals(1, underTest.getDepth());
assertNotNull(underTest.get(0));
assertSame(f, underTest.get(0));
assertThrows(NoSuchElementException.class, () -> underTest.get(-1));
assertThrows(NoSuchElementException.class, () -> underTest.get(1));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,12 @@ private LayeredBloomFilter setupFindTest() {
@Test
public void testFindBloomFilter() {
LayeredBloomFilter filter = setupFindTest();
int[] expected = {0, 3};
int[] result = filter.find(TestingHashers.FROM1);
assertEquals(2, result.length);
assertEquals(0, result[0]);
assertEquals(3, result[1]);
assertArrayEquals(expected, result);
expected = new int[] {1, 3};
result = filter.find(TestingHashers.FROM11);
assertEquals(2, result.length);
assertEquals(1, result[0]);
assertEquals(3, result[1]);
assertArrayEquals(expected, result);
}

@Test
Expand All @@ -98,34 +96,30 @@ public void testFindBitMapProducer() {
IndexProducer idxProducer = TestingHashers.FROM1.indices(getTestShape());
BitMapProducer producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits());

int[] expected = {0, 3};
int[] result = filter.find(producer);
assertEquals(2, result.length);
assertEquals(0, result[0]);
assertEquals(3, result[1]);
assertArrayEquals(expected, result);

expected = new int[]{1, 3};
idxProducer = TestingHashers.FROM11.indices(getTestShape());
producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits());
result = filter.find(producer);
assertEquals(2, result.length);
assertEquals(1, result[0]);
assertEquals(3, result[1]);
assertArrayEquals(expected, result);
}

@Test
public void testFindIndexProducer() {
IndexProducer producer = TestingHashers.FROM1.indices(getTestShape());
LayeredBloomFilter filter = setupFindTest();

int[] expected = {0, 3};
int[] result = filter.find(producer);
assertEquals(2, result.length);
assertEquals(0, result[0]);
assertEquals(3, result[1]);
assertArrayEquals(expected, result);

expected = new int[] {1, 3};
producer = TestingHashers.FROM11.indices(getTestShape());
result = filter.find(producer);
assertEquals(2, result.length);
assertEquals(1, result[0]);
assertEquals(3, result[1]);
assertArrayEquals(expected, result);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,4 @@ public void testMergeShortBitMapProducer() {
assertTrue(filter.merge(producer));
assertEquals(1, filter.cardinality());
}

@Test
public void testCardinalityAndIsEmpty() {
testCardinalityAndIsEmpty(createEmptyFilter(getTestShape()));

// test the case where is empty after merge
// in this case the internal cardinality == -1
BloomFilter bf = createEmptyFilter(getTestShape());
bf.merge(IndexProducer.fromIndexArray());
assertTrue(bf.isEmpty());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

public class WrappedBloomFilterTest extends AbstractBloomFilterTest<WrappedBloomFilter> {

Expand All @@ -28,15 +29,17 @@ protected WrappedBloomFilter createEmptyFilter(Shape shape) {
};
}

@Test
public void testCharacteristics() {
@ParameterizedTest
@ValueSource(ints = {0, 1, 34})
public void testCharacteristics(int characteristics) {
Shape shape = getTestShape();
BloomFilter inner = new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape);
WrappedBloomFilter underTest = createEmptyFilter(getTestShape());
assertEquals(inner.characteristics(), underTest.characteristics());

inner = new DefaultBloomFilterTest.NonSparseDefaultBloomFilter(shape);
underTest = createEmptyFilter(getTestShape());
assertEquals(inner.characteristics(), underTest.characteristics());
BloomFilter inner = new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape) {
@Override
public int characteristics() {
return characteristics;
}
};
WrappedBloomFilter underTest = new WrappedBloomFilter(inner) {};
assertEquals(characteristics, underTest.characteristics());
}
}