diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java index 63d2c5b763..61692a5e69 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java @@ -17,6 +17,7 @@ package org.apache.commons.collections4.bloomfilter; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.function.BiPredicate; import java.util.function.Predicate; @@ -27,6 +28,7 @@ * @since 4.5 */ public interface BloomFilterProducer { + /** * Executes a Bloom filter Predicate on each Bloom filter in the collection. The * ordering of the Bloom filters is not specified by this interface. @@ -65,4 +67,36 @@ default boolean forEachBloomFilterPair(final BloomFilterProducer other, final CountingPredicate p = new CountingPredicate<>(asBloomFilterArray(), func); return other.forEachBloomFilter(p) && p.forEachRemaining(); } + + /** + * Creates a BloomFilterProducer from an array of Bloom filters. + * + * @param filters The filters to be returned by the producer. + * @return The BloomFilterProducer containing the filters. + */ + static BloomFilterProducer fromBloomFilterArray(BloomFilter... 
filters) { + return new BloomFilterProducer() { + @Override + public boolean forEachBloomFilter(final Predicate predicate) { + for (final BloomFilter filter : filters) { + if (!predicate.test(filter)) { + return false; + } + } + return true; + } + + @Override + public BloomFilter[] asBloomFilterArray() { + return Arrays.copyOf(filters, filters.length); + } + + @Override + public boolean forEachBloomFilterPair(final BloomFilterProducer other, + final BiPredicate func) { + final CountingPredicate p = new CountingPredicate<>(filters, func); + return other.forEachBloomFilter(p) && p.forEachRemaining(); + } + }; + } } diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java b/src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java index a6de684689..525946602f 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java @@ -130,9 +130,6 @@ public static final Predicate advanceOnSaturation(double maxN) { throw new IllegalArgumentException("'maxN' must be greater than 0"); } return manager -> { - if (manager.filters.isEmpty()) { - return false; - } BloomFilter bf = manager.filters.peekLast(); return maxN <= bf.getShape().estimateN(bf.cardinality()); }; @@ -152,7 +149,7 @@ private Cleanup() { }; private static final Consumer> REMOVE_EMPTY_TARGET = x -> { - if (!x.isEmpty() && x.getLast().cardinality() == 0) { + if (x.getLast().cardinality() == 0) { x.removeLast(); } }; diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java index c05c27fe05..56f2351027 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java @@ -72,10 +72,10 @@ *

The counting Bloom filter extends the Bloom filter by counting the number of times a specific bit has been * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional * overhead.

- * + * *

LayeredBloomFilter

* - *

The layered Bloom filter extends the Bloom filter by creating layers of Bloom filters that can be queried as a single + *

The layered Bloom filter extends the Bloom filter by creating layers of Bloom filters that can be queried as a single * Filter or as a set of filters. This adds the ability to perform windowing on streams of data.

* *

Shape

diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterProducerTest.java new file mode 100644 index 0000000000..cf4bb7b1c1 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterProducerTest.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.function.BiPredicate; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public abstract class AbstractBloomFilterProducerTest { + private Shape shape = Shape.fromKM(17, 72); + + BloomFilter one = new SimpleBloomFilter(shape); + BloomFilter two = new SimpleBloomFilter(shape); + int[] nullCount = { 0, 0 }; + int[] equalityCount = { 0 }; + BiPredicate counter = (x, y) -> { + if (x == null) { + nullCount[0]++; + } + if (y == null) { + nullCount[1]++; + } + if (x != null && y != null && x.cardinality() == y.cardinality()) { + equalityCount[0]++; + } + return true; + }; + + @BeforeEach + public void setup() { + one.clear(); + one.merge(IndexProducer.fromIndexArray(1)); + two.clear(); + two.merge(IndexProducer.fromIndexArray(1, 2)); + nullCount[0] = 0; + nullCount[1] = 0; + equalityCount[0] = 0; + } + + /** + * Creates a BloomFilterProducer that returns the filters (or their copy) in the order presented. + * @param filters The filters to return. + * @return A BloomFilterProducer that returns the filters in order. + */ + protected abstract BloomFilterProducer createUnderTest(BloomFilter... 
filters); + + private BloomFilterProducer createUnderTest() { + return createUnderTest(one, two); + } + + @Test + public void testAsBloomFilterArray() { + BloomFilter[] result = createUnderTest().asBloomFilterArray(); + assertEquals(2, result.length); + assertEquals(1, result[0].cardinality()); + assertEquals(2, result[1].cardinality()); + } + + @Test + public void testForEachPairCompleteMatch() { + assertTrue(createUnderTest().forEachBloomFilterPair(createUnderTest(), counter)); + assertArrayEquals(new int[] { 0, 0 }, nullCount); + assertEquals(2, equalityCount[0]); + } + + @Test + public void testForEachPairArrayTooShort() { + assertTrue(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one), counter)); + assertEquals(0, nullCount[0]); + assertEquals(1, nullCount[1]); + assertEquals(1, equalityCount[0]); + } + + @Test + public void testForEachPairArrayTooLong() { + assertTrue(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one), + counter)); + assertEquals(1, nullCount[0]); + assertEquals(0, nullCount[1]); + assertEquals(2, equalityCount[0]); + } + + @Test + public void testForEachPairReturnFalseLate() { + assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one), + counter.and((x, y) -> x != null && y != null))); + assertEquals(1, nullCount[0]); + assertEquals(0, nullCount[1]); + assertEquals(2, equalityCount[0]); + } + + @Test + public void testForEachPairReturnFalseLateShortArray() { + assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one), + counter.and((x, y) -> x != null && y != null))); + assertEquals(0, nullCount[0]); + assertEquals(1, nullCount[1]); + assertEquals(1, equalityCount[0]); + } + + @Test + public void testForEachPairReturnFalseEarly() { + assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one), + (x, y) -> false)); + } +} 
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java index 030c27323e..1664d5ea67 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java @@ -459,6 +459,16 @@ protected void testCardinalityAndIsEmpty(BloomFilter bf) { assertFalse(bf.isEmpty(), "Wrong value at " + i); assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i); } + + // check operations in reverse order + bf.clear(); + assertEquals(0, bf.cardinality()); + assertTrue(bf.isEmpty()); + for (int i = 0; i < getTestShape().getNumberOfBits(); i++) { + bf.merge(IndexProducer.fromIndexArray(i)); + assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i); + assertFalse(bf.isEmpty(), "Wrong value at " + i); + } } @Test diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromBloomFilterArrayTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromBloomFilterArrayTest.java new file mode 100644 index 0000000000..8a6eba7de0 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromBloomFilterArrayTest.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +public class BloomFilterProducerFromBloomFilterArrayTest extends AbstractBloomFilterProducerTest{ + + @Override + protected BloomFilterProducer createUnderTest(BloomFilter... filters) { + return BloomFilterProducer.fromBloomFilterArray(filters); + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromLayeredBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromLayeredBloomFilterTest.java new file mode 100644 index 0000000000..a9a8565c13 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromLayeredBloomFilterTest.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +public class BloomFilterProducerFromLayeredBloomFilterTest extends AbstractBloomFilterProducerTest{ + + @Override + protected BloomFilterProducer createUnderTest(BloomFilter... filters) { + Shape shape = filters[0].getShape(); + LayerManager layerManager = LayerManager.builder().supplier( () -> new SimpleBloomFilter(shape) ) + .extendCheck( LayerManager.ExtendCheck.advanceOnPopulated()) + .cleanup(LayerManager.Cleanup.noCleanup()).build(); + LayeredBloomFilter underTest = new LayeredBloomFilter(shape, layerManager); + for (BloomFilter bf : filters) { + underTest.merge(bf); + } + return underTest; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/CountingPredicateTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingPredicateTest.java new file mode 100644 index 0000000000..9b1a1e44d1 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingPredicateTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; + +public class CountingPredicateTest { + + @Test + public void testAryShort() { + CountingPredicate cp = new CountingPredicate<>(new Integer[0], (x, y) -> x == null); + assertTrue(cp.test(Integer.valueOf(1))); + } + + @Test + public void testAryLong() { + Integer[] ary = { Integer.valueOf(1), Integer.valueOf(2) }; + CountingPredicate cp = new CountingPredicate<>(ary, (x, y) -> y == null); + assertTrue(cp.forEachRemaining()); + + // test last item not checked + cp = new CountingPredicate<>(ary, (x, y) -> y == Integer.valueOf(2)); + assertFalse(cp.forEachRemaining()); + + // test last item fails + cp = new CountingPredicate<>(ary, (x, y) -> y == Integer.valueOf(1)); + assertFalse(cp.forEachRemaining()); + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterProducerTest.java new file mode 100644 index 0000000000..81d41abf99 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterProducerTest.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.function.Predicate; + +public class DefaultBloomFilterProducerTest extends AbstractBloomFilterProducerTest { + + @Override + protected BloomFilterProducer createUnderTest(BloomFilter... filters) { + return new BloomFilterProducer() { + @Override + public boolean forEachBloomFilter(Predicate bloomFilterPredicate) { + for (BloomFilter bf : filters) { + if (!bloomFilterPredicate.test(bf)) { + return false; + } + } + return true; + } + }; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/LayerManagerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/LayerManagerTest.java index 0bb8f914e9..75018439bc 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/LayerManagerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/LayerManagerTest.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertNotSame; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -28,6 +29,7 @@ import java.util.Arrays; import java.util.LinkedList; import java.util.List; +import java.util.NoSuchElementException; import java.util.function.Consumer; import java.util.function.Predicate; @@ -134,6 +136,57 @@ public void testOnMaxSize() { 
assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(-1)); } + @Test + public void testNoCleanup() { + Consumer> underTest = LayerManager.Cleanup.noCleanup(); + LinkedList list = new LinkedList<>(); + for (int i = 0; i < 20; i++) { + assertEquals(i, list.size()); + list.add(new SimpleBloomFilter(shape)); + underTest.accept(list); + } + } + + @Test + public void testRemoveEmptyTarget() { + Consumer> underTest = LayerManager.Cleanup.removeEmptyTarget(); + LinkedList list = new LinkedList<>(); + + // removes an empty filter + BloomFilter bf = new SimpleBloomFilter(shape); + list.add(bf); + assertEquals(bf, list.get(0)); + underTest.accept(list); + assertTrue(list.isEmpty()); + + // does not remove a populated filter. + bf.merge(IndexProducer.fromIndexArray(1)); + list.add(bf); + assertEquals(bf, list.get(0)); + underTest.accept(list); + assertEquals(bf, list.get(0)); + + // does not remove an empty filter followed by a populated filter. + list.clear(); + list.add(new SimpleBloomFilter(shape)); + list.add(bf); + assertEquals(2, list.size()); + underTest.accept(list); + assertEquals(2, list.size()); + + // does not remove multiple empty filters at the end of the list, just the last + // one. 
+ list.clear(); + list.add(bf); + list.add(new SimpleBloomFilter(shape)); + list.add(new SimpleBloomFilter(shape)); + assertEquals(3, list.size()); + underTest.accept(list); + assertEquals(2, list.size()); + assertEquals(bf, list.get(0)); + + } + @Test public void testCopy() { LayerManager underTest = LayerManager.builder().supplier(() -> new SimpleBloomFilter(shape)).build(); @@ -196,6 +249,15 @@ public void testNextAndGetDepth() { assertEquals(2, underTest.getDepth()); } + @Test + public void testGet() { + LayerManager underTest = LayerManager.builder().supplier(() -> new SimpleBloomFilter(shape)).build(); + assertEquals(1, underTest.getDepth()); + assertNotNull(underTest.get(0)); + assertThrows(NoSuchElementException.class, () -> underTest.get(-1)); + assertThrows(NoSuchElementException.class, () -> underTest.get(1)); + } + @Test public void testTarget() { boolean[] extendCheckCalled = { false }; diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java index f58828afcf..9fb8555e8a 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java @@ -41,4 +41,16 @@ public void testMergeShortBitMapProducer() { assertTrue(filter.merge(producer)); assertEquals(1, filter.cardinality()); } + + @Test + public void testCardinalityAndIsEmpty() { + testCardinalityAndIsEmpty(createEmptyFilter(getTestShape())); + + // test the case where the filter is empty after merge + // in this case the internal cardinality == -1 + BloomFilter bf = createEmptyFilter(getTestShape()); + bf.merge(IndexProducer.fromIndexArray()); + assertTrue(bf.isEmpty()); + } + } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilterTest.java index 
e66a6ad702..b1b5980537 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilterTest.java @@ -16,6 +16,10 @@ */ package org.apache.commons.collections4.bloomfilter; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.Test; + public class WrappedBloomFilterTest extends AbstractBloomFilterTest { @Override @@ -24,4 +28,15 @@ protected WrappedBloomFilter createEmptyFilter(Shape shape) { }; } + @Test + public void testCharacteristics() { + Shape shape = getTestShape(); + BloomFilter inner = new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape); + WrappedBloomFilter underTest = createEmptyFilter(getTestShape()); + assertEquals(inner.characteristics(), underTest.characteristics()); + + inner = new DefaultBloomFilterTest.NonSparseDefaultBloomFilter(shape); + underTest = createEmptyFilter(getTestShape()); + assertEquals(inner.characteristics(), underTest.characteristics()); + } }