diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java index 1f3c7ab6f8..eb9b6f3896 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java @@ -209,6 +209,21 @@ default boolean isFull() { */ int cardinality(); + /** + * Determines if all the bits are off. This is equivalent to + * {@code cardinality() == 0}. + * + *

+ * Note: This method is optimised for non-sparse filters. Implementers + * are encouraged to implement faster checks if possible. + *

+ * + * @return {@code true} if no bits are enabled, {@code false} otherwise. + */ + default boolean isEmpty() { + return forEachBitMap(y -> y == 0); + } + /** * Estimates the number of items in the Bloom filter. * diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java new file mode 100644 index 0000000000..7008a7e3d8 --- /dev/null +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.function.BiPredicate; +import java.util.function.Predicate; + +/** + * Produces Bloom filters from a collection (e.g. LayeredBloomFilter). + * + * @since 4.5 + */ +public interface BloomFilterProducer { + + /** + * Executes a Bloom filter Predicate on each Bloom filter in the collection. The + * ordering of the Bloom filters is not specified by this interface. + * + * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with. 
+ * @return {@code false} when the first filter fails the predicate test. Returns + * {@code true} if all filters pass the test. + */ + boolean forEachBloomFilter(Predicate bloomFilterPredicate); + + /** + * Return an array of the Bloom filters in the collection. + *

Implementations should specify if the array contains deep copies, immutable instances, + * or references to the filters in the collection.

+ *

The default method returns a deep copy of the enclosed filters.

+ * + * @return An array of Bloom filters. + */ + default BloomFilter[] asBloomFilterArray() { + final List filters = new ArrayList<>(); + forEachBloomFilter(f -> filters.add(f.copy())); + return filters.toArray(new BloomFilter[0]); + } + + /** + * Applies the {@code func} to each Bloom filter pair in order. Will apply all + * of the Bloom filters from the other BloomFilterProducer to this producer. If + * either {@code this} producer or {@code other} producer has fewer BloomFilters + * ths method will provide {@code null} for all excess calls to the {@code func}. + * + *

This implementation passes the filters obtained from {@code asBloomFilterArray()} as the first argument of {@code func}. Other implementations + * should specify whether {@code func} receives deep copies, immutable instances, + * or references to the filters in the collection.

+ * + * @param other The other BloomFilterProducer that provides the y values in the + * (x,y) pair. + * @param func The function to apply. + * @return {@code true} if the {@code func} returned {@code true} for every pair, + * {@code false} otherwise. + */ + default boolean forEachBloomFilterPair(final BloomFilterProducer other, + final BiPredicate func) { + final CountingPredicate p = new CountingPredicate<>(asBloomFilterArray(), func); + return other.forEachBloomFilter(p) && p.forEachRemaining(); + } + + /** + * Create a standard (non-layered) Bloom filter by merging all of the layers. If + * the filter is empty this method will return an empty Bloom filter. + * + * @return the merged bloom filter. + */ + default BloomFilter flatten() { + BloomFilter[] bf = {null}; + forEachBloomFilter( x -> { + if (bf[0] == null) { + bf[0] = new SimpleBloomFilter( x.getShape()); + } + return bf[0].merge( x ); + }); + return bf[0]; + } + + /** + * Creates a BloomFilterProducer from an array of Bloom filters. + * + *
    + *
  • The asBloomFilterArray() method returns a copy of the original array + * with references to the original filters.
  • + *
  • The forEachBloomFilterPair() method uses references to the original filters.
  • + *
+ *

The producer holds references to the supplied filters, so all modifications to the Bloom filters are reflected in the original filters.

+ * + * @param filters The filters to be returned by the producer. + * @return THe BloomFilterProducer containing the filters. + */ + static BloomFilterProducer fromBloomFilterArray(BloomFilter... filters) { + Objects.requireNonNull(filters, "filters"); + return new BloomFilterProducer() { + @Override + public boolean forEachBloomFilter(final Predicate predicate) { + for (final BloomFilter filter : filters) { + if (!predicate.test(filter)) { + return false; + } + } + return true; + } + + /** + * This implementation returns a copy the original array, the contained Bloom filters + * are references to the originals, any modifications to them are reflected in the original + * filters. + */ + @Override + public BloomFilter[] asBloomFilterArray() { + return filters.clone(); + } + + /** + * This implementation uses references to the original filters. Any modifications to the + * filters are reflected in the originals. + */ + @Override + public boolean forEachBloomFilterPair(final BloomFilterProducer other, + final BiPredicate func) { + final CountingPredicate p = new CountingPredicate<>(filters, func); + return other.forEachBloomFilter(p) && p.forEachRemaining(); + } + }; + } +} diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java index ecb984e2ce..4207e0a54d 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java @@ -67,6 +67,11 @@ default boolean forEachIndex(final IntPredicate predicate) { return forEachCell((i, v) -> predicate.test(i)); } + @Override + default IndexProducer uniqueIndices() { + return this; + } + /** * Creates a CellProducer from an IndexProducer. 
* diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingLongPredicate.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingLongPredicate.java index 1f638d4b11..6d933e1a3a 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingLongPredicate.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingLongPredicate.java @@ -22,7 +22,8 @@ * A long predicate that applies the test func to each member of the {@code ary} in sequence for each call to {@code test()}. * if the {@code ary} is exhausted, the subsequent calls to {@code test} are executed with a zero value. * If the calls to {@code test} do not exhaust the {@code ary} the {@code forEachRemaining} method can be called to - * execute the @code{text} with a zero value for each remaining {@code idx} value. + * execute the @{code test} with a zero value for each remaining {@code idx} value. + * @since 4.5 */ class CountingLongPredicate implements LongPredicate { private int idx; diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingPredicate.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingPredicate.java new file mode 100644 index 0000000000..8ee70040ef --- /dev/null +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingPredicate.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.function.BiPredicate; +import java.util.function.Predicate; + +/** + * A predicate that applies the test {@code func} to each member of the {@code ary} in + * sequence for each call to {@code test()}. if the {@code ary} is exhausted, + * the subsequent calls to {@code test} are executed with a {@code null} value. + * If the calls to {@code test} do not exhaust the {@code ary} the {@code + * forEachRemaining} method can be called to execute the @{code test} with a + * {@code null} value for each remaining {@code idx} value. + * + * @param the type of object being compared. + * @since 4.5 + */ +class CountingPredicate implements Predicate { + private int idx; + private final T[] ary; + private final BiPredicate func; + + /** + * Constructs an instance that will compare the elements in {@code ary} with the + * elements returned by {@code func}. function is called as {@code func.test( + * idxValue, otherValue )}. If there are more {@code otherValue} values than + * {@code idxValues} then {@code func} is called as {@code func.test(null, otherValue)}. + * + * @param ary The array of long values to compare. + * @param func The function to apply to the pairs of long values. + */ + CountingPredicate(final T[] ary, final BiPredicate func) { + this.ary = ary; + this.func = func; + } + + @Override + public boolean test(final T other) { + return func.test(idx == ary.length ? 
null : ary[idx++], other); + } + + /** + * Call {@code BiPredicate} for each remaining unpaired {@code } in the + * input array. This method should be invoked after the predicate has been + * passed to a {@code Producer.forEach(BiPredicate)} to consume any + * unpaired {@code }s. The second argument to the BiPredicate will be {@code null}. + * + * @return true if all calls the predicate were successful + */ + boolean forEachRemaining() { + // uses local references for optimization benefit. + int i = idx; + final T[] a = ary; + final int limit = a.length; + while (i != limit && func.test(a[i], null)) { + i++; + } + return i == limit; + } +} diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java b/src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java new file mode 100644 index 0000000000..710bfb53a6 --- /dev/null +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.LinkedList; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.function.Consumer; +import java.util.function.Predicate; +import java.util.function.Supplier; + +/** + * Implementation of the methods to manage the layers in a layered Bloom filter. + *

+ * The manager comprises a list of Bloom filters that are managed based on + * various rules. The last filter in the list is known as the {@code target} and + * is the filter into which merges are performed. The layer manager uses + * three functional components to manage the list. + *

+ *
    + *
  • ExtendCheck - A Predicate that if true causes a new Bloom filter to be + * created as the new target.
  • + *
  • FilterSupplier - A Supplier that produces empty Bloom filters to be used + * as a new target.
  • + *
  • Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any + * expired or outdated filters from the list.
  • + *
+ *

+ * When extendCheck returns {@code true} the following steps are taken: + *

+ *
    + *
  1. {@code Cleanup} is called
  2. + *
  3. {@code FilterSupplier} is executed and the new filter is added to the list as + * the {@code target} filter.
  4. + *
+ * + * @since 4.5 + */ +public class LayerManager implements BloomFilterProducer { + + /** + * A collection of common ExtendCheck implementations to test whether to extend + * the depth of a LayerManager. + */ + public static final class ExtendCheck { + private ExtendCheck() { + } + + /** + * Advances the target once a merge has been performed. + * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. + */ + public static Predicate advanceOnPopulated() { + return lm -> !lm.filters.peekLast().isEmpty(); + } + + /** + * Does not automatically advance the target. @{code next()} must be called directly to + * perform the advance. + * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. + */ + public static Predicate neverAdvance() { + return x -> false; + } + + /** + * Creates a new target after a specific number of filters have been added to + * the current target. + * + * @param breakAt the number of filters to merge into each filter in the list. + * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. + * @throws IllegalArgumentException if {@code breakAt <= 0} + */ + public static Predicate advanceOnCount(int breakAt) { + if (breakAt <= 0) { + throw new IllegalArgumentException("'breakAt' must be greater than 0"); + } + return new Predicate() { + int count; + + @Override + public boolean test(LayerManager filter) { + return ++count % breakAt == 0; + } + }; + } + + /** + * Creates a new target after the current target is saturated. Saturation is + * defined as the {@code Bloom filter estimated N >= maxN}. + * + *

An example usage is advancing on a calculated saturation by calling: + * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }

+ * + * @param maxN the maximum number of estimated items in the filter. + * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. + * @throws IllegalArgumentException if {@code maxN <= 0} + */ + public static Predicate advanceOnSaturation(double maxN) { + if (maxN <= 0) { + throw new IllegalArgumentException("'maxN' must be greater than 0"); + } + return manager -> { + BloomFilter bf = manager.filters.peekLast(); + return maxN <= bf.getShape().estimateN(bf.cardinality()); + }; + } + } + + /** + * Static methods to create a Consumer of a LinkedList of BloomFilter perform + * tests on whether to reduce the collection of Bloom filters. + */ + public static final class Cleanup { + private Cleanup() { + } + + /** + * A Cleanup that never removes anything. + * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. + */ + public static Consumer> noCleanup() { + return x -> {}; + } + + /** + * Removes the earliest filters in the list when the the number of filters + * exceeds maxSize. + * + * @param maxSize the maximum number of filters for the list. Must be greater + * than 0 + * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. + * @throws IllegalArgumentException if {@code maxSize <= 0}. + */ + public static Consumer> onMaxSize(int maxSize) { + if (maxSize <= 0) { + throw new IllegalArgumentException("'maxSize' must be greater than 0"); + } + return ll -> { + while (ll.size() > maxSize) { + ll.removeFirst(); + } + }; + } + + /** + * Removes the last added target if it is empty. Useful as the first in a chain + * of cleanup consumers. (e.g. {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )}) + * + * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 
+ */ + public static Consumer> removeEmptyTarget() { + return x -> { + if (x.getLast().cardinality() == 0) { + x.removeLast(); + } + }; + } + } + + private final LinkedList filters = new LinkedList<>(); + private final Consumer> filterCleanup; + private final Predicate extendCheck; + private final Supplier filterSupplier; + + /** + * Creates a new Builder with defaults of {@code ExtendCheck.neverAdvance()} and + * {@code Cleanup.noCleanup()}. + * + * @return A builder. + * @see ExtendCheck#neverAdvance() + * @see Cleanup#noCleanup() + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Constructor. + * + * @param filterSupplier the supplier of new Bloom filters to add the the list + * when necessary. + * @param extendCheck The predicate that checks if a new filter should be + * added to the list. + * @param filterCleanup the consumer that removes any old filters from the + * list. + * @param initialize true if the filter list should be initialized. + */ + private LayerManager(Supplier filterSupplier, Predicate extendCheck, + Consumer> filterCleanup, boolean initialize) { + this.filterSupplier = filterSupplier; + this.extendCheck = extendCheck; + this.filterCleanup = filterCleanup; + if (initialize) { + addFilter(); + } + } + + /** + * Adds a new Bloom filter to the list. + */ + private void addFilter() { + BloomFilter bf = filterSupplier.get(); + if (bf == null) { + throw new NullPointerException("filterSupplier returned null."); + } + filters.add(bf); + } + + /** + * Creates a deep copy of this LayerManager. + *

Filters in the copy are deep copies, not references, so changes in the copy + * are NOT reflected in the original.

+ *

The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between + * the copy and this instance.

+ * + * @return a copy of this layer Manager. + */ + public LayerManager copy() { + LayerManager newMgr = new LayerManager(filterSupplier, extendCheck, filterCleanup, false); + for (BloomFilter bf : filters) { + newMgr.filters.add(bf.copy()); + } + return newMgr; + } + + /** + * Forces an advance to the next depth. This method will clean-up the current + * layers and generate a new filter layer. In most cases is it unnecessary to + * call this method directly. + *

+ * This method is used within {@link #getTarget()} when the configured + * {@code ExtendCheck} returns {@code true}. + *

+ */ + void next() { + this.filterCleanup.accept(filters); + addFilter(); + } + + /** + * Returns the number of filters in the LayerManager. In the default LayerManager implementation + * there is alwasy at least one layer. + * + * @return the current depth. + */ + public final int getDepth() { + return filters.size(); + } + + /** + * Gets the Bloom filter at the specified depth. The filter at depth 0 is the + * oldest filter. + * + * @param depth the depth at which the desired filter is to be found. + * @return the filter. + * @throws NoSuchElementException if depth is not in the range + * [0,filters.size()) + */ + public final BloomFilter get(int depth) { + if (depth < 0 || depth >= filters.size()) { + throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size())); + } + return filters.get(depth); + } + + /** + * Returns the current target filter. If a new filter should be created based on + * {@code extendCheck} it will be created before this method returns. + * + * @return the current target filter after any extension. + */ + public final BloomFilter getTarget() { + if (extendCheck.test(this)) { + next(); + } + return filters.peekLast(); + } + + /** + * Removes all the filters from the layer manager, and sets up a new one as the + * target. + */ + public final void clear() { + filters.clear(); + addFilter(); + } + + /** + * Executes a Bloom filter Predicate on each Bloom filter in the manager in + * depth order. Oldest filter first. + * + * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with. + * @return {@code false} when the a filter fails the predicate test. Returns + * {@code true} if all filters pass the test. 
+ */ + @Override + public boolean forEachBloomFilter(Predicate bloomFilterPredicate) { + for (BloomFilter bf : filters) { + if (!bloomFilterPredicate.test(bf)) { + return false; + } + } + return true; + } + + /** + * Builder to create Layer Manager + */ + public static class Builder { + private Predicate extendCheck; + private Supplier supplier; + private Consumer> cleanup; + + private Builder() { + extendCheck = ExtendCheck.neverAdvance(); + cleanup = Cleanup.noCleanup(); + } + + /** + * Builds the layer manager with the specified properties. + * + * @return a new LayerManager. + */ + public LayerManager build() { + Objects.requireNonNull(supplier, "Supplier must not be null"); + Objects.requireNonNull(extendCheck, "ExtendCheck must not be null"); + Objects.requireNonNull(cleanup, "Cleanup must not be null"); + return new LayerManager(supplier, extendCheck, cleanup, true); + } + + /** + * Sets the extendCheck predicate. When the predicate returns {@code true} a new + * target will be created. + * + * @param extendCheck The predicate to determine if a new target should be + * created. + * @return this for chaining. + */ + public Builder setExtendCheck(Predicate extendCheck) { + this.extendCheck = extendCheck; + return this; + } + + /** + * Sets the supplier of Bloom filters. When extendCheck creates a new target, + * the supplier provides the instance of the Bloom filter. + * + * @param supplier The supplier of new Bloom filter instances. + * @return this for chaining. + */ + public Builder setSupplier(Supplier supplier) { + this.supplier = supplier; + return this; + } + + /** + * Sets the Consumer that cleans the list of Bloom filters. + * + * @param cleanup the Consumer that will modify the list of filters removing out + * dated or stale filters. + * @return this for chaining. 
+ */ + public Builder setCleanup(Consumer> cleanup) { + this.cleanup = cleanup; + return this; + } + } +} diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilter.java new file mode 100644 index 0000000000..cff8e87efc --- /dev/null +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilter.java @@ -0,0 +1,380 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.function.IntPredicate; +import java.util.function.LongPredicate; +import java.util.function.Predicate; + +/** + * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun + * (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd + * International Conference on Advanced Computer Theory and Engineering (ICACTE + * 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN + * 978-1-4244-6539-2, S2CID 3108985 + *

+ * In short, a layered Bloom filter contains several Bloom filters arranged in + * layers. + *

+ *
    + *
  • When membership in the filter is checked each layer in turn is checked + * and if a match is found {@code true} is returned.
  • + *
  • When merging, each Bloom filter is merged into the newest filter in the + * list of layers.
  • + *
  • When questions of cardinality are asked the cardinality of the union of + * the enclosed Bloom filters is used.
  • + *
+ *

+ * The net result is that the layered Bloom filter can be populated with more + * items than the Shape would indicate and yet still return a false positive + * rate in line with the Shape and not the over population. + *

+ *

+ * This implementation uses a LayerManager to handle the manipulation of the + * layers. + *

+ *
    + *
  • Level 0 is the oldest layer and the highest level is the newest.
  • + *
  • There is always at least one enclosed filter.
  • + *
  • The newest filter is the {@code target} into which merges are performed. + *
  • Whenever the target is retrieved, or a {@code merge} operation is + * performed, the code checks if any older layers should be removed, and if so + * removes them. It also checks if a new layer should be added, and if so adds + * it and sets the {@code target} before the operation.
  • + *
+ * @since 4.5 + */ +public class LayeredBloomFilter implements BloomFilter, BloomFilterProducer { + private final Shape shape; + private LayerManager layerManager; + + /** + * Creates a fixed size layered bloom filter that adds new filters to the list, + * but never merges them. List will never exceed maxDepth. As additional filters + * are added earlier filters are removed. + * + * @param shape The shape for the enclosed Bloom filters. + * @param maxDepth The maximum depth of layers. + * @return An empty layered Bloom filter of the specified shape and depth. + */ + public static LayeredBloomFilter fixed(final Shape shape, int maxDepth) { + LayerManager manager = LayerManager.builder().setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated()) + .setCleanup(LayerManager.Cleanup.onMaxSize(maxDepth)).setSupplier(() -> new SimpleBloomFilter(shape)).build(); + return new LayeredBloomFilter(shape, manager); + } + + /** + * Constructor. + * + * @param shape the Shape of the enclosed Bloom filters + * @param layerManager the LayerManager to manage the layers. + */ + public LayeredBloomFilter(Shape shape, LayerManager layerManager) { + this.shape = shape; + this.layerManager = layerManager; + } + + @Override + public LayeredBloomFilter copy() { + return new LayeredBloomFilter(shape, layerManager.copy()); + } + + /** + * Gets the depth of the deepest layer. The minimum value returned by this + * method is 1. + * + * @return the depth of the deepest layer. + */ + public final int getDepth() { + return layerManager.getDepth(); + } + + /** + * Gets the Bloom filter at the specified depth + * + * @param depth the depth of the filter to return. + * @return the Bloom filter at the specified depth. 
+ * @throws NoSuchElementException if depth is not in the range [0,getDepth()) + */ + public BloomFilter get(int depth) { + return layerManager.get(depth); + } + + @Override + public int cardinality() { + return SetOperations.cardinality(this); + } + + @Override + public boolean isEmpty() { + return forEachBloomFilter(BloomFilter::isEmpty); + } + + @Override + public final void clear() { + layerManager.clear(); + } + + /** + * Processes the Bloom filters in depth order with the most recent filters + * first. Each filter is passed to the predicate in turn. The function exits on + * the first {@code false} returned by the predicate. + * + * @param bloomFilterPredicate the predicate to execute. + * @return {@code true} if all filters passed the predicate, {@code false} + * otherwise. + */ + @Override + public final boolean forEachBloomFilter(Predicate bloomFilterPredicate) { + return layerManager.forEachBloomFilter(bloomFilterPredicate); + } + + /** + * Create a standard (non-layered) Bloom filter by merging all of the layers. If + * the filter is empty this method will return an empty Bloom filter. + * + * @return the merged bloom filter. + */ + @Override + public BloomFilter flatten() { + BloomFilter bf = new SimpleBloomFilter(shape); + forEachBloomFilter(bf::merge); + return bf; + } + + /** + * Finds the layers in which the Hasher is found. + * + * @param hasher the Hasher to search for. + * @return an array of layer indices in which the Bloom filter is found. + */ + public int[] find(final Hasher hasher) { + SimpleBloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(hasher); + return find(bf); + } + + /** + * Finds the layers in which the IndexProducer is found. + * + * @param indexProducer the Index producer to search for. + * @return an array of layer indices in which the Bloom filter is found. 
+ */ + public int[] find(final IndexProducer indexProducer) { + SimpleBloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(indexProducer); + return find(bf); + } + + /** + * Finds the layers in which the BitMapProducer is found. + * + * @param bitMapProducer the BitMapProducer to search for. + * @return an array of layer indices in which the Bloom filter is found. + */ + public int[] find(final BitMapProducer bitMapProducer) { + SimpleBloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(bitMapProducer); + return find(bf); + } + + /** + * Finds the layers in which the Bloom filter is found. + * + * @param bf the Bloom filter to search for. + * @return an array of layer indices in which the Bloom filter is found. + */ + public int[] find(BloomFilter bf) { + Finder finder = new Finder(bf); + forEachBloomFilter(finder); + return finder.getResult(); + } + + /** + * Returns {@code true} if this any layer contained by this filter contains the + * specified filter. + *

+ * If the {@code other} is a BloomFilterProducer each filter within the + * {@code other} is checked to see if it exists within this filter. + *

+ * + * @param other the other Bloom filter + * @return {@code true} if this filter contains the other filter. + */ + @Override + public boolean contains(final BloomFilter other) { + return other instanceof BloomFilterProducer ? contains((BloomFilterProducer) other) + : !forEachBloomFilter(x -> !x.contains(other)); + } + + /** + * Returns {@code true} if each filter within the {@code producer} exits within + * this filter. + * + * @param producer the BloomFilterProducer that provides the filters to check + * for. + * @return {@code true} if this filter contains all of the filters contained in + * the {@code producer}. + */ + public boolean contains(final BloomFilterProducer producer) { + boolean[] result = { true }; + // return false when we have found a match to short circuit checks + return producer.forEachBloomFilter(x -> { + result[0] &= contains(x); + return result[0]; + }); + } + + /** + * Creates a Bloom filter from a Hasher. + * + * @param hasher the hasher to create the filter from. + * @return the BloomFilter. + */ + private BloomFilter createFilter(final Hasher hasher) { + SimpleBloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(hasher); + return bf; + } + + /** + * Creates a Bloom filter from an IndexProducer. + * + * @param indexProducer the IndexProducer to create the filter from. + * @return the BloomFilter. + */ + private BloomFilter createFilter(final IndexProducer indexProducer) { + SimpleBloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(indexProducer); + return bf; + } + + /** + * Creates a Bloom filter from a BitMapProducer. + * + * @param bitMapProducer the BitMapProducer to create the filter from. + * @return the BloomFilter. 
+ */ + private BloomFilter createFilter(final BitMapProducer bitMapProducer) { + SimpleBloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(bitMapProducer); + return bf; + } + + @Override + public int characteristics() { + return 0; + } + + @Override + public final Shape getShape() { + return shape; + } + + @Override + public boolean contains(final Hasher hasher) { + return contains(createFilter(hasher)); + } + + @Override + public boolean contains(final BitMapProducer bitMapProducer) { + return contains(createFilter(bitMapProducer)); + } + + @Override + public boolean contains(IndexProducer indexProducer) { + return contains(createFilter(indexProducer)); + } + + @Override + public boolean merge(BloomFilter bf) { + return layerManager.getTarget().merge(bf); + } + + @Override + public boolean merge(IndexProducer indexProducer) { + return layerManager.getTarget().merge(indexProducer); + } + + @Override + public boolean merge(BitMapProducer bitMapProducer) { + return layerManager.getTarget().merge(bitMapProducer); + } + + @Override + public boolean forEachIndex(IntPredicate predicate) { + return forEachBloomFilter(bf -> bf.forEachIndex(predicate)); + } + + @Override + public boolean forEachBitMap(LongPredicate predicate) { + return flatten().forEachBitMap(predicate); + } + + @Override + public int estimateN() { + return flatten().estimateN(); + } + + @Override + public int estimateUnion(final BloomFilter other) { + Objects.requireNonNull(other, "other"); + final BloomFilter cpy = this.flatten(); + cpy.merge(other); + return cpy.estimateN(); + } + + /** + * Forces an advance to the next layer. Executes the same logic as when + * LayerManager.extendCheck returns {@code true}. + * + * @see LayerManager + */ + public void next() { + layerManager.next(); + } + + /** + * A class used to locate matching filters across all the layers. 
+ */ + private class Finder implements Predicate<BloomFilter> { + int[] result = new int[layerManager.getDepth()]; /* positions of matching filters; only the first resultIdx entries are valid */ + int bfIdx; /* position of the filter currently being tested, counted from 0 */ + int resultIdx; /* number of matches recorded in result so far */ + BloomFilter bf; /* the filter being searched for */ + + Finder(BloomFilter bf) { + this.bf = bf; + } + + @Override + public boolean test(BloomFilter x) { + if (x.contains(bf)) { + result[resultIdx++] = bfIdx; + } + bfIdx++; + return true; /* always true: this predicate never reports a failure */ + } + + int[] getResult() { + return Arrays.copyOf(result, resultIdx); /* trimmed copy holding only the recorded matches */ + } + } +} diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java b/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java index 883208f063..aeb1eb2685 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java @@ -227,6 +227,23 @@ public double estimateN(final int cardinality) { return -(m / k) * Math.log1p(-c / m); } + /** + * Estimates the maximum number of elements that can be merged into a filter of + * this shape before the false positive rate exceeds the desired rate.

The + * formula for deriving {@code k} when {@code m} and {@code n} are known is: + * + *

{@code k = ln2 * m / n}

+ * + *

Solving for {@code n} yields:

+ * + *

{@code n = ln2 * m / k}

+ * + * @return An estimate of max N. + */ + public double estimateMaxN() { + return numberOfBits * LN_2 / numberOfHashFunctions; + } + /** * Constructs a filter configuration with a desired false-positive probability ({@code p}) and the * specified number of bits ({@code m}) and hash functions ({@code k}). diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java index 25114c1136..91d802c32c 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java @@ -167,6 +167,11 @@ public int cardinality() { return c; } + @Override + public boolean isEmpty() { + return cardinality == 0 || forEachBitMap(y -> y == 0); + } + @Override public boolean forEachIndex(final IntPredicate consumer) { Objects.requireNonNull(consumer, "consumer"); diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java index bd0ba87cc2..980f74f9c2 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java @@ -136,6 +136,11 @@ public int cardinality() { return indices.size(); } + @Override + public boolean isEmpty() { + return indices.isEmpty(); + } + @Override public boolean forEachIndex(final IntPredicate consumer) { Objects.requireNonNull(consumer, "consumer"); diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilter.java new file mode 100644 index 0000000000..cff65d6e34 --- /dev/null +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilter.java @@ -0,0 +1,148 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.function.IntPredicate; +import java.util.function.LongPredicate; + +/** + * An abstract class to assist in implementing Bloom filter decorators. + * + * @since 4.5 + */ +public abstract class WrappedBloomFilter implements BloomFilter { + final BloomFilter wrapped; + + /** + * Wraps a Bloom filter. The wrapped filter is maintained as a reference + * not a copy. Changes in one will be reflected in the other. + * @param bf The Bloom filter. 
+ */ + public WrappedBloomFilter(BloomFilter bf) { + this.wrapped = bf; + } + + @Override + public boolean forEachIndex(IntPredicate predicate) { + return wrapped.forEachIndex(predicate); + } + + @Override + public BloomFilter copy() { + return wrapped.copy(); + } + + @Override + public boolean forEachBitMap(LongPredicate predicate) { + return wrapped.forEachBitMap(predicate); + } + + @Override + public int characteristics() { + return wrapped.characteristics(); + } + + @Override + public Shape getShape() { + return wrapped.getShape(); + } + + @Override + public void clear() { + wrapped.clear(); + } + + @Override + public boolean contains(BloomFilter other) { + return wrapped.contains(other); + } + + @Override + public boolean forEachBitMapPair(BitMapProducer other, LongBiPredicate func) { + return wrapped.forEachBitMapPair(other, func); + } + + @Override + public boolean contains(Hasher hasher) { + return wrapped.contains(hasher); + } + + @Override + public long[] asBitMapArray() { + return wrapped.asBitMapArray(); + } + + @Override + public int[] asIndexArray() { + return wrapped.asIndexArray(); + } + + @Override + public boolean contains(IndexProducer indexProducer) { + return wrapped.contains(indexProducer); + } + + @Override + public boolean contains(BitMapProducer bitMapProducer) { + return wrapped.contains(bitMapProducer); + } + + @Override + public boolean merge(BloomFilter other) { + return wrapped.merge(other); + } + + @Override + public boolean merge(Hasher hasher) { + return wrapped.merge(hasher); + } + + @Override + public boolean merge(IndexProducer indexProducer) { + return wrapped.merge(indexProducer); + } + + @Override + public boolean merge(BitMapProducer bitMapProducer) { + return wrapped.merge(bitMapProducer); + } + + @Override + public boolean isFull() { + return wrapped.isFull(); + } + + @Override + public int cardinality() { + return wrapped.cardinality(); + } + + @Override + public int estimateN() { + return wrapped.estimateN(); + } + + 
@Override + public int estimateUnion(BloomFilter other) { + return wrapped.estimateUnion(other); + } + + @Override + public int estimateIntersection(BloomFilter other) { + return wrapped.estimateIntersection(other); + } +} diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java index 7df764182d..7a21f19834 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java @@ -46,7 +46,7 @@ * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in * manipulation of the representations.

* - *

The bloom filter code is an interface that requires implementation of 9 methods:

+ *

The Bloom filter code is an interface that requires implementation of 9 methods:

*
    *
  • {@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.
  • * @@ -72,10 +72,15 @@ * *

    CountingBloomFilter

    * - *

    The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been + *

    The counting Bloom filter extends the Bloom filter by counting the number of times a specific bit has been * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional * overhead.

    * + *

    LayeredBloomFilter

    + * + *

    The layered Bloom filter extends the Bloom filter by creating layers of Bloom filters that can be queried as a single + * filter or as a set of filters. This adds the ability to perform windowing on streams of data.

    + * *

    Shape

    * *

    The Shape describes the Bloom filter using the number of bits and the number of hash functions

    diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterProducerTest.java new file mode 100644 index 0000000000..3d445add96 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterProducerTest.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.function.BiPredicate; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public abstract class AbstractBloomFilterProducerTest { + private Shape shape = Shape.fromKM(17, 72); + + BloomFilter one = new SimpleBloomFilter(shape); + BloomFilter two = new SimpleBloomFilter(shape); + int[] nullCount = { 0, 0 }; + int[] equalityCount = { 0 }; + BiPredicate counter = (x, y) -> { + if (x == null) { + nullCount[0]++; + } + if (y == null) { + nullCount[1]++; + } + if (x != null && y != null && x.cardinality() == y.cardinality()) { + equalityCount[0]++; + } + return true; + }; + + /** + * The shape of the Bloom filters for testing. + *
      + *
    • Hash functions (k) = 17 + *
    • Number of bits (m) = 72 + *
    + * @return the testing shape. + */ + protected Shape getTestShape() { + return shape; + } + + @BeforeEach + public void setup() { + one.clear(); + one.merge(IndexProducer.fromIndexArray(1)); + two.clear(); + two.merge(IndexProducer.fromIndexArray(2, 3)); + nullCount[0] = 0; + nullCount[1] = 0; + equalityCount[0] = 0; + } + + /** + * Creates a BloomFilterProducer that returns the filters (or their copy) in the order presented. + * @param filters The filters to return. + * @return A BloomFilterProducer that returns the filters in order. + */ + protected abstract BloomFilterProducer createUnderTest(BloomFilter... filters); + + private BloomFilterProducer createUnderTest() { + return createUnderTest(one, two); + } + + @Test + public void testAsBloomFilterArray() { + BloomFilter[] result = createUnderTest().asBloomFilterArray(); + assertEquals(2, result.length); + assertEquals(1, result[0].cardinality()); + assertEquals(2, result[1].cardinality()); + } + + @Test + public void testForEachPairCompleteMatch() { + assertTrue(createUnderTest().forEachBloomFilterPair(createUnderTest(), counter)); + assertArrayEquals(new int[] { 0, 0 }, nullCount); + assertEquals(2, equalityCount[0]); + } + + @Test + public void testForEachPairArrayTooShort() { + assertTrue(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one), counter)); + assertEquals(0, nullCount[0]); + assertEquals(1, nullCount[1]); + assertEquals(1, equalityCount[0]); + } + + @Test + public void testForEachPairArrayTooLong() { + assertTrue(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one), + counter)); + assertEquals(1, nullCount[0]); + assertEquals(0, nullCount[1]); + assertEquals(2, equalityCount[0]); + } + + @Test + public void testForEachPairReturnFalseLate() { + assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one), + counter.and((x, y) -> x != null && y != null))); + 
assertEquals(1, nullCount[0]); + assertEquals(0, nullCount[1]); + assertEquals(2, equalityCount[0]); + } + + @Test + public void testForEachPairReturnFalseLateShortArray() { + assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one), + counter.and((x, y) -> x != null && y != null))); + assertEquals(0, nullCount[0]); + assertEquals(1, nullCount[1]); + assertEquals(1, equalityCount[0]); + } + + @Test + public void testForEachPairReturnFalseEarly() { + assertFalse(createUnderTest().forEachBloomFilterPair(BloomFilterProducer.fromBloomFilterArray(one, two, one), + (x, y) -> false)); + } + + @Test + public void testFlatten() { + BloomFilter underTest = createUnderTest().flatten(); + BloomFilter expected = new SimpleBloomFilter(shape); + expected.merge(IndexProducer.fromIndexArray(1, 2, 3)); + assertArrayEquals(expected.asBitMapArray(), underTest.asBitMapArray()); + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java index 3d332146d5..32cb95d262 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java @@ -174,10 +174,10 @@ public final void testContains() { BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1); final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape())); - assertTrue(bf1.contains(bf1), "BF Should contain itself"); + assertTrue(bf1.contains(bf1), "BF1 Should contain itself"); assertTrue(bf2.contains(bf2), "BF2 Should contain itself"); - assertFalse(bf1.contains(bf2), "BF should not contain BF2"); - assertTrue(bf2.contains(bf1), "BF2 should contain BF"); + assertFalse(bf1.contains(bf2), "BF1 should not contain BF2"); + assertTrue(bf2.contains(bf1), "BF2 should contain BF1"); 
assertTrue(bf2.contains(new IncrementingHasher(1, 1)), "BF2 Should contain this hasher"); assertFalse(bf2.contains(new IncrementingHasher(1, 3)), "BF2 Should not contain this hasher"); @@ -433,6 +433,46 @@ public void testBitMapProducerSize() { assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]); } + /** + * Test cardinality and isEmpty. Bloom filter must be able to accept multiple + * IndexProducer merges until all the bits are populated. + * + * @param bf The Bloom filter to test. + */ + protected void testCardinalityAndIsEmpty(BloomFilter bf) { + assertTrue(bf.isEmpty()); + assertEquals(0, bf.cardinality()); + for (int i = 0; i < getTestShape().getNumberOfBits(); i++) { + bf.merge(IndexProducer.fromIndexArray(i)); + assertFalse(bf.isEmpty(), "Wrong value at " + i); + assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i); + } + + // check operations in reverse order + bf.clear(); + assertEquals(0, bf.cardinality()); + assertTrue(bf.isEmpty()); + for (int i = 0; i < getTestShape().getNumberOfBits(); i++) { + bf.merge(IndexProducer.fromIndexArray(i)); + assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i); + assertFalse(bf.isEmpty(), "Wrong value at " + i); + } + } + + @Test + public void testCardinalityAndIsEmpty() { + testCardinalityAndIsEmpty(createEmptyFilter(getTestShape())); + } + + @Test + public void testEmptyAfterMergeWithNothing() { + // test the case where is empty after merge + // in this case the internal cardinality == -1 + BloomFilter bf = createEmptyFilter(getTestShape()); + bf.merge(IndexProducer.fromIndexArray()); + assertTrue(bf.isEmpty()); + } + /** * Testing class returns the value as the only value. 
*/ diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java index faa66ead7b..044dcd1721 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java @@ -16,10 +16,10 @@ */ package org.apache.commons.collections4.bloomfilter; -import static org.junit.Assert.assertSame; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromLayeredBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromLayeredBloomFilterTest.java new file mode 100644 index 0000000000..e8a2d0c78b --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromLayeredBloomFilterTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +public class BitMapProducerFromLayeredBloomFilterTest extends AbstractBitMapProducerTest { + + protected Shape shape = Shape.fromKM(17, 72); + + @Override + protected BitMapProducer createProducer() { + final Hasher hasher = new IncrementingHasher(0, 1); + final BloomFilter bf = LayeredBloomFilter.fixed(shape, 10); + bf.merge(hasher); + return bf; + } + + @Override + protected BitMapProducer createEmptyProducer() { + return LayeredBloomFilter.fixed(shape, 10); + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromWrappedBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromWrappedBloomFilterTest.java new file mode 100644 index 0000000000..b665bc9e94 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromWrappedBloomFilterTest.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +public class BitMapProducerFromWrappedBloomFilterTest extends AbstractBitMapProducerTest { + + protected Shape shape = Shape.fromKM(17, 72); + + @Override + protected BitMapProducer createProducer() { + final Hasher hasher = new IncrementingHasher(0, 1); + final BloomFilter bf = new WrappedBloomFilter(new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape)) { + }; + bf.merge(hasher); + return bf; + } + + @Override + protected BitMapProducer createEmptyProducer() { + return new WrappedBloomFilter(new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape)) { + }; + } + +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromBloomFilterArrayTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromBloomFilterArrayTest.java new file mode 100644 index 0000000000..8a6eba7de0 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromBloomFilterArrayTest.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +public class BloomFilterProducerFromBloomFilterArrayTest extends AbstractBloomFilterProducerTest{ + + @Override + protected BloomFilterProducer createUnderTest(BloomFilter... filters) { + return BloomFilterProducer.fromBloomFilterArray(filters); + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromLayeredBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromLayeredBloomFilterTest.java new file mode 100644 index 0000000000..de93190931 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducerFromLayeredBloomFilterTest.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +public class BloomFilterProducerFromLayeredBloomFilterTest extends AbstractBloomFilterProducerTest{ + + @Override + protected BloomFilterProducer createUnderTest(BloomFilter... 
filters) { + Shape shape = filters[0].getShape(); + LayerManager layerManager = LayerManager.builder().setSupplier( () -> new SimpleBloomFilter(shape) ) + .setExtendCheck( LayerManager.ExtendCheck.advanceOnPopulated()) + .setCleanup(LayerManager.Cleanup.noCleanup()).build(); + LayeredBloomFilter underTest = new LayeredBloomFilter(shape, layerManager); + for (BloomFilter bf : filters) { + underTest.merge(bf); + } + return underTest; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromLayeredBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromLayeredBloomFilterTest.java new file mode 100644 index 0000000000..6c22b37f86 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromLayeredBloomFilterTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +public class CellProducerFromLayeredBloomFilterTest extends AbstractCellProducerTest { + + protected Shape shape = Shape.fromKM(17, 72); + + @Override + protected CellProducer createProducer() { + final Hasher hasher = new IncrementingHasher(3, 2); + final BloomFilter bf = LayeredBloomFilter.fixed(shape, 10); + bf.merge(hasher); + return CellProducer.from(bf); + } + + @Override + protected CellProducer createEmptyProducer() { + return CellProducer.from(LayeredBloomFilter.fixed(shape, 10)); + } + + @Override + protected int[] getExpectedIndices() { + return new int[] {3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35}; + } + + @Override + protected int[] getExpectedValues() { + return new int[] {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/CountingPredicateTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingPredicateTest.java new file mode 100644 index 0000000000..d03c94d180 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/CountingPredicateTest.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.BiPredicate; + +import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Test; + +public class CountingPredicateTest { + + private Integer[] ary = {Integer.valueOf(1), Integer.valueOf(2)}; + + private BiPredicate makeFunc(BiPredicate inner, List> result) { + return (x, y) -> { + if (inner.test(x, y)) { + result.add(Pair.of(x, y)); + return true; + } + return false; + }; + } + + /** + * Test when the predicate array is shorter than the other array as determined by the number + * of times cp.test() is called and all other values result in a true statement. + */ + @Test + public void testPredicateShorter() { + List> expected = new ArrayList<>(); + List> result = new ArrayList<>(); + Integer[] shortAry = {Integer.valueOf(3)}; + expected.add(Pair.of(3, 1)); + expected.add(Pair.of(null, 2)); + CountingPredicate cp = new CountingPredicate<>(shortAry, makeFunc((x, y) -> true, result)); + for (Integer i : ary) { + assertTrue(cp.test(i)); + } + assertEquals(expected, result); + assertTrue(cp.forEachRemaining()); + assertEquals(expected, result); + } + + /** + * Test when the predicate array is the same length as the other array as determined by the number + * of times cp.test() is called and all other values result in a true statement. 
+ */ + @Test + public void testPredicateSameLength() { + List> expected = new ArrayList<>(); + List> result = new ArrayList<>(); + expected.add( Pair.of(1, 3)); + expected.add( Pair.of(2, 3)); + CountingPredicate cp = new CountingPredicate<>(ary, makeFunc((x, y) -> true, result)); + assertTrue(cp.test(3)); + assertTrue(cp.test(3)); + assertEquals(expected, result); + assertTrue(cp.forEachRemaining()); + assertEquals(expected, result); + } + + /** + * Test when the predicate array is longer than other array as determined by the number + * of times cp.test() is called and all other values result in a true statement. + */ + @Test + public void testPredicateLonger() { + List> expected = new ArrayList<>(); + List> result = new ArrayList<>(); + expected.add(Pair.of(1, 3)); + + CountingPredicate cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x!=null, result)); + assertTrue(cp.test(Integer.valueOf(3))); + assertEquals(expected, result); + expected.add(Pair.of(2, null)); + assertTrue(cp.forEachRemaining()); + assertEquals(expected, result); + + // if the other array is zero length then cp.test() will not be called so + // we can just call cp.forEachRemaining() here. + expected.clear(); + expected.add(Pair.of(1, null)); + expected.add(Pair.of(2, null)); + result.clear(); + cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x!=null, result)); + assertTrue(cp.forEachRemaining()); + assertEquals( expected, result); + + // If a test fails then the result should be false and the rest of the list should + // not be processed. 
+ expected.clear(); + expected.add(Pair.of(1, null)); + result.clear(); + cp = new CountingPredicate<>(ary, makeFunc((x, y) -> x == Integer.valueOf(1), result)); + assertFalse(cp.forEachRemaining()); + assertEquals(expected, result); + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterProducerTest.java new file mode 100644 index 0000000000..81d41abf99 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterProducerTest.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.function.Predicate; + +public class DefaultBloomFilterProducerTest extends AbstractBloomFilterProducerTest { + + @Override + protected BloomFilterProducer createUnderTest(BloomFilter... 
filters) { + return new BloomFilterProducer() { + @Override + public boolean forEachBloomFilter(Predicate bloomFilterPredicate) { + for (BloomFilter bf : filters) { + if (!bloomFilterPredicate.test(bf)) { + return false; + } + } + return true; + } + }; + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java index e716c42b56..bd7456c709 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java @@ -215,9 +215,12 @@ public int cardinality() { } } - static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter { + /** + * A default implementation of a Sparse bloom filter. + */ + public static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter { - SparseDefaultBloomFilter(final Shape shape) { + public SparseDefaultBloomFilter(final Shape shape) { super(shape); } @@ -234,9 +237,12 @@ public AbstractDefaultBloomFilter copy() { } } - static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter { + /** + * A default implementation of a non-sparse Bloom filter. + */ + public static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter { - NonSparseDefaultBloomFilter(final Shape shape) { + public NonSparseDefaultBloomFilter(final Shape shape) { super(shape); } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/LayerManagerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/LayerManagerTest.java new file mode 100644 index 0000000000..c7023b1703 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/LayerManagerTest.java @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.function.Consumer; +import java.util.function.Predicate; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class LayerManagerTest { + + private Shape shape = Shape.fromKM(17, 72); + + private LayerManager.Builder testingBuilder() { + return LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)); + } + + @Test + public void testAdvanceOnPopulated() { + Predicate underTest = LayerManager.ExtendCheck.advanceOnPopulated(); + LayerManager layerManager = testingBuilder().build(); + 
assertFalse(underTest.test(layerManager)); + layerManager.getTarget().merge(TestingHashers.FROM1); + assertTrue(underTest.test(layerManager)); + } + + @Test + public void testNeverAdvance() { + Predicate underTest = LayerManager.ExtendCheck.neverAdvance(); + LayerManager layerManager = testingBuilder().build(); + assertFalse(underTest.test(layerManager)); + for (int i = 0; i < 10; i++) { + layerManager.getTarget().merge(TestingHashers.randomHasher()); + assertFalse(underTest.test(layerManager)); + } + } + + @ParameterizedTest + @ValueSource(ints = {4, 10, 2, 1}) + public void testAdvanceOnCount(int breakAt) { + Predicate underTest = LayerManager.ExtendCheck.advanceOnCount(breakAt); + LayerManager layerManager = testingBuilder().build(); + for (int i = 0; i < breakAt - 1; i++) { + assertFalse(underTest.test(layerManager), "at " + i); + layerManager.getTarget().merge(TestingHashers.FROM1); + } + assertTrue(underTest.test(layerManager)); + } + + @Test + public void testAdvanceOnCountInvalidArguments() { + assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(0)); + assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnCount(-1)); + } + + @Test + public void testAdvanceOnSaturation() { + Double maxN = shape.estimateMaxN(); + int hashStart = 0; + Predicate underTest = LayerManager.ExtendCheck.advanceOnSaturation(maxN); + LayerManager layerManager = testingBuilder().build(); + while (layerManager.getTarget().getShape().estimateN(layerManager.getTarget().cardinality()) < maxN) { + assertFalse(underTest.test(layerManager)); + layerManager.getTarget().merge(new IncrementingHasher(hashStart, shape.getNumberOfHashFunctions())); + hashStart+=shape.getNumberOfHashFunctions(); + } + assertTrue(underTest.test(layerManager)); + assertThrows(IllegalArgumentException.class, () -> LayerManager.ExtendCheck.advanceOnSaturation(0)); + assertThrows(IllegalArgumentException.class, () -> 
LayerManager.ExtendCheck.advanceOnSaturation(-1)); + } + + @ParameterizedTest + @ValueSource(ints = {5, 100, 2, 1}) + public void testOnMaxSize(int maxSize) { + Consumer> underTest = LayerManager.Cleanup.onMaxSize(maxSize); + LinkedList list = new LinkedList<>(); + for (int i = 0; i < maxSize; i++) { + assertEquals(i, list.size()); + list.add(new SimpleBloomFilter(shape)); + underTest.accept(list); + } + assertEquals(maxSize, list.size()); + + for (int i = 0; i < maxSize; i++) { + list.add(new SimpleBloomFilter(shape)); + underTest.accept(list); + assertEquals(maxSize, list.size()); + } + } + + @Test + public void testOnMaxSizeIllegalValues() { + assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(0)); + assertThrows(IllegalArgumentException.class, () -> LayerManager.Cleanup.onMaxSize(-1)); + } + + @Test + public void testNoCleanup() { + Consumer> underTest = LayerManager.Cleanup.noCleanup(); + LinkedList list = new LinkedList<>(); + for (int i = 0; i < 20; i++) { + assertEquals(i, list.size()); + list.add(new SimpleBloomFilter(shape)); + underTest.accept(list); + } + } + + @Test + public void testRemoveEmptyTarget() { + Consumer> underTest = LayerManager.Cleanup.removeEmptyTarget(); + LinkedList list = new LinkedList<>(); + + // removes an empty filter + BloomFilter bf = new SimpleBloomFilter(shape); + list.add(bf); + assertEquals(bf, list.get(0)); + underTest.accept(list); + assertTrue(list.isEmpty()); + + // does not remove a populated filter. + bf.merge(IndexProducer.fromIndexArray(1)); + list.add(bf); + assertEquals(bf, list.get(0)); + underTest.accept(list); + assertEquals(bf, list.get(0)); + + // does not remove an empty filter followed by a populated filter. + list.clear(); + list.add(new SimpleBloomFilter(shape)); + list.add(bf); + assertEquals(2, list.size()); + underTest.accept(list); + assertEquals(2, list.size()); + + // does not remove multiple empty filters at the end of the list, just the last + // one. 
+ list.clear(); + list.add(bf); + list.add(new SimpleBloomFilter(shape)); + list.add(new SimpleBloomFilter(shape)); + assertEquals(3, list.size()); + underTest.accept(list); + assertEquals(2, list.size()); + assertEquals(bf, list.get(0)); + + } + + @Test + public void testCopy() { + LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build(); + underTest.getTarget().merge(TestingHashers.randomHasher()); + underTest.next(); + underTest.getTarget().merge(TestingHashers.randomHasher()); + underTest.next(); + underTest.getTarget().merge(TestingHashers.randomHasher()); + assertEquals(3, underTest.getDepth()); + + LayerManager copy = underTest.copy(); + assertNotSame(underTest, copy); + // object equals not implemented + assertNotEquals(underTest, copy); + + assertEquals(underTest.getDepth(), copy.getDepth()); + assertTrue( + underTest.forEachBloomFilterPair(copy, (x, y) -> Arrays.equals(x.asBitMapArray(), y.asBitMapArray()))); + } + + @Test + public void testBuilder() { + LayerManager.Builder underTest = LayerManager.builder(); + NullPointerException npe = assertThrows(NullPointerException.class, () -> underTest.build()); + assertTrue(npe.getMessage().contains("Supplier must not be null")); + underTest.setSupplier(() -> null).setCleanup(null); + npe = assertThrows(NullPointerException.class, () -> underTest.build()); + assertTrue(npe.getMessage().contains("Cleanup must not be null")); + underTest.setCleanup(x -> { + }).setExtendCheck(null); + npe = assertThrows(NullPointerException.class, () -> underTest.build()); + assertTrue(npe.getMessage().contains("ExtendCheck must not be null")); + + npe = assertThrows(NullPointerException.class, () -> LayerManager.builder().setSupplier(() -> null).build()); + assertTrue(npe.getMessage().contains("filterSupplier returned null.")); + + } + + @Test + public void testClear() { + LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build(); + 
underTest.getTarget().merge(TestingHashers.randomHasher()); + underTest.next(); + underTest.getTarget().merge(TestingHashers.randomHasher()); + underTest.next(); + underTest.getTarget().merge(TestingHashers.randomHasher()); + assertEquals(3, underTest.getDepth()); + underTest.clear(); + assertEquals(1, underTest.getDepth()); + assertEquals(0, underTest.getTarget().cardinality()); + } + + @Test + public void testNextAndGetDepth() { + LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)).build(); + assertEquals(1, underTest.getDepth()); + underTest.getTarget().merge(TestingHashers.randomHasher()); + assertEquals(1, underTest.getDepth()); + underTest.next(); + assertEquals(2, underTest.getDepth()); + } + + @Test + public void testGet() { + SimpleBloomFilter f = new SimpleBloomFilter(shape); + LayerManager underTest = LayerManager.builder().setSupplier(() -> f).build(); + assertEquals(1, underTest.getDepth()); + assertSame(f, underTest.get(0)); + assertThrows(NoSuchElementException.class, () -> underTest.get(-1)); + assertThrows(NoSuchElementException.class, () -> underTest.get(1)); + } + + @Test + public void testTarget() { + boolean[] extendCheckCalled = { false }; + boolean[] cleanupCalled = { false }; + int[] supplierCount = { 0 }; + LayerManager underTest = LayerManager.builder().setSupplier(() -> { + supplierCount[0]++; + return new SimpleBloomFilter(shape); + }).setExtendCheck(lm -> { + extendCheckCalled[0] = true; + return true; + }).setCleanup(ll -> { + cleanupCalled[0] = true; + }).build(); + assertFalse(extendCheckCalled[0]); + assertFalse(cleanupCalled[0]); + assertEquals(1, supplierCount[0]); + underTest.getTarget(); + assertTrue(extendCheckCalled[0]); + assertTrue(cleanupCalled[0]); + assertEquals(2, supplierCount[0]); + } + + @Test + public void testForEachBloomFilter() { + LayerManager underTest = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(shape)) + 
.setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated()).build(); + + List lst = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + BloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(TestingHashers.randomHasher()); + lst.add(bf); + underTest.getTarget().merge(bf); + } + List lst2 = new ArrayList<>(); + underTest.forEachBloomFilter(lst2::add); + assertEquals(10, lst.size()); + assertEquals(10, lst2.size()); + for (int i = 0; i < lst.size(); i++) { + assertArrayEquals(lst.get(i).asBitMapArray(), lst2.get(i).asBitMapArray()); + } + } + +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilterTest.java new file mode 100644 index 0000000000..11164eca06 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilterTest.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.function.Predicate; + +import org.apache.commons.collections4.bloomfilter.LayerManager.Cleanup; +import org.apache.commons.collections4.bloomfilter.LayerManager.ExtendCheck; +import org.junit.jupiter.api.Test; + +public class LayeredBloomFilterTest extends AbstractBloomFilterTest { + + @Override + protected LayeredBloomFilter createEmptyFilter(Shape shape) { + return LayeredBloomFilter.fixed(shape, 10); + } + + protected BloomFilter makeFilter(int... values) { + return makeFilter(IndexProducer.fromIndexArray(values)); + } + + protected BloomFilter makeFilter(IndexProducer p) { + BloomFilter bf = new SparseBloomFilter(getTestShape()); + bf.merge(p); + return bf; + } + + protected BloomFilter makeFilter(Hasher h) { + BloomFilter bf = new SparseBloomFilter(getTestShape()); + bf.merge(h); + return bf; + } + + @Test + public void testMultipleFilters() { + LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10); + filter.merge(TestingHashers.FROM1); + filter.merge(TestingHashers.FROM11); + assertEquals(2, filter.getDepth()); + assertTrue(filter.contains(makeFilter(TestingHashers.FROM1))); + assertTrue(filter.contains(makeFilter(TestingHashers.FROM11))); + BloomFilter t1 = makeFilter(6, 7, 17, 18, 19); + assertFalse(filter.contains(t1)); + assertFalse(filter.copy().contains(t1)); + assertTrue(filter.flatten().contains(t1)); + } + + private LayeredBloomFilter setupFindTest() { + LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10); + 
filter.merge(TestingHashers.FROM1); + filter.merge(TestingHashers.FROM11); + filter.merge(new IncrementingHasher(11, 2)); + filter.merge(TestingHashers.populateFromHashersFrom1AndFrom11(new SimpleBloomFilter(getTestShape()))); + return filter; + } + + @Test + public void testFindBloomFilter() { + LayeredBloomFilter filter = setupFindTest(); + int[] expected = {0, 3}; + int[] result = filter.find(TestingHashers.FROM1); + assertArrayEquals(expected, result); + expected = new int[] {1, 3}; + result = filter.find(TestingHashers.FROM11); + assertArrayEquals(expected, result); + } + + @Test + public void testFindBitMapProducer() { + LayeredBloomFilter filter = setupFindTest(); + + IndexProducer idxProducer = TestingHashers.FROM1.indices(getTestShape()); + BitMapProducer producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits()); + + int[] expected = {0, 3}; + int[] result = filter.find(producer); + assertArrayEquals(expected, result); + + expected = new int[]{1, 3}; + idxProducer = TestingHashers.FROM11.indices(getTestShape()); + producer = BitMapProducer.fromIndexProducer(idxProducer, getTestShape().getNumberOfBits()); + result = filter.find(producer); + assertArrayEquals(expected, result); + } + + @Test + public void testFindIndexProducer() { + IndexProducer producer = TestingHashers.FROM1.indices(getTestShape()); + LayeredBloomFilter filter = setupFindTest(); + + int[] expected = {0, 3}; + int[] result = filter.find(producer); + assertArrayEquals(expected, result); + + expected = new int[] {1, 3}; + producer = TestingHashers.FROM11.indices(getTestShape()); + result = filter.find(producer); + assertArrayEquals(expected, result); + } + + /** + * Tests that the estimated union calculations are correct. 
+ */ + @Test + public final void testEstimateUnionCrossTypes() { + final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1); + final BloomFilter bf2 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape()); + bf2.merge(TestingHashers.FROM11); + + assertEquals(2, bf.estimateUnion(bf2)); + assertEquals(2, bf2.estimateUnion(bf)); + } + + @Test + public final void testGetLayer() { + BloomFilter bf = new SimpleBloomFilter(getTestShape()); + bf.merge(TestingHashers.FROM11); + LayeredBloomFilter filter = LayeredBloomFilter.fixed(getTestShape(), 10); + filter.merge(TestingHashers.FROM1); + filter.merge(TestingHashers.FROM11); + filter.merge(new IncrementingHasher(11, 2)); + filter.merge(TestingHashers.populateFromHashersFrom1AndFrom11(new SimpleBloomFilter(getTestShape()))); + assertArrayEquals(bf.asBitMapArray(), filter.get(1).asBitMapArray()); + } + + @Test + public final void testNext() { + LayerManager layerManager = LayerManager.builder().setSupplier(() -> new SimpleBloomFilter(getTestShape())) + .build(); + + LayeredBloomFilter filter = new LayeredBloomFilter(getTestShape(), layerManager); + filter.merge(TestingHashers.FROM1); + filter.merge(TestingHashers.FROM11); + assertEquals(1, filter.getDepth()); + filter.next(); + filter.merge(new IncrementingHasher(11, 2)); + assertEquals(2, filter.getDepth()); + assertTrue(filter.get(0).contains(TestingHashers.FROM1)); + assertTrue(filter.get(0).contains(TestingHashers.FROM11)); + assertFalse(filter.get(0).contains(new IncrementingHasher(11, 2))); + assertFalse(filter.get(1).contains(TestingHashers.FROM1)); + assertFalse(filter.get(1).contains(TestingHashers.FROM11)); + assertTrue(filter.get(1).contains(new IncrementingHasher(11, 2))); + } + + @Override + @Test + public void testCardinalityAndIsEmpty() { + LayerManager layerManager = LayerManager.builder().setExtendCheck(ExtendCheck.neverAdvance()) + .setSupplier(() -> new SimpleBloomFilter(getTestShape())).build(); + 
testCardinalityAndIsEmpty(new LayeredBloomFilter(getTestShape(), layerManager)); + } + + // ***** TESTS THAT CHECK LAYERED PROCESSING ****** + + // ***example of instrumentation *** + private static List dbgInstrument = new ArrayList<>(); + // instrumentation to record timestamps in dbgInstrument list + private Predicate dbg = (bf) -> { + TimestampedBloomFilter tbf = (TimestampedBloomFilter) bf; + long ts = System.currentTimeMillis(); + dbgInstrument.add(String.format("T:%s (Elapsed:%s)- EstN:%s (Card:%s)\n", tbf.timestamp, ts - tbf.timestamp, + tbf.estimateN(), tbf.cardinality())); + return true; + }; + // *** end of instrumentation *** + + /** + * Creates a LayeredBloomFilter that retains enclosed filters for + * {@code duration} and limits the contents of each enclosed filter to a time + * {@code quanta}. This filter uses the timestamped Bloom filter internally. + * + * @param shape The shape of the Bloom filters. + * @param duration The length of time to keep filters in the list. + * @param dUnit The unit of time to apply to duration. + * @param quanta The quantization factor for each filter. Individual filters + * will span at most this much time. + * @param qUnit the unit of time to apply to quanta. + * @return LayeredBloomFilter with the above properties. + */ + static LayeredBloomFilter createTimedLayeredFilter(Shape shape, long duration, TimeUnit dUnit, long quanta, + TimeUnit qUnit) { + LayerManager layerManager = LayerManager.builder() + .setSupplier(() -> new TimestampedBloomFilter(new SimpleBloomFilter(shape))) + .setCleanup(Cleanup.removeEmptyTarget().andThen(new CleanByTime(duration, dUnit))) + .setExtendCheck(new AdvanceOnTimeQuanta(quanta, qUnit) + .or(LayerManager.ExtendCheck.advanceOnSaturation(shape.estimateMaxN()))) + .build(); + return new LayeredBloomFilter(shape, layerManager); + } + + /** + * A Predicate that advances after a quantum of time. 
+ */ + static class AdvanceOnTimeQuanta implements Predicate { + long quanta; + + AdvanceOnTimeQuanta(long quanta, TimeUnit unit) { + this.quanta = unit.toMillis(quanta); + } + + @Override + public boolean test(LayerManager lm) { + // can not use getTarget() as it causes recursion. + TimestampedBloomFilter bf = (TimestampedBloomFilter) lm.get(lm.getDepth() - 1); + return bf.timestamp + quanta < System.currentTimeMillis(); + } + } + + /** + * A Consumer that cleans the list based on how long each filter has been in + * the list. + * + */ + static class CleanByTime implements Consumer> { + long elapsedTime; + + CleanByTime(long duration, TimeUnit unit) { + elapsedTime = unit.toMillis(duration); + } + + @Override + public void accept(LinkedList t) { + long min = System.currentTimeMillis() - elapsedTime; + while (!t.isEmpty() && ((TimestampedBloomFilter) t.getFirst()).getTimestamp() < min) { + TimestampedBloomFilter bf = (TimestampedBloomFilter) t.getFirst(); + dbgInstrument.add(String.format("Removing old entry: T:%s (Aged: %s) \n", bf.getTimestamp(), + (min - bf.getTimestamp()))); + t.removeFirst(); + } + } + } + + /** + * A Bloom filter implementation that tracks the creation time. + */ + static class TimestampedBloomFilter extends WrappedBloomFilter { + final long timestamp; + + TimestampedBloomFilter(BloomFilter bf) { + super(bf); + this.timestamp = System.currentTimeMillis(); + } + + public long getTimestamp() { + return timestamp; + } + } + + @Test + public void testExpiration() throws InterruptedException { + // this test uses the instrumentation noted above to track changes for debugging + // purposes. + + // list of timestamps that are expected to be expired. + List lst = new ArrayList<>(); + Shape shape = Shape.fromNM(4, 64); + + // create a filter that removes filters that are 600 milliseconds old + // and quantises time to 150 millisecond intervals. 
+ LayeredBloomFilter underTest = createTimedLayeredFilter(shape, 600, TimeUnit.MILLISECONDS, 150, + TimeUnit.MILLISECONDS); + + for (int i = 0; i < 10; i++) { + underTest.merge(TestingHashers.randomHasher()); + } + underTest.forEachBloomFilter(dbg.and(x -> lst.add(((TimestampedBloomFilter) x).timestamp))); + assertTrue(underTest.getDepth() > 1); + + Thread.sleep(300); + for (int i = 0; i < 10; i++) { + underTest.merge(TestingHashers.randomHasher()); + } + dbgInstrument.add("=== AFTER 300 milliseconds ====\n"); + underTest.forEachBloomFilter(dbg); + + Thread.sleep(150); + for (int i = 0; i < 10; i++) { + underTest.merge(TestingHashers.randomHasher()); + } + dbgInstrument.add("=== AFTER 450 milliseconds ====\n"); + underTest.forEachBloomFilter(dbg); + + // sleep 200 milliseconds to ensure we cross the 600 millisecond boundary + Thread.sleep(200); + underTest.merge(TestingHashers.randomHasher()); + dbgInstrument.add("=== AFTER 600 milliseconds ====\n"); + assertTrue(underTest.forEachBloomFilter(dbg.and(x -> !lst.contains(((TimestampedBloomFilter) x).timestamp))), + "Found filter that should have been deleted: " + dbgInstrument.get(dbgInstrument.size() - 1)); + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java b/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java index 8222d4c253..bf9ab41de5 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/TestingHashers.java @@ -16,6 +16,8 @@ */ package org.apache.commons.collections4.bloomfilter; +import java.util.concurrent.ThreadLocalRandom; + /** * A collection of methods and statics that represent standard hashers in testing. */ @@ -88,4 +90,11 @@ public static T populateRange(final T filter, final int }); return filter; } + + /** + * Creates an EnhancedDoubleHasher hasher from 2 random longs. 
+ */ + public static Hasher randomHasher() { + return new EnhancedDoubleHasher( ThreadLocalRandom.current().nextLong(), ThreadLocalRandom.current().nextLong() ); + } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilterTest.java new file mode 100644 index 0000000000..eca4a21a3b --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/WrappedBloomFilterTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class WrappedBloomFilterTest extends AbstractBloomFilterTest { + + @Override + protected WrappedBloomFilter createEmptyFilter(Shape shape) { + return new WrappedBloomFilter(new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape)) { + }; + } + + @ParameterizedTest + @ValueSource(ints = {0, 1, 34}) + public void testCharacteristics(int characteristics) { + Shape shape = getTestShape(); + BloomFilter inner = new DefaultBloomFilterTest.SparseDefaultBloomFilter(shape) { + @Override + public int characteristics() { + return characteristics; + } + }; + WrappedBloomFilter underTest = new WrappedBloomFilter(inner) {}; + assertEquals(characteristics, underTest.characteristics()); + } +}