From 488393e60081b89fb1178bd93a66f414008ea6e1 Mon Sep 17 00:00:00 2001
From: Claude Warren
Date: Tue, 15 Aug 2023 09:34:38 +0200
Subject: [PATCH] COLLECTIONS-844: Allow counting Bloom filters with cell size
other than Integer.SIZE (#406)
* Added getMaxInsert() and getMaxValue() to CountingBloomFilter.
* Changed 'BitCount' to 'Cell' to match the literature for counting Bloom filters.
* Updated documentation.
* Changed CellProducer to require ordered distinct cell indices.
* Updated asIndexArray to respect the order of forEachIndex.
---
.../bloomfilter/ArrayCountingBloomFilter.java | 122 ++++++++-----
.../bloomfilter/BitCountProducer.java | 126 -------------
.../collections4/bloomfilter/BloomFilter.java | 8 +
.../bloomfilter/CellProducer.java | 166 +++++++++++++++++
.../bloomfilter/CountingBloomFilter.java | 168 +++++++++++++-----
.../collections4/bloomfilter/Hasher.java | 19 --
.../collections4/bloomfilter/IndexFilter.java | 5 +-
.../bloomfilter/IndexProducer.java | 65 ++++++-
.../collections4/bloomfilter/IndexUtils.java | 47 +++++
.../bloomfilter/package-info.java | 3 +
.../AbstractBitCountProducerTest.java | 168 ------------------
.../bloomfilter/AbstractBloomFilterTest.java | 8 -
.../bloomfilter/AbstractCellProducerTest.java | 155 ++++++++++++++++
.../AbstractCountingBloomFilterTest.java | 149 ++++++++++++++--
.../AbstractIndexProducerTest.java | 7 +
.../collections4/bloomfilter/ArrayHasher.java | 6 -
.../BitCountProducerFromHasherTest.java | 47 -----
...ducerFromArrayCountingBloomFilterTest.java | 45 +++++
...lProducerFromDefaultIndexProducerTest.java | 45 +++++
...Test.java => DefaultCellProducerTest.java} | 30 ++--
.../bloomfilter/DefaultIndexProducerTest.java | 23 ++-
...ucerFromArrayCountingBloomFilterTest.java} | 21 +--
....java => IndexProducerFromHasherTest.java} | 19 +-
...ava => IndexProducerFromIntArrayTest.java} | 19 +-
...dexProducerFromSimpleBloomFilterTest.java} | 19 +-
...dexProducerFromSparseBloomFilterTest.java} | 21 +--
...=> IndexProducerFromUniqueHasherTest.java} | 19 +-
.../bloomfilter/IndexProducerTest.java | 16 ++
.../collections4/bloomfilter/NullHasher.java | 6 -
29 files changed, 966 insertions(+), 586 deletions(-)
delete mode 100644 src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java
create mode 100644 src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java
create mode 100644 src/main/java/org/apache/commons/collections4/bloomfilter/IndexUtils.java
delete mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java
create mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java
delete mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherTest.java
create mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromArrayCountingBloomFilterTest.java
create mode 100644 src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromDefaultIndexProducerTest.java
rename src/test/java/org/apache/commons/collections4/bloomfilter/{DefaultBitCountProducerTest.java => DefaultCellProducerTest.java} (69%)
rename src/test/java/org/apache/commons/collections4/bloomfilter/{BitCountProducerFromArrayCountingBloomFilterTest.java => IndexProducerFromArrayCountingBloomFilterTest.java} (71%)
rename src/test/java/org/apache/commons/collections4/bloomfilter/{BitCountProducerFromDefaultIndexProducerTest.java => IndexProducerFromHasherTest.java} (69%)
rename src/test/java/org/apache/commons/collections4/bloomfilter/{BitCountProducerFromIntArrayTest.java => IndexProducerFromIntArrayTest.java} (76%)
rename src/test/java/org/apache/commons/collections4/bloomfilter/{BitCountProducerFromSimpleBloomFilterTest.java => IndexProducerFromSimpleBloomFilterTest.java} (77%)
rename src/test/java/org/apache/commons/collections4/bloomfilter/{BitCountProducerFromSparseBloomFilterTest.java => IndexProducerFromSparseBloomFilterTest.java} (70%)
rename src/test/java/org/apache/commons/collections4/bloomfilter/{BitCountProducerFromUniqueHasherTest.java => IndexProducerFromUniqueHasherTest.java} (73%)
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
index 21d7ce7c9e..5fa0296f84 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java
@@ -23,17 +23,16 @@
import java.util.stream.IntStream;
/**
- * A counting Bloom filter using an int array to track counts for each enabled bit
- * index.
+ * A counting Bloom filter using an int array to track cells for each enabled bit.
*
* Any operation that results in negative counts or integer overflow of
* counts will mark this filter as invalid. This transition is not reversible.
* The operation is completed in full, no exception is raised and the state is
- * set to invalid. This allows the counts for the filter immediately prior to the
+ * set to invalid. This allows the cells for the filter immediately prior to the
* operation that created the invalid state to be recovered. See the documentation
* in {@link #isValid()} for details.
*
- * All the operations in the filter assume the counts are currently valid,
+ *
All the operations in the filter assume the cells are currently valid,
* for example {@code cardinality} or {@code contains} operations. Behavior of an invalid
* filter is undefined. It will no longer function identically to a standard
* Bloom filter that is the merge of all the Bloom filters that have been added
@@ -47,6 +46,7 @@
* consumption of approximately 8 GB.
*
* @see Shape
+ * @see CellProducer
* @since 4.5
*/
public final class ArrayCountingBloomFilter implements CountingBloomFilter {
@@ -57,30 +57,30 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
private final Shape shape;
/**
- * The count of each bit index in the filter.
+ * The cell for each bit index in the filter.
*/
- private final int[] counts;
+ private final int[] cells;
/**
* The state flag. This is a bitwise @{code OR} of the entire history of all updated
- * counts. If negative then a negative count or integer overflow has occurred on
- * one or more counts in the history of the filter and the state is invalid.
+ * cells. If negative then a negative cell or integer overflow has occurred on
+ * one or more cells in the history of the filter and the state is invalid.
*
*
Maintenance of this state flag is branch-free for improved performance. It
- * eliminates a conditional check for a negative count during remove/subtract
+ * eliminates a conditional check for a negative cell during remove/subtract
* operations and a conditional check for integer overflow during merge/add
* operations.
*
- * Note: Integer overflow is unlikely in realistic usage scenarios. A count
+ *
Note: Integer overflow is unlikely in realistic usage scenarios. A cell
* that overflows indicates that the number of items in the filter exceeds the
* maximum possible size (number of bits) of any Bloom filter constrained by
* integer indices. At this point the filter is most likely full (all bits are
* non-zero) and thus useless.
*
- * Negative counts are a concern if the filter is used incorrectly by
+ *
Negative cells are a concern if the filter is used incorrectly by
* removing an item that was never added. It is expected that a user of a
* counting Bloom filter will not perform this action as it is a mistake.
- * Enabling an explicit recovery path for negative or overflow counts is a major
+ * Enabling an explicit recovery path for negative or overflow cells is a major
* performance burden not deemed necessary for the unlikely scenarios when an
* invalid state is created. Maintenance of the state flag is a concession to
* flag improper use that should not have a major performance impact.
@@ -96,18 +96,23 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter {
public ArrayCountingBloomFilter(final Shape shape) {
Objects.requireNonNull(shape, "shape");
this.shape = shape;
- counts = new int[shape.getNumberOfBits()];
+ cells = new int[shape.getNumberOfBits()];
}
private ArrayCountingBloomFilter(final ArrayCountingBloomFilter source) {
this.shape = source.shape;
this.state = source.state;
- this.counts = source.counts.clone();
+ this.cells = source.cells.clone();
}
@Override
public void clear() {
- Arrays.fill(counts, 0);
+ Arrays.fill(cells, 0);
+ }
+
+ @Override
+ public int getMaxCell() {
+ return Integer.MAX_VALUE;
}
@Override
@@ -122,20 +127,20 @@ public int characteristics() {
@Override
public int cardinality() {
- return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count();
+ return (int) IntStream.range(0, cells.length).filter(i -> cells[i] > 0).count();
}
@Override
- public boolean add(final BitCountProducer other) {
+ public boolean add(final CellProducer other) {
Objects.requireNonNull(other, "other");
- other.forEachCount(this::add);
+ other.forEachCell(this::add);
return isValid();
}
@Override
- public boolean subtract(final BitCountProducer other) {
+ public boolean subtract(final CellProducer other) {
Objects.requireNonNull(other, "other");
- other.forEachCount(this::subtract);
+ other.forEachCell(this::subtract);
return isValid();
}
@@ -146,12 +151,12 @@ public boolean subtract(final BitCountProducer other) {
*
* The state transition to invalid is permanent.
*
- * This implementation does not correct negative counts to zero or integer
- * overflow counts to {@link Integer#MAX_VALUE}. Thus the operation that
- * generated invalid counts can be reversed by using the complement of the
- * original operation with the same Bloom filter. This will restore the counts
- * to the state prior to the invalid operation. Counts can then be extracted
- * using {@link #forEachCount(BitCountConsumer)}.
+ * This implementation does not correct negative cells to zero or integer
+ * overflow cells to {@link Integer#MAX_VALUE}. Thus the operation that
+ * generated invalid cells can be reversed by using the complement of the
+ * original operation with the same Bloom filter. This will restore the cells
+ * to the state prior to the invalid operation. Cells can then be extracted
+ * using {@link #forEachCell(CellConsumer)}.
*/
@Override
public boolean isValid() {
@@ -159,10 +164,10 @@ public boolean isValid() {
}
@Override
- public boolean forEachCount(final BitCountProducer.BitCountConsumer consumer) {
+ public boolean forEachCell(final CellProducer.CellConsumer consumer) {
Objects.requireNonNull(consumer, "consumer");
- for (int i = 0; i < counts.length; i++) {
- if (counts[i] != 0 && !consumer.test(i, counts[i])) {
+ for (int i = 0; i < cells.length; i++) {
+ if (cells[i] != 0 && !consumer.test(i, cells[i])) {
return false;
}
}
@@ -172,8 +177,8 @@ public boolean forEachCount(final BitCountProducer.BitCountConsumer consumer) {
@Override
public boolean forEachIndex(final IntPredicate consumer) {
Objects.requireNonNull(consumer, "consumer");
- for (int i = 0; i < counts.length; i++) {
- if (counts[i] != 0 && !consumer.test(i)) {
+ for (int i = 0; i < cells.length; i++) {
+ if (cells[i] != 0 && !consumer.test(i)) {
return false;
}
}
@@ -183,14 +188,14 @@ public boolean forEachIndex(final IntPredicate consumer) {
@Override
public boolean forEachBitMap(final LongPredicate consumer) {
Objects.requireNonNull(consumer, "consumer");
- final int blocksm1 = BitMap.numberOfBitMaps(counts.length) - 1;
+ final int blocksm1 = BitMap.numberOfBitMaps(cells.length) - 1;
int i = 0;
long value;
// must break final block separate as the number of bits may not fall on the long boundary
for (int j = 0; j < blocksm1; j++) {
value = 0;
for (int k = 0; k < Long.SIZE; k++) {
- if (counts[i++] != 0) {
+ if (cells[i++] != 0) {
value |= BitMap.getLongBit(k);
}
}
@@ -200,8 +205,8 @@ public boolean forEachBitMap(final LongPredicate consumer) {
}
// Final block
value = 0;
- for (int k = 0; i < counts.length; k++) {
- if (counts[i++] != 0) {
+ for (int k = 0; i < cells.length; k++) {
+ if (cells[i++] != 0) {
value |= BitMap.getLongBit(k);
}
}
@@ -209,31 +214,41 @@ public boolean forEachBitMap(final LongPredicate consumer) {
}
/**
- * Add to the count for the bit index.
+ * Add to the cell for the bit index.
*
* @param idx the index
* @param addend the amount to add
* @return {@code true} always.
*/
private boolean add(final int idx, final int addend) {
- final int updated = counts[idx] + addend;
- state |= updated;
- counts[idx] = updated;
- return true;
+ try {
+ final int updated = cells[idx] + addend;
+ state |= updated;
+ cells[idx] = updated;
+ return true;
+ } catch (final IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException(
+ String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
+ }
}
/**
- * Subtract from the count for the bit index.
+ * Subtract from the cell for the bit index.
*
* @param idx the index
* @param subtrahend the amount to subtract
* @return {@code true} always.
*/
private boolean subtract(final int idx, final int subtrahend) {
- final int updated = counts[idx] - subtrahend;
- state |= updated;
- counts[idx] = updated;
- return true;
+ try {
+ final int updated = cells[idx] - subtrahend;
+ state |= updated;
+ cells[idx] = updated;
+ return true;
+ } catch (final IndexOutOfBoundsException e) {
+ throw new IllegalArgumentException(
+ String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
+ }
}
@Override
@@ -243,7 +258,7 @@ public Shape getShape() {
@Override
public boolean contains(final IndexProducer indexProducer) {
- return indexProducer.forEachIndex(idx -> this.counts[idx] != 0);
+ return indexProducer.forEachIndex(idx -> this.cells[idx] != 0);
}
@Override
@@ -253,6 +268,19 @@ public boolean contains(final BitMapProducer bitMapProducer) {
@Override
public int[] asIndexArray() {
- return IntStream.range(0, counts.length).filter(i -> counts[i] > 0).toArray();
+ return IntStream.range(0, cells.length).filter(i -> cells[i] > 0).toArray();
+ }
+
+ @Override
+ public int getMaxInsert(CellProducer cellProducer) {
+ int[] max = {Integer.MAX_VALUE};
+ cellProducer.forEachCell( (x, y) -> {
+ int count = cells[x] / y;
+ if (count < max[0]) {
+ max[0] = count;
+ }
+ return max[0] > 0;
+ });
+ return max[0];
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java
deleted file mode 100644
index 7ccd8bc924..0000000000
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BitCountProducer.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import java.util.function.IntPredicate;
-
-/**
- * Defines a mapping of index to counts.
- *
- * Note that a BitCountProducer may return duplicate indices and may be unordered.
- *
- *
Implementations must guarantee that:
- *
- *
- * - The mapping of index to counts is the combined sum of counts at each index.
- *
- For every unique value produced by the IndexProducer there will be at least one matching
- * index and count produced by the BitCountProducer.
- *
- The BitCountProducer will not generate indices that are not output by the IndexProducer.
- *
- *
- * Note that implementations that do not output duplicate indices for BitCountProducer and
- * do for IndexProducer, or vice versa, are consistent if the distinct indices from each are
- * the same.
- *
- *
For example the mapping [(1,2),(2,3),(3,1)] can be output with many combinations including:
- *
- * [(1,2),(2,3),(3,1)]
- * [(1,1),(1,1),(2,1),(2,1),(2,1),(3,1)]
- * [(1,1),(3,1),(1,1),(2,1),(2,1),(2,1)]
- * [(3,1),(1,1),(2,2),(1,1),(2,1)]
- * ...
- *
- *
- * @since 4.5
- */
-@FunctionalInterface
-public interface BitCountProducer extends IndexProducer {
-
- /**
- * Performs the given action for each {@code } pair where the count is non-zero.
- * Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
- * index-count pair, if the consumer returns {@code false} the execution is stopped, {@code false}
- * is returned, and no further pairs are processed.
- *
- * Duplicate indices are not required to be aggregated. Duplicates may be output by the producer as
- * noted in the class javadoc.
- *
- * @param consumer the action to be performed for each non-zero bit count
- * @return {@code true} if all count pairs return true from consumer, {@code false} otherwise.
- * @throws NullPointerException if the specified consumer is null
- */
- boolean forEachCount(BitCountConsumer consumer);
-
- /**
- * The default implementation returns indices with ordering and uniqueness of {@code forEachCount()}.
- */
- @Override
- default boolean forEachIndex(final IntPredicate predicate) {
- return forEachCount((i, v) -> predicate.test(i));
- }
-
- /**
- * Creates a BitCountProducer from an IndexProducer. The resulting
- * producer will return every index from the IndexProducer with a count of 1.
- *
- * Note that the BitCountProducer does not remove duplicates. Any use of the
- * BitCountProducer to create an aggregate mapping of index to counts, such as a
- * CountingBloomFilter, should use the same BitCountProducer in both add and
- * subtract operations to maintain consistency.
- *
- * @param idx An index producer.
- * @return A BitCountProducer with the same indices as the IndexProducer.
- */
- static BitCountProducer from(final IndexProducer idx) {
- return new BitCountProducer() {
- @Override
- public boolean forEachCount(final BitCountConsumer consumer) {
- return idx.forEachIndex(i -> consumer.test(i, 1));
- }
-
- @Override
- public int[] asIndexArray() {
- return idx.asIndexArray();
- }
-
- @Override
- public boolean forEachIndex(final IntPredicate predicate) {
- return idx.forEachIndex(predicate);
- }
- };
- }
-
- /**
- * Represents an operation that accepts an {@code } pair representing
- * the count for a bit index. Returns {@code true}
- * if processing should continue, {@code false} otherwise.
- *
- * Note: This is a functional interface as a specialization of
- * {@link java.util.function.BiPredicate} for {@code int}.
- */
- @FunctionalInterface
- interface BitCountConsumer {
- /**
- * Performs an operation on the given {@code } pair.
- *
- * @param index the bit index.
- * @param count the count at the specified bit index.
- * @return {@code true} if processing should continue, {@code false} if processing should stop.
- */
- boolean test(int index, int count);
- }
-}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
index f51eb081fc..e4783bf3e1 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
@@ -309,4 +309,12 @@ default int estimateIntersection(final BloomFilter other) {
}
return estimate>Integer.MAX_VALUE?Integer.MAX_VALUE:(int) estimate;
}
+
+ /**
+ * Most Bloom filters create unique IndexProducers.
+ */
+ @Override
+ default IndexProducer uniqueIndices() {
+ return this;
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java
new file mode 100644
index 0000000000..6949a13c64
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.TreeMap;
+import java.util.function.IntPredicate;
+
+
+/**
+ * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
+ * refer to these counts and their associated index. This class is the equivalent of the index producer except
+ * that it produces cells.
+ *
+ * Note that a CellProducer must not return duplicate indices and must be ordered.
+ *
+ * Implementations must guarantee that:
+ *
+ *
+ * - The IndexProducer implementation returns unique ordered indices.
+ * - The cells are produced in IndexProducer order.
+ * - For every value produced by the IndexProducer there will be only one matching
+ * cell produced by the CellProducer.
+ * - The CellProducer will not generate cells with indices that are not output by the IndexProducer.
+ * - The IndexProducer will not generate indices that have a zero count for the cell.
+ *
+ *
+ * @since 4.5
+ */
+@FunctionalInterface
+public interface CellProducer extends IndexProducer {
+
+ /**
+ * Performs the given action for each {@code cell} where the cell count is non-zero.
+ *
+ * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
+ * refer to these counts.
+ *
+ * Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
+ * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
+ * is returned, and no further cells are processed.
+ *
+ * @param consumer the action to be performed for each non-zero cell.
+ * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
+ * @throws NullPointerException if the specified consumer is null
+ */
+ boolean forEachCell(CellConsumer consumer);
+
+ /**
+ * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
+ */
+ @Override
+ default boolean forEachIndex(final IntPredicate predicate) {
+ return forEachCell((i, v) -> predicate.test(i));
+ }
+
+ /**
+ * Creates a CellProducer from an IndexProducer.
+ *
+ * Note the following properties:
+ *
+ * - Each index returned from the IndexProducer is assumed to have a cell value of 1.
+ * - The CellProducer aggregates duplicate indices from the IndexProducer.
+ *
+ *
+ * A CellProducer that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
+ * of indices including:
+ *
+ * [1, 1, 2, 2, 2, 3]
+ * [1, 3, 1, 2, 2, 2]
+ * [3, 2, 1, 2, 1, 2]
+ * ...
+ *
+ *
+ * @param producer An index producer.
+ * @return A CellProducer with the same indices as the IndexProducer.
+ */
+ static CellProducer from(final IndexProducer producer) {
+     return new CellProducer() {
+         TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>(); // sorted by index; each cell is its own key
+
+         private void populate() {
+             if (counterCells.isEmpty()) { // filled lazily on first use
+                 producer.forEachIndex( idx -> {
+                     CounterCell cell = new CounterCell(idx, 1);
+                     CounterCell counter = counterCells.get(cell);
+                     if (counter == null) {
+                         counterCells.put(cell, cell);
+                     } else {
+                         counter.count++; // duplicate index: aggregate into the existing cell
+                     }
+                     return true;
+                 });
+             }
+         }
+
+         @Override
+         public int[] asIndexArray() {
+             populate();
+             return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
+         }
+
+         @Override
+         public boolean forEachCell(CellConsumer consumer) {
+             populate();
+             for (CounterCell cell : counterCells.values()) {
+                 if (!consumer.test(cell.idx, cell.count)) {
+                     return false;
+                 }
+             }
+             return true;
+         }
+
+         /**
+          * Index/count pair tracked in the TreeMap; compared by index only.
+          */
+         final class CounterCell implements Comparable<CounterCell> {
+             final int idx;
+             int count;
+
+             CounterCell(int idx, int count) {
+                 this.idx = idx;
+                 this.count = count;
+             }
+
+             @Override
+             public int compareTo(CounterCell other) {
+                 return Integer.compare(idx, other.idx);
+             }
+         }
+     };
+ }
+
+ /**
+ * Represents an operation that accepts an {@code <index, count>} pair.
+ * Returns {@code true} if processing should continue, {@code false} otherwise.
+ *
+ * Note: This is a functional interface as a specialization of
+ * {@link java.util.function.BiPredicate} for {@code int}.
+ */
+ @FunctionalInterface
+ interface CellConsumer {
+ /**
+ * Performs an operation on the given {@code <index, count>} pair.
+ *
+ * @param index the bit index.
+ * @param count the cell value at the specified bit index.
+ * @return {@code true} if processing should continue, {@code false} if processing should stop.
+ */
+ boolean test(int index, int count);
+ }
+}
+
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
index a86afa1f36..7c9310f406 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
@@ -20,7 +20,8 @@
/**
* The interface that describes a Bloom filter that associates a count with each
- * bit index to allow reversal of merge operations with remove operations.
+ * bit index rather than a bit. This allows reversal of merge operations with
+ * remove operations.
*
* A counting Bloom filter is expected to function identically to a standard
* Bloom filter that is the merge of all the Bloom filters that have been added
@@ -30,29 +31,30 @@
* remove order, is expected to be the same.
*
* Removal of a filter that has not previously been merged results in an
- * invalid state where the counts no longer represent a sum of merged Bloom
+ * invalid state where the cells no longer represent a sum of merged Bloom
* filters. It is impossible to validate merge and remove exactly without
* explicitly storing all filters. Consequently such an operation may go
* undetected. The CountingBloomFilter maintains a state flag that is used as a
- * warning that an operation was performed that resulted in invalid counts and
- * thus an invalid state. For example this may occur if a count for an index was
+ * warning that an operation was performed that resulted in invalid cells and
+ * thus an invalid state. For example this may occur if a cell for an index was
* set to negative following a remove operation.
*
* Implementations should document the expected state of the filter after an
- * operation that generates invalid counts, and any potential recovery options.
+ * operation that generates invalid cells, and any potential recovery options.
* An implementation may support a reversal of the operation to restore the
- * state to that prior to the operation. In the event that invalid counts are
+ * state to that prior to the operation. In the event that invalid cells are
* adjusted to a valid range then it should be documented if there has been
* irreversible information loss.
*
* Implementations may choose to throw an exception during an operation that
- * generates invalid counts. Implementations should document the expected state
- * of the filter after such an operation. For example are the counts not updated,
+ * generates invalid cells. Implementations should document the expected state
+ * of the filter after such an operation. For example are the cells not updated,
* partially updated or updated entirely before the exception is raised.
*
+ * @see CellProducer
* @since 4.5
*/
-public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
+public interface CountingBloomFilter extends BloomFilter, CellProducer {
// Query Operations
@@ -60,9 +62,9 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
* Returns {@code true} if the internal state is valid.
*
* This flag is a warning that an addition or
- * subtraction of counts from this filter resulted in an invalid count for one or more
- * indexes. For example this may occur if a count for an index was
- * set to negative following a subtraction operation, or overflows an {@code int} following an
+ * subtraction of cells from this filter resulted in an invalid cell for one or more
+ * indexes. For example this may occur if a cell for an index was
+ * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an
* addition operation.
*
* A counting Bloom filter that has an invalid state is no longer ensured to function
@@ -77,14 +79,81 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
*/
boolean isValid();
+ /**
+ * Returns the maximum allowable value for a cell count in this Counting filter.
+ * @return the maximum allowable value for a cell count in this Counting filter.
+ */
+ int getMaxCell();
+
+ /**
+ * Determines the maximum number of times the Bloom filter could have been merged
+ * into this counting filter.
+ * @param bloomFilter the Bloom filter to check for.
+ * @return the maximum number of times the Bloom filter could have been inserted.
+ */
+ default int getMaxInsert(BloomFilter bloomFilter) {
+ return getMaxInsert((BitMapProducer) bloomFilter);
+ }
+
+ /**
+ * Determines the maximum number of times the IndexProducer could have been merged
+ * into this counting filter.
+ * <p>To determine how many times an IndexProducer could have been added, create a CellProducer
+ * from the IndexProducer and check that with {@link #getMaxInsert(CellProducer)}.</p>
+ * @param idxProducer the producer to drive the count check.
+ * @return the maximum number of times the IndexProducer could have been inserted.
+ * @see #getMaxInsert(CellProducer)
+ */
+ default int getMaxInsert(IndexProducer idxProducer) {
+ return getMaxInsert(CellProducer.from(idxProducer.uniqueIndices()) );
+ }
+
+ /**
+ * Determines the maximum number of times the CellProducer could have been added.
+ * @param cellProducer the producer of cells.
+ * @return the maximum number of times the CellProducer could have been inserted.
+ */
+ int getMaxInsert(CellProducer cellProducer);
+
+ /**
+ * Determines the maximum number of times the Hasher could have been merged into this
+ * counting filter.
+ * @param hasher the Hasher to provide the indices.
+ * @return the maximum number of times the hasher could have been inserted.
+ */
+ default int getMaxInsert(Hasher hasher) {
+ return getMaxInsert(hasher.indices(getShape()));
+ }
+
+ /**
+ * Determines the maximum number of times the BitMapProducer could have been merged into this
+ * counting filter.
+ * @param bitMapProducer the BitMapProducer to provide the indices.
+ * @return the maximum number of times the BitMapProducer could have been inserted.
+ */
+ default int getMaxInsert(BitMapProducer bitMapProducer) {
+ if (!contains(bitMapProducer)) {
+ return 0;
+ }
+ long[] bitMaps = bitMapProducer.asBitMapArray();
+ int[] max = { Integer.MAX_VALUE };
+ forEachCell((x, y) -> {
+ if ((bitMaps[BitMap.getLongIndex(x)] & BitMap.getLongBit(x)) != 0) {
+ max[0] = max[0] <= y ? max[0] : y;
+ }
+ return true;
+ });
+ return max[0];
+ }
+
// Modification Operations
/**
* Merges the specified Bloom filter into this Bloom filter.
*
- * Specifically: all counts for the indexes identified by the {@code other} filter will be incremented by 1.
+ * Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.
*
- * Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
+ * Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
* IndexProducer.
*
* This method will return {@code true} if the filter is valid after the operation.
@@ -92,7 +161,7 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer {
* @param other the other Bloom filter
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #add(BitCountProducer)
+ * @see #add(CellProducer)
*/
@Override
default boolean merge(final BloomFilter other) {
@@ -103,40 +172,41 @@ default boolean merge(final BloomFilter other) {
/**
* Merges the specified Hasher into this Bloom filter.
*
- * Specifically: all counts for the unique indexes identified by the {@code hasher} will be incremented by 1.
+ * Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.
*
* This method will return {@code true} if the filter is valid after the operation.
*
* @param hasher the hasher
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #add(BitCountProducer)
+ * @see #add(CellProducer)
*/
@Override
default boolean merge(final Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
- return merge(hasher.uniqueIndices(getShape()));
+ return merge(hasher.indices(getShape()));
}
/**
* Merges the specified index producer into this Bloom filter.
*
- * Specifically: all counts for the indexes identified by the {@code indexProducer} will be incremented by 1.
+ * Specifically: all unique cells for the indices identified by the {@code indexProducer} will be incremented by 1.
*
* This method will return {@code true} if the filter is valid after the operation.
*
- * Note: Indices that are returned multiple times will be incremented multiple times.
+ * Note: If indices that are returned multiple times should be incremented multiple times convert the IndexProducer
+ * to a CellProducer and add that.
*
* @param indexProducer the IndexProducer
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #add(BitCountProducer)
+ * @see #add(CellProducer)
*/
@Override
default boolean merge(final IndexProducer indexProducer) {
Objects.requireNonNull(indexProducer, "indexProducer");
try {
- return add(BitCountProducer.from(indexProducer));
+ return add(CellProducer.from(indexProducer.uniqueIndices()));
} catch (final IndexOutOfBoundsException e) {
throw new IllegalArgumentException(
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
@@ -146,14 +216,14 @@ default boolean merge(final IndexProducer indexProducer) {
/**
* Merges the specified BitMap producer into this Bloom filter.
*
- * Specifically: all counts for the indexes identified by the {@code bitMapProducer} will be incremented by 1.
+ * Specifically: all cells for the indexes identified by the {@code bitMapProducer} will be incremented by 1.
*
* This method will return {@code true} if the filter is valid after the operation.
*
* @param bitMapProducer the BitMapProducer
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #add(BitCountProducer)
+ * @see #add(CellProducer)
*/
@Override
default boolean merge(final BitMapProducer bitMapProducer) {
@@ -164,9 +234,9 @@ default boolean merge(final BitMapProducer bitMapProducer) {
/**
* Removes the specified Bloom filter from this Bloom filter.
*
- * Specifically: all counts for the indexes identified by the {@code other} filter will be decremented by 1.
+ * Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.
*
- * Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an
+ * Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
* IndexProducer.
*
* This method will return {@code true} if the filter is valid after the operation.
@@ -174,7 +244,7 @@ default boolean merge(final BitMapProducer bitMapProducer) {
* @param other the other Bloom filter
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #subtract(BitCountProducer)
+ * @see #subtract(CellProducer)
*/
default boolean remove(final BloomFilter other) {
Objects.requireNonNull(other, "other");
@@ -184,7 +254,7 @@ default boolean remove(final BloomFilter other) {
/**
* Removes the unique values from the specified hasher from this Bloom filter.
*
- * Specifically all counts for the unique indices produced by the {@code hasher} will be
+ * Specifically all cells for the unique indices produced by the {@code hasher} will be
* decremented by 1.
*
* This method will return {@code true} if the filter is valid after the operation.
@@ -192,32 +262,33 @@ default boolean remove(final BloomFilter other) {
* @param hasher the hasher to provide the indexes
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #subtract(BitCountProducer)
+ * @see #subtract(CellProducer)
*/
default boolean remove(final Hasher hasher) {
Objects.requireNonNull(hasher, "hasher");
- return remove(hasher.uniqueIndices(getShape()));
+ return remove(hasher.indices(getShape()));
}
/**
* Removes the values from the specified IndexProducer from the Bloom filter from this Bloom filter.
*
- * Specifically all counts for the unique indices produced by the {@code hasher} will be
+ * Specifically all cells for the unique indices produced by the {@code indexProducer} will be
* decremented by 1.
*
* This method will return {@code true} if the filter is valid after the operation.
*
- * Node: This method expects index producers that produce unique values.
+ * Note: If indices that are returned multiple times should be decremented multiple times convert the IndexProducer
+ * to a CellProducer and subtract that.
*
* @param indexProducer the IndexProducer to provide the indexes
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #subtract(BitCountProducer)
+ * @see #subtract(CellProducer)
*/
default boolean remove(final IndexProducer indexProducer) {
Objects.requireNonNull(indexProducer, "indexProducer");
try {
- return subtract(BitCountProducer.from(indexProducer));
+ return subtract(CellProducer.from(indexProducer.uniqueIndices()));
} catch (final IndexOutOfBoundsException e) {
throw new IllegalArgumentException(
String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
@@ -227,7 +298,7 @@ default boolean remove(final IndexProducer indexProducer) {
/**
* Removes the specified BitMapProducer from this Bloom filter.
*
- * Specifically all counts for the indices produced by the {@code bitMapProducer} will be
+ * Specifically all cells for the indices produced by the {@code bitMapProducer} will be
* decremented by 1.
*
* This method will return {@code true} if the filter is valid after the operation.
@@ -235,7 +306,7 @@ default boolean remove(final IndexProducer indexProducer) {
* @param bitMapProducer the BitMapProducer to provide the indexes
* @return {@code true} if the removal was successful and the state is valid
* @see #isValid()
- * @see #subtract(BitCountProducer)
+ * @see #subtract(CellProducer)
*/
default boolean remove(final BitMapProducer bitMapProducer) {
Objects.requireNonNull(bitMapProducer, "bitMapProducer");
@@ -243,36 +314,36 @@ default boolean remove(final BitMapProducer bitMapProducer) {
}
/**
- * Adds the specified BitCountProducer to this Bloom filter.
+ * Adds the specified CellProducer to this Bloom filter.
*
* Specifically
- * all counts for the indexes identified by the {@code other} will be incremented
+ * all cells for the indexes identified by the {@code other} will be incremented
* by their corresponding values in the {@code other}.
*
* This method will return {@code true} if the filter is valid after the operation.
*
- * @param other the BitCountProducer to add.
+ * @param other the CellProducer to add.
* @return {@code true} if the addition was successful and the state is valid
* @see #isValid()
- * @see #subtract(BitCountProducer)
+ * @see #subtract(CellProducer)
*/
- boolean add(BitCountProducer other);
+ boolean add(CellProducer other);
/**
- * Adds the specified BitCountProducer to this Bloom filter.
+ * Subtracts the specified CellProducer from this Bloom filter.
*
* Specifically
- * all counts for the indexes identified by the {@code other} will be decremented
+ * all cells for the indexes identified by the {@code other} will be decremented
* by their corresponding values in the {@code other}.
*
* This method will return true if the filter is valid after the operation.
*
- * @param other the BitCountProducer to subtract.
+ * @param other the CellProducer to subtract.
* @return {@code true} if the subtraction was successful and the state is valid
* @see #isValid()
- * @see #add(BitCountProducer)
+ * @see #add(CellProducer)
*/
- boolean subtract(BitCountProducer other);
+ boolean subtract(CellProducer other);
/**
@@ -281,4 +352,9 @@ default boolean remove(final BitMapProducer bitMapProducer) {
*/
@Override
CountingBloomFilter copy();
+
+ @Override
+ default IndexProducer uniqueIndices() {
+ return this;
+ }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java b/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java
index d8b3a43aa9..5b1b6a127b 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/Hasher.java
@@ -16,8 +16,6 @@
*/
package org.apache.commons.collections4.bloomfilter;
-import java.util.Objects;
-
/**
* A Hasher creates IndexProducer based on the hash implementation and the
* provided Shape.
@@ -44,21 +42,4 @@ public interface Hasher {
* @return the iterator of integers
*/
IndexProducer indices(Shape shape);
-
- /**
- * Creates an IndexProducer of unique indices for this hasher based on the Shape.
- *
- * This is like the `indices(Shape)` method except that it adds the guarantee that no
- * duplicate values will be returned. The indices produced are equivalent to those returned
- * from by a Bloom filter created from this hasher.
- *
- * @param shape the shape of the desired Bloom filter.
- * @return the iterator of integers
- */
- default IndexProducer uniqueIndices(final Shape shape) {
- return consumer -> {
- Objects.requireNonNull(consumer, "consumer");
- return indices(shape).forEachIndex(IndexFilter.create(shape, consumer));
- };
- }
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java
index c7e6ca1861..57f70f5638 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java
@@ -72,7 +72,10 @@ public boolean test(final int number) {
if (number >= size) {
throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
}
- return !tracker.test(number) || consumer.test(number);
+ if (tracker.test(number)) {
+ return consumer.test(number);
+ }
+ return true;
}
/**
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
index dbaf0908c7..0269d34eac 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
@@ -16,6 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import java.util.Arrays;
import java.util.BitSet;
import java.util.Objects;
import java.util.function.IntPredicate;
@@ -107,21 +108,69 @@ public boolean test(long word) {
* Indices ordering and uniqueness is not guaranteed.
*
*
- * The default implementation of this method is slow. It is recommended
- * that implementing classes reimplement this method.
+ * The default implementation of this method creates an array and populates
+ * it. Implementations that have access to an index array should consider
+ * returning a copy of that array if possible.
*
*
- *
- * The default implementation of this method returns unique values in order.
- *
* @return An int array of the data.
*/
default int[] asIndexArray() {
- final BitSet result = new BitSet();
+ class Indices {
+ private int[] data = new int[32];
+ private int size;
+
+ boolean add(final int index) {
+ data = IndexUtils.ensureCapacityForAdd(data, size);
+ data[size++] = index;
+ return true;
+ }
+
+ int[] toArray() {
+ // Edge case to avoid a large array copy
+ return size == data.length ? data : Arrays.copyOf(data, size);
+ }
+ }
+ Indices indices = new Indices();
+ forEachIndex(indices::add);
+ return indices.toArray();
+ }
+
+ /**
+ * Creates an IndexProducer comprising the unique indices for this producer.
+ *
+ * By default creates a new producer with some overhead to remove
+ * duplicates. IndexProducers that return unique indices by default
+ * should override this to return {@code this}.
+ *
+ * The default implementation will filter the indices from this instance
+ * and return them in ascending order.
+ *
+ * @return the IndexProducer of unique values.
+ * @throws IndexOutOfBoundsException if any index is less than zero.
+ */
+ default IndexProducer uniqueIndices() {
+ final BitSet bitSet = new BitSet();
forEachIndex(i -> {
- result.set(i);
+ bitSet.set(i);
return true;
});
- return result.stream().toArray();
+
+ return new IndexProducer() {
+ @Override
+ public boolean forEachIndex(IntPredicate predicate) {
+ for (int idx = bitSet.nextSetBit(0); idx >= 0; idx = bitSet.nextSetBit(idx + 1)) {
+ if (!predicate.test(idx)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public IndexProducer uniqueIndices() {
+ return this;
+ }
+ };
}
}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexUtils.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexUtils.java
new file mode 100644
index 0000000000..96bfefec02
--- /dev/null
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexUtils.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Arrays;
+
+/**
+ * Provides functions to assist in IndexProducer creation and manipulation.
+ * @see IndexProducer
+ */
+final class IndexUtils {
+
+ /**
+ * The maximum array size for the methods in this class.
+ */
+ static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
+ // do not instantiate
+ private IndexUtils() {}
+
+ /**
+ * Ensure the array can add an element at the specified index.
+ * @param array the array to check.
+ * @param index the index to add at.
+ * @return the array or a newly allocated copy of the array.
+ */
+ static int[] ensureCapacityForAdd(int[] array, int index) {
+ if (index >= array.length) {
+ return Arrays.copyOf(array, (int) Math.min(IndexUtils.MAX_ARRAY_SIZE, Math.max(array.length * 2L, index + 1)));
+ }
+ return array;
+ }
+}
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
index a7fb009540..7df764182d 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java
@@ -32,6 +32,9 @@
* list. There are lots of other uses, and in most cases the reason is to perform a fast check as a gateway for a longer
* operation.
*
+ * Some Bloom filters (e.g. CountingBloomFilter) use counters rather than bits. In this case each counter
+ * is called a {@code cell}.
+ *
* BloomFilter
*
* The Bloom filter architecture here is designed for speed of execution, so some methods like {@code merge}, {@code remove},
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java
deleted file mode 100644
index 2a5aa0a622..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBitCountProducerTest.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-import static org.junit.jupiter.api.Assertions.assertArrayEquals;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
-
-import java.util.Arrays;
-import java.util.BitSet;
-
-import org.apache.commons.collections4.bag.TreeBag;
-import org.apache.commons.collections4.bloomfilter.BitCountProducer.BitCountConsumer;
-import org.junit.jupiter.api.Test;
-
-public abstract class AbstractBitCountProducerTest extends AbstractIndexProducerTest {
-
- /**
- * A testing BitCountConsumer that always returns true.
- */
- private static final BitCountConsumer TRUE_CONSUMER = (i, j) -> true;
- /**
- * A testing BitCountConsumer that always returns false.
- */
- private static final BitCountConsumer FALSE_CONSUMER = (i, j) -> false;
-
- /**
- * Creates an array of integer pairs comprising the index and the expected count for the index.
- * The order and count for each index is dependent upon the producer created by the {@code createProducer()}
- * method.
- * By default returns the each {@code getExpectedIndices()} value paired with 1 (one).
- * @return an array of integer pairs comprising the index and the expected count for the index.
- */
- protected int[][] getExpectedBitCount() {
- return Arrays.stream(getExpectedIndices()).mapToObj(x -> new int[] {x, 1}).toArray(int[][]::new);
- }
-
- /**
- * Creates a producer with some data.
- * @return a producer with some data
- */
- @Override
- protected abstract BitCountProducer createProducer();
-
- /**
- * Creates a producer without data.
- * @return a producer that has no data.
- */
- @Override
- protected abstract BitCountProducer createEmptyProducer();
-
- /**
- * Gets the behavior of the {@link BitCountProducer#forEachCount(BitCountConsumer)} method.
- * By default returns the value of {@code getAsIndexArrayBehaviour()} method.
- * @return the behavior.
- */
- protected int getForEachCountBehaviour() {
- return getAsIndexArrayBehaviour();
- }
-
- @Test
- public final void testForEachCountPredicates() {
- final BitCountProducer populated = createProducer();
- final BitCountProducer empty = createEmptyProducer();
-
- assertFalse(populated.forEachCount(FALSE_CONSUMER), "non-empty should be false");
- assertTrue(empty.forEachCount(FALSE_CONSUMER), "empty should be true");
-
- assertTrue(populated.forEachCount(TRUE_CONSUMER), "non-empty should be true");
- assertTrue(empty.forEachCount(TRUE_CONSUMER), "empty should be true");
- }
-
- @Test
- public final void testEmptyBitCountProducer() {
- final BitCountProducer empty = createEmptyProducer();
- final int ary[] = empty.asIndexArray();
- assertEquals(0, ary.length);
- assertTrue(empty.forEachCount((i, j) -> {
- fail("forEachCount consumer should not be called");
- return false;
- }));
- }
-
- @Test
- public final void testIndexConsistency() {
- final BitCountProducer producer = createProducer();
- final BitSet bs1 = new BitSet();
- final BitSet bs2 = new BitSet();
- producer.forEachIndex(i -> {
- bs1.set(i);
- return true;
- });
- producer.forEachCount((i, j) -> {
- bs2.set(i);
- return true;
- });
- assertEquals(bs1, bs2);
- }
-
- @Test
- public void testForEachCountValues() {
- // Assumes the collections bag works. Could be replaced with Map with more work.
- final TreeBag expected = new TreeBag<>();
- Arrays.stream(getExpectedBitCount()).forEach(c -> expected.add(c[0], c[1]));
- final TreeBag actual = new TreeBag<>();
- // can not return actual.add as it returns false on duplicate 'i'
- createProducer().forEachCount((i, j) -> {
- actual.add(i, j);
- return true;
- });
- assertEquals(expected, actual);
- }
-
- /**
- * Test the behavior of {@link BitCountProducer#forEachCount(BitCountConsumer)} with respect
- * to ordered and distinct indices. Currently the behavior is assumed to be the same as
- * {@link IndexProducer#forEachIndex(java.util.function.IntPredicate)}.
- */
- @Test
- public final void testBehaviourForEachCount() {
- final int flags = getForEachCountBehaviour();
- assumeTrue((flags & (ORDERED | DISTINCT)) != 0);
- final IntList list = new IntList();
- createProducer().forEachCount((i, j) -> list.add(i));
- final int[] actual = list.toArray();
- if ((flags & ORDERED) != 0) {
- final int[] expected = Arrays.stream(actual).sorted().toArray();
- assertArrayEquals(expected, actual);
- }
- if ((flags & DISTINCT) != 0) {
- final long count = Arrays.stream(actual).distinct().count();
- assertEquals(count, actual.length);
- }
- }
-
- @Test
- public void testForEachCountEarlyExit() {
- final int[] passes = new int[1];
- assertTrue(createEmptyProducer().forEachCount((i, j) -> {
- passes[0]++;
- return false;
- }));
- assertEquals(0, passes[0]);
-
- assertFalse(createProducer().forEachCount((i, j) -> {
- passes[0]++;
- return false;
- }));
- assertEquals(1, passes[0]);
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
index 7e1666a074..e4a9082771 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java
@@ -94,9 +94,6 @@ protected final T createFilter(final Shape shape, final IndexProducer producer)
return bf;
}
- /**
- *
- */
@Test
public void testMergeWithBadHasher() {
// value too large
@@ -451,10 +448,5 @@ public BadHasher(final int value) {
public IndexProducer indices(final Shape shape) {
return producer;
}
-
- @Override
- public IndexProducer uniqueIndices(final Shape shape) {
- return producer;
- }
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java
new file mode 100644
index 0000000000..8433161d1d
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.util.Arrays;
+import java.util.BitSet;
+
+import org.apache.commons.collections4.bloomfilter.CellProducer.CellConsumer;
+import org.junit.jupiter.api.Test;
+
+public abstract class AbstractCellProducerTest extends AbstractIndexProducerTest {
+
+ /**
+ * A testing CellConsumer that always returns true.
+ */
+ private static final CellConsumer TRUE_CONSUMER = (i, j) -> true;
+ /**
+ * A testing CellConsumer that always returns false.
+ */
+ private static final CellConsumer FALSE_CONSUMER = (i, j) -> false;
+
+ /**
+ * Creates an array of expected values that aligns with the expected indices entries.
+ * @return an array of expected values.
+ * @see AbstractIndexProducerTest#getExpectedIndices()
+ */
+ protected abstract int[] getExpectedValues();
+
+ @Override
+ protected final int getAsIndexArrayBehaviour() {
+ return ORDERED | DISTINCT;
+ }
+
+ /**
+ * Creates a producer with some data.
+ * @return a producer with some data
+ */
+ @Override
+ protected abstract CellProducer createProducer();
+
+ /**
+ * Creates a producer without data.
+ * @return a producer that has no data.
+ */
+ @Override
+ protected abstract CellProducer createEmptyProducer();
+
+ @Test
+ public final void testForEachCellPredicates() {
+ final CellProducer populated = createProducer();
+ final CellProducer empty = createEmptyProducer();
+
+ assertFalse(populated.forEachCell(FALSE_CONSUMER), "non-empty should be false");
+ assertTrue(empty.forEachCell(FALSE_CONSUMER), "empty should be true");
+
+ assertTrue(populated.forEachCell(TRUE_CONSUMER), "non-empty should be true");
+ assertTrue(empty.forEachCell(TRUE_CONSUMER), "empty should be true");
+ }
+
+ @Test
+ public final void testEmptyCellProducer() {
+ final CellProducer empty = createEmptyProducer();
+ final int ary[] = empty.asIndexArray();
+ assertEquals(0, ary.length);
+ assertTrue(empty.forEachCell((i, j) -> {
+ fail("forEachCell consumer should not be called");
+ return false;
+ }));
+ }
+
+ @Test
+ public final void testIndexConsistency() {
+ final CellProducer producer = createProducer();
+ final BitSet bs1 = new BitSet();
+ final BitSet bs2 = new BitSet();
+ producer.forEachIndex(i -> {
+ bs1.set(i);
+ return true;
+ });
+ producer.forEachCell((i, j) -> {
+ bs2.set(i);
+ return true;
+ });
+ assertEquals(bs1, bs2);
+ }
+
+ @Test
+ public void testForEachCellValues() {
+ int[] expectedIdx = getExpectedIndices();
+ int[] expectedValue = getExpectedValues();
+ assertEquals(expectedIdx.length, expectedValue.length, "expected index length and value length do not match");
+ int[] idx = {0};
+ createProducer().forEachCell((i, j) -> {
+ assertEquals(expectedIdx[idx[0]], i, "bad index at " + idx[0]);
+ assertEquals(expectedValue[idx[0]], j, "bad value at " + idx[0]);
+ idx[0]++;
+ return true;
+ });
+ }
+
+ /**
+ * Test the behavior of {@link CellProducer#forEachCell(CellConsumer)} with respect
+ * to ordered and distinct indices. Currently the behavior is assumed to be the same as
+ * {@link IndexProducer#forEachIndex(java.util.function.IntPredicate)}.
+ */
+ @Test
+ public final void testBehaviourForEachCell() {
+ final IntList list = new IntList();
+ createProducer().forEachCell((i, j) -> list.add(i));
+ final int[] actual = list.toArray();
+ // check order
+ final int[] expected = Arrays.stream(actual).sorted().toArray();
+ assertArrayEquals(expected, actual);
+ // check distinct
+ final long count = Arrays.stream(actual).distinct().count();
+ assertEquals(count, actual.length);
+ }
+
+ @Test
+ public void testForEachCellEarlyExit() {
+ final int[] passes = new int[1];
+ assertTrue(createEmptyProducer().forEachCell((i, j) -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(0, passes[0]);
+
+ assertFalse(createProducer().forEachCell((i, j) -> {
+ passes[0]++;
+ return false;
+ }));
+ assertEquals(1, passes[0]);
+ }
+}
+
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
index de424111f0..6d489d8d13 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
@@ -39,14 +39,16 @@ public abstract class AbstractCountingBloomFilterTest {
- for (int i = 1; i < 18; i++) {
- if (!consumer.test(i, Integer.MAX_VALUE)) {
- return false;
+ protected final CellProducer getMaximumValueProducer(int maxValue) {
+ return consumer -> {
+ for (int i = 1; i < 18; i++) {
+ if (!consumer.test(i, maxValue)) {
+ return false;
+ }
}
- }
- return true;
- };
+ return true;
+ };
+ }
/**
* Assert the counts match the expected values. Values are for indices starting
@@ -57,7 +59,7 @@ public abstract class AbstractCountingBloomFilterTest m = new HashMap<>();
- bf.forEachCount((i, c) -> {
+ bf.forEachCell((i, c) -> {
m.put(i, c);
return true;
});
@@ -82,7 +84,7 @@ public final void testCountingSpecificConstructor() {
// verify hasher duplicates are counted.
// bit hasher has duplicates for 11, 12,13,14,15,16, and 17
final CountingBloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
- bf.add(BitCountProducer.from(TestingHashers.FROM11.indices(getTestShape())));
+ bf.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
final long[] lb = bf.asBitMapArray();
assertEquals(2, lb.length);
@@ -130,7 +132,7 @@ public final void testCountingSpecificMerge() {
// test overflow
final CountingBloomFilter bf5 = createEmptyFilter(getTestShape());
- assertTrue(bf5.add(maximumValueProducer), "Should add to empty");
+ assertTrue(bf5.add(getMaximumValueProducer(bf5.getMaxCell())), "Should add to empty");
assertTrue(bf5.isValid(), "Should be valid");
final CountingBloomFilter bf6 = bf5.copy();
@@ -155,7 +157,7 @@ public void testAdd() {
// test overflow
final CountingBloomFilter bf2 = createEmptyFilter(getTestShape());
- assertTrue(bf2.add(maximumValueProducer), "Should add to empty");
+ assertTrue(bf2.add(getMaximumValueProducer(bf2.getMaxCell())), "Should add to empty");
assertTrue(bf2.isValid(), "Should be valid");
assertFalse(bf2.add(createFilter(getTestShape(), TestingHashers.FROM1)), "Should not add");
@@ -169,7 +171,7 @@ public void testAdd() {
@Test
public final void testSubtract() {
final CountingBloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
- bf1.add(BitCountProducer.from(TestingHashers.FROM11.indices(getTestShape())));
+ bf1.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
final CountingBloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
@@ -190,6 +192,9 @@ public final void testSubtract() {
assertFalse(bf3.contains(bf4), "Should not contain");
assertCounts(bf3, new int[] {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
+
+ assertThrows(IllegalArgumentException.class, () -> bf3.remove(new BadHasher(-1)));
+ assertThrows(IllegalArgumentException.class, () -> bf3.remove(new BadHasher(getTestShape().getNumberOfBits())));
}
/**
@@ -202,7 +207,7 @@ public final void testRemove() {
simple.merge(TestingHashers.FROM11);
final CountingBloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
- bf1.add(BitCountProducer.from(TestingHashers.FROM11.indices(getTestShape())));
+ bf1.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
assertTrue(bf1.remove(simple), "Remove should work");
assertFalse(bf1.contains(TestingHashers.FROM11), "Should not contain");
@@ -212,7 +217,7 @@ public final void testRemove() {
// with hasher
final CountingBloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM1);
- bf2.add(BitCountProducer.from(TestingHashers.FROM11.indices(getTestShape())));
+ bf2.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
assertTrue(bf2.remove(TestingHashers.FROM11), "Remove should work");
assertFalse(bf2.contains(TestingHashers.FROM11), "Should not contain");
@@ -233,7 +238,7 @@ public final void testRemove() {
final IndexProducer ip = TestingHashers.FROM11.indices(getTestShape());
final CountingBloomFilter bf4 = createFilter(getTestShape(), TestingHashers.FROM1);
- bf4.add(BitCountProducer.from(TestingHashers.FROM11.indices(getTestShape())));
+ bf4.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
assertTrue(bf4.remove(ip), "Remove should work");
assertFalse(bf4.contains(TestingHashers.FROM11), "Should not contain");
@@ -244,7 +249,7 @@ public final void testRemove() {
// with BitMapProducer
final BitMapProducer bmp = BitMapProducer.fromIndexProducer(ip, getTestShape().getNumberOfBits());
final CountingBloomFilter bf5 = createFilter(getTestShape(), TestingHashers.FROM1);
- bf5.add(BitCountProducer.from(TestingHashers.FROM11.indices(getTestShape())));
+ bf5.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
assertTrue(bf5.remove(bmp), "Remove should work");
assertFalse(bf5.contains(TestingHashers.FROM11), "Should not contain");
@@ -260,6 +265,8 @@ public final void testRemove() {
final CountingBloomFilter bf7 = createFilter(getTestShape(), TestingHashers.FROM1);
final BitMapProducer bmp2 = BitMapProducer.fromIndexProducer(ip2, getTestShape().getNumberOfBits());
assertThrows(IllegalArgumentException.class, () -> bf7.remove(bmp2));
+ assertThrows(IllegalArgumentException.class, () -> bf7.remove(new BadHasher(-1)));
+ assertThrows(IllegalArgumentException.class, () -> bf7.remove(new BadHasher(getTestShape().getNumberOfBits())));
}
@Test
@@ -272,7 +279,7 @@ public void testExcludesDuplicates() {
CountingBloomFilter bf1 = createFilter(shape, hasher);
assertEquals(6, bf1.cardinality());
- bf1.forEachCount((x, y) -> {
+ bf1.forEachCell((x, y) -> {
assertEquals(1, y, "Hasher in constructor results in value not equal to 1");
return true;
});
@@ -280,7 +287,7 @@ public void testExcludesDuplicates() {
bf1 = createEmptyFilter(shape);
bf1.merge(hasher);
assertEquals(6, bf1.cardinality());
- bf1.forEachCount((x, y) -> {
+ bf1.forEachCell((x, y) -> {
assertEquals(1, y, "Hasher in merge results in value not equal to 1");
return true;
});
@@ -289,6 +296,110 @@ public void testExcludesDuplicates() {
bf1.merge(hasher);
bf1.remove(hasher);
assertEquals(0, bf1.cardinality());
- assertTrue(bf1.forEachCount((x, y) -> false), "Hasher in removes results in value not equal to 0");
+ assertTrue(bf1.forEachCell((x, y) -> false), "Hasher in removes results in value not equal to 0");
+ }
+
+ /**
+ * Asserts the value returned by getMaxInsert for three reference hashers, querying the
+ * filter through each overload: Hasher, BloomFilter, BitMapProducer and IndexProducer.
+ *
+ * @param bf the counting filter under test
+ * @param from1 the value expected for {@code TestingHashers.FROM1}
+ * @param from11 the value expected for {@code TestingHashers.FROM11}
+ */
+ private void verifyMaxInsert(CountingBloomFilter bf, int from1, int from11) {
+ // IncrementingHasher(0, 1) is expected to report zero on every call
+ final BloomFilter zeroFilter = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
+ zeroFilter.merge(new IncrementingHasher(0, 1));
+ assertEquals(0, bf.getMaxInsert(new IncrementingHasher(0, 1)));
+ assertEquals(0, bf.getMaxInsert(zeroFilter));
+ assertEquals(0, bf.getMaxInsert((BitMapProducer) zeroFilter));
+ assertEquals(0, bf.getMaxInsert((IndexProducer) zeroFilter));
+
+ final BloomFilter filter1 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
+ filter1.merge(TestingHashers.FROM1);
+ assertEquals(from1, bf.getMaxInsert(TestingHashers.FROM1));
+ assertEquals(from1, bf.getMaxInsert(filter1));
+ assertEquals(from1, bf.getMaxInsert((BitMapProducer) filter1));
+ assertEquals(from1, bf.getMaxInsert((IndexProducer) filter1));
+
+ final BloomFilter filter11 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
+ filter11.merge(TestingHashers.FROM11);
+ assertEquals(from11, bf.getMaxInsert(TestingHashers.FROM11));
+ assertEquals(from11, bf.getMaxInsert(filter11));
+ assertEquals(from11, bf.getMaxInsert((BitMapProducer) filter11));
+ assertEquals(from11, bf.getMaxInsert((IndexProducer) filter11));
+ }
+
+ /**
+ * Tracks the values returned by getMaxInsert while FROM1 and FROM11 are merged into and
+ * removed from an initially empty filter.
+ */
+ @Test
+ public void testGetMaxInsert() {
+ CountingBloomFilter bf = createEmptyFilter(getTestShape());
+ // empty filter: both reference hashers report zero
+ verifyMaxInsert(bf, 0, 0);
+ // FROM1 merged once
+ bf.merge(TestingHashers.FROM1);
+ verifyMaxInsert(bf, 1, 0);
+ // FROM1 merged a second time
+ bf.merge(TestingHashers.FROM1);
+ verifyMaxInsert(bf, 2, 0);
+ // FROM11 merged once; the FROM1 result is unchanged
+ bf.merge(TestingHashers.FROM11);
+ verifyMaxInsert(bf, 2, 1);
+ // one FROM1 removed
+ bf.remove(TestingHashers.FROM1);
+ verifyMaxInsert(bf, 1, 1);
+ // verify remove false positive works
+ // Incrementing hasher 5,1 spans the single count cells for both FROM1 and FROM11
+ assertEquals(1, bf.getMaxInsert(new IncrementingHasher(5, 1)));
+ bf.remove(new IncrementingHasher(5, 1));
+ // after removing the false positive both FROM1 and FROM11 report zero
+ verifyMaxInsert(bf, 0, 0);
+ assertEquals(0, bf.getMaxInsert(new IncrementingHasher(5, 1)));
+ }
+
+ /**
+ * Asserts that cell 3 holds the given value and that every other reported cell is zero.
+ *
+ * @param bf the filter whose cells are inspected
+ * @param value the value expected in cell 3
+ */
+ private void assertCell3(CountingBloomFilter bf, int value) {
+ bf.forEachCell((index, count) -> {
+ // only cell 3 may be non-zero
+ final int expected = index == 3 ? value : 0;
+ assertEquals(expected, count, "Mismatch at position " + index);
+ return true;
+ });
+ }
+
+ /**
+ * Verifies that merging an IndexProducer increments a duplicated index once, while adding
+ * the equivalent CellProducer increments the cell once per occurrence.
+ */
+ @Test
+ public void mergeIncrementsAllCellsTest() {
+ CountingBloomFilter f1 = createEmptyFilter(Shape.fromKM(1, 10));
+ CountingBloomFilter f2 = f1.copy();
+ CountingBloomFilter f3 = f1.copy();
+ // index producer produces 3 two times.
+ IndexProducer ip = p -> {
+ p.test(3);
+ p.test(3);
+ return true;
+ };
+ // The merge should increment cell 3 by 1
+ f1.merge(ip);
+ assertCell3(f1, 1);
+
+ // The add should increment cell 3 by 2
+ f2.add(CellProducer.from(ip));
+ assertCell3(f2, 2);
+
+ // A merge of the producer rebuilt from the index array must also increment cell 3 by 1
+ f3.merge(IndexProducer.fromIndexArray(ip.asIndexArray()));
+ assertCell3(f3, 1);
+ }
+
+ /**
+ * Verifies that removing an IndexProducer decrements a duplicated index once, while
+ * subtracting the equivalent CellProducer decrements the cell once per occurrence.
+ */
+ @Test
+ public void removeDecrementsAllCellsTest() {
+ CountingBloomFilter f1 = createEmptyFilter(Shape.fromKM(1, 10));
+ // seed cell 3 with a value of 3 so that it can be decremented below
+ CellProducer cp = p -> {
+ p.test(3, 3);
+ return true;
+ };
+ f1.add(cp);
+ CountingBloomFilter f2 = f1.copy();
+ CountingBloomFilter f3 = f1.copy();
+ // index producer produces 3 two times.
+ IndexProducer ip = p -> {
+ p.test(3);
+ p.test(3);
+ return true;
+ };
+ // The remove should decrement cell 3 by 1
+ f1.remove(ip);
+ assertCell3(f1, 2);
+
+ // The subtract should decrement cell 3 by 2
+ f2.subtract(CellProducer.from(ip));
+ assertCell3(f2, 1);
+
+ // This remove also decrements by 1: the round-trip keeps the duplicate index but remove counts it once
+ f3.remove(IndexProducer.fromIndexArray(ip.asIndexArray()));
+ assertCell3(f3, 2);
 }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java
index 542f9a6875..917f361a2d 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractIndexProducerTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -245,4 +246,10 @@ public void testForEachIndexEarlyExit() {
}));
assertEquals(0, passes[0]);
}
+
+ /** Calling uniqueIndices() on a producer returned by uniqueIndices() must return that same instance. */
+ @Test
+ public void testUniqueReturnsSelf() {
+ final IndexProducer unique = createProducer().uniqueIndices();
+ final IndexProducer again = unique.uniqueIndices();
+ assertSame(unique, again);
+ }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java
index f2a18c426a..753decc6e2 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java
@@ -37,12 +37,6 @@ public IndexProducer indices(final Shape shape) {
return new Producer(shape);
}
- @Override
- public IndexProducer uniqueIndices(final Shape shape) {
- Objects.requireNonNull(shape, "shape");
- return new Producer(shape);
- }
-
private class Producer implements IndexProducer {
Shape shape;
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherTest.java
deleted file mode 100644
index 8c1e846989..0000000000
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromHasherTest.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.collections4.bloomfilter;
-
-public class BitCountProducerFromHasherTest extends AbstractBitCountProducerTest {
-
- @Override
- protected BitCountProducer createProducer() {
- // hasher has collisions and wraps
- return BitCountProducer.from(new IncrementingHasher(4, 8).indices(Shape.fromKM(17, 72)));
- }
-
- @Override
- protected BitCountProducer createEmptyProducer() {
- return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)));
- }
-
- @Override
- protected int getAsIndexArrayBehaviour() {
- // Hasher allows duplicates and may be unordered
- return 0;
- }
-
- @Override
- protected int[] getExpectedIndices() {
- return new int[] {4, 12, 20, 28, 36, 44, 52, 60, 68, 4, 12, 20, 28, 36, 44, 52, 60};
- }
-
- @Override
- protected int[][] getExpectedBitCount() {
- return new int[][] {{4, 2}, {12, 2}, {20, 2}, {28, 2}, {36, 2}, {44, 2}, {52, 2}, {60, 2}, {68, 1}};
- }
-}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromArrayCountingBloomFilterTest.java
new file mode 100644
index 0000000000..454e16492a
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromArrayCountingBloomFilterTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+/**
+ * Runs the {@link AbstractCellProducerTest} suite against an {@link ArrayCountingBloomFilter}
+ * into which two hashers have been merged.
+ */
+public class CellProducerFromArrayCountingBloomFilterTest extends AbstractCellProducerTest {
+
+ protected Shape shape = Shape.fromKM(17, 72);
+
+ @Override
+ protected CellProducer createEmptyProducer() {
+ return new ArrayCountingBloomFilter(shape);
+ }
+
+ @Override
+ protected CellProducer createProducer() {
+ final ArrayCountingBloomFilter counting = new ArrayCountingBloomFilter(shape);
+ // merge two hashers; the expected values show that cells 5 to 16 are hit by both
+ for (final int first : new int[] {0, 5}) {
+ counting.merge(new IncrementingHasher(first, 1));
+ }
+ return counting;
+ }
+
+ @Override
+ protected int[] getExpectedIndices() {
+ return new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21};
+ }
+
+ @Override
+ protected int[] getExpectedValues() {
+ return new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1};
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromDefaultIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromDefaultIndexProducerTest.java
new file mode 100644
index 0000000000..8f97d8388d
--- /dev/null
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/CellProducerFromDefaultIndexProducerTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+/**
+ * Runs the {@link AbstractCellProducerTest} suite against a {@link CellProducer} built from
+ * an index array that is unordered and contains a duplicate.
+ */
+public class CellProducerFromDefaultIndexProducerTest extends AbstractCellProducerTest {
+
+ /** Source indices: unordered, with index 1 present twice. */
+ int[] data = {0, 63, 1, 64, 128, 1, 127};
+ /** The distinct source indices in ascending order. */
+ int[] indices = {0, 1, 63, 64, 127, 128};
+ /** The cell value expected for each entry of {@code indices}; index 1 occurs twice so its value is 2. */
+ int[] values = {1, 2, 1, 1, 1, 1};
+
+ @Override
+ protected CellProducer createEmptyProducer() {
+ return CellProducer.from(IndexProducer.fromIndexArray(new int[0]));
+ }
+
+ @Override
+ protected CellProducer createProducer() {
+ final IndexProducer source = IndexProducer.fromIndexArray(data);
+ return CellProducer.from(source);
+ }
+
+ @Override
+ protected int[] getExpectedIndices() {
+ return indices;
+ }
+
+ @Override
+ protected int[] getExpectedValues() {
+ return values;
+ }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitCountProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultCellProducerTest.java
similarity index 69%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitCountProducerTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/DefaultCellProducerTest.java
index a85b90b29d..e99a9aaeb4 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitCountProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultCellProducerTest.java
@@ -16,21 +16,27 @@
*/
package org.apache.commons.collections4.bloomfilter;
-public class DefaultBitCountProducerTest extends AbstractBitCountProducerTest {
+public class DefaultCellProducerTest extends AbstractCellProducerTest {
- /** Make forEachIndex unordered and contain duplicates. */
+ /** Cell indices are ordered and distinct; each cell value is the square of its index. */
- private final int[] values = {10, 1, 10, 1};
+ private final int[] indices = {1, 2, 3, 5};
+ private final int[] values = {1, 4, 9, 25};
@Override
protected int[] getExpectedIndices() {
+ return indices;
+ }
+
+ @Override
+ protected int[] getExpectedValues() {
return values;
}
@Override
- protected BitCountProducer createProducer() {
+ protected CellProducer createProducer() {
return consumer -> {
- for (final int i : values) {
- if (!consumer.test(i, 1)) {
+ for (int i = 0; i < indices.length; i++) {
+ if (!consumer.test(indices[i], values[i])) {
return false;
}
}
@@ -39,25 +45,13 @@ protected BitCountProducer createProducer() {
}
@Override
- protected BitCountProducer createEmptyProducer() {
+ protected CellProducer createEmptyProducer() {
return consumer -> true;
}
- @Override
- protected int getAsIndexArrayBehaviour() {
- // The default method streams a BitSet so is distinct and ordered.
- return ORDERED | DISTINCT;
- }
-
@Override
protected int getForEachIndexBehaviour() {
- // The default method has the same behavior as the forEachCount() method.
+ // The default method has the same behavior as the forEachCell() method.
return 0;
}
-
- @Override
- protected int getForEachCountBehaviour() {
- // The implemented method returns unordered duplicates.
- return 0;
- }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java
index 2682a96a7e..73a3a58dc9 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java
@@ -22,8 +22,11 @@
import java.util.BitSet;
import java.util.Objects;
import java.util.concurrent.ThreadLocalRandom;
+import java.util.stream.IntStream;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
@@ -58,8 +61,7 @@ protected IndexProducer createEmptyProducer() {
@Override
protected int getAsIndexArrayBehaviour() {
- // The default method streams a BitSet so is distinct and ordered.
- return DISTINCT | ORDERED;
+ return 0;
}
@Override
@@ -119,4 +121,21 @@ public void testFromIndexArray() {
assertArrayEquals(expected, ip.asIndexArray());
}
}
+
+ /**
+ * Checks that asIndexArray() returns every index supplied by the producer, in order,
+ * for the parameterized sizes.
+ *
+ * @param size the number of consecutive indices, starting at zero, that the producer supplies
+ */
+ @ParameterizedTest
+ @ValueSource(ints = {32, 33})
+ public void testEntries(int size) {
+ final int[] expected = IntStream.range(0, size).toArray();
+ final IndexProducer producer = predicate -> {
+ Objects.requireNonNull(predicate);
+ // stops at, and reports, the first index the predicate rejects
+ return IntStream.of(expected).allMatch(predicate);
+ };
+ final int[] actual = producer.asIndexArray();
+ assertArrayEquals(expected, actual);
+ }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java
similarity index 71%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java
index 1cfe291a50..f9d7f18fcf 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromArrayCountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromArrayCountingBloomFilterTest.java
@@ -16,12 +16,12 @@
*/
package org.apache.commons.collections4.bloomfilter;
-public class BitCountProducerFromArrayCountingBloomFilterTest extends AbstractBitCountProducerTest {
+public class IndexProducerFromArrayCountingBloomFilterTest extends AbstractIndexProducerTest {
protected Shape shape = Shape.fromKM(17, 72);
@Override
- protected BitCountProducer createProducer() {
+ protected IndexProducer createProducer() {
final ArrayCountingBloomFilter filter = new ArrayCountingBloomFilter(shape);
filter.merge(new IncrementingHasher(0, 1));
filter.merge(new IncrementingHasher(5, 1));
@@ -29,25 +29,18 @@ protected BitCountProducer createProducer() {
}
@Override
- protected BitCountProducer createEmptyProducer() {
+ protected IndexProducer createEmptyProducer() {
return new ArrayCountingBloomFilter(shape);
}
@Override
- protected int getAsIndexArrayBehaviour() {
- // CountingBloomFilter based on an array will be distinct and ordered
- return DISTINCT | ORDERED;
+ protected int[] getExpectedIndices() {
+ return new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21};
}
@Override
- protected int[][] getExpectedBitCount() {
- return new int[][] {{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 2}, {6, 2}, {7, 2},
- {8, 2}, {9, 2}, {10, 2}, {11, 2}, {12, 2}, {13, 2}, {14, 2}, {15, 2}, {16, 2},
- {17, 1}, {18, 1}, {19, 1}, {20, 1}, {21, 1}};
+ protected int getAsIndexArrayBehaviour() {
+ return DISTINCT | ORDERED;
}
- @Override
- protected int[] getExpectedIndices() {
- return new int[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21};
- }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromDefaultIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java
similarity index 69%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromDefaultIndexProducerTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java
index 56a5c792a6..f6fdd91008 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromDefaultIndexProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromHasherTest.java
@@ -16,27 +16,26 @@
*/
package org.apache.commons.collections4.bloomfilter;
-public class BitCountProducerFromDefaultIndexProducerTest extends AbstractBitCountProducerTest {
-
- int[] data = {0, 63, 1, 1, 64, 127, 128};
+public class IndexProducerFromHasherTest extends AbstractIndexProducerTest {
@Override
- protected BitCountProducer createProducer() {
- return BitCountProducer.from(IndexProducer.fromIndexArray(data));
+ protected int getAsIndexArrayBehaviour() {
+ return 0;
}
@Override
- protected BitCountProducer createEmptyProducer() {
- return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0]));
+ protected IndexProducer createProducer() {
+ // hasher has collisions and wraps
+ return new IncrementingHasher(4, 8).indices(Shape.fromKM(17, 72));
}
@Override
- protected int getAsIndexArrayBehaviour() {
- return 0;
+ protected IndexProducer createEmptyProducer() {
+ return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72));
}
@Override
protected int[] getExpectedIndices() {
- return data;
+ return new int[] {4, 12, 20, 28, 36, 44, 52, 60, 68, 4, 12, 20, 28, 36, 44, 52, 60};
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIntArrayTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java
similarity index 76%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIntArrayTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java
index d0a598a17d..4b0281dccc 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromIntArrayTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromIntArrayTest.java
@@ -16,27 +16,28 @@
*/
package org.apache.commons.collections4.bloomfilter;
-public class BitCountProducerFromIntArrayTest extends AbstractBitCountProducerTest {
+public class IndexProducerFromIntArrayTest extends AbstractIndexProducerTest {
int[] data = {6, 8, 1, 2, 4, 4, 5};
@Override
- protected BitCountProducer createEmptyProducer() {
- return BitCountProducer.from(IndexProducer.fromIndexArray(new int[0]));
+ protected IndexProducer createEmptyProducer() {
+ return IndexProducer.fromIndexArray(new int[0]);
}
@Override
- protected BitCountProducer createProducer() {
- return BitCountProducer.from(IndexProducer.fromIndexArray(data));
+ protected IndexProducer createProducer() {
+ return IndexProducer.fromIndexArray(data);
}
@Override
- protected int getAsIndexArrayBehaviour() {
- return 0;
+ protected int[] getExpectedIndices() {
+ return data;
}
@Override
- protected int[] getExpectedIndices() {
- return data;
+ protected int getAsIndexArrayBehaviour() {
+ return 0;
}
+
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
similarity index 77%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSimpleBloomFilterTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
index a6b2be2099..b2f3e947a6 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSimpleBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java
@@ -16,31 +16,30 @@
*/
package org.apache.commons.collections4.bloomfilter;
-public class BitCountProducerFromSimpleBloomFilterTest extends AbstractBitCountProducerTest {
+public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProducerTest {
protected Shape shape = Shape.fromKM(17, 72);
@Override
- protected BitCountProducer createProducer() {
+ protected IndexProducer createProducer() {
final Hasher hasher = new IncrementingHasher(3, 2);
final BloomFilter bf = new SimpleBloomFilter(shape);
bf.merge(hasher);
- return BitCountProducer.from(bf);
+ return bf;
}
@Override
- protected BitCountProducer createEmptyProducer() {
- return BitCountProducer.from(new SimpleBloomFilter(shape));
+ protected IndexProducer createEmptyProducer() {
+ return new SimpleBloomFilter(shape);
}
@Override
- protected int getAsIndexArrayBehaviour() {
- // BloomFilter based on a bit map array will be distinct and ordered
- return DISTINCT | ORDERED;
+ protected int[] getExpectedIndices() {
+ return new int[] {3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35};
}
@Override
- protected int[] getExpectedIndices() {
- return new int[] {3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35};
+ protected int getAsIndexArrayBehaviour() {
+ return DISTINCT | ORDERED;
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
similarity index 70%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSparseBloomFilterTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
index 7e05bb9961..d7bf45cba9 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromSparseBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java
@@ -16,33 +16,30 @@
*/
package org.apache.commons.collections4.bloomfilter;
-public class BitCountProducerFromSparseBloomFilterTest extends AbstractBitCountProducerTest {
+public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProducerTest {
protected Shape shape = Shape.fromKM(17, 72);
@Override
- protected BitCountProducer createProducer() {
+ protected IndexProducer createProducer() {
final Hasher hasher = new IncrementingHasher(4, 7);
final BloomFilter bf = new SparseBloomFilter(shape);
bf.merge(hasher);
- return BitCountProducer.from(bf);
+ return bf;
}
@Override
- protected BitCountProducer createEmptyProducer() {
- return BitCountProducer.from(new SparseBloomFilter(shape));
+ protected IndexProducer createEmptyProducer() {
+ return new SparseBloomFilter(shape);
}
@Override
- protected int getAsIndexArrayBehaviour() {
- // A sparse BloomFilter will be distinct but it may not be ordered.
- // Currently the ordered behavior is asserted as the implementation uses
- // an ordered TreeSet. This may change in the future.
- return DISTINCT | ORDERED;
+ protected int[] getExpectedIndices() {
+ return new int[] {2, 4, 9, 11, 16, 18, 23, 25, 30, 32, 37, 39, 44, 46, 53, 60, 67};
}
@Override
- protected int[] getExpectedIndices() {
- return new int[] {2, 4, 9, 11, 16, 18, 23, 25, 30, 32, 37, 39, 44, 46, 53, 60, 67};
+ protected int getAsIndexArrayBehaviour() {
+ return DISTINCT | ORDERED;
 }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromUniqueHasherTest.java
similarity index 73%
rename from src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherTest.java
rename to src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromUniqueHasherTest.java
index 18e7f7936d..c7cbd217cb 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitCountProducerFromUniqueHasherTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromUniqueHasherTest.java
@@ -16,27 +16,26 @@
*/
package org.apache.commons.collections4.bloomfilter;
-public class BitCountProducerFromUniqueHasherTest extends AbstractBitCountProducerTest {
+public class IndexProducerFromUniqueHasherTest extends AbstractIndexProducerTest {
@Override
- protected BitCountProducer createProducer() {
+ protected IndexProducer createProducer() {
// hasher has collisions and wraps
- return BitCountProducer.from(new IncrementingHasher(4, 8).uniqueIndices(Shape.fromKM(17, 72)));
+ return new IncrementingHasher(4, 8).indices(Shape.fromKM(17, 72)).uniqueIndices();
}
@Override
- protected BitCountProducer createEmptyProducer() {
- return BitCountProducer.from(NullHasher.INSTANCE.indices(Shape.fromKM(17, 72)));
+ protected IndexProducer createEmptyProducer() {
+ return NullHasher.INSTANCE.indices(Shape.fromKM(17, 72));
}
@Override
- protected int getAsIndexArrayBehaviour() {
- // Hasher may be unordered
- return DISTINCT;
+ protected int[] getExpectedIndices() {
+ return new int[] {4, 12, 20, 28, 36, 44, 52, 60, 68};
}
@Override
- protected int[] getExpectedIndices() {
- return new int[] {4, 12, 20, 28, 36, 44, 52, 60, 68};
+ protected int getAsIndexArrayBehaviour() {
+ return DISTINCT;
}
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java
index 52f557a834..655dfeed9d 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerTest.java
@@ -22,7 +22,10 @@
import java.util.List;
import java.util.function.LongPredicate;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
public class IndexProducerTest {
@@ -68,4 +71,17 @@ public boolean forEachBitMap(final LongPredicate consumer) {
return true;
}
}
+
+ /**
+ * A producer that supplies index zero {@code n} times must yield {@code n} zeros from
+ * asIndexArray(); repeated indices are kept.
+ *
+ * @param n the number of times index zero is supplied
+ */
+ @ParameterizedTest
+ @ValueSource(ints = {32, 33})
+ void testAsIndexArray(int n) {
+ final IndexProducer repeatedZero = predicate -> {
+ int remaining = n;
+ while (remaining-- > 0) {
+ // Always test index zero
+ predicate.test(0);
+ }
+ return true;
+ };
+ final int[] expected = new int[n];
+ Assertions.assertArrayEquals(expected, repeatedZero.asIndexArray());
+ }
}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/NullHasher.java b/src/test/java/org/apache/commons/collections4/bloomfilter/NullHasher.java
index 9ab0fb76d5..6ac6d04eae 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/NullHasher.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/NullHasher.java
@@ -53,10 +53,4 @@ public IndexProducer indices(final Shape shape) {
Objects.requireNonNull(shape, "shape");
return PRODUCER;
}
-
- @Override
- public IndexProducer uniqueIndices(final Shape shape) {
- Objects.requireNonNull(shape, "shape");
- return PRODUCER;
- }
}