diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java
index 7ccd8bc924..6bfc60c7fb 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java
@@ -16,109 +16,153 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import java.util.TreeMap;
import java.util.function.IntPredicate;
+
/**
- * Defines a mapping of index to counts.
+ * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
+ * refer to these counts and their associated index. This class is the equivalent of the index producer except
+ * that it produces cells.
*
- * <p>Note that a BitCountProducer may return duplicate indices and may be unordered.
+ * <p>Note that a CellProducer must not return duplicate indices and must be ordered.
*
- * Implementations must guarantee that:
+ * <p>Implementations must guarantee that:
*
* <ul>
- * <li>The mapping of index to counts is the combined sum of counts at each index.</li>
- * <li>For every unique value produced by the IndexProducer there will be at least one matching
- * index and count produced by the BitCountProducer.</li>
- * <li>The BitCountProducer will not generate indices that are not output by the IndexProducer.</li>
+ * <li>The IndexProducer implementation returns unique ordered indices.</li>
+ * <li>The cells are produced in IndexProducer order.</li>
+ * <li>For every value produced by the IndexProducer there will be only one matching
+ * cell produced by the CellProducer.</li>
+ * <li>The CellProducer will not generate cells with indices that are not output by the IndexProducer.</li>
+ * <li>The IndexProducer will not generate indices that have a zero count for the cell.</li>
* </ul>
*
- * Note that implementations that do not output duplicate indices for BitCountProducer and
- * do for IndexProducer, or vice versa, are consistent if the distinct indices from each are
- * the same.
- *
- * <p>For example the mapping [(1,2),(2,3),(3,1)] can be output with many combinations including:
- *
- * [(1,2),(2,3),(3,1)]
- * [(1,1),(1,1),(2,1),(2,1),(2,1),(3,1)]
- * [(1,1),(3,1),(1,1),(2,1),(2,1),(2,1)]
- * [(3,1),(1,1),(2,2),(1,1),(2,1)]
- * ...
- *
- *
* @since 4.5
*/
@FunctionalInterface
-public interface BitCountProducer extends IndexProducer {
+public interface CellProducer extends IndexProducer {
/**
- * Performs the given action for each {@code <index, count>} pair where the count is non-zero.
- * Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
- * index-count pair, if the consumer returns {@code false} the execution is stopped, {@code false}
- * is returned, and no further pairs are processed.
+ * Performs the given action for each {@code cell} where the cell count is non-zero.
+ *
+ * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
+ * refer to these counts.
*
- * Duplicate indices are not required to be aggregated. Duplicates may be output by the producer as
- * noted in the class javadoc.
+ * Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
+ * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
+ * is returned, and no further cells are processed.
*
- * @param consumer the action to be performed for each non-zero bit count
- * @return {@code true} if all count pairs return true from consumer, {@code false} otherwise.
+ * @param consumer the action to be performed for each non-zero cell.
+ * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
* @throws NullPointerException if the specified consumer is null
*/
- boolean forEachCount(BitCountConsumer consumer);
+ boolean forEachCell(CellConsumer consumer);
/**
- * The default implementation returns indices with ordering and uniqueness of {@code forEachCount()}.
+ * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
*/
@Override
default boolean forEachIndex(final IntPredicate predicate) {
- return forEachCount((i, v) -> predicate.test(i));
+ return forEachCell((i, v) -> predicate.test(i));
+ }
+
+ @Override
+ default IndexProducer uniqueIndices() {
+ return this;
}
/**
- * Creates a BitCountProducer from an IndexProducer. The resulting
- * producer will return every index from the IndexProducer with a count of 1.
+ * Creates a CellProducer from an IndexProducer.
+ *
+ * Note the following properties:
+ *
+ * - Each index returned from the IndexProducer is assumed to have a cell value of 1.
+ * - The CellProducer aggregates duplicate indices from the IndexProducer.
+ *
*
- * Note that the BitCountProducer does not remove duplicates. Any use of the
- * BitCountProducer to create an aggregate mapping of index to counts, such as a
- * CountingBloomFilter, should use the same BitCountProducer in both add and
- * subtract operations to maintain consistency.
- *
- * @param idx An index producer.
- * @return A BitCountProducer with the same indices as the IndexProducer.
+ * A CellProducer that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
+ * of indices including:
+ *
+ * [1, 1, 2, 2, 2, 3]
+ * [1, 3, 1, 2, 2, 2]
+ * [3, 2, 1, 2, 1, 2]
+ * ...
+ *
+ *
+ * @param producer An index producer.
+ * @return A CellProducer with the same indices as the IndexProducer.
*/
- static BitCountProducer from(final IndexProducer idx) {
- return new BitCountProducer() {
- @Override
- public boolean forEachCount(final BitCountConsumer consumer) {
- return idx.forEachIndex(i -> consumer.test(i, 1));
+ static CellProducer from(final IndexProducer producer) {
+ return new CellProducer() {
+ // Each cell is stored as both key and value. CounterCell compares by index only, so a
+ // freshly built probe cell locates the existing counter and the map iterates in index order.
+ private final TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();
+
+ // Lazily aggregates the indices of the wrapped producer into cells.
+ // The producer is consulted only while no cells have been recorded.
+ private void populate() {
+ if (counterCells.isEmpty()) {
+ producer.forEachIndex(idx -> {
+ // The probe doubles as the stored cell when the index has not been seen before.
+ CounterCell cell = new CounterCell(idx, 1);
+ CounterCell counter = counterCells.get(cell);
+ if (counter == null) {
+ counterCells.put(cell, cell);
+ } else {
+ // Duplicate index: bump the count of the existing cell.
+ counter.count++;
+ }
+ return true;
+ });
+ }
}
@Override
public int[] asIndexArray() {
- return idx.asIndexArray();
+ populate();
+ return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
}
@Override
- public boolean forEachIndex(final IntPredicate predicate) {
- return idx.forEachIndex(predicate);
+ public boolean forEachCell(CellConsumer consumer) {
+ populate();
+ for (CounterCell cell : counterCells.values()) {
+ // Stop at the first cell the consumer rejects.
+ if (!consumer.test(cell.idx, cell.count)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Class to track cell values in the TreeMap.
+ * Ordering is by index only; the count does not take part in the comparison.
+ */
+ final class CounterCell implements Comparable<CounterCell> {
+ final int idx;
+ int count;
+
+ CounterCell(int idx, int count) {
+ this.idx = idx;
+ this.count = count;
+ }
+
+ @Override
+ public int compareTo(CounterCell other) {
+ return Integer.compare(idx, other.idx);
+ }
}
};
}
/**
- * Represents an operation that accepts an {@code <index, count>} pair representing
- * the count for a bit index. Returns {@code true}
- * if processing should continue, {@code false} otherwise.
+ * Represents an operation that accepts an {@code <index, count>} pair.
+ * Returns {@code true} if processing should continue, {@code false} otherwise.
*
* Note: This is a functional interface as a specialization of
* {@link java.util.function.BiPredicate} for {@code int}.
*/
@FunctionalInterface
- interface BitCountConsumer {
+ interface CellConsumer {
/**
* Performs an operation on the given {@code <index, count>} pair.
*
* @param index the bit index.
- * @param count the count at the specified bit index.
+ * @param count the cell value at the specified bit index.
* @return {@code true} if processing should continue, {@code false} if processing should stop.
*/
boolean test(int index, int count);
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java
index 2a5aa0a622..1d44e58a48 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java
@@ -21,35 +21,34 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
-import static org.junit.jupiter.api.Assumptions.assumeTrue;
import java.util.Arrays;
import java.util.BitSet;
-import org.apache.commons.collections4.bag.TreeBag;
-import org.apache.commons.collections4.bloomfilter.BitCountProducer.BitCountConsumer;
+import org.apache.commons.collections4.bloomfilter.CellProducer.CellConsumer;
import org.junit.jupiter.api.Test;
-public abstract class AbstractBitCountProducerTest extends AbstractIndexProducerTest {
+public abstract class AbstractCellProducerTest extends AbstractIndexProducerTest {
/**
- * A testing BitCountConsumer that always returns true.
+ * A testing CellConsumer that always returns true.
*/
- private static final BitCountConsumer TRUE_CONSUMER = (i, j) -> true;
+ private static final CellConsumer TRUE_CONSUMER = (i, j) -> true;
/**
- * A testing BitCountConsumer that always returns false.
+ * A testing CellConsumer that always returns false.
*/
- private static final BitCountConsumer FALSE_CONSUMER = (i, j) -> false;
+ private static final CellConsumer FALSE_CONSUMER = (i, j) -> false;
/**
- * Creates an array of integer pairs comprising the index and the expected count for the index.
- * The order and count for each index is dependent upon the producer created by the {@code createProducer()}
- * method.
- * By default returns the each {@code getExpectedIndices()} value paired with 1 (one).
- * @return an array of integer pairs comprising the index and the expected count for the index.
+ * Creates an array of expected values that aligns with the expected indices entries.
+ * @return an array of expected values.
+ * @see AbstractIndexProducerTest#getExpectedIndices()
*/
- protected int[][] getExpectedBitCount() {
- return Arrays.stream(getExpectedIndices()).mapToObj(x -> new int[] {x, 1}).toArray(int[][]::new);
+ protected abstract int[] getExpectedValues();
+
+ @Override
+ protected final int getAsIndexArrayBehaviour() {
+ return ORDERED | DISTINCT;
}
/**
@@ -57,57 +56,48 @@ protected int[][] getExpectedBitCount() {
* @return a producer with some data
*/
@Override
- protected abstract BitCountProducer createProducer();
+ protected abstract CellProducer createProducer();
/**
* Creates a producer without data.
* @return a producer that has no data.
*/
@Override
- protected abstract BitCountProducer createEmptyProducer();
-
- /**
- * Gets the behavior of the {@link BitCountProducer#forEachCount(BitCountConsumer)} method.
- * By default returns the value of {@code getAsIndexArrayBehaviour()} method.
- * @return the behavior.
- */
- protected int getForEachCountBehaviour() {
- return getAsIndexArrayBehaviour();
- }
+ protected abstract CellProducer createEmptyProducer();
@Test
- public final void testForEachCountPredicates() {
- final BitCountProducer populated = createProducer();
- final BitCountProducer empty = createEmptyProducer();
+ public final void testForEachCellPredicates() {
+ final CellProducer populated = createProducer();
+ final CellProducer empty = createEmptyProducer();
- assertFalse(populated.forEachCount(FALSE_CONSUMER), "non-empty should be false");
- assertTrue(empty.forEachCount(FALSE_CONSUMER), "empty should be true");
+ assertFalse(populated.forEachCell(FALSE_CONSUMER), "non-empty should be false");
+ assertTrue(empty.forEachCell(FALSE_CONSUMER), "empty should be true");
- assertTrue(populated.forEachCount(TRUE_CONSUMER), "non-empty should be true");
- assertTrue(empty.forEachCount(TRUE_CONSUMER), "empty should be true");
+ assertTrue(populated.forEachCell(TRUE_CONSUMER), "non-empty should be true");
+ assertTrue(empty.forEachCell(TRUE_CONSUMER), "empty should be true");
}
@Test
- public final void testEmptyBitCountProducer() {
- final BitCountProducer empty = createEmptyProducer();
+ public final void testEmptyCellProducer() {
+ final CellProducer empty = createEmptyProducer();
final int ary[] = empty.asIndexArray();
assertEquals(0, ary.length);
- assertTrue(empty.forEachCount((i, j) -> {
- fail("forEachCount consumer should not be called");
+ assertTrue(empty.forEachCell((i, j) -> {
+ fail("forEachCell consumer should not be called");
return false;
}));
}
@Test
public final void testIndexConsistency() {
- final BitCountProducer producer = createProducer();
+ final CellProducer producer = createProducer();
final BitSet bs1 = new BitSet();
final BitSet bs2 = new BitSet();
producer.forEachIndex(i -> {
bs1.set(i);
return true;
});
- producer.forEachCount((i, j) -> {
+ producer.forEachCell((i, j) -> {
bs2.set(i);
return true;
});
@@ -115,51 +105,47 @@ public final void testIndexConsistency() {
}
@Test
- public void testForEachCountValues() {
- // Assumes the collections bag works. Could be replaced with Map with more work.
- final TreeBag expected = new TreeBag<>();
- Arrays.stream(getExpectedBitCount()).forEach(c -> expected.add(c[0], c[1]));
- final TreeBag actual = new TreeBag<>();
- // can not return actual.add as it returns false on duplicate 'i'
- createProducer().forEachCount((i, j) -> {
- actual.add(i, j);
+ public void testForEachCellValues() {
+ int[] expectedIdx = getExpectedIndices();
+ int[] expectedValue = getExpectedValues();
+ assertEquals(expectedIdx.length, expectedValue.length, "expected index length and value length do not match");
+ // single-element array so the lambda can advance the running position
+ int[] idx = {0};
+ createProducer().forEachCell((i, j) -> {
+ assertEquals(expectedIdx[idx[0]], i, "bad index at " + idx[0]);
+ assertEquals(expectedValue[idx[0]], j, "bad value at " + idx[0]);
+ idx[0]++;
return true;
});
- assertEquals(expected, actual);
+ // every expected cell must have been visited, otherwise a producer that emits too few cells passes silently
+ assertEquals(expectedIdx.length, idx[0], "number of cells produced does not match expected");
}
/**
- * Test the behavior of {@link BitCountProducer#forEachCount(BitCountConsumer)} with respect
+ * Test the behavior of {@link CellProducer#forEachCell(CellConsumer)} with respect
* to ordered and distinct indices. Currently the behavior is assumed to be the same as
* {@link IndexProducer#forEachIndex(java.util.function.IntPredicate)}.
*/
@Test
- public final void testBehaviourForEachCount() {
- final int flags = getForEachCountBehaviour();
- assumeTrue((flags & (ORDERED | DISTINCT)) != 0);
+ public final void testBehaviourForEachCell() {
final IntList list = new IntList();
- createProducer().forEachCount((i, j) -> list.add(i));
+ createProducer().forEachCell((i, j) -> list.add(i));
final int[] actual = list.toArray();
- if ((flags & ORDERED) != 0) {
- final int[] expected = Arrays.stream(actual).sorted().toArray();
- assertArrayEquals(expected, actual);
- }
- if ((flags & DISTINCT) != 0) {
- final long count = Arrays.stream(actual).distinct().count();
- assertEquals(count, actual.length);
- }
+ // check order
+ final int[] expected = Arrays.stream(actual).sorted().toArray();
+ assertArrayEquals(expected, actual);
+ // check distinct
+ final long count = Arrays.stream(actual).distinct().count();
+ assertEquals(count, actual.length);
}
@Test
- public void testForEachCountEarlyExit() {
+ public void testForEachCellEarlyExit() {
final int[] passes = new int[1];
- assertTrue(createEmptyProducer().forEachCount((i, j) -> {
+ assertTrue(createEmptyProducer().forEachCell((i, j) -> {
passes[0]++;
return false;
}));
assertEquals(0, passes[0]);
- assertFalse(createProducer().forEachCount((i, j) -> {
+ assertFalse(createProducer().forEachCell((i, j) -> {
passes[0]++;
return false;
}));