diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java index 7ccd8bc924..6bfc60c7fb 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java @@ -16,109 +16,153 @@ */ package org.apache.commons.collections4.bloomfilter; +import java.util.TreeMap; import java.util.function.IntPredicate; + /** - * Defines a mapping of index to counts. + * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to + * refer to these counts and their associated index. This class is the equivalent of the index producer except + * that it produces cells. * - *

Note that a BitCountProducer may return duplicate indices and may be unordered. + *

Note that a CellProducer must not return duplicate indices and must return its indices in ascending order.

* - *

Implementations must guarantee that: + *

Implementations must guarantee that:

* * * - *

Note that implementations that do not output duplicate indices for BitCountProducer and - * do for IndexProducer, or vice versa, are consistent if the distinct indices from each are - * the same. - * - *

For example the mapping [(1,2),(2,3),(3,1)] can be output with many combinations including: - *

- * [(1,2),(2,3),(3,1)]
- * [(1,1),(1,1),(2,1),(2,1),(2,1),(3,1)]
- * [(1,1),(3,1),(1,1),(2,1),(2,1),(2,1)]
- * [(3,1),(1,1),(2,2),(1,1),(2,1)]
- * ...
- * 
- * * @since 4.5 */ @FunctionalInterface -public interface BitCountProducer extends IndexProducer { +public interface CellProducer extends IndexProducer { /** - * Performs the given action for each {@code } pair where the count is non-zero. - * Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each - * index-count pair, if the consumer returns {@code false} the execution is stopped, {@code false} - * is returned, and no further pairs are processed. + * Performs the given action for each {@code cell} where the cell count is non-zero. + * + *

Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to + * refer to these counts.

* - * Duplicate indices are not required to be aggregated. Duplicates may be output by the producer as - * noted in the class javadoc. + *

Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each + * cell. If the consumer returns {@code false} the execution is stopped, {@code false} + * is returned, and no further cells are processed.

* - * @param consumer the action to be performed for each non-zero bit count - * @return {@code true} if all count pairs return true from consumer, {@code false} otherwise. + * @param consumer the action to be performed for each non-zero cell. + * @return {@code true} if all cells return true from consumer, {@code false} otherwise. * @throws NullPointerException if the specified consumer is null */ - boolean forEachCount(BitCountConsumer consumer); + boolean forEachCell(CellConsumer consumer); /** - * The default implementation returns indices with ordering and uniqueness of {@code forEachCount()}. + * The default implementation returns distinct and ordered indices for all cells with a non-zero count. */ @Override default boolean forEachIndex(final IntPredicate predicate) { - return forEachCount((i, v) -> predicate.test(i)); + return forEachCell((i, v) -> predicate.test(i)); + } + + @Override + default IndexProducer uniqueIndices() { + return this; } /** - * Creates a BitCountProducer from an IndexProducer. The resulting - * producer will return every index from the IndexProducer with a count of 1. + * Creates a CellProducer from an IndexProducer. + * + *

Note the following properties: + *

* - *

Note that the BitCountProducer does not remove duplicates. Any use of the - * BitCountProducer to create an aggregate mapping of index to counts, such as a - * CountingBloomFilter, should use the same BitCountProducer in both add and - * subtract operations to maintain consistency. - *

- * @param idx An index producer. - * @return A BitCountProducer with the same indices as the IndexProducer. + *

A CellProducer that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations + * of indices including: + *

+     * [1, 1, 2, 2, 2, 3]
+     * [1, 3, 1, 2, 2, 2]
+     * [3, 2, 1, 2, 1, 2]
+     * ...
+     * 
+ * + * @param producer An index producer. + * @return A CellProducer with the same indices as the IndexProducer. */ - static BitCountProducer from(final IndexProducer idx) { - return new BitCountProducer() { - @Override - public boolean forEachCount(final BitCountConsumer consumer) { - return idx.forEachIndex(i -> consumer.test(i, 1)); + static CellProducer from(final IndexProducer producer) { + return new CellProducer() { + TreeMap counterCells = new TreeMap<>(); + + private void populate() { + if (counterCells.isEmpty()) { + producer.forEachIndex( idx -> { + CounterCell cell = new CounterCell(idx, 1); + CounterCell counter = counterCells.get(cell); + if (counter == null) { + counterCells.put(cell, cell); + } else { + counter.count++; + } + return true; + }); + } } @Override public int[] asIndexArray() { - return idx.asIndexArray(); + populate(); + return counterCells.keySet().stream().mapToInt( c -> c.idx ).toArray(); } @Override - public boolean forEachIndex(final IntPredicate predicate) { - return idx.forEachIndex(predicate); + public boolean forEachCell(CellConsumer consumer) { + populate(); + for (CounterCell cell : counterCells.values()) { + if (!consumer.test(cell.idx, cell.count) ) { + return false; + } + } + return true; + } + + /** + * Class to track cell values in the TreeMap. + */ + final class CounterCell implements Comparable { + final int idx; + int count; + + CounterCell(int idx, int count) { + this.idx = idx; + this.count = count; + } + + @Override + public int compareTo(CounterCell other) { + return Integer.compare( idx, other.idx); + } } }; } /** - * Represents an operation that accepts an {@code } pair representing - * the count for a bit index. Returns {@code true} - * if processing should continue, {@code false} otherwise. + * Represents an operation that accepts an {@code } pair. + * Returns {@code true} if processing should continue, {@code false} otherwise. * *

Note: This is a functional interface as a specialization of * {@link java.util.function.BiPredicate} for {@code int}.

*/ @FunctionalInterface - interface BitCountConsumer { + interface CellConsumer { /** * Performs an operation on the given {@code } pair. * * @param index the bit index. - * @param count the count at the specified bit index. + * @param count the cell value at the specified bit index. * @return {@code true} if processing should continue, {@code false} if processing should stop. */ boolean test(int index, int count); diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java index 2a5aa0a622..1d44e58a48 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java @@ -21,35 +21,34 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; -import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.util.Arrays; import java.util.BitSet; -import org.apache.commons.collections4.bag.TreeBag; -import org.apache.commons.collections4.bloomfilter.BitCountProducer.BitCountConsumer; +import org.apache.commons.collections4.bloomfilter.CellProducer.CellConsumer; import org.junit.jupiter.api.Test; -public abstract class AbstractBitCountProducerTest extends AbstractIndexProducerTest { +public abstract class AbstractCellProducerTest extends AbstractIndexProducerTest { /** - * A testing BitCountConsumer that always returns true. + * A testing CellConsumer that always returns true. */ - private static final BitCountConsumer TRUE_CONSUMER = (i, j) -> true; + private static final CellConsumer TRUE_CONSUMER = (i, j) -> true; /** - * A testing BitCountConsumer that always returns false. + * A testing CellConsumer that always returns false. 
*/ - private static final BitCountConsumer FALSE_CONSUMER = (i, j) -> false; + private static final CellConsumer FALSE_CONSUMER = (i, j) -> false; /** - * Creates an array of integer pairs comprising the index and the expected count for the index. - * The order and count for each index is dependent upon the producer created by the {@code createProducer()} - * method. - * By default returns the each {@code getExpectedIndices()} value paired with 1 (one). - * @return an array of integer pairs comprising the index and the expected count for the index. + * Creates an array of expected values that aligns with the expected index entries. + * @return an array of expected values. + * @see AbstractIndexProducerTest#getExpectedIndices() */ - protected int[][] getExpectedBitCount() { - return Arrays.stream(getExpectedIndices()).mapToObj(x -> new int[] {x, 1}).toArray(int[][]::new); + protected abstract int[] getExpectedValues(); + + @Override + protected final int getAsIndexArrayBehaviour() { + return ORDERED | DISTINCT; } /** @@ -57,57 +56,48 @@ protected int[][] getExpectedBitCount() { * @return a producer with some data */ @Override - protected abstract BitCountProducer createProducer(); + protected abstract CellProducer createProducer(); /** * Creates a producer without data. * @return a producer that has no data. 
- */ - protected int getForEachCountBehaviour() { - return getAsIndexArrayBehaviour(); - } + protected abstract CellProducer createEmptyProducer(); @Test - public final void testForEachCountPredicates() { - final BitCountProducer populated = createProducer(); - final BitCountProducer empty = createEmptyProducer(); + public final void testForEachCellPredicates() { + final CellProducer populated = createProducer(); + final CellProducer empty = createEmptyProducer(); - assertFalse(populated.forEachCount(FALSE_CONSUMER), "non-empty should be false"); - assertTrue(empty.forEachCount(FALSE_CONSUMER), "empty should be true"); + assertFalse(populated.forEachCell(FALSE_CONSUMER), "non-empty should be false"); + assertTrue(empty.forEachCell(FALSE_CONSUMER), "empty should be true"); - assertTrue(populated.forEachCount(TRUE_CONSUMER), "non-empty should be true"); - assertTrue(empty.forEachCount(TRUE_CONSUMER), "empty should be true"); + assertTrue(populated.forEachCell(TRUE_CONSUMER), "non-empty should be true"); + assertTrue(empty.forEachCell(TRUE_CONSUMER), "empty should be true"); } @Test - public final void testEmptyBitCountProducer() { - final BitCountProducer empty = createEmptyProducer(); + public final void testEmptyCellProducer() { + final CellProducer empty = createEmptyProducer(); final int ary[] = empty.asIndexArray(); assertEquals(0, ary.length); - assertTrue(empty.forEachCount((i, j) -> { - fail("forEachCount consumer should not be called"); + assertTrue(empty.forEachCell((i, j) -> { + fail("forEachCell consumer should not be called"); return false; })); } @Test public final void testIndexConsistency() { - final BitCountProducer producer = createProducer(); + final CellProducer producer = createProducer(); final BitSet bs1 = new BitSet(); final BitSet bs2 = new BitSet(); producer.forEachIndex(i -> { bs1.set(i); return true; }); - producer.forEachCount((i, j) -> { + producer.forEachCell((i, j) -> { bs2.set(i); return true; }); @@ -115,51 +105,47 @@ public final 
void testIndexConsistency() { } @Test - public void testForEachCountValues() { - // Assumes the collections bag works. Could be replaced with Map with more work. - final TreeBag expected = new TreeBag<>(); - Arrays.stream(getExpectedBitCount()).forEach(c -> expected.add(c[0], c[1])); - final TreeBag actual = new TreeBag<>(); - // can not return actual.add as it returns false on duplicate 'i' - createProducer().forEachCount((i, j) -> { - actual.add(i, j); + public void testForEachCellValues() { + int[] expectedIdx = getExpectedIndices(); + int[] expectedValue = getExpectedValues(); + assertEquals( expectedIdx.length, expectedValue.length, "expected index length and value length do not match"); + int[] idx = {0}; + createProducer().forEachCell((i, j) -> { + assertEquals(expectedIdx[idx[0]], i, "bad index at "+idx[0]); + assertEquals(expectedValue[idx[0]], j, "bad value at "+idx[0]); + idx[0]++; return true; }); - assertEquals(expected, actual); } /** - * Test the behavior of {@link BitCountProducer#forEachCount(BitCountConsumer)} with respect + * Test the behavior of {@link CellProducer#forEachCell(CellConsumer)} with respect * to ordered and distinct indices. Currently the behavior is assumed to be the same as * {@link IndexProducer#forEachIndex(java.util.function.IntPredicate)}. 
*/ @Test - public final void testBehaviourForEachCount() { - final int flags = getForEachCountBehaviour(); - assumeTrue((flags & (ORDERED | DISTINCT)) != 0); + public final void testBehaviourForEachCell() { final IntList list = new IntList(); - createProducer().forEachCount((i, j) -> list.add(i)); + createProducer().forEachCell((i, j) -> list.add(i)); final int[] actual = list.toArray(); - if ((flags & ORDERED) != 0) { - final int[] expected = Arrays.stream(actual).sorted().toArray(); - assertArrayEquals(expected, actual); - } - if ((flags & DISTINCT) != 0) { - final long count = Arrays.stream(actual).distinct().count(); - assertEquals(count, actual.length); - } + // check order + final int[] expected = Arrays.stream(actual).sorted().toArray(); + assertArrayEquals(expected, actual); + // check distinct + final long count = Arrays.stream(actual).distinct().count(); + assertEquals(count, actual.length); } @Test - public void testForEachCountEarlyExit() { + public void testForEachCellEarlyExit() { final int[] passes = new int[1]; - assertTrue(createEmptyProducer().forEachCount((i, j) -> { + assertTrue(createEmptyProducer().forEachCell((i, j) -> { passes[0]++; return false; })); assertEquals(0, passes[0]); - assertFalse(createProducer().forEachCount((i, j) -> { + assertFalse(createProducer().forEachCell((i, j) -> { passes[0]++; return false; }));