apache · aherbert · Aug 15, 2023 · Jul 16, 2023 · Jul 27, 2023 · Jul 27, 2023
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
@@ -311,7 +311,7 @@ default int estimateIntersection(final BloomFilter other) {
     }
 
     /**
-     * Most Bloom filter's create unique IndexProducers.
+     * Most Bloom filters create unique IndexProducers.
      */
     @Override
     default IndexProducer uniqueIndices() {

diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CellProducer.java
@@ -67,11 +67,6 @@ default boolean forEachIndex(final IntPredicate predicate) {
         return forEachCell((i, v) -> predicate.test(i));
     }
 
-    @Override
-    default IndexProducer uniqueIndices() {
-        return this;
-    }
-
     /**
      * Creates a CellProducer from an IndexProducer.
      *
@@ -115,14 +110,14 @@ private void populate() {
             @Override
             public int[] asIndexArray() {
                 populate();
-                return counterCells.keySet().stream().mapToInt( c -> c.idx ).toArray();
+                return counterCells.keySet().stream().mapToInt(c -> c.idx ).toArray();
             }
 
             @Override
             public boolean forEachCell(CellConsumer consumer) {
                 populate();
                 for (CounterCell cell : counterCells.values()) {
-                    if (!consumer.test(cell.idx, cell.count) ) {
+                    if (!consumer.test(cell.idx, cell.count)) {
                         return false;
                     }
                 }
@@ -143,14 +138,14 @@ final class CounterCell implements Comparable<CounterCell> {
 
                 @Override
                 public int compareTo(CounterCell other) {
-                    return Integer.compare( idx,  other.idx);
+                    return Integer.compare(idx,  other.idx);
                 }
             }
         };
     }
 
     /**
-     * Represents an operation that accepts an {@code <index, cell>} pair.
+     * Represents an operation that accepts an {@code <index, count>} pair.
      * Returns {@code true} if processing should continue, {@code false} otherwise.
      *
      * <p>Note: This is a functional interface as a specialization of
@@ -168,3 +163,4 @@ interface CellConsumer {
         boolean test(int index, int count);
     }
 }
+
diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexFilter.java
@@ -27,9 +27,9 @@
  * @since 4.5
  */
 public final class IndexFilter {
-
-    // do not instantiate.
-    private IndexFilter() {}
+    private final IntPredicate tracker;
+    private final int size;
+    private final IntPredicate consumer;
 
     /**
      * Creates an instance optimized for the specified shape.
@@ -38,20 +38,44 @@ private IndexFilter() {}
      * @return an IndexFilter optimized for the specified shape.
      */
     public static IntPredicate create(final Shape shape, final IntPredicate consumer) {
-        int size = shape.getNumberOfBits();
-        IntPredicate result = number -> {
-            if (number >= size) {
-                throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
-            }
-            return true;
-        };
+        return new IndexFilter(shape, consumer)::test;
+    }
+
+    /**
+     * Creates an instance optimized for the specified shape.
+     * @param shape The shape that is being generated.
+     * @param consumer The consumer to accept the values.
+     */
+    private IndexFilter(final Shape shape, final IntPredicate consumer) {
+        this.size = shape.getNumberOfBits();
+        this.consumer = consumer;
         if (BitMap.numberOfBitMaps(shape.getNumberOfBits()) * Long.BYTES < (long) shape.getNumberOfHashFunctions()
                 * Integer.BYTES) {
-            result = result.and(new BitMapTracker(shape).negate());
+            this.tracker = new BitMapTracker(shape);
         } else {
-            result = result.and(new ArrayTracker(shape).negate());
+            this.tracker = new ArrayTracker(shape);
+        }
+    }
+
+    /**
+     * Test if the number should be processed by the {@code consumer}.
+     *
+     * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned.
+     * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p>
+     *
+     * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p>
+     *
+     * @param number the number to check.
+     * @return {@code true} if processing should continue, {@code false} otherwise.
+     */
+    public boolean test(final int number) {
+        if (number >= size) {
+            throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
+        }
+        if (tracker.test(number)) {
+            return  consumer.test(number);
         }
-        return result.or(consumer);
+        return true;
     }
 
     /**

diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexProducer.java
@@ -33,8 +33,6 @@
 @FunctionalInterface
 public interface IndexProducer {
 
-    int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
-
     /**
      * Each index is passed to the predicate. The predicate is applied to each
      * index value, if the predicate returns {@code false} the execution is stopped, {@code false}
@@ -110,13 +108,11 @@ public boolean test(long word) {
      * <p>Indices ordering and uniqueness is not guaranteed.</p>
      *
      * <p><em>
-     * The default implementation of this method is slow. It is recommended
-     * that implementing classes reimplement this method.
+     * The default implementation of this method creates an array and populates
+     * it.  Implementations that have access to an index array should consider
+     * returning a copy of that array if possible.
      * </em></p>
      *
-     * <p><em>
-     * The default implementation of this method returns unique values in order.
-     * </em></p>
      * @return An int array of the data.
      */
     default int[] asIndexArray() {
@@ -125,9 +121,7 @@ class Indices {
             private int size;
 
             boolean add(final int index) {
-                if (size == data.length) {
-                    data = Arrays.copyOf(data, (int) Math.min(MAX_ARRAY_SIZE, size * 2L));
-                }
+                data = IndexUtils.ensureCapacityForAdd(data, index);
                 data[size++] = index;
                 return true;
             }
@@ -143,14 +137,17 @@ int[] toArray() {
     }
 
     /**
-     * Creates an IndexProducer of unique indices for this index.
+     * Creates an IndexProducer comprising the unique indices for this producer.
+     *
+     * <p>By default creates a new producer with some overhead to remove
+     * duplicates.  IndexProducers that return unique indices by default
+     * should override this to return {@code this}.</p>
      *
-     * <p>This is like the `indices(Shape)` method except that it adds the guarantee that no
-     * duplicate values will be returned. The indices produced are equivalent to those returned
-     * from by a Bloom filter created from this hasher.</p>
+     * <p>The default implementation will filter the indices from this instance
+     * and return them in ascending order.</p>
      *
-     * @return the iterator of integers
-     * @throws IndexOutOfBoundsException if any index is less than 0
+     * @return the IndexProducer of unique values.
+     * @throws IndexOutOfBoundsException if any index is less than zero.
      */
     default IndexProducer uniqueIndices() {
         final BitSet bitSet = new BitSet();
@@ -162,7 +159,7 @@ default IndexProducer uniqueIndices() {
         return new IndexProducer() {
             @Override
             public boolean forEachIndex(IntPredicate predicate) {
-                for (int idx = bitSet.nextSetBit(0); idx >= 0; idx = bitSet.nextSetBit(idx+1)) {
+                for (int idx = bitSet.nextSetBit(0); idx >= 0; idx = bitSet.nextSetBit(idx + 1)) {
                     if (!predicate.test(idx)) {
                         return false;
                     }

diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/IndexUtils.java b/src/main/java/org/apache/commons/collections4/bloomfilter/IndexUtils.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import java.util.Arrays;
+
+/**
+ * Provides functions to assist in IndexProducer creation and manipulation.
+ * @see IndexProducer
+ */
+public class IndexUtils {
+
+    /**
+     * The maximum array size for the methods in this class.
+     */
+    public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
+    // do not instantiate
+    private IndexUtils() {}
+
+    /**
+     * Ensure the array can add an element at the specified index.
+     * @param array the array to check.
+     * @param index the index to add at.
+     * @return the array or a newly allocated copy of the array.
+     */
+    static int[] ensureCapacityForAdd(int[] array, int index) {
+        if (index >= array.length) {
+            return Arrays.copyOf(array, (int) Math.min(IndexUtils.MAX_ARRAY_SIZE, Math.max( array.length * 2L, index+1)));
+        }
+        return array;
+    }
+}
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCellProducerTest.java
@@ -40,7 +40,7 @@ public abstract class AbstractCellProducerTest extends AbstractIndexProducerTest
     private static final CellConsumer FALSE_CONSUMER = (i, j) -> false;
 
     /**
-     * Creates an array of expected values that alignes with the expected indices entries.
+     * Creates an array of expected values that aligns with the expected indices entries.
      * @return an array of expected values.
      * @see AbstractIndexProducerTest#getExpectedIndices()
      */
@@ -108,11 +108,11 @@ public final void testIndexConsistency() {
     public void testForEachCellValues() {
         int[] expectedIdx = getExpectedIndices();
         int[] expectedValue = getExpectedValues();
-        assertEquals( expectedIdx.length, expectedValue.length, "expected index length and value length do not match");
+        assertEquals(expectedIdx.length, expectedValue.length, "expected index length and value length do not match");
         int[] idx = {0};
         createProducer().forEachCell((i, j) -> {
-            assertEquals(expectedIdx[idx[0]], i, "bad index at "+idx[0]);
-            assertEquals(expectedValue[idx[0]], j, "bad value at "+idx[0]);
+            assertEquals(expectedIdx[idx[0]], i, "bad index at " + idx[0]);
+            assertEquals(expectedValue[idx[0]], j, "bad value at " + idx[0]);
             idx[0]++;
             return true;
         });
@@ -152,3 +152,4 @@ public void testForEachCellEarlyExit() {
         assertEquals(1, passes[0]);
     }
 }
+
diff --git a/...est/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java b/...est/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
@@ -299,7 +299,7 @@ public void testExcludesDuplicates() {
         assertTrue(bf1.forEachCell((x, y) -> false), "Hasher in removes results in value not equal to 0");
     }
 
-    private void verifyMaxInsert( CountingBloomFilter bf, int from1, int from11) {
+    private void verifyMaxInsert(CountingBloomFilter bf, int from1, int from11) {
         BloomFilter bfFrom0 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
         bfFrom0.merge(new IncrementingHasher(0, 1));
         BloomFilter bfFrom1 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
@@ -346,9 +346,9 @@ public void testGetMaxInsert() {
     private void assertCell3(CountingBloomFilter bf, int value) {
         bf.forEachCell((k, v) -> {
             if (k == 3) {
-                assertEquals(value, v, "Mismatch at position (3) "+k);
+                assertEquals(value, v, "Mismatch at position 3");
             } else {
-                assertEquals(0, v, "Mismatch at position "+k);
+                assertEquals(0, v, "Mismatch at position " + k);
             }
             return true;
         });
@@ -372,10 +372,6 @@ public void mergeIncrementsAllCellsTest() {
         // The add should increment cells 3 by 2
         f2.add(CellProducer.from(ip));
         assertCell3(f2, 2);
-
-        // This merge will increment by 1 as the round-trip makes the indices unique
-        f3.merge(IndexProducer.fromIndexArray(ip.asIndexArray()));
-        assertCell3(f3, 1);
     }
 
     @Test

diff --git a/...apache/commons/collections4/bloomfilter/CellProducerFromArrayCountingBloomFilterTest.java b/...apache/commons/collections4/bloomfilter/CellProducerFromArrayCountingBloomFilterTest.java
@@ -40,6 +40,6 @@ protected int[] getExpectedIndices() {
 
     @Override
     protected int[] getExpectedValues() {
-        return new int[] { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1};
+        return new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1};
     }
 }
diff --git a/...org/apache/commons/collections4/bloomfilter/CellProducerFromDefaultIndexProducerTest.java b/...org/apache/commons/collections4/bloomfilter/CellProducerFromDefaultIndexProducerTest.java
@@ -18,7 +18,7 @@
 
 public class CellProducerFromDefaultIndexProducerTest extends AbstractCellProducerTest {
 
-    int[] data = { 0, 63, 1, 64, 128, 1, 127 };
+    int[] data = {0, 63, 1, 64, 128, 1, 127};
     int[] indices = {0, 1, 63, 64, 127, 128};
     int[] values = {1, 2, 1, 1, 1, 1 };
 

diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultCellProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultCellProducerTest.java
@@ -35,7 +35,7 @@ protected int[] getExpectedValues() {
     @Override
     protected CellProducer createProducer() {
         return consumer -> {
-            for (int i=0; i<indices.length; i++) {
+            for (int i = 0; i < indices.length; i++) {
                 if (!consumer.test(indices[i], values[i] )) {
                     return false;
                 }

diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java
@@ -22,8 +22,11 @@
 import java.util.BitSet;
 import java.util.Objects;
 import java.util.concurrent.ThreadLocalRandom;
+import java.util.stream.IntStream;
 
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 
 public class DefaultIndexProducerTest extends AbstractIndexProducerTest {
 
@@ -119,31 +122,10 @@ public void testFromIndexArray() {
         }
     }
 
-    @Test
-    public void testMoreThan32Entries() {
-        int[] values = new int[33];
-        for (int i=0; i<33; i++) {
-            values[i] = i;
-        }
-        IndexProducer producer =  predicate -> {
-            Objects.requireNonNull(predicate);
-            for (final int i : values) {
-                if (!predicate.test(i)) {
-                    return false;
-                }
-            }
-            return true;
-        };
-        int[] other = producer.asIndexArray();
-        assertArrayEquals(values, other);
-    }
-
-    @Test
-    public void test32Entries() {
-        int[] values = new int[32];
-        for (int i=0; i<32; i++) {
-            values[i] = i;
-        }
+    @ParameterizedTest
+    @ValueSource(ints = {32, 33})
+    public void testEntries(int size) {
+        int[] values = IntStream.range(0, size).toArray();
         IndexProducer producer =  predicate -> {
             Objects.requireNonNull(predicate);
             for (final int i : values) {