Skip to content

Commit

Permalink
updated as per comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Claudenw committed Aug 3, 2023
1 parent e5a2b45 commit a197d2e
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ default int estimateIntersection(final BloomFilter other) {
}

/**
* Most Bloom filter's create unique IndexProducers.
* Most Bloom filters create unique IndexProducers.
*/
@Override
default IndexProducer uniqueIndices() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,14 @@ private void populate() {
@Override
public int[] asIndexArray() {
populate();
return counterCells.keySet().stream().mapToInt( c -> c.idx ).toArray();
return counterCells.keySet().stream().mapToInt(c -> c.idx ).toArray();
}

@Override
public boolean forEachCell(CellConsumer consumer) {
populate();
for (CounterCell cell : counterCells.values()) {
if (!consumer.test(cell.idx, cell.count) ) {
if (!consumer.test(cell.idx, cell.count)) {
return false;
}
}
Expand All @@ -138,14 +138,14 @@ final class CounterCell implements Comparable<CounterCell> {

@Override
public int compareTo(CounterCell other) {
return Integer.compare( idx, other.idx);
return Integer.compare(idx, other.idx);
}
}
};
}

/**
* Represents an operation that accepts an {@code <index, cell>} pair.
* Represents an operation that accepts an {@code <index, count>} pair.
* Returns {@code true} if processing should continue, {@code false} otherwise.
*
* <p>Note: This is a functional interface as a specialization of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
@FunctionalInterface
public interface IndexProducer {

int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;

/**
* Each index is passed to the predicate. The predicate is applied to each
* index value, if the predicate returns {@code false} the execution is stopped, {@code false}
Expand Down Expand Up @@ -110,13 +108,11 @@ public boolean test(long word) {
* <p>Indices ordering and uniqueness is not guaranteed.</p>
*
* <p><em>
* The default implementation of this method is slow. It is recommended
* that implementing classes reimplement this method.
* The default implementation of this method creates an array and populates
* it. Implementations that have access to an index array should consider
* returning a copy of that array if possible.
* </em></p>
*
* <p><em>
* The default implementation of this method returns unique values in order.
* </em></p>
* @return An int array of the data.
*/
default int[] asIndexArray() {
Expand All @@ -125,9 +121,7 @@ class Indices {
private int size;

boolean add(final int index) {
if (size == data.length) {
data = Arrays.copyOf(data, (int) Math.min(MAX_ARRAY_SIZE, size * 2L));
}
data = IndexUtils.ensureCapacityForAdd(data, index);
data[size++] = index;
return true;
}
Expand All @@ -143,14 +137,17 @@ int[] toArray() {
}

/**
* Creates an IndexProducer of unique indices for this index.
* Creates an IndexProducer comprising the unique indices for this producer.
*
* <p>By default creates a new producer with some overhead to remove
* duplicates. IndexProducers that return unique indices by default
* should override this to return {@code this}.</p>
*
* <p>This is like the `indices(Shape)` method except that it adds the guarantee that no
* duplicate values will be returned. The indices produced are equivalent to those returned
* from by a Bloom filter created from this hasher.</p>
* <p>The default implementation will filter the indices from this instance
* and return them in ascending order.</p>
*
* @return the iterator of integers
* @throws IndexOutOfBoundsException if any index is less than 0
* @return the IndexProducer of unique values.
* @throws IndexOutOfBoundsException if any index is less than zero.
*/
default IndexProducer uniqueIndices() {
final BitSet bitSet = new BitSet();
Expand All @@ -162,7 +159,7 @@ default IndexProducer uniqueIndices() {
return new IndexProducer() {
@Override
public boolean forEachIndex(IntPredicate predicate) {
for (int idx = bitSet.nextSetBit(0); idx >= 0; idx = bitSet.nextSetBit(idx+1)) {
for (int idx = bitSet.nextSetBit(0); idx >= 0; idx = bitSet.nextSetBit(idx + 1)) {
if (!predicate.test(idx)) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;

import java.util.Arrays;

public class IndexUtils {

public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;

// do not instantiate
private IndexUtils() {}

/**
* Ensure the array can add an element at the specified index.
* @param array the array to check.
* @param index the index to add at.
* @return the array or a newly allocated copy of the array.
*/
static int[] ensureCapacityForAdd(int[] array, int index) {
if (index >= array.length) {
return Arrays.copyOf(array, (int) Math.min(IndexUtils.MAX_ARRAY_SIZE, Math.max( array.length * 2L, index+1)));
}
return array;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public abstract class AbstractCellProducerTest extends AbstractIndexProducerTest
private static final CellConsumer FALSE_CONSUMER = (i, j) -> false;

/**
* Creates an array of expected values that alignes with the expected indices entries.
* Creates an array of expected values that aligns with the expected indices entries.
* @return an array of expected values.
* @see AbstractIndexProducerTest#getExpectedIndices()
*/
Expand Down Expand Up @@ -108,11 +108,11 @@ public final void testIndexConsistency() {
public void testForEachCellValues() {
int[] expectedIdx = getExpectedIndices();
int[] expectedValue = getExpectedValues();
assertEquals( expectedIdx.length, expectedValue.length, "expected index length and value length do not match");
assertEquals(expectedIdx.length, expectedValue.length, "expected index length and value length do not match");
int[] idx = {0};
createProducer().forEachCell((i, j) -> {
assertEquals(expectedIdx[idx[0]], i, "bad index at "+idx[0]);
assertEquals(expectedValue[idx[0]], j, "bad value at "+idx[0]);
assertEquals(expectedIdx[idx[0]], i, "bad index at " + idx[0]);
assertEquals(expectedValue[idx[0]], j, "bad value at " + idx[0]);
idx[0]++;
return true;
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ public void testExcludesDuplicates() {
assertTrue(bf1.forEachCell((x, y) -> false), "Hasher in removes results in value not equal to 0");
}

private void verifyMaxInsert( CountingBloomFilter bf, int from1, int from11) {
private void verifyMaxInsert(CountingBloomFilter bf, int from1, int from11) {
BloomFilter bfFrom0 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
bfFrom0.merge(new IncrementingHasher(0, 1));
BloomFilter bfFrom1 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
Expand Down Expand Up @@ -346,9 +346,9 @@ public void testGetMaxInsert() {
private void assertCell3(CountingBloomFilter bf, int value) {
bf.forEachCell((k, v) -> {
if (k == 3) {
assertEquals(value, v, "Mismatch at position (3) "+k);
assertEquals(value, v, "Mismatch at position 3");
} else {
assertEquals(0, v, "Mismatch at position "+k);
assertEquals(0, v, "Mismatch at position " + k);
}
return true;
});
Expand All @@ -372,10 +372,6 @@ public void mergeIncrementsAllCellsTest() {
// The add should increment cells 3 by 2
f2.add(CellProducer.from(ip));
assertCell3(f2, 2);

// This merge will increment by 1 as the round-trip makes the indices unique
f3.merge(IndexProducer.fromIndexArray(ip.asIndexArray()));
assertCell3(f3, 1);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,6 @@ protected int[] getExpectedIndices() {

@Override
protected int[] getExpectedValues() {
return new int[] { 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1};
return new int[] {1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

public class CellProducerFromDefaultIndexProducerTest extends AbstractCellProducerTest {

int[] data = { 0, 63, 1, 64, 128, 1, 127 };
int[] data = {0, 63, 1, 64, 128, 1, 127};
int[] indices = {0, 1, 63, 64, 127, 128};
int[] values = {1, 2, 1, 1, 1, 1 };

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ protected int[] getExpectedValues() {
@Override
protected CellProducer createProducer() {
return consumer -> {
for (int i=0; i<indices.length; i++) {
for (int i = 0; i < indices.length; i++) {
if (!consumer.test(indices[i], values[i] )) {
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@
import java.util.BitSet;
import java.util.Objects;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.IntStream;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

public class DefaultIndexProducerTest extends AbstractIndexProducerTest {

Expand Down Expand Up @@ -119,31 +122,10 @@ public void testFromIndexArray() {
}
}

@Test
public void testMoreThan32Entries() {
int[] values = new int[33];
for (int i=0; i<33; i++) {
values[i] = i;
}
IndexProducer producer = predicate -> {
Objects.requireNonNull(predicate);
for (final int i : values) {
if (!predicate.test(i)) {
return false;
}
}
return true;
};
int[] other = producer.asIndexArray();
assertArrayEquals(values, other);
}

@Test
public void test32Entries() {
int[] values = new int[32];
for (int i=0; i<32; i++) {
values[i] = i;
}
@ParameterizedTest
@ValueSource(ints = {32, 33})
public void testEntries(int size) {
int[] values = IntStream.range(0, size).toArray();
IndexProducer producer = predicate -> {
Objects.requireNonNull(predicate);
for (final int i : values) {
Expand Down

0 comments on commit a197d2e

Please sign in to comment.