Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
garydgregory committed Apr 17, 2024
2 parents c94d3a4 + cb47bc8 commit 720c9e5
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 16 deletions.
2 changes: 2 additions & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
</properties>
<body>
<release version="4.5.0" date="YYYY-MM-DD" description="This milestone release requires Java 8 and adds the package `org.apache.commons.collections4.bloomfilter`.">
<!-- UPDATE -->
<action issue="COLLECTIONS-852" type="update" dev="ggregory" due-to="Claude Warren, Alex Herbert">Add layered bloom filter clean method #476.</action>
<!-- FIX -->
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot">Bump org.apache.commons:commons-parent from 67 to 69 #473.</action>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public LayerManager build() {
*
* @param cleanup the Consumer that will modify the list of filters removing out
* dated or stale filters.
* @return this for chaining.
* @return this
*/
public Builder setCleanup(Consumer<LinkedList<BloomFilter>> cleanup) {
this.cleanup = cleanup;
Expand Down Expand Up @@ -375,9 +375,23 @@ public final BloomFilter getTarget() {
* This method is used within {@link #getTarget()} when the configured
* {@code ExtendCheck} returns {@code true}.
* </p>
* @see LayerManager.Builder#setExtendCheck(Predicate)
* @see LayerManager.Builder#setCleanup(Consumer)
*/
void next() {
    // Hand the current filter list to the configured cleanup first, then call addFilter().
    filterCleanup.accept(filters);
    addFilter();
}

/**
 * Forces execution of the configured cleanup without creating a new filter,
 * except when the cleanup leaves the list of filters empty; in that case
 * {@code addFilter()} is invoked so the list is not left empty.
 *
 * @see LayerManager.Builder#setCleanup(Consumer)
 */
void cleanup() {
    filterCleanup.accept(filters);
    final boolean allLayersRemoved = filters.isEmpty();
    if (allLayersRemoved) {
        addFilter();
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -369,12 +369,24 @@ public boolean merge(IndexProducer indexProducer) {
}

/**
* Forces and advance to the next layer. Executes the same logic as when
* LayerManager.extendCheck returns {@code true}
* Forces an advance to the next layer. This method will clean up the current
* layers and generate a new filter layer. In most cases it is unnecessary to
* call this method directly.
*
* @see LayerManager
* @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
* @see LayerManager.Builder#setExtendCheck(Predicate)
*/
public void next() {
    // Pure delegation: the layer manager performs the advance.
    this.layerManager.next();
}

/**
 * Forces execution of the cleanup {@code Consumer} that was supplied when the
 * associated {@code LayerManager} was built. The call is delegated to the
 * layer manager.
 *
 * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
 */
public void cleanup() {
    this.layerManager.cleanup();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,7 @@ private static int calculateNumberOfHashFunctions(final int numberOfItems, final
// than integer math.
final long k = Math.round(LN_2 * numberOfBits / numberOfItems);
if (k < 1) {
throw new IllegalArgumentException(
String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
throw new IllegalArgumentException(String.format("Filter too small: Calculated number of hash functions (%s) was less than 1", k));
}
// Normally we would check that numberOfHashFunctions <= Integer.MAX_VALUE but
// since numberOfBits is at most Integer.MAX_VALUE the numerator of
Expand All @@ -137,8 +136,7 @@ private static void checkCalculatedProbability(final double probability) {
// exp(-1/Integer.MAX_VALUE) approx 0.9999999995343387 so Math.pow( x, y ) will
// always be 0<x<1 and y>0
if (probability >= 1.0) {
throw new IllegalArgumentException(
String.format("Calculated probability is greater than or equal to 1: " + probability));
throw new IllegalArgumentException("Calculated probability is greater than or equal to 1: " + probability);
}
}

Expand All @@ -165,8 +163,7 @@ private static int checkNumberOfBits(final int numberOfBits) {
*/
private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) {
if (numberOfHashFunctions < 1) {
throw new IllegalArgumentException(
"Number of hash functions must be greater than 0: " + numberOfHashFunctions);
throw new IllegalArgumentException("Number of hash functions must be greater than 0: " + numberOfHashFunctions);
}
return numberOfHashFunctions;
}
Expand Down Expand Up @@ -330,8 +327,7 @@ public static Shape fromPMK(final double probability, final int numberOfBits, fi

// Number of items (n):
// n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))
final double n = Math.ceil(numberOfBits
/ (-numberOfHashFunctions / Math.log(-Math.expm1(Math.log(probability) / numberOfHashFunctions))));
final double n = Math.ceil(numberOfBits / (-numberOfHashFunctions / Math.log(-Math.expm1(Math.log(probability) / numberOfHashFunctions))));

// log of probability is always < 0
// number of hash functions is >= 1
Expand Down Expand Up @@ -378,8 +374,7 @@ public boolean equals(final Object obj) {
// Shape is final so no check for the same class as inheritance is not possible
if (obj instanceof Shape) {
final Shape other = (Shape) obj;
return numberOfBits == other.numberOfBits &&
numberOfHashFunctions == other.numberOfHashFunctions;
return numberOfBits == other.numberOfBits && numberOfHashFunctions == other.numberOfHashFunctions;
}
return false;
}
Expand Down Expand Up @@ -463,8 +458,7 @@ public double getProbability(final int numberOfItems) {
if (numberOfItems == 0) {
return 0;
}
return Math.pow(-Math.expm1(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits),
numberOfHashFunctions);
return Math.pow(-Math.expm1(-1.0 * numberOfHashFunctions * numberOfItems / numberOfBits), numberOfHashFunctions);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -291,4 +291,13 @@ public void testTarget() {
assertEquals(2, supplierCount[0]);
}

/**
 * Test helper that wraps a new {@link SimpleBloomFilter} and tags it with two
 * integers supplied at construction.
 */
static class NumberedBloomFilter extends WrappedBloomFilter {

    /** Counter supplied at construction; left non-final so tests can modify it. */
    int value;

    /** Sequence number supplied at construction. */
    int sequence;

    /**
     * Creates a wrapper around a new {@code SimpleBloomFilter} of the given shape.
     *
     * @param shape    the shape passed to the wrapped {@code SimpleBloomFilter}.
     * @param value    the initial value of {@link #value}.
     * @param sequence the initial value of {@link #sequence}.
     */
    NumberedBloomFilter(Shape shape, int value, int sequence) {
        super(new SimpleBloomFilter(shape));
        this.sequence = sequence;
        this.value = value;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.commons.collections4.bloomfilter;

import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
Expand All @@ -30,6 +31,7 @@

import org.apache.commons.collections4.bloomfilter.LayerManager.Cleanup;
import org.apache.commons.collections4.bloomfilter.LayerManager.ExtendCheck;
import org.apache.commons.collections4.bloomfilter.LayerManagerTest.NumberedBloomFilter;
import org.junit.jupiter.api.Test;

public class LayeredBloomFilterTest extends AbstractBloomFilterTest<LayeredBloomFilter> {
Expand Down Expand Up @@ -311,4 +313,36 @@ public final void testNext() {
assertFalse(filter.get(1).contains(TestingHashers.FROM11));
assertTrue(filter.get(1).contains(new IncrementingHasher(11, 2)));
}

@Test
public void testCleanup() {
    // One-element array so the supplier lambda can increment the counter.
    final int[] nextSequence = {1};
    // Every layer starts with value 3. Each cleanup pass removes the layers whose
    // value is 0 at the time of the check and decrements the value of every layer.
    final LayerManager manager = LayerManager.builder()
            .setSupplier(() -> new NumberedBloomFilter(getTestShape(), 3, nextSequence[0]++))
            .setExtendCheck(ExtendCheck.neverAdvance())
            .setCleanup(layers -> layers.removeIf(layer -> ((NumberedBloomFilter) layer).value-- == 0))
            .build();
    final LayeredBloomFilter layered = new LayeredBloomFilter(getTestShape(), manager);

    assertEquals(1, layered.getDepth());
    layered.merge(TestingHashers.randomHasher());

    // Layer #1 value: 3 -> 2; nothing removed, nothing added.
    layered.cleanup();
    assertEquals(1, layered.getDepth());

    // Layer #1 value: 2 -> 1; next() also adds layer #2.
    layered.next();
    assertEquals(2, layered.getDepth());
    layered.merge(TestingHashers.randomHasher());

    // Layer #1 value: 1 -> 0; it is still present after this pass.
    layered.cleanup();
    final NumberedBloomFilter oldest = (NumberedBloomFilter) layered.get(0);
    assertEquals(1, oldest.sequence);
    assertEquals(2, layered.getDepth());

    // Layer #1 is removed now; layer #2 moves to the front with value 1.
    layered.cleanup();
    assertEquals(1, layered.getDepth());
    final NumberedBloomFilter survivor = (NumberedBloomFilter) layered.get(0);
    assertEquals(2, survivor.sequence);

    // Layer #2 value: 1 -> 0.
    layered.cleanup();
    // Layer #2 is removed; because at least one layer always exists, a new one is created.
    layered.cleanup();
    assertEquals(1, layered.getDepth());
    final NumberedBloomFilter replacement = (NumberedBloomFilter) layered.get(0);
    assertEquals(3, replacement.sequence); // it is a new one.
}
}

0 comments on commit 720c9e5

Please sign in to comment.