Skip to content

Commit

Permalink
added getMaxInsert()
Browse files Browse the repository at this point in the history
  • Loading branch information
Claudenw committed Jul 22, 2023
1 parent 62a0484 commit 00adc10
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,57 @@ public interface CountingBloomFilter extends BloomFilter, CellProducer {
*/
int getMaxCell();

/**
 * Reports an upper bound on how many times the given Bloom filter could have been
 * merged into this counting filter.
 *
 * <p>The Bloom filter is viewed as a {@code BitMapProducer} and the work is handed to
 * the {@code BitMapProducer} overload of this method.</p>
 *
 * @param bloomFilter the Bloom filter to check for.
 * @return the maximum number of times the Bloom filter could have been inserted.
 */
default int getMaxInsert(BloomFilter bloomFilter) {
    // The explicit cast selects the BitMapProducer overload rather than this one.
    final BitMapProducer asBitMaps = (BitMapProducer) bloomFilter;
    return getMaxInsert(asBitMaps);
}

/**
 * Reports an upper bound on how many times the given IndexProducer could have been
 * merged into this counting filter.
 *
 * <p>The indices are first converted to a bit map sized to this filter's shape, and the
 * result is handed to the {@code BitMapProducer} overload of this method.</p>
 *
 * <p>NOTE(review): if an IndexProducer that repeats an index was merged, the matching cell
 * may have been incremented more than once per merge, so the value returned here could
 * exceed the real number of merges - confirm against the merge(IndexProducer) contract.</p>
 *
 * @param idxProducer the producer to drive the count check.
 * @return the maximum number of times the IndexProducer could have been inserted.
 */
default int getMaxInsert(IndexProducer idxProducer) {
    final int numberOfBits = getShape().getNumberOfBits();
    return getMaxInsert(BitMapProducer.fromIndexProducer(idxProducer, numberOfBits));
}

/**
 * Reports an upper bound on how many times the given Hasher could have been merged
 * into this counting filter.
 *
 * <p>The hasher is asked for its indices under this filter's shape, and those indices are
 * handed to the {@code IndexProducer} overload of this method.</p>
 *
 * @param hasher the Hasher to provide the indices.
 * @return the maximum number of times the hasher could have been inserted.
 */
default int getMaxInsert(Hasher hasher) {
    final IndexProducer indices = hasher.indices(getShape());
    return getMaxInsert(indices);
}

/**
 * Determines the maximum number of times the BitMapProducer could have been inserted into this
 * counting filter.
 *
 * <p>Returns 0 when {@code contains(bitMapProducer)} is false. Otherwise every cell of this
 * filter is visited and the smallest count among the cells whose index is enabled in the
 * bit map is returned. If no visited cell matches an enabled bit, the initial value
 * {@code Integer.MAX_VALUE} is returned unchanged.</p>
 *
 * <p>NOTE(review): a cell count may be larger than the number of merges if an earlier merge
 * supplied the same index more than once; in that case this value over-estimates - confirm
 * against the merge(IndexProducer) contract.</p>
 *
 * @param bitMapProducer the BitMapProducer to provide the indices.
 * @return the maximum number of times the BitMapProducer could have been inserted.
 */
default int getMaxInsert(BitMapProducer bitMapProducer) {
// Something this filter does not contain cannot have been inserted at all.
if (!contains(bitMapProducer)) {
return 0;
}
long[] bitMaps = bitMapProducer.asBitMapArray();
// One-element array so the lambda below can update the running minimum.
int[] max = { Integer.MAX_VALUE };
forEachCell((x, y) -> {
// Only cells whose index x is enabled in the supplied bit map are considered.
if ((bitMaps[BitMap.getLongIndex(x)] & BitMap.getLongBit(x)) != 0) {
// Keep the smaller of the running minimum and this cell's count y.
max[0] = max[0] <= y ? max[0] : y;

This comment has been minimized.

Copy link
@aherbert

aherbert Jul 22, 2023

Contributor

I do not think this is the correct implementation for all cases. When we insert IndexProducer items they may duplicate indices. The documentation for merge(IndexProducer) states:

"Indices that are returned multiple times will be incremented multiple times"

So after inserting 1 IndexProducer your count y may be higher than 1. If I then call getMaxInsert with the same index producer I will obtain more than 1.

Thoughts on this?

}
// presumably true tells forEachCell to keep iterating - verify against its contract
return true;
});
return max[0];
}

// Modification Operations

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,4 +293,48 @@ public void testExcludesDuplicates() {
assertEquals(0, bf1.cardinality());
assertTrue(bf1.forEachCell((x, y) -> false), "Hasher in removes results in value not equal to 0");
}

/**
 * Checks the value reported by every {@code getMaxInsert} overload of the given counting
 * filter for three inputs: an incrementing hasher starting at 0 (always expected to report 0),
 * {@code TestingHashers.FROM1} and {@code TestingHashers.FROM11}.
 *
 * @param bf the counting filter under test.
 * @param from1 the expected result for {@code TestingHashers.FROM1}.
 * @param from11 the expected result for {@code TestingHashers.FROM11}.
 */
private void verifyMaxInsert(CountingBloomFilter bf, int from1, int from11) {
    // Build a plain Bloom filter for each hasher before running any assertion.
    BloomFilter zeroBased = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
    zeroBased.merge(new IncrementingHasher(0, 1));
    BloomFilter oneBased = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
    oneBased.merge(TestingHashers.FROM1);
    BloomFilter elevenBased = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
    elevenBased.merge(TestingHashers.FROM11);

    assertMaxInsertForEachOverload(bf, 0, new IncrementingHasher(0, 1), zeroBased);
    assertMaxInsertForEachOverload(bf, from1, TestingHashers.FROM1, oneBased);
    assertMaxInsertForEachOverload(bf, from11, TestingHashers.FROM11, elevenBased);
}

/**
 * Asserts that the Hasher, BloomFilter, BitMapProducer and IndexProducer overloads of
 * {@code getMaxInsert} all report the expected value, in that order.
 */
private void assertMaxInsertForEachOverload(CountingBloomFilter bf, int expected, Hasher hasher,
        BloomFilter filter) {
    assertEquals(expected, bf.getMaxInsert(hasher));
    assertEquals(expected, bf.getMaxInsert(filter));
    assertEquals(expected, bf.getMaxInsert((BitMapProducer) filter));
    assertEquals(expected, bf.getMaxInsert((IndexProducer) filter));
}

/**
 * Walks a counting filter through a sequence of merges and removals and checks the
 * {@code getMaxInsert} results after every step.
 */
@Test
public void testGetMaxInsert() {
    final CountingBloomFilter counting = createEmptyFilter(getTestShape());
    verifyMaxInsert(counting, 0, 0);

    counting.merge(TestingHashers.FROM1);
    verifyMaxInsert(counting, 1, 0);

    counting.merge(TestingHashers.FROM1);
    verifyMaxInsert(counting, 2, 0);

    counting.merge(TestingHashers.FROM11);
    verifyMaxInsert(counting, 2, 1);

    counting.remove(TestingHashers.FROM1);
    verifyMaxInsert(counting, 1, 1);

    // Removing a false positive must also work: the incrementing hasher (5, 1) was never
    // merged, yet it covers the cells holding a single count for both FROM1 and FROM11.
    assertEquals(1, counting.getMaxInsert(new IncrementingHasher(5, 1)));
    counting.remove(new IncrementingHasher(5, 1));
    verifyMaxInsert(counting, 0, 0);
    assertEquals(0, counting.getMaxInsert(new IncrementingHasher(5, 1)));
}
}

0 comments on commit 00adc10

Please sign in to comment.