diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
index 7c18a4096f..750d3772e4 100644
--- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
+++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java
@@ -83,6 +83,61 @@ public interface CountingBloomFilter extends BloomFilter, CellProducer {
      */
     int getMaxCell();
 
+    /**
+     * Determines the maximum number of times the Bloom filter could have been inserted
+     * into this counting filter.
+     * @param bloomFilter the Bloom filter to check for.
+     * @return the maximum number of times the Bloom filter could have been inserted.
+     */
+    default int getMaxInsert(BloomFilter bloomFilter) {
+        // Cast to select the BitMapProducer overload; without it this call would resolve to itself.
+        return getMaxInsert((BitMapProducer) bloomFilter);
+    }
+
+    /**
+     * Determines the maximum number of times the IndexProducer could have been inserted
+     * into this counting filter.
+     * @param idxProducer the producer to drive the count check.
+     * @return the maximum number of times the IndexProducer could have been inserted.
+     */
+    default int getMaxInsert(IndexProducer idxProducer) {
+        return getMaxInsert(BitMapProducer.fromIndexProducer(idxProducer, getShape().getNumberOfBits()));
+    }
+
+    /**
+     * Determines the maximum number of times the Hasher could have been inserted into this
+     * counting filter.
+     * @param hasher the Hasher to provide the indices.
+     * @return the maximum number of times the hasher could have been inserted.
+     */
+    default int getMaxInsert(Hasher hasher) {
+        return getMaxInsert(hasher.indices(getShape()));
+    }
+
+    /**
+     * Determines the maximum number of times the BitMapProducer could have been inserted into this
+     * counting filter.
+     * <p>Returns zero if this filter does not contain the BitMapProducer. Otherwise returns the smallest cell
+     * value whose index is enabled in the BitMapProducer, or {@code Integer.MAX_VALUE} if no such cell is visited.</p>
+     * @param bitMapProducer the BitMapProducer providing the enabled bits to check.
+     * @return the maximum number of times the BitMapProducer could have been inserted.
+     */
+    default int getMaxInsert(BitMapProducer bitMapProducer) {
+        if (!contains(bitMapProducer)) {
+            return 0;
+        }
+        long[] bitMaps = bitMapProducer.asBitMapArray();
+        // Single-element array so the lambda below can update the running minimum.
+        int[] max = { Integer.MAX_VALUE };
+        forEachCell((index, count) -> {
+            if ((bitMaps[BitMap.getLongIndex(index)] & BitMap.getLongBit(index)) != 0) {
+                max[0] = Math.min(max[0], count);
+            }
+            return true;
+        });
+        return max[0];
+    }
+
     // Modification Operations
 
     /**
diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
index 6c53e70268..a2950383b6 100644
--- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
+++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java
@@ -293,4 +293,56 @@ public void testExcludesDuplicates() {
         assertEquals(0, bf1.cardinality());
         assertTrue(bf1.forEachCell((x, y) -> false), "Hasher in removes results in value not equal to 0");
     }
+
+    /**
+     * Asserts the {@code getMaxInsert} result of three probes through the Hasher, BloomFilter,
+     * BitMapProducer and IndexProducer overloads. The probe built from
+     * {@code new IncrementingHasher(0, 1)} is always expected to report zero.
+     * @param bf the counting filter under test.
+     * @param from1 the expected result for {@code TestingHashers.FROM1}.
+     * @param from11 the expected result for {@code TestingHashers.FROM11}.
+     */
+    private void verifyMaxInsert(CountingBloomFilter bf, int from1, int from11) {
+        BloomFilter bfFrom0 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
+        bfFrom0.merge(new IncrementingHasher(0, 1));
+        BloomFilter bfFrom1 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
+        bfFrom1.merge(TestingHashers.FROM1);
+        BloomFilter bfFrom11 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
+        bfFrom11.merge(TestingHashers.FROM11);
+
+        assertEquals(0, bf.getMaxInsert(new IncrementingHasher(0, 1)));
+        assertEquals(0, bf.getMaxInsert(bfFrom0));
+        assertEquals(0, bf.getMaxInsert((BitMapProducer) bfFrom0));
+        assertEquals(0, bf.getMaxInsert((IndexProducer) bfFrom0));
+
+        assertEquals(from1, bf.getMaxInsert(TestingHashers.FROM1));
+        assertEquals(from1, bf.getMaxInsert(bfFrom1));
+        assertEquals(from1, bf.getMaxInsert((BitMapProducer) bfFrom1));
+        assertEquals(from1, bf.getMaxInsert((IndexProducer) bfFrom1));
+
+        assertEquals(from11, bf.getMaxInsert(TestingHashers.FROM11));
+        assertEquals(from11, bf.getMaxInsert(bfFrom11));
+        assertEquals(from11, bf.getMaxInsert((BitMapProducer) bfFrom11));
+        assertEquals(from11, bf.getMaxInsert((IndexProducer) bfFrom11));
+    }
+
+    @Test
+    public void testGetMaxInsert() {
+        CountingBloomFilter bf = createEmptyFilter(getTestShape());
+        verifyMaxInsert(bf, 0, 0);
+        bf.merge(TestingHashers.FROM1);
+        verifyMaxInsert(bf, 1, 0);
+        bf.merge(TestingHashers.FROM1);
+        verifyMaxInsert(bf, 2, 0);
+        bf.merge(TestingHashers.FROM11);
+        verifyMaxInsert(bf, 2, 1);
+        bf.remove(TestingHashers.FROM1);
+        verifyMaxInsert(bf, 1, 1);
+        // verify remove false positive works
+        // Incrementing hasher 5,1 spans the single count cells for both FROM1 and FROM11
+        assertEquals(1, bf.getMaxInsert(new IncrementingHasher(5, 1)));
+        bf.remove(new IncrementingHasher(5, 1));
+        verifyMaxInsert(bf, 0, 0);
+        assertEquals(0, bf.getMaxInsert(new IncrementingHasher(5, 1)));
+    }
 }