Skip to content

Commit

Permalink
Merge pull request #144 from DataSketches/tuple-reset
Browse files Browse the repository at this point in the history
added reset method
  • Loading branch information
jmalkin authored Apr 11, 2017
2 parents cab3510 + 6ade1fb commit 6254e77
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ public void update(final long[] key, final double[] values) {
*/
public abstract void trim();

/**
 * Resets this sketch to an empty state.
 */
public abstract void reset();

/**
* Gets an on-heap compact representation of the sketch
* @return compact sketch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,13 @@

package com.yahoo.sketches.tuple;

import static com.yahoo.sketches.Util.MIN_LG_ARR_LONGS;
import static com.yahoo.sketches.Util.ceilingPowerOf2;
import static com.yahoo.sketches.Util.startingSubMultiple;

import java.nio.ByteOrder;
import java.util.Arrays;

import com.yahoo.memory.Memory;
import com.yahoo.memory.NativeMemory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.HashOperations;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.SketchesArgumentException;

/**
Expand Down Expand Up @@ -53,12 +48,7 @@ final class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSel
final float samplingProbability, final int numValues, final long seed, final Memory dstMem) {
super(numValues, seed);
mem_ = dstMem;
final int startingCapacity = 1 << startingSubMultiple(
// target table size is twice the number of nominal entries
Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries) * 2),
ResizeFactor.getRF(lgResizeFactor),
MIN_LG_ARR_LONGS
);
final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor);
checkIfEnoughMemory(dstMem, startingCapacity, numValues);
mem_.putByte(PREAMBLE_LONGS_BYTE, (byte) 1);
mem_.putByte(SERIAL_VERSION_BYTE, serialVersionUID);
Expand Down Expand Up @@ -159,6 +149,26 @@ public byte[] toByteArray() {
return byteArray;
}

/**
 * Resets this sketch to an empty state. The header fields held in the backing memory
 * (empty flag, theta, lg of current capacity, retained-entries count) are rewritten and
 * the cached offsets, capacity and rebuild threshold are re-derived from the starting
 * capacity.
 */
@Override
public void reset() {
  // The empty flag is only raised in memory when the sketch is not already empty.
  if (!isEmpty_) {
    isEmpty_ = true;
    mem_.setBits(FLAGS_BYTE, (byte) (1 << Flags.IS_EMPTY.ordinal()));
  }

  // Configuration is read back from memory rather than kept in fields.
  final int lgRf = mem_.getByte(LG_RESIZE_FACTOR_BYTE);
  final float p = mem_.getFloat(SAMPLING_P_FLOAT);
  final int initialCapacity = Util.getStartingCapacity(getNominalEntries(), lgRf);
  final int lgInitialCapacity = Integer.numberOfTrailingZeros(initialCapacity);

  // Theta goes back to the value implied by the sampling probability.
  theta_ = (long) (Long.MAX_VALUE * (double) p);
  mem_.putLong(THETA_LONG, theta_);
  mem_.putByte(LG_CUR_CAPACITY_BYTE, (byte) lgInitialCapacity);
  mem_.putInt(RETAINED_ENTRIES_INT, 0);

  // Keys start right at the entries area; values follow the key slots.
  keysOffset_ = ENTRIES_START;
  valuesOffset_ = keysOffset_ + SIZE_OF_KEY_BYTES * initialCapacity;
  // Only the key slots are zeroed; the values region is left as it is.
  mem_.clear(keysOffset_, SIZE_OF_KEY_BYTES * initialCapacity);
  lgCurrentCapacity_ = lgInitialCapacity;
  setRebuildThreshold();
}

@Override
protected long getKey(final int index) {
return mem_.getLong(keysOffset_ + SIZE_OF_KEY_BYTES * index);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@

package com.yahoo.sketches.tuple;

import static com.yahoo.sketches.Util.MIN_LG_ARR_LONGS;
import static com.yahoo.sketches.Util.ceilingPowerOf2;
import static com.yahoo.sketches.Util.startingSubMultiple;

import java.nio.ByteOrder;
import java.util.Arrays;
Expand All @@ -16,7 +14,6 @@
import com.yahoo.memory.NativeMemory;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.HashOperations;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.SketchesArgumentException;

/**
Expand Down Expand Up @@ -55,12 +52,7 @@ final class HeapArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelec
lgResizeFactor_ = lgResizeFactor;
samplingProbability_ = samplingProbability;
theta_ = (long) (Long.MAX_VALUE * (double) samplingProbability);
final int startingCapacity = 1 << startingSubMultiple(
// target table size is twice the number of nominal entries
Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries) * 2),
ResizeFactor.getRF(lgResizeFactor),
MIN_LG_ARR_LONGS
);
final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor);
keys_ = new long[startingCapacity];
values_ = new double[startingCapacity * numValues];
lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity);
Expand Down Expand Up @@ -167,6 +159,18 @@ public byte[] toByteArray() {
return byteArray;
}

/**
 * Resets this sketch to an empty state: the retained count goes back to zero, theta is
 * recomputed from the sampling probability, and the key and value arrays are replaced by
 * freshly allocated ones of the starting capacity.
 */
@Override
public void reset() {
  isEmpty_ = true;
  count_ = 0;

  // Theta returns to the value implied by the configured sampling probability.
  final double p = samplingProbability_;
  theta_ = (long) (Long.MAX_VALUE * p);

  // Replace the tables with new ones instead of clearing the old ones in place.
  final int initialCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_);
  keys_ = new long[initialCapacity];
  values_ = new double[initialCapacity * numValues_];

  final int lgInitialCapacity = Integer.numberOfTrailingZeros(initialCapacity);
  lgCurrentCapacity_ = lgInitialCapacity;
  setRebuildThreshold();
}

@Override
protected long getKey(final int index) {
return keys_[index];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@

package com.yahoo.sketches.tuple;

import static com.yahoo.sketches.Util.MIN_LG_ARR_LONGS;
import static com.yahoo.sketches.Util.REBUILD_THRESHOLD;
import static com.yahoo.sketches.Util.RESIZE_THRESHOLD;
import static com.yahoo.sketches.Util.ceilingPowerOf2;
import static com.yahoo.sketches.Util.startingSubMultiple;

import java.lang.reflect.Array;
import java.nio.ByteOrder;
Expand All @@ -20,7 +18,6 @@
import com.yahoo.sketches.Family;
import com.yahoo.sketches.HashOperations;
import com.yahoo.sketches.QuickSelect;
import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.SketchesArgumentException;

/**
Expand Down Expand Up @@ -93,12 +90,7 @@ private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES,
lgResizeFactor,
samplingProbability,
summaryFactory,
1 << startingSubMultiple(
// target table size is twice the number of nominal entries
Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries) * 2),
ResizeFactor.getRF(lgResizeFactor),
MIN_LG_ARR_LONGS
)
Util.getStartingCapacity(nomEntries, lgResizeFactor)
);
}

Expand Down Expand Up @@ -211,6 +203,21 @@ public void trim() {
}
}

/**
 * Resets this sketch to an empty state. The retained count goes back to zero, theta is
 * recomputed from the sampling probability, and the key and summary arrays are replaced
 * by new ones of the starting capacity.
 */
@SuppressWarnings("unchecked")
public void reset() {
  isEmpty_ = true;
  count_ = 0;

  final double p = samplingProbability_;
  theta_ = (long) (Long.MAX_VALUE * p);

  final int initialCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_);
  lgCurrentCapacity_ = Integer.numberOfTrailingZeros(initialCapacity);
  keys_ = new long[initialCapacity];

  // The array component type is taken from a summary produced by the factory.
  final Class<?> summaryType = summaryFactory_.newSummary().getClass();
  summaries_ = (S[]) Array.newInstance(summaryType, initialCapacity);

  setRebuildThreshold();
}

/**
* Converts the current state of the sketch into a compact sketch
* @return compact sketch
Expand Down Expand Up @@ -344,11 +351,11 @@ boolean isInSamplingMode() {
}

/**
 * Sets the theta value of this sketch.
 * @param theta the new value to store in theta_
 */
void setThetaLong(final long theta) {
  theta_ = theta;
}

/**
 * Marks this sketch as not empty by setting isEmpty_ to false.
 */
void setNotEmpty() {
  isEmpty_ = false;
}

SummaryFactory<S> getSummaryFactory() {
Expand Down
13 changes: 13 additions & 0 deletions sketches/src/main/java/com/yahoo/sketches/tuple/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@

package com.yahoo.sketches.tuple;

import static com.yahoo.sketches.Util.MIN_LG_ARR_LONGS;
import static com.yahoo.sketches.Util.ceilingPowerOf2;
import static com.yahoo.sketches.Util.startingSubMultiple;
import static com.yahoo.sketches.hash.MurmurHash3.hash;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.yahoo.sketches.ResizeFactor;
import com.yahoo.sketches.SketchesArgumentException;

final class Util {
Expand Down Expand Up @@ -49,4 +53,13 @@ static final void checkSeedHashes(final short seedHashA, final short seedHashB)

}

/**
 * Computes the starting capacity (number of slots) of a hash table for the given
 * configuration.
 * @param nomEntries nominal number of entries; rounded up to a power of 2 before use
 * @param lgResizeFactor log2 of the resize factor, passed to ResizeFactor.getRF
 * @return the starting capacity, always a power of 2
 */
static int getStartingCapacity(final int nomEntries, final int lgResizeFactor) {
  // target table size is twice the number of nominal entries
  final int lgTargetSize = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries) * 2);
  return 1 << startingSubMultiple(
      lgTargetSize, ResizeFactor.getRF(lgResizeFactor), MIN_LG_ARR_LONGS);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ public void exactMode() {
for (int i = 0; i < values.length; i++) if (values[i] != null) count++;
Assert.assertEquals(count, 4096);
for (int i = 0; i < 4096; i++) Assert.assertEquals(values[i][0], 1.0);

sketch.reset();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
Assert.assertEquals(sketch.getUpperBound(1), 0.0);
Assert.assertEquals(sketch.getLowerBound(1), 0.0);
Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
Assert.assertEquals(sketch.getTheta(), 1.0);
ArrayOfDoublesSketchIterator it = sketch.iterator();
while (it.next()) {
Assert.fail("empty sketch expected");
}
}

@Test
Expand Down Expand Up @@ -120,6 +133,19 @@ public void estimationMode() {
}
}
Assert.assertEquals(count, values.length);

sketch.reset();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
Assert.assertEquals(sketch.getUpperBound(1), 0.0);
Assert.assertEquals(sketch.getLowerBound(1), 0.0);
Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
Assert.assertEquals(sketch.getTheta(), 1.0);
ArrayOfDoublesSketchIterator it = sketch.iterator();
while (it.next()) {
Assert.fail("empty sketch expected");
}
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,19 @@ public void exactMode() {
for (int i = 0; i < values.length; i++) if (values[i] != null) count++;
Assert.assertEquals(count, 4096);
for (int i = 0; i < 4096; i++) Assert.assertEquals(values[i][0], 1.0);

sketch.reset();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
Assert.assertEquals(sketch.getUpperBound(1), 0.0);
Assert.assertEquals(sketch.getLowerBound(1), 0.0);
Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
Assert.assertEquals(sketch.getTheta(), 1.0);
ArrayOfDoublesSketchIterator it = sketch.iterator();
while (it.next()) {
Assert.fail("empty sketch expected");
}
}

@Test
Expand Down Expand Up @@ -101,6 +114,19 @@ public void estimationMode() {
}
}
Assert.assertEquals(count, values.length);

sketch.reset();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
Assert.assertEquals(sketch.getUpperBound(1), 0.0);
Assert.assertEquals(sketch.getLowerBound(1), 0.0);
Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
Assert.assertEquals(sketch.getTheta(), 1.0);
ArrayOfDoublesSketchIterator it = sketch.iterator();
while (it.next()) {
Assert.fail("empty sketch expected");
}
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ public void exactMode() {
for (int i = 0; i < summaries.length; i++) if (summaries[i] != null) count++;
Assert.assertEquals(count, 4096);
Assert.assertEquals(summaries[0].getValue(), 1.0);

sketch.reset();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
Assert.assertEquals(sketch.getUpperBound(1), 0.0);
Assert.assertEquals(sketch.getLowerBound(1), 0.0);
Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
Assert.assertEquals(sketch.getTheta(), 1.0);
}

@Test
Expand All @@ -95,7 +104,16 @@ public void estimationMode() {
}
}
Assert.assertEquals(count, summaries.length);
}

sketch.reset();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
Assert.assertEquals(sketch.getUpperBound(1), 0.0);
Assert.assertEquals(sketch.getLowerBound(1), 0.0);
Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE);
Assert.assertEquals(sketch.getTheta(), 1.0);
}

@Test
public void estimationModeWithSamplingNoResizing() {
Expand Down

0 comments on commit 6254e77

Please sign in to comment.