Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

touch-up changes mostly to clean up code contributed by others. #656

Merged
merged 1 commit into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 49 additions & 2 deletions src/main/java/org/apache/datasketches/theta/CompactSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,61 @@ else if (serVer == 2) {
"Corrupted: Serialization Version " + serVer + " not recognized.");
}

/**
 * Wraps the sketch image held in the given byte array and refers to it directly,
 * without copying the data. Wrapping enables fast read-only merging and gives access
 * to the entire public read-only API.
 *
 * <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that were
 * explicitly stored as direct sketches can be wrapped. Earlier serial versions were
 * never designed to "wrap", so wrapping one of them results in a heapify operation
 * instead.</p>
 *
 * <p>Wrapping an image of any subclass of this class that is empty or holds only a
 * single item yields the heapified form of the empty or single item sketch,
 * respectively. This is actually faster and consumes less overall memory.</p>
 *
 * <p>The image is expected to have been created with the DEFAULT_UPDATE_SEED.
 * Note that SerialVersion 1 sketches cannot be checked because they have no seedHash
 * field, so the resulting heapified CompactSketch will be given the hash of
 * DEFAULT_UPDATE_SEED.</p>
 *
 * @param bytes a byte array image of a Sketch that was created using the DEFAULT_UPDATE_SEED.
 *
 * @return a CompactSketch backed by the given byte array, except as noted above.
 */
public static CompactSketch wrap(final byte[] bytes) {
  final long defaultSeed = ThetaUtil.DEFAULT_UPDATE_SEED;
  // NOTE(review): seed enforcement is requested as false here, whereas the two-argument
  // overload passes true; confirm how the image's seed hash is validated on this path.
  final boolean enforceSeed = false;
  return wrap(bytes, defaultSeed, enforceSeed);
}


/**
 * Wraps the sketch image held in the given byte array and refers to it directly,
 * without copying the data. Wrapping enables fast read-only merging and gives access
 * to the entire public read-only API.
 *
 * <p>Only "Direct" Serialization Version 3 (i.e., OpenSource) sketches that were
 * explicitly stored as direct sketches can be wrapped. Earlier serial versions were
 * never designed to "wrap", so wrapping one of them results in a heapify operation
 * instead.</p>
 *
 * <p>Wrapping an image of any subclass of this class that is empty or holds only a
 * single item yields the heapified form of the empty or single item sketch,
 * respectively. This is actually faster and consumes less overall memory.</p>
 *
 * <p>This method checks whether the given expectedSeed was used to create the source
 * image. Note that SerialVersion 1 sketches cannot be checked because they have no
 * seedHash field, so the resulting heapified CompactSketch will be given the hash of
 * the expectedSeed.</p>
 *
 * @param bytes a byte array image of a Sketch that was created using the given expectedSeed.
 * @param expectedSeed the seed used to validate the given byte array image.
 * <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
 * @return a CompactSketch backed by the given byte array, except as noted above.
 */
public static CompactSketch wrap(final byte[] bytes, final long expectedSeed) {
  // The caller supplied an explicit seed, so ask the shared implementation to enforce it.
  final boolean enforceSeed = true;
  return wrap(bytes, expectedSeed, enforceSeed);
}

private static CompactSketch wrap(final byte[] bytes, final long seed, final boolean enforceSeed) {
final int serVer = bytes[PreambleUtil.SER_VER_BYTE];
final int familyId = bytes[PreambleUtil.FAMILY_BYTE];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public int getCurrentBytes() {

// NOTE(review): presumably the byte offsets at which the packed entry data begins in the
// serialized image, for exact mode and estimation mode respectively -- confirm against usage.
private static final int START_PACKED_DATA_EXACT_MODE = 8;
private static final int START_PACKED_DATA_ESTIMATION_MODE = 16;

@Override
public int getRetainedEntries(final boolean valid) { //compact is always valid
// number of entries is stored using variable length encoding
Expand Down Expand Up @@ -132,7 +132,7 @@ long[] getCache() {
final int numEntries = getRetainedEntries();
final long[] cache = new long[numEntries];
int i = 0;
HashIterator it = iterator();
final HashIterator it = iterator();
while (it.next()) {
cache[i++] = it.get();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -504,21 +504,21 @@ private void moveDataToTgt(final long[] arr, final int count) {
}

private void moveDataToTgt(final Sketch sketch) {
int count = sketch.getRetainedEntries();
final int count = sketch.getRetainedEntries();
int tmpCnt = 0;
if (wmem_ != null) { //Off Heap puts directly into mem
final int preBytes = CONST_PREAMBLE_LONGS << 3;
final int lgArrLongs = lgArrLongs_;
final long thetaLong = thetaLong_;
HashIterator it = sketch.iterator();
final HashIterator it = sketch.iterator();
while (it.next()) {
final long hash = it.get();
if (continueCondition(thetaLong, hash)) { continue; }
hashInsertOnlyMemory(wmem_, lgArrLongs, hash, preBytes);
tmpCnt++;
}
} else { //On Heap. Assumes HT exists and is large enough
HashIterator it = sketch.iterator();
final HashIterator it = sketch.iterator();
while (it.next()) {
final long hash = it.get();
if (continueCondition(thetaLong_, hash)) { continue; }
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/apache/datasketches/theta/Sketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ public String toString(final boolean sketchSummary, final boolean dataDetail, fi
final int w = width > 0 ? width : 8; // default is 8 wide
if (curCount > 0) {
sb.append("### SKETCH DATA DETAIL");
HashIterator it = iterator();
final HashIterator it = iterator();
int j = 0;
while (it.next()) {
final long h = it.get();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public byte[] toByteArray() {
long[] getCache() {
final long[] cache = new long[getRetainedEntries()];
int i = 0;
HashIterator it = iterator();
final HashIterator it = iterator();
while (it.next()) {
cache[i++] = it.get();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ public void checkHLL8Heap() {
} else {
sk = new HllSketch(lgK, tgtHllType);
}
String type = tgtHllType.toString();
String store = direct ? "Memory" : "Heap";
for (int i = 1; i <= N; i++) {
sk.update(i);
Expand Down