For: *
The formula is: base<sup>ceiling(log<sub>base</sub>(x))</sup>
+ * + * @param base The number in the expression ⌈basen⌉. * @param n The input argument. * @return the ceiling power of base as a double and equal to a mathematical integer. */ public static double ceilingPowerBaseOfDouble(final double base, final double n) { final double x = n < 1.0 ? 1.0 : n; - return pow(base, ceil(logBaseOfX(base, x))); + return Math.round(pow(base, ceil(logBaseOfX(base, x)))); } /** - * Computes the floor power of given base and n as doubles. - * This is the largest positive power - * of base that equal to or less than the given n and equal to a mathematical integer. + * Computes the floor of a given n given base, where the floor is an integral power of the base. + * This is the largest positive power of base that is equal to or less than the given n + * and equal to a mathematical integer. * The result of this function is consistent with {@link #floorPowerOf2(int)} for values * less than one. I.e., if n < 1, the result is 1. * - * @param base The base in the expression ⌊basen⌋. + *The formula is: basefloor(logbase(x))
+ * + * @param base The number in the expression ⌊basen⌋. * @param n The input argument. * @return the floor power of 2 and equal to a mathematical integer. */ public static double floorPowerBaseOfDouble(final double base, final double n) { final double x = n < 1.0 ? 1.0 : n; - return pow(base, floor(logBaseOfX(base, x))); + return Math.round(pow(base, floor(logBaseOfX(base, x)))); } // Logarithm related /** - * The log base 2 of the value + * The log2(value) * @param value the given value - * @return The log base 2 of the value + * @return log2(value) */ public static double log2(final double value) { return log(value) / LOG2; } /** - * Returns the logarithm_logBase of x. Example: logB(2.0, x) = log(x) / log(2.0). - * @param logBase the base of the logarithm used + * Returns the logbase(x). Example, if base = 2.0: logB(2.0, x) = log(x) / log(2.0). + * @param base The number in the expression log(x) / log(base). * @param x the given value - * @return the logarithm_logBase of x: Example: logB(2.0, x) = log(x) / log(2.0). 
+ * @return the logbase(x) */ - public static double logBaseOfX(final double logBase, final double x) { - return log(x) / log(logBase); + public static double logBaseOfX(final double base, final double x) { + return log(x) / log(base); } /** diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 183a15ba7..7c175512a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -24,7 +24,6 @@ import static org.apache.datasketches.common.ByteArrayUtil.putDoubleLE; import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks; import java.util.Objects; @@ -175,21 +174,6 @@ public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria return kllDoublesSV.getCDF(splitPoints, searchCrit); } - @Override - public DoublesPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - final double[] ranks = equallyWeightedRanks(numEquallyWeighted); - final double[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final DoublesPartitionBoundaries dpb = new DoublesPartitionBoundaries(); - dpb.N = this.getN(); - dpb.ranks = ranks; - dpb.boundaries = boundaries; - return dpb; - } - @Override public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java 
b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java index 473d5f1bb..bc18c5347 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java @@ -24,20 +24,12 @@ /** * Iterator over KllDoublesSketch. The order is not defined. */ -public final class KllDoublesSketchIterator implements QuantilesDoublesSketchIterator { +public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator { private final double[] quantiles; - private final int[] levelsArr; - private final int numLevels; - private int level; - private int index; - private long weight; - private boolean isInitialized; KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) { + super(levelsArr, numLevels); this.quantiles = quantiles; - this.levelsArr = levelsArr; - this.numLevels = numLevels; - this.isInitialized = false; } @Override @@ -45,34 +37,4 @@ public double getQuantile() { return quantiles[index]; } - @Override - public long getWeight() { - return weight; - } - - @Override - public boolean next() { - if (!isInitialized) { - level = 0; - index = levelsArr[level]; - weight = 1; - isInitialized = true; - } else { - index++; - } - if (index < levelsArr[level + 1]) { - return true; - } - // go to the next non-empty level - do { - level++; - if (level == numLevels) { - return false; // run out of levels - } - weight *= 2; - } while (levelsArr[level] == levelsArr[level + 1]); - index = levelsArr[level]; - return true; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java index 03259b952..cac663695 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java @@ -21,11 +21,13 @@ import 
static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.quantilescommon.DoublesSortedView; +import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesUtil; @@ -39,6 +41,8 @@ public final class KllDoublesSketchSortedView implements DoublesSortedView { private final double[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final double maxItem; + private final double minItem; /** * Construct from elements for testing. @@ -46,25 +50,31 @@ public final class KllDoublesSketchSortedView implements DoublesSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. */ - KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN) { + KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN, + final double maxItem, final double minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; } /** * Constructs this Sorted View given the sketch - * @param sk the given KllDoublesSketch. + * @param sketch the given KllDoublesSketch. 
*/ - public KllDoublesSketchSortedView(final KllDoublesSketch sk) { - this.totalN = sk.getN(); - final double[] srcQuantiles = sk.getDoubleItemsArray(); - final int[] srcLevels = sk.levelsArr; - final int srcNumLevels = sk.getNumLevels(); - - if (!sk.isLevelZeroSorted()) { + public KllDoublesSketchSortedView(final KllDoublesSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); + final double[] srcQuantiles = sketch.getDoubleItemsArray(); + final int[] srcLevels = sketch.levelsArr; + final int srcNumLevels = sketch.getNumLevels(); + + if (!sketch.isLevelZeroSorted()) { Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]); - if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); } + if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } } final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage @@ -78,17 +88,31 @@ public long[] getCumulativeWeights() { return cumWeights.clone(); } + @Override + public double getMaxItem() { + return maxItem; + } + + @Override + public double getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + @Override public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final long naturalRank = (searchCrit == INCLUSIVE) - ? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; } return quantiles[index]; } @@ -116,8 +140,8 @@ public boolean isEmpty() { } @Override - public KllDoublesSketchSortedViewIterator iterator() { - return new KllDoublesSketchSortedViewIterator(quantiles, cumWeights); + public DoublesSortedViewIterator iterator() { + return new DoublesSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedViewIterator.java deleted file mode 100644 index 29131bd2c..000000000 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedViewIterator.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.kll; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over KllDoublesSketchSortedView - * @author Alexander Saydakov - * @author Lee Rhodes - */ -public final class KllDoublesSketchSortedViewIterator implements DoublesSortedViewIterator { - - private final double[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - KllDoublesSketchSortedViewIterator(final double[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public double getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 6c60facae..5484e8bf1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -24,7 +24,6 @@ import static org.apache.datasketches.common.ByteArrayUtil.putFloatLE; import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks; import java.util.Objects; @@ -175,21 +174,6 @@ public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria s return kllFloatsSV.getCDF(splitPoints, searchCrit); } - @Override - public FloatsPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - final double[] ranks = equallyWeightedRanks(numEquallyWeighted); - final float[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final FloatsPartitionBoundaries fpb = new FloatsPartitionBoundaries(); - fpb.N = this.getN(); - fpb.ranks = ranks; - fpb.boundaries = boundaries; - return fpb; - } - @Override 
public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java index 8c5808ead..accf039de 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java @@ -24,20 +24,12 @@ /** * Iterator over KllFloatsSketch. The order is not defined. */ -public final class KllFloatsSketchIterator implements QuantilesFloatsSketchIterator { +public final class KllFloatsSketchIterator extends KllSketchIterator implements QuantilesFloatsSketchIterator { private final float[] quantiles; - private final int[] levelsArr; - private final int numLevels; - private int level; - private int index; - private long weight; - private boolean isInitialized; KllFloatsSketchIterator(final float[] quantiles, final int[] levelsArr, final int numLevels) { + super(levelsArr, numLevels); this.quantiles = quantiles; - this.levelsArr = levelsArr; - this.numLevels = numLevels; - this.isInitialized = false; } @Override @@ -45,34 +37,4 @@ public float getQuantile() { return quantiles[index]; } - @Override - public long getWeight() { - return weight; - } - - @Override - public boolean next() { - if (!isInitialized) { - level = 0; - index = levelsArr[level]; - weight = 1; - isInitialized = true; - } else { - index++; - } - if (index < levelsArr[level + 1]) { - return true; - } - // go to the next non-empty level - do { - level++; - if (level == numLevels) { - return false; // run out of levels - } - weight *= 2; - } while (levelsArr[level] == levelsArr[level + 1]); - index = levelsArr[level]; - return true; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java 
b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java index 6a378531d..ebad5f397 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java @@ -21,11 +21,13 @@ import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.quantilescommon.FloatsSortedView; +import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesUtil; @@ -39,6 +41,8 @@ public final class KllFloatsSketchSortedView implements FloatsSortedView { private final float[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final float maxItem; + private final float minItem; /** * Construct from elements for testing. @@ -46,25 +50,31 @@ public final class KllFloatsSketchSortedView implements FloatsSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. 
*/ - KllFloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN) { + KllFloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN, + final float maxItem, final float minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; } /** * Constructs this Sorted View given the sketch - * @param sk the given KllFloatsSketch. + * @param sketch the given KllFloatsSketch. */ - public KllFloatsSketchSortedView(final KllFloatsSketch sk) { - this.totalN = sk.getN(); - final float[] srcQuantiles = sk.getFloatItemsArray(); - final int[] srcLevels = sk.levelsArr; - final int srcNumLevels = sk.getNumLevels(); - - if (!sk.isLevelZeroSorted()) { + public KllFloatsSketchSortedView(final KllFloatsSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); + final float[] srcQuantiles = sketch.getFloatItemsArray(); + final int[] srcLevels = sketch.levelsArr; + final int srcNumLevels = sketch.getNumLevels(); + + if (!sketch.isLevelZeroSorted()) { Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]); - if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); } + if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } } final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage @@ -73,18 +83,34 @@ public KllFloatsSketchSortedView(final KllFloatsSketch sk) { populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); } + //end of constructors + @Override public long[] getCumulativeWeights() { return cumWeights.clone(); } + @Override + public float getMaxItem() { + return maxItem; + } + + @Override + public float getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + @Override public float getQuantile(final double rank, final 
QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final long naturalRank = (searchCrit == INCLUSIVE) - ? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { @@ -116,8 +142,8 @@ public boolean isEmpty() { } @Override - public KllFloatsSketchSortedViewIterator iterator() { - return new KllFloatsSketchSortedViewIterator(quantiles, cumWeights); + public FloatsSortedViewIterator iterator() { + return new FloatsSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java deleted file mode 100644 index 87c2e88bd..000000000 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.kll; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over KllFloatsSketchSortedView - * @author Alexander Saydakov - * @author Lee Rhodes - */ -public final class KllFloatsSketchSortedViewIterator implements FloatsSortedViewIterator { - - private final float[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - KllFloatsSketchSortedViewIterator(final float[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public float getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java index 68c9a6dfd..f0e923fbd 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java @@ -23,7 +23,6 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks; import java.lang.reflect.Array; import java.util.Comparator; @@ -34,7 +33,10 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator; @@ -46,7 +48,7 @@ * @see org.apache.datasketches.kll.KllSketch */ @SuppressWarnings("unchecked") -public abstract class KllItemsSketchPrototype example of the recommended 
iteration loop:
+ *{@code + * SketchIterator itr = sketch.iterator(); + * while (itr.next()) { + * ...get*(); + * } + * }+ * + * @author Lee Rhodes + */ +public class KllSketchIterator implements QuantilesSketchIterator { + protected final int[] levelsArr; + protected final int numLevels; + protected int level; + protected int index; + protected long weight; + protected boolean isInitialized_; + + KllSketchIterator(final int[] levelsArr, final int numLevels) { + this.levelsArr = levelsArr; + this.numLevels = numLevels; + this.isInitialized_ = false; + } + + @Override + public long getWeight() { + return weight; + } + + @Override + public boolean next() { + if (!isInitialized_) { + level = 0; + index = levelsArr[level]; + weight = 1; + isInitialized_ = true; + } else { + index++; + } + if (index < levelsArr[level + 1]) { + return true; + } + // go to the next non-empty level + do { + level++; + if (level == numLevels) { + return false; // run out of levels + } + weight *= 2; + } while (levelsArr[level] == levelsArr[level + 1]); + index = levelsArr[level]; + return true; + } + +} diff --git a/src/main/java/org/apache/datasketches/partitions/BoundsRule.java b/src/main/java/org/apache/datasketches/partitions/BoundsRule.java new file mode 100644 index 000000000..68dc87bc1 --- /dev/null +++ b/src/main/java/org/apache/datasketches/partitions/BoundsRule.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +public enum BoundsRule { + + /** + * Include both the upper and lower bounds + */ + INCLUDE_BOTH, + + /** + * Include only the upper bound but not the lower bound + */ + INCLUDE_UPPER, + /** + * Include only the lower bound but not the upper bound + */ + INCLUDE_LOWER +} diff --git a/src/main/java/org/apache/datasketches/partitions/Partitioner.java b/src/main/java/org/apache/datasketches/partitions/Partitioner.java new file mode 100644 index 000000000..be256e479 --- /dev/null +++ b/src/main/java/org/apache/datasketches/partitions/Partitioner.java @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.partitions; + +import static java.lang.Math.ceil; +import static java.lang.Math.log; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Math.pow; +import static java.lang.Math.round; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.List; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; +import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; + +/** + * A partitioning process that can partition very large data sets into thousands + * of partitions of approximately the same size. + * + *
The code included here works well for moderately sized partitioning tasks. + * As an example, using the test code in the test branch, the task of splitting + * a data set of 1 billion items into 324 partitions of 3M items each completed in under 3 minutes + * on a single CPU. For much larger partitioning tasks, it is recommended that this code be adapted to run in a + * parallelized systems environment.
+ * @paramThis range of data may or may not be subsequently further partitioned.
+ * @param lowerQuantile the lowest quantile of a range + * @param upperQuantile the highest quantile of a range + * @param boundsRule determines which quantile bounds to include + * @return a quantiles sketch filled from the given upper and lower bounds. + */ + public S getRange(final T lowerQuantile, final T upperQuantile, final BoundsRule boundsRule); + +} diff --git a/src/main/java/org/apache/datasketches/partitions/package-info.java b/src/main/java/org/apache/datasketches/partitions/package-info.java new file mode 100644 index 000000000..cee11ec1d --- /dev/null +++ b/src/main/java/org/apache/datasketches/partitions/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * + */ +package org.apache.datasketches.partitions; diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index afa660205..bbcdf44f7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -28,7 +28,6 @@ import static org.apache.datasketches.quantiles.ClassicUtil.checkK; import static org.apache.datasketches.quantiles.ClassicUtil.computeNumLevelsNeeded; import static org.apache.datasketches.quantiles.ClassicUtil.computeRetainedItems; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks; import java.util.Random; @@ -170,21 +169,6 @@ public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria @Override public abstract double getMinItem(); - @Override - public DoublesPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } - final double[] ranks = equallyWeightedRanks(numEquallyWeighted); - final double[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final DoublesPartitionBoundaries dpb = new DoublesPartitionBoundaries(); - dpb.N = this.getN(); - dpb.ranks = ranks; - dpb.boundaries = boundaries; - return dpb; - } - @Override public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java index 02ccdd039..a5f2d476b 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java +++ 
b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java @@ -22,14 +22,17 @@ import static java.lang.System.arraycopy; import static org.apache.datasketches.quantiles.DoublesSketchAccessor.BB_LVL_IDX; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.util.Arrays; +import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.quantilescommon.DoublesSortedView; +import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; -import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesUtil; /** @@ -41,6 +44,8 @@ public final class DoublesSketchSortedView implements DoublesSortedView { private final double[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final double maxItem; + private final double minItem; /** * Construct from elements for testing. @@ -48,10 +53,13 @@ public final class DoublesSketchSortedView implements DoublesSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. 
*/ - DoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN) { + DoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN, + final double maxItem, final double minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; } /** @@ -59,7 +67,10 @@ public final class DoublesSketchSortedView implements DoublesSortedView { * @param sketch the given Classic Quantiles DoublesSketch */ public DoublesSketchSortedView(final DoublesSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); final int k = sketch.getK(); final int numQuantiles = sketch.getNumRetained(); quantiles = new double[numQuantiles]; @@ -78,24 +89,43 @@ public DoublesSketchSortedView(final DoublesSketch sketch) { } } + @Override + public long[] getCumulativeWeights() { + return cumWeights.clone(); + } + + @Override + public double getMaxItem() { + return maxItem; + } + + @Override + public double getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + @Override public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final long naturalRank = (searchCrit == INCLUSIVE) - ? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; } return quantiles[index]; } @Override public double getRank(final double quantile, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } final int len = quantiles.length; final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT; final int index = InequalitySearch.find(quantiles, 0, len - 1, quantile, crit); @@ -105,11 +135,6 @@ public double getRank(final double quantile, final QuantileSearchCriteria search return (double)cumWeights[index] / totalN; } - @Override - public long[] getCumulativeWeights() { - return cumWeights.clone(); - } - @Override public double[] getQuantiles() { return quantiles.clone(); @@ -121,8 +146,8 @@ public boolean isEmpty() { } @Override - public DoublesSketchSortedViewIterator iterator() { - return new DoublesSketchSortedViewIterator(quantiles, cumWeights); + public DoublesSortedViewIterator iterator() { + return new DoublesSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedViewIterator.java deleted file mode 100644 index f834fb2aa..000000000 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedViewIterator.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.quantiles; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over DoublesSketchSortedView. - */ -public final class DoublesSketchSortedViewIterator implements DoublesSortedViewIterator { - - private final double[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - DoublesSketchSortedViewIterator(final double[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public double getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java index cdb21ae94..6b247347a 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java @@ -36,10 +36,7 @@ import static org.apache.datasketches.quantiles.PreambleUtil.extractN; import static org.apache.datasketches.quantiles.PreambleUtil.extractPreLongs; import static org.apache.datasketches.quantiles.PreambleUtil.extractSerVer; -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks; -import java.lang.reflect.Array; import java.util.Arrays; import java.util.Comparator; import java.util.Objects; @@ -49,7 +46,8 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.quantilescommon.GenericSortedView; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesAPI; import 
org.apache.datasketches.quantilescommon.QuantilesGenericAPI; @@ -74,25 +72,13 @@ * * @paramAssume boundaries array has size N + 1. Let the indices be sequentially numbered from 0 to N. + * The number of partitions is always one less than the size of the boundaries array. + * Let the partitions be sequentially numbered from 1 to N. + * + *
If these results were computed using QuantileSearchCriteria.INCLUSIVE then these sequential boundaries + * are to be interpreted as follows: + *
If these results were computed using QuantileSearchCriteria.EXCLUSIVE then these sequential boundaries + * are to be interpreted as follows: + *
Don't call this before calling next() for the first time + * or after getting false from next().
+ * + * @return the quantile at the current index. + */ public T getQuantile() { return quantiles[index]; } - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java index 5a61e525f..51b013573 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java @@ -73,6 +73,11 @@ int compare(final long[] arr, final int a, final int b, final long v) { return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return a; @@ -88,6 +93,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { return a; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return a; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -109,6 +119,13 @@ int resolve(final long[] arr, final int lo, final int hi, final long v) { : v > arr[hi] ? hi : (v > arr[lo] ? lo : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v > arr[lo] ? lo : -1) + : v > arr[hi] ? hi : (v > arr[lo] ? 
lo : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -150,6 +167,20 @@ public String desc(final long[] arr, final int low, final int high, final long v + ": arr[" + idx + "]=" + arr[idx] + " < " + v + " <= arr[" + (idx + 1) + "]=" + arr[idx + 1] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "LT: " + v + " <= arr[" + low + "]=" + arr[low] + "; return -1"; + } + if (idx == high) { + return "LT: " + v + " > arr[" + high + "]=" + arr[high] + + "; return arr[" + high + "]=" + arr[high]; + } //idx < high + return "LT: " + v + + ": arr[" + idx + "]=" + arr[idx] + " < " + v + " <= arr[" + (idx + 1) + "]=" + arr[idx + 1] + + "; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -179,6 +210,11 @@ int compare(final long[] arr, final int a, final int b, final long v) { return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return a; @@ -194,6 +230,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { return a; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return a; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -215,6 +256,13 @@ int resolve(final long[] arr, final int lo, final int hi, final long v) { : v >= arr[hi] ? hi : (v >= arr[lo] ? lo : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v >= arr[lo] ? lo : -1) + : v >= arr[hi] ? hi : (v >= arr[lo] ? 
lo : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -256,6 +304,20 @@ public String desc(final long[] arr, final int low, final int high, final long v + ": arr[" + idx + "]=" + arr[idx] + " <= " + v + " < arr[" + (idx + 1) + "]=" + arr[idx + 1] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "LE: " + v + " < arr[" + low + "]=" + arr[low] + "; return -1"; + } + if (idx == high) { + return "LE: " + v + " >= arr[" + high + "]=" + arr[high] + + "; return arr[" + high + "]=" + arr[high]; + } + return "LE: " + v + + ": arr[" + idx + "]=" + arr[idx] + " <= " + v + " < arr[" + (idx + 1) + "]=" + arr[idx + 1] + + "; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -281,6 +343,11 @@ int compare(final long[] arr, final int a, final int b, final long v) { return v < arr[a] ? -1 : arr[b] < v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v < arr[a] ? -1 : arr[b] < v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return v == arr[a] ? a : v == arr[b] ? b : -1; @@ -296,6 +363,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { return v == arr[a] ? a : v == arr[b] ? b : -1; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return v == arr[a] ? a : v == arr[b] ? b : -1; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -317,6 +389,13 @@ int resolve(final long[] arr, final int lo, final int hi, final long v) { : v == arr[lo] ? lo : (v == arr[hi] ? hi : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v == arr[lo] ? 
lo : -1) + : v == arr[lo] ? lo : (v == arr[hi] ? hi : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -358,6 +437,20 @@ public String desc(final long[] arr, final int low, final int high, final long v } return "EQ: " + v + " == arr[" + idx + "]; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + if (v > arr[high]) { + return "EQ: " + v + " > arr[" + high + "]; return -1"; + } + if (v < arr[low]) { + return "EQ: " + v + " < arr[" + low + "]; return -1"; + } + return "EQ: " + v + " Cannot be found within arr[" + low + "], arr[" + high + "]; return -1"; + } + return "EQ: " + v + " == arr[" + idx + "]; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -387,6 +480,11 @@ int compare(final long[] arr, final int a, final int b, final long v) { return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return b; @@ -402,6 +500,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { return b; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return b; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -423,6 +526,13 @@ int resolve(final long[] arr, final int lo, final int hi, final long v) { : v <= arr[lo] ? lo : (v <= arr[hi] ? hi : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v <= arr[lo] ? lo : -1) + : v <= arr[lo] ? lo : (v <= arr[hi] ? 
hi : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -464,6 +574,20 @@ public String desc(final long[] arr, final int low, final int high, final long v + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " < " + v + " <= arr[" + idx + "]=" + arr[idx] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "GE: " + v + " > arr[" + high + "]=" + arr[high] + "; return -1"; + } + if (idx == low) { + return "GE: " + v + " <= arr[" + low + "]=" + arr[low] + + "; return arr[" + low + "]=" + arr[low]; + } //idx > low + return "GE: " + v + + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " < " + v + " <= arr[" + idx + "]=" + arr[idx] + + "; return arr[" + idx + "]=" + arr[idx]; + } }, /** @@ -493,6 +617,11 @@ int compare(final long[] arr, final int a, final int b, final long v) { return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; } + @Override + int compare(final long[] arr, final int a, final int b, final double v) { + return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0; + } + @Override int getIndex(final double[] arr, final int a, final int b, final double v) { return b; @@ -508,6 +637,11 @@ int getIndex(final long[] arr, final int a, final int b, final long v) { return b; } + @Override + int getIndex(final long[] arr, final int a, final int b, final double v) { + return b; + } + @Override int resolve(final double[] arr, final int lo, final int hi, final double v) { return (lo == hi) @@ -529,6 +663,13 @@ int resolve(final long[] arr, final int lo, final int hi, final long v) { : v < arr[lo] ? lo : (v < arr[hi] ? hi : -1); } + @Override + int resolve(final long[] arr, final int lo, final int hi, final double v) { + return (lo == hi) + ? (v < arr[lo] ? lo : -1) + : v < arr[lo] ? lo : (v < arr[hi] ? 
hi : -1); + } + @Override public String desc(final double[] arr, final int low, final int high, final double v, final int idx) { if (idx == -1) { @@ -570,14 +711,28 @@ public String desc(final long[] arr, final int low, final int high, final long v + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " <= " + v + " < arr[" + idx + "]=" + arr[idx] + "; return arr[" + idx + "]=" + arr[idx]; } + + @Override + public String desc(final long[] arr, final int low, final int high, final double v, final int idx) { + if (idx == -1) { + return "GT: " + v + " >= arr[" + high + "]=" + arr[high] + "; return -1"; + } + if (idx == low) { + return "GT: " + v + " < arr[" + low + "]=" + arr[low] + + "; return arr[" + low + "]=" + arr[low]; + } //idx > low + return "GT: " + v + + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " <= " + v + " < arr[" + idx + "]=" + arr[idx] + + "; return arr[" + idx + "]=" + arr[idx]; + } }; /** * The call to compare index a and index b with the value v. - * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param a the lower index of the current pair * @param b the higher index of the current pair - * @param v the double value to search for + * @param v the value to search for * @return +1, which means we must search higher in the array, or -1, which means we must * search lower in the array, or 0, which means we have found the correct bounding pair. */ @@ -585,10 +740,10 @@ public String desc(final long[] arr, final int low, final int high, final long v /** * The call to compare index a and index b with the value v. 
- * @param arr The underlying sorted array of float values + * @param arr The underlying sorted array of values * @param a the lower index of the current pair * @param b the higher index of the current pair - * @param v the float value to search for + * @param v the value to search for * @return +1, which means we must search higher in the array, or -1, which means we must * search lower in the array, or 0, which means we have found the correct bounding pair. */ @@ -596,15 +751,26 @@ public String desc(final long[] arr, final int low, final int high, final long v /** * The call to compare index a and index b with the value v. - * @param arr The underlying sorted array of long values + * @param arr The underlying sorted array of values * @param a the lower index of the current pair * @param b the higher index of the current pair - * @param v the long value to search for + * @param v the value to search for * @return +1, which means we must search higher in the array, or -1, which means we must * search lower in the array, or 0, which means we have found the correct bounding pair. */ abstract int compare(long[] arr, int a, int b, long v); + /** + * The call to compare index a and index b with the value v. + * @param arr The underlying sorted array of values + * @param a the lower index of the current pair + * @param b the higher index of the current pair + * @param v the value to search for + * @return +1, which means we must search higher in the array, or -1, which means we must + * search lower in the array, or 0, which means we have found the correct bounding pair. + */ + abstract int compare(long[] arr, int a, int b, double v); + /** * If the compare operation returns 0, which means "found", this returns the index of the * found value that satisfies the selected criteria. 
@@ -638,6 +804,17 @@ public String desc(final long[] arr, final int low, final int high, final long v */ abstract int getIndex(long[] arr, int a, int b, long v); + /** + * If the compare operation returns 0, which means "found", this returns the index of the + * found value that satisfies the selected criteria. + * @param arr the array being searched + * @param a the lower index of the current pair + * @param b the higher index of the current pair + * @param v the value being searched for. + * @return the index of the found value that satisfies the selected criteria. + */ + abstract int getIndex(long[] arr, int a, int b, double v); + /** * Called to resolve the search when the hi and lo pointers are equal or adjacent. * @param arr the array being searched @@ -668,13 +845,23 @@ public String desc(final long[] arr, final int low, final int high, final long v */ abstract int resolve(long[] arr, int lo, int hi, long v); + /** + * Called to resolve the search when the hi and lo pointers are equal or adjacent. + * @param arr the array being searched + * @param lo the current lo value + * @param hi the current hi value + * @param v the value being searched for + * @return the index of the resolution or -1, if it cannot be resolved. + */ + abstract int resolve(long[] arr, int lo, int hi, double v); + /** * Optional call that describes the details of the results of the search. * Used primarily for debugging. - * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param low the low index of the range * @param high the high index of the range - * @param v the double value to search for + * @param v the value to search for * @param idx the resolved index from the search * @return the descriptive string. */ @@ -683,10 +870,10 @@ public String desc(final long[] arr, final int low, final int high, final long v /** * Optional call that describes the details of the results of the search. * Used primarily for debugging. 
- * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param low the low index of the range * @param high the high index of the range - * @param v the double value to search for + * @param v the value to search for * @param idx the resolved index from the search * @return the descriptive string. */ @@ -695,15 +882,27 @@ public String desc(final long[] arr, final int low, final int high, final long v /** * Optional call that describes the details of the results of the search. * Used primarily for debugging. - * @param arr The underlying sorted array of double values + * @param arr The underlying sorted array of values * @param low the low index of the range * @param high the high index of the range - * @param v the double value to search for + * @param v the value to search for * @param idx the resolved index from the search * @return the descriptive string. */ public abstract String desc(long[] arr, int low, int high, long v, int idx); + /** + * Optional call that describes the details of the results of the search. + * Used primarily for debugging. + * @param arr The underlying sorted array of values + * @param low the low index of the range + * @param high the high index of the range + * @param v the value to search for + * @param idx the resolved index from the search + * @return the descriptive string. + */ + public abstract String desc(long[] arr, int low, int high, double v, int idx); + /** * Binary Search for the index of the double value in the given search range that satisfies * the given InequalitySearch criterion. @@ -804,4 +1003,36 @@ public static int find(final long[] arr, final int low, final int high, return -1; //should never return here } + /** + * Binary Search for the index of the double value in the given search range that satisfies + * the given InequalitySearch criterion. + * If -1 is returned there are no values in the search range that satisfy the criterion. 
+ * + * @param arr the given array that must be sorted. + * @param low the lowest index of the lowest value in the search range, inclusive. + * @param high the highest index of the highest value in the search range, inclusive. + * @param v the value to search for. + * @param crit one of LT, LE, EQ, GT, GE + * @return the index of the value in the given search range that satisfies the criterion + */ + public static int find(final long[] arr, final int low, final int high, + final double v, final InequalitySearch crit) { + Objects.requireNonNull(arr, "Input arr must not be null"); + Objects.requireNonNull(crit, "Input crit must not be null"); + if (arr.length == 0) { throw new SketchesArgumentException("Input array must not be empty."); } + int lo = low; + int hi = high; + while (lo <= hi) { + if (hi - lo <= 1) { + return crit.resolve(arr, lo, hi, v); + } + final int mid = lo + (hi - lo) / 2; + final int ret = crit.compare(arr, mid, mid + 1, v); + if (ret == -1 ) { hi = mid; } + else if (ret == 1) { lo = mid + 1; } + else { return crit.getIndex(arr, mid, mid + 1, v); } + } + return -1; //should never return here + } + } //End of enum diff --git a/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java new file mode 100644 index 000000000..e3c59d2c7 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * This defines a set of results computed from the getPartitionBoundaries() function and + * encapsulates the basic methods needed to construct actual partitions based on generic items. + */ +public interface PartitionBoundaries { + + /** + * Gets the length of the input stream offered to the underlying sketch. + * @return the length of the input stream offered to the underlying sketch. + */ + long getN(); + + /** + * Gets an ordered array of natural ranks of the associated array of partition boundaries utilizing + * a specified search criterion. Natural ranks are integral values on the interval [1, N] + * @return an array of natural ranks. + */ + long[] getNaturalRanks(); + + /** + * Gets an ordered array of normalized ranks of the associated array of partition boundaries utilizing + * a specified search criterion. Normalized ranks are double values on the interval [0.0, 1.0]. + * @return an array of normalized ranks. + */ + double[] getNormalizedRanks(); + + /** + * Gets the number of items to be included for each partition as an array. + * The count at index 0 is 0. The number of items included in the first partition, defined by the boundaries at + * index 0 and index 1, is at index 1 in this array, etc. + * @return the number of items to be included for each partition as an array. 
+ */ + long[] getNumDeltaItems(); + + /** + * Gets the number of partitions + * @return the number of partitions + */ + int getNumPartitions(); + + /** + * Gets the search criteria specified for the source sketch + * @return The search criteria specified for the source sketch + */ + QuantileSearchCriteria getSearchCriteria(); +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java new file mode 100644 index 000000000..2c36bb10a --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +/** + * This enables the special functions for performing efficient partitioning of massive data. + */ +public interface PartitioningFeatureThis method is equivalent to + * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallySized, INCLUSIVE)}. + *
+ * + * @param numEquallySized an integer that specifies the number of equally sized partitions between + * {@link GenericPartitionBoundaries#getMinItem() getMinItem()} and + * {@link GenericPartitionBoundaries#getMaxItem() getMaxItem()}. + * This must be a positive integer greater than zero. + *This method is equivalent to - * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallyWeighted, INCLUSIVE)}. - *
- * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *This method is equivalent to - * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallyWeighted, INCLUSIVE)}. - *
- * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *Although it is possible to estimate the probablity that the true quantile + *
Although it is possible to estimate the probability that the true quantile * exists within the quantile confidence interval specified by the upper and lower quantile bounds, * it is not possible to guarantee the width of the quantile confidence interval * as an additive or multiplicative percent of the true quantile.
@@ -237,7 +187,7 @@ default float getQuantile(double rank) { * Gets the upper bound of the quantile confidence interval in which the true quantile of the * given rank exists. * - *Although it is possible to estimate the probablity that the true quantile + *
Although it is possible to estimate the probability that the true quantile * exists within the quantile confidence interval specified by the upper and lower quantile bounds, * it is not possible to guarantee the width of the quantile interval * as an additive or multiplicative percent of the true quantile.
diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index eb27ce76d..fbd7f691f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -92,58 +92,6 @@ default double[] getCDF(T[] splitPoints) { */ T getMinItem(); - /** - * This method returns an instance of - * {@link GenericPartitionBoundaries GenericPartitionBoundaries} which provides - * sufficient information for the user to create the given number of equally weighted partitions. - * - *This method is equivalent to - * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallyWeighted, INCLUSIVE)}. - *
- * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *To compute the weight or density of a specific - * partition i where i varies from 1 to m partitions: - *
{@code - * long N = getN(); - * double[] ranks = getRanks(); - * long weight = Math.round((ranks[i] - ranks[i - 1]) * N); - * }- */ - public long N; - - /** - * The normalized ranks that correspond to the returned boundaries. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always 0.0, and index m is always 1.0. - */ - public double[] ranks; - - /** - * The partition boundaries as quantiles. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index m is always - * {@link #getMaxItem() getMaxItem()}. - */ - public T[] boundaries; - } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java index 34faefb4f..a35aa27cd 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java @@ -21,6 +21,7 @@ import static java.lang.Math.log; import static java.lang.Math.pow; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import java.util.Objects; @@ -86,18 +87,18 @@ public static final void checkFloatsSplitPointsOrder(final float[] values) { } /** - * Returns a double array of ranks that defines equally weighted regions between 0.0, inclusive and 1.0, inclusive. - * The 0.0 and 1.0 end points are part of the returned array and are the getMinItem() and getMaxItem() values of the - * sketch. - * For example, if num == 2, three values will be returned: 0.0, .5, and 1, where the two equally weighted regions are - * 0.0 to 0.5, and 0.5 to 1.0. - * @param num the total number of equally weighted regions between 0.0 and 1.0 defined by the ranks in the returned - * array. num must be 1 or greater. 
- * @return a double array of num + 1 ranks that define the boundaries of num equally weighted - * regions between 0.0, inclusive and 1.0, inclusive. + * Returns an array of (num + 1) values that define equally sized intervals between 0.0, inclusive, and 1.0, + * inclusive. The end points 0.0 and 1.0 are part of the returned array. + * + *
For example, if num == 2, three values will be returned: 0.0, 0.5, and 1.0, where the two equally sized intervals + * are {0.0, 0.5} and {0.5, 1.0}.
+ * @param num the total number of equally sized intervals between 0.0, inclusive and 1.0, inclusive. + * Must be 1 or greater. + * @return a double array of (num + 1) values that define num equally sized intervals between 0.0, inclusive and 1.0, + * inclusive. * @throws IllegalArgumentException if num is less than 1. */ - public static double[] equallyWeightedRanks(final int num) { + public static double[] equallySpacedDoubles(final int num) { if (num < 1) { throw new IllegalArgumentException("num must be >= 1"); } final double[] out = new double[num + 1]; out[0] = 0.0; @@ -107,6 +108,36 @@ public static double[] equallyWeightedRanks(final int num) { return out; } + /** + * Returns an array of (num + 1) longs that define, approximately, equally spaced intervals between the given + * max, inclusive, and min, inclusive. The end points max and min are part of the + * returned array. Because the range of the values may not exactly divide into num intervals, + * the size of these intervals may vary by plus or minus one. + * @param min the lowest positive valued (or zero) number of the range + * @param max the highest positive valued number of the range. max must be greater than min + * @param num Number of requested intervals. Must be greater than or equal to one, and less than or equal to + * max - min. + * + * @return an array of (num + 1) longs that are approximately equally spaced between the given min and max.
+ */ + public static long[] equallySpacedLongs(final long min, final long max, final int num) { + if (num < 1 || min < 0 || max < 1 || (min >= max) || num > (max - min)) { + throw new SketchesArgumentException( + "Improper inputs: n < 1, min < 0, max < 1, min >= max, or n > (max - min)"); + } + final long span = (max - min); + final double[] splits = equallySpacedDoubles(num); + final int len = num + 1; + final long[] out = new long[len]; + long prev = -1L; + for (int i = 0; i < len; i++) { + long cur = Math.round(splits[i] * span); + if (cur == prev) { cur++; } else { prev = cur; } + out[i] = min + cur; + } + return out; + } + /** * Returns a float array of evenly spaced values between value1, inclusive, and value2 inclusive. * If value2 > value1, the resulting sequence will be increasing. @@ -178,5 +209,18 @@ public static double[] evenlyLogSpaced(final double value1, final double value2, return arr; } + public static final double tailRoundingFactor = 1e7; + + public static double getNaturalRank( + final double normalizedRank, + final long totalN, + final QuantileSearchCriteria searchCrit) { + double naturalRank = (normalizedRank * totalN); + if (totalN <= tailRoundingFactor) { + naturalRank = Math.round(naturalRank * tailRoundingFactor) / tailRoundingFactor; + } + return (searchCrit == INCLUSIVE) ? (long)Math.ceil(naturalRank) : (long)Math.floor(naturalRank); + } + } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java index 434b548a9..5fb50291f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java @@ -20,19 +20,15 @@ package org.apache.datasketches.quantilescommon; /** - * This is the base interface for the Sorted View interface hierarchy. 
+ * This is the base interface for the Sorted View interface hierarchy and defines the methods that are type independent. * - *The Sorted View provides a view of the data retained by a quantiles-type sketch - * that would be cumbersome to get any other way. - * One can iterate over the contents of the sketch using the sketch's iterator, but the result is not sorted.
+ *The SortedView interface hierarchy provides a sorted view of the data retained by a quantiles-type sketch that + * would be cumbersome to get any other way. + * One could use the sketch's iterator to iterate over the contents of the sketch, + * but the result would not be sorted.
* - *Once this sorted view has been created, it provides not only a sorted view of the data retained by the sketch - * but also the basic queries, such as getRank(), getQuantile(), and getCDF() and getPMF(). - * In addition, the iterator obtained from this sorted view provides useful detailed information about each entry.
- * - *The data from a Sorted view is an unbiased sample of the input stream that can be used for other kinds of - * analysis not directly provided by the sketch. For example, comparing two sketches using the Kolmogorov-Smirnov - * test.
+ *The data from a Sorted view is an unbiased random sample of the input stream that can be used for other kinds of + * analysis not directly provided by the sketch.
* * @author Alexander Saydakov * @author Lee Rhodes @@ -40,11 +36,18 @@ public interface SortedView { /** - * Returns the array of cumulative weights - * @return the array of cumulative weights + * Returns the array of cumulative weights from the sketch. + * Also known as the natural ranks, which are the Natural Numbers on the interval [1, N]. + * @return the array of cumulative weights (or natural ranks). */ long[] getCumulativeWeights(); + /** + * Returns the total number of items presented to the sourcing sketch. + * @return the total number of items presented to the sourcing sketch. + */ + long getN(); + /** * Returns true if this sorted view is empty. * @return true if this sorted view is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java index b36a2594e..06c298d4e 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java @@ -19,6 +19,8 @@ package org.apache.datasketches.quantilescommon; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + /** * This is the base interface for the SortedViewIterator hierarchy used with a SortedView obtained * from a quantile-type sketch. This provides an ordered iterator over the retained quantiles of @@ -35,30 +37,47 @@ * @author Alexander Saydakov * @author Lee Rhodes */ -public interface SortedViewIterator { +public class SortedViewIterator { + protected final long[] cumWeights; + protected long totalN; + protected int index; + + SortedViewIterator(final long[] cumWeights) { + this.cumWeights = cumWeights; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter + this.totalN = (cumWeights.length > 0) ? 
cumWeights[cumWeights.length - 1] : 0; index = -1; } /** - * Gets the cumulative weight at the current index (or previous index) based on the chosen search criterion. - * This is also referred to as the "Natural Rank". + * Gets the natural rank at the current index (or previous index) based on the chosen search criterion. + * This is also referred to as the "cumulative weight". The natural rank is a number in the range [0, N], + * where N ({@link #getN()}) is the total number of items fed to the sketch, and 0 is returned for the first item when the search criterion is not INCLUSIVE. * *Don't call this before calling next() for the first time * or after getting false from next().
+ * @param searchCrit if INCLUSIVE, includes the weight of the item at the current index in the computation of + * the natural rank. + * Otherwise, it will return the natural rank of the previous index. + * @return the natural rank at the current index (or previous index) based on the chosen search criterion. */ - long getCumulativeWeight(QuantileSearchCriteria searchCrit); + public long getNaturalRank(final QuantileSearchCriteria searchCrit) { + if (searchCrit == INCLUSIVE) { return cumWeights[index]; } + return (index == 0) ? 0 : cumWeights[index - 1]; + } /** * Gets the total count of all items presented to the sketch. * @return the total count of all items presented to the sketch. */ - long getN(); + public long getN() { + return totalN; + } /** * Gets the normalized rank at the current index (or previous index) - * based on the chosen search criterion. + * based on the chosen search criterion. Where normalized rank = natural rank / N ({@link #getN()}) + * and is a fraction in the range [0, 1.0]. * *Don't call this before calling next() for the first time * or after getting false from next().
@@ -68,24 +87,32 @@ public interface SortedViewIterator { * @return the normalized rank at the current index (or previous index) * based on the chosen search criterion. */ - double getNormalizedRank(QuantileSearchCriteria searchCrit); + public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { + return (double) getNaturalRank(searchCrit) / totalN; + } /** - * Gets the natural weight at the current index. + * Gets the weight contribution of the item at the current index. * *Don't call this before calling next() for the first time * or after getting false from next().
* - * @return the natural weight at the current index. + * @return the weight contribution of the item at the current index. */ - long getWeight(); + public long getWeight() { + if (index == 0) { return cumWeights[0]; } + return cumWeights[index] - cumWeights[index - 1]; + } /** * Advances the index and checks if it is valid. * The state of this iterator is undefined before the first call of this method. * @return true if the next index is valid. */ - boolean next(); + public boolean next() { + index++; + return index < cumWeights.length; + } } diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index 39e808dff..e587cd633 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -19,11 +19,8 @@ package org.apache.datasketches.req; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks; - import org.apache.datasketches.quantilescommon.FloatsSortedView; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; -import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesFloatsAPI; import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator; @@ -62,21 +59,6 @@ abstract class BaseReqSketch implements QuantilesFloatsAPI { @Override public abstract float getMinItem(); - @Override - public FloatsPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } - final double[] ranks = equallyWeightedRanks(numEquallyWeighted); - final float[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final FloatsPartitionBoundaries fpb = new FloatsPartitionBoundaries(); - fpb.N = 
this.getN(); - fpb.ranks = ranks; - fpb.boundaries = boundaries; - return fpb; - } - /** * Returns an a priori estimate of relative standard error (RSE, expressed as a number in [0,1]). * Derived from Lemma 12 in https://arxiv.org/abs/2004.01668v2, but the constant factors were diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java index 1b8586abf..40842221b 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java @@ -20,10 +20,14 @@ package org.apache.datasketches.req; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.util.List; +import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.quantilescommon.FloatsSortedView; +import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesAPI; @@ -38,6 +42,8 @@ public final class ReqSketchSortedView implements FloatsSortedView { private float[] quantiles; private long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final float maxItem; + private final float minItem; /** * Construct from elements for testing. @@ -45,37 +51,59 @@ public final class ReqSketchSortedView implements FloatsSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. 
*/ - ReqSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN) { + ReqSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN, + final float maxItem, final float minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; } /** * Constructs this Sorted View given the sketch - * @param sk the given ReqSketch + * @param sketch the given ReqSketch */ - public ReqSketchSortedView(final ReqSketch sk) { - totalN = sk.getN(); - buildSortedViewArrays(sk); + public ReqSketchSortedView(final ReqSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); + buildSortedViewArrays(sketch); } + //end of constructors + @Override public long[] getCumulativeWeights() { return cumWeights.clone(); } + @Override + public float getMaxItem() { + return maxItem; + } + + @Override + public float getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + @Override public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final long naturalRank = (searchCrit == INCLUSIVE) - ? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { - return quantiles[quantiles.length - 1]; ///EXCLUSIVE (GT) case: normRank == 1.0; + return quantiles[len - 1]; ///EXCLUSIVE (GT) case: normRank == 1.0; } return quantiles[index]; } @@ -103,8 +131,8 @@ public boolean isEmpty() { } @Override - public ReqSketchSortedViewIterator iterator() { - return new ReqSketchSortedViewIterator(quantiles, cumWeights); + public FloatsSortedViewIterator iterator() { + return new FloatsSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java deleted file mode 100644 index 6dbc63222..000000000 --- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.req; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over ReqSketchSortedView. - * @author Alexander Saydakov - * @author Lee Rhodes - */ -public final class ReqSketchSortedViewIterator implements FloatsSortedViewIterator { - - private final float[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - ReqSketchSortedViewIterator(final float[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public float getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/test/java/org/apache/datasketches/common/UtilTest.java b/src/test/java/org/apache/datasketches/common/UtilTest.java index a68671685..50112a315 100644 --- a/src/test/java/org/apache/datasketches/common/UtilTest.java +++ b/src/test/java/org/apache/datasketches/common/UtilTest.java @@ -263,9 +263,14 @@ public void checkZeroPad() { @Test public void checkCharacterPad() { - final String s = "Pad 30, postpend z:"; - final String out = characterPad(s, 30, 'z', true); + String s = "Pad 30, postpend z:"; + String out = characterPad(s, 30, 'z', true); println(out); + assertEquals(out, "Pad 30, postpend z:zzzzzzzzzzz"); + s = "Pad 30, prepend z:"; + out = characterPad(s, 30, 'z', false); + println(out); + assertEquals(out,"zzzzzzzzzzzzPad 30, prepend z:"); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java index bc7651b14..ccfb52533 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java @@ -96,8 +96,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), "1"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + 
assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -105,8 +105,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), "2"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index e4e349205..a8ca4145e 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -21,7 +21,6 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -423,21 +422,6 @@ public void nanSplitPoint() { sketch.getCDF(new double[] {Double.NaN}); } - @Test - public void getQuantiles() { - final KllDoublesSketch sketch = getUpdatableDirectDoublesSketch(200, 0); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - double[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - double[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, 
INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkSimpleMergeDirect() { //used for troubleshooting int k = 20; diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index 6f9ea0ba5..3013e6295 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -21,7 +21,6 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -423,21 +422,6 @@ public void nanSplitPoint() { sketch.getCDF(new float[] {Float.NaN}); } - @Test - public void getQuantiles() { - final KllFloatsSketch sketch = getUpdatableDirectFloatSketch(200, 0); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - float[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - float[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkSimpleMergeDirect() { //used for troubleshooting int k = 20; diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java index d428cd259..7a12d8466 100644 --- 
a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java @@ -77,8 +77,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 1.0); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -86,8 +86,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 2.0); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index ba63e8bef..8aeabb8bf 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -391,21 +391,6 @@ public void nanSplitPoint() { sketch.getCDF(new double[] {Double.NaN}); } - @Test - public void getQuantiles() { - final KllDoublesSketch sketch = KllDoublesSketch.newHeapInstance(); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - double[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - double[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = 
sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkReset() { KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20); @@ -456,18 +441,18 @@ public void sortedView() { assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 1); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 2); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 3); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 2); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 3); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); assertEquals(itr.next(), false); } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java index e511de562..88003b836 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java @@ -77,8 +77,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 1.0f); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); 
assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -86,8 +86,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 2.0f); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 161ee4318..846965cb8 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -391,21 +391,6 @@ public void nanSplitPoint() { sketch.getCDF(new float[] {Float.NaN}); } - @Test - public void getQuantiles() { - final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - float[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - float[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkReset() { KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20); @@ -456,18 +441,18 @@ public void sortedView() { assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 1); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.next(), 
true); assertEquals(itr.getQuantile(), 2); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 3); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 2); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 3); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); assertEquals(itr.next(), false); } diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java b/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java index 5eb513aa8..b0024420c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java +++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java @@ -30,8 +30,9 @@ public KllItemsSketchSortedViewString( final String[] quantiles, final long[] cumWeights, final long totalN, - final String minItem, - final Comparator