From 855301bf26c29828bbdd90d957c90faa80d0b3be Mon Sep 17 00:00:00 2001 From: Stanley Feng Date: Sat, 31 Oct 2015 11:35:00 -0700 Subject: [PATCH 1/2] Add a new and optional measurement type called "RAW" which outputs every datapoint of a run. Raw datapoint output is quite useful and important for two reasons: 1) Allows the user to do more thorough statistical analysis on the complete set of the data. 2) Using timestamps that are logged with raw data points, it becomes much easier to investigate and correlate latency outliers at the backend service. --- .../yahoo/ycsb/measurements/Measurements.java | 40 +++- .../ycsb/measurements/OneMeasurementRaw.java | 192 ++++++++++++++++++ workloads/workload_template | 17 ++ 3 files changed, 238 insertions(+), 11 deletions(-) create mode 100644 core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java diff --git a/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java b/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java index e91375849e..6c84f80a1a 100644 --- a/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java +++ b/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java @@ -29,8 +29,18 @@ * @author cooperb * */ -public class Measurements -{ +public class Measurements { + /** + * All supported measurement types are defined in this enum. 
+ * + */ + public enum MeasurementType { + HISTOGRAM, + HDRHISTOGRAM, + HDRHISTOGRAM_AND_HISTOGRAM, + TIMESERIES, + RAW + } public static final String MEASUREMENT_TYPE_PROPERTY = "measurementtype"; private static final String MEASUREMENT_TYPE_PROPERTY_DEFAULT = "hdrhistogram"; @@ -60,7 +70,7 @@ public synchronized static Measurements getMeasurements() final ConcurrentHashMap _opToMesurementMap; final ConcurrentHashMap _opToIntendedMesurementMap; - final int _measurementType; + final MeasurementType _measurementType; final int _measurementInterval; private Properties _props; @@ -77,19 +87,23 @@ public Measurements(Properties props) String mTypeString = _props.getProperty(MEASUREMENT_TYPE_PROPERTY, MEASUREMENT_TYPE_PROPERTY_DEFAULT); if (mTypeString.equals("histogram")) { - _measurementType = 0; + _measurementType = MeasurementType.HISTOGRAM; } else if (mTypeString.equals("hdrhistogram")) { - _measurementType = 1; + _measurementType = MeasurementType.HDRHISTOGRAM; } else if (mTypeString.equals("hdrhistogram+histogram")) { - _measurementType = 2; + _measurementType = MeasurementType.HDRHISTOGRAM_AND_HISTOGRAM; } else if (mTypeString.equals("timeseries")) { - _measurementType = 3; + _measurementType = MeasurementType.TIMESERIES; + } + else if (mTypeString.equals("raw")) + { + _measurementType = MeasurementType.RAW; } else { throw new IllegalArgumentException("unknown "+MEASUREMENT_TYPE_PROPERTY+"="+mTypeString); @@ -117,16 +131,20 @@ OneMeasurement constructOneMeasurement(String name) { switch (_measurementType) { - case 0: + case HISTOGRAM: return new OneMeasurementHistogram(name, _props); - case 1: + case HDRHISTOGRAM: return new OneMeasurementHdrHistogram(name, _props); - case 2: + case HDRHISTOGRAM_AND_HISTOGRAM: return new TwoInOneMeasurement(name, new OneMeasurementHdrHistogram("Hdr"+name, _props), new OneMeasurementHistogram("Bucket"+name, _props)); - default: + case TIMESERIES: return new OneMeasurementTimeSeries(name, _props); + case RAW: + return new 
OneMeasurementRaw(name, _props); + default: + throw new AssertionError("Impossible to be here. Dead code reached. Bugs?"); } } diff --git a/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java b/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java new file mode 100644 index 0000000000..b401b42f08 --- /dev/null +++ b/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java @@ -0,0 +1,192 @@ +/** + * Copyright (c) 2015 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you + * may not use this file except in compliance with the License. You + * may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. See accompanying + * LICENSE file. + */ + +package com.yahoo.ycsb.measurements; + +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.Properties; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import com.yahoo.ycsb.measurements.exporter.MeasurementsExporter; + +/** + * Record a series of measurements as raw data points without down sampling, + * optionally write to an output file when configured. + * + * @author stfeng + * + */ +public class OneMeasurementRaw extends OneMeasurement { + /** + * One raw data point, two fields: timestamp (ms) when the datapoint is + * inserted, and the value. 
+ */ + class RawDataPoint { + private final long timestamp; + private final int value; + + public RawDataPoint(int value) { + this.timestamp = System.currentTimeMillis(); + this.value = value; + } + + public long timeStamp() { + return timestamp; + } + + public int value() { + return value; + } + } + + class RawDataPointComparator implements Comparator { + @Override + public int compare(RawDataPoint p1, RawDataPoint p2){ + if (p1.value() < p2.value()){ + return -1; + } else if (p1.value() == p2.value()) { + return 0; + } else { + return 1; + } + } + } + + /** + * Optionally, user can configure an output file to save the raw data points. + * Default is none, raw results will be written to stdout. + * + */ + public static final String OUTPUT_FILE_PATH = "measurement.raw.output_file"; + public static final String OUTPUT_FILE_PATH_DEFAULT = ""; + private String outputFilePath = ""; + private final PrintStream outputStream; + + private ArrayList measurements; + private long totalLatency = 0; + + // A window of stats to print summary for at the next getSummary() call. + // It's supposed to be a one line summary, so we will just print count and + // average. 
+ private int windowOperations = 0; + private long windowTotalLatency = 0; + + public OneMeasurementRaw(String name, Properties props) { + super(name); + + outputFilePath = props.getProperty(OUTPUT_FILE_PATH, + OUTPUT_FILE_PATH_DEFAULT); + if (!outputFilePath.isEmpty()) { + System.out.println("Raw data measurement: will output to result file: " + + outputFilePath); + + try { + outputStream = new PrintStream( + new FileOutputStream(outputFilePath, true), + true); + } catch (FileNotFoundException e) { + throw new RuntimeException("Failed to open raw data output file", e); + } + + } else{ + System.out.println("Raw data measurement: will output to stdout."); + outputStream = System.out; + + } + measurements = new ArrayList(1000); + } + + @Override + public synchronized void measure(int latency) { + totalLatency += latency; + windowTotalLatency += latency; + windowOperations++; + + measurements.add(new RawDataPoint(latency)); + } + + @Override + public void exportMeasurements(MeasurementsExporter exporter) + throws IOException { + // Output raw data points first then print out a summary of percentiles to + // stdout. 
+ + outputStream.println(getName() + + " latency raw data: op, timestamp(ms), latency(us)"); + for (RawDataPoint point : measurements) { + outputStream.println( + String.format("%s,%d,%d", getName(), point.timeStamp(), + point.value())); + } + if (outputStream != System.out) { + outputStream.close(); + } + + int totalOps = measurements.size(); + exporter.write(getName(), "Total Operations", totalOps); + if (totalOps > 0) { + exporter.write(getName(), + "Below is a summary of latency in microseconds:", -1); + exporter.write(getName(), "Average", + (double)totalLatency / (double)totalOps); + + Collections.sort(measurements, new RawDataPointComparator()); + + exporter.write(getName(), "Min", measurements.get(0).value()); + exporter.write( + getName(), "Max", measurements.get(totalOps - 1).value()); + exporter.write( + getName(), "p1", measurements.get((int)(totalOps*0.01)).value()); + exporter.write( + getName(), "p5", measurements.get((int)(totalOps*0.05)).value()); + exporter.write( + getName(), "p50", measurements.get((int)(totalOps*0.5)).value()); + exporter.write( + getName(), "p90", measurements.get((int)(totalOps*0.9)).value()); + exporter.write( + getName(), "p95", measurements.get((int)(totalOps*0.95)).value()); + exporter.write( + getName(), "p99", measurements.get((int)(totalOps*0.99)).value()); + exporter.write(getName(), "p99.9", + measurements.get((int)(totalOps*0.999)).value()); + exporter.write(getName(), "p99.99", + measurements.get((int)(totalOps*0.9999)).value()); + } + + exportReturnCodes(exporter); + } + + @Override + public synchronized String getSummary() { + if (windowOperations == 0) { + return ""; + } + + String toReturn = String.format("%s count: %d, average latency(us): %.2f", + getName(), windowOperations, + (double)windowTotalLatency / (double)windowOperations); + + windowTotalLatency=0; + windowOperations=0; + + return toReturn; + } +} diff --git a/workloads/workload_template b/workloads/workload_template index e59bace12a..f95d1d4804 
100644 --- a/workloads/workload_template +++ b/workloads/workload_template @@ -115,6 +115,23 @@ table=usertable # How the latency measurements are presented measurementtype=histogram #measurementtype=timeseries +#measurementtype=raw +# When measurementtype is set to raw, measurements will be output +# as RAW datapoints in the following csv format: +# "operation, timestamp of the measurement, latency in us" +# +# Raw datapoints are collected in-memory while the test is running. Each +# data point consumes about 20 bytes (including java object overhead). +# For a typical run of 1 million to 10 million operations, this should +# easily fit into memory. If you plan to do a run with 100s of millions of +# operations, consider increasing your jvm heap size before you enable the +# RAW measurement type, or split the run into multiple runs. +# +# Optionally, you can specify an output file to save raw datapoints. +# Otherwise, raw datapoints will be written to stdout. +# The output file will be appended to if it already exists, otherwise +# a new output file will be created. +#measurement.raw.output_file = /tmp/your_output_file_for_this_run # The range of latencies to track in the histogram (milliseconds) histogram.buckets=1000 From 8bfadaeafce7c5fb0d51c8fb53cd6c76296e9255 Mon Sep 17 00:00:00 2001 From: Stanley Feng Date: Tue, 3 Nov 2015 10:58:06 -0800 Subject: [PATCH 2/2] Incorporate CR feedback: 1. Use LinkedList rather than ArrayList for the measurement array in RAW. Update size estimation in documentation in workload_template. 2. Add a new composite measurement type which combines HdrHistogram and RAW. 3. Allow user to optionally disable output of summary stats from the raw measurement class. 
--- .../yahoo/ycsb/measurements/Measurements.java | 9 ++++++ .../ycsb/measurements/OneMeasurementRaw.java | 28 +++++++++++++++---- workloads/workload_template | 8 +++--- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java b/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java index 6c84f80a1a..f1d7bbe5db 100644 --- a/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java +++ b/core/src/main/java/com/yahoo/ycsb/measurements/Measurements.java @@ -38,6 +38,7 @@ public enum MeasurementType { HISTOGRAM, HDRHISTOGRAM, HDRHISTOGRAM_AND_HISTOGRAM, + HDRHISTOGRAM_AND_RAW, TIMESERIES, RAW } @@ -97,6 +98,10 @@ else if (mTypeString.equals("hdrhistogram+histogram")) { _measurementType = MeasurementType.HDRHISTOGRAM_AND_HISTOGRAM; } + else if (mTypeString.equals("hdrhistogram+raw")) + { + _measurementType = MeasurementType.HDRHISTOGRAM_AND_RAW; + } else if (mTypeString.equals("timeseries")) { _measurementType = MeasurementType.TIMESERIES; @@ -139,6 +144,10 @@ OneMeasurement constructOneMeasurement(String name) return new TwoInOneMeasurement(name, new OneMeasurementHdrHistogram("Hdr"+name, _props), new OneMeasurementHistogram("Bucket"+name, _props)); + case HDRHISTOGRAM_AND_RAW: + return new TwoInOneMeasurement(name, + new OneMeasurementHdrHistogram("Hdr"+name, _props), + new OneMeasurementRaw("Raw"+name, _props)); case TIMESERIES: return new OneMeasurementTimeSeries(name, _props); case RAW: diff --git a/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java b/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java index b401b42f08..132952d90b 100644 --- a/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java +++ b/core/src/main/java/com/yahoo/ycsb/measurements/OneMeasurementRaw.java @@ -22,7 +22,7 @@ import java.io.IOException; import java.io.PrintStream; import java.util.Properties; -import java.util.ArrayList; +import
java.util.LinkedList; import java.util.Collections; import java.util.Comparator; import com.yahoo.ycsb.measurements.exporter.MeasurementsExporter; @@ -77,14 +77,28 @@ public int compare(RawDataPoint p1, RawDataPoint p2){ */ public static final String OUTPUT_FILE_PATH = "measurement.raw.output_file"; public static final String OUTPUT_FILE_PATH_DEFAULT = ""; + + /** + * Optionally, user can request to not output summary stats. This is useful + * if the user chains the raw measurement type behind the HdrHistogram type + * which already outputs summary stats. But even in that case, the user may + * still want this class to compute summary stats for them, especially if + * they want accurate computation of percentiles (because percentiles computed + * by histogram classes are still approximations). + */ + public static final String NO_SUMMARY_STATS = "measurement.raw.no_summary"; + public static final String NO_SUMMARY_STATS_DEFAULT = "false"; + private String outputFilePath = ""; private final PrintStream outputStream; - private ArrayList measurements; + private boolean noSummaryStats = false; + + private LinkedList measurements; private long totalLatency = 0; // A window of stats to print summary for at the next getSummary() call. - // It's supposed to be a one line summary, so we will just print count and + // It's supposed to be a one line summary, so we will just print count and // average.
private int windowOperations = 0; private long windowTotalLatency = 0; @@ -111,7 +125,11 @@ public OneMeasurementRaw(String name, Properties props) { outputStream = System.out; } - measurements = new ArrayList(1000); + + noSummaryStats = Boolean.parseBoolean(props.getProperty(NO_SUMMARY_STATS, + NO_SUMMARY_STATS_DEFAULT)); + + measurements = new LinkedList(); } @Override @@ -142,7 +160,7 @@ public void exportMeasurements(MeasurementsExporter exporter) int totalOps = measurements.size(); exporter.write(getName(), "Total Operations", totalOps); - if (totalOps > 0) { + if (totalOps > 0 && !noSummaryStats) { exporter.write(getName(), "Below is a summary of latency in microseconds:", -1); exporter.write(getName(), "Average", diff --git a/workloads/workload_template b/workloads/workload_template index f95d1d4804..6aebd64a17 100644 --- a/workloads/workload_template +++ b/workloads/workload_template @@ -121,11 +121,11 @@ measurementtype=histogram # "operation, timestamp of the measurement, latency in us" # # Raw datapoints are collected in-memory while the test is running. Each -# data point consumes about 20 bytes (including java object overhead). +# data point consumes about 50 bytes (including java object overhead). # For a typical run of 1 million to 10 million operations, this should -# easily fit into memory. If you plan to do a run with 100s of millions of -# operations, consider increasing your jvm heap size before you enable the -# RAW measurement type, or split the run into multiple runs. +# fit into memory most of the time. If you plan to do 100s of millions of +# operations per run, consider provisioning a machine with larger RAM when using +# the RAW measurement type, or split the run into multiple runs. # # Optionally, you can specify an output file to save raw datapoints. # Otherwise, raw datapoints will be written to stdout.