-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 2 changed files with 175 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
from contextlib import ExitStack | ||
from dataclasses import dataclass | ||
|
||
import gradio as gr | ||
import pandas as pd | ||
|
||
|
||
@dataclass
class PlotConfig:
    """Display labels attached to one metric plot.

    Fields are declared in the order ``x_title``, ``y_title``, ``title``,
    which is also the positional order of the generated constructor.
    """

    # Label intended for the horizontal axis.
    x_title: str
    # Label intended for the vertical axis.
    y_title: str
    # Heading of the plot.
    title: str
|
||
|
||
# Extra stylesheet passed to gr.Blocks; targets elements carrying the
# "summary" class (the summary table is created with that class).
css = '''
.summary span {
    font-size: 10px;
    padding-top:0;
    padding-bottom:0;
}
'''

# Markdown rendered above the summary table. The blank line after the bullet
# list is required: without it the closing paragraph is parsed as a
# continuation of the last bullet.
summary_desc = '''
## Summary
This table shows the average of the metrics for each model and QPS rate.
Benchmarks are run with:
- Prompts: 200±10 tokens length (normal distribution)
- Generation: 200±10 tokens length (normal distribution)
- 120s duration

Each benchmark is run using a constant arrival rate of requests per second (QPS),
independently of the number of requests that are being processed (open loop).
'''

# Module-level state read by the callbacks in this file. `df` is replaced by
# the loaded benchmark results when the file is run as a script; `line_plots`
# is filled while the UI is built.
df = pd.DataFrame()
# NOTE(review): `summary` is never read in this file — confirm it is still needed.
summary = pd.DataFrame()
line_plots = []
|
||
|
||
def plot(model, device) -> pd.DataFrame:
    """Return the rows of the module-level ``df`` for one model on one device.

    :param model: value compared against the ``model`` column.
    :param device: value compared against the ``device`` column.
    :return: the matching subset of ``df``.
    """
    same_model = df['model'] == model
    same_device = df['device'] == device
    return df[same_model & same_device]
|
||
|
||
def update_app(device, model):
    """Compute the refreshed values for every line plot and the summary table.

    The argument order ``(device, model)`` matches the inputs wired to the
    event handlers that call this function.

    :param device: selected device.
    :param model: selected model.
    :return: one DataFrame per entry of ``line_plots`` followed by the
        summary table for ``device``.
    """
    # Every plot receives the same rows (each one selects its own y column),
    # so the filter is evaluated once instead of once per plot. Using a loop
    # variable named `plot` here would also hide the `plot()` helper.
    selection = plot(model, device)
    return [selection for _ in line_plots] + [summary_table(device)]
|
||
|
||
def summary_table(device, rates=(4., 8., 16.)) -> pd.DataFrame:
    """Average the headline metrics per model and request rate for one device.

    Reads the module-level ``df``.

    :param device: value compared against the ``device`` column.
    :param rates: request rates (``rate`` column values) to keep; defaults to
        4, 8 and 16.
    :return: one row per (model, rate) pair with the mean of each metric,
        using human-readable column names.
    """
    # Metric columns in display order, mapped to their table headers. Only
    # the columns listed here are aggregated.
    metric_labels = {
        'inter_token_latency_ms_p90': 'ITL P90 (ms)',
        'time_to_first_token_ms_p90': 'TTFT P90 (ms)',
        'e2e_latency_ms_p90': 'E2E P90 (ms)',
        'token_throughput_secs': 'Throughput (tokens/s)',
    }
    selected = df[(df['device'] == device) & (df['rate'].isin(list(rates)))]
    averaged = (
        selected.groupby(['model', 'rate'])[list(metric_labels)]
        .mean()
        .reset_index()
    )
    return averaged.rename(columns={'model': 'Model', 'rate': 'QPS', **metric_labels})
|
||
|
||
def load_data(path='results.parquet') -> pd.DataFrame:
    """Load benchmark results and keep only the constant-arrival-rate runs.

    :param path: parquet file to read; defaults to ``results.parquet`` in the
        current working directory.
    :return: the rows whose ``id`` is neither ``'warmup'`` nor
        ``'throughput'`` and whose ``executor_type`` is
        ``'ConstantArrivalRate'``.
    """
    runs = pd.read_parquet(path)
    # Drop the warmup and throughput entries.
    is_measured = ~runs['id'].isin(['warmup', 'throughput'])
    # Only keep constant-rate runs.
    is_constant_rate = runs['executor_type'] == 'ConstantArrivalRate'
    return runs[is_measured & is_constant_rate]
|
||
|
||
if __name__ == '__main__':
    # One entry per plotted metric: results column name -> display labels.
    metrics = {
        "inter_token_latency_ms_p90": PlotConfig(title="Inter Token Latency P90 (lower is better)", x_title="QPS",
                                                 y_title="Time (ms)"),
        "time_to_first_token_ms_p90": PlotConfig(title="TTFT P90 (lower is better)", x_title="QPS",
                                                 y_title="Time (ms)"),
        "e2e_latency_ms_p90": PlotConfig(title="End to End Latency P90 (lower is better)", x_title="QPS",
                                         y_title="Time (ms)"),
        "token_throughput_secs": PlotConfig(title="Request Output Throughput P90 (higher is better)", x_title="QPS",
                                            y_title="Tokens/s"),
        "successful_requests": PlotConfig(title="Successful requests (higher is better)", x_title="QPS",
                                          y_title="Count"),
        "error_rate": PlotConfig(title="Error rate (lower is better)", x_title="QPS", y_title="%")
    }
    # Placeholder data used to construct the plots before the first refresh.
    default_df = pd.DataFrame.from_dict(
        {"rate": [1, 2], "inter_token_latency_ms_p90": [10, 20], "engine": ["tgi", "vllm"]})
    df = load_data()
    models = list(df["model"].unique())
    devices = list(df["device"].unique())
    # Prefer H100 when the data contains it; otherwise fall back to the first
    # available device so the initial selection always matches real rows.
    if "H100" in devices:
        default_device = "H100"
    else:
        default_device = devices[0] if devices else None
    with gr.Blocks(css=css) as demo:
        with gr.Row():
            header = gr.Markdown("# TGI benchmarks\nBenchmark results for Hugging Face TGI 🤗")
        with gr.Row():
            device = gr.Radio(devices, label="Select device", value=default_device)
        with gr.Row():
            # Bound to a new name so the module-level `summary_desc` string is
            # not replaced by a component.
            summary_md = gr.Markdown(summary_desc)
        with gr.Row():
            table = gr.DataFrame(
                pd.DataFrame(),
                elem_classes=["summary"],
            )
        with gr.Row():
            details_desc = gr.Markdown("## Details")
        with gr.Row():
            model = gr.Dropdown(models, label="Select model", value=models[0] if models else None)
        with ExitStack() as stack:
            for position, (column, cfg) in enumerate(metrics.items()):
                if position % 2 == 0:
                    # Two plots per row: closing the stack exits the previous
                    # row before a new one is entered.
                    stack.close()
                    stack.enter_context(gr.Row())
                # NOTE(review): the color_map keys ('vLLM', 'TGI') differ in
                # case from the placeholder engine names ('tgi', 'vllm') —
                # confirm which spelling the real data uses.
                line_plots.append({
                    "component": gr.LinePlot(default_df, label=f'{cfg.title}', x="rate", y=column,
                                             color="engine", x_title=cfg.x_title, y_title=cfg.y_title,
                                             color_map={'vLLM': '#2F5BA1', 'TGI': '#FF9D00'}),
                    "model": model.value,
                    "device": device,
                })

        # All three events refresh every plot and the summary table.
        inputs = [device, model]
        outputs = [item["component"] for item in line_plots] + [table]
        device.change(update_app, inputs, outputs)
        model.change(update_app, inputs, outputs)
        demo.load(update_app, inputs, outputs)

    demo.launch()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import json | ||
import os | ||
from operator import index | ||
|
||
import pandas as pd | ||
from pandas import DataFrame | ||
|
||
|
||
def build_df(model: str, data_files: dict[str, str]) -> pd.DataFrame:
    """Flatten the benchmark results of one model into a DataFrame.

    Each file in ``data_files`` is a JSON document with a ``results`` list.
    Every result becomes one row: its nested ``config`` is flattened with
    ``pd.json_normalize`` and merged into the row, the ``config`` key itself
    is dropped, and ``engine`` / ``model`` columns are added.

    :param model: value stored in the ``model`` column of every row.
    :param data_files: mapping of engine name to JSON file path; the key is
        stored in the ``engine`` column.
    :return: one row per result (each row keeps index ``0``), or an empty
        DataFrame when there are no results.
    """
    frames = []
    for engine, filename in data_files.items():
        with open(filename, 'r', encoding='utf-8') as f:
            payload = json.load(f)
        for result in payload['results']:
            [config] = pd.json_normalize(result['config']).to_dict(orient='records')
            # Work on a copy so the parsed JSON is left untouched.
            row = dict(result)
            row.update(config)
            row['engine'] = engine
            row['model'] = model
            del row['config']
            frames.append(pd.DataFrame(row, index=[0]))
    if not frames:
        return pd.DataFrame()
    # Concatenate once at the end: growing a DataFrame inside the loop copies
    # all accumulated rows on every iteration.
    return pd.concat(frames)
|
||
|
||
def build_results_df(results_dir: str = 'results') -> pd.DataFrame:
    """Collect the results of every model directory into one DataFrame.

    Each sub-directory of ``results_dir`` is treated as one model (the
    directory name is the model name). Every ``*.json`` file inside it is
    passed to ``build_df``, keyed by the second-to-last dot-separated part of
    its file name (``tgi.json`` -> ``tgi``).

    :param results_dir: directory containing one sub-directory per model;
        defaults to ``results``.
    :return: the concatenation of all per-model frames, or an empty DataFrame
        when ``results_dir`` contains no sub-directories.
    """
    frames = []
    # Sorted so the row order does not depend on the filesystem's listing order.
    for name in sorted(os.listdir(results_dir)):
        directory = f'{results_dir}/{name}'
        if not os.path.isdir(directory):
            continue
        data_files = {
            filename.split('.')[-2]: f'{directory}/{filename}'
            for filename in sorted(os.listdir(directory))
            if filename.endswith('.json')
        }
        frames.append(build_df(name, data_files))
    if not frames:
        return pd.DataFrame()
    # Single concat instead of re-copying the accumulated frame per directory.
    return pd.concat(frames)
|
||
|
||
if __name__ == '__main__':
    df = build_results_df()
    # Every row is tagged with the same device; a scalar assignment
    # broadcasts it without a per-row callback.
    df['device'] = 'H100'
    # Share of failed requests, as a percentage of all requests.
    df['error_rate'] = df['failed_requests'] / (df['failed_requests'] + df['successful_requests']) * 100.0
    df.to_parquet('results.parquet')