Changes from all commits
31 commits
7acf531
Upload documentation
Dec 11, 2025
4730e8c
Upload documentation
Dec 11, 2025
19d5728
Add dir structure & coding style for tests.
congw729 Dec 11, 2025
8343e8f
Merge pull request #1 from congw729/doc/test_plan
yenuo26 Dec 11, 2025
b162a3a
Modify documentation
Dec 11, 2025
385d91f
Merge branch 'main' of https://github.com/yenuo26/vllm-omni into main
Dec 15, 2025
35b7676
Merge branch 'main' of https://github.com/yenuo26/vllm-omni into main
Dec 15, 2025
37eddcb
Merge branch 'main' of https://github.com/yenuo26/vllm-omni into main
Dec 15, 2025
155884c
Merge branch 'main' of https://github.com/yenuo26/vllm-omni into main
Dec 15, 2025
61442b6
Merge branch 'main' of https://github.com/yenuo26/vllm-omni into main
Dec 15, 2025
f2c67c9
Merge branch 'main' of https://github.com/yenuo26/vllm-omni into main
Dec 15, 2025
94875c1
Submit benchmark
Dec 15, 2025
da30505
Submit benchmark
Dec 16, 2025
1e5842e
Submit benchmark
Dec 16, 2025
d80d709
Submit benchmark
Dec 17, 2025
8d57415
Add benchmark
Dec 17, 2025
4ac263a
Add benchmark
Dec 17, 2025
1bb5407
Add benchmark
Dec 18, 2025
e8d26fc
Add benchmark
Dec 18, 2025
9cb9abc
Add benchmark
Dec 18, 2025
555d8ae
Add benchmark
Dec 18, 2025
0d94a5a
Add benchmark
Dec 18, 2025
e07c41e
Add benchmark
Dec 18, 2025
556436f
Add benchmark
Dec 18, 2025
f5c4591
Add benchmark
Dec 18, 2025
bcd0dd6
Add benchmark
Dec 18, 2025
e49d385
Add benchmark
Dec 18, 2025
76db196
Add benchmark
Dec 18, 2025
4e58d43
Merge branch 'main' of https://github.com/yenuo26/vllm-omni into main
Dec 18, 2025
b673b56
modify blank lines and chinese comments
Dec 18, 2025
5deba40
Merge branch 'vllm-project:main' into main
yenuo26 Dec 25, 2025
Empty file.
1,094 changes: 1,094 additions & 0 deletions vllm_omni/benchmarks/datasets.py

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions vllm_omni/benchmarks/lib/__init__.py
@@ -0,0 +1,3 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Benchmark library utilities."""
127 changes: 127 additions & 0 deletions vllm_omni/benchmarks/lib/endpoint_request_func.py
@@ -0,0 +1,127 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""The request function for API endpoints."""

import os
import sys
import time
import traceback
from dataclasses import dataclass
from typing import Optional

import aiohttp
from tqdm.asyncio import tqdm

from vllm.benchmarks.lib.endpoint_request_func import (
    RequestFunc,
    RequestFuncInput,
    RequestFuncOutput,
    async_request_openai_audio,
    async_request_openai_completions,
    async_request_openai_embeddings,
)

AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)

@dataclass
class MixRequestFuncOutput(RequestFuncOutput):
    """RequestFuncOutput extended with audio-output and prompt-token counts."""

    output_audio_num: Optional[int] = None
    prompt_tokens: Optional[int] = None

async def async_request_openai_chat_completions(
    request_func_input: RequestFuncInput,
    session: aiohttp.ClientSession,
    pbar: Optional[tqdm] = None,
) -> MixRequestFuncOutput:
    api_url = request_func_input.api_url
    assert api_url.endswith(("chat/completions", "profile")), (
        "OpenAI Chat Completions API URL must end with 'chat/completions' "
        "or 'profile'.")

    content = [{"type": "text", "text": request_func_input.prompt}]
    if request_func_input.multi_modal_content:
        mm_content = request_func_input.multi_modal_content
        if isinstance(mm_content, list):
            content.extend(mm_content)
        elif isinstance(mm_content, dict):
            content.append(mm_content)
        else:
            raise TypeError(
                "multi_modal_content must be a dict or list[dict] "
                "for openai-chat")
    payload = {
        "model": request_func_input.model_name
        if request_func_input.model_name else request_func_input.model,
        "messages": [
            {
                "role": "user",
                "content": content,
            },
        ],
        "temperature": 0.0,
        "max_completion_tokens": request_func_input.output_len,
        "stream": False,
    }
    if request_func_input.ignore_eos:
        payload["ignore_eos"] = request_func_input.ignore_eos
    if request_func_input.extra_body:
        payload.update(request_func_input.extra_body)
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
    }
    if request_func_input.extra_headers:
        headers |= request_func_input.extra_headers
    if request_func_input.request_id:
        headers["x-request-id"] = request_func_input.request_id

    output = MixRequestFuncOutput()
    output.prompt_len = request_func_input.prompt_len
    output.ttft = 0.0
    st = time.perf_counter()
    output.start_time = st
    output.output_audio_num = 0
    try:
        async with session.post(url=api_url, json=payload,
                                headers=headers) as response:
            if response.status == 200:
                data = await response.json()
                for choice in data.get("choices", []):
                    message = choice["message"]
                    output.generated_text += message.get("content") or ""
                    # Count choices whose message carries an audio part.
                    if message.get("audio"):
                        output.output_audio_num += 1
                usage = data.get("usage") or {}
                output.output_tokens = usage.get("completion_tokens")
                output.prompt_tokens = usage.get("prompt_tokens")
                output.success = True
                output.latency = time.perf_counter() - st
                # Non-streaming request, so TTFT equals the total latency.
                output.ttft = output.latency
            else:
                output.error = response.reason or ""
                output.success = False
    except Exception:
        output.success = False
        exc_info = sys.exc_info()
        output.error = "".join(traceback.format_exception(*exc_info))

    if pbar:
        pbar.update(1)
    return output

# TODO: Add more request functions for different API protocols.
ASYNC_REQUEST_FUNCS: dict[str, RequestFunc] = {
    "vllm": async_request_openai_completions,
    "openai": async_request_openai_completions,
    "openai-chat": async_request_openai_chat_completions,
    "openai-audio": async_request_openai_audio,
    "openai-embeddings": async_request_openai_embeddings,
}

OPENAI_COMPATIBLE_BACKENDS = [
    k for k, v in ASYNC_REQUEST_FUNCS.items()
    if v in (async_request_openai_completions,
             async_request_openai_chat_completions)
]
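
For context, here is a minimal usage sketch of the registry above, assuming a vLLM-compatible server is already running; the server URL, model name, prompt, and token counts below are placeholders, not values from this PR:

import asyncio

import aiohttp

from vllm.benchmarks.lib.endpoint_request_func import RequestFuncInput
from vllm_omni.benchmarks.lib.endpoint_request_func import ASYNC_REQUEST_FUNCS


async def run_one_request():
    # "openai-chat" maps to async_request_openai_chat_completions above.
    request_func = ASYNC_REQUEST_FUNCS["openai-chat"]
    request_func_input = RequestFuncInput(
        prompt="Describe the weather today.",  # placeholder prompt
        api_url="http://localhost:8000/v1/chat/completions",  # placeholder URL
        prompt_len=6,  # placeholder lengths
        output_len=128,
        model="my-omni-model",  # placeholder model name
    )
    async with aiohttp.ClientSession() as session:
        output = await request_func(request_func_input, session)
    print(output.success, output.output_tokens, output.output_audio_num)


if __name__ == "__main__":
    asyncio.run(run_one_request())

Because "openai-chat" returns a MixRequestFuncOutput, output_audio_num and prompt_tokens are populated alongside the usual latency and token fields.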