Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/getting_started/installation/gpu/cuda.inc.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Therefore, it is recommended to install vLLM and vLLM-Omni with a **fresh new**

vLLM-Omni is built on top of vLLM. Please install it with the command below.
```bash
uv pip install vllm==0.11.0 --torch-backend=auto
uv pip install vllm==0.12.0 --torch-backend=auto
```

#### Installation of vLLM-Omni
Expand Down Expand Up @@ -89,7 +89,7 @@ docker run --runtime nvidia --gpus 2 \
--env "HF_TOKEN=$HF_TOKEN" \
-p 8091:8091 \
--ipc=host \
vllm/vllm-omni:v0.11.0rc1 \
vllm/vllm-omni:v0.12.0rc1 \
--model Qwen/Qwen3-Omni-30B-A3B-Instruct --port 8091
```

Expand Down
29 changes: 28 additions & 1 deletion examples/offline_inference/qwen3_omni/end2end.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,39 @@ def get_audio_query(question: str = None, audio_path: str | None = None, samplin
limit_mm_per_prompt={"audio": 1},
)

def get_mixed_modalities_query() -> QueryResult:
    """Build a single query that feeds audio, image, and video together.

    Returns a QueryResult whose prompt interleaves one placeholder section
    per modality (audio, image, video) ahead of the combined question, with
    a per-modality limit of 1 item each.
    """
    # One question touching all three modalities at once.
    combined_question = (
        "What is recited in the audio? "
        "What is the content of this image? Why is this video funny?"
    )
    # Placeholder tokens, in the same order the assets appear in the
    # multi_modal_data mapping: audio, then image, then video.
    placeholder_section = (
        "<|audio_start|><|audio_pad|><|audio_end|>"
        "<|vision_start|><|image_pad|><|vision_end|>"
        "<|vision_start|><|video_pad|><|vision_end|>"
    )
    full_prompt = (
        f"<|im_start|>system\n{default_system}<|im_end|>\n"
        "<|im_start|>user\n"
        + placeholder_section
        + f"{combined_question}<|im_end|>\n"
        + "<|im_start|>assistant\n"
    )

    # Resolve each asset up front so the returned structure stays flat.
    audio_input = AudioAsset("mary_had_lamb").audio_and_sample_rate
    image_input = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB")
    video_input = VideoAsset(name="baby_reading", num_frames=16).np_ndarrays

    return QueryResult(
        inputs={
            "prompt": full_prompt,
            "multi_modal_data": {
                "audio": audio_input,
                "image": image_input,
                "video": video_input,
            },
        },
        limit_mm_per_prompt={"audio": 1, "image": 1, "video": 1},
    )

# Dispatch table: maps the --query-type CLI value to the builder that
# constructs the corresponding QueryResult. Insertion order is preserved,
# which keeps the argparse `choices` listing stable.
query_map = dict(
    text=get_text_query,
    use_audio=get_audio_query,
    use_image=get_image_query,
    use_video=get_video_query,
    use_mixed_modalities=get_mixed_modalities_query,
)


Expand Down Expand Up @@ -281,7 +308,7 @@ def parse_args():
"--query-type",
"-q",
type=str,
default="use_video",
default="use_mixed_modalities",
choices=query_map.keys(),
help="Query type.",
)
Expand Down
Loading