145 changes: 145 additions & 0 deletions examples/online_serving/text_to_video/README.md
@@ -0,0 +1,145 @@
# Text-To-Video

This example demonstrates how to deploy the Wan2.2 text-to-video model for online generation with vLLM-Omni.

## Start Server

### Basic Start

```bash
vllm serve Wan-AI/Wan2.2-T2V-A14B-Diffusers --omni --port 8093 --boundary-ratio 0.875 --flow-shift 5.0
```

Notes:
- `--flow-shift`: 5.0 is recommended for 720p and 12.0 for 480p (Wan2.2 recommendation); a 480p variant is shown below.
- `--boundary-ratio`: 0.875 sets the switch point between Wan2.2's low-noise and high-noise DiT experts.
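
For 480p output, the same command with the recommended flow shift is:

```bash
vllm serve Wan-AI/Wan2.2-T2V-A14B-Diffusers --omni --port 8093 --boundary-ratio 0.875 --flow-shift 12.0
```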

### Start with Parameters

Alternatively, use the provided startup script:

```bash
bash run_server.sh
```
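
`run_server.sh` reads `MODEL`, `PORT`, `BOUNDARY_RATIO`, and `FLOW_SHIFT` from the environment, so the defaults can be overridden inline, for example:

```bash
# Serve on a different port with the 480p flow shift
PORT=8094 FLOW_SHIFT=12.0 bash run_server.sh
```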

## API Calls

### Method 1: Using curl

```bash
# Basic text-to-video generation
bash run_curl_text_to_video.sh

# Or execute directly
curl -s http://localhost:8093/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {"role": "user", "content": "A cinematic shot of a flying kite over the ocean."}
    ],
    "extra_body": {
      "height": 720,
      "width": 1280,
      "num_frames": 81,
      "fps": 24,
      "num_inference_steps": 40,
      "guidance_scale": 4.0,
      "guidance_scale_2": 4.0,
      "seed": 42
    }
  }' | jq -r '.choices[0].message.content[0].video_url.url' | cut -d',' -f2 | base64 -d > output.mp4
```
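
`run_curl_text_to_video.sh` reads its settings (`SERVER`, `PROMPT`, `OUTPUT`, `HEIGHT`, `WIDTH`, `NUM_FRAMES`, `FPS`, `STEPS`, `GUIDANCE_SCALE`, `GUIDANCE_SCALE_2`, `SEED`) from environment variables, so you can override them without editing the script:

```bash
# Override the prompt and output path
PROMPT="A drone flyover of a snowy forest at sunrise." OUTPUT=forest.mp4 bash run_curl_text_to_video.sh
```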

### Method 2: Using Python Client

```bash
python openai_chat_client.py --prompt "A cinematic shot of a flying kite over the ocean." --output output.mp4
```
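
The client can also be imported from your own code (run from this directory so `openai_chat_client.py` is importable); `generate_video()` returns the decoded MP4 bytes, or `None` on failure:

```python
from pathlib import Path

from openai_chat_client import generate_video

# Minimal programmatic call; parameters left as None fall back to the server defaults.
video = generate_video(
    "A cinematic shot of a flying kite over the ocean.",
    server_url="http://localhost:8093",
    height=720,
    width=1280,
    num_frames=81,
    seed=42,
)
if video is not None:
    Path("output.mp4").write_bytes(video)
```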

## Request Format

### Simple Text Generation

```json
{
  "messages": [
    {"role": "user", "content": "A cinematic shot of a flying kite over the ocean."}
  ]
}
```

### Generation with Parameters

Use `extra_body` to pass generation parameters:

```json
{
  "messages": [
    {"role": "user", "content": "A cinematic shot of a flying kite over the ocean."}
  ],
  "extra_body": {
    "height": 720,
    "width": 1280,
    "num_frames": 81,
    "fps": 24,
    "num_inference_steps": 40,
    "guidance_scale": 4.0,
    "guidance_scale_2": 4.0,
    "seed": 42,
    "negative_prompt": ""
  }
}
```

## Generation Parameters (extra_body)

| Parameter | Type | Default | Description |
| ------------------------ | ----- | ------- | ------------------------------------- |
| `height` | int | None | Video height in pixels |
| `width` | int | None | Video width in pixels |
| `num_frames` | int | None | Number of frames |
| `fps` | int | 24 | Frames per second for exported MP4 |
| `num_inference_steps` | int | 40 | Number of denoising steps |
| `guidance_scale` | float | 4.0 | CFG guidance scale (low noise) |
| `guidance_scale_2` | float | 4.0 | CFG guidance scale (high noise) |
| `seed`                   | int   | None    | Random seed for reproducible results   |
| `negative_prompt`        | str   | None    | Negative prompt (content to avoid)     |
| `num_outputs_per_prompt` | int | 1 | Number of videos to generate |
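
A quick sanity check on clip length: assuming the exported MP4 simply plays the requested frames back at the requested rate, the duration is `num_frames / fps`, so the defaults above yield roughly 3.4 seconds of video.

```python
# Rough clip duration for the defaults above (assumes plain playback at `fps`)
num_frames, fps = 81, 24
print(f"{num_frames / fps:.2f} s")  # ~3.38 s
```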

## Response Format

```json
{
  "id": "chatcmpl-xxx",
  "created": 1234567890,
  "model": "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
  "choices": [{
    "index": 0,
    "message": {
      "role": "assistant",
      "content": [{
        "type": "video_url",
        "video_url": {
          "url": "data:video/mp4;base64,..."
        }
      }]
    },
    "finish_reason": "stop"
  }],
  "usage": {...}
}
```

## Extract Video

If you saved the raw JSON response to `response.json`, extract and decode the video with:

```bash
jq -r '.choices[0].message.content[0].video_url.url' response.json | cut -d',' -f2 | base64 -d > output.mp4
```
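
The same extraction in Python:

```python
import base64
import json

with open("response.json") as f:
    data = json.load(f)

# The URL has the form "data:video/mp4;base64,<payload>"
url = data["choices"][0]["message"]["content"][0]["video_url"]["url"]
_, b64_payload = url.split(",", 1)
with open("output.mp4", "wb") as out:
    out.write(base64.b64decode(b64_payload))
```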

## File Description

| File | Description |
| --------------------------- | ---------------------- |
| `run_server.sh` | Server startup script |
| `run_curl_text_to_video.sh` | curl example |
| `openai_chat_client.py` | Python client |
127 changes: 127 additions & 0 deletions examples/online_serving/text_to_video/openai_chat_client.py
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
"""
Wan2.2 OpenAI-compatible chat client for text-to-video generation.

Usage:
python openai_chat_client.py --prompt "A cinematic shot..." --output output.mp4
python openai_chat_client.py --num-frames 81 --fps 24 --height 720 --width 1280
"""

import argparse
import base64
from pathlib import Path

import requests


def generate_video(
    prompt: str,
    server_url: str = "http://localhost:8093",
    height: int | None = None,
    width: int | None = None,
    num_frames: int | None = None,
    fps: int | None = None,
    steps: int | None = None,
    guidance_scale: float | None = None,
    guidance_scale_2: float | None = None,
    seed: int | None = None,
    negative_prompt: str | None = None,
) -> bytes | None:
    """Generate a video using the chat completions API."""
    messages = [{"role": "user", "content": prompt}]

    # Only forward parameters that were explicitly provided.
    extra_body = {}
    if height is not None:
        extra_body["height"] = height
    if width is not None:
        extra_body["width"] = width
    if num_frames is not None:
        extra_body["num_frames"] = num_frames
    if fps is not None:
        extra_body["fps"] = fps
    if steps is not None:
        extra_body["num_inference_steps"] = steps
    if guidance_scale is not None:
        extra_body["guidance_scale"] = guidance_scale
    if guidance_scale_2 is not None:
        extra_body["guidance_scale_2"] = guidance_scale_2
    if seed is not None:
        extra_body["seed"] = seed
    if negative_prompt:
        extra_body["negative_prompt"] = negative_prompt

    payload = {"messages": messages}
    if extra_body:
        payload["extra_body"] = extra_body

    try:
        response = requests.post(
            f"{server_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=600,
        )
        response.raise_for_status()
        data = response.json()

        # The video is returned as a base64-encoded data URL inside the message content.
        content = data["choices"][0]["message"]["content"]
        if isinstance(content, list):
            for item in content:
                video_url = item.get("video_url", {}).get("url", "")
                if video_url.startswith("data:video"):
                    _, b64_data = video_url.split(",", 1)
                    return base64.b64decode(b64_data)

        print(f"Unexpected response format: {content}")
        return None

    except Exception as e:
        print(f"Error: {e}")
        return None


def main():
    parser = argparse.ArgumentParser(description="Wan2.2 chat client")
    parser.add_argument("--prompt", "-p", default="A cinematic shot of a flying kite over the ocean.")
    parser.add_argument("--output", "-o", default="wan22_output.mp4", help="Output file")
    parser.add_argument("--server", "-s", default="http://localhost:8093", help="Server URL")
    parser.add_argument("--height", type=int, default=720, help="Video height")
    parser.add_argument("--width", type=int, default=1280, help="Video width")
    parser.add_argument("--num-frames", type=int, default=81, help="Number of frames")
    parser.add_argument("--fps", type=int, default=24, help="Frames per second")
    parser.add_argument("--steps", type=int, default=40, help="Inference steps")
    parser.add_argument("--cfg-scale", type=float, default=4.0, help="CFG scale (low noise)")
    parser.add_argument("--cfg-scale-high", type=float, default=None, help="CFG scale (high noise)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument("--negative", default="", help="Negative prompt")

    args = parser.parse_args()

    print(f"Generating video for: {args.prompt}")

    video_bytes = generate_video(
        prompt=args.prompt,
        server_url=args.server,
        height=args.height,
        width=args.width,
        num_frames=args.num_frames,
        fps=args.fps,
        steps=args.steps,
        guidance_scale=args.cfg_scale,
        guidance_scale_2=args.cfg_scale_high,
        seed=args.seed,
        negative_prompt=args.negative,
    )

    if video_bytes:
        output_path = Path(args.output)
        output_path.write_bytes(video_bytes)
        print(f"Video saved to: {output_path}")
        print(f"Size: {len(video_bytes) / 1024 / 1024:.2f} MB")
    else:
        print("Failed to generate video")
        raise SystemExit(1)


if __name__ == "__main__":
    main()
46 changes: 46 additions & 0 deletions examples/online_serving/text_to_video/run_curl_text_to_video.sh
@@ -0,0 +1,46 @@
#!/bin/bash
# Wan2.2 text-to-video curl example

SERVER="${SERVER:-http://localhost:8093}"
PROMPT="${PROMPT:-A cinematic shot of a flying kite over the ocean.}"
OUTPUT="${OUTPUT:-wan22_output.mp4}"

HEIGHT="${HEIGHT:-720}"
WIDTH="${WIDTH:-1280}"
NUM_FRAMES="${NUM_FRAMES:-81}"
FPS="${FPS:-24}"
STEPS="${STEPS:-40}"
GUIDANCE_SCALE="${GUIDANCE_SCALE:-4.0}"
GUIDANCE_SCALE_2="${GUIDANCE_SCALE_2:-4.0}"
SEED="${SEED:-42}"

echo "Generating video..."
echo "Prompt: $PROMPT"
echo "Output: $OUTPUT"

curl -s "$SERVER/v1/chat/completions" \
-H "Content-Type: application/json" \
-d "{
\"messages\": [
{\"role\": \"user\", \"content\": \"$PROMPT\"}
],
\"extra_body\": {
\"height\": $HEIGHT,
\"width\": $WIDTH,
\"num_frames\": $NUM_FRAMES,
\"fps\": $FPS,
\"num_inference_steps\": $STEPS,
\"guidance_scale\": $GUIDANCE_SCALE,
\"guidance_scale_2\": $GUIDANCE_SCALE_2,
\"seed\": $SEED,
\"num_outputs_per_prompt\": 1
}
}" | jq -r '.choices[0].message.content[0].video_url.url' | cut -d',' -f2 | base64 -d > "$OUTPUT"

if [ -f "$OUTPUT" ]; then
echo "Video saved to: $OUTPUT"
echo "Size: $(du -h "$OUTPUT" | cut -f1)"
else
echo "Failed to generate video"
exit 1
fi
14 changes: 14 additions & 0 deletions examples/online_serving/text_to_video/run_server.sh
@@ -0,0 +1,14 @@
#!/bin/bash
# Wan2.2 text-to-video server

MODEL="${MODEL:-Wan-AI/Wan2.2-T2V-A14B-Diffusers}"
PORT="${PORT:-8093}"
BOUNDARY_RATIO="${BOUNDARY_RATIO:-0.875}"
FLOW_SHIFT="${FLOW_SHIFT:-5.0}"

echo "Starting server for: $MODEL"
echo "Port: $PORT"
echo "boundary_ratio: $BOUNDARY_RATIO"
echo "flow_shift: $FLOW_SHIFT"

vllm serve "$MODEL" --omni --port "$PORT" --boundary-ratio "$BOUNDARY_RATIO" --flow-shift "$FLOW_SHIFT"