Skip to content

Commit

Permalink
feat: add initial qwen2.5-vl model and test
Browse files Browse the repository at this point in the history
  • Loading branch information
drbh committed Jan 30, 2025
1 parent 065aabb commit 1adfee4
Show file tree
Hide file tree
Showing 8 changed files with 857 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "The image depicts a person in a space-themed environment, possibly on another planet given the red sand and harsh landscape in the background. The individual is wearing a detailed, high-tech spacesuit with various gadgets and gadgets. The space suit features a large red button in the center of the chest, and the individual is in a crouched, ready stance, as if in a dramatic or adventurous pose. The background showcases an expansive high gorge or canyon, with walls of red and orange hues, with potential light sources in the distance that create a dramatic and intense atmosphere.",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1738257213,
"id": "",
"model": "Qwen/Qwen2.5-VL-3B-Instruct",
"object": "chat.completion",
"system_fingerprint": "3.0.2-dev0-native",
"usage": {
"completion_tokens": 114,
"prompt_tokens": 1363,
"total_tokens": 1477
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"choices": [
{
"delta": {
"content": "",
"role": "assistant",
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1738257318,
"id": "",
"model": "Qwen/Qwen2.5-VL-3B-Instruct",
"object": "chat.completion.chunk",
"system_fingerprint": "3.0.2-dev0-native",
"usage": null
}
78 changes: 78 additions & 0 deletions integration-tests/models/test_flash_qwen2_5_vl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pytest


@pytest.fixture(scope="module")
def flash_qwen2_5_vl_handle(launcher):
    # Launch a text-generation-inference server for Qwen2.5-VL once per test
    # module; the context manager tears the server down after the last test.
    with launcher("Qwen/Qwen2.5-VL-3B-Instruct") as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_qwen2_5(flash_qwen2_5_vl_handle):
    # Block until the launched server reports healthy (up to 300s), then hand
    # the tests its async chat client.
    await flash_qwen2_5_vl_handle.health(300)
    return flash_qwen2_5_vl_handle.client


@pytest.mark.private
async def test_flash_qwen2_5_vl_simple(flash_qwen2_5, response_snapshot):
    # Ask the model to describe a remote image and pin both the exact
    # generated text and the full response object against the snapshot.
    image_chunk = {
        "type": "image_url",
        "image_url": {
            "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
        },
    }
    text_chunk = {"type": "text", "text": "Describe the image"}

    response = await flash_qwen2_5.chat(
        seed=1337,
        messages=[{"role": "user", "content": [image_chunk, text_chunk]}],
    )

    expected = "The image depicts a person in a space-themed environment, possibly on another planet given the red sand and harsh landscape in the background. The individual is wearing a detailed, high-tech spacesuit with various gadgets and gadgets. The space suit features a large red button in the center of the chest, and the individual is in a crouched, ready stance, as if in a dramatic or adventurous pose. The background showcases an expansive high gorge or canyon, with walls of red and orange hues, with potential light sources in the distance that create a dramatic and intense atmosphere."
    assert response.choices[0].message.content == expected
    assert response == response_snapshot


@pytest.mark.private
async def test_flash_qwen2_5_vl_simple_streaming(flash_qwen2_5, response_snapshot):
    # Stream the same request, reassemble the delta chunks, and check the
    # concatenated text, the chunk count, and the final chunk snapshot.
    image_chunk = {
        "type": "image_url",
        "image_url": {
            "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png"
        },
    }
    text_chunk = {"type": "text", "text": "Describe the image"}

    stream = await flash_qwen2_5.chat(
        seed=1337,
        messages=[{"role": "user", "content": [image_chunk, text_chunk]}],
        stream=True,
    )

    pieces = []
    last_response = None
    # `last_response` ends up bound to the final chunk emitted by the stream.
    async for last_response in stream:
        pieces.append(last_response.choices[0].delta.content)

    generated = "".join(pieces)
    expected = "The image depicts a person in a space-themed environment, possibly on another planet given the red sand and harsh landscape in the background. The individual is wearing a detailed, high-tech spacesuit with various gadgets and gadgets. The space suit features a large red button in the center of the chest, and the individual is in a crouched, ready stance, as if in a dramatic or adventurous pose. The background showcases an expansive high gorge or canyon, with walls of red and orange hues, with potential light sources in the distance that create a dramatic and intense atmosphere."
    assert generated == expected
    assert len(pieces) == 114
    assert last_response == response_snapshot
33 changes: 33 additions & 0 deletions router/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,43 @@ impl Qwen2Vl {
}
}

/// Vision-tower section of a Qwen2.5-VL `config.json`, deserialized from the
/// model repository. Field names mirror the upstream Hugging Face
/// configuration (hence `in_chans` rather than `in_channels`).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct Qwen2_5VlVisionConfig {
    // Presumably the number of transformer blocks in the vision tower —
    // per the upstream config; not consumed by the router code visible here.
    pub(crate) depth: usize,
    pub(crate) hidden_act: String,
    pub(crate) hidden_size: usize,
    pub(crate) intermediate_size: usize,
    pub(crate) num_heads: usize,
    // Input image channels (likely 3 for RGB — per upstream config naming).
    pub(crate) in_chans: usize,
    pub(crate) out_hidden_size: usize,
    // Side length in pixels of one square image patch. The only field the
    // visible router code reads (see `Qwen2_5Vl::get_number_of_features`).
    pub(crate) patch_size: usize,
    pub(crate) spatial_merge_size: usize,
    pub(crate) spatial_patch_size: usize,
    // NOTE(review): `window_size` / `fullatt_block_indexes` presumably
    // configure windowed vs. full attention per block, following the
    // upstream Qwen2.5-VL implementation — not used by the router here.
    pub(crate) window_size: usize,
    pub(crate) fullatt_block_indexes: Vec<usize>,
    pub(crate) tokens_per_second: usize,
    pub(crate) temporal_patch_size: usize,
}

/// Top-level Qwen2.5-VL model config as seen by the router; only the vision
/// section is retained, which is what the router needs to size the image
/// placeholder-token sequence.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct Qwen2_5Vl {
    pub(crate) vision_config: Qwen2_5VlVisionConfig,
}

impl Qwen2_5Vl {
    /// Number of image placeholder tokens a `height` x `width` (pixels)
    /// image expands to: one token per `patch_size` x `patch_size` patch,
    /// using integer (floor) division.
    ///
    /// NOTE(review): this appears to mirror `Qwen2Vl::get_number_of_features`
    /// and does not account for `spatial_merge_size`; confirm against the
    /// Python image processor if prompt token counts ever mismatch.
    pub fn get_number_of_features(&self, height: usize, width: usize) -> usize {
        let patch_area = self.vision_config.patch_size * self.vision_config.patch_size;
        let num_pixels = height * width;
        num_pixels / patch_area
    }
}

#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "model_type")]
#[serde(rename_all = "snake_case")]
pub enum Config {
Qwen2_5Vl(Qwen2_5Vl),
Qwen2Vl(Qwen2Vl),
LlavaNext(LlavaNext),
ClipVisionModel(ClipVisionModel),
Expand Down
6 changes: 5 additions & 1 deletion router/src/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,10 @@ fn image_tokens(
"<|vision_start|>{:?}<|vision_end|>",
"<|image_pad|>".repeat(config.get_number_of_features(height, width))
),
Qwen2_5Vl(config) => format!(
"<|vision_start|>{:?}<|vision_end|>",
"<|image_pad|>".repeat(config.get_number_of_features(height, width))
),
_ => unimplemented!("Images tokens are not supported for this model configuration"),
}
}
Expand Down Expand Up @@ -712,7 +716,7 @@ fn prepare_input<T: TokenizerTrait>(
let (tokenizer_query, input_chunks) = match config {
Some(
config @ (Idefics | Mllama | Idefics2(_) | Idefics3(_) | Paligemma(_) | LlavaNext(_)
| Qwen2Vl(_)),
| Qwen2Vl(_) | Qwen2_5Vl(_)),
) => {
let mut input_chunks = Vec::new();
let mut tokenizer_query = String::with_capacity(inputs.len());
Expand Down
21 changes: 21 additions & 0 deletions server/text_generation_server/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@
from text_generation_server.models.custom_modeling.qwen2_vl import (
Qwen2VLForConditionalGeneration,
)
from text_generation_server.models.custom_modeling.qwen2_5_vl import (
Qwen2_5VLForConditionalGeneration,
)
from text_generation_server.layers.attention import SUPPORTS_WINDOWING
except ImportError as e:
log_master(logger.warning, f"Could not import Flash Attention enabled models: {e}")
Expand Down Expand Up @@ -317,6 +320,11 @@ class ModelType(enum.Enum):
"name": "Qwen 2 VL",
"url": "https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d",
}
QWEN2_5_VL = {
"type": "qwen2_5_vl",
"name": "Qwen 2.5 VL",
"url": "https://huggingface.co/collections/Qwen/qwen25-66e81a666513e518adb90d9e",
}
OPT = {
"type": "opt",
"name": "Opt",
Expand Down Expand Up @@ -1367,6 +1375,19 @@ def get_model(
trust_remote_code=trust_remote_code,
lora_adapter_ids=lora_adapter_ids,
)
if model_type == QWEN2_5_VL:
return VlmCausalLM(
model_id=model_id,
model_class=Qwen2_5VLForConditionalGeneration,
revision=revision,
quantize=quantize,
speculator=speculator,
dtype=dtype,
default_dtype=torch.bfloat16,
kv_cache_dtype=kv_cache_dtype,
trust_remote_code=trust_remote_code,
lora_adapter_ids=lora_adapter_ids,
)
if model_type == MLLAMA:
if FLASH_ATTENTION:
return MllamaCausalLM(
Expand Down
Loading

0 comments on commit 1adfee4

Please sign in to comment.