Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
45106e9
Add ovis model support step 1
testdig Jan 12, 2026
f90e1c0
Update the format to pass pre-commit
testdig Jan 21, 2026
415d992
Add copyright header
testdig Jan 29, 2026
def8760
Added Qwen3 Test (#736)
slokesha Jan 21, 2026
dc2de8b
Interleaved sliding window fix (#805)
rsmyrek Jan 21, 2026
b1560d0
[GAUDISW-245665] fix diverge from vllm in multiModalBudget (#837)
linoybu Jan 21, 2026
29c17ff
KV cache sharing for HPU (#834)
jakub-sochacki Jan 21, 2026
13d0aa3
Fix for #32077 (#851)
iboiko-habana Jan 22, 2026
830dc2c
Resolve qwen25 vl accuracy regression (#831)
tvoas Jan 22, 2026
d675017
DP: Fix for torch.compile (#722)
xinyu-intel Jan 22, 2026
95b0536
Remove unused test utils (#864)
microslaw Jan 23, 2026
2314f85
Add support for chunked attention (#821)
kfojcik-intel Jan 23, 2026
66a19e8
Fix Llama4 shape mismatch for 32k+ context window (#842) (#855)
afierka-intel Jan 26, 2026
e29844b
Fix HPU model runner profile_run to work with dynamic kv-cache scales…
dudilester Jan 26, 2026
756eaf6
Revert "skip HPU graphs for long prefills" (#850)
adobrzyn Jan 26, 2026
2e43dcb
Implement bucket corrector for Mamba chunk size (#886)
jbyczkow Jan 28, 2026
0ca1436
Draft: Add FlashAttention online merge in Unified Attention (#785)
kzawora-intel Jan 28, 2026
406c869
Enable support for prefill side kv_layout and block_size update (#867)
yeonsily Jan 28, 2026
eb02dd2
Merge branch 'main' into ovis
testdig Jan 29, 2026
f1e4bd3
Merge branch 'main' into ovis
testdig Jan 29, 2026
691e11e
Fix an incorrect prefix
testdig Feb 2, 2026
9ad424c
Merge branch 'main' into ovis
testdig Feb 2, 2026
37ec32f
Merge branch 'main' into ovis
michalkuligowski Feb 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vllm_gaudi/extension/bucketing/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
},

# Pixel-based models
'ovis': {
'is_batch_based': False,
'buckets': [1600, 3136, 4096, 6400]
},
'ovis2.5': {
'is_batch_based': False,
'buckets': [784, 1600, 3136, 4096, 6400, 7744, 9216, 12544]
Expand Down
3 changes: 3 additions & 0 deletions vllm_gaudi/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ def register_model():
from vllm_gaudi.models.qwen3_vl import HpuQwen3_VLForConditionalGeneration # noqa: F401
ModelRegistry.register_model("Qwen3VLForConditionalGeneration",
"vllm_gaudi.models.qwen3_vl:HpuQwen3_VLForConditionalGeneration")

from vllm_gaudi.models.ovis import HpuOvis # noqa: F401
ModelRegistry.register_model("Ovis", "vllm_gaudi.models.ovis:HpuOvis")
14 changes: 14 additions & 0 deletions vllm_gaudi/models/ovis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SPDX-License-Identifier: Apache-2.0

from vllm.config import VllmConfig
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copyright header missing

Copy link
Author

@testdig testdig Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated, thanks for the review

from vllm.model_executor.models.ovis import (Ovis, OvisMultiModalProcessor, OvisProcessingInfo, OvisDummyInputsBuilder)
from vllm.multimodal import MULTIMODAL_REGISTRY


@MULTIMODAL_REGISTRY.register_processor(
    OvisMultiModalProcessor,
    info=OvisProcessingInfo,
    dummy_inputs=OvisDummyInputsBuilder,
)
class HpuOvis(Ovis):
    """Ovis model entry point exposed by the vllm_gaudi plugin.

    This is a thin subclass of the upstream vLLM ``Ovis`` model. It is
    registered with ``MULTIMODAL_REGISTRY`` using the upstream
    ``OvisMultiModalProcessor``, ``OvisProcessingInfo`` and
    ``OvisDummyInputsBuilder``; it adds no behavior of its own.
    """

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
        """Construct the model by delegating to the ``Ovis`` constructor.

        Args:
            vllm_config: Configuration object forwarded unchanged to the
                parent class.
            prefix: Prefix string forwarded unchanged to the parent class.
        """
        # Both keyword-only arguments are passed straight through; nothing
        # is overridden or post-processed in this subclass.
        super().__init__(vllm_config=vllm_config, prefix=prefix)