From ed4354e849e7f71f2f2c077512a6f54ba2b3027a Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Tue, 9 Sep 2025 13:55:20 -0700 Subject: [PATCH] AICSDEV-216: gaudi oss enablement --- vllm_gaudi/attention/backends/hpu_attn.py | 2 ++ vllm_gaudi/extension/ops.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm_gaudi/attention/backends/hpu_attn.py b/vllm_gaudi/attention/backends/hpu_attn.py index 58b9b08c..1ed1412d 100644 --- a/vllm_gaudi/attention/backends/hpu_attn.py +++ b/vllm_gaudi/attention/backends/hpu_attn.py @@ -351,8 +351,10 @@ def __init__( attn_type: str = AttentionType.DECODER, kv_sharing_target_layer_name: Optional[str] = None, use_irope: bool = False, + sinks: Optional[int] = None, ) -> None: super(AttentionImpl, self).__init__() + self._sinks = sinks if kv_sharing_target_layer_name is not None: raise NotImplementedError("KV sharing is not currently supported on HPU.") if use_irope: diff --git a/vllm_gaudi/extension/ops.py b/vllm_gaudi/extension/ops.py index 42dbd1bf..a60597cd 100644 --- a/vllm_gaudi/extension/ops.py +++ b/vllm_gaudi/extension/ops.py @@ -473,7 +473,7 @@ def forward(self, hidden_states, expert_routing_table, router_weights, permuted_ w12=w1_list, w3=w2_list, permuted_weights=permuted_weights, - activation=activation, + activation="silu", experts_min=self.experts_min, experts_max=self.experts_max) for i in range(self.moe_n_slice):