From ed4354e849e7f71f2f2c077512a6f54ba2b3027a Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal1.verma@intel.com>
Date: Tue, 9 Sep 2025 13:55:20 -0700
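Subject: [PATCH] AICSDEV-216: gaudi oss enablement

Accept an optional `sinks` keyword argument in the attention
implementation's __init__ in vllm_gaudi/attention/backends/hpu_attn.py
and store it on the instance as `self._sinks`.

In forward() in vllm_gaudi/extension/ops.py, pass activation="silu" to
the op call instead of forwarding the `activation` variable.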
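---
Review notes (not part of the commit message):
 * `sinks` is annotated `Optional[int]`; please confirm this matches the
   type callers actually pass for this argument.
 * activation="silu" is now hard-coded at the call site in ops.py, so the
   `activation` value is no longer forwarded; please confirm no model
   routed through this path relies on a different activation.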
 vllm_gaudi/attention/backends/hpu_attn.py | 2 ++
 vllm_gaudi/extension/ops.py               | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm_gaudi/attention/backends/hpu_attn.py b/vllm_gaudi/attention/backends/hpu_attn.py
index 58b9b08c..1ed1412d 100644
--- a/vllm_gaudi/attention/backends/hpu_attn.py
+++ b/vllm_gaudi/attention/backends/hpu_attn.py
@@ -351,8 +351,10 @@ def __init__(
         attn_type: str = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[str] = None,
         use_irope: bool = False,
+        sinks: Optional[int] = None,
     ) -> None:
         super(AttentionImpl, self).__init__()
+        self._sinks = sinks
         if kv_sharing_target_layer_name is not None:
             raise NotImplementedError("KV sharing is not currently supported on HPU.")
         if use_irope:
diff --git a/vllm_gaudi/extension/ops.py b/vllm_gaudi/extension/ops.py
index 42dbd1bf..a60597cd 100644
--- a/vllm_gaudi/extension/ops.py
+++ b/vllm_gaudi/extension/ops.py
@@ -473,7 +473,7 @@ def forward(self, hidden_states, expert_routing_table, router_weights, permuted_
                                                     w12=w1_list,
                                                     w3=w2_list,
                                                     permuted_weights=permuted_weights,
-                                                    activation=activation,
+                                                    activation="silu",
                                                     experts_min=self.experts_min,
                                                     experts_max=self.experts_max)
         for i in range(self.moe_n_slice):