AICSDEV-216: Gaudi OSS enablement

vermavis · vermavis · commit acf8c825e1d0 · 2025-09-17T12:45:28.000-07:00
diff --git a/vllm_gaudi/attention/backends/hpu_attn.py b/vllm_gaudi/attention/backends/hpu_attn.py
@@ -356,8 +356,10 @@ def __init__(
         attn_type: str = AttentionType.DECODER,
         kv_sharing_target_layer_name: Optional[str] = None,
         use_irope: bool = False,
+        sinks: Optional[int] = None,
     ) -> None:
         super(AttentionImpl, self).__init__()
+        self._sinks = sinks
         if kv_sharing_target_layer_name is not None:
             raise NotImplementedError("KV sharing is not currently supported on HPU.")
         if use_irope:
@@ -535,7 +537,7 @@ def forward(
                 and attn_metadata.block_list is not None else None
 
             if self.sliding_window \
-               and attn_metadata.window_attn_bias is not None:
+               and getattr(attn_metadata, "window_attn_bias", None) is not None:
                 attn_bias = attn_metadata.window_attn_bias
 
             out = ops.prompt_attention(impl=self.prefill_impl,
@@ -558,10 +560,10 @@ def forward(
                 block_mapping = attn_metadata.block_mapping
                 attn_bias = attn_metadata.attn_bias
             else:
-                block_list = attn_metadata.window_block_list
-                block_groups = attn_metadata.window_block_groups
-                block_mapping = attn_metadata.window_block_mapping
-                attn_bias = attn_metadata.window_attn_bias
+                block_list = attn_metadata.block_list
+                block_groups = attn_metadata.block_groups
+                block_mapping = attn_metadata.block_mapping
+                attn_bias = attn_metadata.attn_bias
 
             self.position_bias = None
             alibi_blocks = getattr(attn_metadata, 'alibi_blocks', None)
diff --git a/vllm_gaudi/extension/ops.py b/vllm_gaudi/extension/ops.py
@@ -474,7 +474,7 @@ def forward(self, hidden_states, expert_routing_table, router_weights, permuted_
                                                     w12=w1_list,
                                                     w3=w2_list,
                                                     permuted_weights=permuted_weights,
-                                                    activation=activation,
+                                                    activation="silu",
                                                     experts_min=self.experts_min,
                                                     experts_max=self.experts_max)
         for i in range(self.moe_n_slice):