Skip to content

Commit acf8c82

Browse files
committed
AICSDEV-216: gaudi oss enablement
1 parent 322bb1c commit acf8c82

File tree

2 files changed: 8 additions and 6 deletions

vllm_gaudi/attention/backends/hpu_attn.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,8 +356,10 @@ def __init__(
356356
attn_type: str = AttentionType.DECODER,
357357
kv_sharing_target_layer_name: Optional[str] = None,
358358
use_irope: bool = False,
359+
sinks: Optional[int] = None,
359360
) -> None:
360361
super(AttentionImpl, self).__init__()
362+
self._sinks = sinks
361363
if kv_sharing_target_layer_name is not None:
362364
raise NotImplementedError("KV sharing is not currently supported on HPU.")
363365
if use_irope:
@@ -535,7 +537,7 @@ def forward(
535537
and attn_metadata.block_list is not None else None
536538

537539
if self.sliding_window \
538-
and attn_metadata.window_attn_bias is not None:
540+
and getattr(attn_metadata, "window_attn_bias", None) is not None:
539541
attn_bias = attn_metadata.window_attn_bias
540542

541543
out = ops.prompt_attention(impl=self.prefill_impl,
@@ -558,10 +560,10 @@ def forward(
558560
block_mapping = attn_metadata.block_mapping
559561
attn_bias = attn_metadata.attn_bias
560562
else:
561-
block_list = attn_metadata.window_block_list
562-
block_groups = attn_metadata.window_block_groups
563-
block_mapping = attn_metadata.window_block_mapping
564-
attn_bias = attn_metadata.window_attn_bias
563+
block_list = attn_metadata.block_list
564+
block_groups = attn_metadata.block_groups
565+
block_mapping = attn_metadata.block_mapping
566+
attn_bias = attn_metadata.attn_bias
565567

566568
self.position_bias = None
567569
alibi_blocks = getattr(attn_metadata, 'alibi_blocks', None)

vllm_gaudi/extension/ops.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,7 @@ def forward(self, hidden_states, expert_routing_table, router_weights, permuted_
474474
w12=w1_list,
475475
w3=w2_list,
476476
permuted_weights=permuted_weights,
477-
activation=activation,
477+
activation="silu",
478478
experts_min=self.experts_min,
479479
experts_max=self.experts_max)
480480
for i in range(self.moe_n_slice):

0 commit comments

Comments
 (0)