@@ -356,8 +356,10 @@ def __init__(
356356 attn_type : str = AttentionType .DECODER ,
357357 kv_sharing_target_layer_name : Optional [str ] = None ,
358358 use_irope : bool = False ,
359+ sinks : Optional [int ] = None ,
359360 ) -> None :
360361 super (AttentionImpl , self ).__init__ ()
362+ self ._sinks = sinks
361363 if kv_sharing_target_layer_name is not None :
362364 raise NotImplementedError ("KV sharing is not currently supported on HPU." )
363365 if use_irope :
@@ -535,7 +537,7 @@ def forward(
535537 and attn_metadata .block_list is not None else None
536538
537539 if self .sliding_window \
538- and attn_metadata . window_attn_bias is not None :
540+ and getattr ( attn_metadata , " window_attn_bias" , None ) is not None :
539541 attn_bias = attn_metadata .window_attn_bias
540542
541543 out = ops .prompt_attention (impl = self .prefill_impl ,
@@ -558,10 +560,10 @@ def forward(
558560 block_mapping = attn_metadata .block_mapping
559561 attn_bias = attn_metadata .attn_bias
560562 else :
561- block_list = attn_metadata .window_block_list
562- block_groups = attn_metadata .window_block_groups
563- block_mapping = attn_metadata .window_block_mapping
564- attn_bias = attn_metadata .window_attn_bias
563+ block_list = attn_metadata .block_list
564+ block_groups = attn_metadata .block_groups
565+ block_mapping = attn_metadata .block_mapping
566+ attn_bias = attn_metadata .attn_bias
565567
566568 self .position_bias = None
567569 alibi_blocks = getattr (attn_metadata , 'alibi_blocks' , None )
0 commit comments