
Commit a3dce5c

CI fix (#186)
Adapts to upstream changes vllm-project/vllm#24795, vllm-project/vllm#24615 and vllm-project/vllm#24078.

Signed-off-by: Agata Dobrzyniewicz <[email protected]>
Parent: e677dd1

File tree

3 files changed (+7, -6 lines)


tests/unit_tests/worker/test_hpu_input_batch.py

Lines changed: 4 additions & 1 deletion
@@ -15,6 +15,7 @@
 from vllm.v1.pool.metadata import PoolingMetadata
 from vllm.v1.sample.logits_processor import LogitsProcessors
 from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.utils import CpuGpuBuffer
 from vllm.v1.worker.block_table import BlockTable, MultiGroupBlockTable
 from vllm_gaudi.v1.worker.hpu_input_batch import InputBatch, CachedRequestState

@@ -37,7 +38,7 @@ def _compare_objs(obj1, obj2, skip: Sequence = ("logitsprocs", "batch_update_bui

         is_same = False
         if isinstance(a, torch.Tensor):
-            if (a.numel() == 0 or b.numel() == 0):
+            if a.numel() == 0 or b.numel() == 0:
                 is_same = (a.numel() == 0 and b.numel() == 0)
             elif torch.allclose(a, b):
                 is_same = True
@@ -53,6 +54,8 @@ def _compare_objs(obj1, obj2, skip: Sequence = ("logitsprocs", "batch_update_bui
                 is_same = True  # if we make it here must be same
         elif a == b:
             is_same = True
+        elif isinstance(a, CpuGpuBuffer):
+            is_same = np.allclose(a.np, b.np) and torch.allclose(a.gpu, b.gpu)
         assert is_same, f"Attribute {attr_name} is different"\
             f" in {obj1} and {obj2}: {a} != {b}"

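The new branch compares vLLM's CpuGpuBuffer, which exposes the same data as a NumPy host view (.np) and a device tensor (.gpu), so both sides have to agree. A minimal runnable sketch of that comparison; HostDeviceBuffer and buffers_equal are hypothetical stand-ins, and only the .np/.gpu attribute names come from the diff above:

import numpy as np
import torch

class HostDeviceBuffer:
    # Hypothetical stand-in for vllm.v1.utils.CpuGpuBuffer.
    def __init__(self, data: np.ndarray, device: str = "cpu"):
        self.np = data                                # host-side NumPy view
        self.gpu = torch.from_numpy(data).to(device)  # device-side tensor

def buffers_equal(a: HostDeviceBuffer, b: HostDeviceBuffer) -> bool:
    # Mirrors the new elif branch: host and device views must both match.
    return np.allclose(a.np, b.np) and torch.allclose(a.gpu, b.gpu)

x = HostDeviceBuffer(np.arange(4, dtype=np.float32))
y = HostDeviceBuffer(np.arange(4, dtype=np.float32))
assert buffers_equal(x, y)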
tests/unit_tests/worker/test_hpu_model_runner.py

Lines changed: 1 addition & 1 deletion
@@ -157,7 +157,7 @@ def _is_req_state_block_table_match(model_runner, req_id: str) -> bool:
     if block_table.num_blocks_per_row[req_index] != len(req_state.block_ids[0]):
         return False
     num_blocks = block_table.num_blocks_per_row[req_index]
-    return (block_table.block_table_np[req_index, :num_blocks] == req_state.block_ids[0]).all()
+    return (block_table.block_table.np[req_index, :num_blocks] == req_state.block_ids[0]).all()


 def test_update_states_new_request(model_runner, dist_init):

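The rename tracks an upstream refactor in which the block table's separate block_table_np array appears to have been folded into a CpuGpuBuffer attribute, so the host-side array is now reached through its .np view. A hedged sketch of the row check the helper performs, with a plain NumPy array standing in for block_table.block_table.np (shapes and values are illustrative):

import numpy as np

block_table_np = np.zeros((4, 8), dtype=np.int32)  # [max_num_reqs, max_blocks]
block_table_np[2, :3] = [7, 11, 13]                # blocks mapped to request index 2

req_block_ids = [7, 11, 13]                        # scheduler-side block IDs
num_blocks = len(req_block_ids)

# Matches only if the first num_blocks entries of the row agree exactly.
assert (block_table_np[2, :num_blocks] == req_block_ids).all()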
vllm_gaudi/v1/worker/hpu_model_runner.py

Lines changed: 2 additions & 4 deletions
@@ -43,7 +43,7 @@
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
 from vllm.multimodal.inputs import PlaceholderRange
 from vllm.sampling_params import SamplingType
-from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
+from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, LayerBlockType, cdiv, is_pin_memory_available, LazyLoader)
 from vllm_gaudi.utils import (HPUCompileConfig, is_fake_hpu, async_h2d_copy)
 from vllm_gaudi.v1.attention.backends.hpu_attn import HPUAttentionMetadataV1

@@ -728,9 +728,7 @@ def __init__(
             logger.info("Bucketing is OFF.")
         self._PAD_SLOT_ID = -1
         self._PAD_BLOCK_ID = -1
-        self._tokenizer = init_tokenizer_from_configs(model_config=vllm_config.model_config,
-                                                      scheduler_config=vllm_config.scheduler_config,
-                                                      lora_config=vllm_config.lora_config).tokenizer
+        self._tokenizer = init_tokenizer_from_configs(model_config=vllm_config.model_config)

         # TODO(madamczyk-intel): add a knob for that
         # TODO(madamczyk-intel): debug why increasing it lowers acc

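Upstream now provides init_tokenizer_from_configs from vllm.transformers_utils.tokenizer with a single model_config argument, returning the tokenizer directly rather than a group that had to be unwrapped via .tokenizer. A compatibility sketch under the assumption that exactly one of the two layouts is importable (load_tokenizer itself is a hypothetical helper):

def load_tokenizer(vllm_config):
    try:
        # Newer layout, as used in the diff above: returns the tokenizer itself.
        from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
        return init_tokenizer_from_configs(model_config=vllm_config.model_config)
    except ImportError:
        # Older layout: built a tokenizer group that exposed .tokenizer.
        from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
        return init_tokenizer_from_configs(model_config=vllm_config.model_config,
                                           scheduler_config=vllm_config.scheduler_config,
                                           lora_config=vllm_config.lora_config).tokenizer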