Skip to content

Commit 2a3bbdf

Browse files
committed
fix: use cpu apply kernel for npu and others
1 parent fc0fbce commit 2a3bbdf

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

lmdeploy/pytorch/engine/guided_process.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,15 @@ def process(self, scores: torch.Tensor) -> torch.Tensor:
22 22
"""Apply grammar constraints to logits before sampling the next
23 23
token."""
24 24
self.matcher.fill_next_token_bitmask(self.token_bitmask)
25-
xgr.apply_token_bitmask_inplace(scores, self.token_bitmask.to(scores.device))
25+
device = scores.device
26+
if device in {'cpu', 'cuda'}:
27+
xgr.apply_token_bitmask_inplace(scores, self.token_bitmask.to(device))
28+
else:
29+
cpu_scores = scores.cpu()
30+
cpu_mask = self.token_bitmask.cpu()
31+
xgr.apply_token_bitmask_inplace(cpu_scores, cpu_mask)
32+
scores.copy_(cpu_scores.to(device))
33+
26 34
return scores
27 35

28 36
def accept(self, token_id: int) -> bool:

0 commit comments

Comments
 (0)