3 files changed: +1 -9 lines changed
Original file line number | Diff line number | Diff line change
@@ -1059,7 +1059,7 @@ def propose_draft_token_ids(
10591059 self .tokenID_processor .prev_token_ids = self ._latest_tokens_gpu
10601060 next_token_ids = self ._latest_tokens_gpu
10611061 elif self ._latest_tokens_gpu .ndim == 2 : # spec
1062- bs = len (batch .seqs )
1062+ bs = len (batch .req_ids )
10631063 col_indices = self ._lasted_num_accept - 1
10641064 next_token_ids = self ._latest_tokens_gpu [torch .arange (bs , device = self ._latest_tokens_gpu .device ), col_indices ]
10651065 self .tokenID_processor .prev_token_ids = next_token_ids
Original file line number Diff line number Diff line change @@ -201,19 +201,15 @@ def postprocess(
201201 token_ids = prev_token_ids [seq .id ]
202202 new_tokens = []
203203 if is_deferred_out :
204- print (f"{ token_ids = } " )
205204 idx = seq .token_ids .index (self .eos_token_id )
206205 seq .token_ids [idx :] = token_ids
207- print (f"{ seq .token_ids = } " )
208206
209207 if seq .output_tokens :
210208 idx = seq .output_tokens .index (self .eos_token_id )
211209 seq .output_tokens [idx :] = token_ids
212- print (f"{ seq .output_tokens = } " )
213210
214211 else :
215212 seq .output_tokens .extend (token_ids )
216- print (f"{ seq .output_tokens = } " )
217213
218214 new_tokens = token_ids
219215 else :
Original file line number Diff line number Diff line change @@ -147,10 +147,6 @@ def propose(
147147 inputs_embeds = None
148148 input_ids = self .input_ids [:num_input_tokens ]
149149
150- # forwad
151- if str (input_ids .device ) == "cuda:0" :
152- print (f"draft model forward { input_ids = } " )
153-
154150 ret_hidden_states = self .model (
155151 input_ids = input_ids ,
156152 positions = self .positions [:num_input_tokens ],
You can’t perform that action at this time.
0 commit comments