33from threading import Thread
44
55import numpy as np
6- import re
76from openrec .postprocess .unirec_postprocess import clean_special_tokens
87from openrec .preprocess import create_operators , transform
98from tools .engine .config import Config
@@ -41,18 +40,6 @@ def set_device(device):
4140transforms , ratio_resize_flag = build_rec_process (cfg )
4241ops = create_operators (transforms , global_config )
4342
44- rules = [
45- (r'-<\|sn\|>' , '' ),
46- (r' <\|sn\|>' , ' ' ),
47- (r'<\|sn\|>' , ' ' ),
48- (r'<\|unk\|>' , '' ),
49- (r'<s>' , '' ),
50- (r'</s>' , '' ),
51- (r'\uffff' , '' ),
52- (r'_{4,}' , '___' ),
53- (r'\.{4,}' , '...' ),
54- ]
55-
5643
5744# --- 2. 定义流式生成函数 ---
5845def stream_chat_with_image (input_image , history ):
@@ -74,17 +61,20 @@ def stream_chat_with_image(input_image, history):
7461 'input_ids' : None ,
7562 'attention_mask' : None
7663 }
77- generation_kwargs = dict (inputs , streamer = streamer , max_new_tokens = 1024 )
64+ generation_kwargs = dict (inputs , streamer = streamer , max_new_tokens = 2048 )
7865 # 后台线程运行生成
7966 thread = Thread (target = model .generate , kwargs = generation_kwargs )
8067 thread .start ()
8168 # 流式输出
82- generated_text = ''
8369 history = history + [('🖼️(图片)' , '' )]
70+ generated_text_ori = ''
8471 for new_text in streamer :
85- generated_text += clean_special_tokens (new_text )
86- for rule in rules :
87- generated_text = re .sub (rule [0 ], rule [1 ], generated_text )
72+ generated_text_ori += new_text
73+ generated_text = clean_special_tokens (
74+ generated_text_ori .replace (' ' , '' ))
75+ text = generated_text .replace ('<tdcolspan=' , '<td colspan=' )
76+ text = text .replace ('<tdrowspan=' , '<td rowspan=' )
77+ generated_text = text .replace ('"colspan=' , '" colspan=' )
8878 history [- 1 ] = ('🖼️(图片)' , generated_text )
8979 yield history
9080
0 commit comments