stepfun-ai · Daiyimo · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/copilot_agent_client/mcp_agent_loop.py b/copilot_agent_client/mcp_agent_loop.py
@@ -5,13 +5,14 @@
     sys.path.append(".")
 
 import json
+import subprocess
 
 from PIL import Image
 import io
 
 from tools.image_tools import make_b64_url
 
-from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on, press_home_key
+from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on, press_home_key, _get_adb_command
 
 from copilot_front_end.mobile_action_helper import init_device, open_screen
 from copilot_front_end.pu_frontend_executor import act_on_device, uiTars_to_frontend_action
@@ -27,6 +28,26 @@
 
 import threading
 
+
+def _check_yadb_installed(device_id, print_command=False):
+    """Ensure yadb is present on the device without pressing the HOME key.
+    Runs md5sum on /data/local/tmp/yadb and pushes the local `yadb` file when the expected hash is absent from the output.
+    """
+    adb_command = _get_adb_command(device_id)
+
+    command = f"{adb_command} shell md5sum /data/local/tmp/yadb"
+    if print_command:
+        print(f"Executing command: {command}")
+
+    result = subprocess.run(command, shell=True, capture_output=True, text=True)
+    if "29a0cd3b3adea92350dd5a25594593df" not in result.stdout:  # expected md5 of the yadb file; a missing file also lands here
+        command = f"{adb_command} push yadb /data/local/tmp"
+        print(f"YADB is not installed on the device. Installing now...")
+        if print_command:
+            print(f"Executing command: {command}")
+        subprocess.run(command, shell=True, capture_output=True, text=True)  # NOTE(review): push result is not checked
+
+
 def auto_reply(current_image_url, task, info_action, model_provider, model_name):
     """
     Reply with information action.
@@ -193,11 +214,15 @@ def gui_agent_loop(
 
     # init device for the first time
     open_screen(device_id)
-    init_device(device_id)
-
-    # if reset_environment, press home key before starting the task
+
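+    # Pass reset_environment=True only when the environment must be reset (init_device then presses the HOME key);
+    # otherwise init_device only checks that yadb is installed and does not press the HOME key.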
     if reset_environment and session_id is None and task is not None:
-        press_home_key(device_id, print_command=True)
+        init_device(device_id, reset_environment=True)  # init_device 内部会按 HOME 键
+    else:
+        # Only check yadb; do not press the HOME key
+        init_device(device_id, reset_environment=False)
+
 
     # task, task_type = task, rollout_config['task_type']
     task_type = agent_loop_config['task_type']
@@ -265,6 +290,9 @@ def gui_agent_loop(
     global_step_idx = 0
     # restart the steps from 0, even continuing an existing session
     for step_idx in range(max_steps):
+        # Print the step-start separator
+        step_label = f" Step {step_idx+1} start "
+        print(f"\n{step_label:-^50}")
 
         if not dectect_screen_on(device_id):
             print("Screen is off, turn on the screen first")
@@ -371,7 +399,17 @@ def gui_agent_loop(
 
         history_actions.append(action)
 
-        print(f"Step {step_idx+1}/{max_steps} done.\nAction Type: {action['action_type']}, cot: {action.get('cot', '')}\nSession ID: {session_id}\n")
+        # Strip <THINK> tags from cot before printing
+        cot_display = action.get('cot', '')
+        if cot_display:
+            import re
+            cot_display = re.sub(r'<\s*/?THINK\s*>', '', cot_display, flags=re.IGNORECASE).strip()
+
+        print(f"Action: {action['action_type']}")
+        if cot_display:
+            print(f"cot: {cot_display}")
+        step_end_label = f" Step {step_idx+1} end "
+        print(f"{step_end_label:-^50}")
 
         # print(f"local:{step_idx+1}/global:{global_step_idx}/{max_steps} done. Action: {action}")
 
@@ -446,5 +484,3 @@ def gui_agent_loop(
     # print(f"Task {task} done in {len(history_actions)} steps. Session ID: {session_id}")
 
     return return_log
-
-
diff --git a/copilot_agent_client/pu_client.py b/copilot_agent_client/pu_client.py
@@ -5,23 +5,19 @@
     sys.path.append(".")
 
 import json
-
 from PIL import Image
 import io
+from collections import OrderedDict
 
 from tools.image_tools import draw_points, make_b64_url
-
-from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on, press_home_key
-
+from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on
 from copilot_front_end.mobile_action_helper import init_device, open_screen
 from copilot_front_end.pu_frontend_executor import act_on_device, uiTars_to_frontend_action
-
 from megfile import smart_remove
-
 import time
-
 from tools.ask_llm_v2 import ask_llm_anything
 
+
 def reply_info_action(current_image_url, task, info_action, model_provider, model_name):
     """
     Reply with information action.
@@ -32,22 +28,18 @@ def reply_info_action(current_image_url, task, info_action, model_provider, mode
             "content": [
                 {
                     "type": "text",
-                    "text":  f"""# 角色
+                    "text": f"""# 角色
 你将扮演一个正在使用GUI Agent完成任务的用户。
-
 # 任务
 阅读下方提供的所有背景信息，针对[Agent的澄清问题]，生成一个提供关键信息的、简短直接的回答。
-
 # 背景信息
 - **任务目标:** {task}
 - **agent 问的问题:** {json.dumps(info_action, ensure_ascii=False)}
-
 # 输出要求
 - 你的回答必须极其简短和明确。
 - 你的回答应直接命中问题的核心，解决Agent的疑惑。
 - 不要进行任何额外的解释、对话或使用礼貌用语。
 - 只输出回答本身，不要添加任何引号或其他修饰。
-
 以下是当前页面内容:
                 """,
                 },
@@ -64,7 +56,6 @@ def reply_info_action(current_image_url, task, info_action, model_provider, mode
             ]
         }
     ]
-
     response = ask_llm_anything(
         model_provider=model_provider,
         model_name=model_name,
@@ -76,69 +67,80 @@ def reply_info_action(current_image_url, task, info_action, model_provider, mode
             "frequency_penalty": 0.0,
         }
     )
-
     if "</think>" in response:
         response = response.split("</think>")[-1].strip()
-
     return response
 
-# delay after act on device
-# rollout config
-# device info
-# def evaluate_task_on_device(agent_server, device_info, task, frontend_action_converter, ask_action_function_func, max_steps = 40, delay_after_capture = 2):
-def evaluate_task_on_device(agent_server, device_info, task, rollout_config, extra_info = {}, reflush_app=True, auto_reply = False, reset_environment=True):
+
+def evaluate_task_on_device(agent_server, device_info, task, rollout_config, extra_info={}, reflush_app=True, auto_reply=False, reset_environment=False):
     """
     Evaluate a task on a device using the provided frontend action converter and action function.
-
     """
 
+    # ===== New: local pretty-print helper =====
+    def _pretty_format_action(act):  # Render an action dict as newline-separated "key: value" lines
+        if not isinstance(act, (dict, OrderedDict)):
+            return str(act)
+        lines = []
+        # <THINK> tags are no longer printed
+        if 'cot' in act and act['cot']:
+            cot_clean = str(act['cot']).replace('\n', ' ').replace('\r', ' ')
+            # Remove <THINK> and </THINK> tags
+            import re
+            cot_clean = re.sub(r'<\s*/?THINK\s*>', '', cot_clean, flags=re.IGNORECASE).strip()
+            if cot_clean:
+                lines.append(f"cot: {cot_clean}")
+        # Define field order for readability
+        field_order = ['explain', 'action', 'value', 'point', 'point1', 'point2', 'return', 'summary']
+        for key in field_order:
+            if key in act:
+                val = act[key]
+                if isinstance(val, list):
+                    val_str = ",".join(str(x) for x in val)
+                else:
+                    val_str = str(val).replace('\n', ' ').strip()
+                lines.append(f"{key}: {val_str}")
+        return "\n".join(lines)
+    # ============================
+
     # init device for the first time
     device_id = device_info['device_id']
     open_screen(device_id)
-    init_device(device_id)
-
-
-    if reset_environment:
-        press_home_key(device_id, print_command=True)
+    init_device(device_id, reset_environment=reset_environment)
 
     task, task_type = task, rollout_config['task_type']
-
     session_id = agent_server.get_session({
         "task": task,
         "task_type": task_type,
         "model_config": rollout_config['model_config'],
         "extra_info": extra_info
-
     })
-
     print(f"Session ID: {session_id}")
-
     return_log = {
         "session_id": session_id,
         "device_info": device_info,
         "task": task,
         "rollout_config": rollout_config,
         "extra_info": extra_info
     }
-
     device_id, device_wm_size = device_info['device_id'], device_info['device_wm_size']
-
     max_steps = rollout_config.get('max_steps', 40)
     delay_after_capture = rollout_config.get('delay_after_capture', 2)
-
     history_actions = []
 
     for step_idx in range(max_steps):
-
+        # Print the step-start separator
+        step_label = f" Step {step_idx+1} start "
+        print(f"\n{step_label:-^50}")
+
         if not dectect_screen_on(device_id):
             print("Screen is off, turn on the screen first")
             break
 
         image_path = capture_screenshot(device_id, "tmp_screenshot", print_command=False)
-
         image_b64_url = make_b64_url(image_path, resize_config=rollout_config['model_config'].get("resize_config", None))
         smart_remove(image_path)
-        
+
         payload = {
             "session_id": session_id,
             "observation": {
@@ -150,57 +152,50 @@ def evaluate_task_on_device(agent_server, device_info, task, rollout_config, ext
                 },
             }
         }
-        if history_actions[-1]['action_type'] == "INFO" if len(history_actions) > 0 else False:
-            info_action = history_actions[-1]
 
+        if history_actions and history_actions[-1]['action_type'] == "INFO":
+            info_action = history_actions[-1]
             if auto_reply:
                 print(f"AUTO REPLY INFO FROM MODEL!")
-                reply_info = reply_info_action(image_b64_url, task, info_action, model_provider=rollout_config['model_config']['model_provider'], model_name=rollout_config['model_config']['model_name'])
+                reply_info = reply_info_action(
+                    image_b64_url, task, info_action,
+                    model_provider=rollout_config['model_config']['model_provider'],
+                    model_name=rollout_config['model_config']['model_name']
+                )
                 print(f"info: {reply_info}")
-
             else:
                 print(f"EN: Agent asks: {history_actions[-1]['value']} Please Reply: ")
                 print(f"ZH: Agent 问你: {history_actions[-1]['value']} 回复一下：")
-
                 reply_info = input("Your reply:")
-
             print(f"Replied info action: {reply_info}")
-
             payload['observation']['query'] = reply_info
 
-
         action = agent_server.automate_step(payload)['action']
-
-        #TODO: to replace with the new function
         action = uiTars_to_frontend_action(action)
-
-        act_on_device(action, device_id, device_wm_size, print_command=True, reflush_app=reflush_app)
-
+        act_on_device(action, device_id, device_wm_size, print_command=True, reflush_app=reflush_app, print_executing_command=True)
         history_actions.append(action)
 
-
-        print(f"Step {step_idx+1}/{max_steps} done. Action: {action}")
+        # ===== Replaces the original print: use the pretty format =====
+        print(f"Action: {action['action_type']}")
+        print(_pretty_format_action(action))
+        step_end_label = f" Step {step_idx+1} end "
+        print(f"{step_end_label:-^50}")
+        # ===================================
 
         if action['action_type'].upper() in ['COMPLETE', "ABORT"]:
             stop_reason = action['action_type'].upper()
             break
 
         time.sleep(delay_after_capture)
-    
+
     if action['action_type'] in ['COMPLETE', "ABORT"]:
         stop_reason = action['action_type']
     elif step_idx == max_steps - 1:
         stop_reason = "MAX_STEPS_REACHED"
     else:
         stop_reason = "MANUAL_STOP"
 
-    # return_log['session_id'] = session_id
     return_log['stop_reason'] = stop_reason
-
     return_log['stop_steps'] = step_idx + 1
-
-    print(f"Task {task} done in {len(history_actions)} steps. Session ID: {session_id}")
-
+    print(f"\ndone in {len(history_actions)} steps.\nSession ID: {session_id}")
     return return_log
-
-
diff --git a/copilot_front_end/mobile_action_helper.py b/copilot_front_end/mobile_action_helper.py
@@ -75,7 +75,7 @@ def press_home_key(device_id, print_command = False):
 
     subprocess.run(command, shell=True, capture_output=True, text=True)
 
-def init_device(device_id, print_command = False):
+def init_device(device_id, reset_environment=False, print_command = False):
     """
     Initialize the device by checking if yadb is installed.
     """
@@ -99,19 +99,23 @@ def init_device(device_id, print_command = False):
 
         subprocess.run(command, shell=True, capture_output=True, text=True)
     else:
-        print("yadb is already installed on the device.")
+        # print("yadb is already installed on the device.")
+        pass
+
+    if reset_environment:
+        press_home_key(device_id, print_command=print_command)
 
-    # press_home_key(device_id, print_command=print_command)
 
 def init_all_devices():
     """
     Initialize all devices by listing them and setting up the environment.
     """
     devices = list_devices()
     for device_id in tqdm(devices):
-        init_device(device_id)
+        init_device(device_id, reset_environment=True)
         print(f"Initialized device: {device_id}")
 
+
 def dectect_screen_on(device_id, print_command = False):
     """
     Detect whether the screen is on for the specified device.
@@ -636,9 +640,10 @@ def __init__(self, device_id = None):
         self.device_id = device_id
         self.wm_size = get_device_wm_size(self.device_id)
         if self.device_id is not None:
-            init_device(self.device_id, print_command=True)
+            init_device(self.device_id, reset_environment=True, print_command=True)
             # _open_screen(self.device_id, print_command=True)
 
+
         pass
 
     def set_device_id(self, device_id):
@@ -730,4 +735,4 @@ def step_interaction(self, action, capture_duration = 0.5, image_full_path = Non
     print(get_device_wm_size("bc23727a"))
 
     open_screen(None, print_command=True)
-    pass
+    pass
diff --git a/copilot_front_end/package_map.py b/copilot_front_end/package_map.py
@@ -209,6 +209,7 @@
     "osmAnd": "net.osmand",
     "给到": "com.guanaitong",
     "百词斩": "com.jiongji.andriod.card",
+    "象棋": "com.tencent.qqgame.xq",
 
 }
 

diff --git a/copilot_tools/scrcpy/linux/icon.png b/copilot_tools/scrcpy/linux/icon.png
diff --git a/copilot_tools/scrcpy/linux/scrcpy b/copilot_tools/scrcpy/linux/scrcpy
diff --git a/copilot_tools/scrcpy/linux/scrcpy-server b/copilot_tools/scrcpy/linux/scrcpy-server