Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 44 additions & 8 deletions copilot_agent_client/mcp_agent_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
sys.path.append(".")

import json
import subprocess

from PIL import Image
import io

from tools.image_tools import make_b64_url

from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on, press_home_key
from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on, press_home_key, _get_adb_command

from copilot_front_end.mobile_action_helper import init_device, open_screen
from copilot_front_end.pu_frontend_executor import act_on_device, uiTars_to_frontend_action
Expand All @@ -27,6 +28,26 @@

import threading


def _check_yadb_installed(device_id, print_command=False):
    """
    Make sure yadb is present on the device, without pressing the HOME key.

    Runs ``md5sum /data/local/tmp/yadb`` on the device through adb. When the
    expected checksum does not appear in the command output, the ``yadb``
    file (path relative to the current working directory) is pushed to
    ``/data/local/tmp`` on the device.

    Args:
        device_id: Device identifier forwarded to ``_get_adb_command`` to
            build the adb command prefix.
        print_command: When True, echo each adb command before running it.

    Returns:
        None. A failed push is reported on stdout rather than raised, so the
        caller's flow is unchanged.
    """
    # Checksum string that must appear in the md5sum output for the
    # on-device yadb to be considered installed.
    expected_md5 = "29a0cd3b3adea92350dd5a25594593df"
    adb_command = _get_adb_command(device_id)

    command = f"{adb_command} shell md5sum /data/local/tmp/yadb"
    if print_command:
        print(f"Executing command: {command}")

    result = subprocess.run(command, shell=True, capture_output=True, text=True)
    if expected_md5 in result.stdout:
        return

    command = f"{adb_command} push yadb /data/local/tmp"
    print("YADB is not installed on the device. Installing now...")
    if print_command:
        print(f"Executing command: {command}")

    push_result = subprocess.run(command, shell=True, capture_output=True, text=True)
    # Previously the push outcome was discarded; surface a failure so a
    # missing local yadb file or an unreachable device does not go unnoticed.
    if push_result.returncode != 0:
        print(f"Failed to push yadb to the device: {push_result.stderr.strip()}")


def auto_reply(current_image_url, task, info_action, model_provider, model_name):
"""
Reply with information action.
Expand Down Expand Up @@ -193,11 +214,15 @@ def gui_agent_loop(

# init device for the first time
open_screen(device_id)
init_device(device_id)

# if reset_environment, press home key before starting the task

# 只在需要重置环境时调用 init_device(它内部会按 HOME 键)
# 否则只检查 yadb 是否安装,不按 HOME 键
if reset_environment and session_id is None and task is not None:
press_home_key(device_id, print_command=True)
init_device(device_id, reset_environment=True) # init_device 内部会按 HOME 键
else:
# 只检查 yadb,不按 HOME 键
init_device(device_id, reset_environment=False)


# task, task_type = task, rollout_config['task_type']
task_type = agent_loop_config['task_type']
Expand Down Expand Up @@ -265,6 +290,9 @@ def gui_agent_loop(
global_step_idx = 0
# restart the steps from 0, even continuing an existing session
for step_idx in range(max_steps):
# 打印步骤开始分隔符
step_label = f" Step {step_idx+1} start "
print(f"\n{step_label:-^50}")

if not dectect_screen_on(device_id):
print("Screen is off, turn on the screen first")
Expand Down Expand Up @@ -371,7 +399,17 @@ def gui_agent_loop(

history_actions.append(action)

print(f"Step {step_idx+1}/{max_steps} done.\nAction Type: {action['action_type']}, cot: {action.get('cot', '')}\nSession ID: {session_id}\n")
# 清理 cot 中的 <THINK> 标签用于打印
cot_display = action.get('cot', '')
if cot_display:
import re
cot_display = re.sub(r'<\s*/?THINK\s*>', '', cot_display, flags=re.IGNORECASE).strip()

print(f"Action: {action['action_type']}")
if cot_display:
print(f"cot: {cot_display}")
step_end_label = f" Step {step_idx+1} end "
print(f"{step_end_label:-^50}")

# print(f"local:{step_idx+1}/global:{global_step_idx}/{max_steps} done. Action: {action}")

Expand Down Expand Up @@ -446,5 +484,3 @@ def gui_agent_loop(
# print(f"Task {task} done in {len(history_actions)} steps. Session ID: {session_id}")

return return_log


113 changes: 54 additions & 59 deletions copilot_agent_client/pu_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,19 @@
sys.path.append(".")

import json

from PIL import Image
import io
from collections import OrderedDict

from tools.image_tools import draw_points, make_b64_url

from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on, press_home_key

from copilot_front_end.mobile_action_helper import capture_screenshot, dectect_screen_on
from copilot_front_end.mobile_action_helper import init_device, open_screen
from copilot_front_end.pu_frontend_executor import act_on_device, uiTars_to_frontend_action

from megfile import smart_remove

import time

from tools.ask_llm_v2 import ask_llm_anything


def reply_info_action(current_image_url, task, info_action, model_provider, model_name):
"""
Reply with information action.
Expand All @@ -32,22 +28,18 @@ def reply_info_action(current_image_url, task, info_action, model_provider, mode
"content": [
{
"type": "text",
"text": f"""# 角色
"text": f"""# 角色
你将扮演一个正在使用GUI Agent完成任务的用户。

# 任务
阅读下方提供的所有背景信息,针对[Agent的澄清问题],生成一个提供关键信息的、简短直接的回答。

# 背景信息
- **任务目标:** {task}
- **agent 问的问题:** {json.dumps(info_action, ensure_ascii=False)}

# 输出要求
- 你的回答必须极其简短和明确。
- 你的回答应直接命中问题的核心,解决Agent的疑惑。
- 不要进行任何额外的解释、对话或使用礼貌用语。
- 只输出回答本身,不要添加任何引号或其他修饰。

以下是当前页面内容:
""",
},
Expand All @@ -64,7 +56,6 @@ def reply_info_action(current_image_url, task, info_action, model_provider, mode
]
}
]

response = ask_llm_anything(
model_provider=model_provider,
model_name=model_name,
Expand All @@ -76,69 +67,80 @@ def reply_info_action(current_image_url, task, info_action, model_provider, mode
"frequency_penalty": 0.0,
}
)

if "</think>" in response:
response = response.split("</think>")[-1].strip()

return response

# delay after act on device
# rollout config
# device info
# def evaluate_task_on_device(agent_server, device_info, task, frontend_action_converter, ask_action_function_func, max_steps = 40, delay_after_capture = 2):
def evaluate_task_on_device(agent_server, device_info, task, rollout_config, extra_info = {}, reflush_app=True, auto_reply = False, reset_environment=True):

def evaluate_task_on_device(agent_server, device_info, task, rollout_config, extra_info={}, reflush_app=True, auto_reply=False, reset_environment=False):
"""
Evaluate a task on a device using the provided frontend action converter and action function.

"""

# ===== 新增:本地美化函数 =====
def _pretty_format_action(act):
if not isinstance(act, (dict, OrderedDict)):
return str(act)
lines = []
# 不再打印 <THINK> 标签
if 'cot' in act and act['cot']:
cot_clean = str(act['cot']).replace('\n', ' ').replace('\r', ' ')
# 移除 <THINK> 和 </THINK> 标签
import re
cot_clean = re.sub(r'<\s*/?THINK\s*>', '', cot_clean, flags=re.IGNORECASE).strip()
if cot_clean:
lines.append(f"cot: {cot_clean}")
# Define field order for readability
field_order = ['explain', 'action', 'value', 'point', 'point1', 'point2', 'return', 'summary']
for key in field_order:
if key in act:
val = act[key]
if isinstance(val, list):
val_str = ",".join(str(x) for x in val)
else:
val_str = str(val).replace('\n', ' ').strip()
lines.append(f"{key}: {val_str}")
return "\n".join(lines)
# ============================

# init device for the first time
device_id = device_info['device_id']
open_screen(device_id)
init_device(device_id)


if reset_environment:
press_home_key(device_id, print_command=True)
init_device(device_id, reset_environment=reset_environment)

task, task_type = task, rollout_config['task_type']

session_id = agent_server.get_session({
"task": task,
"task_type": task_type,
"model_config": rollout_config['model_config'],
"extra_info": extra_info

})

print(f"Session ID: {session_id}")

return_log = {
"session_id": session_id,
"device_info": device_info,
"task": task,
"rollout_config": rollout_config,
"extra_info": extra_info
}

device_id, device_wm_size = device_info['device_id'], device_info['device_wm_size']

max_steps = rollout_config.get('max_steps', 40)
delay_after_capture = rollout_config.get('delay_after_capture', 2)

history_actions = []

for step_idx in range(max_steps):

# 打印步骤开始分隔符
step_label = f" Step {step_idx+1} start "
print(f"\n{step_label:-^50}")

if not dectect_screen_on(device_id):
print("Screen is off, turn on the screen first")
break

image_path = capture_screenshot(device_id, "tmp_screenshot", print_command=False)

image_b64_url = make_b64_url(image_path, resize_config=rollout_config['model_config'].get("resize_config", None))
smart_remove(image_path)

payload = {
"session_id": session_id,
"observation": {
Expand All @@ -150,57 +152,50 @@ def evaluate_task_on_device(agent_server, device_info, task, rollout_config, ext
},
}
}
if history_actions[-1]['action_type'] == "INFO" if len(history_actions) > 0 else False:
info_action = history_actions[-1]

if history_actions and history_actions[-1]['action_type'] == "INFO":
info_action = history_actions[-1]
if auto_reply:
print(f"AUTO REPLY INFO FROM MODEL!")
reply_info = reply_info_action(image_b64_url, task, info_action, model_provider=rollout_config['model_config']['model_provider'], model_name=rollout_config['model_config']['model_name'])
reply_info = reply_info_action(
image_b64_url, task, info_action,
model_provider=rollout_config['model_config']['model_provider'],
model_name=rollout_config['model_config']['model_name']
)
print(f"info: {reply_info}")

else:
print(f"EN: Agent asks: {history_actions[-1]['value']} Please Reply: ")
print(f"ZH: Agent 问你: {history_actions[-1]['value']} 回复一下:")

reply_info = input("Your reply:")

print(f"Replied info action: {reply_info}")

payload['observation']['query'] = reply_info


action = agent_server.automate_step(payload)['action']

#TODO: to replace with the new function
action = uiTars_to_frontend_action(action)

act_on_device(action, device_id, device_wm_size, print_command=True, reflush_app=reflush_app)

act_on_device(action, device_id, device_wm_size, print_command=True, reflush_app=reflush_app, print_executing_command=True)
history_actions.append(action)


print(f"Step {step_idx+1}/{max_steps} done. Action: {action}")
# ===== 替换原始打印:使用美观格式 =====
print(f"Action: {action['action_type']}")
print(_pretty_format_action(action))
step_end_label = f" Step {step_idx+1} end "
print(f"{step_end_label:-^50}")
# ===================================

if action['action_type'].upper() in ['COMPLETE', "ABORT"]:
stop_reason = action['action_type'].upper()
break

time.sleep(delay_after_capture)

if action['action_type'] in ['COMPLETE', "ABORT"]:
stop_reason = action['action_type']
elif step_idx == max_steps - 1:
stop_reason = "MAX_STEPS_REACHED"
else:
stop_reason = "MANUAL_STOP"

# return_log['session_id'] = session_id
return_log['stop_reason'] = stop_reason

return_log['stop_steps'] = step_idx + 1

print(f"Task {task} done in {len(history_actions)} steps. Session ID: {session_id}")

print(f"\ndone in {len(history_actions)} steps.\nSession ID: {session_id}")
return return_log


17 changes: 11 additions & 6 deletions copilot_front_end/mobile_action_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def press_home_key(device_id, print_command = False):

subprocess.run(command, shell=True, capture_output=True, text=True)

def init_device(device_id, print_command = False):
def init_device(device_id, reset_environment=False, print_command = False):
"""
Initialize the device by checking if yadb is installed.
"""
Expand All @@ -99,19 +99,23 @@ def init_device(device_id, print_command = False):

subprocess.run(command, shell=True, capture_output=True, text=True)
else:
print("yadb is already installed on the device.")
# print("yadb is already installed on the device.")
pass

if reset_environment:
press_home_key(device_id, print_command=print_command)

# press_home_key(device_id, print_command=print_command)

def init_all_devices():
"""
Initialize all devices by listing them and setting up the environment.
"""
devices = list_devices()
for device_id in tqdm(devices):
init_device(device_id)
init_device(device_id, reset_environment=True)
print(f"Initialized device: {device_id}")


def dectect_screen_on(device_id, print_command = False):
"""
Detect whether the screen is on for the specified device.
Expand Down Expand Up @@ -636,9 +640,10 @@ def __init__(self, device_id = None):
self.device_id = device_id
self.wm_size = get_device_wm_size(self.device_id)
if self.device_id is not None:
init_device(self.device_id, print_command=True)
init_device(self.device_id, reset_environment=True, print_command=True)
# _open_screen(self.device_id, print_command=True)


pass

def set_device_id(self, device_id):
Expand Down Expand Up @@ -730,4 +735,4 @@ def step_interaction(self, action, capture_duration = 0.5, image_full_path = Non
print(get_device_wm_size("bc23727a"))

open_screen(None, print_command=True)
pass
pass
1 change: 1 addition & 0 deletions copilot_front_end/package_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@
"osmAnd": "net.osmand",
"给到": "com.guanaitong",
"百词斩": "com.jiongji.andriod.card",
"象棋": "com.tencent.qqgame.xq",

}

Expand Down
Binary file added copilot_tools/scrcpy/linux/icon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added copilot_tools/scrcpy/linux/scrcpy
Binary file not shown.
Binary file added copilot_tools/scrcpy/linux/scrcpy-server
Binary file not shown.
Loading