Skip to content

Commit 54266dd

Browse files
committed
Add --verbose flag and directly access verbose flag from Config singleton
1 parent 9e332a1 commit 54266dd

File tree

6 files changed

+54
-49
lines changed

6 files changed

+54
-49
lines changed

operate/config.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ class Config:
1111
Configuration class for managing settings.
1212
1313
Attributes:
14-
debug (bool): Flag indicating whether debug mode is enabled.
14+
verbose (bool): Flag indicating whether verbose mode is enabled.
1515
openai_api_key (str): API key for OpenAI.
1616
google_api_key (str): API key for Google.
1717
"""

operate/main.py

+9
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,14 @@ def main_entry():
2424
help="Use voice input mode",
2525
action="store_true",
2626
)
27+
28+
# Add a flag for verbose mode
29+
parser.add_argument(
30+
"--verbose",
31+
help="Run operate in verbose mode",
32+
action="store_true",
33+
)
34+
2735
# Allow for direct input of prompt
2836
parser.add_argument(
2937
"--prompt",
@@ -38,6 +46,7 @@ def main_entry():
3846
args.model,
3947
terminal_prompt=args.prompt,
4048
voice_mode=args.voice,
49+
verbose_mode=args.verbose
4150
)
4251
except KeyboardInterrupt:
4352
print(f"\n{ANSI_BRIGHT_MAGENTA}Exiting...")

operate/models/apis.py

+31 -33
Original file line number | Diff line number | Diff line change
@@ -34,11 +34,9 @@
3434

3535
# Load configuration
3636
config = Config()
37-
VERBOSE = config.verbose
38-
3937

4038
async def get_next_action(model, messages, objective, session_id):
41-
if VERBOSE:
39+
if config.verbose:
4240
print("[Self-Operating Computer][get_next_action]")
4341
print("[Self-Operating Computer][get_next_action] model", model)
4442
if model == "gpt-4":
@@ -61,7 +59,7 @@ async def get_next_action(model, messages, objective, session_id):
6159

6260

6361
def call_gpt_4_vision_preview(messages):
64-
if VERBOSE:
62+
if config.verbose:
6563
print("[call_gpt_4_v]")
6664
time.sleep(1)
6765
client = config.initialize_openai()
@@ -82,7 +80,7 @@ def call_gpt_4_vision_preview(messages):
8280
else:
8381
user_prompt = get_user_prompt()
8482

85-
if VERBOSE:
83+
if config.verbose:
8684
print(
8785
"[call_gpt_4_v] user_prompt",
8886
user_prompt,
@@ -117,7 +115,7 @@ def call_gpt_4_vision_preview(messages):
117115
content = content[: -len("```")] # Remove ending
118116

119117
assistant_message = {"role": "assistant", "content": content}
120-
if VERBOSE:
118+
if config.verbose:
121119
print(
122120
"[call_gpt_4_v] content",
123121
content,
@@ -137,7 +135,7 @@ def call_gpt_4_vision_preview(messages):
137135
f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RED}[Error] AI response was {ANSI_RESET}",
138136
content,
139137
)
140-
if VERBOSE:
138+
if config.verbose:
141139
traceback.print_exc()
142140
return call_gpt_4_vision_preview(messages)
143141

@@ -146,7 +144,7 @@ def call_gemini_pro_vision(messages, objective):
146144
"""
147145
Get the next action for Self-Operating Computer using Gemini Pro Vision
148146
"""
149-
if VERBOSE:
147+
if config.verbose:
150148
print(
151149
"[Self Operating Computer][call_gemini_pro_vision]",
152150
)
@@ -165,18 +163,18 @@ def call_gemini_pro_vision(messages, objective):
165163
prompt = get_system_prompt("gemini-pro-vision", objective)
166164

167165
model = config.initialize_google()
168-
if VERBOSE:
166+
if config.verbose:
169167
print("[call_gemini_pro_vision] model", model)
170168

171169
response = model.generate_content([prompt, Image.open(screenshot_filename)])
172170

173171
content = response.text[1:]
174-
if VERBOSE:
172+
if config.verbose:
175173
print("[call_gemini_pro_vision] response", response)
176174
print("[call_gemini_pro_vision] content", content)
177175

178176
content = json.loads(content)
179-
if VERBOSE:
177+
if config.verbose:
180178
print(
181179
"[get_next_action][call_gemini_pro_vision] content",
182180
content,
@@ -188,14 +186,14 @@ def call_gemini_pro_vision(messages, objective):
188186
print(
189187
f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_BRIGHT_MAGENTA}[Operate] That did not work. Trying another method {ANSI_RESET}"
190188
)
191-
if VERBOSE:
189+
if config.verbose:
192190
print("[Self-Operating Computer][Operate] error", e)
193191
traceback.print_exc()
194192
return call_gpt_4_vision_preview(messages)
195193

196194

197195
async def call_gpt_4_vision_preview_ocr(messages, objective, model):
198-
if VERBOSE:
196+
if config.verbose:
199197
print("[call_gpt_4_vision_preview_ocr]")
200198

201199
# Construct the path to the file within the package
@@ -260,7 +258,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
260258
# Normalize line breaks and remove any unwanted characters
261259
content = "\n".join(line.strip() for line in content.splitlines())
262260

263-
if VERBOSE:
261+
if config.verbose:
264262
print(
265263
"\n\n\n[call_gpt_4_vision_preview_ocr] content after cleaning", content
266264
)
@@ -274,7 +272,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
274272
for operation in content:
275273
if operation.get("operation") == "click":
276274
text_to_click = operation.get("text")
277-
if VERBOSE:
275+
if config.verbose:
278276
print(
279277
"[call_gpt_4_vision_preview_ocr][click] text_to_click",
280278
text_to_click,
@@ -296,7 +294,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
296294
operation["x"] = coordinates["x"]
297295
operation["y"] = coordinates["y"]
298296

299-
if VERBOSE:
297+
if config.verbose:
300298
print(
301299
"[call_gpt_4_vision_preview_ocr][click] text_element_index",
302300
text_element_index,
@@ -324,7 +322,7 @@ async def call_gpt_4_vision_preview_ocr(messages, objective, model):
324322
print(
325323
f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_BRIGHT_MAGENTA}[Operate] That did not work. Trying another method {ANSI_RESET}"
326324
)
327-
if VERBOSE:
325+
if config.verbose:
328326
print("[Self-Operating Computer][Operate] error", e)
329327
traceback.print_exc()
330328
return gpt_4_fallback(messages, objective, model)
@@ -356,7 +354,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
356354
else:
357355
user_prompt = get_user_prompt()
358356

359-
if VERBOSE:
357+
if config.verbose:
360358
print(
361359
"[call_gpt_4_vision_preview_labeled] user_prompt",
362360
user_prompt,
@@ -393,7 +391,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
393391
content = content[: -len("```")] # Remove ending
394392

395393
assistant_message = {"role": "assistant", "content": content}
396-
if VERBOSE:
394+
if config.verbose:
397395
print(
398396
"[call_gpt_4_vision_preview_labeled] content",
399397
content,
@@ -407,14 +405,14 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
407405
for operation in content:
408406
if operation.get("operation") == "click":
409407
label = operation.get("label")
410-
if VERBOSE:
408+
if config.verbose:
411409
print(
412410
"[Self Operating Computer][call_gpt_4_vision_preview_labeled] label",
413411
label,
414412
)
415413

416414
coordinates = get_label_coordinates(label, label_coordinates)
417-
if VERBOSE:
415+
if config.verbose:
418416
print(
419417
"[Self Operating Computer][call_gpt_4_vision_preview_labeled] coordinates",
420418
coordinates,
@@ -426,7 +424,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
426424
click_position_percent = get_click_position_in_percent(
427425
coordinates, image_size
428426
)
429-
if VERBOSE:
427+
if config.verbose:
430428
print(
431429
"[Self Operating Computer][call_gpt_4_vision_preview_labeled] click_position_percent",
432430
click_position_percent,
@@ -441,7 +439,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
441439
y_percent = f"{click_position_percent[1]:.2f}"
442440
operation["x"] = x_percent
443441
operation["y"] = y_percent
444-
if VERBOSE:
442+
if config.verbose:
445443
print(
446444
"[Self Operating Computer][call_gpt_4_vision_preview_labeled] new click operation",
447445
operation,
@@ -450,7 +448,7 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
450448
else:
451449
processed_content.append(operation)
452450

453-
if VERBOSE:
451+
if config.verbose:
454452
print(
455453
"[Self Operating Computer][call_gpt_4_vision_preview_labeled] new processed_content",
456454
processed_content,
@@ -461,14 +459,14 @@ async def call_gpt_4_vision_preview_labeled(messages, objective):
461459
print(
462460
f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_BRIGHT_MAGENTA}[Operate] That did not work. Trying another method {ANSI_RESET}"
463461
)
464-
if VERBOSE:
462+
if config.verbose:
465463
print("[Self-Operating Computer][Operate] error", e)
466464
traceback.print_exc()
467465
return call_gpt_4_vision_preview(messages)
468466

469467

470468
def call_ollama_llava(messages):
471-
if VERBOSE:
469+
if config.verbose:
472470
print("[call_ollama_llava]")
473471
time.sleep(1)
474472
try:
@@ -485,7 +483,7 @@ def call_ollama_llava(messages):
485483
else:
486484
user_prompt = get_user_prompt()
487485

488-
if VERBOSE:
486+
if config.verbose:
489487
print(
490488
"[call_ollama_llava] user_prompt",
491489
user_prompt,
@@ -516,7 +514,7 @@ def call_ollama_llava(messages):
516514
content = content[: -len("```")] # Remove ending
517515

518516
assistant_message = {"role": "assistant", "content": content}
519-
if VERBOSE:
517+
if config.verbose:
520518
print(
521519
"[call_ollama_llava] content",
522520
content,
@@ -542,7 +540,7 @@ def call_ollama_llava(messages):
542540
f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RED}[Error] AI response was {ANSI_RESET}",
543541
content,
544542
)
545-
if VERBOSE:
543+
if config.verbose:
546544
traceback.print_exc()
547545
return call_ollama_llava(messages)
548546

@@ -562,15 +560,15 @@ def get_last_assistant_message(messages):
562560

563561

564562
def gpt_4_fallback(messages, objective, model):
565-
if VERBOSE:
563+
if config.verbose:
566564
print("[gpt_4_fallback]")
567565
system_prompt = get_system_prompt("gpt-4-vision-preview", objective)
568566
new_system_message = {"role": "system", "content": system_prompt}
569567
# remove and replace the first message in `messages` with `new_system_message`
570568

571569
messages[0] = new_system_message
572570

573-
if VERBOSE:
571+
if config.verbose:
574572
print("[gpt_4_fallback][updated]")
575573
print("[gpt_4_fallback][updated] len(messages)", len(messages))
576574

@@ -581,7 +579,7 @@ def confirm_system_prompt(messages, objective, model):
581579
"""
582580
On `Exception` we default to `call_gpt_4_vision_preview` so we have this function to reassign system prompt in case of a previous failure
583581
"""
584-
if VERBOSE:
582+
if config.verbose:
585583
print("[confirm_system_prompt] model", model)
586584

587585
system_prompt = get_system_prompt(model, objective)
@@ -590,7 +588,7 @@ def confirm_system_prompt(messages, objective, model):
590588

591589
messages[0] = new_system_message
592590

593-
if VERBOSE:
591+
if config.verbose:
594592
print("[confirm_system_prompt]")
595593
print("[confirm_system_prompt] len(messages)", len(messages))
596594
for m in messages:

operate/models/prompts.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from operate.config import Config
33

44
# Load configuration
5-
VERBOSE = Config().verbose
5+
config = Config()
66

77
# General user Prompts
88
USER_QUESTION = "Hello, I can help you with anything. What would you like done?"
@@ -380,7 +380,7 @@ def get_system_prompt(model, objective):
380380
prompt = prompt_string.format(objective=objective)
381381

382382
# Optional verbose output
383-
if VERBOSE:
383+
if config.verbose:
384384
print("[get_system_prompt] model:", model)
385385
print("[get_system_prompt] prompt name:", prompt_name)
386386
# print("[get_system_prompt] prompt:", prompt)

operate/operate.py

+6 -8
Original file line number | Diff line number | Diff line change
@@ -29,10 +29,7 @@
2929
config = Config()
3030
operating_system = OperatingSystem()
3131

32-
VERBOSE = config.verbose
33-
34-
35-
def main(model, terminal_prompt, voice_mode=False):
32+
def main(model, terminal_prompt, voice_mode=False, verbose_mode=False):
3633
"""
3734
Main function for the Self-Operating Computer.
3835
@@ -48,6 +45,7 @@ def main(model, terminal_prompt, voice_mode=False):
4845
mic = None
4946
# Initialize `WhisperMic`, if `voice_mode` is True
5047

48+
config.verbose = verbose_mode
5149
config.validation(model, voice_mode)
5250

5351
if voice_mode:
@@ -104,7 +102,7 @@ def main(model, terminal_prompt, voice_mode=False):
104102
session_id = None
105103

106104
while True:
107-
if VERBOSE:
105+
if config.verbose:
108106
print("[Self Operating Computer] loop_count", loop_count)
109107
try:
110108
operations, session_id = asyncio.run(
@@ -131,17 +129,17 @@ def main(model, terminal_prompt, voice_mode=False):
131129

132130

133131
def operate(operations):
134-
if VERBOSE:
132+
if config.verbose:
135133
print("[Self Operating Computer][operate]")
136134
for operation in operations:
137-
if VERBOSE:
135+
if config.verbose:
138136
print("[Self Operating Computer][operate] operation", operation)
139137
# wait one second
140138
time.sleep(1)
141139
operate_type = operation.get("operation").lower()
142140
operate_thought = operation.get("thought")
143141
operate_detail = ""
144-
if VERBOSE:
142+
if config.verbose:
145143
print("[Self Operating Computer][operate] operate_type", operate_type)
146144

147145
if operate_type == "press" or operate_type == "hotkey":

0 commit comments

Comments
 (0)