3232from ace .observability import configure_opik
3333from browser_use import ChatBrowserUse
3434
35-
3635# Utility function for timeout calculation
def calculate_timeout_steps(timeout_seconds: float, seconds_per_step: float = 12) -> int:
    """Convert an elapsed timeout duration into an equivalent step count.

    One step is charged for every full ``seconds_per_step`` seconds that
    elapsed; any partial interval is discarded.

    Args:
        timeout_seconds: Duration to convert, in seconds.
        seconds_per_step: Length of one step in seconds. Defaults to 12,
            i.e. one step per 12 seconds.

    Returns:
        The number of whole steps, never less than zero.

    Raises:
        ValueError: If ``seconds_per_step`` is not positive.
    """
    if seconds_per_step <= 0:
        raise ValueError("seconds_per_step must be positive")
    # Floor division rounds toward negative infinity, so a slightly negative
    # duration would otherwise yield -1 steps; clamp so totals never shrink.
    return max(0, int(timeout_seconds // seconds_per_step))
4039
41-
4240# Import domain-specific utilities
4341from domain_utils import get_test_domains
4442
@@ -68,9 +66,7 @@ def calculate_timeout_steps(timeout_seconds: float) -> int:
6866"""
6967
7068
71- def get_ace_token_usage (
72- run_start_time : datetime .datetime = None ,
73- ) -> tuple [int , int , int , int ]:
69+ def get_ace_token_usage (run_start_time : datetime .datetime = None ) -> tuple [int , int , int , int ]:
7470 """Query Opik for ACE token usage only.
7571
7672 Returns:
@@ -112,7 +108,9 @@ def get_ace_token_usage(
112108 filter_string = f'start_time >= "{ recent_time } "' ,
113109 max_results = 50 ,
114110 )
115- print (f" 📊 Found { len (traces )} recent traces in '{ project } ' project" )
111+ print (
112+ f" 📊 Found { len (traces )} recent traces in '{ project } ' project"
113+ )
116114 all_traces .extend (traces )
117115 except Exception as e :
118116 print (f" ⚠️ Failed to search '{ project } ' project: { e } " )
@@ -196,32 +194,21 @@ def parse_domain_result(output: str, domain: str) -> dict:
196194 return {"status" : "TAKEN" }
197195
198196 # Check for natural language indicators of availability
199- elif (
200- ("AVAILABLE" in output_upper and domain_upper in output_upper )
201- or ("ADD TO CART" in output_upper and domain_upper in output_upper )
202- or ("PRICE:" in output_upper and domain_upper in output_upper )
203- or (
204- "REGISTRATION" in output_upper
205- and "AVAILABLE" in output_upper
206- and domain_upper in output_upper
207- )
208- ):
197+ elif ("AVAILABLE" in output_upper and domain_upper in output_upper ) or \
198+ ("ADD TO CART" in output_upper and domain_upper in output_upper ) or \
199+ ("PRICE:" in output_upper and domain_upper in output_upper ) or \
200+ ("REGISTRATION" in output_upper and "AVAILABLE" in output_upper and domain_upper in output_upper ):
209201 return {"status" : "AVAILABLE" }
210202
211203 # Check for natural language indicators of taken/unavailable
212- elif (
213- ("TAKEN" in output_upper and domain_upper in output_upper )
214- or ("REGISTERED" in output_upper and domain_upper in output_upper )
215- or ("NOT AVAILABLE" in output_upper and domain_upper in output_upper )
216- or ("UNAVAILABLE" in output_upper and domain_upper in output_upper )
217- ):
204+ elif ("TAKEN" in output_upper and domain_upper in output_upper ) or \
205+ ("REGISTERED" in output_upper and domain_upper in output_upper ) or \
206+ ("NOT AVAILABLE" in output_upper and domain_upper in output_upper ) or \
207+ ("UNAVAILABLE" in output_upper and domain_upper in output_upper ):
218208 return {"status" : "TAKEN" }
219209
220210 else :
221- return {
222- "status" : "ERROR" ,
223- "reason" : f"Could not parse result: { output [:100 ]} ..." ,
224- }
211+ return {"status" : "ERROR" , "reason" : f"Could not parse result: { output [:100 ]} ..." }
225212
226213
227214async def check_single_domain (agent : ACEAgent , domain : str ) -> dict :
@@ -236,6 +223,7 @@ async def check_single_domain(agent: ACEAgent, domain: str) -> dict:
236223 # Track browser-use tokens across all attempts
237224 total_browseruse_tokens = 0
238225
226+
239227 for attempt in range (max_retries ):
240228 print (f" 🔄 Attempt { attempt + 1 } /{ max_retries } " )
241229
@@ -245,7 +233,8 @@ async def check_single_domain(agent: ACEAgent, domain: str) -> dict:
245233
246234 # Run domain check with ACE learning (with timeout like baseline)
247235 history = await asyncio .wait_for (
248- agent .run (task = task , max_steps = 25 ), timeout = 180.0
236+ agent .run (task = task , max_steps = 25 ),
237+ timeout = 180.0
249238 )
250239
251240 # Extract results
@@ -302,12 +291,10 @@ async def check_single_domain(agent: ACEAgent, domain: str) -> dict:
302291 print (f" ⚠️ Could not get tokens from history: { e } " )
303292
304293 # Method 2: Try agent internal token tracking (ACEAgent specific)
305- if attempt_tokens == 0 and hasattr (agent , " browser_llm" ):
294+ if attempt_tokens == 0 and hasattr (agent , ' browser_llm' ):
306295 try :
307296 # ACEAgent uses browser_use Agent internally, check if it has token tracking
308- if hasattr (agent , "_last_agent" ) and hasattr (
309- agent ._last_agent , "token_cost_service"
310- ):
297+ if hasattr (agent , '_last_agent' ) and hasattr (agent ._last_agent , 'token_cost_service' ):
311298 usage_summary = (
312299 await agent ._last_agent .token_cost_service .get_usage_summary ()
313300 )
@@ -367,13 +354,9 @@ async def check_single_domain(agent: ACEAgent, domain: str) -> dict:
367354
368355 steps = actual_steps + timeout_steps
369356 total_steps += steps
370- attempt_details .append (
371- f"attempt { attempt + 1 } : { steps } steps (timeout, +{ timeout_steps } for duration)"
372- )
357+ attempt_details .append (f"attempt { attempt + 1 } : { steps } steps (timeout, +{ timeout_steps } for duration)" )
373358 last_error = f"Timeout on attempt { attempt + 1 } "
374- print (
375- f" ⏰ Timeout after { actual_steps } steps (+{ timeout_steps } timeout penalty)"
376- )
359+ print (f" ⏰ Timeout after { actual_steps } steps (+{ timeout_steps } timeout penalty)" )
377360
378361 except Exception as e :
379362 # Get actual steps even on error
@@ -426,12 +409,10 @@ async def check_single_domain(agent: ACEAgent, domain: str) -> dict:
426409 print (f" ⚠️ Could not get tokens from history: { e } " )
427410
428411 # Method 2: Try agent internal token tracking (ACEAgent specific)
429- if attempt_tokens == 0 and hasattr (agent , " browser_llm" ):
412+ if attempt_tokens == 0 and hasattr (agent , ' browser_llm' ):
430413 try :
431414 # ACEAgent uses browser_use Agent internally, check if it has token tracking
432- if hasattr (agent , "_last_agent" ) and hasattr (
433- agent ._last_agent , "token_cost_service"
434- ):
415+ if hasattr (agent , '_last_agent' ) and hasattr (agent ._last_agent , 'token_cost_service' ):
435416 usage_summary = (
436417 await agent ._last_agent .token_cost_service .get_usage_summary ()
437418 )
@@ -451,6 +432,7 @@ async def check_single_domain(agent: ACEAgent, domain: str) -> dict:
451432 f" 🤖 Attempt { attempt + 1 } tokens: { attempt_tokens } (total: { total_browseruse_tokens } )"
452433 )
453434
435+
454436 # All retries failed - use accumulated tokens from all attempts
455437 print (f" ❌ All { max_retries } attempts failed" )
456438 return {
@@ -459,7 +441,7 @@ async def check_single_domain(agent: ACEAgent, domain: str) -> dict:
459441 "success" : False ,
460442 "correct" : False ,
461443 "expected" : "AVAILABLE" ,
462- "steps" : steps if " steps" in locals () else 0 ,
444+ "steps" : steps if ' steps' in locals () else 0 ,
463445 "total_steps" : total_steps ,
464446 "error" : f"Failed after { max_retries } attempts. Last error: { last_error } " ,
465447 "attempt" : max_retries ,
@@ -490,12 +472,12 @@ async def main():
490472
491473 # Create ACE agent - handles everything automatically!
492474 agent = ACEAgent (
493- llm = ChatBrowserUse (), # Browser automation LLM
494- ace_model = "claude-haiku-4-5-20251001" , # ACE learning LLM
495- ace_max_tokens = 4096 , # Enough for domain check analysis
475+ llm = ChatBrowserUse (), # Browser automation LLM
476+ ace_model = "claude-haiku-4-5-20251001" , # ACE learning LLM
477+ ace_max_tokens = 4096 , # Enough for domain check analysis
496478 playbook_path = str (playbook_path ) if playbook_path .exists () else None ,
497- max_steps = 25 , # Browser automation steps
498- calculate_cost = True , # Track usage
479+ max_steps = 25 , # Browser automation steps
480+ calculate_cost = True # Track usage
499481 )
500482
501483 # Show current knowledge
@@ -562,9 +544,7 @@ async def main():
562544 print (f"\n { '=' * 60 } " )
563545 print ("📊 DOMAIN CHECK RESULTS" )
564546 print ("=" * 60 )
565- print (
566- f"{ '#' :<3} { 'Domain' :<25} { 'Status' :<10} { 'Acc' :<4} { 'Steps' :<8} { 'Browser-Tokens' :<13} { 'Details' } "
567- )
547+ print (f"{ '#' :<3} { 'Domain' :<25} { 'Status' :<10} { 'Acc' :<4} { 'Steps' :<8} { 'Browser-Tokens' :<13} { 'Details' } " )
568548 print ("-" * 93 )
569549
570550 total_steps = 0
@@ -593,9 +573,7 @@ async def main():
593573 accuracy_indicator = "✓" if correct else "✗"
594574 browseruse_tokens = result .get ("browseruse_tokens" , 0 )
595575
596- print (
597- f"{ i :<3} { result ['domain' ]:<25} { result ['status' ]:<10} { accuracy_indicator :<4} { total_steps_domain :<8} { browseruse_tokens :<12} { step_details } "
598- )
576+ print (f"{ i :<3} { result ['domain' ]:<25} { result ['status' ]:<10} { accuracy_indicator :<4} { total_steps_domain :<8} { browseruse_tokens :<12} { step_details } " )
599577
600578 if not correct and result ["success" ]:
601579 expected = result .get ("expected" , "UNKNOWN" )
@@ -652,9 +630,7 @@ async def main():
652630 for i , bullet in enumerate (recent_strategies , 1 ):
653631 helpful = bullet .helpful
654632 harmful = bullet .harmful
655- effectiveness = (
656- "✅" if helpful > harmful else "⚠️" if helpful == harmful else "❌"
657- )
633+ effectiveness = "✅" if helpful > harmful else "⚠️" if helpful == harmful else "❌"
658634 print (f"{ i } . { effectiveness } { bullet .content } " )
659635 print (f" (+{ helpful } /-{ harmful } )" )
660636
@@ -670,4 +646,4 @@ async def main():
670646
671647
672648if __name__ == "__main__" :
673- asyncio .run (main ())
649+ asyncio .run (main ())
0 commit comments