@@ -886,10 +886,11 @@ def _prepare_agent_input(
886886 async def _execute_standard_agent (
887887 self , node : AgentNode , input_data : Any , query : str , context : GraphExecutionContext
888888 ) -> Any :
889- """Execute standard agent with error handling."""
889+ """Execute standard agent with error handling and fallback models ."""
890890 # Get message history from context (limit to most recent 10 messages for token efficiency)
891891 message_history = context .get_message_history (max_messages = 10 )
892892
893+ # Try with the original agent first
893894 try :
894895 # Pass message_history if available (Pydantic AI agents support this)
895896 if message_history :
@@ -909,13 +910,204 @@ async def _execute_standard_agent(
909910 "Failed to accumulate messages from agent result" , error = str (e )
910911 )
911912 return result
912- except Exception :
913- # Handle validation errors and API errors for planner node
913+ except Exception as e :
914+ # Check if we should retry with fallback models
915+ from src .utils .hf_error_handler import (
916+ extract_error_details ,
917+ should_retry_with_fallback ,
918+ )
919+
920+ error_details = extract_error_details (e )
921+ should_retry = should_retry_with_fallback (e )
922+
923+ # Handle validation errors and API errors for planner node (with fallback)
914924 if node .node_id == "planner" :
925+ if should_retry :
926+ self .logger .warning (
927+ "Planner failed, trying fallback models" ,
928+ original_error = str (e ),
929+ status_code = error_details .get ("status_code" ),
930+ )
931+ # Try fallback models for planner
932+ fallback_result = await self ._try_fallback_models (
933+ node , input_data , message_history , query , context , e
934+ )
935+ if fallback_result is not None :
936+ return fallback_result
937+ # If fallback failed or not applicable, use fallback plan
915938 return self ._create_fallback_plan (query , input_data )
916- # For other nodes, re-raise the exception
939+
940+ # For other nodes, try fallback models if applicable
941+ if should_retry :
942+ self .logger .warning (
943+ "Agent node failed, trying fallback models" ,
944+ node_id = node .node_id ,
945+ original_error = str (e ),
946+ status_code = error_details .get ("status_code" ),
947+ )
948+ fallback_result = await self ._try_fallback_models (
949+ node , input_data , message_history , query , context , e
950+ )
951+ if fallback_result is not None :
952+ return fallback_result
953+
954+ # If fallback didn't work or wasn't applicable, re-raise the exception
917955 raise
918956
async def _try_fallback_models(
    self,
    node: AgentNode,
    input_data: Any,
    message_history: list[Any],
    query: str,
    context: GraphExecutionContext,
    original_error: Exception,
) -> Any | None:
    """Re-run a failed agent node against alternative models.

    Candidate models come from ``get_fallback_models`` (keyed on the model
    name extracted from ``original_error``) followed by the settings
    fallback list. Candidates are tried in order; the first successful run
    wins and its new messages are added to ``context``.

    Args:
        node: The agent node that failed; only ``node.node_id`` is read.
        input_data: Input passed unchanged to each fallback agent's ``run``.
        message_history: Prior messages; forwarded to ``run`` only when non-empty.
        query: The research query. Not used here; kept for interface stability.
        context: Execution context that receives messages from a successful run.
        original_error: The exception that triggered the fallback attempt.

    Returns:
        The result of the first fallback agent that succeeds, or ``None`` when
        there are no candidates or every candidate fails.
    """
    from src.utils.config import settings
    from src.utils.hf_error_handler import extract_error_details, get_fallback_models

    error_details = extract_error_details(original_error)
    original_model = error_details.get("model_name")

    # Build the candidate list in a fresh local list so the sequences returned
    # by the helpers are never mutated, drop duplicates while keeping order,
    # and never retry the model that just failed.
    candidates: list[str] = []
    sources = (
        get_fallback_models(original_model) or [],
        settings.get_hf_fallback_models_list() or [],
    )
    for source in sources:
        for model in source:
            if model == original_model or model in candidates:
                continue
            candidates.append(model)

    self.logger.info(
        "Trying fallback models",
        node_id=node.node_id,
        original_model=original_model,
        fallback_count=len(candidates),
    )

    if not candidates:
        # Nothing was attempted, so do not report this as "all fallbacks failed".
        self.logger.warning(
            "No fallback models available",
            node_id=node.node_id,
            original_model=original_model,
        )
        return None

    for fallback_model in candidates:
        try:
            fallback_agent = self._recreate_agent_with_model(node.node_id, fallback_model)
            if fallback_agent is None:
                # Recreation failed or the node is unsupported; the helper logs why.
                continue

            if message_history:
                result = await fallback_agent.run(input_data, message_history=message_history)
            else:
                result = await fallback_agent.run(input_data)
        except Exception as e:
            # Best effort: one failing model must not prevent trying the next.
            self.logger.warning(
                "Fallback model failed",
                node_id=node.node_id,
                fallback_model=fallback_model,
                error=str(e),
            )
            continue

        self.logger.info(
            "Fallback model succeeded",
            node_id=node.node_id,
            fallback_model=fallback_model,
        )

        # Accumulate new messages from the agent result if it exposes them.
        if hasattr(result, "new_messages"):
            try:
                for msg in result.new_messages():
                    context.add_message(msg)
            except Exception as e:
                # Message bookkeeping is optional; keep the successful result.
                self.logger.debug(
                    "Failed to accumulate messages from fallback agent result", error=str(e)
                )

        return result

    self.logger.error(
        "All fallback models failed",
        node_id=node.node_id,
        fallback_count=len(candidates),
    )
    return None
1049+
1050+ def _recreate_agent_with_model (self , node_id : str , model_name : str ) -> Any | None :
1051+ """Recreate an agent with a specific model.
1052+
1053+ Args:
1054+ node_id: The node ID (e.g., "thinking", "knowledge_gap")
1055+ model_name: The model name to use
1056+
1057+ Returns:
1058+ Agent instance or None if recreation failed
1059+ """
1060+ try :
1061+ from pydantic_ai .models .huggingface import HuggingFaceModel
1062+ from pydantic_ai .providers .huggingface import HuggingFaceProvider
1063+
1064+ # Create model with fallback model name
1065+ hf_provider = HuggingFaceProvider (api_key = self .oauth_token )
1066+ model = HuggingFaceModel (model_name , provider = hf_provider )
1067+
1068+ # Recreate agent based on node_id
1069+ if node_id == "thinking" :
1070+ from src .agent_factory .agents import create_thinking_agent
1071+
1072+ agent_wrapper = create_thinking_agent (model = model , oauth_token = self .oauth_token )
1073+ return agent_wrapper .agent
1074+ elif node_id == "knowledge_gap" :
1075+ from src .agent_factory .agents import create_knowledge_gap_agent
1076+
1077+ agent_wrapper = create_knowledge_gap_agent ( # type: ignore[assignment]
1078+ model = model , oauth_token = self .oauth_token
1079+ )
1080+ return agent_wrapper .agent
1081+ elif node_id == "tool_selector" :
1082+ from src .agent_factory .agents import create_tool_selector_agent
1083+
1084+ agent_wrapper = create_tool_selector_agent ( # type: ignore[assignment]
1085+ model = model , oauth_token = self .oauth_token
1086+ )
1087+ return agent_wrapper .agent
1088+ elif node_id == "planner" :
1089+ from src .agent_factory .agents import create_planner_agent
1090+
1091+ agent_wrapper = create_planner_agent (model = model , oauth_token = self .oauth_token ) # type: ignore[assignment]
1092+ return agent_wrapper .agent
1093+ elif node_id == "writer" :
1094+ from src .agent_factory .agents import create_writer_agent
1095+
1096+ agent_wrapper = create_writer_agent (model = model , oauth_token = self .oauth_token ) # type: ignore[assignment]
1097+ return agent_wrapper .agent
1098+ else :
1099+ self .logger .warning ("Unknown node_id for agent recreation" , node_id = node_id )
1100+ return None
1101+
1102+ except Exception as e :
1103+ self .logger .error (
1104+ "Failed to recreate agent with fallback model" ,
1105+ node_id = node_id ,
1106+ model_name = model_name ,
1107+ error = str (e ),
1108+ )
1109+ return None
1110+
9191111 def _create_fallback_plan (self , query : str , input_data : Any ) -> Any :
9201112 """Create fallback ReportPlan when planner fails."""
9211113 from src .utils .models import ReportPlan , ReportPlanSection
0 commit comments