Commit 41f45cc

agent: structure to test
Signed-off-by: vsoch <[email protected]>
1 parent 4c7f7a6 commit 41f45cc

9 files changed: +433 -283 lines changed

README.md

Lines changed: 16 additions & 2 deletions
@@ -46,6 +46,16 @@ Tools to add:
 - build
 - docker
 
+
+### Environment
+
+The following variables can be set in the environment.
+
+| Name | Description | Default |
+|-------|------------|---------------|
+| `FRACTALE_MCP_PORT` | Port to run MCP server on, if using http variant | 8089 |
+| `FRACTALE_MCP_TOKEN` | Token to use for testing | unset |
+| `FRACTALE_LLM_PROVIDER` | LLM Backend to use (gemini, openai, llama) | gemini |
 ### Testing
 
 Start the server in one terminal. Export `FRACTALE_MCP_TOKEN` if you want to require simple token auth. Here is for http.
@@ -78,17 +88,21 @@ export FRACTALE_MCP_TOKEN=dude
 # In one terminal (start MCP)
 fractale start -t http --port 8089
 
+# Define the model (provider and endpoints) to use.
+export FRACTALE_LLM_PROVIDER=openai
+export OPENAI_API_KEY=xxxxxxxxxxxxxxxx
+export OPENAI_BASE_URL=https://my.custom.url/v1
+
 # In the other, run the plan
 fractale agent ./examples/plans/docker-build-lammps.yaml
 ```
 
-
 - `manager` agents know how to orchestrate step agents and choose between them (don't hold state, but could)
 - `step` agents are experts on doing specific tasks. This originally was an agent with specific functions to do something (e.g., docker build) and now is a generic MCP agent with a prompt that gives it context and a goal.
 
 The initial design of `helper` agents from the first fractale is subsumed by the idea of an MCP function. A helper agent _is_ an MCP tool.
 
-The design is simple in that each agent is responding to state of error vs. success. In the [first version](https://github.com/compspec/fractale) of our library, agents formed a custom graph. In this variant, we refactor to use MCP server tools. It has the same top level design with a manager, but each step agent is like a small state machine governmed by an LLM with access to MCP tools and resources.
+The design is simple in that each agent is responding to state of error vs. success. In the [first version](https://github.com/compspec/fractale) of our library, agents formed a custom graph. In this variant, we refactor to use MCP server tools. It has the same top level design with a manager, but each step agent is like a small state machine governed by an LLM with access to MCP tools and resources.
 
 See [examples/agent](examples/agent) for an example, along with observations, research questions, ideas, and experiment brainstorming!

examples/plans/docker-build-lammps.yaml

Lines changed: 2 additions & 2 deletions
@@ -4,11 +4,11 @@ plan:
   - name: "build"
     prompt: "docker-build-persona"
     inputs:
-      description: "LAMMPS (Large-scale Atomic/Molecular Massively Parallel Simulator)"
+      application: "LAMMPS (Large-scale Atomic/Molecular Massively Parallel Simulator)"
       container: "ghcr.io/hpc-lab/lammps:cpu-latest"
       environment: "Rocky Linux 9, CPU Only"
 
   # - name: "deploy"
   #   prompt: "k8s-deploy-persona"
   #   inputs:
-  #     replicas: 4
+  #       replicas: 4
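The renamed `application` key matters because a step's inputs appear to be forwarded as template arguments when the agent renders its persona prompt (see `fetch_persona` in `fractale/agent/agent.py` below). A rough sketch of that hand-off, illustrative only and not project code:

```python
# Illustrative only: the step inputs from the plan above, as the persona prompt
# template would receive them (key names come from the YAML; routing them through
# step.partition_inputs() and MCPAgent.fetch_persona() is an assumption).
inputs = {
    "application": "LAMMPS (Large-scale Atomic/Molecular Massively Parallel Simulator)",
    "container": "ghcr.io/hpc-lab/lammps:cpu-latest",
    "environment": "Rocky Linux 9, CPU Only",
}

# Conceptually, the agent then asks the MCP server to render the persona with these values:
#   prompt = await client.get_prompt(name="docker-build-persona", arguments=inputs)
```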

fractale/agent/agent.py

Lines changed: 129 additions & 81 deletions
@@ -6,6 +6,7 @@
 from fastmcp.client.transports import StreamableHttpTransport
 
 import fractale.agent.backends as backends
+import fractale.agent.defaults as defaults
 import fractale.agent.logger as logger
 from fractale.agent.base import Agent
 
@@ -17,7 +18,7 @@ class MCPAgent(Agent):
 
     def init(self):
         # 1. Setup MCP Client
-        port = os.environ.get("FRACTALE_MCP_PORT", "8089")
+        port = os.environ.get("FRACTALE_MCP_PORT", defaults.mcp_port)
         token = os.environ.get("FRACTALE_MCP_TOKEN")
         url = f"http://localhost:{port}/mcp"
 
@@ -27,8 +28,18 @@ def init(self):
         else:
             self.client = Client(url)
 
-        # 2. Select Backend based on Config/Env
-        provider = os.environ.get("FRACTALE_LLM_PROVIDER", "gemini").lower()
+        # Initialize the provider. We will do this for each step.
+        self.init_provider()
+
+    def init_provider(self):
+        """
+        Initialize the provider.
+        """
+        # select Backend based on Config/Env first, then cached version
+        provider = self._provider or os.environ.get("FRACTALE_LLM_PROVIDER", "gemini").lower()
+        self._provider = provider
+
+        # Other envars come from provider backend
         if provider in backends.BACKENDS:
            self.backend = backends.BACKENDS[provider]()
         else:
@@ -52,79 +63,118 @@ async def get_tools_list(self):
         tools = await self.client.list_tools()
         return tools
 
-    async def execute_mission_async(self, prompt_text: str):
+    async def execute(self, context, step):
         """
-        The Async Loop: Think -> Act -> Observe -> Think
+        The Async Loop that will start with a prompt name, retrieve it,
+        and then respond to it until the state is successful.
         """
         start_time = time.perf_counter()
 
-        # 1. Connect & Discover Tools
+        # We keep the client connection open for the duration of the step
         async with self.client:
-            mcp_tools = await self.client.list_tools()
 
-            # 2. Initialize Backend with these tools
+            # These are tools available to agent
+            # TODO need to filter these to be agent specific?
+            mcp_tools = await self.client.list_tools()
             await self.backend.initialize(mcp_tools)
 
-            # 3. Initial Prompt
-            # 'response_text' is what the LLM says to the user
-            # 'calls' is a list of tools it wants to run
-            response_text, calls = await self.backend.generate_response(prompt=prompt_text)
-
-            max_loops = 15
-            loops = 0
-
-            while loops < max_loops:
-                loops += 1
+            # Get prompt to give goal/task/personality to agent
+            args = getattr(context, "data", context)
 
-                # If there are tool calls, we MUST execute them and feed back results
-                if calls:
-                    tool_outputs = []
+            # This partitions inputs, adding inputs from the step and separating
+            # those from extra
+            args, extra = step.partition_inputs(args)
+            instruction = await self.fetch_persona(step.prompt, args)
+            # TODO STOPPED HERE should we add "extra" to context?
+            print("INSTRUCTION")
+            print(instruction)
+            print("EXTRA")
+            print(extra)
 
-                    for call in calls:
-                        t_name = call["name"]
-                        t_args = call["args"]
-                        t_id = call.get("id")  # Needed for OpenAI
+            # Run the loop up to some max attempts (internal state machine with MCP tools)
+            max_loops = context.get("max_loops", 15)
+            response_text = await self.run_llm_loop(instruction, max_loops)
 
-                        logger.info(f"🛠️ Tool Call: {t_name} {t_args}")
-
-                        # --- EXECUTE TOOL ---
-                        try:
-                            result = await self.client.call_tool(t_name, t_args)
-                            # Handle FastMCP result object
-                            output_str = (
-                                result.content[0].text
-                                if hasattr(result, "content")
-                                else str(result)
-                            )
-                        except Exception as e:
-                            output_str = f"Error: {str(e)}"
-
-                        # Record Metadata (Your Requirement)
-                        self._record_step(t_name, t_args, output_str)
+        self.record_usage(time.perf_counter() - start_time)
+        return response_text
 
-                        tool_outputs.append({"name": t_name, "content": output_str, "id": t_id})
+    async def run_llm_loop(self, instruction: str, max_loops: int) -> str:
+        """
+        Process -> Tool -> Process loop.
+        We need to return on some state of success or ultimate failure.
+        """
+        # Initial response to first prompt
+        response_text, calls = await self.backend.generate_response(prompt=instruction)
+
+        loops = 0
+        while loops < max_loops:
+            loops += 1
+
+            # If no tools called, we are done
+            if not calls:
+                break
+
+            # Execute all requested tools
+            tool_outputs = []
+            for call in calls:
+                t_name = call["name"]
+                t_args = call["args"]
+                t_id = call.get("id")
+                logger.info(f"🛠️ Calling: {t_name}")
+
+                try:
+                    # Get result and unpack (FastMCP format)
+                    result = await self.client.call_tool(t_name, t_args)
+                    if hasattr(result, "content") and isinstance(result.content, list):
+                        content = result.content[0].text
+                    else:
+                        content = str(result)
+                except Exception as e:
+                    content = f"Error executing {t_name}: {str(e)}"
+
+                # Record metadata about the step
+                self.record_step(t_name, t_args, content)
+
+                # Save outputs (name, id, and content)
+                tool_outputs.append({"id": t_id, "name": t_name, "content": content})
+
+            # Feed results back to backend with history.
+            response_text, calls = await self.backend.generate_response(tool_outputs=tool_outputs)
+            if not calls:
+                logger.info("🎢 Agent has not requested new calls, ending loop.")
+
+        # When we get here, we either have no calls, or we reached max attempts.
+        return response_text
 
-                    # --- FEEDBACK LOOP ---
-                    # We pass the outputs back to the backend.
-                    # It returns the NEXT thought.
-                    response_text, calls = await self.backend.generate_response(
-                        tool_outputs=tool_outputs
-                    )
+    async def fetch_persona(self, prompt_name: str, arguments: dict) -> str:
+        """
+        Asks the MCP Server to render the prompt template.
 
+        This is akin to rendering or fetching the person. E.g., "You are X and
+        here are your instructions for a task."
+        """
+        logger.info(f"📥 Bootstrapping Persona: {prompt_name}")
+        try:
+            prompt_result = await self.client.get_prompt(name=prompt_name, arguments=arguments)
+            # MCP Prompts return a list of messages (User/Assistant/Text).
+            # We squash them into a single string for the instruction.
+            msgs = []
+            for m in prompt_result.messages:
+                if hasattr(m.content, "text"):
+                    msgs.append(m.content.text)
                 else:
-                    # No tool calls? The LLM is done thinking.
-                    break
-
-        end_time = time.perf_counter()
+                    msgs.append(str(m.content))
 
-        # Save Summary Metadata
-        self.save_mcp_metadata(end_time - start_time)
+            return "\n\n".join(msgs)
 
-        return response_text
+        except Exception as e:
+            raise RuntimeError(f"Failed to load persona '{prompt_name}': {e}")
 
-    def _record_step(self, tool, args, output):
-        if "steps" not in self.metadata:
-            self.metadata["steps"] = []
+    def record_step(self, tool, args, output):
+        """
+        Record step metadata.
+        TODO: refactor this into metadata registry (decorator)
+        """
         self.metadata["steps"].append(
             {
                 "tool": tool,
@@ -134,33 +184,31 @@ def _record_step(self, tool, args, output):
             }
         )
 
-    def save_mcp_metadata(self, duration):
-        """Save token usage from backend."""
-        usage = self.backend.token_usage
-        if "llm_usage" not in self.metadata:
-            self.metadata["llm_usage"] = []
-
-        self.metadata["llm_usage"].append(
-            {
-                "duration": duration,
-                "prompt_tokens": usage.get("prompt_tokens", 0),
-                "completion_tokens": usage.get("completion_tokens", 0),
-            }
-        )
-
-    def run_step(self, context):
+    def record_usage(self, duration):
         """
-        Bridge the sync Base Class to the async implementation.
+        Record token usage.
+        TODO: refactor this into metadata registry (decorator)
+        """
+        if hasattr(self.backend, "token_usage"):
+            usage = self.backend.token_usage
+            self.metadata["llm_usage"].append(
+                {
+                    "duration": duration,
+                    "prompt": usage.get("prompt_tokens", 0),
+                    "completion": usage.get("completion_tokens", 0),
+                }
            )
+
+    def run_step(self, context, step):
+        """
+        Run step is called from the Agent run (base class)
+        It's here so we can asyncio.run the thing!
         """
-        prompt_text = self.get_prompt(context)
-
        try:
-            # Run the loop
-            final_result = asyncio.run(self.execute_mission_async(prompt_text))
-            context["result"] = final_result
+            final_result = asyncio.run(self.execute(context, step))
+            context.result = final_result
         except Exception as e:
             context["error_message"] = str(e)
             logger.error(f"Agent failed: {e}")
-            raise  # Or handle gracefully depending on policy
-
+            raise e
         return context
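The refactored loop leans on a small implicit contract with the LLM backend: `initialize()` receives the MCP tool list, and `generate_response()` returns a `(response_text, calls)` pair where each call carries a tool name, arguments, and an optional id. A minimal stand-in backend that satisfies that contract, sketched under assumed shapes and not part of this commit:

```python
from typing import Any, Dict, List, Optional, Tuple


class EchoBackend:
    """Stand-in backend for wiring tests: it never requests tool calls."""

    def __init__(self):
        # Mirrors the token_usage attribute that record_usage() looks for.
        self.token_usage: Dict[str, int] = {"prompt_tokens": 0, "completion_tokens": 0}
        self.tools: List[Any] = []

    async def initialize(self, mcp_tools: List[Any]) -> None:
        # A real backend would translate MCP tool schemas into its own function-calling format.
        self.tools = mcp_tools

    async def generate_response(
        self,
        prompt: Optional[str] = None,
        tool_outputs: Optional[List[Dict[str, Any]]] = None,
    ) -> Tuple[str, List[Dict[str, Any]]]:
        # Returning an empty call list makes run_llm_loop exit after one iteration.
        return (prompt or "ok"), []
```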

fractale/agent/backends/openai.py

Lines changed: 8 additions & 2 deletions
@@ -1,6 +1,7 @@
+import os
 from typing import Any, Dict, List
 
-from openai import AsyncOpenAI
+from openai import AsyncOpenAI, OpenAI
 
 from .llm import LLMBackend
 
@@ -11,7 +12,12 @@ class OpenAIBackend(LLMBackend):
     """
 
     def __init__(self, model_name="gpt-4o"):
-        self.client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+        # Needs to be tested if base url is None.
+        # Switch to async if/when needed. Annoying for development
+        # self.client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"), base_url=os.environ.get("OPENAI_BASE_URL"))
+        self.client = OpenAI(
+            api_key=os.environ.get("OPENAI_API_KEY"), base_url=os.environ.get("OPENAI_BASE_URL")
+        )
         self.model_name = model_name
         self.history = []
         self.tools_schema = []
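On the "Needs to be tested if base url is None" comment: in the openai Python SDK, a `base_url` of `None` is accepted; the client then falls back to the `OPENAI_BASE_URL` environment variable (which the SDK also reads on its own) and finally to the default API endpoint, so the explicit lookup above is mainly for readability. A quick standalone check, assuming a key is exported:

```python
import os

from openai import OpenAI

# base_url may be None here; the SDK then uses OPENAI_BASE_URL or its default endpoint.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_BASE_URL"),
)
print(client.base_url)
```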

fractale/agent/base.py

Lines changed: 4 additions & 5 deletions
@@ -21,6 +21,7 @@ def __init__(
         self.name = name
         self.attempts = 0
         self.max_attempts = max_attempts
+        self._provider = None
 
         self.results_dir = results_dir or os.getcwd()
         self.save_incremental = save_incremental
@@ -46,28 +47,26 @@ def init_metadata(self):
             "llm_usage": [],
         }
 
-    def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
+    def run(self, context, step):
         """
         Main execution wrapper
         """
         # Ensure max_attempts is set
         context["max_attempts"] = self.max_attempts or context.get("max_attempts")
-
-        # 3. RUN STEP
         logger.info(f"▶️ Running {self.name}...")
         start_time = time.time()
 
         try:
             # Call abstract method
-            context = self.run_step(context)
+            context = self.run_step(context, step)
 
         finally:
             duration = time.time() - start_time
             self.metadata["times"]["execution"] = duration
 
         return context
 
-    def run_step(self, context):
+    def run_step(self, context, step):
         """
         Abstract: Implemented by MCPAgent
         """

fractale/agent/defaults.py

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 environment = "generic cloud environment"
 gemini_model = "gemini-2.5-pro"
+mcp_port = "8089"
 
 # These are common / default args we don't need to give in any prompt.
 shared_args = {
