allenai · hamishivi · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025 · Oct 16, 2025
diff --git a/open_instruct/grpo_fast.py b/open_instruct/grpo_fast.py
@@ -105,6 +105,8 @@
     push_folder_to_hub,
 )
 from open_instruct.queue_types import GenerationResult, PromptRequest, RequestInfo, TokenStatistics
+from open_instruct.tools.tool_actor import TOOL_CLASS_REGISTRY, ToolActor
+from open_instruct.tools.utils.tool_proxy import ToolProxy
 from open_instruct.rl_utils import Timer, pack_sequences
 from open_instruct.utils import (
     ArgumentParserPlus,
@@ -420,16 +422,19 @@ class Args:
     """Whether to mask the tool output. By default on."""
     only_reward_good_outputs: bool = False
     """Whether to only reward good outputs. By default off. Useful to force the model to use the tool(s)."""
+    tool_max_concurrency: int = 512
+    """The maximum number of concurrent tool calls allowed across all rollouts per tool."""
 
-    # rl-rag specific settngs
+    # code-tool specific settings
+    code_tool_api_endpoint: str | None = None
+
+    # search-tool specific settings
+    # rl-rag tool settings. These are shared across different tools.
     number_documents_to_search: int = 3
     """The maximum number of documents to retrieve for each query."""
     search_api_endpoint: str | None = None
     """The API endpoint for the search engine."""
 
-    # code-tool specific settings
-    code_tool_api_endpoint: str | None = None
-
     def __post_init__(self):
         if os.environ.get("VLLM_USE_V1") == "0":
             logger.warning("When using the v0 version of vLLM, caching is broken and will never be invalidated.")
@@ -492,8 +497,10 @@ def __post_init__(self):
             calibrate_checkpoint_state_dir(self.checkpoint_state_dir)
         if self.tools is not None and len(self.tools) > 0:
             for tool in self.tools:
-                if tool not in ["search", "code"]:
-                    raise ValueError(f"Tool {tool} is not supported. Supported tools are: search, code")
+                if tool not in TOOL_CLASS_REGISTRY:
+                    raise ValueError(
+                        f"Tool {tool} is not supported. Supported tools are: {', '.join(TOOL_CLASS_REGISTRY.keys())}"
+                    )
             assert len(self.tools) == len(set(self.tools)), "Duplicate tools are not allowed"
             if self.use_vllm_logprobs or self.truncated_importance_sampling_ratio_cap > 0.0:
                 assert self.mask_tool_use, (
@@ -2195,29 +2202,40 @@ def create_model_and_optimizer(
     # Set up tools
     max_len = args.max_prompt_token_length + args.response_length
     tool_objects = {}
+    tool_max_conc = args.tool_max_concurrency
+
+    def _register_actor_backed_tool(tool_name: str, class_path: str, init_kwargs: dict):
+        """Start a ``ToolActor`` for one tool and register a ``ToolProxy`` per stop string.
+
+        Side effects on the enclosing scope: one entry per stop string is written to
+        ``tool_objects`` (keyed by that stop string), and each stop string is added to
+        ``args.stop_strings`` if it is not already present.
+
+        Args:
+            tool_name: Registry name of the tool. It is always the name given to the
+                proxies, even if the actor reports a different one.
+            class_path: Forwarded to the actor as ``class_path``.
+            init_kwargs: Forwarded to the actor as ``init_kwargs``.
+        """
+        actor = ToolActor.options(max_concurrency=tool_max_conc).remote(
+            tool_name=tool_name, class_path=class_path, init_kwargs=init_kwargs
+        )
+        # Submit all three metadata calls first, then block once: a single ray.get on a
+        # list returns results in submission order and avoids three serial round-trips.
+        tool_name_from_actor, start, stop_strings = ray.get(
+            [actor.get_name.remote(), actor.get_start_str.remote(), actor.get_stop_strings.remote()]
+        )
+        # Ensure tool name matches registry name
+        if tool_name_from_actor != tool_name:
+            logger.warning(
+                f"Tool name mismatch: registry name '{tool_name}' vs tool.get_name() '{tool_name_from_actor}'. "
+                f"Using registry name '{tool_name}' for consistency."
+            )
+        # Tools dict is keyed by end_str for stop string checking during generation
+        # But tracking (max_tool_calls, num_calls) uses tool name (registry name)
+        for end_str in stop_strings:
+            if end_str in tool_objects:
+                # Last registration still wins (unchanged behaviour), but make the clash visible.
+                logger.warning(
+                    f"Stop string '{end_str}' is already registered; tool '{tool_name}' replaces the previous entry."
+                )
+            # Store by end_str for stop string checking (this is what vllm_utils expects)
+            tool_objects[end_str] = ToolProxy(actor_handle=actor, start_str=start, end_str=end_str, name=tool_name)
+            # Add tool end string to stop_strings, without duplicating an existing entry
+            if end_str not in args.stop_strings:
+                args.stop_strings.append(end_str)
+
+    # Register tools via actors
     if args.tools:
-        for tool in args.tools:
-            if tool.lower() == "search":
-                from open_instruct.search_utils.search_tool import SearchTool
-
-                tool = SearchTool(
-                    start_str="<query>",
-                    end_str="</query>",
-                    api_endpoint=args.search_api_endpoint,
-                    number_documents_to_search=args.number_documents_to_search,
-                )
-                tool_objects[tool.end_str] = tool
-                # Add tool end string to stop_strings
-                args.stop_strings.append(tool.end_str)
-            elif tool.lower() == "code":
-                from open_instruct.tool_utils.tools import PythonCodeTool
-
-                tool = PythonCodeTool(start_str="<code>", end_str="</code>", api_endpoint=args.code_tool_api_endpoint)
-                tool_objects[tool.end_str] = tool
-                # Add tool end string to stop_strings
-                args.stop_strings.append(tool.end_str)
-            else:
-                raise ValueError(f"Unknown tool: {tool}")
+        for tool_registry_name in args.tools:
+            registry_key = tool_registry_name.lower()
+            class_path = TOOL_CLASS_REGISTRY.get(registry_key, None)
+            if class_path is None:
+                raise ValueError(f"Unknown tool: {tool_registry_name}")
+            # Pass the registry name so the tool is created with the correct name
+            _register_actor_backed_tool(tool_name=registry_key, class_path=class_path, init_kwargs=vars(args))
 
     queues_to_monitor = {
         "Inference Results Queue": inference_results_Q,

diff --git a/open_instruct/ppo_fast.py b/open_instruct/ppo_fast.py
@@ -1588,7 +1588,7 @@ def main(args: Args, tc: TokenizerConfig, model_config: ModelConfig, reward_fn:
     if args.tools:
         for tool in args.tools:
             if tool.lower() == "search":
-                from open_instruct.search_utils.search_tool import SearchTool
+                from open_instruct.tools.search_tool.search_tool import SearchTool
 
                 tool = SearchTool(
                     start_str="<query>",
@@ -1598,7 +1598,7 @@ def main(args: Args, tc: TokenizerConfig, model_config: ModelConfig, reward_fn:
                 )
                 tool_objects[tool.end_str] = tool
             elif tool.lower() == "code":
-                from open_instruct.tool_utils.tools import PythonCodeTool
+                from open_instruct.tools.python_tool.tool import PythonCodeTool
 
                 tool = PythonCodeTool(start_str="<code>", end_str="</code>", api_endpoint=args.code_tool_api_endpoint)
                 tool_objects[tool.end_str] = tool

diff --git a/open_instruct/search_utils/search_tool.py b/open_instruct/search_utils/search_tool.py
diff --git a/open_instruct/tool_utils/test_tools.py b/open_instruct/tool_utils/test_tools.py
@@ -2,7 +2,8 @@
 import time
 import unittest
 
-from open_instruct.tool_utils.tools import MaxCallsExceededTool, PythonCodeTool, Tool, ToolOutput
+from open_instruct.tools.python_tool.tool import PythonCodeTool
+from open_instruct.tools.utils.tool_classes import MaxCallsExceededTool, Tool, ToolOutput
 
 
 class TestToolOutput(unittest.TestCase):
@@ -51,7 +52,7 @@ def test_max_calls_exceeded_output(self):
         self.assertIsInstance(result, ToolOutput)
         self.assertEqual(result.output, "Max tool calls exceeded.")
         self.assertFalse(result.called)
-        self.assertEqual(result.error, "")
+        self.assertEqual(result.error, "Max tool calls exceeded")
         self.assertFalse(result.timeout)
         self.assertEqual(result.runtime, 0)
 
@@ -63,7 +64,7 @@ def setUpClass(cls):
         # Start the server in a subprocess
         cls.server_process = subprocess.Popen(
             ["uv", "run", "uvicorn", "tool_server:app", "--host", "0.0.0.0", "--port", "1212"],
-            cwd="open_instruct/tool_utils",
+            cwd="open_instruct/tools/python_tool/python_server",
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             start_new_session=True,  # Create new process group