rllm-org · jeffreysijuntan · Dec 10, 2025 · Nov 27, 2025 · Nov 27, 2025 · Dec 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -209,3 +209,6 @@ deepresearch_outputs/*
 examples/deepresearch/hle_outputs/*
 */hle_outputs/*
 examples/deepresearch/HLE_OUTPUT_EVOLUTION.md
+
+# Until we have a good way to handle cuda-version specific pkgs, we ignore uv.lock
+uv.lock
diff --git a/.gitmodules b/.gitmodules
diff --git a/Dockerfile b/Dockerfile
@@ -1,15 +1,15 @@
-# Start from the verl base image
-# Dockerfile.base
-FROM verlai/verl:app-verl0.4-sglang0.4.6.post5-vllm0.8.5-mcore0.12.2-te2.2
+FROM verlai/verl:vllm011.latest
 
 WORKDIR /workspace
 
-# 1) Clone rllm repository with submodules
-RUN git clone --recurse-submodules https://github.com/rllm-org/rllm.git rllm
+RUN git clone https://github.com/volcengine/verl.git
+RUN cd verl && \
+    git checkout v0.6.1 && \
+    pip install -e .
 
-# 2) Install verl and rllm (editable)
+# 2) Install rllm (editable)
+RUN git clone https://github.com/rllm-org/rllm.git
 RUN cd rllm && \
-    pip install --no-deps -e ./verl && \
     pip install -e .
 
 # 3) Install playwright

diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
@@ -4,32 +4,85 @@ This guide will help you set up rLLM on your system.
 
 ## Prerequisites
 
-Before installing rLLM, ensure you have the following:
+Starting with v0.2.1, rLLM's recommended dependency manager is `uv`. To install `uv`, run:
 
-- Python 3.10 or higher
-- CUDA version >= 12.4
+```bash
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+rLLM requires `python>=3.10`, but certain backends may require a newer installation (e.g., `tinker` requires `python>=3.11`). Ensure that your system has a suitable installation of Python:
+
+```bash
+uv python install 3.11
+```
 
 ## Basic Installation
 
-rLLM uses [verl](https://github.com/volcengine/verl) as its training backend. Follow these steps to install rLLM and verl:
+The following will perform a minimal installation of rLLM:
 
 ```bash
-# Clone the repository
-git clone --recurse-submodules https://github.com/rllm-org/rllm.git
+git clone https://github.com/rllm-org/rllm.git
 cd rllm
 
-# Create a conda environment
-conda create -n rllm python=3.10 -y
-conda activate rllm
+uv venv --python 3.11
+uv pip install -e .
+```
+
+rLLM supports multiple backends for training, including `verl` and `tinker`, which need to be installed separately.
 
-# Install verl
-bash scripts/install_verl.sh
+To train with `tinker` on a CPU-only machine, run:
 
-# Install rLLM
-pip install -e .
+```bash
+uv pip install -e .[tinker] --torch-backend=cpu
+```
+
+To train with `verl` on a GPU-equipped machine with CUDA 12.8, run:
+```bash
+uv pip install -e .[verl] --torch-backend=cu128
 ```
 
-This will install rLLM and all its dependencies in development mode.
+> The `verl` extra installs vLLM by default. If you'd rather use SGLang to sample rollouts, you can install it with `uv pip install sglang --torch-backend=cu128`.
+
+> rLLM with verl supports alternative hardware accelerators, including AMD ROCm and Huawei Ascend. For these platforms, we strongly recommend installing rLLM on top of verl's official Docker containers for ROCm ([here](https://github.com/volcengine/verl/tree/main/docker/rocm)) and Ascend ([here](https://github.com/volcengine/verl/tree/main/docker/ascend)).
+
+### Activating your environment
+
+Be sure to activate the virtual environment before running a job:
+
+```bash
+source .venv/bin/activate
+python your_script.py
+```
+
+### Editable Verl Installation
+
+If you wish to make changes to verl, you can do an editable install:
+
+```bash 
+git clone https://github.com/volcengine/verl.git
+cd verl
+git checkout v0.6.1
+uv pip install -e .
+```
+
+### Optional Extras
+
+rLLM provides additional optional dependencies for specific agent domains and framework integrations. For example:
+- `web`: Tools for web agents (BrowserGym, Selenium).
+- `code-tools`: Sandboxed code execution (E2B, Together).
+- `smolagents`: Integration with Hugging Face's smolagents.
+
+See the full list of managed extras in [`pyproject.toml`](https://github.com/rllm-org/rllm/blob/main/pyproject.toml).
+
+## Installation without `uv`
+
+While rLLM can also be installed without `uv` (i.e., just using `pip`), it is not recommended and may cause issues if you don't have a compatible PyTorch or CUDA version preinstalled:
+
+```bash
+conda create -n rllm python=3.11
+conda activate rllm
+pip install -e .[verl]
+```
 
 ## Installation with Docker 🐳
 

diff --git a/examples/geo3k/train_geo3k.sh b/examples/geo3k/train_geo3k.sh
@@ -26,7 +26,7 @@ python3 -m examples.geo3k.train_geo3k \
     actor_rollout_ref.actor.fsdp_config.param_offload=False \
     actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \
     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
-    actor_rollout_ref.rollout.name=sglang \
+    actor_rollout_ref.rollout.name=vllm \
     actor_rollout_ref.rollout.mode="async" \
     actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \
     actor_rollout_ref.rollout.enforce_eager=False \

diff --git a/pyproject.toml b/pyproject.toml
@@ -19,11 +19,12 @@ classifiers = [
 license = {file = "LICENSE"}
 dependencies = [
     # Core ML/AI packages
-    "torch>=2.6",
-    "transformers>=4.5",
+    "torch",
+    "torchvision",
+    "transformers>=4.55.0",
 
     # Data processing
-    "datasets",
+    "datasets>=2.20.0",
     "pandas",
     "polars",
     "pillow",
@@ -49,33 +50,43 @@ dependencies = [
     "PyYAML",
     "pydantic",
     "wrapt",
-    "asgiref>=3.7.0",  # Async/sync conversion utilities
+    "asgiref>=3.7.0",
     "wandb",
+]
+
+[project.optional-dependencies]
 
-    # Development and testing
+dev = [
     "pytest",
     "pre-commit",
     "ruff",
     "mypy",
-
-    # Documentation
     "mkdocs>=1.5.0",
     "mkdocs-material>=9.0.0",
     "mkdocstrings[python]>=0.24.0",
     "mkdocs-autorefs>=0.5.0",
     "pymdown-extensions>=10.0.0",
-
-    # Sdk
+]
+
+verl = [
+    "verl==0.6.1",
+    "torch>=2.8.0",
+    "torchvision>=0.23.0",
+    "vllm>=0.10.2,<=0.11.0",
+    "flash-attn>=2.8.1",
+    "qwen-vl-utils",
+]
+
+sdk = [
     "litellm[proxy]",
     "aiosqlite",
     "asgiref",
-
-    # Tinker
-    "tinker",
-    "tinker-cookbook @ git+https://github.com/thinking-machines-lab/tinker-cookbook.git#egg=tinker-cookbook",
 ]
 
-[project.optional-dependencies]
+tinker = [
+    "tinker ; python_version >= '3.11'",
+    "tinker-cookbook @ git+https://github.com/thinking-machines-lab/tinker-cookbook.git#egg=tinker-cookbook ; python_version >= '3.11'",
+]
 
 opentelemetry = [
     "opentelemetry-sdk",
@@ -89,7 +100,6 @@ strands = [
     "strands-agents",
 ]
 
-
 swe = [
     "docker",
     "kubernetes",
@@ -108,6 +118,11 @@ code-tools = [
     "together>=1.4",
 ]
 
+[tool.uv.extra-build-dependencies]
+flash-attn = [
+    { requirement = "torch", match-runtime = true },
+]
+
 [tool.ruff]
 line-length = 5000 # TODO: Reduce this to a more reasonable value
 
@@ -157,7 +172,6 @@ plugins = ["pydantic.mypy"]
 ignore_missing_imports = true
 check_untyped_defs = true
 follow_imports = "silent"
-
 exclude = [
     "verl/.*",
     "rllm/rewards/code_utils/.*",
@@ -166,4 +180,4 @@ exclude = [
     "scripts/.*",
     "tests/.*",
     "docs/.*",
-]
+]
diff --git a/rllm/engine/agent_sdk_engine.py b/rllm/engine/agent_sdk_engine.py
@@ -27,9 +27,10 @@
 
 # Avoid hard dependency on verl at import time; only for typing
 if TYPE_CHECKING:
-    from rllm.sdk.tracers import TracerProtocol
     from verl import DataProto
 
+    from rllm.sdk.tracers import TracerProtocol
+
 logger = logging.getLogger(__name__)
 
 

diff --git a/rllm/engine/rollout/verl_engine.py b/rllm/engine/rollout/verl_engine.py
@@ -1,11 +1,12 @@
 import asyncio
 import uuid
 
+from verl.experimental.agent_loop.agent_loop import AgentLoopManager, AsyncLLMServerManager
+from verl.workers.rollout.replica import TokenOutput
+
 from rllm.engine.rollout.rollout_engine import ModelOutput, RolloutEngine
 from rllm.parser import ChatTemplateParser
 from rllm.workflows import TerminationEvent, TerminationReason
-from verl.experimental.agent_loop.agent_loop import AgentLoopManager, AsyncLLMServerManager
-from verl.workers.rollout.replica import TokenOutput
 
 
 class VerlEngine(RolloutEngine):

diff --git a/rllm/tools/web_tools/firecrawl_tool.py b/rllm/tools/web_tools/firecrawl_tool.py
@@ -12,8 +12,7 @@
 
 try:
     from firecrawl import FirecrawlApp
-except ImportError as e:
-    print(e)
+except ImportError:
     FirecrawlApp = None
 
 from rllm.tools.tool_base import Tool, ToolOutput

diff --git a/rllm/trainer/agent_sft_trainer.py b/rllm/trainer/agent_sft_trainer.py
@@ -24,13 +24,14 @@ def train(self):
             self._train_tinker()
 
     def _train_verl(self):
-        from rllm.trainer.verl.sft_dataset import RLLMSFTDataset
         from verl.trainer.fsdp_sft_trainer import FSDPSFTTrainer
         from verl.utils import hf_tokenizer
         from verl.utils.device import get_device_name
         from verl.utils.distributed import destroy_global_process_group, initialize_global_process_group
         from verl.utils.fs import copy_to_local
 
+        from rllm.trainer.verl.sft_dataset import RLLMSFTDataset
+
         config = self.config
         device_name = get_device_name()
         local_rank, rank, world_size = initialize_global_process_group()

diff --git a/rllm/trainer/verl/agent_ppo_trainer.py b/rllm/trainer/verl/agent_ppo_trainer.py
@@ -12,8 +12,6 @@
 import numpy as np
 import torch
 from omegaconf import OmegaConf
-
-from rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine
 from verl import DataProto
 from verl.protocol import pad_dataproto_to_divisor
 from verl.single_controller.ray import RayWorkerGroup
@@ -29,6 +27,8 @@
 from verl.utils.debug import marked_timer
 from verl.utils.metric import reduce_metrics
 
+from rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine
+
 
 class AgentPPOTrainer(RayPPOTrainer):
     def __init__(

diff --git a/rllm/trainer/verl/agent_ppo_trainer_pipeline.py b/rllm/trainer/verl/agent_ppo_trainer_pipeline.py
@@ -6,9 +6,6 @@
 
 import numpy as np
 import torch
-
-from rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine
-from rllm.trainer.verl.agent_ppo_trainer import AgentPPOTrainer
 from verl import DataProto
 from verl.single_controller.ray import RayClassWithInitArgs, RayWorkerGroup
 from verl.trainer.ppo.metric_utils import compute_data_metrics, compute_timing_metrics
@@ -17,6 +14,9 @@
 from verl.utils.debug import marked_timer
 from verl.utils.metric import reduce_metrics
 
+from rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine
+from rllm.trainer.verl.agent_ppo_trainer import AgentPPOTrainer
+
 
 class PipelineAgentPPOTrainer(AgentPPOTrainer):
     def init_workers(self):
@@ -78,7 +78,6 @@ def fit_agent(self):
         The light-weight advantage computation is done on the driver process.
         """
         from omegaconf import OmegaConf
-
         from verl.utils.tracking import Tracking
 
         logger = Tracking(project_name=self.config.trainer.project_name, experiment_name=self.config.trainer.experiment_name, default_backend=self.config.trainer.logger, config=OmegaConf.to_container(self.config, resolve=True))

diff --git a/rllm/trainer/verl/agent_sdk_trainer.py b/rllm/trainer/verl/agent_sdk_trainer.py
@@ -14,11 +14,6 @@
 import numpy as np
 import torch
 from omegaconf import OmegaConf
-
-from rllm.engine.agent_sdk_engine import AgentSdkEngine
-from rllm.engine.rollout.verl_engine import VerlEngine
-from rllm.misc import colorful_print
-from rllm.workflows.workflow import TerminationReason
 from verl import DataProto
 from verl.protocol import pad_dataproto_to_divisor
 from verl.single_controller.ray import RayWorkerGroup
@@ -40,6 +35,11 @@
 from verl.utils.debug import marked_timer
 from verl.utils.tracking import Tracking
 
+from rllm.engine.agent_sdk_engine import AgentSdkEngine
+from rllm.engine.rollout.verl_engine import VerlEngine
+from rllm.misc import colorful_print
+from rllm.workflows.workflow import TerminationReason
+
 
 class AgentSdkTrainer(RayPPOTrainer):
     """PPO trainer for agent workflows with stepwise advantage and rejection sampling."""
@@ -80,7 +80,6 @@ def __init__(
     def init_workers(self):
         """Initialize workers with instrumented vLLM servers for distributed rollouts."""
         import ray
-
         from verl.workers.rollout.vllm_rollout.vllm_async_server import vLLMHttpServerBase, vLLMReplica
 
         # Create an instrumented vLLM HTTP server class