diff --git a/.gitignore b/.gitignore index 1ceebbe78..0014412f0 100644 --- a/.gitignore +++ b/.gitignore @@ -209,3 +209,6 @@ deepresearch_outputs/* examples/deepresearch/hle_outputs/* */hle_outputs/* examples/deepresearch/HLE_OUTPUT_EVOLUTION.md + +# Until we have a good way to handle cuda-version specific pkgs, we ignore uv.lock +uv.lock \ No newline at end of file diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 5e2692889..000000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ -[submodule "verl"] - path = verl - url = https://github.com/volcengine/verl.git - branch = main diff --git a/Dockerfile b/Dockerfile index 372870517..b0e19f2c4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,15 @@ -# Start from the verl base image -# Dockerfile.base -FROM verlai/verl:app-verl0.4-sglang0.4.6.post5-vllm0.8.5-mcore0.12.2-te2.2 +FROM verlai/verl:vllm011.latest WORKDIR /workspace -# 1) Clone rllm repository with submodules -RUN git clone --recurse-submodules https://github.com/rllm-org/rllm.git rllm +RUN git clone https://github.com/volcengine/verl.git +RUN cd verl && \ + git checkout v0.6.1 && \ + pip install -e . -# 2) Install verl and rllm (editable) +# 2) Install rllm (editable) +RUN git clone https://github.com/rllm-org/rllm.git RUN cd rllm && \ - pip install --no-deps -e ./verl && \ pip install -e . # 3) Install playwright diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index 8cec88612..a9ae91131 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -4,32 +4,85 @@ This guide will help you set up rLLM on your system. ## Prerequisites -Before installing rLLM, ensure you have the following: +Starting with v0.2.1, rLLM's recommended dependency manager is `uv`. To install `uv`, run: -- Python 3.10 or higher -- CUDA version >= 12.4 +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +rLLM requires `python>=3.10`, but certain backends may require a newer Python version (e.g., `tinker` requires `python>=3.11`). Ensure that your system has a suitable installation of Python: + +```bash +uv python install 3.11 +``` ## Basic Installation -rLLM uses [verl](https://github.com/volcengine/verl) as its training backend. Follow these steps to install rLLM and verl: +The following will perform a minimal installation of rLLM: ```bash -# Clone the repository -git clone --recurse-submodules https://github.com/rllm-org/rllm.git +git clone https://github.com/rllm-org/rllm.git cd rllm -# Create a conda environment -conda create -n rllm python=3.10 -y -conda activate rllm +uv venv --python 3.11 +uv pip install -e . +``` + +rLLM supports multiple backends for training, including `verl` and `tinker`, which need to be installed separately. -# Install verl -bash scripts/install_verl.sh +To train with `tinker` on a CPU-only machine, run: -# Install rLLM -pip install -e . +```bash +uv pip install -e .[tinker] --torch-backend=cpu +``` + +To train with `verl` on a GPU-equipped machine with CUDA 12.8, run: +```bash +uv pip install -e .[verl] --torch-backend=cu128 ``` -This will install rLLM and all its dependencies in development mode. +> The `verl` extra installs vLLM by default. If you'd rather use SGLang to sample rollouts, you can install it with `uv pip install sglang --torch-backend=cu128`. + +> rLLM with verl supports alternative hardware accelerators, including AMD ROCm and Huawei Ascend.
For these platforms, we strongly recommend installing rLLM on top of verl's official Docker containers for ROCm ([here](https://github.com/volcengine/verl/tree/main/docker/rocm)) and Ascend ([here](https://github.com/volcengine/verl/tree/main/docker/ascend)). + +### Activating your environment + +Be sure to activate the virtual environment before running a job: + +```bash +source .venv/bin/activate +python your_script.py +``` + +### Editable Verl Installation + +If you wish to make changes to verl, you can do an editable install: + +```bash +git clone https://github.com/volcengine/verl.git +cd verl +git checkout v0.6.1 +uv pip install -e . +``` + +### Optional Extras + +rLLM provides additional optional dependencies for specific agent domains and framework integrations. For example: +- `web`: Tools for web agents (BrowserGym, Selenium). +- `code-tools`: Sandboxed code execution (E2B, Together). +- `smolagents`: Integration with Hugging Face's smolagents. + +See the full list of managed extras [here](pyproject.toml). + +## Installation without `uv` + +While rLLM can also be installed without `uv` (i.e., just using `pip`), it is not recommended and may cause issues if you don't have a compatible PyTorch or CUDA version preinstalled: + +```bash +conda create -n rllm python=3.11 +conda activate rllm +pip install -e .[verl] +``` ## Installation with Docker 🐳 diff --git a/examples/geo3k/train_geo3k.sh b/examples/geo3k/train_geo3k.sh index d441936ff..49a364ad1 100644 --- a/examples/geo3k/train_geo3k.sh +++ b/examples/geo3k/train_geo3k.sh @@ -26,7 +26,7 @@ python3 -m examples.geo3k.train_geo3k \ actor_rollout_ref.actor.fsdp_config.param_offload=False \ actor_rollout_ref.actor.fsdp_config.optimizer_offload=False \ actor_rollout_ref.rollout.tensor_model_parallel_size=1 \ - actor_rollout_ref.rollout.name=sglang \ + actor_rollout_ref.rollout.name=vllm \ actor_rollout_ref.rollout.mode="async" \ actor_rollout_ref.rollout.gpu_memory_utilization=0.8 \ actor_rollout_ref.rollout.enforce_eager=False \ diff --git a/pyproject.toml b/pyproject.toml index d88b0b975..603959c49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,11 +19,12 @@ classifiers = [ license = {file = "LICENSE"} dependencies = [ # Core ML/AI packages - "torch>=2.6", - "transformers>=4.5", + "torch", + "torchvision", + "transformers>=4.55.0", # Data processing - "datasets", + "datasets>=2.20.0", "pandas", "polars", "pillow", @@ -49,33 +50,43 @@ dependencies = [ "PyYAML", "pydantic", "wrapt", - "asgiref>=3.7.0", # Async/sync conversion utilities + "asgiref>=3.7.0", "wandb", +] + +[project.optional-dependencies] - # Development and testing +dev = [ "pytest", "pre-commit", "ruff", "mypy", - - # Documentation "mkdocs>=1.5.0", "mkdocs-material>=9.0.0", "mkdocstrings[python]>=0.24.0", "mkdocs-autorefs>=0.5.0", "pymdown-extensions>=10.0.0", - - # Sdk +] + +verl = [ + "verl==0.6.1", + "torch>=2.8.0", + "torchvision>=0.23.0", + "vllm>=0.10.2,<=0.11.0", + "flash-attn>=2.8.1", + "qwen-vl-utils", +] + +sdk = [ "litellm[proxy]", "aiosqlite", "asgiref", - - # Tinker - "tinker", - "tinker-cookbook @ git+https://github.com/thinking-machines-lab/tinker-cookbook.git#egg=tinker-cookbook", ] -[project.optional-dependencies] +tinker = [ + "tinker ; python_version >= '3.11'", + "tinker-cookbook @ git+https://github.com/thinking-machines-lab/tinker-cookbook.git#egg=tinker-cookbook ; python_version >= '3.11'", +] opentelemetry = [ "opentelemetry-sdk", @@ -89,7 +100,6 @@ strands = [ "strands-agents", ] - swe = [ "docker", "kubernetes", @@ -108,6 +118,11 @@ 
code-tools = [ "together>=1.4", ] +[tool.uv.extra-build-dependencies] +flash-attn = [ + { requirement = "torch", match-runtime = true }, +] + [tool.ruff] line-length = 5000 # TODO: Reduce this to a more reasonable value @@ -157,7 +172,6 @@ plugins = ["pydantic.mypy"] ignore_missing_imports = true check_untyped_defs = true follow_imports = "silent" - exclude = [ "verl/.*", "rllm/rewards/code_utils/.*", @@ -166,4 +180,4 @@ exclude = [ "scripts/.*", "tests/.*", "docs/.*", -] +] \ No newline at end of file diff --git a/rllm/engine/agent_sdk_engine.py b/rllm/engine/agent_sdk_engine.py index 47837a2ae..5b8fe61e8 100644 --- a/rllm/engine/agent_sdk_engine.py +++ b/rllm/engine/agent_sdk_engine.py @@ -27,9 +27,10 @@ # Avoid hard dependency on verl at import time; only for typing if TYPE_CHECKING: - from rllm.sdk.tracers import TracerProtocol from verl import DataProto + from rllm.sdk.tracers import TracerProtocol + logger = logging.getLogger(__name__) diff --git a/rllm/engine/rollout/verl_engine.py b/rllm/engine/rollout/verl_engine.py index acd63b3b7..e6210db36 100644 --- a/rllm/engine/rollout/verl_engine.py +++ b/rllm/engine/rollout/verl_engine.py @@ -1,11 +1,12 @@ import asyncio import uuid +from verl.experimental.agent_loop.agent_loop import AgentLoopManager, AsyncLLMServerManager +from verl.workers.rollout.replica import TokenOutput + from rllm.engine.rollout.rollout_engine import ModelOutput, RolloutEngine from rllm.parser import ChatTemplateParser from rllm.workflows import TerminationEvent, TerminationReason -from verl.experimental.agent_loop.agent_loop import AgentLoopManager, AsyncLLMServerManager -from verl.workers.rollout.replica import TokenOutput class VerlEngine(RolloutEngine): diff --git a/rllm/tools/web_tools/firecrawl_tool.py b/rllm/tools/web_tools/firecrawl_tool.py index 8623e6612..31b27993f 100644 --- a/rllm/tools/web_tools/firecrawl_tool.py +++ b/rllm/tools/web_tools/firecrawl_tool.py @@ -12,8 +12,7 @@ try: from firecrawl import FirecrawlApp -except ImportError as e: - print(e) +except ImportError: FirecrawlApp = None from rllm.tools.tool_base import Tool, ToolOutput diff --git a/rllm/trainer/agent_sft_trainer.py b/rllm/trainer/agent_sft_trainer.py index a17c5fc4c..78bd12a98 100644 --- a/rllm/trainer/agent_sft_trainer.py +++ b/rllm/trainer/agent_sft_trainer.py @@ -24,13 +24,14 @@ def train(self): self._train_tinker() def _train_verl(self): - from rllm.trainer.verl.sft_dataset import RLLMSFTDataset from verl.trainer.fsdp_sft_trainer import FSDPSFTTrainer from verl.utils import hf_tokenizer from verl.utils.device import get_device_name from verl.utils.distributed import destroy_global_process_group, initialize_global_process_group from verl.utils.fs import copy_to_local + from rllm.trainer.verl.sft_dataset import RLLMSFTDataset + config = self.config device_name = get_device_name() local_rank, rank, world_size = initialize_global_process_group() diff --git a/rllm/trainer/verl/agent_ppo_trainer.py b/rllm/trainer/verl/agent_ppo_trainer.py index 51a7dd738..69ae08f09 100644 --- a/rllm/trainer/verl/agent_ppo_trainer.py +++ b/rllm/trainer/verl/agent_ppo_trainer.py @@ -12,8 +12,6 @@ import numpy as np import torch from omegaconf import OmegaConf - -from rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine from verl import DataProto from verl.protocol import pad_dataproto_to_divisor from verl.single_controller.ray import RayWorkerGroup @@ -29,6 +27,8 @@ from verl.utils.debug import marked_timer from verl.utils.metric import reduce_metrics +from 
rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine + class AgentPPOTrainer(RayPPOTrainer): def __init__( diff --git a/rllm/trainer/verl/agent_ppo_trainer_pipeline.py b/rllm/trainer/verl/agent_ppo_trainer_pipeline.py index 2623de31c..c9ec95ba1 100644 --- a/rllm/trainer/verl/agent_ppo_trainer_pipeline.py +++ b/rllm/trainer/verl/agent_ppo_trainer_pipeline.py @@ -6,9 +6,6 @@ import numpy as np import torch - -from rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine -from rllm.trainer.verl.agent_ppo_trainer import AgentPPOTrainer from verl import DataProto from verl.single_controller.ray import RayClassWithInitArgs, RayWorkerGroup from verl.trainer.ppo.metric_utils import compute_data_metrics, compute_timing_metrics @@ -17,6 +14,9 @@ from verl.utils.debug import marked_timer from verl.utils.metric import reduce_metrics +from rllm.engine.agent_execution_engine import AsyncAgentExecutionEngine +from rllm.trainer.verl.agent_ppo_trainer import AgentPPOTrainer + class PipelineAgentPPOTrainer(AgentPPOTrainer): def init_workers(self): @@ -78,7 +78,6 @@ def fit_agent(self): The light-weight advantage computation is done on the driver process. """ from omegaconf import OmegaConf - from verl.utils.tracking import Tracking logger = Tracking(project_name=self.config.trainer.project_name, experiment_name=self.config.trainer.experiment_name, default_backend=self.config.trainer.logger, config=OmegaConf.to_container(self.config, resolve=True)) diff --git a/rllm/trainer/verl/agent_sdk_trainer.py b/rllm/trainer/verl/agent_sdk_trainer.py index ee4086dd9..1d3482161 100644 --- a/rllm/trainer/verl/agent_sdk_trainer.py +++ b/rllm/trainer/verl/agent_sdk_trainer.py @@ -14,11 +14,6 @@ import numpy as np import torch from omegaconf import OmegaConf - -from rllm.engine.agent_sdk_engine import AgentSdkEngine -from rllm.engine.rollout.verl_engine import VerlEngine -from rllm.misc import colorful_print -from rllm.workflows.workflow import TerminationReason from verl import DataProto from verl.protocol import pad_dataproto_to_divisor from verl.single_controller.ray import RayWorkerGroup @@ -40,6 +35,11 @@ from verl.utils.debug import marked_timer from verl.utils.tracking import Tracking +from rllm.engine.agent_sdk_engine import AgentSdkEngine +from rllm.engine.rollout.verl_engine import VerlEngine +from rllm.misc import colorful_print +from rllm.workflows.workflow import TerminationReason + class AgentSdkTrainer(RayPPOTrainer): """PPO trainer for agent workflows with stepwise advantage and rejection sampling.""" @@ -80,7 +80,6 @@ def __init__( def init_workers(self): """Initialize workers with instrumented vLLM servers for distributed rollouts.""" import ray - from verl.workers.rollout.vllm_rollout.vllm_async_server import vLLMHttpServerBase, vLLMReplica # Create an instrumented vLLM HTTP server class diff --git a/rllm/trainer/verl/agent_workflow_trainer.py b/rllm/trainer/verl/agent_workflow_trainer.py index 09048ed32..9ff6f4ee1 100644 --- a/rllm/trainer/verl/agent_workflow_trainer.py +++ b/rllm/trainer/verl/agent_workflow_trainer.py @@ -9,11 +9,6 @@ import numpy as np import torch from omegaconf import OmegaConf - -from rllm.engine.agent_workflow_engine import AgentWorkflowEngine -from rllm.engine.rollout.verl_engine import VerlEngine -from rllm.utils.episode_logger import EpisodeLogger -from rllm.workflows.workflow import TerminationReason from verl import DataProto from verl.protocol import pad_dataproto_to_divisor from verl.single_controller.ray import RayWorkerGroup @@ -36,6 +31,11 @@ 
from verl.trainer.ppo.utils import Role, WorkerType from verl.utils.debug import marked_timer +from rllm.engine.agent_workflow_engine import AgentWorkflowEngine +from rllm.engine.rollout.verl_engine import VerlEngine +from rllm.utils.episode_logger import EpisodeLogger +from rllm.workflows.workflow import TerminationReason + class AgentWorkflowPPOTrainer(RayPPOTrainer): def __init__( diff --git a/rllm/trainer/verl/agent_workflow_trainer_fireworks.py b/rllm/trainer/verl/agent_workflow_trainer_fireworks.py index a569850ab..d0c3aed3d 100644 --- a/rllm/trainer/verl/agent_workflow_trainer_fireworks.py +++ b/rllm/trainer/verl/agent_workflow_trainer_fireworks.py @@ -16,8 +16,6 @@ except ImportError as e: raise ImportError("The 'fireworks' package is required to use the Fireworks backend. Please install it with: pip install fireworks-ai") from e -from rllm.trainer.verl.agent_workflow_trainer import AgentWorkflowPPOTrainer -from rllm.workflows.workflow import TerminationReason from verl import DataProto from verl.single_controller.ray import RayClassWithInitArgs, RayWorkerGroup from verl.trainer.ppo.core_algos import agg_loss @@ -37,6 +35,9 @@ from verl.utils.debug import marked_timer from verl.utils.tracking import Tracking +from rllm.trainer.verl.agent_workflow_trainer import AgentWorkflowPPOTrainer +from rllm.workflows.workflow import TerminationReason + class FireworksAgentWorkflowPPOTrainer(AgentWorkflowPPOTrainer): def __init__( diff --git a/rllm/trainer/verl/sft_dataset.py b/rllm/trainer/verl/sft_dataset.py index 8daf92030..dd7253e20 100644 --- a/rllm/trainer/verl/sft_dataset.py +++ b/rllm/trainer/verl/sft_dataset.py @@ -1,7 +1,6 @@ import logging import torch - from verl.utils.dataset.multiturn_sft_dataset import MultiTurnSFTDataset from verl.utils.parser.chat_template_parser import ChatTemplateParser diff --git a/rllm/trainer/verl/train_agent_ppo.py b/rllm/trainer/verl/train_agent_ppo.py index 698b594f6..b17fb673c 100644 --- a/rllm/trainer/verl/train_agent_ppo.py +++ b/rllm/trainer/verl/train_agent_ppo.py @@ -10,6 +10,8 @@ import hydra import ray from omegaconf import OmegaConf +from verl.trainer.ppo.reward import load_reward_manager +from verl.utils.device import is_cuda_available from rllm.trainer.env_agent_mappings import AGENT_CLASS_MAPPING, ENV_CLASS_MAPPING from rllm.trainer.verl.agent_ppo_trainer import AgentPPOTrainer @@ -17,8 +19,6 @@ # Local application imports from rllm.trainer.verl.agent_workflow_trainer import AgentWorkflowPPOTrainer from rllm.trainer.verl.ray_runtime_env import get_ppo_ray_runtime_env -from verl.trainer.ppo.reward import load_reward_manager -from verl.utils.device import is_cuda_available @hydra.main(config_path="../config", config_name="agent_ppo_trainer", version_base=None) @@ -74,7 +74,6 @@ def run(self, config, workflow_class=None, workflow_args=None, agent_class=None, from pprint import pprint from omegaconf import OmegaConf - from verl.single_controller.ray import RayWorkerGroup from verl.utils.fs import copy_to_local diff --git a/rllm/trainer/verl/train_agent_ppo_pipeline.py b/rllm/trainer/verl/train_agent_ppo_pipeline.py index 3b9a8d513..c20976936 100644 --- a/rllm/trainer/verl/train_agent_ppo_pipeline.py +++ b/rllm/trainer/verl/train_agent_ppo_pipeline.py @@ -18,15 +18,15 @@ import hydra import ray -from rllm.trainer.env_agent_mappings import AGENT_CLASS_MAPPING, ENV_CLASS_MAPPING, setup_environment -from rllm.trainer.verl.agent_ppo_trainer_pipeline import PipelineAgentPPOTrainer - # Local application imports from verl.single_controller.ray import 
RayWorkerGroup from verl.trainer.ppo.ray_trainer import ResourcePoolManager, Role from verl.workers.fsdp_workers import ActorRolloutRefWorker from verl.workers.reward_manager import NaiveRewardManager +from rllm.trainer.env_agent_mappings import AGENT_CLASS_MAPPING, ENV_CLASS_MAPPING, setup_environment +from rllm.trainer.verl.agent_ppo_trainer_pipeline import PipelineAgentPPOTrainer + @hydra.main(config_path="../config", config_name="agent_ppo_trainer", version_base=None) def main(config): @@ -47,7 +47,6 @@ def main_task(config, compute_score=None): from pprint import pprint from omegaconf import OmegaConf - from verl.utils.fs import copy_local_path_from_hdfs pprint(OmegaConf.to_container(config, resolve=True)) # resolve=True will eval symbol values diff --git a/rllm/trainer/verl/train_workflow_pipeline.py b/rllm/trainer/verl/train_workflow_pipeline.py index 4c522a3f8..70414caa2 100644 --- a/rllm/trainer/verl/train_workflow_pipeline.py +++ b/rllm/trainer/verl/train_workflow_pipeline.py @@ -4,14 +4,14 @@ import hydra import ray from omegaconf import OmegaConf +from verl.trainer.constants_ppo import get_ppo_ray_runtime_env +from verl.trainer.ppo.reward import load_reward_manager +from verl.utils.device import is_cuda_available from rllm.trainer.env_agent_mappings import WORKFLOW_CLASS_MAPPING from rllm.trainer.verl.agent_workflow_trainer_fireworks import ( FireworksAgentWorkflowPPOTrainer, ) -from verl.trainer.constants_ppo import get_ppo_ray_runtime_env -from verl.trainer.ppo.reward import load_reward_manager -from verl.utils.device import is_cuda_available @hydra.main(config_path="../config", config_name="agent_ppo_trainer", version_base=None) @@ -71,7 +71,6 @@ def run(self, config, workflow_class=None, workflow_args=None): from pprint import pprint from omegaconf import OmegaConf - from verl.utils.fs import copy_to_local print(f"TaskRunner hostname: {socket.gethostname()}, PID: {os.getpid()}") diff --git a/scripts/data/code_dataset.py b/scripts/data/code_dataset.py index ec2a00ffe..1daa99cc4 100644 --- a/scripts/data/code_dataset.py +++ b/scripts/data/code_dataset.py @@ -12,12 +12,12 @@ from typing import Any import pandas as pd +from verl.utils.hdfs_io import makedirs # Get the rllm package path import rllm from rllm.data.dataset_types import TestDataset, TrainDataset from rllm.data.utils import fetch_live_code_bench_system_prompt, load_dataset -from verl.utils.hdfs_io import makedirs RLLM_PATH = os.path.dirname(os.path.dirname(rllm.__file__)) diff --git a/scripts/data/deepscaler_dataset.py b/scripts/data/deepscaler_dataset.py index a4aff74af..7006545f1 100644 --- a/scripts/data/deepscaler_dataset.py +++ b/scripts/data/deepscaler_dataset.py @@ -10,12 +10,12 @@ from typing import Any import pandas as pd +from verl.utils.hdfs_io import copy, makedirs +from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed import rllm from rllm.data.dataset_types import TestDataset, TrainDataset from rllm.data.utils import load_dataset -from verl.utils.hdfs_io import copy, makedirs -from verl.utils.reward_score.math import last_boxed_only_string, remove_boxed RLLM_PATH = os.path.dirname(os.path.dirname(rllm.__file__)) diff --git a/scripts/data/frozenlake_dataset.py b/scripts/data/frozenlake_dataset.py index 99084ec51..ff6b214fa 100644 --- a/scripts/data/frozenlake_dataset.py +++ b/scripts/data/frozenlake_dataset.py @@ -3,9 +3,9 @@ import numpy as np import pandas as pd +from verl.utils.hdfs_io import copy, makedirs import rllm -from verl.utils.hdfs_io import copy, makedirs # Get the 
directory for rLLM repo (rllm.__file__) RLLM_DIR = os.path.dirname(os.path.dirname(os.path.abspath(rllm.__file__))) diff --git a/scripts/data/gaia_dataset.py b/scripts/data/gaia_dataset.py index 7737a246f..d59f6265f 100644 --- a/scripts/data/gaia_dataset.py +++ b/scripts/data/gaia_dataset.py @@ -11,10 +11,10 @@ from typing import Any import pandas as pd +from verl.utils.hdfs_io import makedirs from rllm.data.dataset_types import TestDataset from rllm.data.utils import load_dataset -from verl.utils.hdfs_io import makedirs def make_map_fn(split: str): diff --git a/scripts/data/miniwob_dataset.py b/scripts/data/miniwob_dataset.py index 5db4c7cab..0593ca54c 100644 --- a/scripts/data/miniwob_dataset.py +++ b/scripts/data/miniwob_dataset.py @@ -5,9 +5,9 @@ import browsergym.miniwob import gymnasium as gym import pandas as pd +from verl.utils.hdfs_io import copy, makedirs import rllm -from verl.utils.hdfs_io import copy, makedirs if __name__ == "__main__": import importlib diff --git a/scripts/data/swe_dataset.py b/scripts/data/swe_dataset.py index 12c35408b..2671d7544 100644 --- a/scripts/data/swe_dataset.py +++ b/scripts/data/swe_dataset.py @@ -4,10 +4,10 @@ import pandas as pd from datasets import load_dataset +from verl.utils.hdfs_io import copy, makedirs import rllm from rllm.agents.system_prompts import SWE_SYSTEM_PROMPT, SWE_USER_PROMPT -from verl.utils.hdfs_io import copy, makedirs # Get the directory for rLLM repo (rllm.__file__) RLLM_DIR = os.path.dirname(os.path.dirname(os.path.abspath(rllm.__file__))) diff --git a/scripts/data/webarena_dataset.py b/scripts/data/webarena_dataset.py index d83601c88..87837beca 100644 --- a/scripts/data/webarena_dataset.py +++ b/scripts/data/webarena_dataset.py @@ -5,11 +5,11 @@ import gymnasium as gym import pandas as pd -import rllm - # import browsergym.miniwob from verl.utils.hdfs_io import copy, makedirs +import rllm + if __name__ == "__main__": import importlib import os diff --git a/verl b/verl deleted file mode 160000 index d62da4950..000000000 --- a/verl +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d62da4950573d7a4b7ef2362337952e7ab59e78d
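
Below is a minimal end-to-end sketch of the installation flow this patch documents, assuming a GPU machine with CUDA 12.8 (the `--torch-backend` value and the final import check are illustrative; the commands otherwise mirror the updated `docs/getting-started/installation.md`):

```bash
# Clone rLLM and create a uv-managed virtual environment
# (Python 3.11 also satisfies the tinker extra's requirement)
git clone https://github.com/rllm-org/rllm.git
cd rllm
uv venv --python 3.11

# Install rLLM with the verl training backend; pick the torch backend matching your hardware
uv pip install -e .[verl] --torch-backend=cu128

# Activate the environment before launching any training script
source .venv/bin/activate
python -c "import rllm, verl"  # quick sanity check that both packages import
```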