databricks · rithwik-db · Aug 4, 2025
@@ -236,29 +236,3 @@ def is_cuda_visible_devices_set():
         'RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES',
         '0',
     ) == '0'
-
-# TODO: Since this uninstallation deals specifically with ray,
-# added the function here instead of the regular utils.py file
-# We need to investigate this further after the hackathon since
-# this is a super hacky solution to support CPU workers
-def uninstall_megablocks_if_exists():
-    """
-    Megablocks exists on the ray workers but is not supported on CPU.
-    We need to uninstall it to avoid errors.
-
-    Note: Installing `llm-foundry[all-cpu]` (which doesn't have megablocks)
-    on the StreamingDatasetActor worker through ray runtime options
-    doesn't seem to actually resolve this issue even though it's supposed
-    to set up a new environment...
-    TODO: Figure out why that's the case and if there's a better way to
-    resolve this issue.
-    """
-    import sys
-    import subprocess
-
-    # First uninstall megablocks package (if it exists)
-    command = [sys.executable, "-m", "pip", "uninstall", "megablocks", "-y"]
-    subprocess.run(command, check=False, capture_output=True, text=True)
-    # Then remove from sys.modules if present
-    if 'megablocks' in sys.modules:
-        del sys.modules['megablocks']
@@ -37,7 +37,7 @@
     create_vllm_engines,
     _vllm_generate,
 )
-from compose_rl.utils.ray_utils import start_ray_server, uninstall_megablocks_if_exists
+from compose_rl.utils.ray_utils import start_ray_server
 from compose_rl.controllers import BaseDistributedGPUActor, SPMDActorGroup
 from compose_rl.controllers.buffer import Buffer
 from compose_rl.algorithms.online.callback_utils import preprocess_batches
@@ -687,21 +687,7 @@ def _run_single_controller_ppo(
                 vllm_tensor_parallel_size=vllm_tensor_parallel_size,
                 pretrain_model_name=pretrain_model_name,
             )
-
-            # We are using a CPU worker for the StreamingActor
-            # and this involves a super hacky workaround by
-            # uninstalling megablocks if it exists. Better solutions
-            # would include:
-            # 1) decouple StreamingActor from llm-foundry altogether
-            # 2) don't broadly import llm-foundry in compose-rl (only
-            # import it into codepaths/files that will only be used by
-            # GPUActors as opposed to CPUActors)
-            # 3) Setting up ray actors with correct environments (which
-            # would involve creating a BaseDistributedActor instead of a
-            # BaseDistributedGPUActor so that we can use CPUs)
-            # We uninstall megablocks after the Train Actors have been
-            # created so that those actors still have megablocks functionality.
-            uninstall_megablocks_if_exists()
+            # Create the StreamingDatasetActor as a CPU-only Ray actor (num_gpus=0)
             streaming_dataset_actor = ray.remote(num_gpus=0)(StreamingDatasetActor).remote()
             rollout_agent = RolloutAgent(inference_server, streaming_dataset_actor)