huggingface · J4nn1K · Oct 18, 2025 · Copilot · Oct 18, 2025 · Copilot
diff --git a/src/lerobot/scripts/lerobot_record.py b/src/lerobot/scripts/lerobot_record.py
@@ -182,6 +182,10 @@ class RecordConfig:
     policy: PreTrainedConfig | None = None
     # Display all cameras on screen
     display_data: bool = False
+    # Host (IP address or hostname) of a remote Rerun server to stream to; a local viewer is spawned when unset
+    display_url: str | None = None
+    # Port of the remote Rerun server
+    display_port: int = 9876
     # Use vocal synthesis to read events.
     play_sounds: bool = True
     # Resume recording on an existing dataset.
@@ -374,7 +378,7 @@ def record(cfg: RecordConfig) -> LeRobotDataset:
     init_logging()
     logging.info(pformat(asdict(cfg)))
     if cfg.display_data:
-        init_rerun(session_name="recording")
+        init_rerun(session_name="recording", url=cfg.display_url, port=cfg.display_port)
 
     robot = make_robot_from_config(cfg.robot)
     teleop = make_teleoperator_from_config(cfg.teleop) if cfg.teleop is not None else None

diff --git a/src/lerobot/scripts/lerobot_teleoperate.py b/src/lerobot/scripts/lerobot_teleoperate.py
@@ -104,6 +104,10 @@ class TeleoperateConfig:
     teleop_time_s: float | None = None
     # Display all cameras on screen
     display_data: bool = False
+    # Host (IP address or hostname) of a remote Rerun server to stream to; a local viewer is spawned when unset
+    display_url: str | None = None
+    # Port of the remote Rerun server
+    display_port: int = 9876
 
 
 def teleop_loop(
@@ -186,7 +190,7 @@ def teleoperate(cfg: TeleoperateConfig):
     init_logging()
     logging.info(pformat(asdict(cfg)))
     if cfg.display_data:
-        init_rerun(session_name="teleoperation")
+        init_rerun(session_name="teleoperation", url=cfg.display_url, port=cfg.display_port)
 
     teleop = make_teleoperator_from_config(cfg.teleop)
     robot = make_robot_from_config(cfg.robot)

diff --git a/src/lerobot/utils/visualization_utils.py b/src/lerobot/utils/visualization_utils.py
@@ -16,19 +16,23 @@
 import os
 from typing import Any
 
+import cv2
 import numpy as np
 import rerun as rr
 
 from .constants import OBS_PREFIX, OBS_STR
 
 
-def init_rerun(session_name: str = "lerobot_control_loop") -> None:
+def init_rerun(session_name: str = "lerobot_control_loop", url: str | None = None, port: int = 9876) -> None:
     """Initializes the Rerun SDK for visualizing the control loop."""
     batch_size = os.getenv("RERUN_FLUSH_NUM_BYTES", "8000")
     os.environ["RERUN_FLUSH_NUM_BYTES"] = batch_size
     rr.init(session_name)
     memory_limit = os.getenv("LEROBOT_RERUN_MEMORY_LIMIT", "10%")
-    rr.spawn(memory_limit=memory_limit)
+    if url:
+        rr.connect_grpc(url=f"rerun+http://{url}:{port}/proxy")
+    else:
+        rr.spawn(memory_limit=memory_limit)
 
 
 def _is_scalar(x):
@@ -48,7 +52,7 @@ def log_rerun_data(
     to the Rerun viewer. It handles different data types appropriately:
     - Scalars values (floats, ints) are logged as `rr.Scalars`.
     - 3D NumPy arrays that resemble images (e.g., with 1, 3, or 4 channels first) are transposed
-      from CHW to HWC format and logged as `rr.Image`.
+      from CHW to HWC format, encoded as JPEG and logged as `rr.EncodedImage`.
     - 1D NumPy arrays are logged as a series of individual scalars, with each element indexed.
     - Other multi-dimensional arrays are flattened and logged as individual scalars.
 
@@ -75,7 +79,11 @@ def log_rerun_data(
                     for i, vi in enumerate(arr):
                         rr.log(f"{key}_{i}", rr.Scalars(float(vi)))
                 else:
-                    rr.log(key, rr.Image(arr), static=True)
+                    # Handle channel count explicitly for JPEG encoding
+                    if arr.ndim == 3:
+                        if arr.shape[2] == 1:
+                            # Grayscale, no color conversion needed
+                            arr_to_encode = arr
+                        elif arr.shape[2] == 3:
+                            # RGB to BGR
+                            arr_to_encode = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
+                        elif arr.shape[2] == 4:
+                            # RGBA to BGR (drop alpha)
+                            arr_to_encode = cv2.cvtColor(arr[:, :, :3], cv2.COLOR_RGB2BGR)
+                        else:
+                            raise ValueError(f"Unsupported number of channels for image encoding: {arr.shape[2]}")
+                    else:
+                        raise ValueError(f"Expected 3D array for image encoding, got shape {arr.shape}")
+                    _, buffer = cv2.imencode(
+                        ".jpg", arr_to_encode, [int(cv2.IMWRITE_JPEG_QUALITY), 50]
+                    )
+                    encoded_image = buffer.tobytes()
+                    rr.log(key, rr.EncodedImage(contents=encoded_image, media_type="image/jpeg"), static=True)
 
     if action:
         for k, v in action.items():