uf-mil · codebyjossaya · Nov 3, 2025 · Nov 4, 2025 · Nov 4, 2025 · Nov 16, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 ---
 default_language_version:
-  python: python3.12
+  python: python3
   node: lts
 
 ci:

diff --git a/requirements.txt b/requirements.txt
@@ -59,3 +59,8 @@ opencv-python>=4.6.0.66
 seaborn>=0.11.2
 thop>=0.1.1.post2207130030
 requests>=2.32.2
+
+# MiDaS monocular depth estimation (src/depth_estimation)
+opencv-python>=4.6.0.66
+torch>=1.7.0
+timm>=1.0.21
diff --git a/src/depth_estimation/depth_estimation/__init__.py b/src/depth_estimation/depth_estimation/__init__.py
diff --git a/src/depth_estimation/depth_estimation/interface.py b/src/depth_estimation/depth_estimation/interface.py
@@ -0,0 +1,81 @@
+# Copyright 2016 Open Source Robotics Foundation, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from enum import Enum
+
+import rclpy
+from cv_bridge import CvBridge
+from rclpy.node import Node
+from sensor_msgs.msg import Image
+from std_msgs.msg import String
+
+from .midas import midas_infer
+
+
+class Cam(Enum):
+    """Identify which camera an incoming frame belongs to."""
+
+    # Frames tagged FRONT_CAM are published on the front-camera depth topic.
+    FRONT_CAM = 0
+    # Any other value (i.e. DOWN_CAM) is published on the down-camera depth topic.
+    DOWN_CAM = 1
+
+
+class DepthNode(Node):
+
+    def __init__(self):
+        super().__init__("depth_node")
+
+        self.front_cam_sub = self.create_subscription(
+            Image,
+            "/front_cam/image_raw",
+            lambda img: self.listener_callback(img, Cam.FRONT_CAM),  # type: ignore[arg-type]
+            10,
+        )
+        self.down_cam_sub = self.create_subscription(
+            String,
+            "/down_cam/image_raw",
+            lambda img: self.listener_callback(img, Cam.DOWN_CAM),  # type: ignore
+            10,
+        )
+        print("Listening for image data..")
+        self.front_cam_pub = self.create_publisher(Image, "/front_cam/image_depth", 10)
+        self.down_cam_pub = self.create_publisher(Image, "/down_cam/image_depth", 10)
+        self.bridge = CvBridge()
+
+    def listener_callback(self, img: Image, cam: Cam):
+        print("Published data received! Conducting inference")
+        inference = midas_infer(self.bridge.imgmsg_to_cv2(img))
+
+        if cam == Cam.FRONT_CAM:
+            print("publishing to front cam...")
+            self.front_cam_pub.publish(self.bridge.cv2_to_imgmsg(inference))
+        else:
+            print("publishing to down cam...")
+            self.down_cam_pub.publish(self.bridge.cv2_to_imgmsg(inference))
+        print("Published map")
+
+
+def main(args=None):
+    rclpy.init(args=args)
+
+    depth_node = DepthNode()
+
+    rclpy.spin(depth_node)
+
+    # Destroy the node explicitly
+    # (optional - otherwise it will be done automatically
+    # when the garbage collector destroys the node object)
+    depth_node.destroy_node()
+    rclpy.shutdown()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/depth_estimation/depth_estimation/midas.py b/src/depth_estimation/depth_estimation/midas.py
@@ -0,0 +1,46 @@
+import time
+from typing import Any
+
+import cv2
+import torch
+
+
+def midas_infer(img: cv2.typing.MatLike):
+
+    # model_type = "MiDAS"
+    # model_type = "DPT_Hybrid"
+    model_type = "MiDaS_small"
+
+    midas: Any = torch.hub.load("intel-isl/MiDaS", model_type)
+
+    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    midas.to(device)
+    midas.eval()
+
+    midas_transforms: Any = torch.hub.load("intel-isl/MiDaS", "transforms")
+
+    if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
+        transform = midas_transforms.dpt_transform
+    else:
+        transform = midas_transforms.small_transform
+
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+    input_batch = transform(img).to(device)
+
+    with torch.no_grad():
+        start = time.time()
+        prediction = midas(input_batch)
+        took = time.time() - start
+
+        prediction = torch.nn.functional.interpolate(
+            prediction.unsqueeze(1),
+            size=img.shape[:2],
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze()
+    print(f"took {took:.5f}s")
+
+    output = prediction.cpu().numpy()
+
+    return output
diff --git a/src/depth_estimation/package.xml b/src/depth_estimation/package.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
+<package format="3">
+  <name>depth_estimation</name>
+  <version>0.0.0</version>
+  <description>ROS 2 node that publishes MiDaS monocular depth maps for the front and down cameras</description>
+  <maintainer email="[email protected]">Jossaya Camilles</maintainer>
+  <license>TODO: License declaration</license>
+
+  <exec_depend>cv_bridge</exec_depend>
+  <exec_depend>python3-opencv</exec_depend>
+  <exec_depend>python3-timm</exec_depend>
+  <exec_depend>rclpy</exec_depend>
+  <exec_depend>sensor_msgs</exec_depend>
+  <exec_depend>std_msgs</exec_depend>
+
+  <test_depend>ament_copyright</test_depend>
+  <test_depend>ament_flake8</test_depend>
+  <test_depend>ament_pep257</test_depend>
+  <test_depend>python3-pytest</test_depend>
+
+  <export>
+    <build_type>ament_python</build_type>
+  </export>
+</package>
diff --git a/src/depth_estimation/resource/depth_estimation b/src/depth_estimation/resource/depth_estimation
diff --git a/src/depth_estimation/setup.cfg b/src/depth_estimation/setup.cfg
@@ -0,0 +1,4 @@
+[develop]
+script_dir=$base/lib/depth_estimation
+[install]
+install_scripts=$base/lib/depth_estimation
diff --git a/src/depth_estimation/setup.py b/src/depth_estimation/setup.py
@@ -0,0 +1,27 @@
+from setuptools import find_packages, setup
+
+package_name = "depth_estimation"
+
+setup(
+    name=package_name,
+    version="0.0.0",
+    packages=find_packages(exclude=["test"]),
+    data_files=[
+        ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
+        ("share/" + package_name, ["package.xml"]),
+    ],
+    install_requires=["setuptools"],
+    zip_safe=True,
+    maintainer="jossaya",
+    maintainer_email="[email protected]",
+    description="TODO: Package description",
+    license="TODO: License declaration",
+    extras_require={
+        "test": [
+            "pytest",
+        ],
+    },
+    entry_points={
+        "console_scripts": ["depth_inference = depth_estimation.interface:main"],
+    },
+)