import os

import cv2
import numpy as np
import torch  # PyTorch, used for GPU device selection
from ultralytics import YOLO

import depth_pro  # Apple's ml-depth-pro package exposes the `depth_pro` module

def load_model(model_path):
    """Loads the YOLO model from the specified path."""
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found at {model_path}")
    return YOLO(model_path)

def load_image(image_path):
    """Loads an image from the specified path."""
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image file not found at {image_path}")
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Error loading image from {image_path}")
    return image

def process_results(results, image, model):
    """Draws bounding boxes for 'person' detections on the image."""
    person_boxes = []
    for result in results:
        boxes = result.boxes.xyxy.cpu().numpy()  # Bounding boxes
        classes = result.boxes.cls.cpu().numpy()  # Class ids
        for box, cls in zip(boxes, classes):
            if model.names[int(cls)] == 'person':  # Use `model.names` for class names
                x1, y1, x2, y2 = map(int, box[:4])
                cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Draw rectangle
                person_boxes.append((x1, y1, x2, y2))
    return image, person_boxes
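
# The loop above keeps every 'person' box regardless of score. A minimal, optional
# sketch of confidence filtering (the helper name and the 0.5 threshold are
# assumptions, not part of the original script); ultralytics exposes per-box
# scores via `result.boxes.conf`:
def filter_person_detections(results, model, conf_threshold=0.5):
    """Returns (box, confidence) pairs for 'person' detections above a threshold."""
    kept = []
    for result in results:
        boxes = result.boxes.xyxy.cpu().numpy()
        classes = result.boxes.cls.cpu().numpy()
        confs = result.boxes.conf.cpu().numpy()  # Per-box confidence scores
        for box, cls, conf in zip(boxes, classes, confs):
            if model.names[int(cls)] == 'person' and conf >= conf_threshold:
                kept.append((tuple(map(int, box[:4])), float(conf)))
    return kept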

def save_image(output_path, image):
    """Saves the processed image to the specified output path."""
    # Fall back to the current directory when output_path has no directory component,
    # since os.makedirs('') raises an error
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    cv2.imwrite(output_path, image)
    print(f"Result saved to {output_path}")

def add_depth_information(image, person_boxes, depth):
    """Annotates each person bounding box with the depth at its center."""
    for x1, y1, x2, y2 in person_boxes:
        # Clamp the box center to the depth map bounds in case a box touches the image border
        center_x = min((x1 + x2) // 2, depth.shape[1] - 1)
        center_y = min((y1 + y2) // 2, depth.shape[0] - 1)
        depth_value = depth[center_y, center_x]
        text = f'Depth: {depth_value:.2f}m'
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 1.2
        font_thickness = 2
        cv2.putText(image, text, (x1, y1 - 10), font, font_scale, (0, 255, 0), font_thickness)
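
# The single center-pixel lookup above can be noisy on thin or partially occluded
# subjects. A minimal alternative sketch (the helper name and the 5 px half-window
# are assumptions) samples the median depth over a small patch instead:
def sample_depth_median(depth, cx, cy, half_window=5):
    """Returns the median depth over a small patch around (cx, cy), clamped to bounds."""
    h, w = depth.shape[:2]
    y0, y1 = max(0, cy - half_window), min(h, cy + half_window + 1)
    x0, x1 = max(0, cx - half_window), min(w, cx + half_window + 1)
    return float(np.median(depth[y0:y1, x0:x1]))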

def visualize_depth(depth):
    """Visualizes the depth map as an inverted colormap (warm = near, cool = far)."""
    # Guard against a constant depth map to avoid division by zero
    depth_range = max(depth.max() - depth.min(), 1e-8)
    depth_np_normalized = (depth - depth.min()) / depth_range
    inv_depth_np_normalized = 1.0 - depth_np_normalized
    depth_colormap = cv2.applyColorMap((inv_depth_np_normalized * 255).astype(np.uint8), cv2.COLORMAP_TURBO)
    # Display the colormapped inverted depth map
    cv2.imshow('Inverted Depth Map', depth_colormap)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Save the colormapped image to file
    cv2.imwrite('inverted_depth_map.jpg', depth_colormap)
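
# Optional: blending the colormapped depth over the RGB frame makes the two outputs
# easier to compare. A minimal sketch using cv2.addWeighted (the helper name and
# the 0.6/0.4 blend weights are assumptions):
def overlay_depth(image, depth_colormap, alpha=0.6):
    """Blends a depth colormap over an RGB image after resizing it to match."""
    resized = cv2.resize(depth_colormap, (image.shape[1], image.shape[0]))
    return cv2.addWeighted(image, alpha, resized, 1.0 - alpha, 0.0)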

def run_inference_on_image(model_path, image_path, output_path, show_image=False):
    """Runs YOLO inference on the image, annotates detections, and adds depth information."""
    # Load model and image
    model = load_model(model_path)
    # Select GPU if available, otherwise fall back to CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Move the YOLO model to the selected device
    model.to(device)
    image = load_image(image_path)
    # Run YOLO inference
    results = model.predict(image)
    # Process results and extract person bounding boxes
    processed_image, person_boxes = process_results(results, image, model)
    # Load the Depth Pro model and move it to the selected device
    depth_model, transform = depth_pro.create_model_and_transforms()
    depth_model = depth_model.to(device)
    depth_model.eval()
    # Load the RGB image and transform it for the depth model
    rgb_image, _, f_px = depth_pro.load_rgb(image_path)
    depth_input = transform(rgb_image).to(device)  # Move the depth input to the device
    prediction = depth_model.infer(depth_input, f_px=f_px)
    depth = prediction["depth"].squeeze().cpu().numpy()  # Metric depth in meters
    # Add depth information to the detections
    add_depth_information(processed_image, person_boxes, depth)
    # Save the processed image with person detections and depth info
    save_image(output_path, processed_image)
    # Optionally display the result
    if show_image:
        cv2.imshow('Person Detection with Depth', processed_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    # Visualize the depth map
    visualize_depth(depth)

if __name__ == "__main__":
    # Configuration parameters
    model_path = "checkpoints/yolo11s.pt"
    image_path = "data_input/example_single_beach.jpg"
    output_path = "data_output/result_example_single_beach_depth.png"
    # Run the YOLO inference with depth estimation
    run_inference_on_image(model_path, image_path, output_path, show_image=True)
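
    # A possible extension (not part of the original script): run the same pipeline
    # over every image in a directory. Paths and extensions below are illustrative.
    # for name in os.listdir("data_input"):
    #     if name.lower().endswith((".jpg", ".jpeg", ".png")):
    #         run_inference_on_image(
    #             model_path,
    #             os.path.join("data_input", name),
    #             os.path.join("data_output", f"result_{name}"),
    #         )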