Commits (21)
4329e56
adding basic slurm file for job submission
Senja20 Jun 1, 2024
be4ce19
updated the ci installation of requirements to use dependencies from…
Senja20 Jun 1, 2024
a1480e0
✨ feat: Push the model to the hub
Senja20 Jun 6, 2024
3b4ca9f
📌 update the requirements.txt
Senja20 Jun 7, 2024
daaa619
➖ simplify requirements.txt by removing unused stuff
Senja20 Jun 8, 2024
a241df0
🔥 remove hugging face
Senja20 Jun 9, 2024
bcff411
feat: update slurm file
Senja20 Jul 15, 2024
b0604d3
✨ using the pt format for model storage
Senja20 Jul 15, 2024
80648c9
Merge branch '13-traning-on-idun' of github.com:vortexntnu/vortex-ima…
Senja20 Jul 15, 2024
c1d49c3
🔧 remove redundant steps from slurm file
Senja20 Jul 15, 2024
3f545ba
✨ feat: Update YOLO model training parameters
Senja20 Aug 22, 2024
ad41066
➖ Update requirements.txt to remove unused dependencies
Senja20 Aug 22, 2024
79c728d
➖ Update requirements.txt to remove unused dependencies
Senja20 Aug 22, 2024
ade6626
🔥 Update Job.slurm to install protobuf package
Senja20 Aug 22, 2024
793a882
➕ Update protobuf package version in requirements.txt
Senja20 Aug 22, 2024
4cfe24f
✨ feat: Enhance Job.slurm for improved environment setup and package …
Senja20 Jan 20, 2025
0a28975
added yolo roboflow training script
vortexuser Jan 21, 2025
d07aa1a
unet training script
vortexuser Feb 1, 2025
7a7cfaa
update: added correct account name and time
VegraD Feb 16, 2025
5b186db
Delete .github/workflows/pylint.yml
kluge7 Oct 26, 2025
89102ab
Delete .gitignore
kluge7 Oct 26, 2025
6 changes: 3 additions & 3 deletions .github/workflows/pylint.yml
@@ -15,7 +15,7 @@ jobs:
- name: Lint Code
shell: bash
run: |
- pip install -r requirements.txt
+ pip install -r YOLO-detect-buoys/requirements.txt
pip install -U pylint
pylint $(git ls-files '*.py')

@@ -37,7 +37,7 @@ jobs:
- name: Run tests
shell: bash
run: |
- pip install -r requirements.txt
+ pip install -r YOLO-detect-buoys/requirements.txt
pip install -U pytest
pytest --capture=sys --disable-warnings -v

@@ -56,7 +56,7 @@ jobs:
with:
python-version: "3.11"
- name: Install requirements
- run: pip install -r requirements.txt
+ run: pip install -r YOLO-detect-buoys/requirements.txt
- name: Run tests and collect coverage
run: pytest --capture=sys --cov --disable-warnings -v --cov-report=xml
- name: Upload coverage reports to Codecov with GitHub Action
4 changes: 3 additions & 1 deletion .gitignore
@@ -164,4 +164,6 @@ runs/
*jpg

# data
- data/
+ data/
+ *.out
+ */*.out
50 changes: 50 additions & 0 deletions YOLO-detect-buoys/Job.slurm
@@ -0,0 +1,50 @@
#!/bin/bash
#SBATCH --partition=GPUQ
#SBATCH --account=ie-idi
#SBATCH --time=999:99:99
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --gres=gpu:a100:4
#SBATCH --constraint="gpu40g|gpu80g|gpu32g"
#SBATCH --job-name="vortex-img-process"
#SBATCH --output=vortex_img_process_log.out
#SBATCH --mem=32G

export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/cluster/apps/eb/software/Python/3.10.4-GCCcore-11.3.0/lib/

set -e

module purge
module --ignore_cache load foss/2022a
module --ignore_cache load Python/3.10.4-GCCcore-11.3.0

pip cache purge

# make sure pip is up to date
python3 -m pip install --upgrade pip

# Create a temporary virtual environment
VENV_DIR=$(mktemp -d -t env-repaint-XXXXXXXXXX)
python3 -m venv $VENV_DIR
source $VENV_DIR/bin/activate

pip install --upgrade pip

# install the required packages
pip install -r requirements.txt
#pip install pyyaml # used to read the configuration file
#pip install blobfile # install blobfile to download the dataset
#pip install kagglehub # install kagglehub to download the dataset
pip install --force-reinstall torch -U
pip install torchvision torchaudio
#pip install diffusers transformers accelerate --user

# Mixing expandable_segments:True with max_split_size doesn't make sense because the expandable segment is the size of RAM and so it could never be split with max_split_size.
# export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True,max_split_size_mb:128"
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"

python3 train.py

# Deactivate and remove the virtual environment
deactivate
rm -rf $VENV_DIR
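
For context, a script like this is typically submitted and monitored as follows (a generic sketch; the job ID is assigned by the scheduler, and the log file name comes from the --output directive above):

sbatch Job.slurm                     # submit; the scheduler prints "Submitted batch job <id>"
squeue -u $USER                      # check the job's queue state
tail -f vortex_img_process_log.out   # follow the log once the job starts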
19 changes: 16 additions & 3 deletions YOLO-detect-buoys/__main__.py
@@ -2,26 +2,39 @@
The main entry point for the YOLO buoy detector.
"""

- from os import getcwd, path
+ from os import getcwd, getenv, path

from dotenv import load_dotenv
from huggingface_hub import HfApi, Repository
from ultralytics import YOLO
from utils import get_data, get_device, process_video

if "__main__" == __name__:

load_dotenv()

device = get_device()
print("device", device)
model = YOLO("yolov8n.pt")

file_path = path.abspath(getcwd())
dataset = get_data()

result = model.train(
- data=dataset.location + "\\data.yaml", epochs=50, imgsz=640, device=device
+ data=dataset.location + "/data.yaml",
+ epochs=100,
+ imgsz=640,
+ device=device,
+ batch=4,
+ cache=False,
)

model.val()

process_video("https://youtu.be/4WGpIOwkLA4?feature=shared", model)
path = model.export(format="onnx") # export to onnx

print("Model exported to: " + path)
print(path)

# References:
# https://docs.ultralytics.com/quickstart/#install-ultralytics
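
The exported ONNX model can then be loaded outside of ultralytics; a minimal sketch with onnxruntime (the filename here is hypothetical — model.export() returns the actual path; the input layout follows the imgsz=640 setting above):

import numpy as np
import onnxruntime as ort

# "yolov8n.onnx" is a placeholder; use the path returned by model.export()
session = ort.InferenceSession("yolov8n.onnx")
inp = session.get_inputs()[0]
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)  # matches imgsz=640
outputs = session.run(None, {inp.name: dummy})
print(inp.name, [o.shape for o in outputs])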
10 changes: 10 additions & 0 deletions YOLO-detect-buoys/requirements.txt
@@ -0,0 +1,10 @@
huggingface_hub==0.23.3
numpy==1.26.4
opencv_contrib_python==4.9.0.80
opencv_python==4.9.0.80
pafy==0.5.5
python-dotenv==1.0.1
roboflow==1.1.24
torch==2.2.1
protobuf==4.24.0
ultralytics==8.0.196
Binary file removed requirements.txt
Binary file not shown.
50 changes: 50 additions & 0 deletions unet_roboflow_training/Job.slurm
@@ -0,0 +1,50 @@
#!/bin/bash
#SBATCH --partition=GPUQ
#SBATCH --account=ie-idi
#SBATCH --time=999:99:99
Review comment from Copilot AI, Oct 26, 2025:

Invalid SLURM time format '999:99:99'. The time format should be DD-HH:MM:SS, HH:MM:SS, or MM:SS with valid values (e.g., hours 0-23, minutes/seconds 0-59).

Suggested change:
- #SBATCH --time=999:99:99
+ #SBATCH --time=7-00:00:00
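
For reference, valid values under that specification look like this (illustrative examples, not taken from the PR):

#SBATCH --time=30:00         # 30 minutes (MM:SS)
#SBATCH --time=12:00:00      # 12 hours (HH:MM:SS)
#SBATCH --time=7-00:00:00    # 7 days (DD-HH:MM:SS), as in the suggestion above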
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --gres=gpu:a100:4
#SBATCH --constraint="gpu40g|gpu80g|gpu32g"
#SBATCH --job-name="vortex-img-process"
#SBATCH --output=vortex_img_process_log.out
#SBATCH --mem=32G

export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/cluster/apps/eb/software/Python/3.10.4-GCCcore-11.3.0/lib/

set -e

module purge
module --ignore_cache load foss/2022a
module --ignore_cache load Python/3.10.4-GCCcore-11.3.0

pip cache purge

# make sure pip is up to date
python3 -m pip install --upgrade pip

# Create a temporary virtual environment
VENV_DIR=$(mktemp -d -t env-repaint-XXXXXXXXXX)
python3 -m venv $VENV_DIR
source $VENV_DIR/bin/activate

pip install --upgrade pip

# install the required packages
pip install -r requirements.txt
#pip install pyyaml # used to read the configuration file
#pip install blobfile # install blobfile to download the dataset
#pip install kagglehub # install kagglehub to download the dataset
pip install --force-reinstall torch -U
pip install torchvision torchaudio
#pip install diffusers transformers accelerate --user

# Mixing expandable_segments:True with max_split_size doesn't make sense because the expandable segment is the size of RAM and so it could never be split with max_split_size.
# export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True,max_split_size_mb:128"
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"

python3 train.py

# Deactivate and remove the virtual environment
deactivate
rm -rf $VENV_DIR
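
One way to confirm the PYTORCH_CUDA_ALLOC_CONF setting above was actually picked up inside the job is a quick check before train.py runs (a minimal sketch, assuming the same virtual environment; the allocator reads the variable at CUDA initialization, so it must be exported first):

python3 - <<'EOF'
import os, torch
print(os.environ.get("PYTORCH_CUDA_ALLOC_CONF"))  # should echo expandable_segments:True
print(torch.cuda.is_available())
EOF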
10 changes: 10 additions & 0 deletions unet_roboflow_training/requirements.txt
@@ -0,0 +1,10 @@
huggingface_hub==0.23.3
numpy==1.26.4
opencv_contrib_python==4.9.0.80
opencv_python==4.9.0.80
pafy==0.5.5
python-dotenv==1.0.1
roboflow==1.1.24
torch==2.2.1
protobuf==4.24.0
ultralytics==8.0.196
159 changes: 159 additions & 0 deletions unet_roboflow_training/test.py
@@ -0,0 +1,159 @@
#!/usr/bin/env python3
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

##############################################
# 1. Define the U-Net model (same as during training)
##############################################
class DoubleConv(nn.Module):
"""
A block with two consecutive convolution layers each followed by
batch normalization and ReLU activation.
"""
def __init__(self, in_channels, out_channels):
super(DoubleConv, self).__init__()
self.double_conv = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True),
nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.double_conv(x)

class UNet(nn.Module):
def __init__(self, in_channels=3, out_channels=1):
"""
For binary segmentation the model outputs 1 channel per pixel.
"""
super(UNet, self).__init__()
# Down-sampling path
self.down1 = DoubleConv(in_channels, 64)
self.pool1 = nn.MaxPool2d(2)
self.down2 = DoubleConv(64, 128)
self.pool2 = nn.MaxPool2d(2)
self.down3 = DoubleConv(128, 256)
self.pool3 = nn.MaxPool2d(2)
self.down4 = DoubleConv(256, 512)
self.pool4 = nn.MaxPool2d(2)

# Bottleneck
self.bottleneck = DoubleConv(512, 1024)

# Up-sampling path
self.up4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
self.conv4 = DoubleConv(1024, 512)
self.up3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
self.conv3 = DoubleConv(512, 256)
self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
self.conv2 = DoubleConv(256, 128)
self.up1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
self.conv1 = DoubleConv(128, 64)

self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

def forward(self, x):
# Down path
c1 = self.down1(x)
p1 = self.pool1(c1)
c2 = self.down2(p1)
p2 = self.pool2(c2)
c3 = self.down3(p2)
p3 = self.pool3(c3)
c4 = self.down4(p3)
p4 = self.pool4(c4)

# Bottleneck
bn = self.bottleneck(p4)

# Up path
u4 = self.up4(bn)
merge4 = torch.cat([u4, c4], dim=1)
c5 = self.conv4(merge4)
u3 = self.up3(c5)
merge3 = torch.cat([u3, c3], dim=1)
c6 = self.conv3(merge3)
u2 = self.up2(c6)
merge2 = torch.cat([u2, c2], dim=1)
c7 = self.conv2(merge2)
u1 = self.up1(c7)
merge1 = torch.cat([u1, c1], dim=1)
c8 = self.conv1(merge1)
output = self.final_conv(c8)
return output

##############################################
# 2. Load the saved model
##############################################
# Set device to CUDA if available, else CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model and load the saved weights
model = UNet(in_channels=3, out_channels=1)
model_path = "unet_segmentation.pth" # path to your saved model weights
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval() # set model to evaluation mode

##############################################
# 3. Prepare the test image
##############################################
# Define the transformation (should match the training transform)
transform = transforms.Compose([
transforms.Resize((256, 256)),
transforms.ToTensor(),
])

# Path to the test image (update this path to your test image)
test_image_path = "test.jpg"
image = Image.open(test_image_path).convert("RGB")
input_tensor = transform(image).unsqueeze(0) # add batch dimension
input_tensor = input_tensor.to(device)

##############################################
# 4. Run inference
##############################################
with torch.no_grad():
output = model(input_tensor)

# Apply sigmoid to convert logits to probabilities and then threshold for binary mask
output_prob = torch.sigmoid(output)
threshold = 0.2
predicted_mask = (output_prob > threshold).float()

# Remove batch and channel dimensions, and convert to NumPy array for visualization
mask_np = predicted_mask.squeeze().cpu().numpy()

##############################################
# 5. Visualize the results
##############################################
plt.figure(figsize=(12, 6))
unique_values = np.unique(mask_np)
print("Unique mask values:", unique_values)

mask_uint8 = (mask_np * 255).astype("uint8")

plt.imsave("predicted_mask.png", mask_uint8, cmap="gray")
print("Saved predicted mask to predicted_mask.png")

# Display the original image
plt.subplot(1, 2, 1)
plt.imshow(image)
plt.title("Original Image")
plt.axis("off")

# Display the predicted mask
plt.subplot(1, 2, 2)
plt.imshow(mask_uint8, cmap='gray')
plt.title("Predicted Mask")
plt.axis("off")

plt.savefig("test_output.png")
print("Output saved to test_output.png")
