Skip to content

Commit 2da3adb

Browse files
CI: add build and push models workflow
This workflow will build and push to ghcr.io the example MAP in examples/apps/simple_imaging_app. This workflow will use terraform to launch a VM with a GPU and then run the monai-deploy-sdk package subcommand inside it. NOTE: you need to set up the secrets: * AZURE_CLIENT_ID * AZURE_SUBSCRIPTION_ID * AZURE_TENANT_ID * AZURE_CLIENT_SECRET The workflow will patch holoscan, so that it works when there is no cache. The workflow will install a "patched" libseccomp package so that we can install libnvidia-container. The workflow will use the nvidia docker runtime to build, load, export, ... the image. The workflow uses the smallest/cheapest Azure image with a GPU available in Western Europe, which is Standard_NC4as_T4_v3. For this to work, you need to request a quota increase from the Azure Help Desk. However, the default 30GB disk is not enough for the build, so you need to set up a 64GB disk. WARNING: This will incur costs in Azure Cloud; use it with caution. Signed-off-by: Jordi Massaguer Pla <[email protected]>
1 parent 1aa54c8 commit 2da3adb

File tree

3 files changed

+314
-0
lines changed

3 files changed

+314
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
From 142be17d7563c3499b548dae913cabd7b8242f78 Mon Sep 17 00:00:00 2001
2+
From: Jordi Massaguer Pla <[email protected]>
3+
Date: Tue, 14 Nov 2023 10:30:15 +0100
4+
Subject: [PATCH] Fix using no-cache option for the container build
5+
6+
If we specify no-cache, we should not add the local cache with the
7+
from-cache and to-cache parameters. Otherwise, we get the error
8+
9+
```
10+
WARNING: local cache import at /home/adminuser/.holoscan_build_cache
11+
not found due to err: could not read
12+
/home/adminuser/.holoscan_build_cache/index.json: open
13+
/home/adminuser/.holoscan_build_cache/index.json: no such file or directory
14+
```
15+
where adminuser is the user that runs the build.
16+
17+
This is important for CI, where we do not have any cache to start with.
18+
19+
Signed-off-by: Jordi Massaguer Pla <[email protected]>
20+
---
21+
python/holoscan/cli/packager/container_builder.py | 14 +++++++++-----
22+
1 file changed, 9 insertions(+), 5 deletions(-)
23+
24+
diff --git a/python/holoscan/cli/packager/container_builder.py b/python/holoscan/cli/packager/container_builder.py
25+
index 92edd91..ca6e141 100644
26+
--- a/python/holoscan/cli/packager/container_builder.py
27+
+++ b/python/holoscan/cli/packager/container_builder.py
28+
@@ -89,9 +89,11 @@ def _build_internal(
29+
builder = create_and_get_builder(Constants.LOCAL_BUILDX_BUILDER_NAME)
30+
31+
build_result = PlatformBuildResults(platform_parameters)
32+
-
33+
- cache_to = {"type": "local", "dest": self._build_parameters.build_cache}
34+
- cache_from = [{"type": "local", "src": self._build_parameters.build_cache}]
35+
+ cache_to = {}
36+
+ cache_from = []
37+
+ if not self._build_parameters.no_cache:
38+
+ cache_to = {"type": "local", "dest": self._build_parameters.build_cache}
39+
+ cache_from = [{"type": "local", "src": self._build_parameters.build_cache}]
40+
if platform_parameters.base_image is not None:
41+
cache_from.append({"type": "registry", "ref": platform_parameters.base_image})
42+
if platform_parameters.build_image is not None:
43+
@@ -99,8 +101,6 @@ def _build_internal(
44+
builds = {
45+
"builder": builder,
46+
"cache": not self._build_parameters.no_cache,
47+
- "cache_from": cache_from,
48+
- "cache_to": cache_to,
49+
"context_path": self._temp_dir,
50+
"file": dockerfile,
51+
"platforms": [platform_parameters.docker_arch],
52+
@@ -108,6 +108,10 @@ def _build_internal(
53+
"pull": True,
54+
"tags": [platform_parameters.tag],
55+
}
56+
+ if cache_to != {}:
57+
+ builds["cache_to"] = cache_to
58+
+ if cache_from != []:
59+
+ builds["cache_from"] = cache_from
60+
61+
export_to_tar_ball = False
62+
if self._build_parameters.tarball_output is not None:
+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# This workflow will install Python dependencies, build the latest models as containers, and push to the registry the resulting containers
2+
# TODO: Use cache for caching the docker images, to speed up the build
3+
# TODO: Can we have the dependencies stored somehow (predownloaded, a custom image, a container registry, our artifact server...) so this will always be reproducible?
4+
# TODO: Can we have some kind of Bill Of Materials of the resulting image? What packages are in there? What python "modules" ? Code version of monai?
5+
6+
name: build_and_push_models
7+
8+
# This is triggered manually. It could be changed to be triggered by new pushed tags.
9+
on: workflow_dispatch
10+
11+
# Version could be inferred from the new tag if this was triggered by a new tag push
12+
# FIXME: Python version could be inferred with "python --version" run inside the containers, and CP is the python version without '.'
13+
# FIXME: wheel name could be dynamically generated, by for example use "ls" on the download folder
14+
# ARM environment variables are used by the terraform azure provider for authentication using a client secret.
15+
# See https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/guides/service_principal_client_secret
16+
# https://github.com/Azure-Samples/terraform-github-actions/blob/main/.github/workflows/tf-plan-apply.yml
17+
env:
18+
ARM_CLIENT_ID: "${{ secrets.AZURE_CLIENT_ID }}"
19+
ARM_SUBSCRIPTION_ID: "${{ secrets.AZURE_SUBSCRIPTION_ID }}"
20+
ARM_TENANT_ID: "${{ secrets.AZURE_TENANT_ID }}"
21+
ARM_CLIENT_SECRET: "${{ secrets.AZURE_CLIENT_SECRET }}"
22+
VERSION: "0.6.0"
23+
PYTHON_VERSION: "3.8"
24+
CP_VERSION: "38"
25+
DOCKER_IMAGE_TAG : "latest"
26+
APP_IMAGE_NAME : "simple_app"
27+
PLATFORM : "x64-workstation"
28+
DOCKER_IMAGE_NAME : "simple_app-x64-workstation-dgpu-linux-amd64-latest"
29+
DOCKER_IMAGE_NAME_SHORT: "simple_app-x64-workstation-dgpu-linux-amd64"
30+
REGISTRY: ghcr.io
31+
IMAGE_NAME: ${{ github.repository }}
32+
APP: "examples/apps/simple_imaging_app"
33+
34+
jobs:
35+
do:
36+
runs-on: ubuntu-latest
37+
permissions:
38+
contents: read
39+
packages: write
40+
steps:
41+
- uses: actions/checkout@v2
42+
- name: Disclaimers
43+
run: |
44+
echo "!!! WARNING !!! This is a hackweek project, not meant for production or clinical usage, does not have any kind of guarantee, use at your own risk.https://hackweek.opensuse.org/23/projects/package-monai-machine-learning-models-for-medical-applications. !!! WARNING !!!"
45+
# Install the latest version of the Terraform CLI
46+
- name: Show disc space
47+
run: df -h
48+
- name: Setup Terraform
49+
uses: hashicorp/setup-terraform@v2
50+
with:
51+
terraform_wrapper: false
52+
- name: Initialize a new Terraform working directory
53+
run: terraform init
54+
- name: Check Terraform configuration files format
55+
run: terraform fmt -check
56+
- name: Generate unique SSH Key
57+
run: ssh-keygen -t rsa -f /tmp/ssh_id_gh -N ""
58+
- name: Terraform Apply
59+
run: terraform apply -auto-approve
60+
- name: Get IP address
61+
run: echo "AZURE_IPADDRESS=$(terraform output | grep instance_public_ip | cut -d\" -f2)" >> $GITHUB_ENV
62+
- name: Output ip address
63+
run: echo "AZURE_IPADDRESS=$AZURE_IPADDRESS"
64+
- name: Test connection
65+
# We use StrictHostKeyChecking=no to accept the SSH fingerprint on the first connection
66+
run: ssh -i /tmp/ssh_id_gh -o StrictHostKeyChecking=no adminuser@$AZURE_IPADDRESS "sudo uname -a"
67+
- name: Add fixed libseccomp package
68+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "sudo zypper ar -G https://download.opensuse.org/repositories/home:/jordimassaguerpla:/branches:/openSUSE:/Leap:/15.5:/Update/pool-leap-15.5/home:jordimassaguerpla:branches:openSUSE:Leap:15.5:Update.repo && sudo zypper ref && sudo zypper -n install --from home_jordimassaguerpla_branches_openSUSE_Leap_15.5_Update --allow-vendor-change libseccomp"
69+
- name: Install Deps
70+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "sudo zypper ar -G https://developer.download.nvidia.com/compute/cuda/repos/opensuse15/x86_64/ nvidia && sudo zypper ref && sudo zypper --non-interactive install patch python39 docker-buildx nvidia-container-toolkit nvidia-computeG05 cuda-cudart-devel-11-0 libyaml-cpp0_6 && wget -c https://bootstrap.pypa.io/get-pip.py && python3.9 get-pip.py && python3.9 --version"
71+
- name: Setup Nvidia container
72+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "sudo usermod -G docker,video adminuser && sudo nvidia-ctk runtime configure --runtime=docker && sudo nvidia-ctk runtime configure --runtime=containerd && sudo systemctl start docker && sudo systemctl start containerd && sudo sed -e \"s/user = \\\"\\\"/user = \\\"adminuser:video\\\"/g \" -i /etc/nvidia-container-runtime/config.toml && sudo modprobe nvidia"
73+
- name: Check nvidia
74+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "sudo systemctl start docker && nvidia-smi && docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi"
75+
- name: Install Monai Deploy Sdk and Holoscan
76+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "python3.9 -m pip install monai-deploy-app-sdk holoscan"
77+
- name: Fix Holoscan
78+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} 'cd /home/adminuser/.local/lib/python3.9/site-packages/holoscan/lib ; to_link=$(ls libholoscan_*.so*);for i in $to_link; do name=$(echo $i | cut -d. -f1); ln -sv $name.so.$VERSION $name.so.0;done'
79+
- name: Copy holoscan patch
80+
run: scp -i /tmp/ssh_id_gh .github/workflows/142be17d7563c3499b548dae913cabd7b8242f78.patch adminuser@${AZURE_IPADDRESS}:/home/adminuser/.local/lib/python3.9/site-packages/holoscan
81+
- name: Patch Holoscan
82+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "cd /home/adminuser/.local/lib/python3.9/site-packages/holoscan; patch -p3 < 142be17d7563c3499b548dae913cabd7b8242f78.patch"
83+
- name: Make work dir
84+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "mkdir /home/adminuser/work"
85+
- name: Download wheels
86+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "cd /home/adminuser/work && python3.9 -m pip download --no-deps --python-version=$PYTHON_VERSION holoscan==$VERSION && python3.9 -m pip download --no-deps monai-deploy-app-sdk==$VERSION"
87+
- name: Copy example code
88+
run: scp -i /tmp/ssh_id_gh -r * adminuser@${AZURE_IPADDRESS}:/home/adminuser/work
89+
- name: Monai Deploy package
90+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "mkdir /home/adminuser/work/output && cd /home/adminuser/work && monai-deploy package --no-cache /home/adminuser/work/$APP -c /home/adminuser/work/$APP/app.yaml -t $APP_IMAGE_NAME:$DOCKER_IMAGE_TAG --platform $PLATFORM -l DEBUG --holoscan-sdk-file=/home/adminuser/work/holoscan-$VERSION-cp$CP_VERSION-cp$CP_VERSION-manylinux2014_x86_64.whl --monai-deploy-sdk-file=/home/adminuser/work/monai_deploy_app_sdk-$VERSION-py3-none-any.whl --platform-config dgpu --gid 1000 --output /home/adminuser/work/output"
91+
- name: Size of docker image
92+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "du -hs /home/adminuser/work/output/*"
93+
- name: Compress docker image
94+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "cd /home/adminuser/work/output && gzip $DOCKER_IMAGE_NAME.tar"
95+
- name: Size of docker image
96+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "du -hs /home/adminuser/work/output/*"
97+
- name: Show disc space
98+
run: df -h
99+
- name: Load docker image
100+
run: ssh -i /tmp/ssh_id_gh adminuser@${AZURE_IPADDRESS} "cat /home/adminuser/work/output/$DOCKER_IMAGE_NAME.tar.gz" | docker load
101+
- name: Log in to the Container registry
102+
uses: docker/login-action@v3
103+
with:
104+
registry: ${{ env.REGISTRY }}
105+
username: ${{ github.actor }}
106+
password: ${{ secrets.GITHUB_TOKEN }}
107+
- name: Tag Docker image
108+
run: docker tag $DOCKER_IMAGE_NAME_SHORT:$DOCKER_IMAGE_TAG $REGISTRY/$IMAGE_NAME/$DOCKER_IMAGE_NAME_SHORT:$DOCKER_IMAGE_TAG
109+
- name: Push Docker image
110+
run: docker push $REGISTRY/$IMAGE_NAME/$DOCKER_IMAGE_NAME_SHORT:$DOCKER_IMAGE_TAG
111+
- name: Terraform Destroy
112+
if: ${{ always() }}
113+
run: terraform destroy -auto-approve

main.tf

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
export these variables before running this file
3+
ARM_CLIENT_ID
4+
ARM_SUBSCRIPTION_ID
5+
ARM_TENANT_ID
6+
ARM_CLIENT_SECRET
7+
*/
8+
9+
# We strongly recommend using the required_providers block to set the
10+
# Azure Provider source and version being used
11+
terraform {
12+
required_providers {
13+
azurerm = {
14+
source = "hashicorp/azurerm"
15+
version = "=3.0.0"
16+
}
17+
}
18+
}
19+
20+
# Configure the Microsoft Azure Provider
21+
provider "azurerm" {
22+
features {}
23+
}
24+
25+
# Create a resource group
26+
resource "azurerm_resource_group" "gh-actions-build-monai-models-resource-group" {
27+
name = "gh-actions-build-monai-models-resource-group"
28+
location = "West Europe"
29+
}
30+
31+
# Create a virtual network within the resource group
32+
resource "azurerm_virtual_network" "gh-actions-build-monai-models-virtual-network" {
33+
name = "gh-actions-build-monai-models-virtual-network"
34+
resource_group_name = azurerm_resource_group.gh-actions-build-monai-models-resource-group.name
35+
location = azurerm_resource_group.gh-actions-build-monai-models-resource-group.location
36+
address_space = ["10.0.0.0/16"]
37+
}
38+
39+
resource "azurerm_subnet" "gh-actions-build-monai-models-internal-subnet" {
40+
name = "gh-actions-build-monai-models-internal-subnet"
41+
resource_group_name = azurerm_resource_group.gh-actions-build-monai-models-resource-group.name
42+
virtual_network_name = azurerm_virtual_network.gh-actions-build-monai-models-virtual-network.name
43+
address_prefixes = ["10.0.2.0/24"]
44+
}
45+
46+
# Create public IPs
47+
resource "azurerm_public_ip" "gh-actions-build-monai-models-public-ip" {
48+
name = "gh-actions-build-monai-models-public-ip"
49+
location = azurerm_resource_group.gh-actions-build-monai-models-resource-group.location
50+
resource_group_name = azurerm_resource_group.gh-actions-build-monai-models-resource-group.name
51+
allocation_method = "Dynamic"
52+
}
53+
54+
resource "azurerm_network_interface" "gh-actions-build-monai-models-network-interface" {
55+
name = "gh-actions-build-monai-models-network-interface"
56+
location = azurerm_resource_group.gh-actions-build-monai-models-resource-group.location
57+
resource_group_name = azurerm_resource_group.gh-actions-build-monai-models-resource-group.name
58+
59+
ip_configuration {
60+
name = "gh-actions-build-monai-models-network-interface-ip-configuration"
61+
subnet_id = azurerm_subnet.gh-actions-build-monai-models-internal-subnet.id
62+
private_ip_address_allocation = "Dynamic"
63+
public_ip_address_id = azurerm_public_ip.gh-actions-build-monai-models-public-ip.id
64+
}
65+
}
66+
67+
# Create Network Security Group and rule
68+
resource "azurerm_network_security_group" "gh-actions-build-monai-models-nsg" {
69+
name = "gh-actions-build-monai-models-nsg"
70+
location = azurerm_resource_group.gh-actions-build-monai-models-resource-group.location
71+
resource_group_name = azurerm_resource_group.gh-actions-build-monai-models-resource-group.name
72+
73+
security_rule {
74+
name = "SSH"
75+
priority = 1001
76+
direction = "Inbound"
77+
access = "Allow"
78+
protocol = "Tcp"
79+
source_port_range = "*"
80+
destination_port_range = "22"
81+
source_address_prefix = "*"
82+
destination_address_prefix = "*"
83+
}
84+
}
85+
86+
# Connect the security group to the network interface
87+
resource "azurerm_network_interface_security_group_association" "gh-actions-build-monai-models-ga" {
88+
network_interface_id = azurerm_network_interface.gh-actions-build-monai-models-network-interface.id
89+
network_security_group_id = azurerm_network_security_group.gh-actions-build-monai-models-nsg.id
90+
}
91+
92+
resource "azurerm_linux_virtual_machine" "gh-actions-build-monai-models-vm" {
93+
name = "gh-actions-build-monai-models-vm"
94+
resource_group_name = azurerm_resource_group.gh-actions-build-monai-models-resource-group.name
95+
location = azurerm_resource_group.gh-actions-build-monai-models-resource-group.location
96+
// Standard_NC4as_T4_v3 has GPU. This has a cost associated!!!
97+
size = "Standard_NC4as_T4_v3"
98+
admin_username = "adminuser"
99+
network_interface_ids = [
100+
azurerm_network_interface.gh-actions-build-monai-models-network-interface.id,
101+
]
102+
103+
admin_ssh_key {
104+
username = "adminuser"
105+
public_key = file("/tmp/ssh_id_gh.pub") //This file is in the vm where you run terraform!!
106+
}
107+
108+
os_disk {
109+
caching = "ReadWrite"
110+
storage_account_type = "StandardSSD_LRS"
111+
# With the default 30GB, docker will fail to load and export the image
112+
disk_size_gb = "64"
113+
}
114+
115+
source_image_reference {
116+
publisher = "SUSE"
117+
offer = "opensuse-leap-15-5"
118+
sku = "gen2"
119+
version = "latest"
120+
}
121+
}
122+
123+
resource "null_resource" "example" {
124+
provisioner "remote-exec" {
125+
connection {
126+
host = azurerm_linux_virtual_machine.gh-actions-build-monai-models-vm.public_ip_address
127+
user = "adminuser"
128+
private_key = file("/tmp/ssh_id_gh")
129+
}
130+
131+
inline = ["echo 'connected!'"]
132+
}
133+
}
134+
135+
output "instance_public_ip" {
136+
description = "Public IP address"
137+
value = azurerm_linux_virtual_machine.gh-actions-build-monai-models-vm.public_ip_address
138+
}
139+

0 commit comments

Comments
 (0)