Skip to content

Commit 9fb0134

Browse files
committed
CI: Add multi-target build infra
1 parent 5e52e03 commit 9fb0134

File tree

6 files changed

+979
-4
lines changed

6 files changed

+979
-4
lines changed
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# NIXL PR Testing Matrix Configuration
2+
#
3+
# This file defines the build matrix for NIXL PR validation using the multi-target Docker system.
4+
# Primary purpose: Fast feedback on code changes via build + test execution.
5+
#
6+
# Key Components:
7+
# - Single NIXL Image Build: Uses multi-stage Dockerfile (base -> nixl) with layer caching
8+
# - Integrated Testing: Runs C++ and Python tests directly in the built environment
9+
# - CI-Demo Integration: Base image auto-builds only when Dockerfile changes
10+
#
11+
# PR Testing Benefits:
12+
# - Fast feedback: Base layer cached when only code changes
13+
# - Full rebuild: When infrastructure (Dockerfile) changes
14+
# - No registry push for the test image: Focus on validation, not distribution (only the base image is pushed)
15+
# - Multi-OS/arch testing: Ensures compatibility across platforms
16+
#
17+
18+
---
19+
job: 'nixl-ci-build-multi-target'

# Registry host, path and credential ID used for the reusable base image
registry_host: 'urm.nvidia.com'
registry_path: '/sw-nbu-swx-nixl-docker-local/ci'
registry_auth: 'svc-nixl-artifactory-token'

# Build settings (fail-fast disabled, 120-minute timeout)
failFast: false
timeout_minutes: 120

# Infrastructure requirements: Kubernetes cloud, namespace and pod resources.
# The limits/requests values are deliberately kept as quoted strings.
kubernetes:
  cloud: 'il-ipp-blossom-prod'
  namespace: 'swx-media'
  limits: "{memory: 16Gi, cpu: 8000m}"
  requests: "{memory: 8Gi, cpu: 4000m}"
36+
37+
# Container images: one tool container, the reusable base image, and the
# NIXL image used for PR testing
runs_on_dockers:
  # Podman tool container (privileged)
  - name: 'podman-v5.0.2'
    url: 'quay.io/podman/stable:v5.0.2'
    privileged: true
    category: 'tool'

  # BASE IMAGE - Built & Pushed for Reusability
  # Registry: urm.nvidia.com/sw-nbu-swx-nixl-docker-local/ci/{arch}/nixl-base:base-{os}-{arch}
  - file: 'contrib/Dockerfile.base'
    name: 'nixl-base'
    tag: 'base-${os}-${arch}'
    arch: '${arch}'
    build_args: 'OS=${os} ARCH=${arch}'
    category: 'reusable'

  # NIXL TEST IMAGE - Uses base via FROM directive (PR testing only)
  # Local only: Built from base + NIXL source for validation
  - file: 'contrib/Dockerfile.nixl'
    name: 'nixl-pr-test'
    tag: 'test-${os}-${arch}'
    arch: '${arch}'
    build_args: 'OS=${os} ARCH=${arch} REGISTRY_HOST=urm.nvidia.com REGISTRY_PATH=/sw-nbu-swx-nixl-docker-local/ci'
    category: 'test'
48+
49+
# Build matrix: two architectures x two Ubuntu releases
matrix:
  axes:
    arch: ['x86_64', 'aarch64']
    os: ['ubuntu22.04', 'ubuntu24.04']

# Environment variables made available to the steps (all values are strings)
env:
  NIXL_INSTALL_DIR: '/usr/local/nixl'
  NPROC: '16'

# Task naming for parallel execution
taskName: '${os}/${arch}/${axis_index}'

# Jenkins credential exposed as a username/password variable pair
credentials:
  - credentialsId: 'svc-nixl-artifactory-token'
    usernameVariable: 'ARTIFACTORY_USERNAME'
    passwordVariable: 'ARTIFACTORY_PASSWORD'
71+
72+
# Build pipeline steps: run the C++ and Python test scripts inside the
# 'nixl-pr-test' container, passing the NIXL install prefix to each script.
steps:
  - name: Test NIXL with PR Changes
    parallel: true
    containerSelector: "{ name: 'nixl-pr-test' }"
    run: |
      # Stop at the first failing command so that a C++ test failure cannot be
      # hidden by a later, passing Python run (a no-op if the runner already
      # enables errexit).
      set -e

      # Set up environment
      cd /workspace/nixl
      ARCH=$(uname -m)
      # Normalize the alternative "arm64" spelling to "aarch64"
      if [ "$ARCH" = "arm64" ]; then
        ARCH="aarch64"
      fi
      export ARCH

      # Quote the install prefix so it is always passed as exactly one argument
      .gitlab/test_cpp.sh "${NIXL_INSTALL_DIR}"
      .gitlab/test_python.sh "${NIXL_INSTALL_DIR}"
85+
86+
87+
# Pipeline completion: log the final status and, when the MAIL_TO parameter is
# non-empty, e-mail an HTML summary of the build.
pipeline_stop:
  shell: action
  module: groovy
  run: |
    // Fall back to SUCCESS when no result has been recorded on the build
    def jobStatus = currentBuild.result ?: 'SUCCESS'
    def statusColor = jobStatus == 'SUCCESS' ? 'green' : 'red'

    echo "NIXL PR testing completed with status: ${jobStatus}"

    // The previously computed 'userName' was never used; dropping it also
    // removes the currentBuild.rawBuild access, which is not needed here.
    if (params.MAIL_TO) {
      mail(
        to: params.MAIL_TO,
        subject: "NIXL PR Testing [${env.BUILD_NUMBER}] - ${jobStatus}",
        mimeType: 'text/html',
        body: """
          <h3>NIXL PR Testing Results</h3>
          <p><b>Status:</b> <span style="color: ${statusColor};">${jobStatus}</span></p>
          <p><b>Build:</b> <a href='${env.BUILD_URL}'>#${env.BUILD_NUMBER}</a></p>
          <p><b>Images:</b> Base image pushed for reuse, NIXL tested with PR changes</p>
          <p><a href='${env.BUILD_URL}console'>View Console Output</a></p>
        """
      )
    }

.ci/jenkins/pipeline/proj-jjb.yaml

Lines changed: 70 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,71 @@
316316
parent-credentials: true
317317
script-path: "{jjb_jenkinsfile}"
318318

319+
# Template for multi-target build job using the new unified Dockerfile approach.
# Pipeline job whose Jenkinsfile ("{jjb_jenkinsfile}") is checked out from
# "{jjb_git}" at the ref given by the "sha1" parameter; the matrix file is
# selected through the "conf_file" parameter.
- job-template:
    name: "{jjb_proj}-build-multi-target"
    project-type: pipeline
    folder: "{jjb_folder}"
    disabled: false
    properties:
      - github:
          url: "{jjb_gh_url}"
      - build-discarder:
          days-to-keep: 14
          num-to-keep: 1000
      - inject:
          keep-system-variables: true
          properties-content: |
            jjb_proj={jjb_proj}-build-multi-target
    description: >
      <b>NIXL PR Testing Pipeline</b><br/>
      • Fast feedback for code changes using multi-target Docker system<br/>
      • Builds NIXL with PR changes (base layer cached when possible)<br/>
      • Supports both Ubuntu 22.04 and 24.04 on x86_64/aarch64<br/>
      • Runs comprehensive C++ and Python test suites<br/>
      <br/>
      <i>Do NOT edit this job through the Jenkins GUI — managed by Jenkins Job Builder.</i>
    concurrent: true
    sandbox: true
    parameters:
      - string:
          name: "sha1"
          default: "{jjb_branch}"
          description: "Git commit/branch/tag to build (default: main)"
      - string:
          name: "githubData"
          default: ""
          description: "GitHub webhook data (automatically set)"
      - string:
          name: "conf_file"
          default: ".ci/jenkins/lib/build-multi-target-matrix.yaml"
          description: "Build matrix configuration file"
      - string:
          name: "MAIL_TO"
          default: ""
          description: "Email address for build notifications (optional)"
      - string:
          name: "DEBUG"
          # Quoted: this is a string parameter, so keep the default a string
          # instead of letting YAML type it as an integer.
          default: "0"
          description: "Debug level (0-9) for verbose output"
    pipeline-scm:
      scm:
        - git:
            url: "{jjb_git}"
            credentials-id: 'swx-jenkins_ssh_key'
            branches: ['$sha1']
            shallow-clone: false
            do-not-fetch-tags: false
            refspec: "+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/* +refs/tags/*:refs/remotes/origin/tags/*"
            browser: githubweb
            browser-url: "{jjb_git}"
            submodule:
              disable: false
              recursive: true
              tracking: true
              parent-credentials: true
      script-path: "{jjb_jenkinsfile}"
383+
319384
# Project definition that instantiates the job templates
320385
# This section defines the actual jobs that will be created
321386
- project:
@@ -327,7 +392,8 @@
327392
jjb_branch: 'main' # Default branch
328393
jjb_gh_url: 'https://github.com/ai-dynamo/nixl' # GitHub web URL
329394
jobs:
330-
- "{jjb_proj}-dispatcher" # Create dispatcher job
331-
- "{jjb_proj}-build" # Create build job
332-
- "{jjb_proj}-build-container" # Create container builder job
333-
- "{jjb_proj}-test" # Create test job
395+
# - "{jjb_proj}-dispatcher" # Create dispatcher job
396+
# - "{jjb_proj}-build" # Create build job
397+
# - "{jjb_proj}-build-container" # Create container builder job
398+
- "{jjb_proj}-build-multi-target" # Create multi-target build job
399+
# - "{jjb_proj}-test" # Create test job

contrib/Dockerfile.base

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Base Dockerfile for NIXL CI/CD system
2+
# Contains shared dependencies: system packages, UCX, Rust, AWS SDK, etc.
3+
# Used by both NIXL and NIXLBench builds
4+
5+
# Global build arguments. They are declared before FROM so the base image can
# be selected, and re-declared after FROM so they are visible inside the stage.
ARG OS="ubuntu24.04"
ARG ARCH="x86_64"
# NOTE(review): BASE_IMAGE_TAG is independent of OS; a build that overrides
# only OS still starts from the ubuntu24.04 tag below — confirm callers also
# override BASE_IMAGE_TAG when they change OS.
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
ARG DEFAULT_PYTHON_VERSION="3.12"
ARG UCX_REF="v1.19.x"
ARG NPROC="16"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}

# Bring the global arguments into the build stage
ARG OS
ARG ARCH
ARG DEFAULT_PYTHON_VERSION
ARG UCX_REF
ARG NPROC
20+
21+
# System layer: refresh the package index, install the Ubuntu keyring, then
# install build tools, gRPC/protobuf, etcd and RDMA/InfiniBand packages.
# The package list is kept in alphabetical order.
RUN apt-get update -y \
 && apt-get install -y ubuntu-keyring \
 && DEBIAN_FRONTEND=noninteractive apt-get -y install \
        automake \
        autotools-dev \
        build-essential \
        cmake \
        etcd-client \
        etcd-server \
        flex \
        ibverbs-providers \
        ibverbs-utils \
        libaio-dev \
        libclang-dev \
        libcpprest-dev \
        libcurl4-openssl-dev \
        libgflags-dev \
        libgrpc++-dev \
        libgrpc-dev \
        libgtest-dev \
        libibumad-dev \
        libibverbs-dev \
        libnuma-dev \
        libprotobuf-dev \
        librdmacm-dev \
        libssl-dev \
        libtool \
        liburing-dev \
        libz-dev \
        ninja-build \
        protobuf-compiler-grpc \
        pybind11-dev \
        python${DEFAULT_PYTHON_VERSION}-dev \
        rdma-core \
        uuid-dev \
        zlib1g-dev
58+
59+
# Add the Mellanox DOCA 3.0.0 apt repository for ${OS}/${ARCH} and install the
# DOCA packages.
#  - An unsupported ARCH now fails the build instead of producing a repository
#    URL with an empty architecture component.
#  - The former PKG_CONFIG_PATH export was dropped: a variable exported inside
#    a RUN does not outlive that RUN, and no command here reads it.
#  - The de-armored key is written to the keyring without echoing the binary
#    data to the build log.
RUN case "${ARCH}" in \
        aarch64) ARCH_SUFFIX="arm64-sbsa" ;; \
        x86_64)  ARCH_SUFFIX="${ARCH}" ;; \
        *) echo "Unsupported ARCH for DOCA repository: ${ARCH}" >&2; exit 1 ;; \
    esac && \
    DOCA_REPO="https://linux.mellanox.com/public/repo/doca/3.0.0/${OS}/${ARCH_SUFFIX}" && \
    curl -fsSL "${DOCA_REPO}/GPG-KEY-Mellanox.pub" | \
        gpg --dearmor | tee /usr/share/keyrings/mellanox-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/mellanox-archive-keyring.gpg] ${DOCA_REPO} ./" | \
        tee /etc/apt/sources.list.d/mellanox.list && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends \
        doca-all \
        doca-sdk-common doca-sdk-dma doca-sdk-dpdk-bridge \
        doca-sdk-eth doca-sdk-flow doca-sdk-rdma \
        mlnx-dpdk mlnx-dpdk-dev
75+
76+
# GPUNetIO DOCA packages are installed only when OS is ubuntu24.04;
# for any other OS value this step does nothing.
RUN case "${OS}" in \
        ubuntu24.04) \
            apt-get install -y --no-install-recommends \
                doca-sdk-gpunetio \
                libdoca-sdk-gpunetio-dev ;; \
    esac
82+
83+
# Download the AWS CLI v2 bundle for the target architecture, run its
# installer, and delete the downloaded archive and unpacked directory.
RUN curl -o "awscliv2.zip" "https://awscli.amazonaws.com/awscli-exe-linux-${ARCH}.zip" \
 && unzip awscliv2.zip \
 && ./aws/install \
 && rm -rf aws awscliv2.zip
86+
87+
WORKDIR /workspace
88+
89+
# Build and install etcd-cpp-apiv3 into /usr/local.
#  - The find_dependency(cpprestsdk) line is stripped from whichever of the two
#    candidate config templates exist; a missing candidate no longer aborts
#    the build (sed -i fails on a non-existent file).
#  - The source tree is removed in this same RUN so it is not stored in the
#    image layer.
RUN git clone https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git && \
    cd etcd-cpp-apiv3 && \
    for cfg in etcd-cpp-apiv3-config.cmake.in cmake/etcd-cpp-apiv3-config.cmake.in; do \
        if [ -f "$cfg" ]; then \
            sed -i '/^find_dependency(cpprestsdk)$/d' "$cfg" || exit 1; \
        fi; \
    done && \
    mkdir build && cd build && \
    cmake .. -DCMAKE_INSTALL_PREFIX=/usr/local && \
    make -j${NPROC} && \
    make -j${NPROC} install && \
    cd ../.. && rm -rf etcd-cpp-apiv3
98+
99+
# Build and install AWS SDK C++ 1.11.581 (S3 component only, Release build,
# tests disabled) into /usr/local. The source and build trees are removed in
# this same RUN so they are not stored in the image layer.
RUN git clone --recurse-submodules https://github.com/aws/aws-sdk-cpp.git --branch 1.11.581 && \
    mkdir aws_sdk_build && cd aws_sdk_build && \
    cmake ../aws-sdk-cpp/ \
        -DCMAKE_BUILD_TYPE=Release \
        -DBUILD_ONLY="s3" \
        -DENABLE_TESTING=OFF \
        -DCMAKE_INSTALL_PREFIX=/usr/local && \
    make -j${NPROC} && \
    make -j${NPROC} install && \
    cd .. && rm -rf aws-sdk-cpp aws_sdk_build
105+
106+
# Install uv for Python virtual environment management
107+
# Copies the uv and uvx binaries from the upstream uv image into /bin.
# NOTE(review): the ":latest" tag is not pinned, so the uv version may differ
# between builds — consider pinning a specific tag.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
108+
109+
# Build and install UCX at ${UCX_REF} into /usr.
#  - Existing UCX directories (/usr/lib/ucx, /opt/hpcx/ucx) are deleted first.
#  - UCX_REF is quoted so it is always passed to git as a single argument.
#  - The source tree is removed in this same RUN so it is not stored in the
#    image layer.
RUN rm -rf /usr/lib/ucx /opt/hpcx/ucx && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout "${UCX_REF}" && \
    ./autogen.sh && ./configure \
        --prefix=/usr \
        --with-rdmacm \
        --with-verbs \
        --with-mlx5-dv \
        --with-dm \
        --without-java \
        --enable-optimizations \
        --disable-logging \
        --disable-debug \
        --disable-assertions \
        --disable-params-check \
        --enable-mt && \
    make -j${NPROC} && \
    make -j${NPROC} install-strip && \
    ldconfig && \
    cd / && rm -rf /usr/local/src/ucx
131+
132+
# Install Rust toolchain ${RUST_VERSION} with rustup-init 1.28.1.
# rustup and cargo live under /usr/local and cargo's bin directory is put on
# PATH. The downloaded installer is checked against its published SHA-256
# file before it is executed, and both directories are made writable by all
# users afterwards.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH \
    RUST_VERSION=1.86.0

# An unsupported ARCH now fails the build explicitly instead of continuing
# with an empty RUSTARCH and a malformed download URL.
RUN case "${ARCH}" in \
        aarch64) RUSTARCH="aarch64-unknown-linux-gnu" ;; \
        x86_64)  RUSTARCH="x86_64-unknown-linux-gnu" ;; \
        *) echo "Unsupported ARCH for rustup: ${ARCH}" >&2; exit 1 ;; \
    esac && \
    wget --tries=3 --waitretry=5 \
        "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" \
        "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init.sha256" && \
    sha256sum -c rustup-init.sha256 && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init* && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME
150+
151+
# Final cleanup: clear the apt cache and package lists, and delete the build
# trees (relative paths resolve against the current WORKDIR).
RUN apt-get clean && \
    rm -rf /var/lib/apt/lists/* \
           etcd-cpp-apiv3 aws-sdk-cpp aws_sdk_build /usr/local/src/ucx
154+
155+
# Prepend /usr/local/lib to the library search path and
# /usr/local/lib/pkgconfig to the pkg-config search path.
ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH \
    PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH

0 commit comments

Comments
 (0)