-
Notifications
You must be signed in to change notification settings - Fork 324
200 lines (174 loc) · 7.26 KB
/
gpu_pcc.yml
File metadata and controls
200 lines (174 loc) · 7.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Perform tfhe-cuda-backend post-commit checks on an AWS instance
name: gpu_pcc
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "large_ubuntu_16-22.04"
CUDA_KEYRING_PACKAGE: cuda-keyring_1.1-1_all.deb
CUDA_KEYRING_SHA: "d93190d50b98ad4699ff40f4f7af50f16a76dac3bb8da1eaaf366d47898ff8df"
on:
pull_request:
permissions:
contents: read
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow (via manual approval for PR from forks)
jobs:
setup-instance:
name: gpu_pcc/setup-instance
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@86c9d2378700ca356455e36ebb97b61e958ea47b # v1.6.2
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
profile: gpu-build
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
cuda-pcc:
name: gpu_pcc/cuda-pcc (bpr)
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 9
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install CUDA and other dependencies
if: env.SECRETS_AVAILABLE == 'false'
shell: bash
run: |
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/"${CUDA_KEYRING_PACKAGE}"
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
sha256sum -c checksum
sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
# Disable unattended-upgrades to avoid lock issues
sudo systemctl mask --now unattended-upgrades
sudo systemctl stop --now unattended-upgrades
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt purge -y unattended-upgrades
sudo apt update
sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format python3-venv
env:
CUDA_VERSION: ${{ matrix.cuda }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" >> "${GITHUB_ENV}"
env:
CUDA_VERSION: ${{ matrix.cuda }}
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
} >> "${GITHUB_ENV}"
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Run semgrep and lint checks on CUDA code
run: |
make semgrep_and_lint_gpu_code
- name: Run fmt checks
run: |
make check_fmt_gpu
- name: Run clippy checks
run: |
make pcc_gpu
- name: Run semver checks on tfhe-cuda-backend
run: |
make semver_check_cuda_backend
- name: Run semver checks on zk-cuda-backend
run: |
make semver_check_zk_cuda_backend
- name: Run semver checks on tfhe-cuda-common
run: |
make semver_check_cuda_common
- name: Check build with hpu enabled
run: |
make clippy_gpu_hpu
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: cuda_pcc/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-pcc ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@86c9d2378700ca356455e36ebb97b61e958ea47b # v1.6.2
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"