-
Notifications
You must be signed in to change notification settings - Fork 324
Expand file tree
/
Copy pathgpu_unsigned_integer_h100_tests.yml
More file actions
187 lines (172 loc) · 7.56 KB
/
gpu_unsigned_integer_h100_tests.yml
File metadata and controls
187 lines (172 loc) · 7.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# Test unsigned integers on an H100 VM on hyperstack
name: gpu_unsigned_integer_h100_tests/
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled, opened, synchronize ]
permissions:
contents: read
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
jobs:
should-run:
name: gpu_unsigned_integer_h100_tests/should-run
runs-on: ubuntu-latest
permissions:
pull-requests: read # Needed to check for file change
outputs:
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
core_crypto_changed: ${{ steps.changed-files.outputs.core_crypto_any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
fetch-depth: 0
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
with:
files_yaml: |
gpu:
- tfhe/Cargo.toml
- tfhe/build.rs
- backends/tfhe-cuda-backend/**
- tfhe/src/core_crypto/gpu/**
- tfhe/src/integer/server_key/radix_parallel/tests_unsigned/**
- tfhe/src/integer/server_key/radix_parallel/tests_signed/**
- tfhe/src/integer/server_key/radix_parallel/tests_cases_unsigned.rs
- tfhe/src/integer/gpu/**
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
- scripts/integer-tests.sh
core_crypto:
- tfhe/src/core_crypto/gpu/**
setup-instance:
name: gpu_unsigned_integer_h100_tests/setup-instance
needs: should-run
if: github.event_name == 'workflow_dispatch' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true') ||
(github.event.action != 'labeled' && needs.should-run.outputs.core_crypto_changed == 'true')
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: single-h100
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
cuda-tests-linux:
name: gpu_unsigned_integer_h100_tests/cuda-tests-linux
needs: [ should-run, setup-instance ]
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run unsigned integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
slack-notify:
name: gpu_unsigned_integer_h100_tests/slack-notify
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
continue-on-error: true
steps:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: gpu_unsigned_integer_h100_tests/teardown-instance
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@5aee5d157f4a0201e5eaefc9cc648e5f9f5472a5 # v1.6.0
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"