Skip to content

Commit bdb5abf

Browse files
committed
feat: NVIDIA GPU integration
1 parent 6b96421 commit bdb5abf

File tree

13 files changed

+252
-15
lines changed

13 files changed

+252
-15
lines changed

create-a-container/bin/create-container.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,28 @@ async function main() {
355355
console.log('Environment/entrypoint configuration applied');
356356
}
357357

358+
// Attach NVIDIA hookscript when GPU passthrough is requested
359+
if (container.nvidiaRequested) {
360+
const hookscriptVolid = 'local:snippets/nvidia';
361+
console.log(`NVIDIA requested — attaching hookscript ${hookscriptVolid}...`);
362+
363+
// Check if the hookscript file exists on the node
364+
try {
365+
const snippets = await client.storageContents(node.name, 'local', 'snippets');
366+
const hookExists = snippets.some(item => item.volid === hookscriptVolid);
367+
if (!hookExists) {
368+
console.warn('⚠️ WARNING: nvidia-container-toolkit hookscript not found at local:snippets/nvidia.');
369+
console.warn(' NVIDIA GPU passthrough may not function. See admin docs for setup instructions.');
370+
}
371+
} catch (snippetErr) {
372+
console.warn('⚠️ WARNING: Could not verify nvidia hookscript availability:', snippetErr.message);
373+
console.warn(' NVIDIA GPU passthrough may not function. See admin docs for setup instructions.');
374+
}
375+
376+
await client.updateLxcConfig(node.name, vmid, { hookscript: hookscriptVolid });
377+
console.log('NVIDIA hookscript attached');
378+
}
379+
358380
// Setup ACL for container owner
359381
await setupContainerAcl(client, node.name, vmid, container.username);
360382

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
'use strict';

// Table and column this migration adds (up) and removes (down).
const TABLE_NAME = 'Nodes';
const COLUMN_NAME = 'nvidiaAvailable';

/**
 * Apply the migration: add a non-nullable BOOLEAN column with a default of false.
 *
 * @param queryInterface - object whose addColumn() is awaited
 * @param Sequelize - object providing the BOOLEAN data type
 */
async function up(queryInterface, Sequelize) {
  const columnSpec = {
    type: Sequelize.BOOLEAN,
    allowNull: false,
    defaultValue: false
  };
  await queryInterface.addColumn(TABLE_NAME, COLUMN_NAME, columnSpec);
}

/**
 * Revert the migration: remove the column added by up().
 *
 * @param queryInterface - object whose removeColumn() is awaited
 */
async function down(queryInterface) {
  await queryInterface.removeColumn(TABLE_NAME, COLUMN_NAME);
}

/** @type {import('sequelize-cli').Migration} */
module.exports = { up, down };
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
'use strict';

// Table and column this migration adds (up) and removes (down).
const TABLE_NAME = 'Containers';
const COLUMN_NAME = 'nvidiaRequested';

/**
 * Apply the migration: add a non-nullable BOOLEAN column with a default of false.
 *
 * @param queryInterface - object whose addColumn() is awaited
 * @param Sequelize - object providing the BOOLEAN data type
 */
async function up(queryInterface, Sequelize) {
  const columnSpec = {
    type: Sequelize.BOOLEAN,
    allowNull: false,
    defaultValue: false
  };
  await queryInterface.addColumn(TABLE_NAME, COLUMN_NAME, columnSpec);
}

/**
 * Revert the migration: remove the column added by up().
 *
 * @param queryInterface - object whose removeColumn() is awaited
 */
async function down(queryInterface) {
  await queryInterface.removeColumn(TABLE_NAME, COLUMN_NAME);
}

/** @type {import('sequelize-cli').Migration} */
module.exports = { up, down };

create-a-container/models/container.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,11 @@ module.exports = (sequelize, DataTypes) => {
126126
allowNull: false,
127127
defaultValue: 'N'
128128
},
129+
nvidiaRequested: {
130+
type: DataTypes.BOOLEAN,
131+
allowNull: false,
132+
defaultValue: false
133+
},
129134
environmentVars: {
130135
type: DataTypes.TEXT,
131136
allowNull: true,

create-a-container/models/node.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ module.exports = (sequelize, DataTypes) => {
9090
type: DataTypes.STRING(255),
9191
allowNull: false,
9292
defaultValue: 'local-lvm'
93+
},
94+
nvidiaAvailable: {
95+
type: DataTypes.BOOLEAN,
96+
allowNull: false,
97+
defaultValue: false
9398
}
9499
}, {
95100
sequelize,

create-a-container/openapi.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,9 @@ paths:
307307
type: array
308308
items:
309309
$ref: '#/components/schemas/ExternalDomain'
310+
nvidiaAvailable:
311+
type: boolean
312+
description: Whether any node in the site has NVIDIA GPU support
310313
'401':
311314
$ref: '#/components/responses/Unauthorized'
312315
'404':
@@ -668,6 +671,9 @@ components:
668671
createdAt:
669672
type: string
670673
format: date-time
674+
nvidiaRequested:
675+
type: boolean
676+
description: Whether NVIDIA GPU passthrough was requested
671677

672678
ContainerCreateRequest:
673679
type: object
@@ -714,6 +720,13 @@ components:
714720
type: string
715721
nullable: true
716722
description: Override the container entrypoint command
723+
nvidiaRequested:
724+
type: boolean
725+
default: false
726+
description: |
727+
Request NVIDIA GPU passthrough. Requires at least one NVIDIA-capable node in the site.
728+
When true, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` environment
729+
variables are set automatically (unless explicitly provided in `environmentVars`).
717730
718731
ServiceDefinition:
719732
type: object

create-a-container/routers/containers.js

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,24 @@ router.get('/new', requireAuth, async (req, res) => {
116116
order: [['name', 'ASC']]
117117
});
118118

119+
// Check if any node in this site has NVIDIA available
120+
const nvidiaAvailable = await Node.count({
121+
where: { siteId, nvidiaAvailable: true }
122+
}) > 0;
123+
119124
if (isApi) {
120125
return res.json({
121126
site_id: site.id,
122-
domains: externalDomains
127+
domains: externalDomains,
128+
nvidiaAvailable
123129
});
124130
}
125131
// ----------------------------
126132

127133
return res.render('containers/form', {
128134
site,
129135
externalDomains,
136+
nvidiaAvailable,
130137
container: undefined,
131138
req
132139
});
@@ -272,9 +279,14 @@ router.post('/', async (req, res) => {
272279
try {
273280
let { hostname, template, customTemplate, services, environmentVars, entrypoint,
274281
// Extract specific API fields
275-
template_name, repository, branch
282+
template_name, repository, branch,
283+
// NVIDIA GPU passthrough
284+
nvidiaRequested
276285
} = req.body;
277286

287+
// Normalize NVIDIA requested flag
288+
const wantsNvidia = !!nvidiaRequested;
289+
278290
// --- API Payload Mapping ---
279291
if (isApi) {
280292
if (template_name && !template) {
@@ -310,6 +322,18 @@ router.post('/', async (req, res) => {
310322
envVarsJson = JSON.stringify(envObj);
311323
}
312324
}
325+
326+
// Inject NVIDIA environment variables when GPU passthrough is requested
327+
if (wantsNvidia) {
328+
const envObj = envVarsJson ? JSON.parse(envVarsJson) : {};
329+
if (!envObj['NVIDIA_VISIBLE_DEVICES']) {
330+
envObj['NVIDIA_VISIBLE_DEVICES'] = 'all';
331+
}
332+
if (!envObj['NVIDIA_DRIVER_CAPABILITIES']) {
333+
envObj['NVIDIA_DRIVER_CAPABILITIES'] = 'utility compute';
334+
}
335+
envVarsJson = JSON.stringify(envObj);
336+
}
313337

314338
// Resolve Docker image ref from either the dropdown or the custom input
315339
const imageRef = (template === 'custom') ? customTemplate?.trim() : template;
@@ -318,14 +342,21 @@ router.post('/', async (req, res) => {
318342
}
319343
const templateName = normalizeDockerRef(imageRef);
320344

321-
const node = await Node.findOne({
322-
where: {
323-
siteId,
324-
apiUrl: { [Sequelize.Op.ne]: null },
325-
tokenId: { [Sequelize.Op.ne]: null },
326-
secret: { [Sequelize.Op.ne]: null }
327-
}
328-
});
345+
// Build node selection criteria
346+
const nodeWhere = {
347+
siteId,
348+
apiUrl: { [Sequelize.Op.ne]: null },
349+
tokenId: { [Sequelize.Op.ne]: null },
350+
secret: { [Sequelize.Op.ne]: null }
351+
};
352+
if (wantsNvidia) {
353+
nodeWhere.nvidiaAvailable = true;
354+
}
355+
356+
const node = await Node.findOne({ where: nodeWhere });
357+
if (!node && wantsNvidia) {
358+
throw new Error('NVIDIA requested but no NVIDIA-capable nodes are available in this site');
359+
}
329360
if (!node) {
330361
throw new Error('No nodes with API access available in this site');
331362
}
@@ -341,6 +372,7 @@ router.post('/', async (req, res) => {
341372
containerId: null,
342373
macAddress: null,
343374
ipv4Address: null,
375+
nvidiaRequested: wantsNvidia,
344376
environmentVars: envVarsJson,
345377
entrypoint: entrypoint && entrypoint.trim() ? entrypoint.trim() : null
346378
}, { transaction: t });

create-a-container/routers/nodes.js

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ router.post('/', async (req, res) => {
117117
return res.redirect('/sites');
118118
}
119119

120-
const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage } = req.body;
120+
const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage, nvidiaAvailable } = req.body;
121121

122122
await Node.create({
123123
name,
@@ -128,6 +128,7 @@ router.post('/', async (req, res) => {
128128
tlsVerify: tlsVerify === '' || tlsVerify === null ? null : tlsVerify === 'true',
129129
imageStorage: imageStorage || 'local',
130130
volumeStorage: volumeStorage || 'local-lvm',
131+
nvidiaAvailable: nvidiaAvailable === 'true',
131132
siteId
132133
});
133134

@@ -265,7 +266,7 @@ router.put('/:id', async (req, res) => {
265266
return res.redirect(`/sites/${siteId}/nodes`);
266267
}
267268

268-
const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage } = req.body;
269+
const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage, nvidiaAvailable } = req.body;
269270

270271
const updateData = {
271272
name,
@@ -274,7 +275,8 @@ router.put('/:id', async (req, res) => {
274275
tokenId: tokenId || null,
275276
tlsVerify: tlsVerify === '' || tlsVerify === null ? null : tlsVerify === 'true',
276277
imageStorage: imageStorage || 'local',
277-
volumeStorage: volumeStorage || 'local-lvm'
278+
volumeStorage: volumeStorage || 'local-lvm',
279+
nvidiaAvailable: nvidiaAvailable === 'true'
278280
};
279281

280282
// Only update secret if a new value was provided

create-a-container/views/containers/form.ejs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,31 @@ const breadcrumbLabel = isEdit ? 'Edit' : 'New';
5757
</div>
5858
</div>
5959

60+
<% if (!isEdit) { %>
61+
<%
62+
const nvidiaEnabled = typeof nvidiaAvailable !== 'undefined' && nvidiaAvailable;
63+
const nvidiaTooltip = nvidiaEnabled
64+
? 'Enable NVIDIA GPU passthrough for this container'
65+
: 'No NVIDIA-capable nodes are available in this site';
66+
%>
67+
<div style="margin-bottom: 15px;">
68+
<div class="form-check" title="<%= nvidiaTooltip %>">
69+
<input
70+
class="form-check-input"
71+
type="checkbox"
72+
id="nvidiaRequested"
73+
name="nvidiaRequested"
74+
value="true"
75+
<%= nvidiaEnabled ? '' : 'disabled' %>
76+
aria-label="<%= nvidiaTooltip %>"
77+
>
78+
<label class="form-check-label" for="nvidiaRequested" style="<%= nvidiaEnabled ? '' : 'color: #999;' %>">
79+
NVIDIA GPU
80+
</label>
81+
</div>
82+
</div>
83+
<% } %>
84+
6085
<details style="margin-bottom: 15px;">
6186
<summary style="cursor: pointer; font-weight: bold; padding: 8px 0; border-bottom: 1px solid #ddd; margin-bottom: 10px; text-align: left;">
6287
Services

create-a-container/views/nodes/form.ejs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,20 @@
130130
<div class="form-text">Storage for container root filesystems</div>
131131
</div>
132132

133+
<div class="mb-3 form-check">
134+
<input
135+
type="checkbox"
136+
class="form-check-input"
137+
id="nvidiaAvailable"
138+
name="nvidiaAvailable"
139+
value="true"
140+
<%= (node && node.nvidiaAvailable) ? 'checked' : '' %>
141+
aria-label="NVIDIA GPU available on this node"
142+
>
143+
<label class="form-check-label" for="nvidiaAvailable">NVIDIA Available</label>
144+
<div class="form-text">Enable if this node has NVIDIA drivers and nvidia-container-toolkit installed</div>
145+
</div>
146+
133147
<div class="d-flex justify-content-between">
134148
<a href="/sites/<%= site.id %>/nodes" class="btn btn-secondary" aria-label="Cancel and return to nodes list">Cancel</a>
135149
<button type="submit" class="btn btn-primary">

0 commit comments

Comments
 (0)