Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions create-a-container/bin/create-container.js
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,28 @@ async function main() {
console.log('Environment/entrypoint configuration applied');
}

// Attach NVIDIA hookscript when GPU passthrough is requested
if (container.nvidiaRequested) {
const hookscriptVolid = 'local:snippets/nvidia';
console.log(`NVIDIA requested — attaching hookscript ${hookscriptVolid}...`);

// Check if the hookscript file exists on the node
try {
const snippets = await client.storageContents(node.name, 'local', 'snippets');
const hookExists = snippets.some(item => item.volid === hookscriptVolid);
if (!hookExists) {
console.warn('⚠️ WARNING: nvidia-container-toolkit hookscript not found at local:snippets/nvidia.');
console.warn(' NVIDIA GPU passthrough may not function. See admin docs for setup instructions.');
}
} catch (snippetErr) {
console.warn('⚠️ WARNING: Could not verify nvidia hookscript availability:', snippetErr.message);
console.warn(' NVIDIA GPU passthrough may not function. See admin docs for setup instructions.');
}

await client.updateLxcConfig(node.name, vmid, { hookscript: hookscriptVolid });
console.log('NVIDIA hookscript attached');
}

// Setup ACL for container owner
await setupContainerAcl(client, node.name, vmid, container.username);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
'use strict';

// Table and column this migration manages.
const TABLE_NAME = 'Nodes';
const COLUMN_NAME = 'nvidiaAvailable';

/**
 * Describes the new column: a non-nullable boolean that defaults to `false`.
 *
 * @param {object} Sequelize - The Sequelize namespace handed to the migration (source of data types).
 * @returns {object} Column definition passed to `queryInterface.addColumn`.
 */
function buildColumnDefinition(Sequelize) {
  return {
    type: Sequelize.BOOLEAN,
    allowNull: false,
    defaultValue: false,
  };
}

/**
 * Migration that adds the `nvidiaAvailable` flag to the `Nodes` table.
 * Rolling back drops the column again.
 *
 * @type {import('sequelize-cli').Migration}
 */
const migration = {
  // Apply: add the column.
  up: async (queryInterface, Sequelize) => {
    await queryInterface.addColumn(TABLE_NAME, COLUMN_NAME, buildColumnDefinition(Sequelize));
  },

  // Revert: remove the column.
  down: async (queryInterface) => {
    await queryInterface.removeColumn(TABLE_NAME, COLUMN_NAME);
  },
};

module.exports = migration;
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
'use strict';

// Table and column this migration manages.
const TABLE_NAME = 'Containers';
const COLUMN_NAME = 'nvidiaRequested';

/**
 * Describes the new column: a non-nullable boolean that defaults to `false`.
 *
 * @param {object} Sequelize - The Sequelize namespace handed to the migration (source of data types).
 * @returns {object} Column definition passed to `queryInterface.addColumn`.
 */
function buildColumnDefinition(Sequelize) {
  return {
    type: Sequelize.BOOLEAN,
    allowNull: false,
    defaultValue: false,
  };
}

/**
 * Migration that adds the `nvidiaRequested` flag to the `Containers` table.
 * Rolling back drops the column again.
 *
 * @type {import('sequelize-cli').Migration}
 */
const migration = {
  // Apply: add the column.
  up: async (queryInterface, Sequelize) => {
    await queryInterface.addColumn(TABLE_NAME, COLUMN_NAME, buildColumnDefinition(Sequelize));
  },

  // Revert: remove the column.
  down: async (queryInterface) => {
    await queryInterface.removeColumn(TABLE_NAME, COLUMN_NAME);
  },
};

module.exports = migration;
5 changes: 5 additions & 0 deletions create-a-container/models/container.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ module.exports = (sequelize, DataTypes) => {
allowNull: false,
defaultValue: 'N'
},
nvidiaRequested: {
type: DataTypes.BOOLEAN,
allowNull: false,
defaultValue: false
},
environmentVars: {
type: DataTypes.TEXT,
allowNull: true,
Expand Down
5 changes: 5 additions & 0 deletions create-a-container/models/node.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ module.exports = (sequelize, DataTypes) => {
type: DataTypes.STRING(255),
allowNull: false,
defaultValue: 'vmbr0'
},
nvidiaAvailable: {
type: DataTypes.BOOLEAN,
allowNull: false,
defaultValue: false
}
}, {
sequelize,
Expand Down
13 changes: 13 additions & 0 deletions create-a-container/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,9 @@ paths:
type: array
items:
$ref: '#/components/schemas/ExternalDomain'
nvidiaAvailable:
type: boolean
description: Whether any node in the site has NVIDIA GPU support
'401':
$ref: '#/components/responses/Unauthorized'
'404':
Expand Down Expand Up @@ -668,6 +671,9 @@ components:
createdAt:
type: string
format: date-time
nvidiaRequested:
type: boolean
description: Whether NVIDIA GPU passthrough was requested

ContainerCreateRequest:
type: object
Expand Down Expand Up @@ -714,6 +720,13 @@ components:
type: string
nullable: true
description: Override the container entrypoint command
nvidiaRequested:
type: boolean
default: false
description: |
Request NVIDIA GPU passthrough. Requires at least one NVIDIA-capable node in the site.
When true, `NVIDIA_VISIBLE_DEVICES` and `NVIDIA_DRIVER_CAPABILITIES` environment
variables are set automatically (unless explicitly provided in `environmentVars`).

ServiceDefinition:
type: object
Expand Down
52 changes: 42 additions & 10 deletions create-a-container/routers/containers.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,24 @@ router.get('/new', requireAuth, async (req, res) => {
// Get all external domains: default domains for this site first (by id), then others (by id)
const externalDomains = await site.getSortedExternalDomains();

// Check if any node in this site has NVIDIA available
const nvidiaAvailable = await Node.count({
where: { siteId, nvidiaAvailable: true }
}) > 0;

if (isApi) {
return res.json({
site_id: site.id,
domains: externalDomains
domains: externalDomains,
nvidiaAvailable
});
}
// ----------------------------

return res.render('containers/form', {
site,
externalDomains,
nvidiaAvailable,
container: undefined,
req
});
Expand Down Expand Up @@ -271,9 +278,14 @@ router.post('/', async (req, res) => {
try {
let { hostname, template, customTemplate, services, environmentVars, entrypoint,
// Extract specific API fields
template_name, repository, branch
template_name, repository, branch,
// NVIDIA GPU passthrough
nvidiaRequested
} = req.body;

// Normalize NVIDIA requested flag
const wantsNvidia = !!nvidiaRequested;

// --- API Payload Mapping ---
if (isApi) {
if (template_name && !template) {
Expand Down Expand Up @@ -309,6 +321,18 @@ router.post('/', async (req, res) => {
envVarsJson = JSON.stringify(envObj);
}
}

// Inject NVIDIA environment variables when GPU passthrough is requested
if (wantsNvidia) {
const envObj = envVarsJson ? JSON.parse(envVarsJson) : {};
if (!envObj['NVIDIA_VISIBLE_DEVICES']) {
envObj['NVIDIA_VISIBLE_DEVICES'] = 'all';
}
if (!envObj['NVIDIA_DRIVER_CAPABILITIES']) {
envObj['NVIDIA_DRIVER_CAPABILITIES'] = 'utility compute';
}
envVarsJson = JSON.stringify(envObj);
}

// Resolve Docker image ref from either the dropdown or the custom input
const imageRef = (template === 'custom') ? customTemplate?.trim() : template;
Expand All @@ -317,14 +341,21 @@ router.post('/', async (req, res) => {
}
const templateName = normalizeDockerRef(imageRef);

const node = await Node.findOne({
where: {
siteId,
apiUrl: { [Sequelize.Op.ne]: null },
tokenId: { [Sequelize.Op.ne]: null },
secret: { [Sequelize.Op.ne]: null }
}
});
// Build node selection criteria
const nodeWhere = {
siteId,
apiUrl: { [Sequelize.Op.ne]: null },
tokenId: { [Sequelize.Op.ne]: null },
secret: { [Sequelize.Op.ne]: null }
};
if (wantsNvidia) {
nodeWhere.nvidiaAvailable = true;
}

const node = await Node.findOne({ where: nodeWhere });
if (!node && wantsNvidia) {
throw new Error('NVIDIA requested but no NVIDIA-capable nodes are available in this site');
}
if (!node) {
throw new Error('No nodes with API access available in this site');
}
Expand All @@ -340,6 +371,7 @@ router.post('/', async (req, res) => {
containerId: null,
macAddress: null,
ipv4Address: null,
nvidiaRequested: wantsNvidia,
environmentVars: envVarsJson,
entrypoint: entrypoint && entrypoint.trim() ? entrypoint.trim() : null
}, { transaction: t });
Expand Down
8 changes: 5 additions & 3 deletions create-a-container/routers/nodes.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ router.post('/', async (req, res) => {
return res.redirect('/sites');
}

const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage, networkBridge } = req.body;
const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage, networkBridge, nvidiaAvailable } = req.body;

await Node.create({
name,
Expand All @@ -129,6 +129,7 @@ router.post('/', async (req, res) => {
imageStorage: imageStorage || 'local',
volumeStorage: volumeStorage || 'local-lvm',
networkBridge: networkBridge || 'vmbr0',
nvidiaAvailable: nvidiaAvailable === 'true',
siteId
});

Expand Down Expand Up @@ -267,7 +268,7 @@ router.put('/:id', async (req, res) => {
return res.redirect(`/sites/${siteId}/nodes`);
}

const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage, networkBridge } = req.body;
const { name, ipv4Address, apiUrl, tokenId, secret, tlsVerify, imageStorage, volumeStorage, networkBridge, nvidiaAvailable } = req.body;

const updateData = {
name,
Expand All @@ -277,7 +278,8 @@ router.put('/:id', async (req, res) => {
tlsVerify: tlsVerify === '' || tlsVerify === null ? null : tlsVerify === 'true',
imageStorage: imageStorage || 'local',
volumeStorage: volumeStorage || 'local-lvm',
networkBridge: networkBridge || 'vmbr0'
networkBridge: networkBridge || 'vmbr0',
nvidiaAvailable: nvidiaAvailable === 'true'
};

// Only update secret if a new value was provided
Expand Down
25 changes: 25 additions & 0 deletions create-a-container/views/containers/form.ejs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,31 @@ const breadcrumbLabel = isEdit ? 'Edit' : 'New';
</div>
</div>

<% if (!isEdit) { %>
<%
const nvidiaEnabled = typeof nvidiaAvailable !== 'undefined' && nvidiaAvailable;
const nvidiaTooltip = nvidiaEnabled
? 'Enable NVIDIA GPU passthrough for this container'
: 'No NVIDIA-capable nodes are available in this site';
%>
<div style="margin-bottom: 15px;">
<div class="form-check" title="<%= nvidiaTooltip %>">
<input
class="form-check-input"
type="checkbox"
id="nvidiaRequested"
name="nvidiaRequested"
value="true"
<%= nvidiaEnabled ? '' : 'disabled' %>
aria-label="<%= nvidiaTooltip %>"
>
<label class="form-check-label" for="nvidiaRequested" style="<%= nvidiaEnabled ? '' : 'color: #999;' %>">
NVIDIA GPU
</label>
</div>
</div>
<% } %>

<details style="margin-bottom: 15px;">
<summary style="cursor: pointer; font-weight: bold; padding: 8px 0; border-bottom: 1px solid #ddd; margin-bottom: 10px; text-align: left;">
Services
Expand Down
15 changes: 15 additions & 0 deletions create-a-container/views/nodes/form.ejs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,21 @@
<div class="form-text">Proxmox network bridge used when creating containers</div>
</div>

<div class="mb-3 form-check">
<input
type="checkbox"
class="form-check-input"
id="nvidiaAvailable"
name="nvidiaAvailable"
value="true"
<%= (node && node.nvidiaAvailable) ? 'checked' : '' %>
aria-label="NVIDIA GPU available on this node"
>
<label class="form-check-label" for="nvidiaAvailable">NVIDIA Available</label>
<div class="form-text">Enable if this node has NVIDIA drivers and nvidia-container-toolkit installed</div>
</div>
</div>

<div class="d-flex justify-content-between">
<a href="/sites/<%= site.id %>/nodes" class="btn btn-secondary" aria-label="Cancel and return to nodes list">Cancel</a>
<button type="submit" class="btn btn-primary">
Expand Down
75 changes: 75 additions & 0 deletions mie-opensource-landing/docs/admins/nvidia-container-toolkit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
---
sidebar_position: 7
---

# NVIDIA Container Toolkit

Configure Proxmox VE nodes to pass NVIDIA GPUs through to LXC containers.

## 1. Install the NVIDIA Driver

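Install the driver from Debian's official repositories. Proxmox VE is Debian-based, and the `nvidia-driver` package lives in the `non-free` component, so the `contrib`, `non-free`, and `non-free-firmware` components must be enabled in your APT sources.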

```bash
# Requires the contrib, non-free, and non-free-firmware components to be enabled in your APT sources
apt update
apt install -y nvidia-driver
```

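If the packaged version is too old for your GPU, install it from Debian backports instead (the backports repository must be enabled; replace `bookworm` with your Debian release codename):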

```bash
apt install -y -t bookworm-backports nvidia-driver
```

Reboot after installation and verify with `nvidia-smi`.

## 2. Install the NVIDIA Container Toolkit

Follow the official NVIDIA installation guide for your distribution:

👉 [NVIDIA Container Toolkit — Install Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)

The short version for Debian/Ubuntu:

```bash
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
| gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg

curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
| tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

apt update
apt install -y nvidia-container-toolkit
```

## 3. Create the Hookscript Symlink

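The application passes GPU access to containers via an LXC hookscript, which it attaches as the Proxmox volume `local:snippets/nvidia`. Proxmox stores snippets for the `local` storage in `/var/lib/vz/snippets/`, but the hook file is installed to `/usr/share/lxc/hooks/nvidia` by the `lxc-common` package (installed automatically as a dependency). Make sure the `local` storage has the **Snippets** content type enabled (Datacenter → Storage → local → Edit → Content), then create a symlink so Proxmox can find the hook: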

```bash
mkdir -p /var/lib/vz/snippets
ln -sf /usr/share/lxc/hooks/nvidia /var/lib/vz/snippets/nvidia
```

Verify the symlink:

```bash
ls -la /var/lib/vz/snippets/nvidia
# Should show: /var/lib/vz/snippets/nvidia -> /usr/share/lxc/hooks/nvidia
```

:::important
This symlink must exist on every Proxmox node where NVIDIA containers will be created. Without it, containers will be created but GPU passthrough will not function. The application logs a warning during container creation if the hookscript is missing.
:::

## 4. Mark the Node as NVIDIA-Capable

After completing the host-level setup, mark the node as NVIDIA-capable in the application:

1. Navigate to **Nodes** → select the node
2. Enable the **NVIDIA Available** flag
3. Save

Users will then be able to request NVIDIA GPU access when creating containers on this site.
Loading
Loading