Skip to content

Commit aaddc8b

Browse files
committed
WIP slurm compute node image
1 parent ee744b1 commit aaddc8b

File tree

9 files changed

+241
-0
lines changed

9 files changed

+241
-0
lines changed

ansible/linux-slurmcompute.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
# Single play: applies one role to every host in the inventory, with
# privilege escalation enabled for all of its tasks.
- hosts: all
  become: true
  roles:
    # NOTE(review): this playbook is linux-slurmcompute.yml but it applies the
    # jupyter-repo2docker role - confirm this is the intended role.
    - jupyter-repo2docker
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# First of the two package lists concatenated by the "Install packages" task.
openhpc_slurm_packages:
  - ohpc-base-compute
  - ohpc-slurm-client
  - munge
5+
# Second package list concatenated by the "Install packages" task.
openhpc_extra_packages:
  - lmod-ohpc
  # to allow intel mpi to work properly
  - slurm-libpmi-ohpc
  # for hpctests
  - ohpc-gnu9-openmpi4-perf-tools
  # for hpctests (HPL)
  - openblas-gnu9-ohpc
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Relocate the home directory of the local "rocky" account to /var/lib/rocky,
# moving the existing contents.
- name: Move rocky user
  become_method: sudo
  # A login shell ("-i") is needed so sudo changes working directory; otherwise
  # we try to switch back to a directory that no longer exists.
  become_flags: "-i"
  ansible.builtin.user:
    name: rocky
    local: true
    home: /var/lib/rocky
    move_home: true
10+
11+
# Drop the persistent connection so following tasks reconnect and see the
# modified user account.
- name: Reset ssh connection to allow user changes to affect ansible_user
  ansible.builtin.meta: reset_connection
13+
14+
# Configure SELinux as permissive using the targeted policy.
- name: Set SELinux state and policy
  ansible.posix.selinux:
    policy: targeted
    state: permissive
18+
19+
# Upgrade every installed package. Run asynchronously so a long update does
# not hit the connection timeout.
- name: Update base image packages
  # wait for up to 30 minutes
  async: "{{ 30 * 60 }}"
  # check every 15 seconds
  poll: 15
  ansible.builtin.dnf:
    name: "*"
    state: latest
25+
26+
# Restart the host so the package updates and the SELinux change take effect.
- name: Reboot to cope with package updates and SELinux changes
  ansible.builtin.reboot:
    # seconds to wait after the reboot before checking the host is back
    post_reboot_delay: 30
29+
30+
# Block until the host accepts connections again.
- name: Wait for hosts to be reachable
  ansible.builtin.wait_for_connection:
    # seconds between connection attempts
    sleep: 15
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Capture `git describe` output for the repository the build is run from.
# Runs on the Ansible controller (delegate_to: localhost), not on the target;
# the result is registered as builder_commit for the following task.
- name: Get builder commit
  # No shell features are used, so the command module is sufficient.
  ansible.builtin.command:
    cmd: git describe --all --long --dirty
  register: builder_commit
  delegate_to: localhost
  # Do not escalate on the controller: the play-level `become` would otherwise
  # attempt sudo on localhost just to read git metadata.
  become: false
  # Read-only query; never report a change.
  changed_when: false
6+
7+
# Persist the registered `git describe` output into the image.
- name: Write builder commit to /var/lib/misc/build.txt
  ansible.builtin.copy:
    content: "{{ builder_commit.stdout }}"
    dest: /var/lib/misc/build.txt
11+
12+
# Apply the linux-ansible-init role, then pull in this role's task files in order.
- ansible.builtin.include_role:
    name: linux-ansible-init

- ansible.builtin.import_tasks: bootstrap.yml
# - import_tasks: reimage.yml  # TODO: if required to support slurm-driven rebuild from compute nodes
- ansible.builtin.import_tasks: slurm.yml
- ansible.builtin.import_tasks: ood_vnc.yml
- ansible.builtin.import_tasks: monitoring.yml
20+
21+
# Required because, if cloud-init (rather than NetworkManager) controls this
# file on next boot, it won't be entirely overwritten.
- name: Delete /etc/resolv.conf
  ansible.builtin.file:
    state: absent
    path: /etc/resolv.conf
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Statically import the cloudalchemy.node_exporter role.
- name: Deploy node_exporter
  ansible.builtin.import_role:
    name: cloudalchemy.node_exporter
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# See https://osc.github.io/ood-documentation/latest/app-development/tutorials-interactive-apps/add-jupyter/software-requirements.html
2+
# - Will already have openssl and lmod
3+
4+
# Create the /opt/jupyter virtualenv (via `python3 -m venv`) and upgrade pip in it.
# Kept as a separate step so that the upgraded pip is used to install packages.
- name: Install jupyter venv
  ansible.builtin.pip:
    virtualenv: /opt/jupyter
    virtualenv_command: python3 -m venv
    name: pip
    state: latest
10+
virtualenv_command: python3 -m venv
11+
12+
# Install the jupyter package into the /opt/jupyter virtualenv.
- name: Install jupyter package in venv
  ansible.builtin.pip:
    virtualenv: /opt/jupyter
    virtualenv_command: python3 -m venv
    name: jupyter
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Download the TurboVNC repo definition into the yum repo directory.
- name: Enable TurboVNC repo
  ansible.builtin.get_url:
    dest: /etc/yum.repos.d/TurboVNC.repo
    url: https://turbovnc.org/pmwiki/uploads/Downloads/TurboVNC.repo
5+
6+
# Install the epel-release package.
- name: Install EPEL
  ansible.builtin.dnf:
    name: "epel-release"
9+
10+
# Install TurboVNC, ncat and python3 in one transaction.
- name: Install VNC-related packages
  ansible.builtin.dnf:
    name: [turbovnc, nmap-ncat, python3]
16+
17+
# Install the Xfce package group (the leading "@" selects a group).
- name: Install Xfce desktop
  ansible.builtin.dnf:
    name: "@Xfce"
20+
21+
# Create the /opt/websockify virtualenv (via `python3 -m venv`) and upgrade pip in it.
# Kept as a separate step so that the upgraded pip is used to install packages.
- name: Install websockify venv
  ansible.builtin.pip:
    name: pip
    # `latest` is what actually upgrades pip; matches the jupyter venv task.
    state: latest
    virtualenv: /opt/websockify
    virtualenv_command: python3 -m venv
  tags: install
28+
29+
# Install websockify into the /opt/websockify virtualenv.
- name: Install websockify package in venv
  tags: install
  ansible.builtin.pip:
    virtualenv: /opt/websockify
    virtualenv_command: python3 -m venv
    name: websockify
35+
36+
# Make /opt/websockify/run a symlink to the venv's websockify executable.
- name: Symlink websockify to where Open Ondemand expects
  ansible.builtin.file:
    state: link
    dest: /opt/websockify/run
    src: /opt/websockify/bin/websockify
41+
42+
# Remove the screensaver package, as users might not have passwords.
- name: Disable screensaver
  ansible.builtin.dnf:
    state: absent
    name: xfce4-screensaver
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Install the OpenHPC release RPM directly from its URL, skipping the GPG check.
- name: Install ohpc-release package
  ansible.builtin.yum:
    state: present
    disable_gpg_check: true
    name: "http://repos.openhpc.community/OpenHPC/2/CentOS_8/x86_64/ohpc-release-2-1.el8.x86_64.rpm"
6+
7+
# Set `enabled = 1` in the PowerTools repo definition. `create: false` makes
# the task fail rather than create the file if the path does not exist.
- name: Enable PowerTools repo
  community.general.ini_file:
    # NOTE(review): the original value had a stray " 8.3" appended, which made
    # the path invalid. The file name differs between EL8 variants/releases
    # (e.g. CentOS-Linux-PowerTools.repo, Rocky-PowerTools.repo) - confirm
    # against the base image.
    path: /etc/yum.repos.d/CentOS-PowerTools.repo
    # The option must be written inside the repo's section to take effect.
    # NOTE(review): assumes the section id is "powertools" - verify (older
    # CentOS 8 releases use "PowerTools").
    section: powertools
    create: false
    option: enabled
    value: "1"
13+
14+
# Install the concatenation of the two role package lists.
- name: Install packages
  ansible.builtin.dnf:
    # The original expression ended with a dangling "+", a Jinja syntax error.
    name: "{{ openhpc_slurm_packages + openhpc_extra_packages }}"
17+
18+
# - munge key
19+
# - JobComp logfile exists?
20+
# - name: Set slurmctld location for configless operation
21+
#- name: Ensure Slurm service state
22+
# make sure munge and slurm wait for cloud-init
23+

packer/linux-slurmcompute.pkr.hcl

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Use like:
2+
# $ PACKER_LOG=1 packer build --on-error=ask -var-file=<something>.pkrvars.hcl <thisfile>
3+
4+
# Replacement for the "timestamp" template function: the current time
# formatted as YYMMDD-hhmm, used to build the image name.
locals {
  timestamp = formatdate("YYMMDD-hhmm", timestamp())
}
6+
7+
# Input variables. Those without a default must be supplied, e.g. via -var-file.

# Name of the image to build from.
variable "source_image_name" {
  type = string
}

# Network the build instance is attached to.
variable "network" {
  type = string
}

# Network from which a floating IP is allocated.
variable "floating_ip_network" {
  type = string
}

# Flavor of the build instance.
variable "flavor" {
  type = string
}

# Security groups applied to the build instance.
variable "security_groups" {
  type = list(string)
}

# Size of the block-storage volume used for the build.
variable "volume_size" {
  type    = number
  default = 10
}

# Disk format of the resulting image.
variable "disk_format" {
  type    = string
  default = "qcow2"
}

# Prefix used in the resulting image name.
variable "distro_name" {
  type = string
}

# User that Packer connects as over SSH.
variable "ssh_username" {
  type = string
}
44+
45+
# OpenStack builder: boots an instance from var.source_image_name on a
# block-storage volume and captures it as a private image.
source "openstack" "linux-slurmcompute" {
  # NOTE(review): the image name contains "-desktop-" although this source is
  # "linux-slurmcompute" - confirm the intended name.
  image_name        = "${var.distro_name}-desktop-${local.timestamp}"
  image_visibility  = "private"
  image_disk_format = var.disk_format

  source_image_name   = var.source_image_name
  flavor              = var.flavor
  networks            = [var.network]
  security_groups     = var.security_groups
  floating_ip_network = var.floating_ip_network

  use_blockstorage_volume = true
  volume_size             = var.volume_size

  communicator              = "ssh"
  ssh_username              = var.ssh_username
  ssh_clear_authorized_keys = true
}
63+
64+
# Build pipeline: provision the OpenStack instance with the Ansible playbook,
# then record the result in a manifest.
build {
  # Must name a source declared in this template. The original referenced
  # "source.openstack.linux-desktop", which is not defined here.
  source "source.openstack.linux-slurmcompute" {}

  provisioner "ansible" {
    galaxy_file   = "${path.root}/../requirements.yml"
    playbook_file = "${path.root}/../ansible/linux-slurmcompute.yml"
    use_proxy     = false
    extra_arguments = [
      "-v",
    ]
    ansible_env_vars = ["ANSIBLE_SSH_RETRIES=10"]
  }

  post-processor "manifest" {
    custom_data = {
      # Record which source produced the artifact.
      source = "${source.name}"
    }
  }
}

0 commit comments

Comments
 (0)