Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ansible/linux-slurmcompute.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
# Entry-point playbook for the Slurm compute image build.

- name: Build Slurm compute node image
  hosts: all
  become: true
  roles:
    # Apply the role that matches this playbook (roles/linux-slurmcompute).
    # The previous entry, jupyter-repo2docker, meant none of that role's
    # tasks were ever run by this playbook.
    - linux-slurmcompute
9 changes: 9 additions & 0 deletions ansible/roles/linux-slurmcompute/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Role defaults: package lists installed by the "Install packages" task,
# which concatenates both lists into a single dnf transaction.

# Core Slurm client packages.
openhpc_slurm_packages:
  - ohpc-base-compute
  - ohpc-slurm-client
  - munge

# Extra packages installed alongside the Slurm ones.
openhpc_extra_packages:
  - lmod-ohpc
  # to allow intel mpi to work properly
  - slurm-libpmi-ohpc
  # for hpctests
  - ohpc-gnu9-openmpi4-perf-tools
  # for hpctests (HPL)
  - openblas-gnu9-ohpc
32 changes: 32 additions & 0 deletions ansible/roles/linux-slurmcompute/tasks/bootstrap.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
# Bootstrap tasks: relocate the rocky user's home, set SELinux to permissive,
# update all packages, then reboot and wait for the host to come back.

- name: Move rocky user
  become_method: sudo
  # Need to change working directory otherwise we try to switch back to
  # non-existent directory.
  become_flags: "-i"
  ansible.builtin.user:
    name: rocky
    local: true
    home: /var/lib/rocky
    move_home: true

- name: Reset ssh connection to allow user changes to affect ansible_user
  ansible.builtin.meta: reset_connection

- name: Set SELinux state and policy
  ansible.posix.selinux:
    policy: targeted
    state: permissive

- name: Update base image packages
  ansible.builtin.dnf:
    name: "*"
    state: latest
  # Run asynchronously: wait for up to 30 minutes (1800 s) ...
  async: 1800
  # ... checking for completion every 15 seconds.
  poll: 15

- name: Reboot to cope with package updates and SELinux changes
  ansible.builtin.reboot:
    post_reboot_delay: 30

- name: Wait for hosts to be reachable
  ansible.builtin.wait_for_connection:
    sleep: 15
25 changes: 25 additions & 0 deletions ansible/roles/linux-slurmcompute/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
---
# Main task list for the linux-slurmcompute role: record which commit built
# the image, then run the bootstrap, Slurm, VNC and monitoring task files.

- name: Get builder commit
  # Runs on the Ansible controller (delegate_to: localhost), so it describes
  # the checkout the build is run from. No shell features are needed, hence
  # `command` rather than `shell`.
  ansible.builtin.command:
    cmd: git describe --all --long --dirty
  register: builder_commit
  delegate_to: localhost
  # Do not inherit play-level privilege escalation for a local, read-only
  # git query.
  become: false
  # Purely informational; never report a change.
  changed_when: false

- name: Write builder commit to /var/lib/misc/build.txt
  ansible.builtin.copy:
    dest: /var/lib/misc/build.txt
    content: "{{ builder_commit.stdout }}"

- name: Include linux-ansible-init role
  ansible.builtin.include_role:
    name: linux-ansible-init

- name: Run bootstrap tasks
  ansible.builtin.import_tasks: bootstrap.yml

# TODO: if required to support slurm-driven rebuild from compute nodes
# - import_tasks: reimage.yml

- name: Run Slurm tasks
  ansible.builtin.import_tasks: slurm.yml

- name: Run Open OnDemand VNC tasks
  ansible.builtin.import_tasks: ood_vnc.yml

# NOTE(review): tasks/ood_jupyter.yml exists in this role but is not imported
# here - confirm whether that is intentional.

- name: Run monitoring tasks
  ansible.builtin.import_tasks: monitoring.yml

- name: Delete /etc/resolv.conf
  # Required because if cloud-init (rather than NetworkManager) controls this
  # file on next boot it won't be entirely overwritten.
  ansible.builtin.file:
    path: /etc/resolv.conf
    state: absent
3 changes: 3 additions & 0 deletions ansible/roles/linux-slurmcompute/tasks/monitoring.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
# Monitoring: statically import the cloudalchemy.node_exporter role.
- name: Deploy node_exporter
  ansible.builtin.import_role:
    name: cloudalchemy.node_exporter
16 changes: 16 additions & 0 deletions ansible/roles/linux-slurmcompute/tasks/ood_jupyter.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# Jupyter for Open OnDemand, installed into a dedicated venv at /opt/jupyter.
# Software requirements are listed at:
# https://osc.github.io/ood-documentation/latest/app-development/tutorials-interactive-apps/add-jupyter/software-requirements.html
# - Will already have openssl and lmod

- name: Install jupyter venv
  # Kept as a separate step so that the upgraded pip is the one used to
  # install packages in the next task.
  ansible.builtin.pip:
    virtualenv: /opt/jupyter
    virtualenv_command: python3 -m venv
    name: pip
    state: latest

- name: Install jupyter package in venv
  ansible.builtin.pip:
    virtualenv: /opt/jupyter
    virtualenv_command: python3 -m venv
    name: jupyter
45 changes: 45 additions & 0 deletions ansible/roles/linux-slurmcompute/tasks/ood_vnc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
---
# VNC desktop support for Open OnDemand: TurboVNC, an Xfce desktop and
# websockify installed into a venv at /opt/websockify.

- name: Enable TurboVNC repo
  ansible.builtin.get_url:
    url: https://turbovnc.org/pmwiki/uploads/Downloads/TurboVNC.repo
    dest: /etc/yum.repos.d/TurboVNC.repo
    # Explicit mode so the repo file's permissions do not depend on the umask.
    mode: "0644"

- name: Install EPEL
  ansible.builtin.dnf:
    name: epel-release

- name: Install VNC-related packages
  ansible.builtin.dnf:
    name:
      - turbovnc
      - nmap-ncat
      - python3

- name: Install Xfce desktop
  ansible.builtin.dnf:
    name: "@Xfce"

- name: Install websockify venv
  # Requires a separate step so that the upgraded pip is used to install
  # packages. `state: latest` is what actually upgrades pip; without it the
  # venv's bundled pip is left as-is.
  ansible.builtin.pip:
    name: pip
    state: latest
    virtualenv: /opt/websockify
    virtualenv_command: python3 -m venv
  tags: install

- name: Install websockify package in venv
  ansible.builtin.pip:
    name: websockify
    virtualenv: /opt/websockify
    virtualenv_command: python3 -m venv
  tags: install

- name: Symlink websockify to where Open Ondemand expects
  ansible.builtin.file:
    src: /opt/websockify/bin/websockify
    dest: /opt/websockify/run
    state: link

- name: Disable screensaver  # as users might not have passwords
  # Done by removing the xfce4-screensaver package.
  ansible.builtin.dnf:
    name: xfce4-screensaver
    state: absent
23 changes: 23 additions & 0 deletions ansible/roles/linux-slurmcompute/tasks/slurm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
# Slurm client installation from OpenHPC packages.

- name: Install ohpc-release package
  # Installed directly from the release RPM URL, with GPG checking disabled
  # for this one package.
  ansible.builtin.dnf:
    name: "http://repos.openhpc.community/OpenHPC/2/CentOS_8/x86_64/ohpc-release-2-1.el8.x86_64.rpm"
    state: present
    disable_gpg_check: true

- name: Enable PowerTools repo
  # The path previously carried stray trailing text (" 8.3") and the required
  # `section` argument was missing, so this task could not succeed.
  # NOTE(review): the repo file name and section name are assumed here; some
  # releases use a different file name and a lowercase `powertools` section -
  # confirm against the base image. `create: false` makes a wrong path fail
  # loudly rather than create a new file.
  community.general.ini_file:
    path: /etc/yum.repos.d/CentOS-PowerTools.repo
    section: PowerTools
    create: false
    option: enabled
    value: "1"

- name: Install packages
  # Both lists come from the role defaults. The expression previously ended in
  # a dangling `+`, which is a Jinja2 syntax error.
  ansible.builtin.dnf:
    name: "{{ openhpc_slurm_packages + openhpc_extra_packages }}"

# TODO - still to be implemented:
# - munge key
# - JobComp logfile exists?
# - name: Set slurmctld location for configless operation
# - name: Ensure Slurm service state
# - make sure munge and slurm wait for cloud-init

82 changes: 82 additions & 0 deletions packer/linux-slurmcompute.pkr.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Usage:
#   $ PACKER_LOG=1 packer build --on-error=ask -var-file=<something>.pkrvars.hcl <thisfile>

# Replacement for the legacy "timestamp" template function: a build-time
# stamp in YYMMDD-hhmm form.
locals {
  timestamp = formatdate("YYMMDD-hhmm", timestamp())
}

# --- Input variables (supply via -var-file) ---------------------------------

# Name of the existing image the build instance is booted from.
variable "source_image_name" {
  type = string
}

# Network the build instance is attached to.
variable "network" {
  type = string
}

# Network a floating IP is taken from for the build instance.
variable "floating_ip_network" {
  type = string
}

# Flavor of the build instance.
variable "flavor" {
  type = string
}

# Security groups applied to the build instance.
variable "security_groups" {
  type = list(string)
}

# Size of the block-storage volume used for the build (defaults to 10).
variable "volume_size" {
  type    = number
  default = 10
}

# Disk format of the resulting image (defaults to qcow2).
variable "disk_format" {
  type    = string
  default = "qcow2"
}

# Distribution name; used as the prefix of the output image name.
variable "distro_name" {
  type = string
}

# User Packer connects as over SSH.
variable "ssh_username" {
  type = string
}

# OpenStack builder for the Slurm compute image.
source "openstack" "linux-slurmcompute" {
  # Output image. The name previously contained "-desktop-", which mislabels
  # an image built from the linux-slurmcompute playbook.
  image_name        = "${var.distro_name}-slurmcompute-${local.timestamp}"
  image_visibility  = "private"
  image_disk_format = var.disk_format

  # Build instance.
  source_image_name   = var.source_image_name
  flavor              = var.flavor
  networks            = [var.network]
  security_groups     = var.security_groups
  floating_ip_network = var.floating_ip_network

  # Build on a block-storage volume of the requested size.
  use_blockstorage_volume = true
  volume_size             = var.volume_size

  # SSH access for provisioning; authorized keys are cleared afterwards.
  communicator              = "ssh"
  ssh_username              = var.ssh_username
  ssh_clear_authorized_keys = true
}

# Build: provision the OpenStack instance with Ansible, then write a manifest.
build {
  # Must reference the source defined in this file. The previous reference
  # (source.openstack.linux-desktop) is not defined here.
  source "source.openstack.linux-slurmcompute" {}

  provisioner "ansible" {
    galaxy_file   = "${path.root}/../requirements.yml"
    playbook_file = "${path.root}/../ansible/linux-slurmcompute.yml"
    use_proxy     = false
    extra_arguments = [
      "-v",
    ]
    ansible_env_vars = ["ANSIBLE_SSH_RETRIES=10"]
  }

  # Record which source produced the artifact in the manifest.
  post-processor "manifest" {
    custom_data = {
      source = source.name
    }
  }
}