diff --git a/ansible/roles/k3s/files/start_k3s.yml b/ansible/roles/k3s/files/start_k3s.yml index b9b82f1c4..4ff72bcd8 100644 --- a/ansible/roles/k3s/files/start_k3s.yml +++ b/ansible/roles/k3s/files/start_k3s.yml @@ -5,6 +5,7 @@ k3s_token: "{{ os_metadata.meta.k3s_token }}" k3s_server_name: "{{ os_metadata.meta.control_address }}" service_name: "{{ 'k3s-agent' if k3s_server_name is defined else 'k3s' }}" + access_ip: "{{ os_metadata.meta.access_ip }}" tasks: - name: Ensure password directory exists ansible.builtin.file: @@ -22,6 +23,13 @@ path: "/etc/systemd/system/{{ service_name }}.service.env" line: "K3S_TOKEN={{ k3s_token }}" + - name: Add the node IP to the environment + # NB this isn't natively setable via envvars, have to modify + # INSTALL_K3S_EXEC to support it + ansible.builtin.lineinfile: + path: "/etc/systemd/system/{{ service_name }}.service.env" + line: "K3S_NODE_IP={{ access_ip }}" + - name: Add server url to agents ansible.builtin.lineinfile: path: "/etc/systemd/system/{{ service_name }}.service.env" diff --git a/ansible/roles/k3s/tasks/install.yml b/ansible/roles/k3s/tasks/install.yml index 77b95a509..579a75bc4 100644 --- a/ansible/roles/k3s/tasks/install.yml +++ b/ansible/roles/k3s/tasks/install.yml @@ -47,7 +47,7 @@ cmd: /usr/bin/k3s-install.sh environment: INSTALL_K3S_VERSION: "{{ k3s_version }}" - INSTALL_K3S_EXEC: "{{ item }}" + INSTALL_K3S_EXEC: "{{ item }} --node-ip=${K3S_NODE_IP}" INSTALL_K3S_SKIP_START: "true" INSTALL_K3S_SKIP_ENABLE: "true" INSTALL_K3S_BIN_DIR: "/usr/bin" diff --git a/docs/networks.md b/docs/networks.md new file mode 100644 index 000000000..09e3bc5a6 --- /dev/null +++ b/docs/networks.md @@ -0,0 +1,102 @@ +# Networking + +The default OpenTofu configurations in the appliance do not provision networks, +subnets or associated infrastructure such as routers. The requirements are that: +1. At least one network exists. +2. The first network defined spans all nodes, referred to as the "access network". +3. 
Only one subnet per network is attached to nodes. +4. At least one network on each node provides outbound internet access (either +directly, or via a proxy). + +Furthermore, it is recommended that the deploy host has an interface on the +access network. While it is possible to e.g. use a floating IP on a login node +as an SSH proxy to access the other nodes, this can create problems in recovering +the cluster if the login node is unavailable and can make Ansible problems harder +to debug. + +This page describes supported configurations and how to implement them using +the OpenTofu variables. These will normally be set in +`environments/site/tofu/terraform.tfvars` for the site base environment. If they +need to be overridden for specific environments, this can be done via an OpenTofu +module as discussed [here](./production.md). + +Note that if an OpenStack subnet has a gateway IP defined then nodes with ports +attached to that subnet will get a default route set via that gateway. + +## Single network +This is the simplest possible configuration. A single network and subnet is +used for all nodes. The subnet provides outbound internet access via the default +route defined by the subnet gateway (often an OpenStack router to an external +network). + +```terraform +cluster_networks = [ + { + network = "netA" + subnet = "subnetA" + } +] +... +``` + +## Multiple homogenous networks +This is similar to the above, except each node has multiple networks. The first +network, "netA", is the access network. Note that only one subnet may have a +gateway defined, else default routes via both subnets will be present, causing +routing problems. It also shows the second network (netB) using direct-type +vNICs for RDMA. + +```terraform +cluster_networks = [ + { + network = "netA" + subnet = "subnetA" + }, + { + network = "netB" + subnet = "subnetB" + }, +] + +vnic_types = { + netB = "direct" +} +... 
+``` + + +## Additional networks on some nodes + +This example shows how to modify variables for specific node groups. In this +case a baremetal node group has a second network attached. As above, only a +single subnet can have a gateway IP. + +```terraform +cluster_networks = [ + { + network = "netA" + subnet = "subnetA" + } +] + +compute = { + general = { + nodes = ["general-0", "general-1"] + } + baremetal = { + nodes = ["baremetal-0", "baremetal-1"] + extra_networks = [ + { + network = "netB" + subnet = "subnetB" + } + ] + vnic_types = { + netA = "baremetal" + netB = "baremetal" + ... + } + } +} +... +``` diff --git a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars index 5e73896c8..135aadc64 100644 --- a/environments/.stackhpc/tofu/LEAFCLOUD.tfvars +++ b/environments/.stackhpc/tofu/LEAFCLOUD.tfvars @@ -1,5 +1,9 @@ -cluster_net = "slurmapp-ci" -cluster_subnet = "slurmapp-ci" +cluster_networks = [ + { + network = "slurmapp-ci" + subnet = "slurmapp-ci" + } +] control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment other_node_flavor = "en1.xsmall" state_volume_type = "unencrypted" diff --git a/environments/.stackhpc/tofu/main.tf b/environments/.stackhpc/tofu/main.tf index e1aae6d89..cdf3e2f72 100644 --- a/environments/.stackhpc/tofu/main.tf +++ b/environments/.stackhpc/tofu/main.tf @@ -30,12 +30,10 @@ variable "cluster_image" { type = map(string) } -variable "cluster_net" {} +variable "cluster_networks" {} -variable "cluster_subnet" {} - -variable "vnic_type" { - default = "normal" +variable "vnic_types" { + default = {} } variable "state_volume_type"{ @@ -63,9 +61,8 @@ module "cluster" { source = "../../skeleton/{{cookiecutter.environment}}/tofu/" cluster_name = var.cluster_name - cluster_net = var.cluster_net - cluster_subnet = var.cluster_subnet - vnic_type = var.vnic_type + cluster_networks = var.cluster_networks + vnic_types = var.vnic_types key_pair = 
"slurm-app-ci" cluster_image_id = data.openstack_images_image_v2.cluster.id control_node_flavor = var.control_node_flavor diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf index 53e941517..ca48d2aac 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/compute.tf @@ -7,25 +7,27 @@ module "compute" { nodes = each.value.nodes flavor = each.value.flavor + # always taken from top-level value: cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - cluster_net_id = data.openstack_networking_network_v2.cluster_net.id - cluster_subnet_id = data.openstack_networking_subnet_v2.cluster_subnet.id - + key_pair = var.key_pair + environment_root = var.environment_root + # can be set for group, defaults to top-level value: image_id = lookup(each.value, "image_id", var.cluster_image_id) - vnic_type = lookup(each.value, "vnic_type", var.vnic_type) - vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) + vnic_types = lookup(each.value, "vnic_types", var.vnic_types) + vnic_profiles = lookup(each.value, "vnic_profiles", var.vnic_profiles) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + + # optionally set for group + networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) extra_volumes = lookup(each.value, "extra_volumes", {}) - compute_init_enable = lookup(each.value, "compute_init_enable", []) ignore_image_changes = lookup(each.value, "ignore_image_changes", false) - key_pair = var.key_pair - environment_root = var.environment_root + # computed k3s_token = local.k3s_token - control_address = [for n in openstack_compute_instance_v2.control["control"].network: n.fixed_ip_v4 if n.access_network][0] + 
control_address = openstack_compute_instance_v2.control.access_ip_v4 security_group_ids = [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id] } diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf index b00889fab..0876ce00a 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/control.tf @@ -4,27 +4,27 @@ locals { resource "openstack_networking_port_v2" "control" { - name = "${var.cluster_name}-control" - network_id = data.openstack_networking_network_v2.cluster_net.id + for_each = {for net in var.cluster_networks: net.network => net} + + name = "${var.cluster_name}-control-${each.key}" + network_id = data.openstack_networking_network_v2.cluster_net[each.key].id admin_state_up = "true" fixed_ip { - subnet_id = data.openstack_networking_subnet_v2.cluster_subnet.id + subnet_id = data.openstack_networking_subnet_v2.cluster_subnet[each.key].id } security_group_ids = [for o in data.openstack_networking_secgroup_v2.nonlogin: o.id] binding { - vnic_type = var.vnic_type - profile = var.vnic_profile + vnic_type = lookup(var.vnic_types, each.key, "normal") + profile = lookup(var.vnic_profiles, each.key, "{}") } } resource "openstack_compute_instance_v2" "control" { - for_each = toset(["control"]) - - name = "${var.cluster_name}-${each.key}" + name = "${var.cluster_name}-control" image_id = var.cluster_image_id flavor_name = var.control_node_flavor key_pair = var.key_pair @@ -49,19 +49,23 @@ resource "openstack_compute_instance_v2" "control" { } } - network { - port = openstack_networking_port_v2.control.id - access_network = true + dynamic "network" { + for_each = {for net in var.cluster_networks: net.network => net} + content { + port = openstack_networking_port_v2.control[network.key].id + access_network = network.key == var.cluster_networks[0].network + } } metadata = { 
environment_root = var.environment_root k3s_token = local.k3s_token + access_ip = openstack_networking_port_v2.control[var.cluster_networks[0].network].all_fixed_ips[0] } user_data = <<-EOF #cloud-config - fqdn: ${var.cluster_name}-${each.key}.${var.cluster_name}.${var.cluster_domain_suffix} + fqdn: ${var.cluster_name}-control.${var.cluster_name}.${var.cluster_domain_suffix} bootcmd: %{for volume in local.control_volumes} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf index e2dd2d706..0af7eb30b 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tf @@ -3,7 +3,7 @@ resource "local_file" "hosts" { { "cluster_name": var.cluster_name, "cluster_domain_suffix": var.cluster_domain_suffix, - "control_instances": openstack_compute_instance_v2.control + "control": openstack_compute_instance_v2.control "login_groups": module.login "compute_groups": module.compute "state_dir": var.state_dir diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl index 3a6fe2485..05ec27ef8 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/inventory.tpl @@ -5,15 +5,13 @@ all: control: hosts: -%{ for control in control_instances ~} ${ control.name }: - ansible_host: ${[for n in control.network: n.fixed_ip_v4 if n.access_network][0]} - instance_id: ${ control.id } -%{ endfor ~} + ansible_host: ${control.access_ip_v4} + instance_id: ${control.id} + networks: ${jsonencode({for n in control.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} vars: appliances_state_dir: ${state_dir} # NB needs to be set on group not host otherwise it is ignored in packer build! 
- %{ for group_name in keys(login_groups) ~} ${cluster_name}_${group_name}: hosts: @@ -22,6 +20,7 @@ ${cluster_name}_${group_name}: ansible_host: ${node.access_ip_v4} instance_id: ${ node.id } image_id: ${ node.image_id } + networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} %{ endfor ~} %{ endfor ~} @@ -39,6 +38,7 @@ ${cluster_name}_${group_name}: ansible_host: ${node.access_ip_v4} instance_id: ${ node.id } image_id: ${ node.image_id } + networks: ${jsonencode({for n in node.network: n.name => {"fixed_ip_v4": n.fixed_ip_v4, "fixed_ip_v6": n.fixed_ip_v6}})} %{ endfor ~} %{ endfor ~} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf index 030b36e0b..8f51b6748 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/login.tf @@ -9,23 +9,25 @@ module "login" { cluster_name = var.cluster_name cluster_domain_suffix = var.cluster_domain_suffix - cluster_net_id = data.openstack_networking_network_v2.cluster_net.id - cluster_subnet_id = data.openstack_networking_subnet_v2.cluster_subnet.id - + # can be set for group, defaults to top-level value: image_id = lookup(each.value, "image_id", var.cluster_image_id) - vnic_type = lookup(each.value, "vnic_type", var.vnic_type) - vnic_profile = lookup(each.value, "vnic_profile", var.vnic_profile) + vnic_types = lookup(each.value, "vnic_types", var.vnic_types) + vnic_profiles = lookup(each.value, "vnic_profiles", var.vnic_profiles) volume_backed_instances = lookup(each.value, "volume_backed_instances", var.volume_backed_instances) root_volume_size = lookup(each.value, "root_volume_size", var.root_volume_size) + + # optionally set for group + networks = concat(var.cluster_networks, lookup(each.value, "extra_networks", [])) extra_volumes = lookup(each.value, "extra_volumes", {}) + # can't be 
set for login compute_init_enable = [] ignore_image_changes = false key_pair = var.key_pair environment_root = var.environment_root k3s_token = local.k3s_token - control_address = [for n in openstack_compute_instance_v2.control["control"].network: n.fixed_ip_v4 if n.access_network][0] + control_address = openstack_compute_instance_v2.control.access_ip_v4 security_group_ids = [for o in data.openstack_networking_secgroup_v2.login: o.id] } diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf index d78e3a114..eb33fb42f 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/network.tf @@ -1,11 +1,16 @@ data "openstack_networking_network_v2" "cluster_net" { - name = var.cluster_net + + for_each = {for net in var.cluster_networks: net.network => net} + + name = each.value.network } data "openstack_networking_subnet_v2" "cluster_subnet" { - name = var.cluster_subnet + for_each = {for net in var.cluster_networks: net.network => net} + + name = each.value.subnet } data "openstack_networking_secgroup_v2" "login" { diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/network.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/network.tf new file mode 100644 index 000000000..f5763b97b --- /dev/null +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/network.tf @@ -0,0 +1,14 @@ + +data "openstack_networking_network_v2" "network" { + + for_each = {for net in var.networks: net.network => net} + + name = each.value.network +} + +data "openstack_networking_subnet_v2" "subnet" { + + for_each = {for net in var.networks: net.network => net} + + name = each.value.subnet +} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf 
index 07b9dfe65..bb15733c7 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/nodes.tf @@ -33,21 +33,23 @@ resource "openstack_compute_volume_attach_v2" "compute" { resource "openstack_networking_port_v2" "compute" { - for_each = toset(var.nodes) + for_each = {for item in setproduct(var.nodes, var.networks): + "${item[0]}-${item[1].network}" => item[1] + } name = "${var.cluster_name}-${each.key}" - network_id = var.cluster_net_id + network_id = data.openstack_networking_network_v2.network[each.value.network].id admin_state_up = "true" fixed_ip { - subnet_id = var.cluster_subnet_id + subnet_id = data.openstack_networking_subnet_v2.subnet[each.value.network].id } security_group_ids = var.security_group_ids binding { - vnic_type = var.vnic_type - profile = var.vnic_profile + vnic_type = lookup(var.vnic_types, each.value.network, "normal") + profile = lookup(var.vnic_profiles, each.value.network, "{}") } } @@ -72,9 +74,12 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { } } - network { - port = openstack_networking_port_v2.compute[each.key].id - access_network = true + dynamic "network" { + for_each = {for net in var.networks: net.network => net} + content { + port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id + access_network = network.key == var.networks[0].network + } } metadata = merge( @@ -82,6 +87,7 @@ resource "openstack_compute_instance_v2" "compute_fixed_image" { environment_root = var.environment_root k3s_token = var.k3s_token control_address = var.control_address + access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] }, {for e in var.compute_init_enable: e => true} ) @@ -120,9 +126,12 @@ resource "openstack_compute_instance_v2" "compute" { } } - network { - port = openstack_networking_port_v2.compute[each.key].id - access_network = true + dynamic 
"network" { + for_each = {for net in var.networks: net.network => net} + content { + port = openstack_networking_port_v2.compute["${each.key}-${network.key}"].id + access_network = network.key == var.networks[0].network + } } metadata = merge( @@ -130,6 +139,7 @@ resource "openstack_compute_instance_v2" "compute" { environment_root = var.environment_root k3s_token = var.k3s_token control_address = var.control_address + access_ip = openstack_networking_port_v2.compute["${each.key}-${var.networks[0].network}"].all_fixed_ips[0] }, {for e in var.compute_init_enable: e => true} ) diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf index fbb2c73ce..4214753b2 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/node_group/variables.tf @@ -17,14 +17,6 @@ variable "cluster_domain_suffix" { default = "invalid" } -variable "cluster_net_id" { - type = string -} - -variable "cluster_subnet_id" { - type = string -} - variable "key_pair" { type = string description = "Name of an existing keypair in OpenStack" @@ -40,16 +32,14 @@ variable "environment_root" { description = "Path to environment root, automatically set by activate script" } -variable "vnic_type" { - type = string - description = "VNIC type, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type" - default = "normal" +variable "vnic_types" { + type = map(string) + default = {} } -variable "vnic_profile" { - type = string - description = "VNIC binding profile as json string, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#profile." 
- default = "{}" +variable "vnic_profiles" { + type = map(string) + default = {} } variable "volume_backed_instances" { @@ -99,4 +89,9 @@ variable "ignore_image_changes" { type = bool description = "Whether to ignore changes to the image_id parameter" default = false -} \ No newline at end of file +} + +variable "networks" { + type = list(map(string)) + default = [] +} diff --git a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf index 8fda3f76b..0fbf95541 100644 --- a/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf +++ b/environments/skeleton/{{cookiecutter.environment}}/tofu/variables.tf @@ -9,14 +9,13 @@ variable "cluster_domain_suffix" { default = "internal" } -variable "cluster_net" { - type = string - description = "Name of existing cluster network" -} - -variable "cluster_subnet" { - type = string - description = "Name of existing cluster subnet" +variable "cluster_networks" { + type = list(map(string)) + description = <<-EOT + List of mappings defining networks. 
Mapping key/values: + network: Name of existing network + subnet: Name of existing subnet + EOT } variable "key_pair" { @@ -44,6 +43,7 @@ variable "login" { flavor: String flavor name Optional: image_id: Overrides variable cluster_image_id + extra_networks: List of mappings in same format as cluster_networks vnic_type: Overrides variable vnic_type vnic_profile: Overrides variable vnic_profile volume_backed_instances: Overrides variable volume_backed_instances @@ -75,6 +75,7 @@ variable "compute" { flavor: String flavor name Optional: image_id: Overrides variable cluster_image_id + extra_networks: List of mappings in same format as cluster_networks vnic_type: Overrides variable vnic_type vnic_profile: Overrides variable vnic_profile compute_init_enable: Toggles compute-init rebuild (see compute-init role docs) @@ -124,16 +125,23 @@ variable "home_volume_type" { description = "Type of home volume, if not default type" } -variable "vnic_type" { - type = string - description = "Default VNIC type, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type" - default = "normal" +variable "vnic_types" { + type = map(string) + description = <<-EOT + Default VNIC types, keyed by network name. See https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#vnic_type + If not given this defaults to the "normal" type. + EOT + default = {} } -variable "vnic_profile" { - type = string - description = "Default VNIC binding profile as json string, see https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#profile." - default = "{}" +variable "vnic_profiles" { + type = map(string) + description = <<-EOT + Default VNIC binding profiles, keyed by network name. Values are json strings. 
+ See https://registry.terraform.io/providers/terraform-provider-openstack/openstack/latest/docs/resources/networking_port_v2#profile. + If not given this defaults to "{}" + EOT + default = {} } variable "login_security_groups" {