diff --git a/cc-ansible b/cc-ansible index 344a10fc..4322dd54 100755 --- a/cc-ansible +++ b/cc-ansible @@ -36,7 +36,7 @@ while [[ $# -gt 0 ]]; do # Add proper flag support for --check as an option for dry-runs export EXTRA_OPTS="${EXTRA_OPTS:-} --check" ;; - decrypt_passwords|edit_passwords|help|install_deps|init|view_passwords|encrypt_file|decrypt_file) + decrypt_passwords|edit_passwords|update_passwords|help|install_deps|init|view_passwords|encrypt_file|decrypt_file) # Special subcommand! command="$key" ;; @@ -147,6 +147,52 @@ decrypt_file() { ansible-vault decrypt \ --vault-password-file "$CC_ANSIBLE_VAULT_PASSWORD" \ ${POSARGS[@]} + +update_passwords() { + local passwords_file="$CC_ANSIBLE_SITE/passwords.yml" + + tmpfile="$(mktemp)" + merged="$(mktemp)" + + _update_passwords_cleanup() { + rm -f "$tmpfile" + rm -f "$merged" + } + TRAPS+=(_update_passwords_cleanup) + + echo "Decrypting passwords..." + ansible-vault view \ + --vault-password-file "$CC_ANSIBLE_VAULT_PASSWORD" \ + "$CC_ANSIBLE_SITE/passwords.yml" >"$tmpfile" + + if [[ ! -s "$tmpfile" ]]; then + echo "Failed to decrypt $passwords_file with vault token." + exit 1 + fi + + # check if any passwords need adding + echo "seeing if any new passwords are needed" + kolla-mergepwd \ + --old "${tmpfile}" \ + --new "${DIR}/site-config.example/passwords.yml" \ + --final "${merged}" + + diff_output=$(diff --new-line-format='%L' --old-line-format='' --unchanged-line-format='' $tmpfile $merged || true) + if [[ -n "$diff_output" ]]; then + cat < diff --git a/roles/k3s/files/nvidia-device-plugin.yaml b/roles/k3s/files/nvidia-device-plugin.yaml deleted file mode 100644 index 95f506ee..00000000 --- a/roles/k3s/files/nvidia-device-plugin.yaml +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: nvidia-device-plugin-daemonset - namespace: kube-system -spec: - selector: - matchLabels: - name: nvidia-device-plugin-ds - updateStrategy: - type: RollingUpdate - template: - metadata: - # This annotation is deprecated. Kept here for backward compatibility - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - annotations: - scheduler.alpha.kubernetes.io/critical-pod: "" - labels: - name: nvidia-device-plugin-ds - spec: - nodeSelector: - "nvidia-device-plugin/enabled": "true" - tolerations: - # This toleration is deprecated. Kept here for backward compatibility - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - - key: CriticalAddonsOnly - operator: Exists - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - # Mark this pod as a critical add-on; when enabled, the critical add-on - # scheduler reserves resources for critical add-on pods so that they can - # be rescheduled after a failure. - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - priorityClassName: "system-node-critical" - containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.1 - name: nvidia-device-plugin-ctr - env: - - name: FAIL_ON_INIT_ERROR - value: "false" - - name: NVIDIA_VISIBLE_DEVICES - value: all - - name: NVIDIA_DRIVER_CAPABILITIES - value: all - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - volumeMounts: - - name: device-plugin - mountPath: /var/lib/kubelet/device-plugins - volumes: - - name: device-plugin - hostPath: - path: /var/lib/kubelet/device-plugins diff --git a/roles/k3s/tasks/config-calico.yml b/roles/k3s/tasks/config-calico.yml deleted file mode 100644 index 7a1605d0..00000000 --- a/roles/k3s/tasks/config-calico.yml +++ /dev/null @@ -1,54 +0,0 @@ ---- -- name: ensure calico config directory exists - become: true - ansible.builtin.file: - name: "{{ k3s_conf_location }}/calico" - state: directory - -- name: Download Calico release - become: true - ansible.builtin.get_url: - url: "https://github.com/projectcalico/calico/releases/download/{{ k3s_calico_version }}/release-{{ k3s_calico_version }}.tgz" - dest: "{{ k3s_conf_location }}/calico/release-{{ k3s_calico_version }}.tgz" - owner: root - group: root - mode: 0755 - -- name: Unpack Calico release - become: true - ansible.builtin.unarchive: - remote_src: yes - src: "{{ k3s_conf_location }}/calico/release-{{ k3s_calico_version }}.tgz" - dest: "{{ k3s_conf_location }}/calico/" - creates: "{{ k3s_conf_location }}/calico/release-{{ k3s_calico_version }}" - register: calico_release - -# note: server-side-apply used due to the large size -- name: Apply Calico operator - delegate_to: "{{ groups['deployment'][0] }}" - kubernetes.core.k8s: - state: present - src: "{{ k3s_conf_location }}/calico/release-{{ k3s_calico_version }}/manifests/tigera-operator.yaml" - apply: yes - server_side_apply: - field_manager: ansible - force_conflicts: yes - when: - - not (k3s_dry_run | bool) - -- name: Install calicoctl - become: true - ansible.builtin.copy: - # TODO: use facts to optionally install ARM or otherwise - src: "{{ k3s_conf_location }}/calico/release-{{ k3s_calico_version }}/bin/calicoctl/calicoctl-linux-amd64" - remote_src: yes - dest: /usr/local/bin/kubectl-calico - mode: u=rwx,g=rx,o=rx - -- name: Apply Calico global network policies - delegate_to: "{{ groups['deployment'][0] }}" - command: kubectl calico apply --filename="{{ role_path }}/files/calico-global-networkpolicy-{{ item }}.yaml" - loop: - - default-deny - - allow-ping - become: yes diff --git a/roles/k3s/tasks/config-device-plugins.yml b/roles/k3s/tasks/config-device-plugins.yml deleted file mode 100644 index 2a9f7c12..00000000 --- a/roles/k3s/tasks/config-device-plugins.yml +++ /dev/null @@ -1,24 +0,0 @@ ---- -# FUTURE NOTE: change all the kubectl invocations to use the kubernetes.core.k8s collection! -# This will make detecting changes way easier and more reliable. - -- name: Apply nvidia device plugin daemonset - delegate_to: "{{ groups['deployment'][0] }}" - kubernetes.core.k8s: - state: present - template: "nvidia-device-plugin.yaml.j2" - apply: yes - when: - - not (k3s_dry_run | bool) - -- name: Apply daemonsets for smarter-device plugins - delegate_to: "{{ groups['deployment'][0] }}" - kubernetes.core.k8s: - state: present - template: "smarter-device-manager-ds-{{ item }}.yaml.j2" - apply: yes - loop: - - jetson - - rpi - when: - - not (k3s_dry_run | bool) diff --git a/roles/k3s/tasks/config-k3s-client.yml b/roles/k3s/tasks/config-k3s-client.yml deleted file mode 100644 index 8f0e7bf4..00000000 --- a/roles/k3s/tasks/config-k3s-client.yml +++ /dev/null @@ -1,82 +0,0 @@ ---- -- name: Wait for node-token - become: true - ansible.builtin.wait_for: - path: "{{ k3s_server_location }}/server/node-token" - -- name: Register node-token file access mode - become: true - ansible.builtin.stat: - path: "{{ k3s_server_location }}/server/node-token" - register: p - -- name: Change file access node-token - become: true - ansible.builtin.file: - path: "{{ k3s_server_location }}/server/node-token" - mode: "g+rx,o+rx" - -- name: Read node-token from master - become: true - ansible.builtin.slurp: - path: "{{ k3s_server_location }}/server/node-token" - register: node_token - -- name: Store Master node-token - become: true - ansible.builtin.set_fact: - token: "{{ node_token.content | b64decode | regex_replace('\n', '') }}" - -- name: Restore node-token file access - become: true - ansible.builtin.file: - path: "{{ k3s_server_location }}/server/node-token" - mode: "{{ p.stat.mode }}" - -- name: Create directory .kube - ansible.builtin.file: - path: ~{{ ansible_user_id }}/.kube - state: directory - owner: "{{ ansible_user_id }}" - mode: "u=rwx,g=rx,o=" - -- name: Copy config file to user home directory - become: true - ansible.builtin.copy: - src: "{{ k3s_conf_location }}/k3s.yaml" - dest: ~{{ ansible_user_id }}/.kube/config - remote_src: yes - owner: "{{ ansible_user_id }}" - mode: "u=rw,g=,o=" - -- name: Update cluster server address - # FIXME: should be ansible.builtin.command in later versions of Ansible - command: | - k3s kubectl config set-cluster default - --server=https://{{ k3s_server_ip }}:{{ k3s_port }} - --kubeconfig ~{{ ansible_user_id }}/.kube/config - changed_when: true - -- name: Create kubectl symlink - become: true - ansible.builtin.file: - src: /usr/local/bin/k3s - dest: /usr/local/bin/kubectl - state: link - -- name: Create crictl symlink - become: true - ansible.builtin.file: - src: /usr/local/bin/k3s - dest: /usr/local/bin/crictl - state: link - -- name: Copy debug manifests - become: true - ansible.builtin.copy: - src: "{{ item }}" - dest: "{{ k3s_conf_location }}/" - owner: root - group: root - mode: 0755 - with_fileglob: "debug-*.yaml" diff --git a/roles/k3s/tasks/config-k3s-service.yml b/roles/k3s/tasks/config-k3s-service.yml deleted file mode 100644 index f565512b..00000000 --- a/roles/k3s/tasks/config-k3s-service.yml +++ /dev/null @@ -1,69 +0,0 @@ ---- -- name: Download and install k3s binary - become: true - ansible.builtin.get_url: - url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s - checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-amd64.txt - dest: /usr/local/bin/k3s - owner: root - group: root - mode: 0755 - -- name: Copy K3s env file - become: true - ansible.builtin.template: - src: "k3s.env.j2" - dest: /etc/default/k3s - owner: root - group: root - mode: 0600 - -- set_fact: - kolla_external_vip_address_family: ipv4 - -- name: Ensure k3s conf dir exists - become: true - ansible.builtin.file: - path: "{{ k3s_conf_location }}" - state: directory - owner: root - group: root - mode: "0755" - -- name: Copy K3s config file - become: true - ansible.builtin.template: - src: "config.yaml.j2" - dest: "{{ k3s_conf_location }}/config.yaml" - owner: root - group: root - mode: 0644 - vars: - # This _address_family var is not set by default; this is just to make kolla_address - # happy when it tries to template the primary address of the VIP interface. - kolla_external_vip_address_family: ipv4 - -- name: Copy K3s systemd service file - register: k3s_service - become: true - ansible.builtin.template: - src: "k3s.service.j2" - dest: "{{ k3s_systemd_dir }}/k3s.service" - owner: root - group: root - mode: 0644 - -- name: Enable and check K3s service - become: true - systemd: - name: k3s - daemon_reload: yes - state: started - enabled: yes - -- name: Install Kubernetes Python client - become: true - pip: - name: kubernetes - virtualenv: "{{ virtualenv }}" - state: present \ No newline at end of file diff --git a/roles/k3s/tasks/config-neutron.yml b/roles/k3s/tasks/config-neutron.yml deleted file mode 100644 index bb225bb2..00000000 --- a/roles/k3s/tasks/config-neutron.yml +++ /dev/null @@ -1,86 +0,0 @@ ---- -- name: Create calico network - kolla_toolbox: - container_engine: "{{ kolla_container_engine }}" - module_name: os_network - module_args: - auth: "{{ openstack_auth }}" - project: "{{ keystone_admin_project }}" - name: caliconet - provider_network_type: flat - provider_physical_network: calico - shared: yes - state: present - run_once: true - become: yes - -- name: Create calico subnet - kolla_toolbox: - module_name: os_subnet - module_args: - auth: "{{ openstack_auth }}" - project: "{{ keystone_admin_project }}" - network_name: caliconet - name: caliconet-subnet - cidr: "{{ k3s_cluster_cidr }}" - enable_dhcp: no - become: yes - -- name: Create tunelo-calico network - kolla_toolbox: - module_name: os_network - module_args: - auth: "{{ openstack_auth }}" - project: "{{ keystone_admin_project }}" - name: tunelo-calico - provider_network_type: flat - provider_physical_network: tunelo - shared: false - become: yes - -- name: Create tunelo-calico subnet - kolla_toolbox: - module_name: os_subnet - module_args: - auth: "{{ openstack_auth }}" - project: "{{ keystone_admin_project }}" - network_name: tunelo-calico - name: tunelo-calico-subnet - cidr: "{{ k3s_tunelo_cidr }}" - enable_dhcp: yes - become: yes - -# FIXME(jason): this doesn't actually add the subnet to the router! -# When we have updated to a later Ansible we can potentially fetch the router's -# interfaces with routers_info and then merge this interface into the list? -# Update (Soufiane June 25 2024) This still does not add the subnet to the router -# two extra manual steps have to be performed after the succesful execution of this task -# 1. Add the caliconet-subnet as an internal interface to the admin router -# 2. Add an external gateway to the public network -# Both of the above are pre-requisites to the proper functioning of floating ips to containers -- name: Fetch existing NAT router - kolla_toolbox: - container_engine: "{{ kolla_container_engine }}" - module_name: os_router - module_args: - auth: "{{ openstack_auth }}" - project: "{{ keystone_admin_project }}" - name: sharednet-router - # interfaces: - # - caliconet-subnet - register: calico_router - run_once: true - become: yes - -- name: Generate Calico/Neutron connection script - ansible.builtin.template: - src: neutron-calico-connect.j2 - dest: "{{ k3s_conf_location }}/neutron-calico-connect.sh" - owner: root - group: root - mode: "0700" - vars: - neutron_router_id: "{{ calico_router.router.id }}" - when: - - k3s_enable_calico | bool - become: yes diff --git a/roles/k3s/tasks/config-registry-proxy.yml b/roles/k3s/tasks/config-registry-proxy.yml deleted file mode 100644 index 5a6a8806..00000000 --- a/roles/k3s/tasks/config-registry-proxy.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -- name: Template deployment for registry proxy - ansible.builtin.template: - src: "docker-registry-deployment.yaml.j2" - dest: "{{ k3s_conf_location }}/docker-registry-deployment.yaml" - owner: root - group: root - mode: "0644" - -- name: Apply deployment for registry proxy - command: >- - kubectl apply -f {{ k3s_conf_location }}/docker-registry-deployment.yaml - when: - - not (k3s_dry_run | bool) diff --git a/roles/k3s/tasks/main.yml b/roles/k3s/tasks/main.yml deleted file mode 100644 index 320ca627..00000000 --- a/roles/k3s/tasks/main.yml +++ /dev/null @@ -1,14 +0,0 @@ ---- -- include_tasks: config-k3s-service.yml -- include_tasks: config-k3s-client.yml - -- include_tasks: config-calico.yml - when: k3s_enable_calico | bool - -- include_tasks: config-device-plugins.yml - -- include_tasks: config-neutron.yml - when: enable_neutron | bool - -- include_tasks: config-registry-proxy.yml - when: k3s_enable_registry_proxy | bool diff --git a/roles/k3s/templates/calico-custom-resources.yaml.j2 b/roles/k3s/templates/calico-custom-resources.yaml.j2 deleted file mode 100644 index 42681e31..00000000 --- a/roles/k3s/templates/calico-custom-resources.yaml.j2 +++ /dev/null @@ -1,40 +0,0 @@ -# This is based on the default value of the installation configuration found at -# https://raw.githubusercontent.com/projectcalico/calico/v3.24.1/manifests/custom-resources.yaml - -# This section includes base Calico installation configuration. -# For more information, see: https://projectcalico.docs.tigera.io/reference/installation/api#operator.tigera.io/v1.Installation -apiVersion: operator.tigera.io/v1 -kind: Installation -metadata: - name: default -spec: - controlPlaneNodeSelector: - node-role.kubernetes.io/control-plane: "true" - # Configures Calico networking. - calicoNetwork: - # Note: The ipPools section cannot be modified post-install. - ipPools: - - cidr: {{ k3s_cluster_cidr }} - blockSize: {{ k3s_cluster_node_blocksize }} - encapsulation: IPIP - natOutgoing: Enabled - nodeSelector: all() - # Ensure calico uses the last Wireguard interface - nodeAddressAutodetectionV4: - firstFound: false - interface: wg-.* - containerIPForwarding: Enabled - # Lower MTU to accommodate Wireguard encapsulation; Calico uses IPIP encapsulation - # which adds a 20-byte overhead. The underlying Wireguard interface has an MTU of - # 1420 configured, so we subtract 20 to get 1400. - mtu: 1400 - ---- - -# This section configures the Calico API server. -# For more information, see: https://docs.projectcalico.org/v3.21/reference/installation/api#operator.tigera.io/v1.APIServer -apiVersion: operator.tigera.io/v1 -kind: APIServer -metadata: - name: default -spec: {} diff --git a/roles/k3s/templates/config.yaml.j2 b/roles/k3s/templates/config.yaml.j2 deleted file mode 100644 index dececc1f..00000000 --- a/roles/k3s/templates/config.yaml.j2 +++ /dev/null @@ -1,14 +0,0 @@ ---- -cluster-cidr: {{ k3s_cluster_cidr }} -# HA for K3s is not yet supported; there is no HAProxy frontend serving the traffic -# to the API. Once that is functional, we can use the VIP address here. -bind-address: {{ k3s_server_ip }} -node-external-ip: {{ k3s_server_ip }} -secrets-encryption: True -{% if k3s_enable_calico | bool %} -flannel-backend: none -disable-network-policy: True -disable: - - servicelb - - traefik -{% endif %} diff --git a/roles/k3s/templates/docker-registry-deployment.yaml.j2 b/roles/k3s/templates/docker-registry-deployment.yaml.j2 deleted file mode 100644 index a0eb1eff..00000000 --- a/roles/k3s/templates/docker-registry-deployment.yaml.j2 +++ /dev/null @@ -1,50 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: docker-registry -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: docker-registry - template: - metadata: - labels: - app.kubernetes.io/name: docker-registry - spec: - containers: - - resources: - name: registry - image: registry:2 - ports: - - name: registry-port - containerPort: 5000 - volumeMounts: - - mountPath: /var/lib/registry - name: registry-data - env: - - name: REGISTRY_PROXY_REMOTEURL - value: https://registry-1.docker.io - - name: REGISTRY_STORAGE_DELETE_ENABLED - value: "true" - volumes: - - name: registry-data - hostPath: - path: /var/lib/registry - nodeSelector: - node-role.kubernetes.io/master: "true" - ---- -apiVersion: v1 -kind: Service -metadata: - name: docker-registry -spec: - selector: - app.kubernetes.io/name: docker-registry - ports: - - name: registry-port - protocol: TCP - port: 80 - targetPort: registry-port diff --git a/roles/k3s/templates/k3s.env.j2 b/roles/k3s/templates/k3s.env.j2 deleted file mode 100644 index 48089d59..00000000 --- a/roles/k3s/templates/k3s.env.j2 +++ /dev/null @@ -1 +0,0 @@ -K3S_CONFIG_FILE={{ k3s_conf_location }}/config.yaml diff --git a/roles/k3s/templates/k3s.service.j2 b/roles/k3s/templates/k3s.service.j2 deleted file mode 100644 index 2da1fcbc..00000000 --- a/roles/k3s/templates/k3s.service.j2 +++ /dev/null @@ -1,25 +0,0 @@ -[Unit] -Description=Lightweight Kubernetes -Documentation=https://k3s.io -After=network-online.target - -[Service] -Type=notify -EnvironmentFile=-/etc/default/%N -ExecStartPre=-/sbin/modprobe br_netfilter -ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ k3s_extra_server_args | default("") }} -KillMode=process -Delegate=yes -# Having non-zero Limit*s causes performance problems due to accounting overhead -# in the kernel. We recommend using cgroups to do container-local accounting. -LimitNOFILE=1048576 -LimitNPROC=infinity -LimitCORE=infinity -TasksMax=infinity -TimeoutStartSec=0 -Restart=always -RestartSec=5s - -[Install] -WantedBy=multi-user.target diff --git a/roles/k3s/templates/neutron-calico-connect.j2 b/roles/k3s/templates/neutron-calico-connect.j2 deleted file mode 100644 index ed4d6ead..00000000 --- a/roles/k3s/templates/neutron-calico-connect.j2 +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# -# Bridges Neutron to the Calico network for the purposes of assigning Floating IP -# addresses to pods running on K3s. - -calico_cidr="{{ k3s_cluster_cidr }}" -host_addr="192.168.150.1/24" -ns_addr="192.168.150.2/24" - -neutron_ns=qrouter-"{{ neutron_router_id }}" - -# Create veth pair to connect Neutron router ns to default ns -ip link show veth-cali0 >/dev/null || ip link add veth-cali0 type veth peer veth-caliN - -ip link set veth-cali0 up -ip addr replace "$host_addr" dev veth-cali0 - -ip link set veth-caliN netns "$neutron_ns" up -ip netns exec "$neutron_ns" ip addr replace "$ns_addr" dev veth-caliN - -# Allow traffic from Neutron router ns to travel through default ns for Calico subnet -# Also ensure we delete Neutron's default route it adds due to the subnet having a -# router interface. -ip netns exec "$neutron_ns" ip route replace "$calico_cidr" via "${host_addr%%/24}" - -# Required for any traffic going over IPIP tunnel to other Calico nodes -iptables -t nat -S | grep -q neutron-calico-connect:01 || { - iptables -t nat -A POSTROUTING -o tunl0 -j MASQUERADE \ - -m comment --comment "(neutron-calico-connect:01) bridge Neutron floating IPs to K3s" -} -# Floating IPs normally do DNAT, which does not change the source address. This breaks -# Calico, which does not understand how to return the packets. -iptables -t nat -S | grep -q neutron-calico-connect:02 || { - ip netns exec "$neutron_ns" iptables -t nat -A POSTROUTING -o veth-caliN -J MASQUERADE \ - -m comment --comment "(neutron-calico-connect:02) rewrite source for Calico-bound packets" -} diff --git a/roles/k3s/templates/nvidia-device-plugin.yaml.j2 b/roles/k3s/templates/nvidia-device-plugin.yaml.j2 deleted file mode 100644 index 0b2075a8..00000000 --- a/roles/k3s/templates/nvidia-device-plugin.yaml.j2 +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: nvidia-device-plugin-daemonset - namespace: kube-system -spec: - selector: - matchLabels: - name: nvidia-device-plugin-ds - updateStrategy: - type: RollingUpdate - template: - metadata: - # This annotation is deprecated. Kept here for backward compatibility - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - annotations: - scheduler.alpha.kubernetes.io/critical-pod: "" - labels: - name: nvidia-device-plugin-ds - spec: - nodeSelector: - "nvidia-device-plugin/enabled": "true" - tolerations: - # This toleration is deprecated. Kept here for backward compatibility - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - - key: CriticalAddonsOnly - operator: Exists - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - - key: "{{ k3s_worker_taint.key }}" - operator: Equal - value: "{{ k3s_worker_taint.value }}" - effect: "{{ k3s_worker_taint.effect }}" - # Mark this pod as a critical add-on; when enabled, the critical add-on - # scheduler reserves resources for critical add-on pods so that they can - # be rescheduled after a failure. - # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - priorityClassName: "system-node-critical" - containers: - - image: nvcr.io/nvidia/k8s-device-plugin:v0.14.1 - name: nvidia-device-plugin-ctr - env: - - name: FAIL_ON_INIT_ERROR - value: "false" - - name: NVIDIA_VISIBLE_DEVICES - value: all - - name: NVIDIA_DRIVER_CAPABILITIES - value: all - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - volumeMounts: - - name: device-plugin - mountPath: /var/lib/kubelet/device-plugins - volumes: - - name: device-plugin - hostPath: - path: /var/lib/kubelet/device-plugins diff --git a/roles/k3s/templates/smarter-device-manager-ds-jetson.yaml.j2 b/roles/k3s/templates/smarter-device-manager-ds-jetson.yaml.j2 deleted file mode 100644 index 9f6f4ae6..00000000 --- a/roles/k3s/templates/smarter-device-manager-ds-jetson.yaml.j2 +++ /dev/null @@ -1,118 +0,0 @@ -# This file is a synthesis of smarter-device-manager-configmap-xavier.yaml and smarter-device-manager-ds.yaml -# from the smarter-device-manager repo[0]. -# [0]: https://gitlab.com/arm-research/smarter/smarter-device-manager/ -apiVersion: v1 -kind: Namespace -metadata: - name: smarter-device-manager-system - labels: - name: smarter-device-manager-system ---- -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: smarter-device-manager-system - name: smarter-device-manager-jetson -data: - conf.yaml: | - - devicematch: ^snd$ - nummaxdevices: 20 - - devicematch: ^gpiomem$ - nummaxdevices: 40 - - devicematch: ^gpiochip[0-9]*$ - nummaxdevices: 20 - - devicematch: ^hci[0-9]*$ - nummaxdevices: 1 - - devicematch: ^i2c-[0-9]*$ - nummaxdevices: 1 - - devicematch: ^rtc0$ - nummaxdevices: 20 - - devicematch: ^video[0-9]*$ - nummaxdevices: 20 - - devicematch: ^vchiq$ - nummaxdevices: 20 - - devicematch: ^vcsm.*$ - nummaxdevices: 20 - - devicematch: ^ttyUSB[0-9]*$ - nummaxdevices: 1 - - devicematch: ^ttyACM[0-9]*$ - nummaxdevices: 1 - - devicematch: ^ttyTHS[0-9]*$ - nummaxdevices: 1 - - devicematch: ^ttyS[0-9]*$ - nummaxdevices: 1 - - devicematch: nvidia-gpu - nummaxdevices: 20 ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - namespace: smarter-device-manager-system - name: smarter-device-manager-jetson - labels: - # FIXME: not sure if labels.name is also needed if we already have 'name' above - name: smarter-device-manager-jetson - role: agent -spec: - selector: - matchLabels: - smarter-device-manager/configmap: jetson - updateStrategy: - type: RollingUpdate - template: - metadata: - labels: - smarter-device-manager/configmap: jetson - annotations: - node.kubernetes.io/bootstrap-checkpoint: "true" - spec: - nodeSelector: - smarter-device-manager: enabled - smarter-device-manager/configmap: jetson - tolerations: - - key: "{{ k3s_worker_taint.key }}" - operator: Equal - value: "{{ k3s_worker_taint.value }}" - effect: "{{ k3s_worker_taint.effect }}" - priorityClassName: "system-node-critical" - hostname: smarter-device-management - hostNetwork: true - dnsPolicy: ClusterFirstWithHostNet - containers: - - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:{{ k3s_smarter_device_plugin_version }} - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - resources: - limits: - cpu: 100m - memory: 15Mi - requests: - cpu: 10m - memory: 15Mi - volumeMounts: - - name: device-plugin - mountPath: /var/lib/kubelet/device-plugins - - name: dev-dir - mountPath: /dev - - name: sys-dir - mountPath: /sys - - name: config - mountPath: /root/config - volumes: - - name: device-plugin - hostPath: - path: /var/lib/kubelet/device-plugins - - name: dev-dir - hostPath: - path: /dev - - name: sys-dir - hostPath: - path: /sys - - name: config - configMap: - name: smarter-device-manager-jetson - terminationGracePeriodSeconds: 30 diff --git a/roles/k3s/templates/smarter-device-manager-ds-rpi.yaml.j2 b/roles/k3s/templates/smarter-device-manager-ds-rpi.yaml.j2 deleted file mode 100644 index 13f765f9..00000000 --- a/roles/k3s/templates/smarter-device-manager-ds-rpi.yaml.j2 +++ /dev/null @@ -1,116 +0,0 @@ -# This file is a synthesis of smarter-device-manager-configmap-rpi.yaml and smarter-device-manager-ds.yaml -# from the smarter-device-manager repo[0]. -# [0]: https://gitlab.com/arm-research/smarter/smarter-device-manager/ -apiVersion: v1 -kind: Namespace -metadata: - name: smarter-device-manager-system - labels: - name: smarter-device-manager-system ---- -apiVersion: v1 -kind: ConfigMap -metadata: - namespace: smarter-device-manager-system - name: smarter-device-manager-rpi -data: - conf.yaml: | - - devicematch: ^snd$ - nummaxdevices: 20 - - devicematch: ^gpiomem$ - nummaxdevices: 40 - - devicematch: ^gpiochip[0-9]*$ - nummaxdevices: 20 - - devicematch: ^hci[0-9]*$ - nummaxdevices: 1 - - devicematch: ^i2c-[0-9]*$ - nummaxdevices: 1 - - devicematch: ^rtc0$ - nummaxdevices: 20 - - devicematch: ^video[0-9]*$ - nummaxdevices: 20 - - devicematch: ^vchiq$ - nummaxdevices: 20 - - devicematch: ^vcsm.*$ - nummaxdevices: 20 - - devicematch: ^ttyUSB[0-9]*$ - nummaxdevices: 1 - - devicematch: ^ttyACM[0-9]*$ - nummaxdevices: 1 - - devicematch: ^ttyTHS[0-9]*$ - nummaxdevices: 1 - - devicematch: ^ttyS[0-9]*$ - nummaxdevices: 1 ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - namespace: smarter-device-manager-system - name: smarter-device-manager-rpi - labels: - # FIXME: not sure if labels.name is also needed if we already have 'name' above - name: smarter-device-manager-rpi - role: agent -spec: - selector: - matchLabels: - smarter-device-manager/configmap: rpi - updateStrategy: - type: RollingUpdate - template: - metadata: - labels: - smarter-device-manager/configmap: rpi - annotations: - node.kubernetes.io/bootstrap-checkpoint: "true" - spec: - nodeSelector: - smarter-device-manager: enabled - smarter-device-manager/configmap: rpi - tolerations: - - key: "{{ k3s_worker_taint.key }}" - operator: Equal - value: "{{ k3s_worker_taint.value }}" - effect: "{{ k3s_worker_taint.effect }}" - priorityClassName: "system-node-critical" - hostname: smarter-device-management - hostNetwork: true - dnsPolicy: ClusterFirstWithHostNet - containers: - - name: smarter-device-manager - image: registry.gitlab.com/arm-research/smarter/smarter-device-manager:{{ k3s_smarter_device_plugin_version }} - imagePullPolicy: IfNotPresent - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: ["ALL"] - resources: - limits: - cpu: 100m - memory: 15Mi - requests: - cpu: 10m - memory: 15Mi - volumeMounts: - - name: device-plugin - mountPath: /var/lib/kubelet/device-plugins - - name: dev-dir - mountPath: /dev - - name: sys-dir - mountPath: /sys - - name: config - mountPath: /root/config - volumes: - - name: device-plugin - hostPath: - path: /var/lib/kubelet/device-plugins - - name: dev-dir - hostPath: - path: /dev - - name: sys-dir - hostPath: - path: /sys - - name: config - configMap: - name: smarter-device-manager-rpi - terminationGracePeriodSeconds: 30 diff --git a/site-config.example/passwords.yml b/site-config.example/passwords.yml index cc677468..da3ac399 100644 --- a/site-config.example/passwords.yml +++ b/site-config.example/passwords.yml @@ -282,6 +282,11 @@ libvirt_sasl_password: proxysql_admin_password: proxysql_stats_password: +################## +# k3s options +################## +k3s_token: + ############ # OpenSearch ############ diff --git a/src/kolla-ansible b/src/kolla-ansible index 568df787..3eefb0cf 160000 --- a/src/kolla-ansible +++ b/src/kolla-ansible @@ -1 +1 @@ -Subproject commit 568df787eeba52116c081ba575c60152242d7135 +Subproject commit 3eefb0cfce3acf61fbcff6717d334420eb03f208