diff --git a/README.md b/README.md index 4358901b..85daee05 100644 --- a/README.md +++ b/README.md @@ -211,7 +211,8 @@ xxxxx-xxxxx-xxxxx-xxxxx * [Deploy Single Node OpenShift (SNO) clusters via jetlag from a Performance Lab Bastion Machine](docs/deploy-sno-performancelab.md) * [Deploy Single Node OpenShift (SNO) clusters via jetlag on IBMcloud ](docs/deploy-sno-ibmcloud.md) * [Deploy a VMNO](docs/deploy-vmno.md) -* [Scale up MNO](docs/scale-out-mno.md) +* [Scale out MNO](docs/scale-out-mno.md) +* [Scale out SNO](docs/scale-out-sno.md) ## Tips and Troubleshooting diff --git a/ansible/roles/create-inventory/tasks/main.yml b/ansible/roles/create-inventory/tasks/main.yml index 952f4557..80672413 100644 --- a/ansible/roles/create-inventory/tasks/main.yml +++ b/ansible/roles/create-inventory/tasks/main.yml @@ -214,6 +214,20 @@ set_fact: ocpinventory_sno_nodes: "{{ ocpinventory.json.nodes[1:] }}" + - name: SNO - Set max number of nodes + set_fact: + max_nodes: "{{ ocpinventory.json.nodes|length }}" + when: worker_node_count == None + + - name: SNO - Set max number of nodes (worker_node_count set) + set_fact: + max_nodes: "{{ worker_node_count|int + 2 }}" + when: worker_node_count != None + + - name: SNO - Set ocpinventory worker nodes (for scale-out scenarios) + set_fact: + ocpinventory_worker_nodes: "{{ ocpinventory.json.nodes[2:max_nodes|int] }}" + - name: SNO - Get lab data for each sno uri: url: "https://{{ labs[lab]['foreman'] }}/api/hosts/{{ item.pm_addr | replace('mgmt-','') }}" @@ -224,6 +238,18 @@ register: sno_foreman_data loop: "{{ ocpinventory_sno_nodes }}" + - name: SNO - Get lab mac address for worker nodes + uri: + url: "https://{{ labs[lab]['foreman'] }}/api/hosts/{{ item.pm_addr | replace('mgmt-','') }}" + force_basic_auth: yes + user: "{{ lab_cloud }}" + password: "{{ bmc_password }}" + validate_certs: false + with_items: + - "{{ ocpinventory_worker_nodes }}" + when: cluster_type == "sno" and ocpinventory_worker_nodes|length > 0 + register: sno_worker_foreman_data + - name: set json query fact set_fact: mac_query: "json.interfaces[?type=='interface'].mac" diff --git a/ansible/roles/create-inventory/templates/inventory-sno.j2 b/ansible/roles/create-inventory/templates/inventory-sno.j2 index c6ba79be..afb4b97c 100644 --- a/ansible/roles/create-inventory/templates/inventory-sno.j2 +++ b/ansible/roles/create-inventory/templates/inventory-sno.j2 @@ -32,11 +32,37 @@ bmc_password={{ bmc_password }} [controlplane:vars] # Unused +{% if ocpinventory_worker_nodes is defined and ocpinventory_worker_nodes|length > 0 %} [worker] -# Unused +{% for worker in ocpinventory_worker_nodes %} +{% if sno_worker_foreman_data is defined and sno_worker_foreman_data.results is defined %} +{{ worker.pm_addr.split('.')[0] | replace('mgmt-','') }} bmc_address={{ worker.pm_addr }} mac_address={{ worker.mac[controlplane_network_interface_idx|int] }} lab_mac={{ ( (sno_worker_foreman_data.results| selectattr('json.name', 'eq', worker.pm_addr | replace('mgmt-',''))|first).json.interfaces | selectattr('primary', 'eq', True)|first).mac }} ip={{ controlplane_network | ansible.utils.nthhost(loop.index + sno_controlplane_ip_offset) }} vendor={{ hw_vendor[(worker.pm_addr.split('.')[0]).split('-')[-1]] }} install_disk={{ worker_install_disk }} +{% else %} +# {{ worker.pm_addr.split('.')[0] | replace('mgmt-','') }} bmc_address={{ worker.pm_addr }} mac_address={{ worker.mac[controlplane_network_interface_idx|int] }} lab_mac=UNDEFINED ip={{ controlplane_network | ansible.utils.nthhost(loop.index + 
sno_controlplane_ip_offset) }} vendor={{ hw_vendor[(worker.pm_addr.split('.')[0]).split('-')[-1]] }} install_disk={{ worker_install_disk }} +{% endif %} +{% endfor %} [worker:vars] -# Unused +role=worker +bmc_user={{ bmc_user }} +bmc_password={{ bmc_password }} +lab_interface={{ controlplane_lab_interface }} +network_interface={{ controlplane_network_interface }} +network_prefix={{ controlplane_network_prefix }} +gateway={{ controlplane_network_gateway }} +{% if controlplane_bastion_as_dns %} +dns1={{ bastion_controlplane_ip }} +{% else %} +dns1={{ labs[lab]['dns'][0] }} +dns2={{ labs[lab]['dns'][1] | default('') }} +{% endif %} +{% else %} +[worker] +# No worker nodes configured + +[worker:vars] +# No worker nodes configured +{% endif %} [sno] # Only one can be uncommented and deployed at a time, but you can deploy additional SNOs diff --git a/ansible/roles/mno-scale-out-csr/tasks/check_nodes_joined.yml b/ansible/roles/mno-scale-out-csr/tasks/check_nodes_joined.yml index 81cc666b..8aa9b3dc 100644 --- a/ansible/roles/mno-scale-out-csr/tasks/check_nodes_joined.yml +++ b/ansible/roles/mno-scale-out-csr/tasks/check_nodes_joined.yml @@ -4,6 +4,10 @@ r_qry: "{{ qry }}" r_worker_counter: "{{ worker_counter }}" +- name: Set KUBECONFIG path based on cluster type + set_fact: + cluster_kubeconfig: "{{ bastion_cluster_config_dir }}/{{ 'kubeconfig' if cluster_type != 'sno' else groups['sno'][0] + '/kubeconfig' }}" + - name: approve CSRs and check if nodes have joined the cluster block: - name: Increment the retry count @@ -16,21 +20,24 @@ when: retry|int > 0 - name: Get CSRs - shell: | - KUBECONFIG={{ bastion_cluster_config_dir }}/kubeconfig oc get csr -o json + shell: oc get csr -o json register: oc_get_csr + environment: + KUBECONFIG: "{{ cluster_kubeconfig }}" - name: Approve pending CSRs - shell: | - KUBECONFIG={{ bastion_cluster_config_dir }}/kubeconfig oc adm certificate approve {{ item.metadata.name }} + shell: oc adm certificate approve {{ item.metadata.name }} loop: "{{ oc_get_csr.stdout | from_json | json_query(qry) }}" loop_control: label: "{{ item.metadata.name }}" + environment: + KUBECONFIG: "{{ cluster_kubeconfig }}" - name: Get worker node count - shell: | - KUBECONFIG={{ bastion_cluster_config_dir }}/kubeconfig oc get nodes | {{ worker_counter }} + shell: oc get nodes | {{ worker_counter }} register: oc_get_nodes_workers + environment: + KUBECONFIG: "{{ cluster_kubeconfig }}" - name: Current Worker Node Count debug: diff --git a/ansible/roles/mno-scale-out/tasks/main.yml b/ansible/roles/mno-scale-out/tasks/main.yml index a03248fa..71296c6f 100644 --- a/ansible/roles/mno-scale-out/tasks/main.yml +++ b/ansible/roles/mno-scale-out/tasks/main.yml @@ -16,10 +16,16 @@ vars: workers: "{{ groups['worker'][current_worker_count:current_worker_count+scale_out_count] }}" +- name: Set KUBECONFIG path based on cluster type + set_fact: + cluster_kubeconfig: "{{ bastion_cluster_config_dir }}/{{ 'kubeconfig' if cluster_type != 'sno' else groups['sno'][0] + '/kubeconfig' }}" + - name: Add Nodes to cluster and generate boot iso (Takes a min or two) - command: oc adm node-image create --kubeconfig {{ bastion_cluster_config_dir }}/kubeconfig + command: oc adm node-image create args: chdir: /root/mno-scale-out/ + environment: + KUBECONFIG: "{{ cluster_kubeconfig }}" - name: Copy scale out discovery iso to http server ansible.builtin.copy: diff --git a/docs/scale-out-sno.md b/docs/scale-out-sno.md new file mode 100644 index 00000000..a4ba6c77 --- /dev/null +++ b/docs/scale-out-sno.md @@ -0,0 +1,48 @@ +# 
Scale Out a Single-Node OpenShift Deployment

A Single-Node OpenShift (SNO) cluster deployed via Jetlag can be scaled out with Jetlag by adding worker nodes through its inventory and playbooks. This guide assumes you have an existing SNO cluster deployed via Jetlag, and that the worker section of the Jetlag inventory file contains records for the worker nodes already joined to the running cluster.

_**Steps to Scale Out:**_
- [Add Nodes to Worker Inventory](#add-nodes-to-worker-inventory)
- [Update scale_out.yml](#update-scale_outyml)
- [Run mno-scale-out.yml](#run-mno-scale-outyml)

## Add Nodes to Worker Inventory

There are two ways to add new node entries to the worker inventory.

1. New bare metal nodes added to the SNO cluster's allocation

   If more nodes were added to the allocation, update worker_node_count in ansible/vars/all.yml and rerun the create-inventory playbook. Compare the previous inventory file against the new one to confirm that nothing changed except the new nodes appended to the worker section, and make sure the extra vars related to worker nodes are populated.

2. Manual entry

   You can add new entries to the worker inventory section by hand. Place them at the end of the list of worker entries; the scale-out playbook is designed to use the last n nodes in the inventory.

   Populate the worker node vars ([worker:vars]) the same way as the SNO node vars ([sno:vars]), and set the role parameter to worker in [worker:vars]. An illustrative worker record is shown at the end of this guide.

## Update scale_out.yml

There are two variables in ansible/vars/scale_out.yml that indicate which entries from the worker inventory section should be added to the existing cluster.

- current_worker_count: The number of entries in the worker inventory section to skip before starting to add nodes to the existing cluster. This should match the number of worker nodes currently associated with the existing cluster.
- scale_out_count: The number of entries in the worker inventory section that will be added as new workers to the existing cluster.

Example: If the cluster currently has three bare metal workers and the intended worker count is ten, current_worker_count would be 3 and scale_out_count would be 7: scale out from three existing workers, adding seven new workers, for a total of ten (see the sketch at the end of this guide).

## Run mno-scale-out.yml

Once the new worker records are added, the worker node variables are properly populated in the inventory, and scale_out.yml has the proper values, the final step is to run the mno-scale-out.yml playbook.

```console
(.ansible) [root@xxx-h01-000-r650 jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/mno-scale-out.yml
...
```

This playbook will:
- Generate the node configuration YAML
- Invoke `oc adm node-image create` with that configuration, which generates a discovery ISO
- Boot the new worker nodes off of the generated discovery ISO
- Approve the generated CSRs

This workflow can be run repeatedly to add more workers to the existing cluster.
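
## Example worker inventory entry

For the manual-entry option above, a new record in the [worker] section takes the same shape as the entries the create-inventory playbook renders from its template. The sketch below is illustrative only: the hostname, BMC address, MAC addresses, IP, vendor, and interface names are placeholders, and the [worker:vars] values should mirror whatever your existing [sno:vars] section already uses.

```ini
[worker]
# Keep existing workers first; append the new node(s) at the end
xxx-h02-000-r650 bmc_address=mgmt-xxx-h02-000-r650.example.com mac_address=40:a6:b7:aa:bb:02 lab_mac=b8:ca:3a:aa:bb:02 ip=198.18.10.7 vendor=Dell install_disk=/dev/sda

[worker:vars]
# Same values as [sno:vars], except role must be worker
role=worker
bmc_user=<bmc user>
bmc_password=<bmc password>
lab_interface=<lab interface>
network_interface=<controlplane network interface>
network_prefix=24
gateway=198.18.10.1
dns1=198.18.10.1
```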
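
## Example scale_out.yml

Tying the earlier example together (three workers already in the cluster, seven more to add), ansible/vars/scale_out.yml would carry values along these lines. This is a minimal sketch showing only the two variables this guide relies on; if the file contains additional settings, leave them as they are.

```yaml
---
# Entries in the [worker] inventory section that are already part of the
# cluster and should be skipped by the scale-out playbook
current_worker_count: 3

# Entries after that point to add as new workers
scale_out_count: 7
```

With these values the playbook ignores the first three [worker] entries and boots the next seven off the generated discovery ISO.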