Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPEDGE-1192: feat: initial arbiter node addition #1731

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion 02_configure_host.sh
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ ansible-playbook \
-e "baremetal_network_name=${BAREMETAL_NETWORK_NAME}" \
-e "working_dir=$WORKING_DIR" \
-e "num_masters=$NUM_MASTERS" \
-e "num_arbiters=$NUM_ARBITERS" \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The playbook we're using here comes from https://github.com/metal3-io/metal3-dev-env/, which is an upstream project and thus does not (and cannot) know about OCP-specific node types.

-e "num_workers=$NUM_WORKERS" \
-e "num_extraworkers=$NUM_EXTRA_WORKERS" \
-e "libvirt_firmware=$LIBVIRT_FIRMWARE" \
Expand All @@ -147,6 +148,7 @@ ansible-playbook \
-e "nodes_file=$NODES_FILE" \
-e "virtualbmc_base_port=$VBMC_BASE_PORT" \
-e "master_hostname_format=$MASTER_HOSTNAME_FORMAT" \
-e "arbiter_hostname_format=$ARBITER_HOSTNAME_FORMAT" \
-e "worker_hostname_format=$WORKER_HOSTNAME_FORMAT" \
-e "libvirt_arch=$(uname -m)" \
-e "enable_vnc_console=$VNC_CONSOLE" \
Expand All @@ -165,7 +167,7 @@ if [ ${NUM_EXTRA_WORKERS} -ne 0 ]; then
ORIG_NODES_FILE="${NODES_FILE}.orig"
cp -f ${NODES_FILE} ${ORIG_NODES_FILE}
sudo chown -R $USER:$GROUP ${NODES_FILE}
jq "{nodes: .nodes[:$((NUM_MASTERS + NUM_WORKERS))]}" ${ORIG_NODES_FILE} | tee ${NODES_FILE}
jq "{nodes: .nodes[:$((NUM_MASTERS + NUM_ARBITERS + NUM_WORKERS))]}" ${ORIG_NODES_FILE} | tee ${NODES_FILE}
jq "{nodes: .nodes[-${NUM_EXTRA_WORKERS}:]}" ${ORIG_NODES_FILE} | tee ${EXTRA_NODES_FILE}
fi

Expand Down Expand Up @@ -410,6 +412,7 @@ fi
if [[ ! -z "${BOND_PRIMARY_INTERFACE:-}" ]]; then

setup_bond master $NUM_MASTERS
setup_bond arbiter $NUM_ARBITERS
setup_bond worker $NUM_WORKERS
fi

Expand Down
4 changes: 2 additions & 2 deletions 05_create_install_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ set_api_and_ingress_vip
if [ ! -f ${OCP_DIR}/install-config.yaml ]; then
# Validate there are enough nodes to avoid confusing errors later..
NODES_LEN=$(jq '.nodes | length' ${NODES_FILE})
if (( $NODES_LEN < ( $NUM_MASTERS + $NUM_WORKERS ) )); then
echo "ERROR: ${NODES_FILE} contains ${NODES_LEN} nodes, but ${NUM_MASTERS} masters and ${NUM_WORKERS} workers requested"
if (( $NODES_LEN < ( $NUM_MASTERS + $NUM_ARBITERS + $NUM_WORKERS ) )); then
echo "ERROR: ${NODES_FILE} contains ${NODES_LEN} nodes, but ${NUM_MASTERS} masters, ${NUM_ARBITERS} arbiter and ${NUM_WORKERS} workers requested"
exit 1
fi

Expand Down
11 changes: 10 additions & 1 deletion agent/05_agent_configure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ function configure_node() {
local cluster_name="${CLUSTER_NAME}_${node_type}_${node_num}"
local hostname="$(printf "${MASTER_HOSTNAME_FORMAT}" "${node_num}")"
local ip=$((base_ip + node_num))
if [[ "$node_type" == "arbiter" ]]; then
local hostname="$(printf "${ARBITER_HOSTNAME_FORMAT}" "${node_num}")"
local ip=$((base_ip + ${NUM_ARBITERS} + node_num))
fi
if [[ "$node_type" == "worker" ]]; then
local hostname="$(printf "${WORKER_HOSTNAME_FORMAT}" "${node_num}")"
local ip=$((base_ip + ${NUM_MASTERS} + node_num))
Expand Down Expand Up @@ -135,6 +139,11 @@ function get_static_ips_and_macs() {
configure_node "master" "$i"
done

for (( i=0; i<${NUM_ARBITERS}; i++ ))
do
configure_node "arbiter" "$i"
done

for (( i=0; i<${NUM_WORKERS}; i++ ))
do
configure_node "worker" "$i"
Expand Down Expand Up @@ -520,7 +529,7 @@ function get_nodes_bmc_info() {
AGENT_NODES_BMC_PASSWORDS=()
AGENT_NODES_BMC_ADDRESSES=()

number_nodes=$NUM_MASTERS+$NUM_WORKERS
number_nodes=$NUM_MASTERS+$NUM_ARBITERS+$NUM_WORKERS

for (( i=0; i<${number_nodes}; i++ ))
do
Expand Down
28 changes: 22 additions & 6 deletions agent/plugins/modules/preset_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
cluster_topology:
description: Cluster topology defines the number cluster nodes.
default: ha
choices: ['ha', 'compact', 'sno']
choices: ['ha', 'compact', 'sno', 'arbiter']
type: str
resource_profile:
description: Resource profile controls resource size of each node.
Expand Down Expand Up @@ -99,7 +99,7 @@ def main():
openshift_version=dict(type='str'),
cluster_name=dict(type='str', default='ostest'),
base_domain=dict(type='str', default='test.metalkube.org'),
cluster_topology=dict(type='str', default='ha', choices=['ha', 'compact', 'sno']),
cluster_topology=dict(type='str', default='ha', choices=['ha', 'compact', 'sno', 'arbiter']),
resource_profile=dict(type='str', default='minimal', choices=['minimal', 'recommended']),
extra_workers_profile=dict(type='str', default='none', choices=['none', 'day2active', 'day2inactive']),
ip_stack=dict(type='str', default='v6', choices=['v4', 'v6', 'v4v6']),
Expand Down Expand Up @@ -231,20 +231,23 @@ def generate_preset_base(home_dir, ci_token, ci_server, working_dir, ssh_pub_key
return facts

def determine_cluster_topology(cluster_topology, resource_profile, extra_workers_profile):
num_masters, num_workers, num_extra_workers = 0, 0, 0
num_masters, num_arbiters, num_workers, num_extra_workers = 0, 0, 0, 0
master_memory, master_disk, master_vcpu = None, None, None
arbiter_memory, arbiter_disk, arbiter_vcpu = None, None, None
worker_memory, worker_disk, worker_vcpu = None, None, None
extra_worker_memory, extra_worker_disk, extra_worker_vcpu = None, None, None
apply_extra_workers = None
node_hostname_static_ip = []

# Define the number of nodes based off topology
if cluster_topology == 'ha':
num_masters, num_workers = 3, 2
num_masters, num_arbiters, num_workers = 3, 0, 2
elif cluster_topology == 'compact':
num_masters, num_workers = 3, 0
num_masters, num_arbiters, num_workers = 3, 0, 0
elif cluster_topology == 'arbiter':
num_masters, num_arbiters, num_workers = 2, 1, 0
elif cluster_topology == 'sno':
num_masters, num_workers = 1, 0
num_masters, num_arbiters, num_workers = 1, 0, 0

if extra_workers_profile == 'none':
num_extra_workers, apply_extra_workers = 0, None
Expand Down Expand Up @@ -272,6 +275,15 @@ def determine_cluster_topology(cluster_topology, resource_profile, extra_workers
master_memory, master_disk, master_vcpu = '16384', '120', '8'
else:
pass
# Define the arbiter node resources
if num_arbiters > 0:
cluster_topology = 'arbiter'
if resource_profile == 'minimal':
arbiter_memory, arbiter_disk, arbiter_vcpu = '8192', '20', '2'
elif resource_profile == 'recommended':
arbiter_memory, arbiter_disk, arbiter_vcpu = '16384', '120', '4'
else:
pass

# Define the worker node resources
if num_workers > 0:
Expand Down Expand Up @@ -301,6 +313,10 @@ def determine_cluster_topology(cluster_topology, resource_profile, extra_workers
'master_memory': master_memory,
'master_disk': master_disk,
'master_vcpu': master_vcpu,
'num_arbiters': str(num_arbiters),
'arbiter_memory': arbiter_memory,
'arbiter_disk': arbiter_disk,
'arbiter_vcpu': arbiter_vcpu,
'worker_memory': worker_memory,
'worker_disk': worker_disk,
'worker_vcpu': worker_vcpu,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ controlPlane:
hyperthreading: Enabled
name: master
replicas: {{ num_masters }}
arbiter:
architecture: {{ goCPUArchitecture }}
hyperthreading: Enabled
name: arbiter
replicas: {{ num_arbiters }}
fips: {{ fips_mode }}
metadata:
name: {{ cluster_name }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ controlPlane:
hyperthreading: Enabled
name: master
replicas: {{ num_masters }}
arbiter:
architecture: {{ goCPUArchitecture }}
hyperthreading: Enabled
name: arbiter
replicas: {{ num_arbiters }}
fips: {{ fips_mode }}
metadata:
name: {{ cluster_name }}
Expand Down
5 changes: 5 additions & 0 deletions agent/roles/manifests/templates/install-config_yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ controlPlane:
hyperthreading: Enabled
name: master
replicas: {{ num_masters }}
arbiter:
architecture: {{ goCPUArchitecture }}
hyperthreading: Enabled
name: arbiter
replicas: {{ num_arbiters }}
fips: {{ fips_mode }}
metadata:
name: {{ cluster_name }}
Expand Down
1 change: 1 addition & 0 deletions agent/roles/manifests/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ mirror_command: "{{ lookup('env', 'MIRROR_COMMAND') }}"
networking_mode: "{{ lookup('env', 'NETWORKING_MODE') }}"
network_type: "{{ lookup('env', 'NETWORK_TYPE') }}"
num_masters: "{{ lookup('env', 'NUM_MASTERS') }}"
num_arbiters: "{{ lookup('env', 'NUM_ARBITERS') }}"
num_workers: "{{ lookup('env', 'NUM_WORKERS') }}"
num_extra_workers: "{{ lookup('env', 'NUM_EXTRA_WORKERS', default='0') }}"
no_proxy: "{{ lookup('env', 'AGENT_NO_PROXY') }}"
Expand Down
20 changes: 20 additions & 0 deletions assets/templates/98_arbiter-chronyd-custom.yaml.optional
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# MachineConfig template labelled for the "arbiter" role; it describes a file
# entry for /etc/chrony.conf (mode 0644, overwrite enabled) whose contents are
# supplied as a base64 data URL.
# IGNITION_VERSION and NTPFILECONTENT are placeholder tokens — presumably
# substituted when the template is rendered; confirm against the NTP setup code.
apiVersion: machineconfiguration.openshift.io/v1
kind: MachineConfig
metadata:
generation: 1
labels:
machineconfiguration.openshift.io/role: arbiter
name: 00-arbiter-chronyd-custom
spec:
config:
ignition:
version: IGNITION_VERSION
storage:
files:
- contents:
verification: {}
source: 'data:text/plain;charset=utf-8;base64,NTPFILECONTENT'
filesystem: root
mode: 0644
path: /etc/chrony.conf
overwrite: true
19 changes: 18 additions & 1 deletion common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -264,19 +264,25 @@ export CONTAINER_RUNTIME="podman"

export NUM_MASTERS=${NUM_MASTERS:-"3"}
export NUM_WORKERS=${NUM_WORKERS:-"2"}
export NUM_ARBITERS=${NUM_ARBITERS:-"0"}
export NUM_EXTRA_WORKERS=${NUM_EXTRA_WORKERS:-"0"}
export EXTRA_WORKERS_ONLINE_STATUS=${EXTRA_WORKERS_ONLINE_STATUS:-"true"}
export VM_EXTRADISKS=${VM_EXTRADISKS:-"false"}
export VM_EXTRADISKS_LIST=${VM_EXTRADISKS_LIST:-"vdb"}
export VM_EXTRADISKS_SIZE=${VM_EXTRADISKS_SIZE:-"8G"}
export MASTER_HOSTNAME_FORMAT=${MASTER_HOSTNAME_FORMAT:-"master-%d"}
export ARBITER_HOSTNAME_FORMAT=${ARBITER_HOSTNAME_FORMAT:-"arbiter-%d"}
export WORKER_HOSTNAME_FORMAT=${WORKER_HOSTNAME_FORMAT:-"worker-%d"}
export EXTRA_WORKER_HOSTNAME_FORMAT=${EXTRA_WORKER_HOSTNAME_FORMAT:-"extraworker-%d"}

export MASTER_MEMORY=${MASTER_MEMORY:-16384}
export MASTER_DISK=${MASTER_DISK:-50}
export MASTER_VCPU=${MASTER_VCPU:-8}

# Arbiter VM sizing. Each setting honours its own ARBITER_* value from the
# environment and otherwise falls back to the arbiter default given here
# (it must not inherit the MASTER_* sizing).
export ARBITER_MEMORY=${ARBITER_MEMORY:-8192}
export ARBITER_DISK=${ARBITER_DISK:-30}
export ARBITER_VCPU=${ARBITER_VCPU:-4}

export WORKER_MEMORY=${WORKER_MEMORY:-8192}
export WORKER_DISK=${WORKER_DISK:-30}
export WORKER_VCPU=${WORKER_VCPU:-4}
Expand All @@ -294,7 +300,7 @@ export IRONIC_IMAGES_DIR="${IRONIC_DATA_DIR}/html/images"
export VBMC_IMAGE=${VBMC_IMAGE:-"quay.io/metal3-io/vbmc"}
export SUSHY_TOOLS_IMAGE=${SUSHY_TOOLS_IMAGE:-"quay.io/metal3-io/sushy-tools"}
export VBMC_BASE_PORT=${VBMC_BASE_PORT:-"6230"}
export VBMC_MAX_PORT=$((VBMC_BASE_PORT + NUM_MASTERS + NUM_WORKERS + NUM_EXTRA_WORKERS - 1))
export VBMC_MAX_PORT=$((VBMC_BASE_PORT + NUM_MASTERS + NUM_ARBITERS + NUM_WORKERS + NUM_EXTRA_WORKERS - 1))
export REDFISH_EMULATOR_IGNORE_BOOT_DEVICE="${REDFISH_EMULATOR_IGNORE_BOOT_DEVICE:-False}"

# Which docker registry image should we use?
Expand Down Expand Up @@ -434,6 +440,17 @@ if [[ ! -z ${AGENT_E2E_TEST_SCENARIO} ]]; then
export MASTER_MEMORY=32768
export NUM_WORKERS=0
;;
"ARBITER" )
export NUM_MASTERS=2
export MASTER_VCPU=4
export MASTER_DISK=100
export MASTER_MEMORY=32768
export NUM_ARBITERS=1
export ARBITER_VCPU=2
export ARBITER_DISK=50
export ARBITER_MEMORY=8192
export NUM_WORKERS=0
;;
"HA" )
export NUM_MASTERS=3
export MASTER_VCPU=4
Expand Down
1 change: 1 addition & 0 deletions host_cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ ansible-playbook \
-e "baremetal_network_name=${BAREMETAL_NETWORK_NAME}" \
-e "working_dir=$WORKING_DIR" \
-e "num_masters=$NUM_MASTERS" \
-e "num_arbiters=$NUM_ARBITERS" \
-e "num_workers=$NUM_WORKERS" \
-e "num_extraworkers=$NUM_EXTRA_WORKERS" \
-e "virthost=$HOSTNAME" \
Expand Down
2 changes: 2 additions & 0 deletions network-configs/bond/ostest-arbiter-0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
networkConfig: *BOND

3 changes: 3 additions & 0 deletions network-configs/static/hosts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@
- ip: 192.168.111.114
hostnames:
- "worker-1"
- ip: 192.168.111.115
hostnames:
- "arbiter-0"
19 changes: 19 additions & 0 deletions network-configs/static/ostest-arbiter-0.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Static network configuration for the arbiter-0 host: ethernet interface
# enp2s0 is brought up with IPv4 address 192.168.111.115/24, 192.168.111.1 is
# listed as DNS server, and the 0.0.0.0/0 route points at 192.168.111.1 via
# enp2s0.
networkConfig:
interfaces:
- name: enp2s0
type: ethernet
state: up
ipv4:
address:
- ip: "192.168.111.115"
prefix-length: 24
enabled: true
dns-resolver:
config:
server:
- 192.168.111.1
routes:
config:
- destination: 0.0.0.0/0
next-hop-address: 192.168.111.1
next-hop-interface: enp2s0
4 changes: 4 additions & 0 deletions ocp_cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ fi
if test -f assets/templates/98_worker-chronyd-redhat.yaml ; then
rm -f assets/templates/98_worker-chronyd-redhat.yaml
fi
if test -f assets/templates/98_arbiter-chronyd-redhat.yaml ; then
rm -f assets/templates/98_arbiter-chronyd-redhat.yaml
fi


# If the installer fails before terraform completes the destroy bootstrap
# cleanup doesn't clean up the VM/volumes created..
Expand Down
21 changes: 19 additions & 2 deletions ocp_install_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,20 @@ EOF
fi
}

# Print the "arbiter:" machine-pool stanza that is spliced into the generated
# install config via $(arbiterNodeStanza).
# Globals:  NUM_ARBITERS (read) - used as the replica count.
# Outputs:  the stanza on stdout when NUM_ARBITERS is greater than 0;
#           nothing otherwise, so the expansion is empty for clusters without
#           arbiters.
# NOTE(review): architecture is hard-coded to amd64 here, while the
# controlPlane stanza uses $(get_arch install_config) — confirm whether the
# arbiter pool should follow the same lookup.
function arbiterNodeStanza() {
if [[ "$NUM_ARBITERS" -gt "0" ]]; then
cat <<EOF
arbiter:
architecture: amd64
hyperthreading: Enabled
replicas: ${NUM_ARBITERS}
name: arbiter
platform:
baremetal: {}
EOF
fi
}

function libvirturi() {
if [[ "$REMOTE_LIBVIRT" -ne 0 ]]; then
cat <<EOF
Expand Down Expand Up @@ -314,6 +328,7 @@ controlPlane:
architecture: $(get_arch install_config)
platform:
baremetal: {}
$(arbiterNodeStanza)
$(featureSet)
platform:
baremetal:
Expand All @@ -332,12 +347,14 @@ EOF
if [ -z "${HOSTS_SWAP_DEFINITION:-}" ]; then
cat >> "${outdir}/install-config.yaml" << EOF
$(node_map_to_install_config_hosts $NUM_MASTERS 0 master)
$(node_map_to_install_config_hosts $NUM_WORKERS $NUM_MASTERS worker)
$(node_map_to_install_config_hosts $NUM_WORKERS $(($NUM_MASTERS + $NUM_ARBITERS)) worker)
$(node_map_to_install_config_hosts $NUM_ARBITERS $NUM_MASTERS arbiter)
EOF
else
cat >> "${outdir}/install-config.yaml" << EOF
$(node_map_to_install_config_hosts $NUM_WORKERS $NUM_MASTERS worker)
$(node_map_to_install_config_hosts $NUM_WORKERS $(($NUM_MASTERS + $NUM_ARBITERS)) worker)
$(node_map_to_install_config_hosts $NUM_MASTERS 0 master)
$(node_map_to_install_config_hosts $NUM_ARBITERS $NUM_MASTERS arbiter)
EOF
fi

Expand Down
1 change: 1 addition & 0 deletions remote_nodes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ function playbook() {
-e "virtualbmc_base_port=$VBMC_BASE_PORT" \
-e "virtbmc_action=$VIRTBMC_ACTION" \
-e "master_hostname_format=$MASTER_HOSTNAME_FORMAT" \
-e "arbiter_hostname_format=$ARBITER_HOSTNAME_FORMAT" \
-e "worker_hostname_format=$WORKER_HOSTNAME_FORMAT" \
-e "provisioning_network_name=noprov" \
-e "num_masters=$REMOTE_CLUSTER_NUM_MASTERS" \
Expand Down
5 changes: 3 additions & 2 deletions utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ function custom_ntp(){
if [ -n "$NTP_SERVERS" ]; then
cp assets/templates/98_worker-chronyd-custom.yaml.optional assets/generated/98_worker-chronyd-custom.yaml
cp assets/templates/98_master-chronyd-custom.yaml.optional assets/generated/98_master-chronyd-custom.yaml
cp assets/templates/98_arbiter-chronyd-custom.yaml.optional assets/generated/98_arbiter-chronyd-custom.yaml
NTPFILECONTENT=$(cat assets/files/etc/chrony.conf)
for ntp in $(echo $NTP_SERVERS | tr ";" "\n"); do
NTPFILECONTENT="${NTPFILECONTENT}"$'\n'"pool ${ntp} iburst"
Expand Down Expand Up @@ -224,7 +225,7 @@ function node_map_to_install_config_hosts() {
start_idx="$2"
role="$3"

for ((idx=$start_idx;idx<$(($1 + $start_idx));idx++)); do
for ((idx=$start_idx;idx<$(($num_hosts + $start_idx));idx++)); do
name=$(node_val ${idx} "name")
mac=$(node_val ${idx} "ports[0].address")

Expand Down Expand Up @@ -266,7 +267,7 @@ EOF
# FIXME(stbenjam) Worker code in installer should accept
# "default" as well -- currently the mapping doesn't work,
# so we use the raw value for BMO's default which is "unknown"
if [[ "$role" == "master" ]]; then
if [[ "$role" == "master" ]] || [[ "$role" == "arbiter" ]] ; then
if [ -z "${MASTER_HARDWARE_PROFILE:-}" ]; then
cat <<EOF
rootDeviceHints:
Expand Down
Loading