Skip to content

Commit

Permalink
feat: update kata config for containerd v2 (#5804)
Browse files Browse the repository at this point in the history
  • Loading branch information
bravebeaver authored Feb 11, 2025
1 parent 9f4e118 commit 75756bb
Show file tree
Hide file tree
Showing 33 changed files with 4,237 additions and 131 deletions.
273 changes: 150 additions & 123 deletions pkg/agent/baker.go

Large diffs are not rendered by default.

35 changes: 30 additions & 5 deletions pkg/agent/baker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,30 @@ oom_score = -999
Expect(exist).To(BeFalse())
},
),

// Table entry: a CustomizedImageKata agent pool on Kubernetes 1.32.0 must get a
// containerd config that declares the kata runtime under the
// "io.containerd.cri.v1.runtime" plugin key and not under the older
// "io.containerd.grpc.v1.cri" key.
// NOTE(review): the 2nd and 3rd arguments are consumed by the enclosing table
// function, which is not visible here — presumably the testdata folder name and
// a Kubernetes version constraint; confirm against the table definition.
Entry("CustomizedImageKata VHD with k8s 1.32+ should have proper containerd config", "CustomizedImageKata+1.32", ">=1.32.x",
// Config mutator: selects containerd as the runtime, the CustomizedImageKata
// distro, and pins the orchestrator version to 1.32.0.
func(c *datamodel.NodeBootstrappingConfiguration) {
c.ContainerService.Properties.AgentPoolProfiles[0].KubernetesConfig = &datamodel.KubernetesConfig{
ContainerRuntime: datamodel.Containerd,
}
c.ContainerService.Properties.AgentPoolProfiles[0].Distro = datamodel.CustomizedImageKata
c.ContainerService.Properties.OrchestratorProfile.OrchestratorVersion = "1.32.0"
}, func(o *nodeBootstrappingOutput) {
// Output validator: provision_start.sh must not be among the generated files.
_, exist := o.files["/opt/azure/containers/provision_start.sh"]

Expect(exist).To(BeFalse())
// The containerd config is carried base64-encoded in the CONTAINERD_CONFIG_CONTENT variable.
containerdConfigFileContent, err := getBase64DecodedValue([]byte(o.vars["CONTAINERD_CONFIG_CONTENT"]))
Expect(err).To(BeNil())
// Both raw strings below start and end with a newline, and every byte between
// the backticks takes part in the substring match.
expectedContainerdV2KataConfig := `
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.kata]
`
deprecatedContainerdV1KataConfig := `
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.kata]
`
// The new plugin key must be present and the old one absent.
Expect(containerdConfigFileContent).To(ContainSubstring(expectedContainerdV2KataConfig))
Expect(containerdConfigFileContent).NotTo(ContainSubstring(deprecatedContainerdV1KataConfig))
},
),
Entry("AKSUbuntu2204 DisableSSH with enabled ssh", "AKSUbuntu2204+SSHStatusOn", "1.24.2", func(config *datamodel.NodeBootstrappingConfiguration) {
config.SSHStatus = datamodel.SSHOn
}, nil),
Expand Down Expand Up @@ -1685,11 +1709,12 @@ oom_score = -999
expectedRuncConfig := `
[plugins."io.containerd.cri.v1.runtime".containerd]
default_runtime_name = "runc"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options]
BinaryName = "/usr/bin/runc"
SystemdCgroup = true
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes]
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.cri.v1.runtime".containerd.runtimes.runc.options]
BinaryName = "/usr/bin/runc"
SystemdCgroup = true
`
deprecatedRuncConfig := `
[plugins."io.containerd.grpc.v1.cri".containerd]
Expand Down

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/agent/testdata/AKSUbuntu2404+NetworkPolicy/CSECommand

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pkg/agent/testdata/AKSUbuntu2404+Teleport/CSECommand

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/CSECommand

Large diffs are not rendered by default.

316 changes: 316 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/CustomData

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line100.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# systemd unit that runs the kubelet binary installed at /usr/local/bin/kubelet.
[Unit]
Description=Kubelet
# The unit is skipped (not failed) when the kubelet binary is absent.
ConditionPathExists=/usr/local/bin/kubelet
# Ordered after the network and containerd; Wants= pulls them in without making
# their failure fatal to this unit.
Wants=network-online.target containerd.service
After=network-online.target containerd.service

[Service]
# Always restart, two seconds after the process exits.
Restart=always
RestartSec=2
EnvironmentFile=/etc/default/kubelet
# Exit code 143 (128 + SIGTERM) is treated as a clean shutdown.
SuccessExitStatus=143
# Pre-start steps run in order; any failure (except the '-' prefixed ones) aborts the start.
ExecStartPre=/bin/bash /opt/azure/containers/kubelet.sh
ExecStartPre=/bin/bash /opt/azure/containers/ensure_imds_restriction.sh
ExecStartPre=/bin/mkdir -p /var/lib/kubelet
ExecStartPre=/bin/mkdir -p /var/lib/cni
# Bind-mount /var/lib/kubelet onto itself unless the mount table already lists
# it, then mark that mount as shared.
ExecStartPre=/bin/bash -c "if [ $(mount | grep \"/var/lib/kubelet\" | wc -l) -le 0 ] ; then /bin/mount --bind /var/lib/kubelet /var/lib/kubelet ; fi"
ExecStartPre=/bin/mount --make-shared /var/lib/kubelet

# Leading '-': a non-zero exit from these table listings does not fail the unit.
ExecStartPre=-/sbin/ebtables -t nat --list
ExecStartPre=-/sbin/iptables -t nat --numeric --list

# NOTE(review): the KUBELET_* variables below are not set in this unit;
# presumably they come from the EnvironmentFile above or a drop-in — confirm.
ExecStart=/usr/local/bin/kubelet \
--enable-server \
--node-labels="${KUBELET_NODE_LABELS}" \
--v=2 \
--volume-plugin-dir=/etc/kubernetes/volumeplugins \
$KUBELET_TLS_BOOTSTRAP_FLAGS \
$KUBELET_CONFIG_FILE_FLAGS \
$KUBELET_CONTAINERD_FLAGS \
$KUBELET_CONTAINER_RUNTIME_FLAG \
$KUBELET_CGROUP_FLAGS \
$KUBELET_FLAGS

[Install]
WantedBy=multi-user.target
10 changes: 10 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line107.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# systemd unit that switches MIG mode on through nvidia-smi and then runs
# mig-partition.sh with the selected GPU instance profile.
[Unit]
Description=Apply MIG configuration on Nvidia A100 GPU

[Service]
Restart=on-failure
# Runs before the partition script; a failure here aborts the start.
ExecStartPre=/usr/bin/nvidia-smi -mig 1
# NOTE(review): GPU_INSTANCE_PROFILE is not defined in this unit; presumably it
# is injected through a drop-in or environment file — confirm.
ExecStart=/bin/bash /opt/azure/containers/mig-partition.sh ${GPU_INSTANCE_PROFILE}

[Install]
WantedBy=multi-user.target
27 changes: 27 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line114.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash

# Creates MIG GPU instances for the profile named in $1, then creates the
# compute instances on top of them.
#
# Accepted profiles: MIG1g, MIG2g, MIG3g, MIG4g, MIG7g. Any other value prints
# an error message and exits with status 1 before nvidia-smi is invoked.
#
# NOTE: NVIDIA's mig-parted library (https://github.com/NVIDIA/mig-parted)
#       cannot be used yet because the installed GPU driver version is outdated.
# TODO: Switch to mig-parted for partitioning once that issue is fixed.
MIG_PROFILE=${1}

# Translate the profile name into the id list handed to `nvidia-smi mig -cgi`.
case ${MIG_PROFILE} in
    "MIG1g") gpu_instance_ids="19,19,19,19,19,19,19" ;;
    "MIG2g") gpu_instance_ids="14,14,14" ;;
    "MIG3g") gpu_instance_ids="9,9" ;;
    "MIG4g") gpu_instance_ids="5" ;;
    "MIG7g") gpu_instance_ids="0" ;;
    *)
        echo "not a valid GPU instance profile"
        exit 1
        ;;
esac

# Create the GPU instances, then the compute instances inside them.
nvidia-smi mig -cgi "${gpu_instance_ids}"
nvidia-smi mig -cci
99 changes: 99 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line122.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env bash

# Abort on use of an unset variable and on the first failing command.
set -o nounset
set -e

# Pull in shared helpers. NOTE(review): apt_get_update, called further down in
# this script, is not defined in this file and presumably comes from here — confirm.
source /opt/azure/containers/provision_source_distro.sh

# Runs `unattended-upgrade -v` until it succeeds, making at most 10 attempts
# and sleeping 5 seconds after each failed attempt except the last.
# Returns 1 when the final attempt fails; on success prints the number of
# attempts that were needed.
unattended_upgrade() {
    retries=10
    i=1
    while [ "$i" -le "$retries" ]; do
        if unattended-upgrade -v; then
            break
        fi
        # Out of attempts: report failure to the caller.
        if [ "$i" -eq "$retries" ]; then
            return 1
        fi
        sleep 5
        i=$((i + 1))
    done
    echo "Executed unattended upgrade $i times"
}

# Reports whether any line of a config file starts with "<option>:".
#   $1 - path of the file to inspect
#   $2 - option name; it is interpolated into a sed regex unescaped, so it must
#        not contain '/' or regex metacharacters
# Returns 0 when such a line exists, non-zero otherwise.
cfg_has_option() {
    local file=$1
    local option=$2
    local line
    # Assigned separately from `local` so a sed failure is not masked.
    line=$(sed -n "/^$option:/ p" "$file")
    [ -n "$line" ]
}

# Ensures a config file contains "<option>: <value>".
#   $1 - path of the file to edit
#   $2 - option name (same restrictions as cfg_has_option)
#   $3 - value to store; interpolated into a sed replacement unescaped
# Appends the line when no line starts with "<option>:"; otherwise rewrites
# every line that starts with "<option>:" in place.
cfg_set_option() {
    local file=$1
    local option=$2
    local value=$3
    if cfg_has_option "$file" "$option"; then
        # Anchored at line start, exactly like the check in cfg_has_option, so
        # keys that merely end with the option name and commented-out mentions
        # of it are left untouched.
        sed -i 's/^'"$option"':.*$/'"$option: $value"'/' "$file"
    else
        echo "$option: $value" >> "$file"
    fi
}

# kubectl invocation that authenticates with the kubelet's kubeconfig.
KUBECTL="/usr/local/bin/kubectl --kubeconfig /var/lib/kubelet/kubeconfig"

source_list_path=/etc/apt/sources.list
source_list_backup_path=/etc/apt/sources.list.backup
cloud_cfg_path=/etc/cloud/cloud.cfg

# Block until the kubelet kubeconfig exists (polls every 3 seconds, no timeout).
while [ ! -f /var/lib/kubelet/kubeconfig ]; do
echo 'Waiting for TLS bootstrapping'
sleep 3
done

node_name=$(hostname)
if [ -z "${node_name}" ]; then
echo "cannot get node name"
exit 1
fi

# The node is looked up by the lower-cased hostname.
node_name=$(echo "$node_name" | tr '[:upper:]' '[:lower:]')

# Target snapshot timestamp, read from a node annotation. An empty value means
# there is nothing to do and the script exits successfully.
golden_timestamp=$($KUBECTL get node ${node_name} -o jsonpath="{.metadata.annotations['kubernetes\.azure\.com/live-patching-golden-timestamp']}")
if [ -z "${golden_timestamp}" ]; then
echo "golden timestamp is not set, skip live patching"
exit 0
fi
echo "golden timestamp is: ${golden_timestamp}"

# Timestamp recorded by a previous successful run (written at the end of this
# script). If it already equals the target, exit without touching anything.
current_timestamp=$($KUBECTL get node ${node_name} -o jsonpath="{.metadata.annotations['kubernetes\.azure\.com/live-patching-current-timestamp']}")
if [ -n "${current_timestamp}" ]; then
echo "current timestamp is: ${current_timestamp}"

if [[ "${golden_timestamp}" == "${current_timestamp}" ]]; then
echo "golden and current timestamp is the same, nothing to patch"
exit 0
fi
fi

# Point apt at snapshot.ubuntu.com for the golden timestamp:
#   1st sed: rewrite the Azure archive mirror URL to the snapshot URL.
#   2nd sed: re-pin any snapshot URL that already carries a
#            YYYYMMDDTHHMMSSZ stamp to the golden timestamp.
old_source_list=$(cat ${source_list_path})
sed -i 's/http:\/\/azure.archive.ubuntu.com\/ubuntu\//https:\/\/snapshot.ubuntu.com\/ubuntu\/'"${golden_timestamp}"'/g' ${source_list_path}
sed -i 's/https:\/\/snapshot.ubuntu.com\/ubuntu\/\([0-9]\{8\}T[0-9]\{6\}Z\)/https:\/\/snapshot.ubuntu.com\/ubuntu\/'"${golden_timestamp}"'/g' ${source_list_path}
# Record "apt_preserve_sources_list: true" in cloud.cfg.
# NOTE(review): presumably this stops cloud-init from regenerating
# sources.list — confirm against the cloud-init documentation.
option=apt_preserve_sources_list
option_value=true
cfg_set_option ${cloud_cfg_path} ${option} ${option_value}

# When the rewrite changed sources.list, keep the previous content as a backup
# and print the difference. diff exits non-zero when the files differ, hence
# the `|| true` under `set -e`.
new_source_list=$(cat ${source_list_path})
if [[ "${old_source_list}" != "${new_source_list}" ]]; then
echo "$old_source_list" > ${source_list_backup_path}
echo "/etc/apt/sources.list is updated:"
diff ${source_list_backup_path} ${source_list_path} || true
fi

# NOTE(review): apt_get_update is not defined in this file; presumably it is
# provided by the sourced provision_source_distro.sh — confirm.
if ! apt_get_update; then
echo "apt_get_update failed"
exit 1
fi
if ! unattended_upgrade; then
echo "unattended_upgrade failed"
exit 1
fi

# Record the applied timestamp on the node so the next run can detect that it
# is already up to date.
$KUBECTL annotate --overwrite node ${node_name} kubernetes.azure.com/live-patching-current-timestamp=${golden_timestamp}

echo snapshot update completed successfully
6 changes: 6 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line129.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# systemd unit that runs ubuntu-snapshot-update.sh once per activation.
# It has no [Install] section, so it is not enabled on its own.
# NOTE(review): presumably it is triggered by a timer unit — confirm.
[Unit]
Description=Snapshot Update Service

[Service]
# oneshot: the unit counts as started only after the script has exited.
Type=oneshot
ExecStart=/opt/azure/containers/ubuntu-snapshot-update.sh
9 changes: 9 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line136.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# systemd timer. No Unit= is set, so it activates the unit that shares this
# timer's name (with the .timer suffix replaced by .service).
[Unit]
Description=Runs snapshot update script periodically

[Timer]
# First run 10 minutes after boot, then 10 minutes after each time the
# triggered unit was last activated.
OnBootSec=10min
OnUnitActiveSec=10min

[Install]
WantedBy=multi-user.target
24 changes: 24 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line144.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Fail on errors, unset variables and failed pipeline stages; trace every command.
set -o errexit
set -o nounset
set -o pipefail
set -x

# Relocates the kubelet data directory to /mnt/aks/kubelet and bind-mounts it
# back at /var/lib/kubelet.

MOUNT_POINT="/mnt/aks"

KUBELET_MOUNT_POINT="${MOUNT_POINT}/kubelet"
KUBELET_DIR="/var/lib/kubelet"

mkdir -p "${MOUNT_POINT}"

# One-time move: the sentinel file records that the directory has already been
# moved, so later runs skip straight to the mount. Because MOUNT_POINT exists
# as a directory, the mv places the data at ${MOUNT_POINT}/kubelet.
SENTINEL_FILE="/opt/azure/containers/bind-sentinel"
if [ ! -e "$SENTINEL_FILE" ]; then
mv "$KUBELET_DIR" "$MOUNT_POINT"
touch "$SENTINEL_FILE"
fi

# Recreate the (now empty) original path, mount the relocated data over it,
# and make the mounted directory writable by all users.
mkdir -p "${KUBELET_DIR}"
mount --bind "${KUBELET_MOUNT_POINT}" "${KUBELET_DIR}"
chmod a+w "${KUBELET_DIR}"
13 changes: 13 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line151.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# systemd unit that runs bind-mount.sh once /mnt is mounted.
[Unit]
Description=Bind mount kubelet data
# mnt.mount is the systemd unit for the /mnt mount point; this service
# requires it and is ordered after it.
Requires=mnt.mount
After=mnt.mount

[Service]
Restart=on-failure
# Keep the unit reported as active after the oneshot script has exited.
RemainAfterExit=yes
Type=oneshot
ExecStart=/bin/bash /opt/azure/containers/bind-mount.sh

[Install]
WantedBy=multi-user.target
11 changes: 11 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line158.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# systemd unit that runs enable-dhcpv6.sh once, ordered after network-online.target.
[Unit]
Description=enabledhcpv6
After=network-online.target

[Service]
# oneshot: the unit counts as started only after the script has exited.
Type=oneshot
ExecStart=/opt/azure/containers/enable-dhcpv6.sh

[Install]
WantedBy=multi-user.target
#EOF
27 changes: 27 additions & 0 deletions pkg/agent/testdata/CustomizedImageKata+1.32/line165.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Abort on errors, failed pipeline stages and unset variables.
set -e
set -o pipefail
set -u

DHCLIENT6_CONF_FILE=/etc/dhcp/dhclient6.conf
CLOUD_INIT_CFG=/etc/network/interfaces.d/50-cloud-init.cfg

# Load the multi-line interface stanza into NETWORK_CONFIGURATION.
# `read -d ''` reads up to a NUL byte, never finds one in the heredoc and so
# returns non-zero at end of input; `|| true` keeps `set -e` from aborting.
read -r -d '' NETWORK_CONFIGURATION << EOC || true
iface eth0 inet6 auto
up sleep 5
up dhclient -1 -6 -cf /etc/dhcp/dhclient6.conf -lf /var/lib/dhcp/dhclient6.eth0.leases -v eth0 || true
EOC

# Appends text to a file unless grep already finds it there.
#   $1 - text to add; matched literally (-F) against whole lines (-x)
#   $2 - file to check and append to
# NOTE(review): grep treats each line of a multi-line $1 as a separate pattern,
# so the append is skipped as soon as ANY one of those lines is already present
# in the file — confirm this is acceptable for multi-line callers.
add_if_not_exists() {
    if ! grep -qxF "${1}" "${2}"; then
        echo "${1}" >> "${2}"
    fi
}

echo "Configuring dhcpv6 ..."

# Make sure the dhclient6 config exists and carries "timeout 10;", add the
# interface stanza to the cloud-init interfaces file, then bounce eth0.
# NOTE(review): under `set -e` a failure of any command in this && chain other
# than the last one (`sudo ifup eth0`) does not abort the script; the chain
# just short-circuits and "Configuration complete" is still printed — confirm
# this best-effort behaviour is intended.
# NOTE(review): the touch target repeats the path held in DHCLIENT6_CONF_FILE
# as a literal; the two must be kept in sync.
touch /etc/dhcp/dhclient6.conf && add_if_not_exists "timeout 10;" ${DHCLIENT6_CONF_FILE} && \
add_if_not_exists "${NETWORK_CONFIGURATION}" ${CLOUD_INIT_CFG} && \
sudo ifdown eth0 && sudo ifup eth0

echo "Configuration complete"
#EOF
Loading

0 comments on commit 75756bb

Please sign in to comment.