Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add expand-and-modify test type to ebs-scale-test #2330

Merged
merged 2 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion hack/ebs-scale-test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ Set the `CLUSTER_TYPE` and `TEST_TYPE` environment variables to set up and run d
- 'pre-allocated': Additional worker nodes are created during cluster setup. By default, we pre-allocate 1 `m7a.48xlarge` EC2 instance for every 100 StatefulSet replicas.

- `TEST_TYPE` dictates what type of scalability test we want to run. Options include:
- 'scale-sts': Scales a StatefulSet to `$REPLICAS`. Waits for all pods to be ready. Delete Sts. Waits for all PVs to be deleted. Exercises the complete dynamic provisioning lifecycle for block volumes.
- 'scale-sts': Scales a StatefulSet to `$REPLICAS`. Waits for all pods to be ready. Deletes the StatefulSet. Waits for all PVs to be deleted. Exercises the complete dynamic provisioning lifecycle for block volumes.
- 'expand-and-modify': Creates `$REPLICAS` block volumes. Patches PVC capacity and VACName at a rate of 5 PVCs per second. Ensures PVCs are expanded and modified before deleting them. Exercises ControllerExpandVolume & ControllerModifyVolume. Set `MODIFY_ONLY` or `EXPAND_ONLY` to 'true' to test solely volume modification/expansion.

You can mix and match `CLUSTER_TYPE` and `TEST_TYPE`.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,13 @@ collect-and-export-metrics() {
collect_metrics() {
echo "Port-forwarding ebs-csi-controller containers"
kubectl port-forward "$CONTROLLER_POD_NAME" 3301:3301 -n kube-system &
PID_3301=$!
kubectl port-forward "$CONTROLLER_POD_NAME" 8081:8081 -n kube-system &
PID_8081=$!
kubectl port-forward "$CONTROLLER_POD_NAME" 8082:8082 -n kube-system &
PID_8082=$!
kubectl port-forward "$CONTROLLER_POD_NAME" 8084:8084 -n kube-system &
PID_8084=$!

echo "Collecting metrics"
for port in 3301 8081 8082 8084; do
Expand All @@ -58,6 +62,8 @@ collect_metrics() {
sleep 20
echo "WARNING: Could not collect metrics from port ${port}. Something may be wrong in cluster."
done
# Stop forwarding ports after metrics collected.
kill $PID_3301 $PID_8081 $PID_8082 $PID_8084
}

clean_metrics() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/bin/bash
# Copyright 2025 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

### Helper script for running EBS-backed PVC expansion and modification test

# This helper is expected to be sourced from hack/ebs-scale-test; BASE_DIR is read as-is.
path_to_resize_and_modify_test_dir="${BASE_DIR}/helpers/scale-test/expand-and-modify-test"

# Default both toggles to "false" when they are unset or empty, then export them.
: "${EXPAND_ONLY:=false}" "${MODIFY_ONLY:=false}"
export EXPAND_ONLY MODIFY_ONLY

# Prints the JSON patch list selected by EXPAND_ONLY / MODIFY_ONLY on stdout.
# Returns 1 (printing nothing) unless the pair is exactly one of
# false/false, false/true or true/false.
_expand_and_modify_patches() {
  local modify_op='{"op": "replace", "path": "/spec/volumeAttributesClassName", "value": "ebs-scale-test-expand-and-modify"}'
  local expand_op='{"op": "replace", "path": "/spec/resources/requests/storage", "value": "2Gi"}'

  case "${EXPAND_ONLY}-${MODIFY_ONLY}" in
    false-false) printf '[%s,%s]\n' "$modify_op" "$expand_op" ;;
    false-true) printf '[%s]\n' "$modify_op" ;;
    true-false) printf '[%s]\n' "$expand_op" ;;
    *) return 1 ;;
  esac
}

#######################################
# Runs the expand-and-modify scale test: renders and applies the manifest,
# waits for the PVCs to bind, patches every PVC, waits for the patches to take
# effect, then deletes the resources and waits for the PVs to disappear.
# Globals:
#   path_to_resize_and_modify_test_dir, EXPORT_DIR, EXPAND_ONLY, MODIFY_ONLY (read)
#   manifest_path, export_manifest_path (written)
# Calls: wait_for_pvcs_to_bind, ensure_volumes_modified, wait_for_pvs_to_delete
# Exits: 1 if EXPAND_ONLY / MODIFY_ONLY are not a supported combination.
#######################################
expand_and_modify_test() {
  # Validate the toggles before anything is created in the cluster, so a bad
  # configuration can neither leak resources nor send an empty patch.
  local patches
  if ! patches=$(_expand_and_modify_patches); then
    echo "Environment variables EXPAND_ONLY ('$EXPAND_ONLY') and MODIFY_ONLY ('$MODIFY_ONLY') are not properly set to either 'true' or 'false'"
    exit 1
  fi

  # Deliberately not `local`: the EXIT trap below expands export_manifest_path
  # when it fires.
  manifest_path="$path_to_resize_and_modify_test_dir/expand-and-modify.yaml"
  export_manifest_path="$EXPORT_DIR/expand-and-modify.yaml"

  echo "Applying $manifest_path. Exported to $export_manifest_path"
  gomplate -f "$manifest_path" -o "$export_manifest_path"
  kubectl apply -f "$export_manifest_path"

  # Cleanup K8s resources upon script interruption
  trap 'echo "Test interrupted! Deleting test resources to prevent leak"; kubectl delete -f "$export_manifest_path"' EXIT

  echo "Waiting for all PVCs to be dynamically provisioned"
  wait_for_pvcs_to_bind

  echo "Patching PVCs with $patches"
  # `kubectl get -o name` prints "<resource>/<name>"; sed keeps only the name.
  # xargs runs up to 5 patch commands concurrently.
  kubectl get pvc -o name \
    | sed -e 's/.*\///g' \
    | xargs -P 5 -I {} kubectl patch pvc {} --type=json -p="$patches"

  echo "Waiting until volumes modified and/or expanded"
  ensure_volumes_modified

  echo "Deleting resources"
  kubectl delete -f "$export_manifest_path"

  echo "Waiting for all PVs to be deleted"
  wait_for_pvs_to_delete

  # The test finished normally, so the interruption handler is no longer needed.
  trap - EXIT
}

# Polls the PVCs every 5 seconds until the number of PVCs matching a jq
# predicate equals $REPLICAS. There is no timeout.
# Arguments:
#   $1 - jq boolean expression evaluated against each PVC item
#   $2 - word used in the progress messages (e.g. "modified")
_wait_until_all_pvcs_match() {
  local predicate=$1
  local verb=$2
  local matched

  while true; do
    matched=$(kubectl get pvc -o json | jq ".items | map(select(${predicate})) | length")
    echo "$matched/$REPLICAS volumes $verb"
    if [[ "$matched" == "$REPLICAS" ]]; then
      echo "All volumes $verb"
      return 0
    fi
    sleep 5
  done
}

#######################################
# Blocks until every PVC reports the patched state.
# Unless EXPAND_ONLY is not "false", waits for status.currentVolumeAttributesClassName
# to equal "ebs-scale-test-expand-and-modify"; unless MODIFY_ONLY is not "false",
# waits for status.capacity.storage to equal "2Gi".
# Globals: EXPAND_ONLY, MODIFY_ONLY, REPLICAS (read)
#######################################
ensure_volumes_modified() {
  if [[ "$EXPAND_ONLY" == "false" ]]; then
    _wait_until_all_pvcs_match \
      '.status.currentVolumeAttributesClassName == "ebs-scale-test-expand-and-modify"' \
      "modified"
  fi

  if [[ "$MODIFY_ONLY" == "false" ]]; then
    _wait_until_all_pvcs_match '.status.capacity.storage == "2Gi"' "expanded"
  fi
}

# Polls every 5 seconds until at least $REPLICAS PVCs report phase "Bound".
# Globals: REPLICAS (read), bound_pvc_count (written)
wait_for_pvcs_to_bind() {
  while :; do
    bound_pvc_count=$(kubectl get pvc -o json | jq '.items | map(select(.status.phase == "Bound")) | length')

    # Enough PVCs are bound: report and stop polling.
    if [[ "$bound_pvc_count" -ge "$REPLICAS" ]]; then
      echo "All PVCs bound, proceeding..."
      return 0
    fi

    echo "Only $bound_pvc_count PVCs are bound, waiting for a total of $REPLICAS..."
    sleep 5
  done
}

# Refuse direct execution. `return` only succeeds while a file is being sourced,
# so a failing probe means the file was run as a script.
# The failure branch runs in the current shell (not a subshell) so that
# `exit 1` actually terminates the script instead of only ending a child shell.
if ! (return 0 2>/dev/null); then
  echo "This script is not meant to be run directly, only sourced as a helper!"
  exit 1
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright 2025 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: ebs-scale-test-expand-and-modify
provisioner: ebs.csi.aws.com
reclaimPolicy: Delete
allowVolumeExpansion: true
# We create immediately bound volumes so that we do not have to attach/detach volumes in the expand-and-modify scale test.
volumeBindingMode: Immediate
parameters:
type: gp2
tagSpecification_1: "ebs-scale-test={{ .Env.SCALABILITY_TEST_RUN_NAME }}"
---
apiVersion: storage.k8s.io/v1beta1
kind: VolumeAttributesClass
driverName: ebs.csi.aws.com
metadata:
name: ebs-scale-test-expand-and-modify
parameters:
type: gp3
---
{{- range $index, $value := seq 1 .Env.REPLICAS }}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Decided to rely on Immediately bound PVCs instead of scaling statefulset (as we do in the scale-sts test).

Pros:

  • Separation of concerns between scale-sts test and expand-and-modify test
  • Order of magnitude shorter test: no waiting for all volumes to attach/detach + creating karpenter instances.
  • Two way door. Changing this test to scale sts is trivial.

Cons:

  • We do not exercise NodeExpandVolume at scale. (Though to me this sounds like a kubelet, AMI, resize2fs testing responsibility. We already have e2e tests)

Discussed this offline with @torredil, who lightly agreed with me.

apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ebs-claim-{{ $index }}
spec:
accessModes:
- ReadWriteOnce
storageClassName: ebs-scale-test-expand-and-modify
resources:
requests:
storage: 1Gi
# We use block mode so that no filesystem expansion occurs (via NodeExpandVolume)
volumeMode: "Block"
---
{{- end }}
5 changes: 3 additions & 2 deletions hack/ebs-scale-test/helpers/scale-test/pre_test_validation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ pre_test_validation() {
exit 1
fi

echo "Updating kubeconfig and restarting ebs-csi-controller pod"
echo "Updating kubeconfig and restarting ebs-csi-controller Deployment"
aws eks update-kubeconfig --name "$CLUSTER_NAME"
kubectl delete pod -n kube-system -l app=ebs-csi-controller
kubectl rollout restart deployment/ebs-csi-controller -n kube-system
kubectl rollout status deployment/ebs-csi-controller -n kube-system --timeout=30s
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ sts_scale_test() {
gomplate -f "$manifest_path" -o "$export_manifest_path"
kubectl apply -f "$export_manifest_path"

# Cleanup K8s resources upon script interruption
trap 'echo "Test interrupted! Deleting test resources to prevent leak"; kubectl delete -f $export_manifest_path' EXIT

echo "Scaling StatefulSet $REPLICAS replicas"
kubectl scale sts --replicas "$REPLICAS" ebs-scale-test
kubectl rollout status statefulset ebs-scale-test
Expand All @@ -35,11 +38,13 @@ sts_scale_test() {

echo "Waiting for all PVs to be deleted"
wait_for_pvs_to_delete

trap - EXIT
}

wait_for_pvs_to_delete() {
while true; do
pv_count=$(kubectl get pv --no-headers | wc -l)
pv_count=$(kubectl get pv -o json | jq '.items | length')
if [ "$pv_count" -eq 0 ]; then
echo "No PVs exist in the cluster, proceeding..."
break
Expand Down
12 changes: 10 additions & 2 deletions hack/ebs-scale-test/scale-test
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ source "${BASE_DIR}/helpers/scale-test/collect-and-export-metrics.sh"
source "${BASE_DIR}/helpers/scale-test/pre_test_validation.sh"

source "${BASE_DIR}/helpers/scale-test/scale-sts-test/scale-sts.sh"
source "${BASE_DIR}/helpers/scale-test/expand-and-modify-test/expand-and-modify.sh"

usage() {
echo "Usage: $0 [base-cmd]"
Expand All @@ -64,7 +65,7 @@ usage() {
}

check_dependencies_helper() {
local readonly dependencies=("kubectl" "aws" "eksctl" "gomplate")
local readonly dependencies=("kubectl" "aws" "eksctl" "gomplate" "jq")

for cmd in "${dependencies[@]}"; do
if ! command -v "${cmd}" &>/dev/null; then
Expand All @@ -86,7 +87,14 @@ setup_scale() {
#######################################
# Runs pre-test validation, then exactly one scalability test chosen by
# TEST_TYPE, then collects and exports metrics.
# Globals: TEST_TYPE (read)
# Calls: pre_test_validation, sts_scale_test, expand_and_modify_test,
#        collect-and-export-metrics
# Exits: 1 when TEST_TYPE matches no known test type.
#######################################
run_scale() {
  pre_test_validation

  # Dispatch to a single test. The StatefulSet test must not run
  # unconditionally, otherwise an 'expand-and-modify' run would execute both.
  case "$TEST_TYPE" in
    *scale-sts*) sts_scale_test ;;
    *expand-and-modify*) expand_and_modify_test ;;
    *)
      echo "Invalid TEST_TYPE '$TEST_TYPE'. Please set to 'scale-sts' or 'expand-and-modify'"
      exit 1
      ;;
  esac

  collect-and-export-metrics
}
Expand Down