fixup! Verify environment variables in e2e test
nojnhuh committed Jan 15, 2025
1 parent 1ddc998 commit a4795f1
Showing 1 changed file with 279 additions and 15 deletions.
test/e2e/e2e.sh
@@ -27,15 +27,41 @@
kubectl create -f demo/gpu-test3.yaml
kubectl create -f demo/gpu-test4.yaml
kubectl create -f demo/gpu-test5.yaml
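# The five demo workloads exercise different claim patterns (verified below):
#   gpu-test1: two pods, each claiming its own dedicated GPU
#   gpu-test2: one pod whose single container claims two GPUs
#   gpu-test3: one pod with two containers sharing one GPU (TimeSlicing, Default interval)
#   gpu-test4: two pods sharing one GPU (TimeSlicing, Default interval)
#   gpu-test5: one pod with four containers across two shared GPUs
#              (TimeSlicing/Long for ts-ctr*, SpacePartitioning/10 for sp-ctr*)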

-function verify-env {
-  local namespace="$1"
-  local pod="$2"
-  for ctr in $(kubectl get pod -n "$namespace" "$pod" -o jsonpath='{.spec.containers[*].name}'); do
-    if ! kubectl logs -n "$namespace" "$pod" -c "$ctr" | grep -q "GPU_DEVICE_"; then
-      echo "Pod $namespace/$pod, container $ctr missing GPU_DEVICE_ environment variables"
-      exit 1
-    fi
-  done
-}
function gpus-from-logs {
  local logs="$1"
  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_[[:digit:]]+=\"(.+)\"$/\1/p"
}
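# For illustration: given a hypothetical log line in the format the test
# containers print (bash `declare -x`/`export -p` style), gpus-from-logs
# extracts just the device names:
#   $ gpus-from-logs 'declare -x GPU_DEVICE_0="gpu-0"'
#   gpu-0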

function gpu-id {
  local gpu="$1"
  echo "$gpu" | sed -nE "s/^gpu-([[:digit:]]+)$/\1/p"
}
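# e.g. `gpu-id "gpu-7"` prints "7"; anything not matching the gpu-<N> naming
# convention prints nothing, since sed -n only emits successful substitutions.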

function gpu-sharing-strategy-from-logs {
  local logs="$1"
  local id="$2"
  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_SHARING_STRATEGY=\"(.+)\"$/\1/p"
}

function gpu-timeslice-interval-from-logs {
  local logs="$1"
  local id="$2"
  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_TIMESLICE_INTERVAL=\"(.+)\"$/\1/p"
}

function gpu-partition-count-from-logs {
  local logs="$1"
  local id="$2"
  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_PARTITION_COUNT=\"(.+)\"$/\1/p"
}
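# These three extractors all assume the same GPU_DEVICE_<id>_<ATTRIBUTE> naming
# scheme. For example, with a hypothetical log line:
#   $ gpu-sharing-strategy-from-logs 'declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"' 0
#   TimeSlicing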

declare -a observed_gpus
function gpu-already-seen {
  local gpu="$1"
  for seen in "${observed_gpus[@]}"; do
    if [[ "$gpu" == "$seen" ]]; then return 0; fi
  done
  return 1
}
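# Typical usage, mirroring the per-container checks below: fail when a GPU that
# should be exclusive has already been claimed, otherwise record it as seen.
#   if gpu-already-seen "$gpu"; then echo "duplicate claim: $gpu"; exit 1; fi
#   observed_gpus+=("$gpu")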

kubectl wait --for=condition=Ready -n gpu-test1 pod/pod0 --timeout=120s
@@ -45,8 +71,36 @@
if [ $gpu_test_1 != 2 ]; then
  echo "gpu_test_1 $gpu_test_1 failed to match against 2 expected pods"
  exit 1
fi
-verify-env gpu-test1 pod0
-verify-env gpu-test1 pod1

gpu_test1_pod0_ctr0_logs=$(kubectl logs -n gpu-test1 pod0 -c ctr0)
gpu_test1_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod0_ctr0_logs")
gpu_test1_pod0_ctr0_gpus_count=$(echo "$gpu_test1_pod0_ctr0_gpus" | wc -w)
if [[ $gpu_test1_pod0_ctr0_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test1/pod0, container ctr0 to have 1 GPU, but got $gpu_test1_pod0_ctr0_gpus_count: $gpu_test1_pod0_ctr0_gpus"
  exit 1
fi
gpu_test1_pod0_ctr0_gpu="$gpu_test1_pod0_ctr0_gpus"
if gpu-already-seen "$gpu_test1_pod0_ctr0_gpu"; then
  echo "Pod gpu-test1/pod0, container ctr0 should have a new GPU but claimed $gpu_test1_pod0_ctr0_gpu which is already claimed"
  exit 1
fi
echo "Pod gpu-test1/pod0, container ctr0 claimed $gpu_test1_pod0_ctr0_gpu"
observed_gpus+=("$gpu_test1_pod0_ctr0_gpu")

gpu_test1_pod1_ctr0_logs=$(kubectl logs -n gpu-test1 pod1 -c ctr0)
gpu_test1_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod1_ctr0_logs")
gpu_test1_pod1_ctr0_gpus_count=$(echo "$gpu_test1_pod1_ctr0_gpus" | wc -w)
if [[ $gpu_test1_pod1_ctr0_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test1/pod1, container ctr0 to have 1 GPU, but got $gpu_test1_pod1_ctr0_gpus_count: $gpu_test1_pod1_ctr0_gpus"
  exit 1
fi
gpu_test1_pod1_ctr0_gpu="$gpu_test1_pod1_ctr0_gpus"
if gpu-already-seen "$gpu_test1_pod1_ctr0_gpu"; then
  echo "Pod gpu-test1/pod1, container ctr0 should have a new GPU but claimed $gpu_test1_pod1_ctr0_gpu which is already claimed"
  exit 1
fi
echo "Pod gpu-test1/pod1, container ctr0 claimed $gpu_test1_pod1_ctr0_gpu"
observed_gpus+=("$gpu_test1_pod1_ctr0_gpu")


kubectl wait --for=condition=Ready -n gpu-test2 pod/pod0 --timeout=120s
@@ -55,15 +109,80 @@
if [ $gpu_test_2 != 1 ]; then
  echo "gpu_test_2 $gpu_test_2 failed to match against 1 expected pod"
  exit 1
fi
-verify-env gpu-test2 pod0

gpu_test2_pod0_ctr0_logs=$(kubectl logs -n gpu-test2 pod0 -c ctr0)
gpu_test2_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test2_pod0_ctr0_logs")
gpu_test2_pod0_ctr0_gpus_count=$(echo "$gpu_test2_pod0_ctr0_gpus" | wc -w)
if [[ $gpu_test2_pod0_ctr0_gpus_count != 2 ]]; then
  echo "Expected Pod gpu-test2/pod0, container ctr0 to have 2 GPUs, but got $gpu_test2_pod0_ctr0_gpus_count: $gpu_test2_pod0_ctr0_gpus"
  exit 1
fi
# Feed the loop with a here-string instead of a pipe so it runs in the current
# shell: in a pipeline the loop body would be a subshell, `exit 1` would not
# terminate the script, and the observed_gpus update would be lost.
while read -r gpu_test2_pod0_ctr0_gpu; do
  if gpu-already-seen "$gpu_test2_pod0_ctr0_gpu"; then
    echo "Pod gpu-test2/pod0, container ctr0 should have a new GPU but claimed $gpu_test2_pod0_ctr0_gpu which is already claimed"
    exit 1
  fi
  echo "Pod gpu-test2/pod0, container ctr0 claimed $gpu_test2_pod0_ctr0_gpu"
  observed_gpus+=("$gpu_test2_pod0_ctr0_gpu")
done <<< "$gpu_test2_pod0_ctr0_gpus"


kubectl wait --for=condition=Ready -n gpu-test3 pod/pod0 --timeout=120s
gpu_test_3=$(kubectl get pods -n gpu-test3 | grep -c 'Running')
if [ $gpu_test_3 != 1 ]; then
  echo "gpu_test_3 $gpu_test_3 failed to match against 1 expected pod"
  exit 1
fi
-verify-env gpu-test3 pod0

gpu_test3_pod0_ctr0_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr0)
gpu_test3_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr0_logs")
gpu_test3_pod0_ctr0_gpus_count=$(echo "$gpu_test3_pod0_ctr0_gpus" | wc -w)
if [[ $gpu_test3_pod0_ctr0_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test3/pod0, container ctr0 to have 1 GPU, but got $gpu_test3_pod0_ctr0_gpus_count: $gpu_test3_pod0_ctr0_gpus"
  exit 1
fi
gpu_test3_pod0_ctr0_gpu="$gpu_test3_pod0_ctr0_gpus"
if gpu-already-seen "$gpu_test3_pod0_ctr0_gpu"; then
  echo "Pod gpu-test3/pod0, container ctr0 should have a new GPU but claimed $gpu_test3_pod0_ctr0_gpu which is already claimed"
  exit 1
fi
echo "Pod gpu-test3/pod0, container ctr0 claimed $gpu_test3_pod0_ctr0_gpu"
observed_gpus+=("$gpu_test3_pod0_ctr0_gpu")
gpu_test3_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr0_logs" "$(gpu-id "$gpu_test3_pod0_ctr0_gpu")")
if [[ "$gpu_test3_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
  echo "Expected Pod gpu-test3/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr0_sharing_strategy"
  exit 1
fi
gpu_test3_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr0_logs" "$(gpu-id "$gpu_test3_pod0_ctr0_gpu")")
if [[ "$gpu_test3_pod0_ctr0_timeslice_interval" != "Default" ]]; then
  echo "Expected Pod gpu-test3/pod0, container ctr0 to have timeslice interval Default, got $gpu_test3_pod0_ctr0_timeslice_interval"
  exit 1
fi

gpu_test3_pod0_ctr1_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr1)
gpu_test3_pod0_ctr1_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr1_logs")
gpu_test3_pod0_ctr1_gpus_count=$(echo "$gpu_test3_pod0_ctr1_gpus" | wc -w)
if [[ $gpu_test3_pod0_ctr1_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test3/pod0, container ctr1 to have 1 GPU, but got $gpu_test3_pod0_ctr1_gpus_count: $gpu_test3_pod0_ctr1_gpus"
  exit 1
fi
gpu_test3_pod0_ctr1_gpu="$gpu_test3_pod0_ctr1_gpus"
echo "Pod gpu-test3/pod0, container ctr1 claimed $gpu_test3_pod0_ctr1_gpu"
if [[ "$gpu_test3_pod0_ctr1_gpu" != "$gpu_test3_pod0_ctr0_gpu" ]]; then
  echo "Pod gpu-test3/pod0, container ctr1 should claim the same GPU as Pod gpu-test3/pod0, container ctr0, but did not"
  exit 1
fi
gpu_test3_pod0_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr1_logs" "$(gpu-id "$gpu_test3_pod0_ctr1_gpu")")
if [[ "$gpu_test3_pod0_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
  echo "Expected Pod gpu-test3/pod0, container ctr1 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr1_sharing_strategy"
  exit 1
fi
gpu_test3_pod0_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr1_logs" "$(gpu-id "$gpu_test3_pod0_ctr1_gpu")")
if [[ "$gpu_test3_pod0_ctr1_timeslice_interval" != "Default" ]]; then
  echo "Expected Pod gpu-test3/pod0, container ctr1 to have timeslice interval Default, got $gpu_test3_pod0_ctr1_timeslice_interval"
  exit 1
fi


kubectl wait --for=condition=Ready -n gpu-test4 pod/pod0 --timeout=120s
kubectl wait --for=condition=Ready -n gpu-test4 pod/pod1 --timeout=120s
@@ -72,16 +191,161 @@
if [ $gpu_test_4 != 2 ]; then
  echo "gpu_test_4 $gpu_test_4 failed to match against 2 expected pods"
  exit 1
fi
-verify-env gpu-test4 pod0
-verify-env gpu-test4 pod1

gpu_test4_pod0_ctr0_logs=$(kubectl logs -n gpu-test4 pod0 -c ctr0)
gpu_test4_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod0_ctr0_logs")
gpu_test4_pod0_ctr0_gpus_count=$(echo "$gpu_test4_pod0_ctr0_gpus" | wc -w)
if [[ $gpu_test4_pod0_ctr0_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test4/pod0, container ctr0 to have 1 GPU, but got $gpu_test4_pod0_ctr0_gpus_count: $gpu_test4_pod0_ctr0_gpus"
  exit 1
fi
gpu_test4_pod0_ctr0_gpu="$gpu_test4_pod0_ctr0_gpus"
if gpu-already-seen "$gpu_test4_pod0_ctr0_gpu"; then
  echo "Pod gpu-test4/pod0, container ctr0 should have a new GPU but claimed $gpu_test4_pod0_ctr0_gpu which is already claimed"
  exit 1
fi
echo "Pod gpu-test4/pod0, container ctr0 claimed $gpu_test4_pod0_ctr0_gpu"
observed_gpus+=("$gpu_test4_pod0_ctr0_gpu")
gpu_test4_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod0_ctr0_logs" "$(gpu-id "$gpu_test4_pod0_ctr0_gpu")")
if [[ "$gpu_test4_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
  echo "Expected Pod gpu-test4/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod0_ctr0_sharing_strategy"
  exit 1
fi
gpu_test4_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod0_ctr0_logs" "$(gpu-id "$gpu_test4_pod0_ctr0_gpu")")
if [[ "$gpu_test4_pod0_ctr0_timeslice_interval" != "Default" ]]; then
  echo "Expected Pod gpu-test4/pod0, container ctr0 to have timeslice interval Default, got $gpu_test4_pod0_ctr0_timeslice_interval"
  exit 1
fi

gpu_test4_pod1_ctr0_logs=$(kubectl logs -n gpu-test4 pod1 -c ctr0)
gpu_test4_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod1_ctr0_logs")
gpu_test4_pod1_ctr0_gpus_count=$(echo "$gpu_test4_pod1_ctr0_gpus" | wc -w)
if [[ $gpu_test4_pod1_ctr0_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test4/pod1, container ctr0 to have 1 GPU, but got $gpu_test4_pod1_ctr0_gpus_count: $gpu_test4_pod1_ctr0_gpus"
  exit 1
fi
gpu_test4_pod1_ctr0_gpu="$gpu_test4_pod1_ctr0_gpus"
echo "Pod gpu-test4/pod1, container ctr0 claimed $gpu_test4_pod1_ctr0_gpu"
# Compare against pod0's GPU (not pod1's own, which would always pass):
# both pods are expected to share the same time-sliced GPU.
if [[ "$gpu_test4_pod1_ctr0_gpu" != "$gpu_test4_pod0_ctr0_gpu" ]]; then
  echo "Pod gpu-test4/pod1, container ctr0 should claim the same GPU as Pod gpu-test4/pod0, container ctr0, but did not"
  exit 1
fi
gpu_test4_pod1_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod1_ctr0_logs" "$(gpu-id "$gpu_test4_pod1_ctr0_gpu")")
if [[ "$gpu_test4_pod1_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
  echo "Expected Pod gpu-test4/pod1, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod1_ctr0_sharing_strategy"
  exit 1
fi
gpu_test4_pod1_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod1_ctr0_logs" "$(gpu-id "$gpu_test4_pod1_ctr0_gpu")")
if [[ "$gpu_test4_pod1_ctr0_timeslice_interval" != "Default" ]]; then
  echo "Expected Pod gpu-test4/pod1, container ctr0 to have timeslice interval Default, got $gpu_test4_pod1_ctr0_timeslice_interval"
  exit 1
fi


kubectl wait --for=condition=Ready -n gpu-test5 pod/pod0 --timeout=120s
gpu_test_5=$(kubectl get pods -n gpu-test5 | grep -c 'Running')
if [ $gpu_test_5 != 1 ]; then
  echo "gpu_test_5 $gpu_test_5 failed to match against 1 expected pod"
  exit 1
fi
-verify-env gpu-test5 pod0

gpu_test5_pod0_ts_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr0)
gpu_test5_pod0_ts_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr0_logs")
gpu_test5_pod0_ts_ctr0_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr0_gpus" | wc -w)
if [[ $gpu_test5_pod0_ts_ctr0_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr0_gpus_count: $gpu_test5_pod0_ts_ctr0_gpus"
  exit 1
fi
gpu_test5_pod0_ts_ctr0_gpu="$gpu_test5_pod0_ts_ctr0_gpus"
if gpu-already-seen "$gpu_test5_pod0_ts_ctr0_gpu"; then
  echo "Pod gpu-test5/pod0, container ts-ctr0 should have a new GPU but claimed $gpu_test5_pod0_ts_ctr0_gpu which is already claimed"
  exit 1
fi
echo "Pod gpu-test5/pod0, container ts-ctr0 claimed $gpu_test5_pod0_ts_ctr0_gpu"
observed_gpus+=("$gpu_test5_pod0_ts_ctr0_gpu")
gpu_test5_pod0_ts_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr0_logs" "$(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu")")
if [[ "$gpu_test5_pod0_ts_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr0_sharing_strategy"
  exit 1
fi
gpu_test5_pod0_ts_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr0_logs" "$(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu")")
if [[ "$gpu_test5_pod0_ts_ctr0_timeslice_interval" != "Long" ]]; then
  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr0_timeslice_interval"
  exit 1
fi

gpu_test5_pod0_ts_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr1)
gpu_test5_pod0_ts_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr1_logs")
gpu_test5_pod0_ts_ctr1_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr1_gpus" | wc -w)
if [[ $gpu_test5_pod0_ts_ctr1_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr1_gpus_count: $gpu_test5_pod0_ts_ctr1_gpus"
  exit 1
fi
gpu_test5_pod0_ts_ctr1_gpu="$gpu_test5_pod0_ts_ctr1_gpus"
echo "Pod gpu-test5/pod0, container ts-ctr1 claimed $gpu_test5_pod0_ts_ctr1_gpu"
if [[ "$gpu_test5_pod0_ts_ctr1_gpu" != "$gpu_test5_pod0_ts_ctr0_gpu" ]]; then
  echo "Pod gpu-test5/pod0, container ts-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container ts-ctr0, but did not"
  exit 1
fi
gpu_test5_pod0_ts_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr1_logs" "$(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu")")
if [[ "$gpu_test5_pod0_ts_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr1_sharing_strategy"
  exit 1
fi
gpu_test5_pod0_ts_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr1_logs" "$(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu")")
if [[ "$gpu_test5_pod0_ts_ctr1_timeslice_interval" != "Long" ]]; then
  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr1_timeslice_interval"
  exit 1
fi

gpu_test5_pod0_sp_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr0)
gpu_test5_pod0_sp_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr0_logs")
gpu_test5_pod0_sp_ctr0_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr0_gpus" | wc -w)
if [[ $gpu_test5_pod0_sp_ctr0_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr0_gpus_count: $gpu_test5_pod0_sp_ctr0_gpus"
  exit 1
fi
gpu_test5_pod0_sp_ctr0_gpu="$gpu_test5_pod0_sp_ctr0_gpus"
if gpu-already-seen "$gpu_test5_pod0_sp_ctr0_gpu"; then
  echo "Pod gpu-test5/pod0, container sp-ctr0 should have a new GPU but claimed $gpu_test5_pod0_sp_ctr0_gpu which is already claimed"
  exit 1
fi
echo "Pod gpu-test5/pod0, container sp-ctr0 claimed $gpu_test5_pod0_sp_ctr0_gpu"
observed_gpus+=("$gpu_test5_pod0_sp_ctr0_gpu")
gpu_test5_pod0_sp_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr0_logs" "$(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu")")
if [[ "$gpu_test5_pod0_sp_ctr0_sharing_strategy" != "SpacePartitioning" ]]; then
  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr0_sharing_strategy"
  exit 1
fi
gpu_test5_pod0_sp_ctr0_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr0_logs" "$(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu")")
if [[ "$gpu_test5_pod0_sp_ctr0_partition_count" != "10" ]]; then
  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have partition count 10, got $gpu_test5_pod0_sp_ctr0_partition_count"
  exit 1
fi

gpu_test5_pod0_sp_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr1)
gpu_test5_pod0_sp_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr1_logs")
gpu_test5_pod0_sp_ctr1_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr1_gpus" | wc -w)
if [[ $gpu_test5_pod0_sp_ctr1_gpus_count != 1 ]]; then
  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr1_gpus_count: $gpu_test5_pod0_sp_ctr1_gpus"
  exit 1
fi
gpu_test5_pod0_sp_ctr1_gpu="$gpu_test5_pod0_sp_ctr1_gpus"
echo "Pod gpu-test5/pod0, container sp-ctr1 claimed $gpu_test5_pod0_sp_ctr1_gpu"
if [[ "$gpu_test5_pod0_sp_ctr1_gpu" != "$gpu_test5_pod0_sp_ctr0_gpu" ]]; then
  echo "Pod gpu-test5/pod0, container sp-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container sp-ctr0, but did not"
  exit 1
fi
gpu_test5_pod0_sp_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr1_logs" "$(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu")")
if [[ "$gpu_test5_pod0_sp_ctr1_sharing_strategy" != "SpacePartitioning" ]]; then
  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr1_sharing_strategy"
  exit 1
fi
gpu_test5_pod0_sp_ctr1_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr1_logs" "$(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu")")
if [[ "$gpu_test5_pod0_sp_ctr1_partition_count" != "10" ]]; then
  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have partition count 10, got $gpu_test5_pod0_sp_ctr1_partition_count"
  exit 1
fi

# test that deletion is fast (less than the default grace period of 30s)
# see https://github.com/kubernetes/kubernetes/issues/127188 for details
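# The timing check itself is collapsed in this diff view. A minimal sketch of
# such a check (hypothetical, assuming the demo manifests created above) might
# look like:
#   start=$(date +%s)
#   kubectl delete --wait -f demo/gpu-test1.yaml
#   end=$(date +%s)
#   if (( end - start >= 30 )); then
#     echo "deletion took $((end - start))s, expected less than 30s"
#     exit 1
#   fi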