From b10fc100d8c07b71c06b4f5e903efa45a7002299 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Fri, 14 Nov 2025 13:46:55 +0300 Subject: [PATCH 01/14] =?UTF-8?q?ci:=20nested=20SDS=20setup=20only=20(prep?= =?UTF-8?q?are+cleanup);=20remove=20E2E=20tests=20and=20report;=20restore?= =?UTF-8?q?=20jump-host;=20stabilize=20SDS=20bring-up=20(MPO=20+=20explici?= =?UTF-8?q?t=20ModuleConfigs=20+=20CRD=E2=80=91compatible=20LVG=20+=20safe?= =?UTF-8?q?=20default=20SC=20patch)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 281 +++-- ci/dvp-e2e/Taskfile.yaml | 1097 +++++++++++++++++ ci/dvp-e2e/charts/cluster-config/.helmignore | 23 + ci/dvp-e2e/charts/cluster-config/Chart.yaml | 18 + .../templates/cluster-config.yaml | 48 + .../templates/disabled-modules.yaml | 10 + .../cluster-config/templates/e2e-sa.yaml | 27 + .../cluster-config/templates/ingress.yaml | 17 + .../charts/cluster-config/templates/mc.yaml | 65 + .../charts/cluster-config/templates/nfs.yaml | 34 + .../charts/cluster-config/templates/ngc.yaml | 37 + .../cluster-config/templates/nodegroups.yaml | 40 + .../charts/cluster-config/templates/rbac.yaml | 20 + .../templates/virtualization.yaml | 28 + ci/dvp-e2e/charts/cluster-config/values.yaml | 77 ++ ci/dvp-e2e/charts/infra/.helmignore | 23 + ci/dvp-e2e/charts/infra/Chart.yaml | 17 + .../charts/infra/templates/ingress.yaml | 74 ++ .../infra/templates/jump-host/deploy.yaml | 43 + .../charts/infra/templates/jump-host/svc.yaml | 15 + ci/dvp-e2e/charts/infra/templates/ns.yaml | 6 + .../charts/infra/templates/rbac/rbac.yaml | 41 + .../infra/templates/registry-secret.yaml | 10 + ci/dvp-e2e/charts/infra/templates/vi.yaml | 12 + ci/dvp-e2e/charts/infra/templates/vmc.yaml | 7 + ci/dvp-e2e/charts/infra/values.yaml | 58 + ci/dvp-e2e/charts/support/Chart.yaml | 18 + ci/dvp-e2e/charts/support/values.yaml | 62 + ci/dvp-e2e/manifests/storage/sds-modules.yaml | 48 + ci/dvp-e2e/manifests/storage/sds.yaml | 33 + ci/dvp-e2e/profiles.json | 10 + ci/dvp-e2e/scripts/get_profile_config.sh | 54 + ci/dvp-e2e/scripts/loop_junit_notify.py | 222 ++++ ci/dvp-e2e/scripts/loop_matrix_summary.py | 391 ++++++ ci/dvp-e2e/scripts/loop_notify.py | 87 ++ ci/dvp-e2e/values.yaml | 69 ++ 36 files changed, 3047 insertions(+), 75 deletions(-) create mode 100644 ci/dvp-e2e/Taskfile.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/.helmignore create mode 100644 ci/dvp-e2e/charts/cluster-config/Chart.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/disabled-modules.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/ingress.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/mc.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/ngc.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/rbac.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml create mode 100644 ci/dvp-e2e/charts/cluster-config/values.yaml create mode 100644 ci/dvp-e2e/charts/infra/.helmignore create mode 100644 ci/dvp-e2e/charts/infra/Chart.yaml create mode 100644 ci/dvp-e2e/charts/infra/templates/ingress.yaml create mode 100644 
ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml create mode 100644 ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml create mode 100644 ci/dvp-e2e/charts/infra/templates/ns.yaml create mode 100644 ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml create mode 100644 ci/dvp-e2e/charts/infra/templates/registry-secret.yaml create mode 100644 ci/dvp-e2e/charts/infra/templates/vi.yaml create mode 100644 ci/dvp-e2e/charts/infra/templates/vmc.yaml create mode 100644 ci/dvp-e2e/charts/infra/values.yaml create mode 100644 ci/dvp-e2e/charts/support/Chart.yaml create mode 100644 ci/dvp-e2e/charts/support/values.yaml create mode 100644 ci/dvp-e2e/manifests/storage/sds-modules.yaml create mode 100644 ci/dvp-e2e/manifests/storage/sds.yaml create mode 100644 ci/dvp-e2e/profiles.json create mode 100755 ci/dvp-e2e/scripts/get_profile_config.sh create mode 100755 ci/dvp-e2e/scripts/loop_junit_notify.py create mode 100755 ci/dvp-e2e/scripts/loop_matrix_summary.py create mode 100644 ci/dvp-e2e/scripts/loop_notify.py create mode 100644 ci/dvp-e2e/values.yaml diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index c5449b8d17..6f0cddf4da 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -27,10 +27,6 @@ on: - cron: "30 2 * * *" workflow_dispatch: inputs: - profiles: - description: "Storage profiles (comma-separated): sds, cephrbd" - required: false - default: "sds,cephrbd" timeout: description: "Ginkgo timeout (e.g. 2h, 4h)" required: false @@ -43,45 +39,71 @@ env: E2E_K8S_URL: https://api.e2e.virtlab.flant.com jobs: - setup: - name: Setup Profiles + # ============================================ + # 1. SETUP - Environment preparation + # ============================================ + setup-nested-envs: + name: Setup Nested Envs runs-on: ubuntu-latest + concurrency: + group: setup-nested-envs-${{ github.ref }} + cancel-in-progress: true + env: + PROFILE: sds-replicated-volume outputs: - profiles: ${{ steps.profiles.outputs.profiles }} + run_id: ${{ steps.prep.outputs.run_id }} steps: - uses: actions/checkout@v4 - - name: Load storage profiles - id: profiles + - name: Load storage profile + id: load run: | - # Single profile: sds with storage class sds-replicated-volume - echo 'profiles=["sds"]' >> "$GITHUB_OUTPUT" + cd ci/dvp-e2e + # Map sds-replicated-volume to sds profile from profiles.json + PROFILE=$(jq -r '.[0].name' profiles.json) + echo "profile=$PROFILE" >> "$GITHUB_OUTPUT" + echo "Will test profile: $PROFILE (mapped from sds-replicated-volume)" - - name: Print matrix + - name: Prepare run context + id: prep run: | - echo "Will test profiles: ${{ steps.profiles.outputs.profiles }}" + RUN_ID="nightly-nested-e2e-sds-$(date +%H%M%S)" + echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" + mkdir -p ./tmp/run-context + echo "profile: ${PROFILE}" > ./tmp/run-context/config.yaml + echo "run_id: ${RUN_ID}" >> ./tmp/run-context/config.yaml + echo "timestamp: $(date -Iseconds)" >> ./tmp/run-context/config.yaml + - name: Upload run context + uses: actions/upload-artifact@v4 + with: + name: run-context-${{ steps.prep.outputs.run_id }} + path: ./tmp/run-context + + # ============================================ + # 2. 
PREPARE - Cluster preparation + # ============================================ prepare: - name: Matrix Setup (${{ matrix.profile }}) - needs: [setup] + name: Prepare Cluster + needs: [setup-nested-envs] runs-on: ubuntu-latest timeout-minutes: 300 concurrency: - group: prepare-${{ github.ref }}-${{ matrix.profile }} + group: prepare-${{ github.ref }}-sds-replicated-volume cancel-in-progress: true - strategy: - fail-fast: false - matrix: - profile: ${{ fromJson(needs.setup.outputs.profiles) }} - env: + PROFILE: sds-replicated-volume GO_VERSION: "1.24.6" TMP_ROOT: ${{ github.workspace }}/ci/dvp-e2e/tmp - STORAGE_CLASS: sds-replicated-volume + LOOP_WEBHOOK: ${{ secrets.LOOP_WEBHOOK_URL || secrets.LOOP_WEBHOOK }} + LOOP_CHANNEL: ${{ secrets.LOOP_CHANNEL || 'test-virtualization-loop-alerts' }} # TODO: replace with channel secret after successful run outputs: run_id: ${{ steps.prep.outputs.run_id }} - storage_class: ${{ env.STORAGE_CLASS }} + storage_class: ${{ steps.profile-config.outputs.storage_class }} + image_storage_class: ${{ steps.profile-config.outputs.image_storage_class }} + snapshot_storage_class: ${{ steps.profile-config.outputs.snapshot_storage_class }} + attach_disk_size: ${{ steps.profile-config.outputs.attach_disk_size }} steps: - uses: actions/checkout@v4 @@ -102,7 +124,7 @@ jobs: - name: Install kubectl uses: azure/setup-kubectl@v4 with: - version: 'latest' + version: "latest" - name: Install Deckhouse CLI env: @@ -123,13 +145,20 @@ jobs: curl -L -o /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 chmod +x /usr/local/bin/yq + - name: Restore run context + uses: actions/download-artifact@v4 + with: + name: run-context-${{ needs.setup-nested-envs.outputs.run_id }} + path: . + - name: Prepare environment id: prep run: | - RUN_ID="nightly-nested-e2e-${{ matrix.profile }}-$(date +%H%M)" + RUN_ID="${{ needs.setup-nested-envs.outputs.run_id }}" echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" echo "RUN_ID=$RUN_ID" >> "$GITHUB_ENV" - echo "PROFILE=${{ matrix.profile }}" >> "$GITHUB_ENV" + # Map sds-replicated-volume to sds for profile config + echo "PROFILE=sds" >> "$GITHUB_ENV" echo "TMP_ROOT=${{ env.TMP_ROOT }}" >> "$GITHUB_ENV" mkdir -p "${{ env.TMP_ROOT }}/shared" "${{ env.TMP_ROOT }}/matrix-logs" @@ -169,12 +198,154 @@ jobs: RUN_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" echo "VALUES_TEMPLATE_FILE=${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" >> $GITHUB_ENV + - name: Configure registry auth (REGISTRY_DOCKER_CFG) + run: | + prod_user="${{ secrets.PROD_READ_REGISTRY_USER }}" + prod_pass="${{ secrets.PROD_READ_REGISTRY_PASSWORD }}" + dev_user="${{ secrets.BOOTSTRAP_DEV_REGISTRY_LOGIN }}" + dev_pass="${{ secrets.BOOTSTRAP_DEV_REGISTRY_PASSWORD }}" + echo "::add-mask::$prod_user" + echo "::add-mask::$prod_pass" + echo "::add-mask::$dev_user" + echo "::add-mask::$dev_pass" + prod_auth_b64=$(printf '%s:%s' "$prod_user" "$prod_pass" | base64 | tr -d '\n') + dev_auth_b64=$(printf '%s:%s' "$dev_user" "$dev_pass" | base64 | tr -d '\n') + docker_cfg=$(printf '{"auths":{"registry.deckhouse.io":{"auth":"%s"},"dev-registry.deckhouse.io":{"auth":"%s"}}}' "$prod_auth_b64" "$dev_auth_b64") + docker_cfg_b64=$(printf '%s' "$docker_cfg" | base64 | tr -d '\n') + echo "::add-mask::$docker_cfg_b64" + { + echo "REGISTRY_DOCKER_CFG=$docker_cfg_b64" + echo "DECKHOUSE_REGISTRY_USER=$prod_user" + echo "DECKHOUSE_REGISTRY_PASSWORD=$prod_pass" + } >> "$GITHUB_ENV" + + - name: Inject REGISTRY_DOCKER_CFG into values.yaml + working-directory: ci/dvp-e2e + run: | + 
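+          # Note: strenv() (as opposed to env()) makes yq treat the value as a
+          # plain string, so the base64 dockercfg is never re-parsed as YAML.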
+          yq eval --inplace '.deckhouse.registryDockerCfg = strenv(REGISTRY_DOCKER_CFG)' "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml"
+
+      - name: Docker login to Deckhouse registry
+        uses: docker/login-action@v3
+        with:
+          registry: registry.deckhouse.io
+          username: ${{ env.DECKHOUSE_REGISTRY_USER }}
+          password: ${{ env.DECKHOUSE_REGISTRY_PASSWORD }}
+
+      - name: Docker login to dev-registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ vars.DEV_REGISTRY }}
+          username: ${{ secrets.BOOTSTRAP_DEV_REGISTRY_LOGIN }}
+          password: ${{ secrets.BOOTSTRAP_DEV_REGISTRY_PASSWORD }}
+
       - name: Configure storage profile
         working-directory: ci/dvp-e2e
+        id: profile-config
+        env:
+          PROFILE: sds
         run: |
-          # Set storage profile to sds with storage class sds-replicated-volume
+          # Get storage class configuration from profiles.json
+          PROFILE_CONFIG=$(./scripts/get_profile_config.sh "${PROFILE}")
+
+          # Parse KEY=VALUE pairs from the helper output
+          STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^STORAGE_CLASS=" | cut -d'=' -f2)
+          IMAGE_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^IMAGE_STORAGE_CLASS=" | cut -d'=' -f2)
+          SNAPSHOT_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^SNAPSHOT_STORAGE_CLASS=" | cut -d'=' -f2)
+          ATTACH_DISK_SIZE=$(echo "$PROFILE_CONFIG" | grep "^ATTACH_DISK_SIZE=" | cut -d'=' -f2)
+
+          echo "Profile: ${PROFILE}"
+          echo "Storage Class: ${STORAGE_CLASS}"
+          echo "Image Storage Class: ${IMAGE_STORAGE_CLASS}"
+          echo "Snapshot Storage Class: ${SNAPSHOT_STORAGE_CLASS}"
+          echo "Attach Disk Size: ${ATTACH_DISK_SIZE}"
+
+          # Export for later steps (GITHUB_ENV) and downstream jobs (GITHUB_OUTPUT)
+          echo "STORAGE_CLASS=${STORAGE_CLASS}" >> $GITHUB_ENV
+          echo "IMAGE_STORAGE_CLASS=${IMAGE_STORAGE_CLASS}" >> $GITHUB_ENV
+          echo "SNAPSHOT_STORAGE_CLASS=${SNAPSHOT_STORAGE_CLASS}" >> $GITHUB_ENV
+          echo "ATTACH_DISK_SIZE=${ATTACH_DISK_SIZE}" >> $GITHUB_ENV
+          echo "storage_class=$STORAGE_CLASS" >> $GITHUB_OUTPUT
+          echo "image_storage_class=$IMAGE_STORAGE_CLASS" >> $GITHUB_OUTPUT
+          echo "snapshot_storage_class=$SNAPSHOT_STORAGE_CLASS" >> $GITHUB_OUTPUT
+          echo "attach_disk_size=$ATTACH_DISK_SIZE" >> $GITHUB_OUTPUT
           # Pass storage profile into run values for Helm templates
           PROFILE='sds' yq eval --inplace '.storageProfile = strenv(PROFILE)' "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml"
-          echo "Configured storage profile: sds with storage class: ${STORAGE_CLASS}"
+          # Effective disk SC used for worker data disks (prefer image SC when set)
+          EFF_DISK_SC=${IMAGE_STORAGE_CLASS:-$STORAGE_CLASS}
+          echo "EFFECTIVE_DISK_SC=${EFF_DISK_SC}" >> $GITHUB_ENV
+
+      - name: Install infra (namespace/RBAC/ingress)
+        working-directory: ci/dvp-e2e
+        run: |
+          USE_GH_SSH_KEYS=true SSH_FILE_NAME=id_ed task render-infra \
+            TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \
+            VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \
+            PARENT_KUBECONFIG="${KUBECONFIG}" \
+            SSH_FILE_NAME="id_ed"
+          USE_GH_SSH_KEYS=true SSH_FILE_NAME=id_ed task infra-deploy \
+            TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \
+            VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \
+            PARENT_KUBECONFIG="${KUBECONFIG}" \
+            SSH_FILE_NAME="id_ed"
+
+      - name: Bootstrap nested cluster (via jump-host)
+        working-directory: ci/dvp-e2e
+        run: |
+          echo "🚀 dhctl bootstrap (profile: sds-replicated-volume -> sds)"
+          task dhctl-bootstrap \
+            TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \
+            VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \
+            PARENT_KUBECONFIG="${KUBECONFIG}" \
+            SSH_FILE_NAME="id_ed" \
+            TARGET_STORAGE_CLASS="ceph-pool-r2-csi-rbd-immediate"
+
+      - name: Attach data disks to worker VMs using hotplug
+        working-directory: ci/dvp-e2e
+        run: |
+          task infra:attach-storage-disks-hotplug \
+            TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \
+            VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \
+            PARENT_KUBECONFIG="${KUBECONFIG}" \
+            DISK_SIZE="${ATTACH_DISK_SIZE:-10Gi}" \
+            STORAGE_CLASS="ceph-pool-r2-csi-rbd-immediate" \
+            DISK_COUNT="2"
+
+      - name: Build nested kubeconfig
+        working-directory: ci/dvp-e2e
+        run: |
+          task nested:kubeconfig \
+            TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \
+            VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \
+            NAMESPACE="${{ env.RUN_ID }}" \
+            SSH_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/ssh" \
+            SSH_FILE_NAME="id_ed" \
+            NESTED_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested" \
+            NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \
+            PARENT_KUBECONFIG="${KUBECONFIG}"
+
+      - name: Configure storage classes
+        working-directory: ci/dvp-e2e
+        run: |
+          echo "💾 Configuring storage classes for profile: sds-replicated-volume -> sds"
+          task nested:storage:configure \
+            STORAGE_PROFILE="sds" \
+            TARGET_STORAGE_CLASS="${{ steps.profile-config.outputs.storage_class }}" \
+            TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \
+            VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \
+            GENERATED_VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/generated-values.yaml" \
+            SSH_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/ssh" \
+            SSH_FILE_NAME="id_ed" \
+            PASSWORD_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password.txt" \
+            PASSWORD_HASH_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password-hash.txt" \
+            NAMESPACE="${{ env.RUN_ID }}" \
+            DOMAIN="" \
+            DEFAULT_USER="ubuntu" \
+            NESTED_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested" \
+            NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig"
+
+          # Ingress smoke disabled: not required for storage config
+
+          # Ceph CSI smoke check removed per request

       - name: Upload run context
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: run-context-${{ env.RUN_ID }}
           path: |
             ci/dvp-e2e/tmp/runs/${{ env.RUN_ID }}
             ci/dvp-e2e/tmp/shared
           if-no-files-found: warn
-
-  run-e2e:
-    name: E2E (${{ matrix.profile }}) [skeleton]
-    needs: [setup, prepare]
-    runs-on: ubuntu-latest
-    concurrency:
-      group: e2e-${{ github.ref }}-${{ matrix.profile }}
-      cancel-in-progress: true
-    strategy:
-      fail-fast: false
-      matrix:
-        profile: ${{ fromJson(needs.setup.outputs.profiles) }}
-    steps:
-      - name: Echo run
-        run: |
-          echo "E2E stage for profile=${{ matrix.profile }} (skeleton - placeholder)"
-
-  report:
-    name: Report [skeleton]
-    needs: [setup, run-e2e]
-    if: always()
-    runs-on: ubuntu-latest
-    steps:
-      - name: Echo report
-        run: |
-          echo "Report stage (skeleton). Collecting results from matrix..."
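+          # 'overwrite: true' (below) lets a re-run replace the previously
+          # uploaded run-context artifact instead of failing the upload.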
+          overwrite: true

   cleanup:
-    name: Cleanup Resources
-    needs: report
+    name: Cleanup [skeleton]
+    needs: [setup-nested-envs, prepare]
     if: always()
     runs-on: ubuntu-latest
     steps:
@@ -225,7 +372,7 @@
       - name: Install kubectl
         uses: azure/setup-kubectl@v4
         with:
-          version: 'latest'
+          version: "latest"

       - name: Build parent kubeconfig from secret (cleanup)
         shell: bash
@@ -257,25 +404,9 @@
       - name: Cleanup test namespaces
         run: |
           set -euo pipefail
-          PREFIX="nightly-nested-e2e-"
-          echo "🧹 Cleaning up namespaces matching prefix '${PREFIX}'"
-          mapfile -t CANDIDATES < <(kubectl get ns -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' | grep "^${PREFIX}" || true)
-          OURS=()
-          for ns in "${CANDIDATES[@]:-}"; do
-            [ -z "$ns" ] && continue
-            if kubectl -n "$ns" get deploy jump-host >/dev/null 2>&1; then
-              OURS+=("$ns")
-            fi
-          done
-          if [ "${#OURS[@]}" -eq 0 ]; then
-            echo "[INFO] No namespaces to delete."
-          else
-            echo "[INFO] Deleting namespaces:"
-            printf ' - %s\n' "${OURS[@]}"
-            for ns in "${OURS[@]}"; do
-              kubectl delete ns "$ns" --wait=false || true
-            done
-          fi
+          echo "🧹 Cleaning up namespaces matching 'nightly-nested-e2e-*'"
+          kubectl get ns -o name | grep "namespace/nightly-nested-e2e-" | cut -d/ -f2 | \
+            xargs -r kubectl delete ns --wait=false || echo "[INFO] No namespaces to delete"

       - name: Report cleanup results
         if: always()
diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml
new file mode 100644
index 0000000000..f2df178ee1
--- /dev/null
+++ b/ci/dvp-e2e/Taskfile.yaml
@@ -0,0 +1,1097 @@
+version: "3"
+dotenv:
+  - .env
+
+vars:
+  # Paths and defaults
+  TMP_ROOT:
+    sh: git rev-parse --show-toplevel 2>/dev/null | xargs -I{} printf "%s/ci/dvp-e2e/tmp" {}
+  VALUES_TEMPLATE_FILE: values.yaml
+  SSH_FILE_NAME: cloud
+
+  # Charts
+  INFRA_CHART_PATH: ./charts/infra
+  CLUSTER_CONFIG_CHART_PATH: ./charts/cluster-config
+
+tasks:
+  # ------------------------------------------------------------
+  # Preflight
+  # ------------------------------------------------------------
+  default:
+    silent: true
+    desc: Check required utilities
+    cmds:
+      - |
+        deps=("kubectl" "jq" "yq" "docker" "helm" "htpasswd" "ssh-keygen" "curl" "d8" "openssl")
+        for dep in "${deps[@]}"; do
+          if ! command -v "$dep" >/dev/null 2>&1; then
+            echo "Required utility '$dep' not found!" >&2
+            exit 1
+          fi
+        done
+        echo "All dependencies are installed!"
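+
+  # Illustrative usage (not part of CI): run the preflight check locally to
+  # fail fast on missing tools before any flow:
+  #
+  #   cd ci/dvp-e2e && task    # no args invokes the `default` task above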
+ + password-gen: + desc: Generate password (openssl + bcrypt) + silent: true + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + PASSWORD_FILE: '{{ printf "%s/%s" .TMP_DIR "password.txt" }}' + PASSWORD_HASH_FILE: '{{ printf "%s/%s" .TMP_DIR "password-hash.txt" }}' + cmds: + - mkdir -p {{ .TMP_DIR }} + - openssl rand -base64 20 > {{ .PASSWORD_FILE }} + - | + pw="$(cat {{ .PASSWORD_FILE }})" + htpasswd -BinC 10 "" <<< "$pw" | cut -d: -f2 | (base64 --wrap=0 2>/dev/null || base64 -w0 2>/dev/null || base64) > {{ .PASSWORD_HASH_FILE }} + status: + - test -f "{{ .PASSWORD_FILE }}" + - test -f "{{ .PASSWORD_HASH_FILE }}" + + ssh-gen: + desc: Generate ssh keypair for bootstrap + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + cmds: + - mkdir -p "{{ .SSH_DIR }}" + - ssh-keygen -t ed25519 -o -a 64 -N "" -C "cloud" -f {{ .SSH_PRIV_KEY_FILE }} -q + - chmod 0600 "{{ .SSH_PRIV_KEY_FILE }}" + - chmod 0644 "{{ .SSH_PUB_KEY_FILE }}" + status: + - test -f "{{ .SSH_PRIV_KEY_FILE }}" + + # ------------------------------------------------------------ + # Values per run (namespaces, domain, prefix) + # ------------------------------------------------------------ + run:values:prepare: + desc: Prepare values.yaml for the run + vars: + RUN_ID: "{{ .RUN_ID }}" + RUN_NAMESPACE: "{{ .RUN_NAMESPACE }}" + RUN_DIR: '{{ .RUN_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' + TARGET_VALUES_FILE: '{{ printf "%s/%s" .RUN_DIR "values.yaml" }}' + BASE_DOMAIN: + sh: yq eval '.domain // ""' {{ .VALUES_TEMPLATE_FILE }} + BASE_CLUSTER_PREFIX: + sh: yq eval '.clusterConfigurationPrefix // "cluster"' {{ .VALUES_TEMPLATE_FILE }} + cmds: + - mkdir -p {{ .RUN_DIR }} + - cp {{ .VALUES_TEMPLATE_FILE }} {{ .TARGET_VALUES_FILE }} + - yq eval --inplace '.namespace = "{{ .RUN_NAMESPACE }}"' {{ .TARGET_VALUES_FILE }} + - | + set -euo pipefail + DOMAIN_INPUT="{{ .BASE_DOMAIN }}" + if [ -n "$DOMAIN_INPUT" ]; then + DOMAIN_VAL="{{ .RUN_ID }}.$DOMAIN_INPUT" + else + DOMAIN_VAL="{{ .RUN_ID }}" + fi + export DOMAIN_VAL + yq eval --inplace '.domain = strenv(DOMAIN_VAL)' {{ .TARGET_VALUES_FILE }} + - | + set -euo pipefail + if command -v shasum >/dev/null 2>&1; then + RUN_ID_HASH=$(printf "%s" "{{ .RUN_ID }}" | shasum | awk '{print $1}' | cut -c1-6) + else + RUN_ID_HASH=$(printf "%s" "{{ .RUN_ID }}" | sha1sum 2>/dev/null | awk '{print $1}' | cut -c1-6) + fi + PREFIX_INPUT="{{ .BASE_CLUSTER_PREFIX }}-${RUN_ID_HASH}" + [ ${#PREFIX_INPUT} -gt 16 ] && PREFIX_INPUT="${PREFIX_INPUT:0:16}" + export PREFIX_INPUT + yq eval --inplace '.clusterConfigurationPrefix = strenv(PREFIX_INPUT)' {{ .TARGET_VALUES_FILE }} + + # ------------------------------------------------------------ + # Infra manifests and deployment + # ------------------------------------------------------------ + render-infra: + desc: Generate infra manifests + deps: + - task: ssh:ensure + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + GENERATED_VALUES_FILE: '{{ printf "%s/%s" .TMP_DIR "generated-values.yaml" }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + 
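+      # Derived from SSH_DIR/SSH_FILE_NAME, i.e. the keypair that the
+      # ssh:ensure dependency above generated or imported.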
SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + DOMAIN: + sh: yq eval '.domain // ""' {{ .VALUES_FILE }} + sources: + - "./charts/infra/**/*" + - "{{ .VALUES_FILE }}" + generates: + - "{{ .TMP_DIR }}/infra.yaml" + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - mkdir -p {{ .TMP_DIR }} + - printf "" > {{ .GENERATED_VALUES_FILE }} + - | + export SSH_PUB_KEY="$(cat {{ .SSH_PUB_KEY_FILE }})" + yq eval --inplace '.sshPublicKey = env(SSH_PUB_KEY)' {{ .GENERATED_VALUES_FILE }} + - | + DOMAIN_VALUE="{{ .DOMAIN }}" + if [ -n "$DOMAIN_VALUE" ] && [ "$DOMAIN_VALUE" != "null" ]; then + export DOMAIN_VALUE + yq eval --inplace '.domain = env(DOMAIN_VALUE)' {{ .GENERATED_VALUES_FILE }} + fi + - helm template dvp-over-dvp-infra {{ .INFRA_CHART_PATH }} -f {{ .VALUES_FILE }} -f {{ .GENERATED_VALUES_FILE }} > {{ .TMP_DIR }}/infra.yaml + + infra-deploy: + desc: Deploy infra (Namespace/RBAC/Ingress) + deps: + - task: render-infra + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default "" }}' + SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - kubectl apply --server-side --force-conflicts --validate=false -f {{ .TMP_DIR }}/infra.yaml || kubectl apply --validate=false -f {{ .TMP_DIR }}/infra.yaml + - | + # Persist SSH keypair in parent cluster namespace for diagnostics tools (nested_diag.sh) + # Secret contains private and public parts; will be removed with namespace cleanup + kubectl -n {{ .NAMESPACE }} create secret generic e2e-ssh-key \ + --dry-run=client -o yaml \ + --from-file=id_ed={{ .SSH_PRIV_KEY_FILE }} \ + --from-file=id_ed.pub={{ .SSH_PUB_KEY_FILE }} \ + | kubectl apply -f - + + infra:create-storage-disks: + desc: Create storage disks for worker VMs before cluster bootstrap (for SDS) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' + STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' + DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' + WORKER_COUNT: '{{ .WORKER_COUNT | default "3" }}' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + echo "[INFRA] Creating {{ .DISK_COUNT }} storage disks per worker VM ({{ .WORKER_COUNT }} workers) in namespace {{ .NAMESPACE }}" + + # Create VirtualDisks for all expected worker VMs + # We'll use predictable naming based on Deckhouse's naming pattern + for worker_idx in $(seq 0 $(({{ .WORKER_COUNT }} - 1))); do + for disk_num in $(seq 1 {{ .DISK_COUNT }}); do + # Deckhouse generates VM names like: {prefix}-{hash}-worker-{suffix} + vd="storage-disk-${disk_num}-worker-${worker_idx}" + echo "[INFRA] Creating VirtualDisk $vd ({{ .DISK_SIZE }}, sc={{ 
.STORAGE_CLASS }})" + cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml + echo "[INFRA] VirtualDisk $vd created" + done + done + + infra:attach-storage-disks-hotplug: + desc: Attach storage disks to worker VMs using hotplug (VirtualMachineBlockDeviceAttachment) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' + STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' + DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default "id_ed" }}' + DEFAULT_USER: + sh: yq eval '.image.defaultUser' {{ .VALUES_FILE }} + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + # Enable shell tracing when DEBUG_HOTPLUG is set + [ -n "${DEBUG_HOTPLUG:-}" ] && set -x || true + echo "[INFRA] Attaching {{ .DISK_COUNT }} storage disks to worker VMs using hotplug in namespace {{ .NAMESPACE }}" + + # Wait for worker VMs + for i in $(seq 1 50); do + worker_count=$(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker | wc -l) + if [ "$worker_count" -gt 0 ]; then + echo "[INFRA] Found $worker_count worker VMs"; break + fi + echo "[INFRA] Waiting for worker VMs... ($i/50)"; sleep 10 + done + + workers=() + while IFS= read -r line; do + [ -n "$line" ] && workers+=("$line") + done < <(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker || true) + + if [ ${#workers[@]} -eq 0 ]; then + echo "[INFRA] No worker VMs found; nothing to do"; exit 0 + fi + + echo "[INFRA] Found ${#workers[@]} worker VMs: ${workers[*]}" + + for vm in "${workers[@]}"; do + [ -z "$vm" ] && continue + echo "[INFRA] Processing VM: $vm" + + # Wait for VM to be Running + for i in $(seq 1 50); do + phase=$(kubectl -n {{ .NAMESPACE }} get vm "$vm" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$phase" = "Running" ]; then + echo "[INFRA] VM $vm is Running"; break + fi + echo "[INFRA] VM $vm phase=$phase; retry $i/50"; sleep 10 + done + + for disk_num in $(seq 1 {{ .DISK_COUNT }}); do + vd="storage-disk-${disk_num}-$vm" + echo "[INFRA] Creating VirtualDisk $vd ({{ .DISK_SIZE }}, sc={{ .STORAGE_CLASS }})" + cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml + + # Wait for VirtualDisk to be Ready and PVC to be Bound + echo "[INFRA] Waiting for VirtualDisk $vd to be Ready..." 
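+            # Waiting for the VD to reach Ready before creating the VMBDA avoids
+            # racing the CSI provisioner; a hotplug request against a disk that
+            # is still provisioning tends to sit in InProgress (observed
+            # behavior, not a documented guarantee).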
+ vd_phase="" + for j in $(seq 1 50); do + vd_phase=$(kubectl -n {{ .NAMESPACE }} get vd "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$vd_phase" = "Ready" ]; then + echo "[INFRA] VirtualDisk $vd is Ready"; break + fi + echo "[INFRA] VD $vd phase=$vd_phase; retry $j/50"; sleep 5 + done + if [ "$vd_phase" != "Ready" ]; then + echo "[ERROR] VirtualDisk $vd not Ready" + kubectl -n {{ .NAMESPACE }} get vd "$vd" -o yaml || true + kubectl -n {{ .NAMESPACE }} get events --sort-by=.lastTimestamp | tail -n 100 || true + exit 1 + fi + + pvc_name="" + for j in $(seq 1 30); do + pvc_name=$(kubectl -n {{ .NAMESPACE }} get vd "$vd" -o jsonpath='{.status.target.persistentVolumeClaimName}' 2>/dev/null || true) + [ -n "$pvc_name" ] && break + echo "[INFRA] Waiting for PVC name for VD $vd; retry $j/30"; sleep 3 + done + if [ -n "$pvc_name" ]; then + echo "[INFRA] Waiting PVC $pvc_name to reach phase=Bound..." + pvc_phase="" + for j in $(seq 1 120); do + pvc_phase=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$pvc_phase" = "Bound" ]; then + break + fi + echo "[INFRA] PVC $pvc_name phase=$pvc_phase; retry $j/120"; sleep 2 + done + if [ "$pvc_phase" != "Bound" ]; then + echo "[ERROR] PVC $pvc_name did not reach Bound" + kubectl -n {{ .NAMESPACE }} describe pvc "$pvc_name" || true + kubectl -n {{ .NAMESPACE }} get events --sort-by=.lastTimestamp | tail -n 100 || true + exit 1 + fi + sc=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.storageClassName}' 2>/dev/null || true) + pv=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.volumeName}' 2>/dev/null || true) + vmode=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.volumeMode}' 2>/dev/null || true) + echo "[INFRA] PVC $pvc_name is Bound (sc=${sc:-?}, pv=${pv:-?}, mode=${vmode:-?})" + else + echo "[WARN] PVC name for VD $vd is empty; proceeding with attachment" + fi + + echo "[INFRA] Creating VirtualMachineBlockDeviceAttachment for $vd" + cat > /tmp/attach-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/attach-$vd.yaml + + echo "[INFRA] Waiting for hotplug attachment of $vd..." + success_by_vm=0 + for i in $(seq 1 30); do + phase=$(kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$phase" = "Attached" ]; then + echo "[INFRA] Disk $vd successfully attached to VM $vm"; break + fi + # Quick success path: rely on VM status even if VMBDA still InProgress + if kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ + | jq -e --arg vd "$vd" '([.status.blockDeviceRefs[]? 
| select((.virtualMachineBlockDeviceAttachmentName==$vd) or (.name==$vd)) | select((.attached==true) and (.hotplugged==true))] | length) > 0' >/dev/null; then + echo "[INFRA] VM reports disk $vd attached/hotplugged; proceeding" + success_by_vm=1 + break + fi + + # Print status approximately every 30 seconds (poll interval is 5s) + if [ $((i % 6)) -eq 0 ]; then + echo "[INFRA] Disk $vd phase=$phase; retry $i/30" + fi + sleep 5 + + # Minimal periodic debug snapshot approximately every 60 seconds + if [ $((i % 12)) -eq 0 ]; then + echo "[DEBUG] VMBDA $vd summary:" + kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json \ + | jq -r '{phase: .status.phase, conditions: (.status.conditions // []) | map({type, status, reason, message})}' || true + echo "[DEBUG] VM $vm block devices (summary):" + kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ + | jq -r '{phase: .status.phase, blockDeviceRefs: (.status.blockDeviceRefs // []) | map({name, virtualMachineBlockDeviceAttachmentName, attached, hotplugged})}' || true + fi + done + + if [ "$phase" != "Attached" ] && [ "${success_by_vm:-0}" -ne 1 ]; then + echo "[ERROR] Disk $vd failed to attach to VM $vm within timeout" >&2 + echo "[DEBUG] Final VMBDA summary:" + kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json \ + | jq -r '{phase: .status.phase, conditions: (.status.conditions // []) | map({type, status, reason, message})}' || true + echo "[DEBUG] VM $vm block devices (summary):" + kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ + | jq -r '{phase: .status.phase, blockDeviceRefs: (.status.blockDeviceRefs // []) | map({name, virtualMachineBlockDeviceAttachmentName, attached, hotplugged})}' || true + exit 1 + fi + done + + echo "[INFRA] VM $vm configured with hotplug disks" + + # Optional on-node lsblk debug snapshot (requires d8 and SSH key). Always sudo for block devices visibility. + if command -v d8 >/dev/null 2>&1; then + echo "[DEBUG] Collecting lsblk from VM $vm..." + if ! 
d8 v ssh --username='{{ .DEFAULT_USER }}' \ + --identity-file='{{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}' \ + --local-ssh=true \ + --local-ssh-opts='-o StrictHostKeyChecking=no' \ + --local-ssh-opts='-o UserKnownHostsFile=/dev/null' \ + "${vm}.{{ .NAMESPACE }}" -c "sudo lsblk -o NAME,KNAME,TYPE,SIZE,MODEL,TRAN,FSTYPE,MOUNTPOINT -p"; then + echo "[WARN] lsblk collection failed for $vm (SSH)" >&2 + fi + else + echo "[WARN] 'd8' CLI not found, skipping lsblk collection for $vm" >&2 + fi + done + + echo "[INFRA] All worker VMs configured with storage disks via hotplug" + + infra:attach-worker-disks: + desc: Attach additional data disks to worker VMs (for SDS) + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' + STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' + DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + echo "[INFRA] Attaching {{ .DISK_COUNT }} storage disks to worker VMs in namespace {{ .NAMESPACE }}" + workers=() + while IFS= read -r line; do + [ -n "$line" ] && workers+=("$line") + done < <(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker || true) + if [ ${#workers[@]} -eq 0 ]; then + echo "[INFRA] No worker VMs found"; exit 0 + fi + for vm in "${workers[@]}"; do + [ -z "$vm" ] && continue + for disk_num in $(seq 1 {{ .DISK_COUNT }}); do + vd="storage-disk-${disk_num}-$vm" + cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml + cat > /tmp/attach-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/attach-$vd.yaml + + echo "[INFRA] Waiting for hotplug attachment of $vd..." + for i in $(seq 1 30); do + phase=$(kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$phase" = "Attached" ]; then + echo "[INFRA] Disk $vd successfully attached to VM $vm"; break + fi + # Print status approximately every 30 seconds + if [ $((i % 6)) -eq 0 ]; then + echo "[INFRA] Disk $vd phase=$phase; retry $i/30" + fi + sleep 5 + + # Periodic debug snapshot approximately every 60 seconds + if [ $((i % 12)) -eq 0 ]; then + echo "[DEBUG] VMBDA $vd status:" + kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json | jq -r '.status' || true + fi + done + + if [ "$phase" != "Attached" ]; then + # Fallback on VM events confirming successful hotplug + echo "[DEBUG] Checking VM events for hotplug success fallback..." 
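+            # Fallback rationale: VMBDA status can lag the actual hotplug, so a
+            # "Successfully attach hotplugged volume" event on the VM is treated
+            # as an equivalent success signal.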
+ if kubectl -n {{ .NAMESPACE }} get events \ + --field-selector involvedObject.kind=VirtualMachine,involvedObject.name="$vm" \ + --sort-by=.lastTimestamp -ojson \ + | jq -r '.items[].message' 2>/dev/null \ + | grep -q -E "Successfully attach hotplugged volume.*\b$vd\b"; then + echo "[WARN] VMBDA phase not Attached, but VM reported success; treating as Attached (fallback)" + else + echo "[ERROR] Disk $vd failed to attach to VM $vm" >&2 + echo "[DEBUG] Final VMBDA status:" + kubectl -n {{ .NAMESPACE }} describe virtualmachineblockdeviceattachment "$vd" || true + # Filter controller/handler logs by our namespace/VM/VD + kubectl -n d8-virtualization logs deploy/virtualization-controller --tail=200 2>/dev/null | grep -E "{{ .NAMESPACE }}|$vm|$vd" || true + for h in $(kubectl -n d8-virtualization get pods -l app=virt-handler -o name 2>/dev/null || true); do + kubectl -n d8-virtualization logs --tail=200 "$h" | grep -E "{{ .NAMESPACE }}|$vm|$vd" || true + done + exit 1 + fi + fi + done + done + + # ------------------------------------------------------------ + # Kubeconfig for bootstrap and cluster config + # ------------------------------------------------------------ + render-kubeconfig: + desc: Generate kubeconfig for bootstrap (external parent API) + deps: + - password-gen + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + SERVER: + sh: | + # Use external parent cluster API (ingress host) so that both dhctl Job + # and components inside the nested cluster can reach the parent API. + HOST=$(kubectl -n d8-user-authn get ingress kubernetes-api -o json | jq -r '.spec.rules[0].host') + [ -z "$HOST" -o "$HOST" = "null" ] && { echo "[ERR] kubernetes-api ingress host not found" >&2; exit 1; } + echo "https://$HOST" + TOKEN: + sh: | + for i in $(seq 1 5); do + TOKEN=$(kubectl -n {{ .NAMESPACE }} create token dkp-sa --duration=10h 2>/dev/null) && break + echo "[WARN] Failed to issue SA token (attempt $i); retrying in 3s" >&2 + sleep 3 + done + [ -z "${TOKEN:-}" ] && { echo "[ERR] Unable to obtain token for dkp-sa" >&2; exit 1; } + echo "$TOKEN" + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + silent: true + cmds: + - mkdir -p {{ .TMP_DIR }} + - | + cat < {{ .TMP_DIR }}/kubeconfig.yaml + apiVersion: v1 + clusters: + - cluster: + server: {{ .SERVER }} + insecure-skip-tls-verify: true + name: dvp + contexts: + - context: + cluster: dvp + namespace: {{ .NAMESPACE }} + user: {{ .NAMESPACE }}@dvp + name: {{ .NAMESPACE }}@dvp + current-context: {{ .NAMESPACE }}@dvp + kind: Config + preferences: {} + users: + - name: {{ .NAMESPACE }}@dvp + user: + token: {{ .TOKEN }} + EOF + + render-cluster-config: + desc: Generate cluster config (helm template) + silent: true + deps: + - render-kubeconfig + - password-gen + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + GENERATED_VALUES_FILE: '{{ printf "%s/%s" .TMP_DIR "generated-values.yaml" }}' + PASSWORD_HASH_FILE: '{{ printf "%s/%s" .TMP_DIR "password-hash.txt" }}' + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' + cmds: + - printf "" > {{ .GENERATED_VALUES_FILE }} + - | + export PASSWORD_HASH="$(cat {{ .PASSWORD_HASH_FILE }})" + yq eval --inplace '.passwordHash = 
env(PASSWORD_HASH)' {{ .GENERATED_VALUES_FILE }} + - | + export NEW_KUBECONFIG_B64="$(cat {{ .TMP_DIR }}/kubeconfig.yaml | base64 | tr -d '\n')" + yq eval --inplace '.kubeconfigDataBase64 = env(NEW_KUBECONFIG_B64)' {{ .GENERATED_VALUES_FILE }} + - | + # Inject registry Docker config from environment (set by modules-actions/setup or manually) + if [ -n "${REGISTRY_DOCKER_CFG:-}" ]; then + yq eval --inplace '.deckhouse.registryDockerCfg = env(REGISTRY_DOCKER_CFG)' {{ .GENERATED_VALUES_FILE }} + fi + - | + if [ -n "{{ .TARGET_STORAGE_CLASS | default "" }}" ]; then + export _SC='{{ .TARGET_STORAGE_CLASS }}' + yq eval --inplace '.storageClass = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.controlPlane.root = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.controlPlane.etcd = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.workers.root = env(_SC)' {{ .GENERATED_VALUES_FILE }} + yq eval --inplace '.storageClasses.workers.data = env(_SC)' {{ .GENERATED_VALUES_FILE }} + fi + - | + export SSH_PUB_KEY="$(cat {{ .SSH_PUB_KEY_FILE }})" + yq eval --inplace '.sshPublicKey = env(SSH_PUB_KEY)' {{ .GENERATED_VALUES_FILE }} + - helm template dvp-over-dvp-cluster-config {{ .CLUSTER_CONFIG_CHART_PATH }} -f {{ .VALUES_FILE }} -f {{ .GENERATED_VALUES_FILE }} > {{ .TMP_DIR }}/config.yaml + + dhctl-bootstrap: + desc: Bootstrap Deckhouse over DVP via jump-host (docker dhctl with bastion) + deps: + - render-cluster-config + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + GENERATED_VALUES_FILE: '{{ printf "%s/%s" .TMP_DIR "generated-values.yaml" }}' + NAMESPACE: + sh: yq eval '.namespace' {{ .VALUES_FILE }} + DEFAULT_USER: + sh: yq eval '.image.defaultUser' {{ .VALUES_FILE }} + JUMPHOST_EXT_IP: + sh: export KUBECONFIG='{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}'; kubectl -n {{ .NAMESPACE }} exec -it deployment/jump-host -- dig @resolver4.opendns.com myip.opendns.com +short | tr -d '\r' + JUMPHOST_NODEPORT: + sh: export KUBECONFIG='{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}'; kubectl -n {{ .NAMESPACE }} get svc jump-host -o json | jq '.spec.ports[] | select(.port==2222) | .nodePort' + IMAGE: "dev-registry.deckhouse.io/sys/deckhouse-oss/install:main" + env: + KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + # Pull dhctl image locally (runner authenticated in workflow) + docker pull --platform=linux/amd64 "{{ .IMAGE }}" + # Run dhctl bootstrap with SSH bastion (jump-host) + docker run --rm --platform=linux/amd64 \ + -v "{{ .TMP_DIR }}:/work" \ + "{{ .IMAGE }}" \ + dhctl bootstrap \ + --config=/work/config.yaml \ + --ssh-agent-private-keys=/work/ssh/{{ .SSH_FILE_NAME }} \ + --ssh-user={{ .DEFAULT_USER }} \ + --ssh-bastion-port={{ .JUMPHOST_NODEPORT }} \ + --ssh-bastion-host={{ .JUMPHOST_EXT_IP }} \ + --ssh-bastion-user=user \ + --preflight-skip-availability-ports-check \ + --preflight-skip-deckhouse-user-check \ + --preflight-skip-registry-credential \ + --preflight-skip-deckhouse-edition-check + - | + docker image rm {{ .IMAGE }} >/dev/null 2>&1 || true + + # ------------------------------------------------------------ + # SSH Keys management (use GH keys or generate new ones) + # 
------------------------------------------------------------
+  ssh:import-gh:
+    desc: Download predefined SSH keys from deckhouse/virtualization repo
+    vars:
+      TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}'
+      SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}'
+      SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default "id_ed" }}'
+      GH_RAW_URL_PRIV: "https://raw.githubusercontent.com/deckhouse/virtualization/main/test/e2e/legacy/testdata/sshkeys/id_ed"
+      GH_RAW_URL_PUB: "https://raw.githubusercontent.com/deckhouse/virtualization/main/test/e2e/legacy/testdata/sshkeys/id_ed.pub"
+    cmds:
+      - mkdir -p {{ .SSH_DIR }}
+      - curl -fsSL {{ .GH_RAW_URL_PRIV }} -o {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}
+      - curl -fsSL {{ .GH_RAW_URL_PUB }} -o {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}.pub
+      - chmod 0600 {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}
+      - chmod 0644 {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}.pub
+    status:
+      - test -f "{{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}"
+
+  ssh:ensure:
+    desc: Ensure SSH keys exist (import from GH when USE_GH_SSH_KEYS=true)
+    vars:
+      TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}'
+      SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}'
+      SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default (env "SSH_FILE_NAME") | default "cloud" }}'
+      USE_GH_SSH_KEYS: '{{ .USE_GH_SSH_KEYS | default (env "USE_GH_SSH_KEYS") | default "false" }}'
+    cmds:
+      - |
+        set -euo pipefail
+        if [ "{{ .USE_GH_SSH_KEYS }}" = "true" ]; then
+          echo "[SSH] Importing GH keys to {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}"
+          task ssh:import-gh SSH_DIR='{{ .SSH_DIR }}' SSH_FILE_NAME='{{ .SSH_FILE_NAME }}'
+        else
+          echo "[SSH] Generating new SSH keypair at {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}"
+          task ssh-gen SSH_DIR='{{ .SSH_DIR }}' SSH_FILE_NAME='{{ .SSH_FILE_NAME }}'
+        fi
+
+  # ------------------------------------------------------------
+  # Local flow wrappers with logs (DVP-over-DVP)
+  # ------------------------------------------------------------
+  local:bootstrap:
+    desc: Local flow — deploy infra + bootstrap nested (logs saved)
+    vars:
+      RUN_ID: '{{ .RUN_ID | default (printf "local-%s" (now | date "20060102-150405")) }}'
+      RUN_NAMESPACE: '{{ .RUN_NAMESPACE | default (printf "dvp-e2e-local-%s" .RUN_ID) }}'
+      TMP_DIR: '{{ .TMP_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}'
+      LOG_FILE: '{{ .LOG_FILE | default (printf "%s/%s" .TMP_DIR "bootstrap.log") }}'
+      VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}"
+      TARGET_STORAGE_CLASS: '{{ .TARGET_STORAGE_CLASS | default "ceph-pool-r2-csi-rbd-immediate" }}'
+      USE_GH_SSH_KEYS: '{{ .USE_GH_SSH_KEYS | default (env "USE_GH_SSH_KEYS") | default "true" }}'
+    cmds:
+      - mkdir -p {{ .TMP_DIR }}
+      - |
+        set -euo pipefail
+        echo "[FLOW] Using RUN_ID={{ .RUN_ID }}, namespace={{ .RUN_NAMESPACE }}"
+        {
+          task run:values:prepare RUN_ID='{{ .RUN_ID }}' RUN_NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}'
+          task render-infra VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' USE_GH_SSH_KEYS='{{ .USE_GH_SSH_KEYS }}' SSH_FILE_NAME='id_ed'
+          task infra-deploy VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed'
+          task render-cluster-config VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' TARGET_STORAGE_CLASS='{{ .TARGET_STORAGE_CLASS }}' SSH_FILE_NAME='id_ed'
+          task dhctl-bootstrap VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed'
+        } 2>&1 | tee '{{ .LOG_FILE }}'
+
+  local:tests:
+    desc: Local flow — prepare nested kubeconfig and run E2E (logs
saved) + vars: + RUN_ID: '{{ .RUN_ID | default (printf "local-%s" (now | date "20060102-150405")) }}' + RUN_NAMESPACE: '{{ .RUN_NAMESPACE | default (printf "dvp-e2e-local-%s" .RUN_ID) }}' + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' + LOG_FILE: '{{ .LOG_FILE | default (printf "%s/%s" .TMP_DIR "tests.log") }}' + E2E_DIR: '{{ .E2E_DIR | default (env "E2E_DIR") | default "../../tests/e2e" }}' + NESTED_SC: '{{ .NESTED_SC | default "ceph-pool-r2-csi-rbd-immediate" }}' + cmds: + - mkdir -p {{ .TMP_DIR }} + - | + set -euo pipefail + { + task nested:kubeconfig NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' + task nested:storage:sds NESTED_KUBECONFIG='{{ .TMP_DIR }}/nested-{{ .RUN_NAMESPACE }}/kubeconfig' SDS_SC_NAME='{{ .NESTED_SC }}' + task nested:ensure-sc NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' SC_NAME='{{ .NESTED_SC }}' + task nested:ensure-vmclass-default NESTED_KUBECONFIG='{{ .TMP_DIR }}/nested-{{ .RUN_NAMESPACE }}/kubeconfig' + task nested:e2e NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' E2E_DIR='{{ .E2E_DIR }}' + } 2>&1 | tee '{{ .LOG_FILE }}' + + # ------------------------------------------------------------ + # Nested cluster helpers (SC + kubeconfig) + # ------------------------------------------------------------ + nested:kubeconfig: + desc: Build kubeconfig for nested cluster + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: "{{ .NAMESPACE }}" + DOMAIN: + sh: yq eval '.domain // ""' {{ .VALUES_FILE }} + DEFAULT_USER: + sh: yq eval '.image.defaultUser' {{ .VALUES_FILE }} + SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' + NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' + NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' + PARENT_KUBECONFIG_PATH: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + if [ ! 
-s "{{ .PARENT_KUBECONFIG_PATH }}" ]; then + echo "[ERR] parent kubeconfig not found at {{ .PARENT_KUBECONFIG_PATH }}" + exit 1 + fi + mkdir -p {{ .NESTED_DIR }} + MASTER_NAME=$(KUBECONFIG={{ .PARENT_KUBECONFIG_PATH }} kubectl -n {{ .NAMESPACE }} get vm -l dvp.deckhouse.io/node-group=master -o jsonpath='{.items[0].metadata.name}') + if [ -z "$MASTER_NAME" ]; then + echo "[ERR] master VM not found in namespace {{ .NAMESPACE }}" >&2 + exit 1 + fi + TOKEN_FILE="{{ .NESTED_DIR }}/token.txt" + rm -f "$TOKEN_FILE" + SSH_OK=0 + for attempt in $(seq 1 6); do + if KUBECONFIG={{ .PARENT_KUBECONFIG_PATH }} d8 v ssh --username={{ .DEFAULT_USER }} --identity-file={{ .SSH_PRIV_KEY_FILE }} --local-ssh=true --local-ssh-opts="-o StrictHostKeyChecking=no" --local-ssh-opts="-o UserKnownHostsFile=/dev/null" "${MASTER_NAME}.{{ .NAMESPACE }}" -c ' + set -euo pipefail + SUDO="sudo /opt/deckhouse/bin/kubectl" + $SUDO -n kube-system get sa e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create sa e2e-admin >/dev/null 2>&1 + $SUDO -n kube-system get clusterrolebinding e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create clusterrolebinding e2e-admin --clusterrole=cluster-admin --serviceaccount=kube-system:e2e-admin >/dev/null 2>&1 + for i in $(seq 1 10); do + TOKEN=$($SUDO -n kube-system create token e2e-admin --duration=24h 2>/dev/null) && echo "$TOKEN" && break + echo "[WARN] Failed to create token (attempt $i/10); retrying in 3s" >&2 + sleep 3 + done + if [ -z "${TOKEN:-}" ]; then + echo "[ERR] Unable to create token for e2e-admin after 10 attempts" >&2 + exit 1 + fi + ' > "$TOKEN_FILE"; then + SSH_OK=1 + break + fi + echo "[WARN] d8 v ssh attempt $attempt failed; retry in 15s..." + sleep 15 + done + if [ "$SSH_OK" -ne 1 ] || [ ! -s "$TOKEN_FILE" ]; then + echo "[ERR] Failed to obtain nested token via d8 v ssh after multiple attempts" >&2 + cat "$TOKEN_FILE" 2>/dev/null || true + exit 1 + fi + NESTED_TOKEN=$(cat {{ .NESTED_DIR }}/token.txt) + SERVER_URL="https://api.{{ .NAMESPACE }}.{{ .DOMAIN }}" + { + printf 'apiVersion: v1\n' + printf 'kind: Config\n' + printf 'clusters:\n' + printf '- cluster:\n' + printf ' insecure-skip-tls-verify: true\n' + printf ' server: %s\n' "${SERVER_URL}" + printf ' name: nested\n' + printf 'contexts:\n' + printf '- context:\n' + printf ' cluster: nested\n' + printf ' user: e2e-admin\n' + printf ' name: nested\n' + printf 'current-context: nested\n' + printf 'users:\n' + printf '- name: e2e-admin\n' + printf ' user:\n' + printf ' token: %s\n' "${NESTED_TOKEN}" + } > {{ .NESTED_KUBECONFIG }} + chmod 600 {{ .NESTED_KUBECONFIG }} + echo "Generated nested kubeconfig at {{ .NESTED_KUBECONFIG }}" + + nested:ensure-sc: + desc: Ensure StorageClass exists in nested cluster + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' + NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' + SC_NAME: '{{ .SC_NAME | default "linstor-thin-r2" }}' + cmds: + - | + set -euo pipefail + if ! 
KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get sc "{{ .SC_NAME }}" >/dev/null 2>&1; then + echo "[ERR] StorageClass '{{ .SC_NAME }}' is missing in nested cluster" + exit 1 + fi + + nested:ensure-vmclass-default: + desc: Ensure default VMClass generic-for-e2e exists in nested cluster + vars: + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + cmds: + - | + set -euo pipefail + for i in $(seq 1 18); do + if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get vmclass generic >/dev/null 2>&1; then + KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get vmclass generic -o json \ + | jq 'del(.status) | .metadata={"name":"generic-for-e2e","annotations":{"virtualmachineclass.virtualization.deckhouse.io/is-default-class":"true"}}' \ + | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - >/dev/null + break + fi + echo "[INFO] Waiting for vmclass/generic to appear (attempt $i)..." + sleep 10 + done + + nested:storage:configure: + desc: Configure SDS storage profile inside nested cluster + vars: + STORAGE_PROFILE: '{{ .STORAGE_PROFILE | default "sds" }}' + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" + STORAGE_PROFILE_NORMALIZED: + sh: | + case '{{ .STORAGE_PROFILE }}' in + sds|sds-local|sds_local|sds-replicated|sds_replicated) echo sds ;; + *) echo '{{ .STORAGE_PROFILE }}' ;; + esac + cmds: + - cmd: 'echo "[STORAGE] normalized profile = {{ .STORAGE_PROFILE_NORMALIZED }}"' + - | + set -euo pipefail + if [ '{{ .STORAGE_PROFILE_NORMALIZED }}' != "sds" ]; then + echo "[ERR] Only SDS storage profile is supported. Got: {{ .STORAGE_PROFILE_NORMALIZED }}" >&2 + exit 1 + fi + - | + echo "[SDS] Configuring SDS storage..." + - | + task nested:storage:sds \ + NESTED_KUBECONFIG='{{ .NESTED_KUBECONFIG }}' \ + SDS_SC_NAME='{{ .TARGET_STORAGE_CLASS }}' + + nested:storage:sds: + desc: Configure SDS storage profile in nested cluster + vars: + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + SDS_SC_NAME: '{{ .SDS_SC_NAME | default "linstor-thin-r2" }}' + SDS_DVCR_SIZE: '{{ .SDS_DVCR_SIZE | default "5Gi" }}' + cmds: + - | + set -euo pipefail + echo "[SDS] Waiting for API server to be ready..." + for i in $(seq 1 30); do + if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes >/dev/null 2>&1; then + echo "[SDS] API server is ready!" + break + fi + echo "[SDS] API server not ready yet, retry $i/30"; sleep 10 + done + - | + set -euo pipefail + echo "[SDS] Step 1: Enabling sds-node-configurator..." + # Ensure ModulePullOverride exists so that releases are available (use main by default) + cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply --validate=false -f - + apiVersion: deckhouse.io/v1alpha2 + kind: ModulePullOverride + metadata: + name: sds-node-configurator + spec: + imageTag: main + scanInterval: 15s + EOF + cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl -n d8-system apply --validate=false -f - + apiVersion: deckhouse.io/v1alpha1 + kind: ModuleConfig + metadata: + name: sds-node-configurator + namespace: d8-system + spec: + enabled: true + version: 1 + settings: + disableDs: false + enableThinProvisioning: true + EOF + - | + set -euo pipefail + echo "[SDS] Step 2: Waiting for sds-node-configurator to be Ready..." + for i in $(seq 1 60); do + STATUS=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get module sds-node-configurator -o json | jq -r '.status.phase // .status.status // ""' 2>/dev/null || true) + if [ "$STATUS" = "Ready" ]; then + echo "[SDS] sds-node-configurator is Ready!" 
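+            # Ready observed; stop polling (STATUS is re-checked after the
+            # loop to decide whether to warn and proceed anyway).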
+ break + fi + echo "[SDS] sds-node-configurator status: ${STATUS:-NotFound}, retry $i/60" + sleep 10 + done + if [ "$STATUS" != "Ready" ]; then + echo "[WARN] sds-node-configurator not Ready after 10 minutes, proceeding anyway..." >&2 + fi + - | + set -euo pipefail + echo "[SDS] Step 3: Enabling sds-replicated-volume..." + # Ensure ModulePullOverride exists so that releases are available (use main by default) + cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply --validate=false -f - + apiVersion: deckhouse.io/v1alpha2 + kind: ModulePullOverride + metadata: + name: sds-replicated-volume + spec: + imageTag: main + scanInterval: 15s + EOF + cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl -n d8-system apply --validate=false -f - + apiVersion: deckhouse.io/v1alpha1 + kind: ModuleConfig + metadata: + name: sds-replicated-volume + namespace: d8-system + spec: + enabled: true + version: 1 + EOF + - | + set -euo pipefail + echo "[SDS] Step 4: Waiting for sds-replicated-volume to be Ready..." + for i in $(seq 1 60); do + STATUS=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get module sds-replicated-volume -o json | jq -r '.status.phase // .status.status // ""' 2>/dev/null || true) + if [ "$STATUS" = "Ready" ]; then + echo "[SDS] sds-replicated-volume is Ready!" + break + fi + echo "[SDS] sds-replicated-volume status: ${STATUS:-NotFound}, retry $i/60" + sleep 10 + done + if [ "$STATUS" != "Ready" ]; then + echo "[WARN] sds-replicated-volume not Ready after 10 minutes, proceeding anyway..." >&2 + fi + - | + set -euo pipefail + echo "[SDS] Waiting for SDS CRDs to be established..." + for crd in lvmvolumegroups.storage.deckhouse.io replicatedstoragepools.storage.deckhouse.io replicatedstorageclasses.storage.deckhouse.io; do + echo "[SDS] Waiting for CRD '$crd'..." + found=0 + for i in $(seq 1 50); do + if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get crd "$crd" >/dev/null 2>&1; then + found=1 + break + fi + echo "[SDS] CRD '$crd' not found yet, retry $i/50"; sleep 5 + done + if [ "$found" -eq 1 ]; then + KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl wait --for=condition=Established --timeout=180s crd "$crd" || true + else + echo "[WARN] CRD '$crd' not found after waiting" >&2 + fi + done + - | + set -euo pipefail + echo "[SDS] Creating per-node LVMVolumeGroups (type=Local)..." 
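+        # Prefer worker nodes for LVG placement; control-plane nodes are used
+        # only as a fallback when no non-master nodes are found.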
+      NODES=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes -o json \
+        | jq -r '.items[] | select((.metadata.labels // {}) | (has("node-role.kubernetes.io/control-plane") or has("node-role.kubernetes.io/master")) | not) | .metadata.name')
+      if [ -z "$NODES" ]; then
+        NODES=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes -o json | jq -r '.items[].metadata.name')
+      fi
+      for node in $NODES; do
+        [ -z "$node" ] && continue
+        # Build matchExpressions for known device paths per docs (label key is storage.deckhouse.io/device-path)
+        MATCH_EXPR=$(yq eval -n '
+          .key = "storage.deckhouse.io/device-path" |
+          .operator = "In" |
+          .values = ["/dev/sdb","/dev/vdb","/dev/xvdb","/dev/sdc","/dev/vdc","/dev/xvdc","/dev/sdd","/dev/vdd","/dev/xvdd"]
+        ')
+        NODE="$node" MATCH_EXPR="$MATCH_EXPR" yq eval -n '
+          .apiVersion = "storage.deckhouse.io/v1alpha1" |
+          .kind = "LVMVolumeGroup" |
+          .metadata.name = "data-" + env(NODE) |
+          .spec.type = "Local" |
+          .spec.local.nodeName = env(NODE) |
+          .spec.actualVGNameOnTheNode = "data" |
+          .spec.blockDeviceSelector.matchExpressions = [ env(MATCH_EXPR) ]
+        ' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f -
+      done
+      echo "[SDS] Creating ReplicatedStoragePool 'data' from LVMVolumeGroups..."
+      LVGS=$(printf "%s\n" $NODES | sed 's/^/    - name: data-/')
+      {
+        echo "apiVersion: storage.deckhouse.io/v1alpha1"
+        echo "kind: ReplicatedStoragePool"
+        echo "metadata:"
+        echo "  name: data"
+        echo "spec:"
+        echo "  type: LVM"
+        echo "  lvmVolumeGroups:"
+        printf "%s\n" "$LVGS"
+      } | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f -
+      echo "[SDS] Creating ReplicatedStorageClass '{{ .SDS_SC_NAME }}'..."
+      yq eval -n '
+        .apiVersion = "storage.deckhouse.io/v1alpha1" |
+        .kind = "ReplicatedStorageClass" |
+        .metadata.name = "{{ .SDS_SC_NAME }}" |
+        .spec.storagePool = "data" |
+        .spec.reclaimPolicy = "Delete" |
+        .spec.topology = "Ignored" |
+        .spec.volumeAccess = "Local"
+      ' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f -
+    - |
+      set -euo pipefail
+      if ! KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get storageclass "{{ .SDS_SC_NAME }}" >/dev/null 2>&1; then
+        echo "[ERR] StorageClass '{{ .SDS_SC_NAME }}' not found in nested cluster" >&2
+        exit 1
+      fi
+    - |
+      echo "[SDS] Setting {{ .SDS_SC_NAME }} as default StorageClass..."
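+      # NOTE: a JSON-Patch "replace" (RFC 6902) fails when the path is absent; this step
+      # relies on the cluster-config chart's mc.yaml having already seeded
+      # spec.settings.defaultClusterStorageClass in the global ModuleConfig.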
+ DEFAULT_STORAGE_CLASS="{{ .SDS_SC_NAME }}" + PATCH=$(jq -cn --arg v "$DEFAULT_STORAGE_CLASS" '[{"op":"replace","path":"/spec/settings/defaultClusterStorageClass","value":$v}]') + KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl patch mc global --type='json' -p="$PATCH" + + # ------------------------------------------------------------ + # Run E2E + # ------------------------------------------------------------ + nested:e2e: + desc: Run virtualization E2E tests against nested cluster + vars: + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' + NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' + E2E_DIR: '{{ .E2E_DIR | default (env "E2E_DIR") | default "../../tests/e2e" }}' + FOCUS: '{{ or .FOCUS "" }}' + SKIP: '{{ or .SKIP "" }}' + LABELS: '{{ or .LABELS "" }}' + TIMEOUT: '{{ or .TIMEOUT "4h" }}' + JUNIT_PATH: '{{ or .JUNIT_PATH "" }}' + TARGET_STORAGE_CLASS: '{{ .STORAGE_CLASS | default "ceph-pool-r2-csi-rbd-immediate" }}' + cmds: + - task: nested:kubeconfig + vars: + TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: "{{ .NESTED_DIR }}" + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + - task: nested:ensure-sc + vars: + TMP_DIR: "{{ .TMP_DIR }}" + NAMESPACE: "{{ .NAMESPACE }}" + NESTED_DIR: "{{ .NESTED_DIR }}" + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + SC_NAME: "{{ .TARGET_STORAGE_CLASS }}" + - task: nested:ensure-vmclass-default + vars: + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" + - | + set -euo pipefail + export KUBECONFIG="{{ .NESTED_KUBECONFIG }}" + cd {{ .E2E_DIR }} + task run TIMEOUT='{{ .TIMEOUT }}' {{ if .FOCUS }}FOCUS='{{ .FOCUS }}'{{ end }} {{ if .LABELS }}LABELS='{{ .LABELS }}'{{ end }} {{ if .JUNIT_PATH }}JUNIT_PATH='{{ .JUNIT_PATH }}'{{ end }} diff --git a/ci/dvp-e2e/charts/cluster-config/.helmignore b/ci/dvp-e2e/charts/cluster-config/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ci/dvp-e2e/charts/cluster-config/Chart.yaml b/ci/dvp-e2e/charts/cluster-config/Chart.yaml new file mode 100644 index 0000000000..344eb6ee44 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/Chart.yaml @@ -0,0 +1,18 @@ +apiVersion: v2 +name: cluster-config +description: Cluster configuration for E2E testing +type: application +version: 0.1.0 +appVersion: "1.0.0" +keywords: + - cluster + - configuration + - e2e + - testing +home: https://github.com/deckhouse/deckhouse +sources: + - https://github.com/deckhouse/deckhouse +maintainers: + - name: Deckhouse Team + email: team@deckhouse.io +dependencies: [] diff --git a/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml new file mode 100644 index 0000000000..bc9c836bfc --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml @@ -0,0 +1,48 @@ +# Cluster configuration for DVP-over-DVP E2E testing +apiVersion: deckhouse.io/v1 +kind: ClusterConfiguration +clusterType: Cloud +cloud: + provider: DVP + prefix: {{ .Values.clusterConfigurationPrefix | default "demo-cluster" }} +podSubnetCIDR: 10.112.0.0/16 +serviceSubnetCIDR: 10.223.0.0/16 +kubernetesVersion: "{{ .Values.deckhouse.kubernetesVersion }}" +clusterDomain: "internal.cluster.local" +--- +apiVersion: deckhouse.io/v1 +kind: InitConfiguration +deckhouse: + imagesRepo: dev-registry.deckhouse.io/sys/deckhouse-oss + registryDockerCfg: {{ .Values.deckhouse.registryDockerCfg | quote }} + devBranch: {{ .Values.deckhouse.tag }} +--- +apiVersion: deckhouse.io/v1 +kind: DVPClusterConfiguration +layout: Standard +sshPublicKey: {{ .Values.sshPublicKey }} +masterNodeGroup: + replicas: {{ .Values.instances.masterNodes.count }} + instanceClass: + virtualMachine: + bootloader: {{ .Values.image.bootloader }} + cpu: + cores: {{ .Values.instances.masterNodes.cores }} + coreFraction: {{ .Values.instances.masterNodes.coreFraction }} + memory: + size: {{ .Values.instances.masterNodes.memory }} + ipAddresses: + - Auto + virtualMachineClassName: "{{ .Values.namespace }}-cpu" + rootDisk: + size: 50Gi + storageClass: {{ .Values.storageClasses.controlPlane.root }} + image: + kind: VirtualImage + name: image + etcdDisk: + size: 15Gi + storageClass: {{ .Values.storageClasses.controlPlane.etcd }} +provider: + kubeconfigDataBase64: {{ .Values.kubeconfigDataBase64 }} + namespace: {{ .Values.namespace }} \ No newline at end of file diff --git a/ci/dvp-e2e/charts/cluster-config/templates/disabled-modules.yaml b/ci/dvp-e2e/charts/cluster-config/templates/disabled-modules.yaml new file mode 100644 index 0000000000..2887a2b168 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/disabled-modules.yaml @@ -0,0 +1,10 @@ +{{- $modules := list "upmeter" "local-path-provisioner" "pod-reloader" "secret-copier" "namespace-configurator" -}} +{{ range $modules }} +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: {{ . 
}} +spec: + enabled: false +{{ end }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml b/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml new file mode 100644 index 0000000000..dad2d77cd6 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml @@ -0,0 +1,27 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: e2e-runner + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: e2e-runner-admin +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: +- kind: ServiceAccount + name: e2e-runner + namespace: kube-system + + + + + + + + diff --git a/ci/dvp-e2e/charts/cluster-config/templates/ingress.yaml b/ci/dvp-e2e/charts/cluster-config/templates/ingress.yaml new file mode 100644 index 0000000000..387a3c89bc --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/ingress.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: deckhouse.io/v1 +kind: IngressNginxController +metadata: + name: main +spec: + inlet: HostPort + enableIstioSidecar: false + ingressClass: nginx + hostPort: + httpPort: 80 + httpsPort: 443 + nodeSelector: + node-role.kubernetes.io/master: '' + tolerations: + - effect: NoSchedule + operator: Exists diff --git a/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml new file mode 100644 index 0000000000..0156178a61 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml @@ -0,0 +1,65 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: deckhouse +spec: + version: 1 + enabled: true + settings: + bundle: Default + logLevel: Info +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: global +spec: + version: 1 + settings: + defaultClusterStorageClass: ceph-pool-r2-csi-rbd-immediate + modules: + publicDomainTemplate: "%s.{{ .Values.namespace }}.{{ .Values.domain }}" +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: user-authn +spec: + version: 1 + enabled: true + settings: + controlPlaneConfigurator: + dexCAMode: DoNotNeed + publishAPI: + enabled: true + https: + mode: Global + global: + kubeconfigGeneratorMasterCA: "" +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: cni-cilium +spec: + version: 1 + enabled: true + settings: + tunnelMode: VXLAN +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: snapshot-controller +spec: + version: 1 + enabled: true +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: snapshot-controller +spec: + imageTag: main + scanInterval: 15s diff --git a/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml b/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml new file mode 100644 index 0000000000..85b627695d --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml @@ -0,0 +1,34 @@ +{{- if .Values.features.nfs.enabled }} +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: csi-nfs +spec: + source: deckhouse + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: csi-nfs +spec: + imageTag: main + scanInterval: 10m +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: NFSStorageClass +metadata: + name: nfs +spec: + connection: + host: "nfs-server.{{ .Values.namespace }}.svc.cluster.local" + share: / + nfsVersion: "4.2" + mountOptions: + mountMode: hard + timeout: 60 + retransmissions: 3 
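+  # "hard" mounts block and retry I/O through short NFS outages instead of erroring out;
+  # the timeout/retransmissions values above tune how persistent each retry is for this e2e setup.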
+ reclaimPolicy: Delete + volumeBindingMode: Immediate +{{- end }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/ngc.yaml b/ci/dvp-e2e/charts/cluster-config/templates/ngc.yaml new file mode 100644 index 0000000000..3672dc8e79 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/ngc.yaml @@ -0,0 +1,37 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: NodeGroupConfiguration +metadata: + name: qemu-guest-agent-install-ubuntu.sh +spec: + weight: 98 + nodeGroups: ["*"] + bundles: ["ubuntu-lts", "debian"] + content: | + bb-apt-install qemu-guest-agent + systemctl enable --now qemu-guest-agent +--- +apiVersion: deckhouse.io/v1alpha1 +kind: NodeGroupConfiguration +metadata: + name: install-tools.sh +spec: + weight: 98 + nodeGroups: ["*"] + bundles: ["*"] + content: | + bb-sync-file /etc/profile.d/01-kubectl-aliases.sh - << "EOF" + source <(/opt/deckhouse/bin/kubectl completion bash) + alias k=kubectl + complete -o default -F __start_kubectl k + EOF + + if [ ! -f /usr/local/bin/k9s ]; then + K9S_URL=$(curl -s https://api.github.com/repos/derailed/k9s/releases/latest | jq '.assets[] | select(.name=="k9s_Linux_amd64.tar.gz") | .browser_download_url' -r) + curl -L "${K9S_URL}" | tar -xz -C /usr/local/bin/ "k9s" + fi + + if [ ! -f /usr/local/bin/stern ]; then + STERN_URL=$(curl -s https://api.github.com/repos/stern/stern/releases/latest | jq '.assets[].browser_download_url | select(. | test("linux_amd64"))' -r) + curl -L "${STERN_URL}" | tar -xz -C /usr/local/bin/ "stern" + fi diff --git a/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml b/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml new file mode 100644 index 0000000000..3779fb52a3 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml @@ -0,0 +1,40 @@ +{{ range .Values.instances.additionalNodes }} +--- +apiVersion: deckhouse.io/v1alpha1 +kind: DVPInstanceClass +metadata: + name: {{ .name }} +spec: + virtualMachine: + virtualMachineClassName: "{{ $.Values.namespace }}-cpu" + cpu: + cores: {{ .cores }} + coreFraction: {{ .coreFraction }} + memory: + size: {{ .memory }} + bootloader: {{ $.Values.image.bootloader }} + rootDisk: + size: 50Gi + storageClass: {{ $.Values.storageClasses.workers.root }} + image: + kind: VirtualImage + name: image +--- +apiVersion: deckhouse.io/v1 +kind: NodeGroup +metadata: + name: {{ .name }} +spec: +{{- if eq .name "system" }} + nodeTemplate: + labels: + node-role.deckhouse.io/system: "" +{{- end }} + nodeType: {{ .nodeType | default "CloudEphemeral" }} + cloudInstances: + minPerZone: {{ .count }} + maxPerZone: {{ .count }} + classReference: + kind: DVPInstanceClass + name: {{ .name }} +{{ end }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/rbac.yaml b/ci/dvp-e2e/charts/cluster-config/templates/rbac.yaml new file mode 100644 index 0000000000..6b8998a1e8 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/rbac.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: deckhouse.io/v1 +kind: ClusterAuthorizationRule +metadata: + name: admin +spec: + subjects: + - kind: User + name: admin@deckhouse.io + accessLevel: SuperAdmin + portForwarding: true +--- +apiVersion: deckhouse.io/v1 +kind: User +metadata: + name: admin +spec: + email: admin@deckhouse.io + # echo "t3chn0l0gi4" | htpasswd -BinC 10 "" | cut -d: -f2 | base64 -w0 + password: {{ .Values.passwordHash }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml b/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml new file mode 100644 index 0000000000..dffaf8b115 --- 
/dev/null +++ b/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml @@ -0,0 +1,28 @@ +--- +{{- if hasKey .Values "features" }} +{{- if .Values.features.virtualization }} +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: virtualization +spec: + enabled: true + version: 1 + settings: + dvcr: + storage: + persistentVolumeClaim: + size: 10Gi + type: PersistentVolumeClaim + virtualMachineCIDRs: + - 192.168.10.0/24 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: virtualization +spec: + imageTag: {{ .Values.virtualization.tag }} + scanInterval: 15s +{{- end }} +{{- end }} diff --git a/ci/dvp-e2e/charts/cluster-config/values.yaml b/ci/dvp-e2e/charts/cluster-config/values.yaml new file mode 100644 index 0000000000..8c158bf0f4 --- /dev/null +++ b/ci/dvp-e2e/charts/cluster-config/values.yaml @@ -0,0 +1,77 @@ +# Cluster configuration values for E2E testing + +# Instance configuration +instances: + masterNodes: + count: 1 + cores: 8 + coreFraction: 50% + memory: 20Gi + additionalNodes: + - name: worker + count: 3 + cores: 6 + coreFraction: 50% + memory: 12Gi + +# Deckhouse configuration +deckhouse: + tag: main + kubernetesVersion: Automatic + +# Virtualization configuration +virtualization: + tag: main + +# Image configuration +image: + url: https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/ubuntu/noble-server-cloudimg-amd64.img + defaultUser: ubuntu + bootloader: EFI + +# Ingress hosts +ingressHosts: + - api + - grafana + - dex + - prometheus + - console + - virtualization + +# Storage classes configuration (for parent cluster) +storageClasses: + controlPlane: + root: ceph-pool-r2-csi-rbd-immediate + etcd: ceph-pool-r2-csi-rbd-immediate + workers: + root: ceph-pool-r2-csi-rbd-immediate + +# Infrastructure components +infra: + nfs: + storageClass: nfs-4-1-wffc + dvcr: + storageClass: ceph-pool-r2-csi-rbd-immediate + +# Virtual disks configuration +virtualDisks: + os: + storageClass: ceph-pool-r2-csi-rbd-immediate + data: + storageClass: nfs-4-1-wffc + +# Security settings +security: + admissionPolicyEngine: + enabled: true + networkPolicies: + enabled: true + +# Feature flags +features: + virtualization: true + monitoring: true + logging: true + ingress: true + nfs: + enabled: false diff --git a/ci/dvp-e2e/charts/infra/.helmignore b/ci/dvp-e2e/charts/infra/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/ci/dvp-e2e/charts/infra/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ci/dvp-e2e/charts/infra/Chart.yaml b/ci/dvp-e2e/charts/infra/Chart.yaml new file mode 100644 index 0000000000..5eb2c3bfc0 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/Chart.yaml @@ -0,0 +1,17 @@ +apiVersion: v2 +name: infra +description: Infrastructure components for E2E testing +type: application +version: 0.1.0 +appVersion: "1.0.0" +keywords: + - infrastructure + - e2e + - testing +home: https://github.com/deckhouse/deckhouse +sources: + - https://github.com/deckhouse/deckhouse +maintainers: + - name: Deckhouse Team + email: team@deckhouse.io +dependencies: [] diff --git a/ci/dvp-e2e/charts/infra/templates/ingress.yaml b/ci/dvp-e2e/charts/infra/templates/ingress.yaml new file mode 100644 index 0000000000..b813234319 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/ingress.yaml @@ -0,0 +1,74 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: dvp-over-dvp-80 + namespace: {{ .Values.namespace }} +spec: + ports: + - port: 80 + targetPort: 80 + protocol: TCP + name: http + selector: + dvp.deckhouse.io/node-group: master +--- +apiVersion: v1 +kind: Service +metadata: + name: dvp-over-dvp-443 + namespace: {{ .Values.namespace }} +spec: + ports: + - port: 443 + targetPort: 443 + protocol: TCP + name: https + selector: + dvp.deckhouse.io/node-group: master +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wildcard-https + namespace: {{ .Values.namespace }} + annotations: + nginx.ingress.kubernetes.io/ssl-passthrough: "true" + nginx.ingress.kubernetes.io/backend-protocol: "HTTPS" +spec: + ingressClassName: nginx + rules: + {{- range .Values.ingressHosts }} + - host: "{{ . 
}}.{{ $.Values.namespace }}.{{ $.Values.domain }}" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: dvp-over-dvp-443 + port: + number: 443 + {{- end }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wildcard-http + namespace: {{ .Values.namespace }} + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "false" + nginx.ingress.kubernetes.io/rewrite-target: / +spec: + ingressClassName: nginx + rules: + - host: "*.{{ .Values.namespace }}.{{ .Values.domain }}" + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: dvp-over-dvp-80 + port: + number: 80 diff --git a/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml b/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml new file mode 100644 index 0000000000..a6bee4278a --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/jump-host/deploy.yaml @@ -0,0 +1,43 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: jump-host + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: jump-host + template: + metadata: + labels: + app: jump-host + spec: + containers: + - name: jump-host + image: registry-dvp.dev.flant.dev/tools/jump-host:v0.1.2 + imagePullPolicy: Always + resources: + limits: + cpu: "200m" + memory: "200Mi" + requests: + cpu: "200m" + memory: "200Mi" + ports: + - containerPort: 2222 + env: + - name: SSH_KEY + value: "{{ .Values.sshPublicKey }}" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + securityContext: + runAsNonRoot: true + runAsUser: 1000 + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" + diff --git a/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml b/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml new file mode 100644 index 0000000000..e2b809dcab --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/jump-host/svc.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: jump-host + namespace: {{ .Values.namespace }} +spec: + type: NodePort + selector: + app: jump-host + ports: + - protocol: TCP + port: 2222 + targetPort: 2222 + diff --git a/ci/dvp-e2e/charts/infra/templates/ns.yaml b/ci/dvp-e2e/charts/infra/templates/ns.yaml new file mode 100644 index 0000000000..064087cab7 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/ns.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Values.namespace }} + labels: + heritage: deckhouse diff --git a/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml b/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml new file mode 100644 index 0000000000..1a6a4b9846 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/rbac/rbac.yaml @@ -0,0 +1,41 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: dkp-sa + namespace: {{ .Values.namespace }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: dkp-sa-secret + namespace: {{ .Values.namespace }} + annotations: + kubernetes.io/service-account.name: dkp-sa +type: kubernetes.io/service-account-token +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: dkp-sa-rb + namespace: {{ .Values.namespace }} +subjects: + - kind: ServiceAccount + name: dkp-sa + namespace: {{ .Values.namespace }} +roleRef: + kind: ClusterRole + name: d8:use:role:manager + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: dkp-sa-cluster-admin-{{ .Values.namespace }} +subjects: + - kind: ServiceAccount + name: dkp-sa 
+ namespace: {{ .Values.namespace }} +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io diff --git a/ci/dvp-e2e/charts/infra/templates/registry-secret.yaml b/ci/dvp-e2e/charts/infra/templates/registry-secret.yaml new file mode 100644 index 0000000000..d26d10600e --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/registry-secret.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: dhctl-regcred + namespace: {{ .Values.namespace }} +type: kubernetes.io/dockerconfigjson +data: + .dockerconfigjson: {{ .Values.deckhouse.registryDockerCfg | quote }} + diff --git a/ci/dvp-e2e/charts/infra/templates/vi.yaml b/ci/dvp-e2e/charts/infra/templates/vi.yaml new file mode 100644 index 0000000000..66034a649d --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/vi.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualImage +metadata: + name: image + namespace: {{ .Values.namespace }} +spec: + storage: ContainerRegistry + dataSource: + type: HTTP + http: + url: {{ .Values.image.url }} diff --git a/ci/dvp-e2e/charts/infra/templates/vmc.yaml b/ci/dvp-e2e/charts/infra/templates/vmc.yaml new file mode 100644 index 0000000000..39330ced39 --- /dev/null +++ b/ci/dvp-e2e/charts/infra/templates/vmc.yaml @@ -0,0 +1,7 @@ +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualMachineClass +metadata: + name: "{{ .Values.namespace }}-cpu" +spec: + cpu: + type: Discovery diff --git a/ci/dvp-e2e/charts/infra/values.yaml b/ci/dvp-e2e/charts/infra/values.yaml new file mode 100644 index 0000000000..9fe1b1ab6e --- /dev/null +++ b/ci/dvp-e2e/charts/infra/values.yaml @@ -0,0 +1,58 @@ +# Infrastructure values for E2E testing + +# Storage profiles (from original values.yaml) +storageProfiles: + default: + controlPlane: + root: linstor-thin-r2 + etcd: linstor-thin-r2 + workers: + root: linstor-thin-r2 + infra: + nfs: nfs-4-1-wffc + dvcr: linstor-thin-r2 + virtualDisks: + os: linstor-thin-r2 + data: nfs-4-1-wffc + cephrbd: + controlPlane: + root: ceph-pool-r2-csi-rbd + etcd: ceph-pool-r2-csi-rbd + workers: + root: ceph-pool-r2-csi-rbd + infra: + nfs: nfs-4-1-wffc + dvcr: ceph-pool-r2-csi-rbd + virtualDisks: + os: ceph-pool-r2-csi-rbd + data: ceph-pool-r2-csi-rbd + sds-local: + controlPlane: + root: sds-local-storage + etcd: sds-local-storage + workers: + root: sds-local-storage + infra: + nfs: nfs-4-1-wffc + dvcr: sds-local-storage + virtualDisks: + os: sds-local-storage + data: sds-local-storage + +# Network configuration +network: + domain: e2e.virtlab.flant.com + clusterConfigurationPrefix: x + internalNetworkCIDRs: + - "10.241.0.0/16" + +# Registry configuration +registry: + dockerCfg: "" + insecure: false + +# Monitoring configuration +monitoring: + enabled: true + retention: "7d" + scrapeInterval: "30s" diff --git a/ci/dvp-e2e/charts/support/Chart.yaml b/ci/dvp-e2e/charts/support/Chart.yaml new file mode 100644 index 0000000000..8eefb78886 --- /dev/null +++ b/ci/dvp-e2e/charts/support/Chart.yaml @@ -0,0 +1,18 @@ +apiVersion: v2 +name: support +description: Support components for E2E testing +type: application +version: 0.1.0 +appVersion: "1.0.0" +keywords: + - support + - utilities + - e2e + - testing +home: https://github.com/deckhouse/deckhouse +sources: + - https://github.com/deckhouse/deckhouse +maintainers: + - name: Deckhouse Team + email: team@deckhouse.io +dependencies: [] diff --git a/ci/dvp-e2e/charts/support/values.yaml b/ci/dvp-e2e/charts/support/values.yaml new file mode 100644 index 
0000000000..8d3f37bc5b --- /dev/null +++ b/ci/dvp-e2e/charts/support/values.yaml @@ -0,0 +1,62 @@ +# Support components values for E2E testing + +# Namespace configuration +namespace: nightly-e2e + +# Loop integration +loop: + webhook: "" + channel: "test-virtualization-loop-alerts" + enabled: true + +# Logging configuration +logging: + level: "info" + format: "json" + retention: "7d" + +# Notification settings +notifications: + slack: + enabled: false + webhook: "" + channel: "" + email: + enabled: false + smtp: + host: "" + port: 587 + username: "" + password: "" + +# Backup configuration +backup: + enabled: true + schedule: "0 2 * * *" + retention: "7d" + storage: + type: "local" + path: "/backups" + +# Health checks +healthChecks: + enabled: true + interval: "30s" + timeout: "10s" + retries: 3 + +# Resource monitoring +monitoring: + enabled: true + metrics: + enabled: true + port: 8080 + alerts: + enabled: true + rules: [] + +# Debug settings +debug: + enabled: false + verbose: false + trace: false diff --git a/ci/dvp-e2e/manifests/storage/sds-modules.yaml b/ci/dvp-e2e/manifests/storage/sds-modules.yaml new file mode 100644 index 0000000000..42030bda40 --- /dev/null +++ b/ci/dvp-e2e/manifests/storage/sds-modules.yaml @@ -0,0 +1,48 @@ +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: sds-node-configurator +spec: + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: sds-node-configurator +spec: + imageTag: main + scanInterval: 15s +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: sds-local-volume +spec: + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: sds-local-volume +spec: + imageTag: main + scanInterval: 15s +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: sds-replicated-volume +spec: + enabled: true + version: 1 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: sds-replicated-volume +spec: + imageTag: main + scanInterval: 15s diff --git a/ci/dvp-e2e/manifests/storage/sds.yaml b/ci/dvp-e2e/manifests/storage/sds.yaml new file mode 100644 index 0000000000..0b8e27da48 --- /dev/null +++ b/ci/dvp-e2e/manifests/storage/sds.yaml @@ -0,0 +1,33 @@ +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: LVMVolumeGroup +metadata: + name: data +spec: + # Local VG; explicit local section is required for type=Local + type: Local + local: + actualVGNameOnTheNode: data + blockDeviceSelector: + devicePaths: + - /dev/sdd +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: ReplicatedStoragePool +metadata: + name: data +spec: + # Pool type must be LVM or LVMThin + type: LVM + lvmVolumeGroups: + - name: data +--- +apiVersion: storage.deckhouse.io/v1alpha1 +kind: ReplicatedStorageClass +metadata: + name: linstor-thin-r2 +spec: + storagePool: data + reclaimPolicy: Delete + topology: Ignored + volumeAccess: Local diff --git a/ci/dvp-e2e/profiles.json b/ci/dvp-e2e/profiles.json new file mode 100644 index 0000000000..624ce7cfeb --- /dev/null +++ b/ci/dvp-e2e/profiles.json @@ -0,0 +1,10 @@ +[ + { + "name": "sds", + "storage_class": "linstor-thin-r2", + "image_storage_class": "linstor-thin-r1-immediate", + "snapshot_storage_class": "linstor-thin-r2", + "worker_data_disk_size": "10Gi", + "description": "SDS storage with LINSTOR thin provisioning" + } +] diff --git a/ci/dvp-e2e/scripts/get_profile_config.sh b/ci/dvp-e2e/scripts/get_profile_config.sh new file mode 
100755
index 0000000000..f29dc17e34
--- /dev/null
+++ b/ci/dvp-e2e/scripts/get_profile_config.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Copyright 2025 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script to get storage class configuration from profiles.json
+# Usage: get_profile_config.sh <profile> [profiles_file] (e.g. "get_profile_config.sh sds" prints STORAGE_CLASS=linstor-thin-r2 and the other KEY=VALUE lines)
+
+set -euo pipefail
+
+PROFILE="${1:-}"
+PROFILES_FILE="${2:-./profiles.json}"
+
+if [[ -z "$PROFILE" ]]; then
+  echo "Usage: $0 <profile> [profiles_file]" >&2
+  exit 1
+fi
+
+if [[ ! -f "$PROFILES_FILE" ]]; then
+  echo "Profiles file not found: $PROFILES_FILE" >&2
+  exit 1
+fi
+
+# Use jq to find profile by exact name only
+PROFILE_CONFIG=$(jq -r --arg profile "$PROFILE" '
+  .[] | select(.name == $profile) |
+  "\(.storage_class)|\(.image_storage_class)|\(.snapshot_storage_class)|\(.worker_data_disk_size // "10Gi")"
+' "$PROFILES_FILE")
+
+if [[ -z "$PROFILE_CONFIG" || "$PROFILE_CONFIG" == "null" ]]; then
+  echo "Profile '$PROFILE' not found in $PROFILES_FILE" >&2
+  echo "Available profiles:" >&2
+  jq -r '.[] | "  - \(.name)"' "$PROFILES_FILE" >&2
+  exit 1
+fi
+
+# Split the result and export variables
+IFS='|' read -r SC IMG_SC SNAP_SC ATTACH_SIZE <<< "$PROFILE_CONFIG"
+
+echo "STORAGE_CLASS=$SC"
+echo "IMAGE_STORAGE_CLASS=$IMG_SC"
+echo "SNAPSHOT_STORAGE_CLASS=$SNAP_SC"
+echo "ATTACH_DISK_SIZE=$ATTACH_SIZE"
diff --git a/ci/dvp-e2e/scripts/loop_junit_notify.py b/ci/dvp-e2e/scripts/loop_junit_notify.py
new file mode 100755
index 0000000000..2ead80cc6e
--- /dev/null
+++ b/ci/dvp-e2e/scripts/loop_junit_notify.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python3
+# Copyright 2025 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parse JUnit XML and send test results to Loop webhook.""" + +import argparse +import json +import os +import sys +import urllib.error +import urllib.request +import xml.etree.ElementTree as ET +from datetime import datetime +from pathlib import Path + + +def load_env_file(env_path: Path) -> None: + """Load environment variables from .env file.""" + if not env_path.exists(): + return + + with open(env_path, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + # Don't override existing env vars + if key not in os.environ: + os.environ[key] = value.strip('"').strip("'") + + +def parse_junit_xml(junit_file: Path) -> dict: + """Parse JUnit XML file and extract test results.""" + try: + tree = ET.parse(junit_file) + root = tree.getroot() + + # Handle both testsuites and testsuite root elements + if root.tag == 'testsuites': + testsuites = root + else: + testsuites = root + + total_tests = int(testsuites.get('tests', 0)) + total_failures = int(testsuites.get('failures', 0)) + total_errors = int(testsuites.get('errors', 0)) + total_skipped = int(testsuites.get('skipped', 0)) + total_time = float(testsuites.get('time', 0)) + + # Calculate success rate + successful_tests = total_tests - total_failures - total_errors + success_rate = (successful_tests / total_tests * 100) if total_tests > 0 else 0 + + # Extract failed test details + failed_tests = [] + for testsuite in testsuites.findall('testsuite'): + for testcase in testsuite.findall('testcase'): + failure = testcase.find('failure') + error = testcase.find('error') + if failure is not None or error is not None: + failed_tests.append({ + 'name': testcase.get('name', 'unknown'), + 'class': testcase.get('classname', 'unknown'), + 'time': float(testcase.get('time', 0)), + 'message': (failure.get('message', '') if failure is not None else '') or + (error.get('message', '') if error is not None else '') + }) + + return { + 'total_tests': total_tests, + 'successful_tests': successful_tests, + 'failed_tests': total_failures + total_errors, + 'skipped_tests': total_skipped, + 'success_rate': success_rate, + 'total_time': total_time, + 'failed_test_details': failed_tests[:5], # Limit to first 5 failures + 'has_more_failures': len(failed_tests) > 5 + } + except ET.ParseError as e: + print(f"[ERR] Failed to parse JUnit XML: {e}", file=sys.stderr) + return None + except Exception as e: + print(f"[ERR] Error processing JUnit file: {e}", file=sys.stderr) + return None + + +def format_test_results(results: dict, run_id: str, storage_profile: str, timeout: str) -> str: + """Format test results into a readable message.""" + if results is None: + return f"โŒ Failed to parse test results for {run_id}" + + # Determine status emoji and color + if results['failed_tests'] == 0: + status_emoji = "โœ…" + status_text = "SUCCESS" + elif results['success_rate'] >= 80: + status_emoji = "โš ๏ธ" + status_text = "PARTIALLY SUCCESS" + else: + status_emoji = "โŒ" + status_text = "FAILED" + + # Format time + time_str = f"{results['total_time']:.1f}s" + if results['total_time'] > 60: + minutes = int(results['total_time'] // 60) + seconds = int(results['total_time'] % 60) + time_str = f"{minutes}m {seconds}s" + + # Build message + message_lines = [ + f"{status_emoji} E2E tests for virtualization completed", + f"๐Ÿ“‹ Run ID: {run_id}", + f"๐Ÿ’พ Storage: {storage_profile}", + f"โฑ๏ธ Timeout: {timeout}", + f"๐Ÿ• Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", + "", + f"๐Ÿ“Š Results: 
{status_text}", + f"โ€ข Total tests: {results['total_tests']}", + f"โ€ข Passed: {results['successful_tests']}", + f"โ€ข Failed: {results['failed_tests']}", + f"โ€ข Skipped: {results['skipped_tests']}", + f"โ€ข Success rate: {results['success_rate']:.1f}%", + f"โ€ข Duration: {time_str}" + ] + + # Add failed test details if any + if results['failed_test_details']: + message_lines.extend([ + "", + "๐Ÿ” Failed tests:" + ]) + for test in results['failed_test_details']: + message_lines.append(f"โ€ข {test['class']}.{test['name']}") + if test['message']: + # Truncate long messages + msg = test['message'][:100] + "..." if len(test['message']) > 100 else test['message'] + message_lines.append(f" {msg}") + + if results['has_more_failures']: + message_lines.append(f"โ€ข ... and {len(results['failed_test_details']) - 5} more tests") + + return "\n".join(message_lines) + + +def send_to_loop(webhook_url: str, channel: str, message: str) -> bool: + """Send message to Loop webhook.""" + try: + payload = json.dumps({"channel": channel, "text": message}).encode("utf-8") + request = urllib.request.Request( + webhook_url, + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + with urllib.request.urlopen(request, timeout=30) as response: + response.read() + return True + except urllib.error.HTTPError as e: + print(f"[ERR] HTTP error {e.code}: {e.reason}", file=sys.stderr) + return False + except urllib.error.URLError as e: + print(f"[ERR] URL error: {e.reason}", file=sys.stderr) + return False + except Exception as e: + print(f"[ERR] Unexpected error: {e}", file=sys.stderr) + return False + + +def main(argv: list[str]) -> int: + # Load .env file if it exists + env_path = Path(__file__).parent.parent / '.env' + load_env_file(env_path) + + parser = argparse.ArgumentParser(description="Parse JUnit XML and send results to Loop") + parser.add_argument("--junit-file", required=True, help="Path to JUnit XML file") + parser.add_argument("--run-id", required=True, help="Test run ID") + parser.add_argument("--storage-profile", required=True, help="Storage profile used") + parser.add_argument("--webhook-url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK')) + parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts')) + parser.add_argument("--timeout", default="30m", help="Test timeout") + + args = parser.parse_args(argv) + + if not args.webhook_url: + print("[ERR] LOOP_WEBHOOK not set. 
Set via --webhook-url or LOOP_WEBHOOK env variable", file=sys.stderr) + return 1 + + junit_file = Path(args.junit_file) + if not junit_file.exists(): + print(f"[ERR] JUnit file not found: {junit_file}", file=sys.stderr) + return 1 + + # Parse JUnit results + results = parse_junit_xml(junit_file) + + # Format message + message = format_test_results(results, args.run_id, args.storage_profile, args.timeout) + + # Send to Loop + if send_to_loop(args.webhook_url, args.channel, message): + print(f"[OK] Results sent to Loop channel '{args.channel}'") + return 0 + else: + print(f"[ERR] Failed to send results to Loop", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/ci/dvp-e2e/scripts/loop_matrix_summary.py b/ci/dvp-e2e/scripts/loop_matrix_summary.py new file mode 100755 index 0000000000..cb248e140f --- /dev/null +++ b/ci/dvp-e2e/scripts/loop_matrix_summary.py @@ -0,0 +1,391 @@ +#!/usr/bin/env python3 +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Parse matrix test logs and send summary to Loop webhook.""" + +import argparse +import json +import os +import re +import sys +import urllib.error +import urllib.request +from datetime import datetime +from pathlib import Path + + +def load_env_file(env_path: Path) -> None: + """Load environment variables from .env file.""" + if not env_path.exists(): + return + + with open(env_path, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + # Don't override existing env vars + if key not in os.environ: + os.environ[key] = value.strip('"').strip("'") + + +def parse_test_log(log_file: Path) -> dict: + """Parse test log file and extract results.""" + try: + content = log_file.read_text(encoding='utf-8') + + # Extract run ID from filename or content + run_id = log_file.stem + + # Look for test completion patterns + success_patterns = [ + r'\[OK\] run_id=([^\s]+) finished', + r'Ginkgo ran \d+ spec in [\d.]+s', + r'Test Suite Passed' + ] + + failure_patterns = [ + r'\[ERR\] run_id=([^\s]+) failed', + r'Ginkgo ran \d+ spec in [\d.]+s.*FAILED', + r'Test Suite Failed', + r'API response status: Failure', + r'admission webhook .* too long', + r'Unable to connect to the server', + r'Error while process exit code: exit status 1', + r'task: Failed to run task .* exit status', + r'Infrastructure runner \"master-node\" process exited' + ] + + # Check for explicit status markers first + start_match = re.search(r'\[START\].*run_id=([^\s]+)', content) + finish_match = re.search(r'\[FINISH\].*run_id=([^\s]+).*status=(\w+)', content) + + if start_match and finish_match: + # Use explicit status markers + success = finish_match.group(2) == 'ok' + failure = finish_match.group(2) == 'error' + else: + # Fallback to pattern matching + success = any(re.search(pattern, content, re.IGNORECASE) for pattern in success_patterns) + failure = any(re.search(pattern, content, re.IGNORECASE) for pattern in 
failure_patterns) + + # Extract storage profile from run_id + storage_profile = "unknown" + if '-' in run_id: + parts = run_id.split('-') + if len(parts) >= 2: + # Format: {prefix}-{profile}-{timestamp}-{random} + # For "test-sds-20251009-221516-17193", we want "sds" + storage_profile = parts[1] + + # Extract test statistics + test_stats = {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0} + + # Look for Ginkgo test results + ginkgo_match = re.search(r'Ran (\d+) of (\d+) Specs.*?(\d+) Passed.*?(\d+) Failed.*?(\d+) Skipped', content, re.DOTALL) + if ginkgo_match: + test_stats['total'] = int(ginkgo_match.group(1)) + test_stats['passed'] = int(ginkgo_match.group(3)) + test_stats['failed'] = int(ginkgo_match.group(4)) + test_stats['skipped'] = int(ginkgo_match.group(5)) + + # Extract timing information + duration = "unknown" + # Prefer explicit START/FINISH ISO markers + start_match = re.search(r'^\[START\].*time=([^\s]+)', content, re.MULTILINE) + finish_match = re.search(r'^\[FINISH\].*time=([^\s]+)', content, re.MULTILINE) + if start_match and finish_match: + try: + started = datetime.fromisoformat(start_match.group(1)) + finished = datetime.fromisoformat(finish_match.group(1)) + delta = finished - started + total_seconds = int(delta.total_seconds()) + hours = total_seconds // 3600 + minutes = (total_seconds % 3600) // 60 + seconds = total_seconds % 60 + duration = f"{hours}h {minutes}m {seconds}s" + except Exception: + pass + else: + # Fallback: try to find H:M:S pattern + time_match = re.search(r'(\d+):(\d+):(\d+)', content) + if time_match: + hours, minutes, seconds = time_match.groups() + duration = f"{hours}h {minutes}m {seconds}s" + + # Extract error details - only from E2E test execution + error_details = [] + if failure: + # Look for E2E test errors after "Running Suite" or "go run ginkgo" + e2e_start_patterns = [ + r'Running Suite:', + r'go run.*ginkgo', + r'Will run.*specs' + ] + + # Find E2E test section + e2e_start_pos = -1 + for pattern in e2e_start_patterns: + match = re.search(pattern, content, re.IGNORECASE) + if match: + e2e_start_pos = match.start() + break + + if e2e_start_pos > 0: + # Extract content after E2E tests started + e2e_content = content[e2e_start_pos:] + + # Look for actual test failures with cleaner patterns + test_error_patterns = [ + r'\[FAIL\].*?([^\n]+)', + r'FAIL!.*?--.*?(\d+) Passed.*?(\d+) Failed', + r'Test Suite Failed', + r'Ginkgo ran.*FAILED', + r'Error occurred during reconciliation.*?([^\n]+)', + r'Failed to update resource.*?([^\n]+)', + r'admission webhook.*denied the request.*?([^\n]+)', + r'context deadline exceeded', + r'timed out waiting for the condition.*?([^\n]+)', + r'panic.*?([^\n]+)' + ] + + for pattern in test_error_patterns: + matches = re.findall(pattern, e2e_content, re.IGNORECASE | re.DOTALL) + for match in matches: + if isinstance(match, tuple): + # Clean up the error message + error_msg = f"{match[0]}: {match[1]}" + else: + error_msg = match + + # Clean up ANSI escape codes and extra whitespace + error_msg = re.sub(r'\x1b\[[0-9;]*[mK]', '', error_msg) + error_msg = re.sub(r'\[0m\s*\[38;5;9m\s*\[1m', '', error_msg) + error_msg = re.sub(r'\[0m', '', error_msg) + error_msg = error_msg.strip() + + # Skip empty, very short messages, or artifacts + if len(error_msg) > 10 and not re.match(r'^\d+:\s*\d+$', error_msg): + error_details.append(error_msg) + + # Remove duplicates and limit to most meaningful errors + error_details = list(dict.fromkeys(error_details))[:2] + + return { + 'run_id': run_id, + 'storage_profile': 
storage_profile, + 'success': success and not failure, + 'failure': failure, + 'duration': duration, + 'test_stats': test_stats, + 'error_details': error_details, + 'log_file': str(log_file) + } + except Exception as e: + print(f"[WARN] Failed to parse log {log_file}: {e}", file=sys.stderr) + return { + 'run_id': log_file.stem, + 'storage_profile': 'unknown', + 'success': False, + 'failure': True, + 'duration': 'unknown', + 'test_stats': {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0}, + 'error_details': [f"Failed to parse log: {e}"], + 'log_file': str(log_file) + } + + +def format_matrix_summary(results: list, run_id_prefix: str, profiles: str, github_run_url: str = None) -> str: + """Format matrix test results into a readable message.""" + total_runs = len(results) + successful_runs = sum(1 for r in results if r['success']) + # Treat any non-success as failure for overall counters + failed_runs = total_runs - successful_runs + + # Calculate total test statistics + total_tests = sum(r['test_stats']['total'] for r in results) + total_passed = sum(r['test_stats']['passed'] for r in results) + total_failed = sum(r['test_stats']['failed'] for r in results) + total_skipped = sum(r['test_stats']['skipped'] for r in results) + + # Determine overall status + if total_runs == 0: + status_emoji = "โšช" + status_text = "NO RUNS" + elif failed_runs > 0: + status_emoji = "โŒ" + status_text = "FAILED" + else: + # No failures. Consider Passed if any run succeeded (skips allowed) + status_emoji = "โœ…" + status_text = "PASSED" + + # Group results by storage profile + profile_results = {} + for result in results: + profile = result['storage_profile'] + if profile not in profile_results: + profile_results[profile] = { + 'success': 0, + 'failure': 0, + 'test_stats': {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0} + } + if result['success']: + profile_results[profile]['success'] += 1 + else: + profile_results[profile]['failure'] += 1 + + # Aggregate test stats + for key in profile_results[profile]['test_stats']: + profile_results[profile]['test_stats'][key] += result['test_stats'][key] + + # Build message with table format + current_date = datetime.now().strftime('%Y-%m-%d') + test_type = "Nightly" if run_id_prefix in ["n", "nightly"] else run_id_prefix.upper() + + message_lines = [ + f"# :dvp: DVP-virtualization {current_date} {test_type} e2e Tests" + ] + + # Add table format for profile results + if profile_results: + message_lines.extend([ + "", + "| Storage Profile | Status | Passed | Failed | Skipped | Success Rate | Duration |", + "|----------------|--------|--------|--------|---------|--------------|----------|" + ]) + + for profile, stats in profile_results.items(): + total_configs = stats['success'] + stats['failure'] + config_success_rate = (stats['success'] / total_configs * 100) if total_configs > 0 else 0 + + test_stats = stats['test_stats'] + test_success_rate = (test_stats['passed'] / test_stats['total'] * 100) if test_stats['total'] > 0 else 0 + + status_emoji = "โœ…" if stats['failure'] == 0 else "โŒ" if stats['success'] == 0 else "โš ๏ธ" + status_text = "PASSED" if stats['failure'] == 0 else "FAILED" if stats['success'] == 0 else "PARTIAL" + + # Get duration and build linked profile name + profile_duration = "unknown" + for result in results: + if result['storage_profile'] == profile: + profile_duration = result['duration'] + break + name_md = f"[{profile.upper()}]({github_run_url})" if github_run_url else profile.upper() + + message_lines.append( + f"| {name_md} | 
{status_emoji} **{status_text}** | {test_stats['passed']} | {test_stats['failed']} | {test_stats['skipped']} | {test_success_rate:.1f}% | {profile_duration} |" + ) + + return "\n".join(message_lines) + + +def send_to_loop(webhook_url: str, channel: str, message: str) -> bool: + """Send message to Loop webhook.""" + try: + payload = json.dumps({"channel": channel, "text": message}).encode("utf-8") + request = urllib.request.Request( + webhook_url, + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + with urllib.request.urlopen(request, timeout=30) as response: + response.read() + return True + except urllib.error.HTTPError as e: + print(f"[ERR] HTTP error {e.code}: {e.reason}", file=sys.stderr) + return False + except urllib.error.URLError as e: + print(f"[ERR] URL error: {e.reason}", file=sys.stderr) + return False + except Exception as e: + print(f"[ERR] Unexpected error: {e}", file=sys.stderr) + return False + + +def main(argv: list[str]) -> int: + # Load .env file if it exists + env_path = Path(__file__).parent.parent / '.env' + load_env_file(env_path) + + parser = argparse.ArgumentParser(description="Parse matrix test logs and send summary to Loop") + parser.add_argument("--profiles", required=True, help="Comma-separated list of storage profiles") + parser.add_argument("--run-id-prefix", required=True, help="Run ID prefix") + parser.add_argument("--log-dir", required=True, help="Directory containing log files") + parser.add_argument("--webhook-url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK')) + parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts')) + parser.add_argument("--github-run-url", required=False, help="GitHub Actions run URL to link from profile name") + + args = parser.parse_args(argv) + + if not args.webhook_url: + print("[ERR] LOOP_WEBHOOK not set. 
Set via --webhook-url or LOOP_WEBHOOK env variable", file=sys.stderr) + return 1 + + log_dir = Path(args.log_dir) + if not log_dir.exists(): + print(f"[ERR] Log directory not found: {log_dir}", file=sys.stderr) + return 1 + + # Find all log files + log_files = list(log_dir.glob("*.log")) + if not log_files: + print(f"[WARN] No log files found in {log_dir}", file=sys.stderr) + return 0 + + # Parse all log files + results = [] + for log_file in log_files: + result = parse_test_log(log_file) + results.append(result) + + # Filter by run_id_prefix and profile (no aliases; use canonical names) + allowed_profiles = set([p.strip() for p in args.profiles.split(",")]) + filtered_results = [] + + for result in results: + # Filter by run_id prefix (more flexible matching) + if not result['run_id'].startswith(args.run_id_prefix): + continue + + # Filter by canonical profile name from run_id + normalized_profile = result['storage_profile'] + if normalized_profile not in allowed_profiles: + continue + + result['storage_profile'] = normalized_profile + filtered_results.append(result) + + results = filtered_results + + if not results: + print(f"[WARN] No results to report", file=sys.stderr) + return 0 + + # Format message + message = format_matrix_summary(results, args.run_id_prefix, args.profiles, github_run_url=args.github_run_url) + + # Send to Loop + if send_to_loop(args.webhook_url, args.channel, message): + print(f"[OK] Matrix summary sent to Loop channel '{args.channel}'") + return 0 + else: + print(f"[ERR] Failed to send matrix summary to Loop", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/ci/dvp-e2e/scripts/loop_notify.py b/ci/dvp-e2e/scripts/loop_notify.py new file mode 100644 index 0000000000..eac831ce78 --- /dev/null +++ b/ci/dvp-e2e/scripts/loop_notify.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Send notifications to Loop webhook.""" + +import argparse +import json +import os +import sys +import urllib.error +import urllib.request +from pathlib import Path + + +def load_env_file(env_path: Path) -> None: + """Load environment variables from .env file.""" + if not env_path.exists(): + return + + with open(env_path, 'r') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#') and '=' in line: + key, value = line.split('=', 1) + # Don't override existing env vars + if key not in os.environ: + os.environ[key] = value.strip('"').strip("'") + + +def send_post_request(url: str, channel: str, text: str) -> None: + """Send JSON payload to Loop webhook.""" + + payload = json.dumps({"channel": channel, "text": text}).encode("utf-8") + request = urllib.request.Request( + url, + data=payload, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + with urllib.request.urlopen(request, timeout=30) as response: # noqa: S310 + # We just ensure the request succeeded; the body is usually empty. 
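+        # urlopen raises HTTPError for non-2xx status codes, so reaching this point
+        # means the webhook accepted the request.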
+        response.read()
+
+
+def main(argv: list[str]) -> int:
+    # Load .env file if it exists
+    env_path = Path(__file__).parent.parent / '.env'
+    load_env_file(env_path)
+
+    parser = argparse.ArgumentParser(description="Send message to Loop webhook")
+    parser.add_argument("--url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK'))
+    parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts'))
+    parser.add_argument("--text", required=True, help="Message text")
+
+    args = parser.parse_args(argv)
+
+    if not args.url:
+        print("[ERR] LOOP_WEBHOOK not set. Set via --url or LOOP_WEBHOOK env variable", file=sys.stderr)
+        return 1
+
+    try:
+        send_post_request(url=args.url, channel=args.channel, text=args.text)
+    except urllib.error.HTTPError as exc:  # pragma: no cover - network failure path
+        print(f"[ERR] HTTP error {exc.code}: {exc.reason}", file=sys.stderr)
+        return 1
+    except urllib.error.URLError as exc:  # pragma: no cover - network failure path
+        print(f"[ERR] URL error: {exc.reason}", file=sys.stderr)
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
+
diff --git a/ci/dvp-e2e/values.yaml b/ci/dvp-e2e/values.yaml
new file mode 100644
index 0000000000..3379c93532
--- /dev/null
+++ b/ci/dvp-e2e/values.yaml
@@ -0,0 +1,69 @@
+storageProfiles:
+  default:
+    controlPlane:
+      root: linstor-thin-r2
+      etcd: linstor-thin-r2
+    workers:
+      root: linstor-thin-r2
+    infra:
+      nfs: nfs-4-1-wffc
+      dvcr: linstor-thin-r2
+    virtualDisks:
+      os: linstor-thin-r2
+      data: nfs-4-1-wffc
+  cephrbd:
+    controlPlane:
+      root: ceph-pool-r2-csi-rbd
+      etcd: ceph-pool-r2-csi-rbd
+    workers:
+      root: ceph-pool-r2-csi-rbd
+    infra:
+      nfs: nfs-4-1-wffc
+      dvcr: ceph-pool-r2-csi-rbd
+    virtualDisks:
+      os: ceph-pool-r2-csi-rbd
+      data: ceph-pool-r2-csi-rbd
+  sds-local:
+    controlPlane:
+      root: sds-local-storage
+      etcd: sds-local-storage
+    workers:
+      root: sds-local-storage
+    infra:
+      nfs: nfs-4-1-wffc
+      dvcr: sds-local-storage
+    virtualDisks:
+      os: sds-local-storage
+      data: sds-local-storage
+domain: e2e.virtlab.flant.com
+clusterConfigurationPrefix: e2e
+deckhouse:
+  tag: main
+  kubernetesVersion: Automatic
+virtualization:
+  tag: main
+features:
+  virtualization: true
+image:
+  url: https://89d64382-20df-4581-8cc7-80df331f67fa.selstorage.ru/ubuntu/noble-server-cloudimg-amd64.img
+  defaultUser: ubuntu
+  bootloader: EFI
+ingressHosts:
+  - api
+  - grafana
+  - dex
+  - prometheus
+  - console
+  - virtualization
+instances:
+  masterNodes:
+    count: 1
+    cores: 8
+    coreFraction: 50%
+    memory: 20Gi
+  additionalNodes:
+    - name: worker
      count: 3
+      cores: 6
+      coreFraction: 50%
+      memory: 12Gi

From bc0e586ec3686c13c72543617bf56a9508d48a60 Mon Sep 17 00:00:00 2001
From: Anton Yachmenev
Date: Sat, 15 Nov 2025 00:57:24 +0300
Subject: [PATCH 02/14] ci(e2e): clean values and debug; simplify SDS wiring;
 minimal helper scripts; HTTPS-only ingress; parameterize cleanup prefix and
 wait for namespace deletion

- remove report scripts and local tests task
- drop unused values (storageProfiles, infra/virtualDisks/security, extra features)
- add build_parent_kubeconfig.sh + inject_registry_cfg.sh and use them in workflow
- call task nested:storage:sds directly from workflow; remove wrapper
- trim logs (no lsblk, no set -x); keep concise status
- ingress: drop 80/wildcard-http; keep HTTPS passthrough only
- cleanup: param prefix + wait for namespace deletion

Signed-off-by: Anton Yachmenev
---
 .github/workflows/e2e-matrix.yml | 99 ++----
ci/dvp-e2e/Taskfile.yaml | 83 +--- .../templates/virtualization.yaml | 4 +- ci/dvp-e2e/charts/cluster-config/values.yaml | 26 +- .../charts/infra/templates/ingress.yaml | 37 -- ci/dvp-e2e/charts/infra/values.yaml | 60 +-- ci/dvp-e2e/scripts/build_parent_kubeconfig.sh | 47 +++ ci/dvp-e2e/scripts/inject_registry_cfg.sh | 24 ++ ci/dvp-e2e/scripts/loop_junit_notify.py | 222 ---------- ci/dvp-e2e/scripts/loop_matrix_summary.py | 391 ------------------ ci/dvp-e2e/scripts/loop_notify.py | 87 ---- ci/dvp-e2e/values.yaml | 37 -- 12 files changed, 110 insertions(+), 1007 deletions(-) create mode 100755 ci/dvp-e2e/scripts/build_parent_kubeconfig.sh create mode 100755 ci/dvp-e2e/scripts/inject_registry_cfg.sh delete mode 100755 ci/dvp-e2e/scripts/loop_junit_notify.py delete mode 100755 ci/dvp-e2e/scripts/loop_matrix_summary.py delete mode 100644 ci/dvp-e2e/scripts/loop_notify.py diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 6f0cddf4da..3c569eabda 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -166,28 +166,10 @@ jobs: shell: bash run: | set -euo pipefail - mkdir -p "$HOME/.kube" - cat > "$HOME/.kube/config" <> "$GITHUB_ENV" + chmod +x ci/dvp-e2e/scripts/build_parent_kubeconfig.sh + KCFG="$HOME/.kube/config" + ci/dvp-e2e/scripts/build_parent_kubeconfig.sh -o "$KCFG" -a "${E2E_K8S_URL}" -t "${{ secrets.E2E_NESTED_SA_SECRET }}" + echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV" - name: Prepare run values.yaml working-directory: ci/dvp-e2e @@ -222,7 +204,9 @@ jobs: - name: Inject REGISTRY_DOCKER_CFG into values.yaml working-directory: ci/dvp-e2e run: | - yq eval --inplace '.deckhouse.registryDockerCfg = strenv(REGISTRY_DOCKER_CFG)' "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" + chmod +x scripts/inject_registry_cfg.sh + VALS="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" + REGISTRY_DOCKER_CFG="${REGISTRY_DOCKER_CFG}" scripts/inject_registry_cfg.sh -f "$VALS" -v "$REGISTRY_DOCKER_CFG" - name: Docker login to Deckhouse registry uses: docker/login-action@v3 @@ -323,29 +307,14 @@ jobs: NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \ PARENT_KUBECONFIG="${KUBECONFIG}" - - name: Configure storage classes + - name: Configure SDS in nested cluster working-directory: ci/dvp-e2e run: | - echo "๐Ÿ’พ Configuring storage classes for profile: sds-replicated-volume -> sds" - task nested:storage:configure \ - STORAGE_PROFILE="sds" \ - TARGET_STORAGE_CLASS="${{ steps.profile-config.outputs.storage_class }}" \ + echo "๐Ÿ’พ Configuring SDS storage (sds-node-configurator + sds-replicated-volume)" + task nested:storage:sds \ TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ - VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ - GENERATED_VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/generated-values.yaml" \ - SSH_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/ssh" \ - SSH_FILE_NAME="id_ed" \ - PASSWORD_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password.txt" \ - PASSWORD_HASH_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/password-hash.txt" \ - NAMESPACE="${{ env.RUN_ID }}" \ - DOMAIN="" \ - DEFAULT_USER="ubuntu" \ - NESTED_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested" \ - NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" - - # Ingress smoke disabled: not required for storage config - - # Ceph CSI smoke check removed per request + NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \ + 
SDS_SC_NAME="${{ steps.profile-config.outputs.storage_class }}" - name: Upload run context if: always() @@ -378,35 +347,29 @@ jobs: shell: bash run: | set -euo pipefail - mkdir -p "$HOME/.kube" - cat > "$HOME/.kube/config" <> "$GITHUB_ENV" + chmod +x ci/dvp-e2e/scripts/build_parent_kubeconfig.sh + KCFG="$HOME/.kube/config" + ci/dvp-e2e/scripts/build_parent_kubeconfig.sh -o "$KCFG" -a "${E2E_K8S_URL}" -t "${{ secrets.E2E_NESTED_SA_SECRET }}" + echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV" - name: Cleanup test namespaces + env: + CLEANUP_PREFIX: ${{ vars.CLEANUP_PREFIX || 'nightly-nested-e2e-' }} run: | set -euo pipefail - echo "๐Ÿงน Cleaning up namespaces matching 'nightly-nested-e2e-*'" - kubectl get ns -o name | grep "namespace/nightly-nested-e2e-" | cut -d/ -f2 | \ - xargs -r kubectl delete ns --wait=false || echo "[INFO] No namespaces to delete" + echo "๐Ÿงน Cleaning namespaces with prefix '${CLEANUP_PREFIX}'" + ns_list=$(kubectl get ns -o json | jq -r --arg p "$CLEANUP_PREFIX" '.items[].metadata.name | select(startswith($p))') + if [ -z "$ns_list" ]; then + echo "[INFO] No namespaces to delete"; exit 0 + fi + for ns in $ns_list; do + echo "[CLEANUP] Deleting namespace $ns ..." + kubectl delete ns "$ns" --wait=false || true + done + echo "[CLEANUP] Waiting for namespaces to be deleted..." + for ns in $ns_list; do + kubectl wait --for=delete ns/"$ns" --timeout=600s || echo "[WARN] Namespace $ns was not fully deleted within timeout" + done - name: Report cleanup results if: always() diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index f2df178ee1..f2020a107c 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -232,8 +232,6 @@ tasks: cmds: - | set -euo pipefail - # Enable shell tracing when DEBUG_HOTPLUG is set - [ -n "${DEBUG_HOTPLUG:-}" ] && set -x || true echo "[INFRA] Attaching {{ .DISK_COUNT }} storage disks to worker VMs using hotplug in namespace {{ .NAMESPACE }}" # Wait for worker VMs @@ -368,45 +366,18 @@ tasks: fi sleep 5 - # Minimal periodic debug snapshot approximately every 60 seconds - if [ $((i % 12)) -eq 0 ]; then - echo "[DEBUG] VMBDA $vd summary:" - kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json \ - | jq -r '{phase: .status.phase, conditions: (.status.conditions // []) | map({type, status, reason, message})}' || true - echo "[DEBUG] VM $vm block devices (summary):" - kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ - | jq -r '{phase: .status.phase, blockDeviceRefs: (.status.blockDeviceRefs // []) | map({name, virtualMachineBlockDeviceAttachmentName, attached, hotplugged})}' || true - fi - done + done if [ "$phase" != "Attached" ] && [ "${success_by_vm:-0}" -ne 1 ]; then echo "[ERROR] Disk $vd failed to attach to VM $vm within timeout" >&2 - echo "[DEBUG] Final VMBDA summary:" - kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json \ - | jq -r '{phase: .status.phase, conditions: (.status.conditions // []) | map({type, status, reason, message})}' || true - echo "[DEBUG] VM $vm block devices (summary):" - kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ - | jq -r '{phase: .status.phase, blockDeviceRefs: (.status.blockDeviceRefs // []) | map({name, virtualMachineBlockDeviceAttachmentName, attached, hotplugged})}' || true + # final debug snapshots removed exit 1 fi done echo "[INFRA] VM $vm configured with hotplug disks" - # Optional on-node lsblk debug snapshot (requires d8 and SSH key). Always sudo for block devices visibility. 
- if command -v d8 >/dev/null 2>&1; then - echo "[DEBUG] Collecting lsblk from VM $vm..." - if ! d8 v ssh --username='{{ .DEFAULT_USER }}' \ - --identity-file='{{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}' \ - --local-ssh=true \ - --local-ssh-opts='-o StrictHostKeyChecking=no' \ - --local-ssh-opts='-o UserKnownHostsFile=/dev/null' \ - "${vm}.{{ .NAMESPACE }}" -c "sudo lsblk -o NAME,KNAME,TYPE,SIZE,MODEL,TRAN,FSTYPE,MOUNTPOINT -p"; then - echo "[WARN] lsblk collection failed for $vm (SSH)" >&2 - fi - else - echo "[WARN] 'd8' CLI not found, skipping lsblk collection for $vm" >&2 - fi + done echo "[INFRA] All worker VMs configured with storage disks via hotplug" @@ -711,26 +682,7 @@ tasks: task dhctl-bootstrap VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed' } 2>&1 | tee '{{ .LOG_FILE }}' - local:tests: - desc: Local flow โ€” prepare nested kubeconfig and run E2E (logs saved) - vars: - RUN_ID: '{{ .RUN_ID | default (printf "local-%s" (now | date "20060102-150405")) }}' - RUN_NAMESPACE: '{{ .RUN_NAMESPACE | default (printf "dvp-e2e-local-%s" .RUN_ID) }}' - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' - LOG_FILE: '{{ .LOG_FILE | default (printf "%s/%s" .TMP_DIR "tests.log") }}' - E2E_DIR: '{{ .E2E_DIR | default (env "E2E_DIR") | default "../../tests/e2e" }}' - NESTED_SC: '{{ .NESTED_SC | default "ceph-pool-r2-csi-rbd-immediate" }}' - cmds: - - mkdir -p {{ .TMP_DIR }} - - | - set -euo pipefail - { - task nested:kubeconfig NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' - task nested:storage:sds NESTED_KUBECONFIG='{{ .TMP_DIR }}/nested-{{ .RUN_NAMESPACE }}/kubeconfig' SDS_SC_NAME='{{ .NESTED_SC }}' - task nested:ensure-sc NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' SC_NAME='{{ .NESTED_SC }}' - task nested:ensure-vmclass-default NESTED_KUBECONFIG='{{ .TMP_DIR }}/nested-{{ .RUN_NAMESPACE }}/kubeconfig' - task nested:e2e NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' E2E_DIR='{{ .E2E_DIR }}' - } 2>&1 | tee '{{ .LOG_FILE }}' + # ------------------------------------------------------------ # Nested cluster helpers (SC + kubeconfig) @@ -851,33 +803,6 @@ tasks: sleep 10 done - nested:storage:configure: - desc: Configure SDS storage profile inside nested cluster - vars: - STORAGE_PROFILE: '{{ .STORAGE_PROFILE | default "sds" }}' - NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" - TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" - STORAGE_PROFILE_NORMALIZED: - sh: | - case '{{ .STORAGE_PROFILE }}' in - sds|sds-local|sds_local|sds-replicated|sds_replicated) echo sds ;; - *) echo '{{ .STORAGE_PROFILE }}' ;; - esac - cmds: - - cmd: 'echo "[STORAGE] normalized profile = {{ .STORAGE_PROFILE_NORMALIZED }}"' - - | - set -euo pipefail - if [ '{{ .STORAGE_PROFILE_NORMALIZED }}' != "sds" ]; then - echo "[ERR] Only SDS storage profile is supported. Got: {{ .STORAGE_PROFILE_NORMALIZED }}" >&2 - exit 1 - fi - - | - echo "[SDS] Configuring SDS storage..." 
- - | - task nested:storage:sds \ - NESTED_KUBECONFIG='{{ .NESTED_KUBECONFIG }}' \ - SDS_SC_NAME='{{ .TARGET_STORAGE_CLASS }}' - nested:storage:sds: desc: Configure SDS storage profile in nested cluster vars: diff --git a/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml b/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml index dffaf8b115..5011c2b2b6 100644 --- a/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml +++ b/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml @@ -1,6 +1,5 @@ --- -{{- if hasKey .Values "features" }} -{{- if .Values.features.virtualization }} +{{- if and (hasKey .Values "features") (.Values.features.virtualization) }} apiVersion: deckhouse.io/v1alpha1 kind: ModuleConfig metadata: @@ -25,4 +24,3 @@ spec: imageTag: {{ .Values.virtualization.tag }} scanInterval: 15s {{- end }} -{{- end }} diff --git a/ci/dvp-e2e/charts/cluster-config/values.yaml b/ci/dvp-e2e/charts/cluster-config/values.yaml index 8c158bf0f4..720fa90efa 100644 --- a/ci/dvp-e2e/charts/cluster-config/values.yaml +++ b/ci/dvp-e2e/charts/cluster-config/values.yaml @@ -46,32 +46,8 @@ storageClasses: workers: root: ceph-pool-r2-csi-rbd-immediate -# Infrastructure components -infra: - nfs: - storageClass: nfs-4-1-wffc - dvcr: - storageClass: ceph-pool-r2-csi-rbd-immediate - -# Virtual disks configuration -virtualDisks: - os: - storageClass: ceph-pool-r2-csi-rbd-immediate - data: - storageClass: nfs-4-1-wffc - -# Security settings -security: - admissionPolicyEngine: - enabled: true - networkPolicies: - enabled: true - -# Feature flags +# Feature flags (only those used by templates) features: virtualization: true - monitoring: true - logging: true - ingress: true nfs: enabled: false diff --git a/ci/dvp-e2e/charts/infra/templates/ingress.yaml b/ci/dvp-e2e/charts/infra/templates/ingress.yaml index b813234319..b419188353 100644 --- a/ci/dvp-e2e/charts/infra/templates/ingress.yaml +++ b/ci/dvp-e2e/charts/infra/templates/ingress.yaml @@ -1,18 +1,3 @@ ---- -apiVersion: v1 -kind: Service -metadata: - name: dvp-over-dvp-80 - namespace: {{ .Values.namespace }} -spec: - ports: - - port: 80 - targetPort: 80 - protocol: TCP - name: http - selector: - dvp.deckhouse.io/node-group: master ---- apiVersion: v1 kind: Service metadata: @@ -50,25 +35,3 @@ spec: port: number: 443 {{- end }} ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: wildcard-http - namespace: {{ .Values.namespace }} - annotations: - nginx.ingress.kubernetes.io/ssl-redirect: "false" - nginx.ingress.kubernetes.io/rewrite-target: / -spec: - ingressClassName: nginx - rules: - - host: "*.{{ .Values.namespace }}.{{ .Values.domain }}" - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: dvp-over-dvp-80 - port: - number: 80 diff --git a/ci/dvp-e2e/charts/infra/values.yaml b/ci/dvp-e2e/charts/infra/values.yaml index 9fe1b1ab6e..d43de3435c 100644 --- a/ci/dvp-e2e/charts/infra/values.yaml +++ b/ci/dvp-e2e/charts/infra/values.yaml @@ -1,58 +1,2 @@ -# Infrastructure values for E2E testing - -# Storage profiles (from original values.yaml) -storageProfiles: - default: - controlPlane: - root: linstor-thin-r2 - etcd: linstor-thin-r2 - workers: - root: linstor-thin-r2 - infra: - nfs: nfs-4-1-wffc - dvcr: linstor-thin-r2 - virtualDisks: - os: linstor-thin-r2 - data: nfs-4-1-wffc - cephrbd: - controlPlane: - root: ceph-pool-r2-csi-rbd - etcd: ceph-pool-r2-csi-rbd - workers: - root: ceph-pool-r2-csi-rbd - infra: - nfs: nfs-4-1-wffc - dvcr: ceph-pool-r2-csi-rbd - virtualDisks: - os: 
ceph-pool-r2-csi-rbd - data: ceph-pool-r2-csi-rbd - sds-local: - controlPlane: - root: sds-local-storage - etcd: sds-local-storage - workers: - root: sds-local-storage - infra: - nfs: nfs-4-1-wffc - dvcr: sds-local-storage - virtualDisks: - os: sds-local-storage - data: sds-local-storage - -# Network configuration -network: - domain: e2e.virtlab.flant.com - clusterConfigurationPrefix: x - internalNetworkCIDRs: - - "10.241.0.0/16" - -# Registry configuration -registry: - dockerCfg: "" - insecure: false - -# Monitoring configuration -monitoring: - enabled: true - retention: "7d" - scrapeInterval: "30s" +# Minimal defaults; templates primarily consume values from external run values. +# This file can stay intentionally small to avoid confusion. diff --git a/ci/dvp-e2e/scripts/build_parent_kubeconfig.sh b/ci/dvp-e2e/scripts/build_parent_kubeconfig.sh new file mode 100755 index 0000000000..fdd481ec42 --- /dev/null +++ b/ci/dvp-e2e/scripts/build_parent_kubeconfig.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: +# build_parent_kubeconfig.sh -o /path/to/kubeconfig -a https://api.server -t + +out="" +api="${E2E_K8S_URL:-}" +tok="${E2E_SA_TOKEN:-}" + +while getopts ":o:a:t:" opt; do + case $opt in + o) out="$OPTARG" ;; + a) api="$OPTARG" ;; + t) tok="$OPTARG" ;; + *) echo "Usage: $0 -o -a -t " >&2; exit 2 ;; + esac +done + +if [ -z "${out}" ] || [ -z "${api}" ] || [ -z "${tok}" ]; then + echo "Usage: $0 -o -a -t " >&2 + exit 2 +fi + +mkdir -p "$(dirname "$out")" +cat >"$out" < + +file="" +val="${REGISTRY_DOCKER_CFG:-}" + +while getopts ":f:v:" opt; do + case $opt in + f) file="$OPTARG" ;; + v) val="$OPTARG" ;; + *) echo "Usage: $0 -f -v " >&2; exit 2 ;; + esac +done + +if [ -z "${file}" ] || [ -z "${val}" ]; then + echo "Usage: $0 -f -v " >&2 + exit 2 +fi + +export VAL="$val" +yq eval --inplace '.deckhouse.registryDockerCfg = strenv(VAL)' "$file" diff --git a/ci/dvp-e2e/scripts/loop_junit_notify.py b/ci/dvp-e2e/scripts/loop_junit_notify.py deleted file mode 100755 index 2ead80cc6e..0000000000 --- a/ci/dvp-e2e/scripts/loop_junit_notify.py +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Flant JSC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Parse JUnit XML and send test results to Loop webhook.""" - -import argparse -import json -import os -import sys -import urllib.error -import urllib.request -import xml.etree.ElementTree as ET -from datetime import datetime -from pathlib import Path - - -def load_env_file(env_path: Path) -> None: - """Load environment variables from .env file.""" - if not env_path.exists(): - return - - with open(env_path, 'r') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#') and '=' in line: - key, value = line.split('=', 1) - # Don't override existing env vars - if key not in os.environ: - os.environ[key] = value.strip('"').strip("'") - - -def parse_junit_xml(junit_file: Path) -> dict: - """Parse JUnit XML file and extract test results.""" - try: - tree = ET.parse(junit_file) - root = tree.getroot() - - # Handle both testsuites and testsuite root elements - if root.tag == 'testsuites': - testsuites = root - else: - testsuites = root - - total_tests = int(testsuites.get('tests', 0)) - total_failures = int(testsuites.get('failures', 0)) - total_errors = int(testsuites.get('errors', 0)) - total_skipped = int(testsuites.get('skipped', 0)) - total_time = float(testsuites.get('time', 0)) - - # Calculate success rate - successful_tests = total_tests - total_failures - total_errors - success_rate = (successful_tests / total_tests * 100) if total_tests > 0 else 0 - - # Extract failed test details - failed_tests = [] - for testsuite in testsuites.findall('testsuite'): - for testcase in testsuite.findall('testcase'): - failure = testcase.find('failure') - error = testcase.find('error') - if failure is not None or error is not None: - failed_tests.append({ - 'name': testcase.get('name', 'unknown'), - 'class': testcase.get('classname', 'unknown'), - 'time': float(testcase.get('time', 0)), - 'message': (failure.get('message', '') if failure is not None else '') or - (error.get('message', '') if error is not None else '') - }) - - return { - 'total_tests': total_tests, - 'successful_tests': successful_tests, - 'failed_tests': total_failures + total_errors, - 'skipped_tests': total_skipped, - 'success_rate': success_rate, - 'total_time': total_time, - 'failed_test_details': failed_tests[:5], # Limit to first 5 failures - 'has_more_failures': len(failed_tests) > 5 - } - except ET.ParseError as e: - print(f"[ERR] Failed to parse JUnit XML: {e}", file=sys.stderr) - return None - except Exception as e: - print(f"[ERR] Error processing JUnit file: {e}", file=sys.stderr) - return None - - -def format_test_results(results: dict, run_id: str, storage_profile: str, timeout: str) -> str: - """Format test results into a readable message.""" - if results is None: - return f"โŒ Failed to parse test results for {run_id}" - - # Determine status emoji and color - if results['failed_tests'] == 0: - status_emoji = "โœ…" - status_text = "SUCCESS" - elif results['success_rate'] >= 80: - status_emoji = "โš ๏ธ" - status_text = "PARTIALLY SUCCESS" - else: - status_emoji = "โŒ" - status_text = "FAILED" - - # Format time - time_str = f"{results['total_time']:.1f}s" - if results['total_time'] > 60: - minutes = int(results['total_time'] // 60) - seconds = int(results['total_time'] % 60) - time_str = f"{minutes}m {seconds}s" - - # Build message - message_lines = [ - f"{status_emoji} E2E tests for virtualization completed", - f"๐Ÿ“‹ Run ID: {run_id}", - f"๐Ÿ’พ Storage: {storage_profile}", - f"โฑ๏ธ Timeout: {timeout}", - f"๐Ÿ• Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", - "", - f"๐Ÿ“Š Results: 
{status_text}", - f"โ€ข Total tests: {results['total_tests']}", - f"โ€ข Passed: {results['successful_tests']}", - f"โ€ข Failed: {results['failed_tests']}", - f"โ€ข Skipped: {results['skipped_tests']}", - f"โ€ข Success rate: {results['success_rate']:.1f}%", - f"โ€ข Duration: {time_str}" - ] - - # Add failed test details if any - if results['failed_test_details']: - message_lines.extend([ - "", - "๐Ÿ” Failed tests:" - ]) - for test in results['failed_test_details']: - message_lines.append(f"โ€ข {test['class']}.{test['name']}") - if test['message']: - # Truncate long messages - msg = test['message'][:100] + "..." if len(test['message']) > 100 else test['message'] - message_lines.append(f" {msg}") - - if results['has_more_failures']: - message_lines.append(f"โ€ข ... and {len(results['failed_test_details']) - 5} more tests") - - return "\n".join(message_lines) - - -def send_to_loop(webhook_url: str, channel: str, message: str) -> bool: - """Send message to Loop webhook.""" - try: - payload = json.dumps({"channel": channel, "text": message}).encode("utf-8") - request = urllib.request.Request( - webhook_url, - data=payload, - headers={"Content-Type": "application/json"}, - method="POST", - ) - - with urllib.request.urlopen(request, timeout=30) as response: - response.read() - return True - except urllib.error.HTTPError as e: - print(f"[ERR] HTTP error {e.code}: {e.reason}", file=sys.stderr) - return False - except urllib.error.URLError as e: - print(f"[ERR] URL error: {e.reason}", file=sys.stderr) - return False - except Exception as e: - print(f"[ERR] Unexpected error: {e}", file=sys.stderr) - return False - - -def main(argv: list[str]) -> int: - # Load .env file if it exists - env_path = Path(__file__).parent.parent / '.env' - load_env_file(env_path) - - parser = argparse.ArgumentParser(description="Parse JUnit XML and send results to Loop") - parser.add_argument("--junit-file", required=True, help="Path to JUnit XML file") - parser.add_argument("--run-id", required=True, help="Test run ID") - parser.add_argument("--storage-profile", required=True, help="Storage profile used") - parser.add_argument("--webhook-url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK')) - parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts')) - parser.add_argument("--timeout", default="30m", help="Test timeout") - - args = parser.parse_args(argv) - - if not args.webhook_url: - print("[ERR] LOOP_WEBHOOK not set. 
Set via --webhook-url or LOOP_WEBHOOK env variable", file=sys.stderr) - return 1 - - junit_file = Path(args.junit_file) - if not junit_file.exists(): - print(f"[ERR] JUnit file not found: {junit_file}", file=sys.stderr) - return 1 - - # Parse JUnit results - results = parse_junit_xml(junit_file) - - # Format message - message = format_test_results(results, args.run_id, args.storage_profile, args.timeout) - - # Send to Loop - if send_to_loop(args.webhook_url, args.channel, message): - print(f"[OK] Results sent to Loop channel '{args.channel}'") - return 0 - else: - print(f"[ERR] Failed to send results to Loop", file=sys.stderr) - return 1 - - -if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) diff --git a/ci/dvp-e2e/scripts/loop_matrix_summary.py b/ci/dvp-e2e/scripts/loop_matrix_summary.py deleted file mode 100755 index cb248e140f..0000000000 --- a/ci/dvp-e2e/scripts/loop_matrix_summary.py +++ /dev/null @@ -1,391 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Flant JSC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Parse matrix test logs and send summary to Loop webhook.""" - -import argparse -import json -import os -import re -import sys -import urllib.error -import urllib.request -from datetime import datetime -from pathlib import Path - - -def load_env_file(env_path: Path) -> None: - """Load environment variables from .env file.""" - if not env_path.exists(): - return - - with open(env_path, 'r') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#') and '=' in line: - key, value = line.split('=', 1) - # Don't override existing env vars - if key not in os.environ: - os.environ[key] = value.strip('"').strip("'") - - -def parse_test_log(log_file: Path) -> dict: - """Parse test log file and extract results.""" - try: - content = log_file.read_text(encoding='utf-8') - - # Extract run ID from filename or content - run_id = log_file.stem - - # Look for test completion patterns - success_patterns = [ - r'\[OK\] run_id=([^\s]+) finished', - r'Ginkgo ran \d+ spec in [\d.]+s', - r'Test Suite Passed' - ] - - failure_patterns = [ - r'\[ERR\] run_id=([^\s]+) failed', - r'Ginkgo ran \d+ spec in [\d.]+s.*FAILED', - r'Test Suite Failed', - r'API response status: Failure', - r'admission webhook .* too long', - r'Unable to connect to the server', - r'Error while process exit code: exit status 1', - r'task: Failed to run task .* exit status', - r'Infrastructure runner \"master-node\" process exited' - ] - - # Check for explicit status markers first - start_match = re.search(r'\[START\].*run_id=([^\s]+)', content) - finish_match = re.search(r'\[FINISH\].*run_id=([^\s]+).*status=(\w+)', content) - - if start_match and finish_match: - # Use explicit status markers - success = finish_match.group(2) == 'ok' - failure = finish_match.group(2) == 'error' - else: - # Fallback to pattern matching - success = any(re.search(pattern, content, re.IGNORECASE) for pattern in success_patterns) - failure = any(re.search(pattern, content, re.IGNORECASE) for pattern in 
failure_patterns) - - # Extract storage profile from run_id - storage_profile = "unknown" - if '-' in run_id: - parts = run_id.split('-') - if len(parts) >= 2: - # Format: {prefix}-{profile}-{timestamp}-{random} - # For "test-sds-20251009-221516-17193", we want "sds" - storage_profile = parts[1] - - # Extract test statistics - test_stats = {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0} - - # Look for Ginkgo test results - ginkgo_match = re.search(r'Ran (\d+) of (\d+) Specs.*?(\d+) Passed.*?(\d+) Failed.*?(\d+) Skipped', content, re.DOTALL) - if ginkgo_match: - test_stats['total'] = int(ginkgo_match.group(1)) - test_stats['passed'] = int(ginkgo_match.group(3)) - test_stats['failed'] = int(ginkgo_match.group(4)) - test_stats['skipped'] = int(ginkgo_match.group(5)) - - # Extract timing information - duration = "unknown" - # Prefer explicit START/FINISH ISO markers - start_match = re.search(r'^\[START\].*time=([^\s]+)', content, re.MULTILINE) - finish_match = re.search(r'^\[FINISH\].*time=([^\s]+)', content, re.MULTILINE) - if start_match and finish_match: - try: - started = datetime.fromisoformat(start_match.group(1)) - finished = datetime.fromisoformat(finish_match.group(1)) - delta = finished - started - total_seconds = int(delta.total_seconds()) - hours = total_seconds // 3600 - minutes = (total_seconds % 3600) // 60 - seconds = total_seconds % 60 - duration = f"{hours}h {minutes}m {seconds}s" - except Exception: - pass - else: - # Fallback: try to find H:M:S pattern - time_match = re.search(r'(\d+):(\d+):(\d+)', content) - if time_match: - hours, minutes, seconds = time_match.groups() - duration = f"{hours}h {minutes}m {seconds}s" - - # Extract error details - only from E2E test execution - error_details = [] - if failure: - # Look for E2E test errors after "Running Suite" or "go run ginkgo" - e2e_start_patterns = [ - r'Running Suite:', - r'go run.*ginkgo', - r'Will run.*specs' - ] - - # Find E2E test section - e2e_start_pos = -1 - for pattern in e2e_start_patterns: - match = re.search(pattern, content, re.IGNORECASE) - if match: - e2e_start_pos = match.start() - break - - if e2e_start_pos > 0: - # Extract content after E2E tests started - e2e_content = content[e2e_start_pos:] - - # Look for actual test failures with cleaner patterns - test_error_patterns = [ - r'\[FAIL\].*?([^\n]+)', - r'FAIL!.*?--.*?(\d+) Passed.*?(\d+) Failed', - r'Test Suite Failed', - r'Ginkgo ran.*FAILED', - r'Error occurred during reconciliation.*?([^\n]+)', - r'Failed to update resource.*?([^\n]+)', - r'admission webhook.*denied the request.*?([^\n]+)', - r'context deadline exceeded', - r'timed out waiting for the condition.*?([^\n]+)', - r'panic.*?([^\n]+)' - ] - - for pattern in test_error_patterns: - matches = re.findall(pattern, e2e_content, re.IGNORECASE | re.DOTALL) - for match in matches: - if isinstance(match, tuple): - # Clean up the error message - error_msg = f"{match[0]}: {match[1]}" - else: - error_msg = match - - # Clean up ANSI escape codes and extra whitespace - error_msg = re.sub(r'\x1b\[[0-9;]*[mK]', '', error_msg) - error_msg = re.sub(r'\[0m\s*\[38;5;9m\s*\[1m', '', error_msg) - error_msg = re.sub(r'\[0m', '', error_msg) - error_msg = error_msg.strip() - - # Skip empty, very short messages, or artifacts - if len(error_msg) > 10 and not re.match(r'^\d+:\s*\d+$', error_msg): - error_details.append(error_msg) - - # Remove duplicates and limit to most meaningful errors - error_details = list(dict.fromkeys(error_details))[:2] - - return { - 'run_id': run_id, - 'storage_profile': 
storage_profile, - 'success': success and not failure, - 'failure': failure, - 'duration': duration, - 'test_stats': test_stats, - 'error_details': error_details, - 'log_file': str(log_file) - } - except Exception as e: - print(f"[WARN] Failed to parse log {log_file}: {e}", file=sys.stderr) - return { - 'run_id': log_file.stem, - 'storage_profile': 'unknown', - 'success': False, - 'failure': True, - 'duration': 'unknown', - 'test_stats': {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0}, - 'error_details': [f"Failed to parse log: {e}"], - 'log_file': str(log_file) - } - - -def format_matrix_summary(results: list, run_id_prefix: str, profiles: str, github_run_url: str = None) -> str: - """Format matrix test results into a readable message.""" - total_runs = len(results) - successful_runs = sum(1 for r in results if r['success']) - # Treat any non-success as failure for overall counters - failed_runs = total_runs - successful_runs - - # Calculate total test statistics - total_tests = sum(r['test_stats']['total'] for r in results) - total_passed = sum(r['test_stats']['passed'] for r in results) - total_failed = sum(r['test_stats']['failed'] for r in results) - total_skipped = sum(r['test_stats']['skipped'] for r in results) - - # Determine overall status - if total_runs == 0: - status_emoji = "โšช" - status_text = "NO RUNS" - elif failed_runs > 0: - status_emoji = "โŒ" - status_text = "FAILED" - else: - # No failures. Consider Passed if any run succeeded (skips allowed) - status_emoji = "โœ…" - status_text = "PASSED" - - # Group results by storage profile - profile_results = {} - for result in results: - profile = result['storage_profile'] - if profile not in profile_results: - profile_results[profile] = { - 'success': 0, - 'failure': 0, - 'test_stats': {'total': 0, 'passed': 0, 'failed': 0, 'skipped': 0} - } - if result['success']: - profile_results[profile]['success'] += 1 - else: - profile_results[profile]['failure'] += 1 - - # Aggregate test stats - for key in profile_results[profile]['test_stats']: - profile_results[profile]['test_stats'][key] += result['test_stats'][key] - - # Build message with table format - current_date = datetime.now().strftime('%Y-%m-%d') - test_type = "Nightly" if run_id_prefix in ["n", "nightly"] else run_id_prefix.upper() - - message_lines = [ - f"# :dvp: DVP-virtualization {current_date} {test_type} e2e Tests" - ] - - # Add table format for profile results - if profile_results: - message_lines.extend([ - "", - "| Storage Profile | Status | Passed | Failed | Skipped | Success Rate | Duration |", - "|----------------|--------|--------|--------|---------|--------------|----------|" - ]) - - for profile, stats in profile_results.items(): - total_configs = stats['success'] + stats['failure'] - config_success_rate = (stats['success'] / total_configs * 100) if total_configs > 0 else 0 - - test_stats = stats['test_stats'] - test_success_rate = (test_stats['passed'] / test_stats['total'] * 100) if test_stats['total'] > 0 else 0 - - status_emoji = "โœ…" if stats['failure'] == 0 else "โŒ" if stats['success'] == 0 else "โš ๏ธ" - status_text = "PASSED" if stats['failure'] == 0 else "FAILED" if stats['success'] == 0 else "PARTIAL" - - # Get duration and build linked profile name - profile_duration = "unknown" - for result in results: - if result['storage_profile'] == profile: - profile_duration = result['duration'] - break - name_md = f"[{profile.upper()}]({github_run_url})" if github_run_url else profile.upper() - - message_lines.append( - f"| {name_md} | 
{status_emoji} **{status_text}** | {test_stats['passed']} | {test_stats['failed']} | {test_stats['skipped']} | {test_success_rate:.1f}% | {profile_duration} |" - ) - - return "\n".join(message_lines) - - -def send_to_loop(webhook_url: str, channel: str, message: str) -> bool: - """Send message to Loop webhook.""" - try: - payload = json.dumps({"channel": channel, "text": message}).encode("utf-8") - request = urllib.request.Request( - webhook_url, - data=payload, - headers={"Content-Type": "application/json"}, - method="POST", - ) - - with urllib.request.urlopen(request, timeout=30) as response: - response.read() - return True - except urllib.error.HTTPError as e: - print(f"[ERR] HTTP error {e.code}: {e.reason}", file=sys.stderr) - return False - except urllib.error.URLError as e: - print(f"[ERR] URL error: {e.reason}", file=sys.stderr) - return False - except Exception as e: - print(f"[ERR] Unexpected error: {e}", file=sys.stderr) - return False - - -def main(argv: list[str]) -> int: - # Load .env file if it exists - env_path = Path(__file__).parent.parent / '.env' - load_env_file(env_path) - - parser = argparse.ArgumentParser(description="Parse matrix test logs and send summary to Loop") - parser.add_argument("--profiles", required=True, help="Comma-separated list of storage profiles") - parser.add_argument("--run-id-prefix", required=True, help="Run ID prefix") - parser.add_argument("--log-dir", required=True, help="Directory containing log files") - parser.add_argument("--webhook-url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK')) - parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts')) - parser.add_argument("--github-run-url", required=False, help="GitHub Actions run URL to link from profile name") - - args = parser.parse_args(argv) - - if not args.webhook_url: - print("[ERR] LOOP_WEBHOOK not set. 
Set via --webhook-url or LOOP_WEBHOOK env variable", file=sys.stderr) - return 1 - - log_dir = Path(args.log_dir) - if not log_dir.exists(): - print(f"[ERR] Log directory not found: {log_dir}", file=sys.stderr) - return 1 - - # Find all log files - log_files = list(log_dir.glob("*.log")) - if not log_files: - print(f"[WARN] No log files found in {log_dir}", file=sys.stderr) - return 0 - - # Parse all log files - results = [] - for log_file in log_files: - result = parse_test_log(log_file) - results.append(result) - - # Filter by run_id_prefix and profile (no aliases; use canonical names) - allowed_profiles = set([p.strip() for p in args.profiles.split(",")]) - filtered_results = [] - - for result in results: - # Filter by run_id prefix (more flexible matching) - if not result['run_id'].startswith(args.run_id_prefix): - continue - - # Filter by canonical profile name from run_id - normalized_profile = result['storage_profile'] - if normalized_profile not in allowed_profiles: - continue - - result['storage_profile'] = normalized_profile - filtered_results.append(result) - - results = filtered_results - - if not results: - print(f"[WARN] No results to report", file=sys.stderr) - return 0 - - # Format message - message = format_matrix_summary(results, args.run_id_prefix, args.profiles, github_run_url=args.github_run_url) - - # Send to Loop - if send_to_loop(args.webhook_url, args.channel, message): - print(f"[OK] Matrix summary sent to Loop channel '{args.channel}'") - return 0 - else: - print(f"[ERR] Failed to send matrix summary to Loop", file=sys.stderr) - return 1 - - -if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) diff --git a/ci/dvp-e2e/scripts/loop_notify.py b/ci/dvp-e2e/scripts/loop_notify.py deleted file mode 100644 index eac831ce78..0000000000 --- a/ci/dvp-e2e/scripts/loop_notify.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2025 Flant JSC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Send notifications to Loop webhook.""" - -import argparse -import json -import os -import sys -import urllib.error -import urllib.request -from pathlib import Path - - -def load_env_file(env_path: Path) -> None: - """Load environment variables from .env file.""" - if not env_path.exists(): - return - - with open(env_path, 'r') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#') and '=' in line: - key, value = line.split('=', 1) - # Don't override existing env vars - if key not in os.environ: - os.environ[key] = value.strip('"').strip("'") - - -def send_post_request(url: str, channel: str, text: str) -> None: - """Send JSON payload to Loop webhook.""" - - payload = json.dumps({"channel": channel, "text": text}).encode("utf-8") - request = urllib.request.Request( - url, - data=payload, - headers={"Content-Type": "application/json"}, - method="POST", - ) - - with urllib.request.urlopen(request, timeout=30) as response: # noqa: S310 - # We just ensure the request succeeded; the body is usually empty. 
- response.read() - - -def main(argv: list[str]) -> int: - # Load .env file if it exists - env_path = Path(__file__).parent.parent / '.env' - load_env_file(env_path) - - parser = argparse.ArgumentParser(description="Send message to Loop webhook") - parser.add_argument("--url", required=False, help="Loop webhook URL", default=os.getenv('LOOP_WEBHOOK')) - parser.add_argument("--channel", required=False, help="Loop channel name", default=os.getenv('LOOP_CHANNEL', 'test-virtualization-loop-alerts')) - parser.add_argument("--text", required=True, help="Message text") - - args = parser.parse_args(argv) - - if not args.url: - print("[ERR] LOOP_WEBHOOK not set. Set via --url or LOOP_WEBHOOK env variable", file=sys.stderr) - return 1 - - try: - send_post_request(url=args.url, channel=args.channel, text=args.text) - except urllib.error.HTTPError as exc: # pragma: no cover - network failure path - print(f"[ERR] HTTP error {exc.code}: {exc.reason}", file=sys.stderr) - return 1 - except urllib.error.URLError as exc: # pragma: no cover - network failure path - print(f"[ERR] URL error: {exc.reason}", file=sys.stderr) - return 1 - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) - diff --git a/ci/dvp-e2e/values.yaml b/ci/dvp-e2e/values.yaml index 3379c93532..4607684343 100644 --- a/ci/dvp-e2e/values.yaml +++ b/ci/dvp-e2e/values.yaml @@ -1,40 +1,3 @@ -storageProfiles: - default: - controlPlane: - root: linstor-thin-r2 - etcd: linstor-thin-r2 - workers: - root: linstor-thin-r2 - infra: - nfs: nfs-4-1-wffc - dvcr: linstor-thin-r2 - virtualDisks: - os: linstor-thin-r2 - data: nfs-4-1-wffc - cephrbd: - controlPlane: - root: ceph-pool-r2-csi-rbd - etcd: ceph-pool-r2-csi-rbd - workers: - root: ceph-pool-r2-csi-rbd - infra: - nfs: nfs-4-1-wffc - dvcr: ceph-pool-r2-csi-rbd - virtualDisks: - os: ceph-pool-r2-csi-rbd - data: ceph-pool-r2-csi-rbd - sds-local: - controlPlane: - root: sds-local-storage - etcd: sds-local-storage - workers: - root: sds-local-storage - infra: - nfs: nfs-4-1-wffc - dvcr: sds-local-storage - virtualDisks: - os: sds-local-storage - data: sds-local-storage domain: e2e.virtlab.flant.com clusterConfigurationPrefix: e2e deckhouse: From 22b75f5e143135757c94a0623a2e5ff4b638f0b8 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sun, 16 Nov 2025 16:22:38 +0300 Subject: [PATCH 03/14] ci(e2e): workflow fixes and cleanups for SDS-only PR - use DEV_REGISTRY_DOCKER_CFG only; inject into values; login via docker cfg - fix GitHub Actions 'if' expressions (no secrets.* in if; use env) - add Apache-2.0 headers to helper scripts (dmtlint) - format ci/dvp-e2e/Taskfile.yaml with repo Prettier (prettier) - trim push triggers to ci-e2e-nested-sds only; remove noisy comments - keep setup/prepare/cleanup only (tests/report moved to next PR) Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 57 ++++++++----------- ci/dvp-e2e/Taskfile.yaml | 2 - ci/dvp-e2e/scripts/build_parent_kubeconfig.sh | 15 ++++- ci/dvp-e2e/scripts/inject_registry_cfg.sh | 14 +++++ 4 files changed, 53 insertions(+), 35 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 3c569eabda..2ac9190ddf 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -18,6 +18,7 @@ on: push: branches: - chore/ci/e2e-matrix-skeleton + - ci-e2e-nested-sds pull_request: types: [opened, reopened, synchronize, labeled, unlabeled] branches: @@ -180,47 +181,39 @@ jobs: RUN_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" echo 
"VALUES_TEMPLATE_FILE=${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" >> $GITHUB_ENV - - name: Configure registry auth (REGISTRY_DOCKER_CFG) + - name: Configure registry auth (DEV_REGISTRY_DOCKER_CFG) run: | - prod_user="${{ secrets.PROD_READ_REGISTRY_USER }}" - prod_pass="${{ secrets.PROD_READ_REGISTRY_PASSWORD }}" - dev_user="${{ secrets.BOOTSTRAP_DEV_REGISTRY_LOGIN }}" - dev_pass="${{ secrets.BOOTSTRAP_DEV_REGISTRY_PASSWORD }}" - echo "::add-mask::$prod_user" - echo "::add-mask::$prod_pass" - echo "::add-mask::$dev_user" - echo "::add-mask::$dev_pass" - prod_auth_b64=$(printf '%s:%s' "$prod_user" "$prod_pass" | base64 | tr -d '\n') - dev_auth_b64=$(printf '%s:%s' "$dev_user" "$dev_pass" | base64 | tr -d '\n') - docker_cfg=$(printf '{"auths":{"registry.deckhouse.io":{"auth":"%s"},"dev-registry.deckhouse.io":{"auth":"%s"}}}' "$prod_auth_b64" "$dev_auth_b64") - docker_cfg_b64=$(printf '%s' "$docker_cfg" | base64 | tr -d '\n') - echo "::add-mask::$docker_cfg_b64" - { - echo "REGISTRY_DOCKER_CFG=$docker_cfg_b64" - echo "DECKHOUSE_REGISTRY_USER=$prod_user" - echo "DECKHOUSE_REGISTRY_PASSWORD=$prod_pass" - } >> "$GITHUB_ENV" + dev_cfg_b64='${{ secrets.DEV_REGISTRY_DOCKER_CFG }}' + if [ -n "$dev_cfg_b64" ]; then + echo "::add-mask::$dev_cfg_b64" + echo "REGISTRY_DOCKER_CFG=$dev_cfg_b64" >> "$GITHUB_ENV" + else + echo "[WARN] DEV_REGISTRY_DOCKER_CFG is empty; proceeding without registry cfg" + fi - name: Inject REGISTRY_DOCKER_CFG into values.yaml + if: ${{ env.REGISTRY_DOCKER_CFG != '' }} working-directory: ci/dvp-e2e run: | chmod +x scripts/inject_registry_cfg.sh VALS="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" REGISTRY_DOCKER_CFG="${REGISTRY_DOCKER_CFG}" scripts/inject_registry_cfg.sh -f "$VALS" -v "$REGISTRY_DOCKER_CFG" - - name: Docker login to Deckhouse registry - uses: docker/login-action@v3 - with: - registry: registry.deckhouse.io - username: ${{ env.DECKHOUSE_REGISTRY_USER }} - password: ${{ env.DECKHOUSE_REGISTRY_PASSWORD }} - - - name: Docker login to dev-registry - uses: docker/login-action@v3 - with: - registry: ${{ vars.DEV_REGISTRY }} - username: ${{ secrets.BOOTSTRAP_DEV_REGISTRY_LOGIN }} - password: ${{ secrets.BOOTSTRAP_DEV_REGISTRY_PASSWORD }} + - name: Docker login from DEV_REGISTRY_DOCKER_CFG (optional) + if: ${{ secrets.DEV_REGISTRY_DOCKER_CFG != '' }} + run: | + set -euo pipefail + cfg=$(printf '%s' '${{ secrets.DEV_REGISTRY_DOCKER_CFG }}' | base64 -d) + reg_list=$(printf '%s' "$cfg" | jq -r '.auths | keys[]') + for reg in $reg_list; do + auth=$(printf '%s' "$cfg" | jq -r --arg r "$reg" '.auths[$r].auth // ""') + [ -z "$auth" ] && continue + creds=$(printf '%s' "$auth" | base64 -d) + user=${creds%%:*} + pass=${creds#*:} + echo "Logging into $reg" + echo "$pass" | docker login "$reg" -u "$user" --password-stdin + done - name: Configure storage profile working-directory: ci/dvp-e2e diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index f2020a107c..9621c5eab4 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -682,8 +682,6 @@ tasks: task dhctl-bootstrap VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed' } 2>&1 | tee '{{ .LOG_FILE }}' - - # ------------------------------------------------------------ # Nested cluster helpers (SC + kubeconfig) # ------------------------------------------------------------ diff --git a/ci/dvp-e2e/scripts/build_parent_kubeconfig.sh b/ci/dvp-e2e/scripts/build_parent_kubeconfig.sh index fdd481ec42..b83cd5ef21 100755 --- 
a/ci/dvp-e2e/scripts/build_parent_kubeconfig.sh +++ b/ci/dvp-e2e/scripts/build_parent_kubeconfig.sh @@ -1,4 +1,18 @@ #!/usr/bin/env bash + +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. set -euo pipefail # Usage: @@ -44,4 +58,3 @@ users: EOF chmod 600 "$out" echo "KUBECONFIG=$out" - diff --git a/ci/dvp-e2e/scripts/inject_registry_cfg.sh b/ci/dvp-e2e/scripts/inject_registry_cfg.sh index ed011aaf0f..302e00fad8 100755 --- a/ci/dvp-e2e/scripts/inject_registry_cfg.sh +++ b/ci/dvp-e2e/scripts/inject_registry_cfg.sh @@ -1,4 +1,18 @@ #!/usr/bin/env bash + +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. set -euo pipefail # Usage: From 66d4150ea5d3c504e8d1e1dfdd26546dc06fb1d1 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sun, 16 Nov 2025 16:27:48 +0300 Subject: [PATCH 04/14] ci(e2e): workflow fixes and cleanups for SDS-only PR\n\n- top-level concurrency + unified job concurrency (head_ref||ref_name) to prevent duplicate runs\n- use DEV_REGISTRY_DOCKER_CFG only; inject into values; login via docker cfg\n- fix Actions 'if' expressions (no secrets.* in if; use env)\n- add Apache-2.0 headers to helper scripts (dmtlint)\n- format ci/dvp-e2e/Taskfile.yaml with repo Prettier (prettier)\n- trim push triggers to ci-e2e-nested-sds only; remove noisy comments Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 2ac9190ddf..a0704fd7a0 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -36,6 +36,10 @@ on: permissions: contents: read +concurrency: + group: e2e-matrix-${{ github.head_ref || github.ref_name }} + cancel-in-progress: true + env: E2E_K8S_URL: https://api.e2e.virtlab.flant.com @@ -47,7 +51,7 @@ jobs: name: Setup Nested Envs runs-on: ubuntu-latest concurrency: - group: setup-nested-envs-${{ github.ref }} + group: setup-nested-envs-${{ github.head_ref || github.ref_name }} cancel-in-progress: true env: PROFILE: sds-replicated-volume @@ -90,14 +94,12 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 300 concurrency: - group: prepare-${{ github.ref }}-sds-replicated-volume + group: prepare-${{ github.head_ref || github.ref_name }}-sds-replicated-volume cancel-in-progress: true env: PROFILE: sds-replicated-volume GO_VERSION: "1.24.6" TMP_ROOT: ${{ github.workspace }}/ci/dvp-e2e/tmp - LOOP_WEBHOOK: ${{ secrets.LOOP_WEBHOOK_URL || secrets.LOOP_WEBHOOK 
}} - LOOP_CHANNEL: ${{ secrets.LOOP_CHANNEL || 'test-virtualization-loop-alerts' }} # TODO: replace with channel secret after successful run outputs: run_id: ${{ steps.prep.outputs.run_id }} @@ -200,10 +202,10 @@ jobs: REGISTRY_DOCKER_CFG="${REGISTRY_DOCKER_CFG}" scripts/inject_registry_cfg.sh -f "$VALS" -v "$REGISTRY_DOCKER_CFG" - name: Docker login from DEV_REGISTRY_DOCKER_CFG (optional) - if: ${{ secrets.DEV_REGISTRY_DOCKER_CFG != '' }} + if: ${{ env.REGISTRY_DOCKER_CFG != '' }} run: | set -euo pipefail - cfg=$(printf '%s' '${{ secrets.DEV_REGISTRY_DOCKER_CFG }}' | base64 -d) + cfg=$(printf '%s' "$REGISTRY_DOCKER_CFG" | base64 -d) reg_list=$(printf '%s' "$cfg" | jq -r '.auths | keys[]') for reg in $reg_list; do auth=$(printf '%s' "$cfg" | jq -r --arg r "$reg" '.auths[$r].auth // ""') From 6945030d2e1e7186acdd8be3269d1ffec8634258 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sun, 16 Nov 2025 22:43:32 +0300 Subject: [PATCH 05/14] ci(e2e): remove unused task infra:attach-worker-disks (redundant; replaced by infra:attach-storage-disks-hotplug) Signed-off-by: Anton Yachmenev --- ci/dvp-e2e/Taskfile.yaml | 52 ---------------------------------------- 1 file changed, 52 deletions(-) diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 9621c5eab4..f1b88e2181 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -382,58 +382,6 @@ tasks: echo "[INFRA] All worker VMs configured with storage disks via hotplug" - infra:attach-worker-disks: - desc: Attach additional data disks to worker VMs (for SDS) - vars: - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' - VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" - NAMESPACE: - sh: yq eval '.namespace' {{ .VALUES_FILE }} - DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' - STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' - DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' - env: - KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' - cmds: - - | - set -euo pipefail - echo "[INFRA] Attaching {{ .DISK_COUNT }} storage disks to worker VMs in namespace {{ .NAMESPACE }}" - workers=() - while IFS= read -r line; do - [ -n "$line" ] && workers+=("$line") - done < <(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker || true) - if [ ${#workers[@]} -eq 0 ]; then - echo "[INFRA] No worker VMs found"; exit 0 - fi - for vm in "${workers[@]}"; do - [ -z "$vm" ] && continue - for disk_num in $(seq 1 {{ .DISK_COUNT }}); do - vd="storage-disk-${disk_num}-$vm" - cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml - cat > /tmp/attach-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/attach-$vd.yaml echo "[INFRA] Waiting for hotplug attachment of $vd..." 
for i in $(seq 1 30); do From 092aa3f7c0edce659ba54212af4d9f585cfab959 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Sun, 16 Nov 2025 22:51:15 +0300 Subject: [PATCH 06/14] ci(e2e): hardcode kubernetesVersion=Automatic in ClusterConfiguration and drop deckhouse.kubernetesVersion from values Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 5 +-- .../templates/cluster-config.yaml | 4 +-- .../charts/cluster-config/templates/nfs.yaml | 34 ------------------- ci/dvp-e2e/charts/cluster-config/values.yaml | 1 - ci/dvp-e2e/values.yaml | 1 - 5 files changed, 3 insertions(+), 42 deletions(-) delete mode 100644 ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index a0704fd7a0..e759328aee 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -330,9 +330,6 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install Task - uses: arduino/setup-task@v2 - - name: Install kubectl uses: azure/setup-kubectl@v4 with: @@ -371,4 +368,4 @@ jobs: run: | echo "### Cleanup Results" >> $GITHUB_STEP_SUMMARY echo "โœ… Cleanup job completed" >> $GITHUB_STEP_SUMMARY - echo "๐Ÿงน Attempted to clean up namespaces matching 'nightly-nested-e2e-*'" >> $GITHUB_STEP_SUMMARY + echo "๐Ÿงน Attempted to clean up namespaces with prefix '${CLEANUP_PREFIX}'" >> $GITHUB_STEP_SUMMARY diff --git a/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml index bc9c836bfc..af38c34fa6 100644 --- a/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml +++ b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml @@ -7,7 +7,7 @@ cloud: prefix: {{ .Values.clusterConfigurationPrefix | default "demo-cluster" }} podSubnetCIDR: 10.112.0.0/16 serviceSubnetCIDR: 10.223.0.0/16 -kubernetesVersion: "{{ .Values.deckhouse.kubernetesVersion }}" +kubernetesVersion: "Automatic" clusterDomain: "internal.cluster.local" --- apiVersion: deckhouse.io/v1 @@ -45,4 +45,4 @@ masterNodeGroup: storageClass: {{ .Values.storageClasses.controlPlane.etcd }} provider: kubeconfigDataBase64: {{ .Values.kubeconfigDataBase64 }} - namespace: {{ .Values.namespace }} \ No newline at end of file + namespace: {{ .Values.namespace }} diff --git a/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml b/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml deleted file mode 100644 index 85b627695d..0000000000 --- a/ci/dvp-e2e/charts/cluster-config/templates/nfs.yaml +++ /dev/null @@ -1,34 +0,0 @@ -{{- if .Values.features.nfs.enabled }} -apiVersion: deckhouse.io/v1alpha1 -kind: ModuleConfig -metadata: - name: csi-nfs -spec: - source: deckhouse - enabled: true - version: 1 ---- -apiVersion: deckhouse.io/v1alpha2 -kind: ModulePullOverride -metadata: - name: csi-nfs -spec: - imageTag: main - scanInterval: 10m ---- -apiVersion: storage.deckhouse.io/v1alpha1 -kind: NFSStorageClass -metadata: - name: nfs -spec: - connection: - host: "nfs-server.{{ .Values.namespace }}.svc.cluster.local" - share: / - nfsVersion: "4.2" - mountOptions: - mountMode: hard - timeout: 60 - retransmissions: 3 - reclaimPolicy: Delete - volumeBindingMode: Immediate -{{- end }} diff --git a/ci/dvp-e2e/charts/cluster-config/values.yaml b/ci/dvp-e2e/charts/cluster-config/values.yaml index 720fa90efa..5e62b9f968 100644 --- a/ci/dvp-e2e/charts/cluster-config/values.yaml +++ b/ci/dvp-e2e/charts/cluster-config/values.yaml @@ -17,7 +17,6 @@ instances: # Deckhouse configuration deckhouse: tag: main - 
kubernetesVersion: Automatic # Virtualization configuration virtualization: diff --git a/ci/dvp-e2e/values.yaml b/ci/dvp-e2e/values.yaml index 4607684343..a3dbb0b7d3 100644 --- a/ci/dvp-e2e/values.yaml +++ b/ci/dvp-e2e/values.yaml @@ -2,7 +2,6 @@ domain: e2e.virtlab.flant.com clusterConfigurationPrefix: e2e deckhouse: tag: main - kubernetesVersion: Automatic virtualization: tag: main features: From c6babf13da2ab5f5b6db362a9d1aeb6da59f3afa Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Mon, 17 Nov 2025 11:00:51 +0300 Subject: [PATCH 07/14] ci(e2e): move namespace cleanup logic into Taskfile (task cleanup:namespaces) and call it from workflow; prefix stays configurable Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 19 ++++--------- ci/dvp-e2e/Taskfile.yaml | 30 ++++++++++++++++++++ ci/dvp-e2e/charts/cluster-config/values.yaml | 2 +- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index e759328aee..a860d02bf3 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -344,24 +344,15 @@ jobs: ci/dvp-e2e/scripts/build_parent_kubeconfig.sh -o "$KCFG" -a "${E2E_K8S_URL}" -t "${{ secrets.E2E_NESTED_SA_SECRET }}" echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV" + - name: Install Task + uses: arduino/setup-task@v2 + - name: Cleanup test namespaces + working-directory: ci/dvp-e2e env: CLEANUP_PREFIX: ${{ vars.CLEANUP_PREFIX || 'nightly-nested-e2e-' }} run: | - set -euo pipefail - echo "๐Ÿงน Cleaning namespaces with prefix '${CLEANUP_PREFIX}'" - ns_list=$(kubectl get ns -o json | jq -r --arg p "$CLEANUP_PREFIX" '.items[].metadata.name | select(startswith($p))') - if [ -z "$ns_list" ]; then - echo "[INFO] No namespaces to delete"; exit 0 - fi - for ns in $ns_list; do - echo "[CLEANUP] Deleting namespace $ns ..." - kubectl delete ns "$ns" --wait=false || true - done - echo "[CLEANUP] Waiting for namespaces to be deleted..." - for ns in $ns_list; do - kubectl wait --for=delete ns/"$ns" --timeout=600s || echo "[WARN] Namespace $ns was not fully deleted within timeout" - done + task cleanup:namespaces PREFIX="${CLEANUP_PREFIX}" PARENT_KUBECONFIG="${KUBECONFIG}" - name: Report cleanup results if: always() diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index f1b88e2181..07a5ea7db1 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -924,6 +924,36 @@ tasks: PATCH=$(jq -cn --arg v "$DEFAULT_STORAGE_CLASS" '[{"op":"replace","path":"/spec/settings/defaultClusterStorageClass","value":$v}]') KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl patch mc global --type='json' -p="$PATCH" + # ------------------------------------------------------------ + # Cleanup helpers + # ------------------------------------------------------------ + cleanup:namespaces: + desc: Delete namespaces by prefix and wait for deletion + vars: + PREFIX: '{{ .PREFIX | default (env "CLEANUP_PREFIX") | default "nightly-nested-e2e-" }}' + PARENT_KUBECONFIG_PATH: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + cmds: + - | + set -euo pipefail + if [ ! 
-s "{{ .PARENT_KUBECONFIG_PATH }}" ]; then + echo "[ERR] parent kubeconfig not found (KUBECONFIG)" >&2 + exit 1 + fi + export KUBECONFIG='{{ .PARENT_KUBECONFIG_PATH }}' + echo "[CLEANUP] Prefix='{{ .PREFIX }}'" + ns_list=$(kubectl get ns -o json | jq -r --arg p '{{ .PREFIX }}' '.items[].metadata.name | select(startswith($p))') + if [ -z "${ns_list}" ]; then + echo "[INFO] No namespaces to delete"; exit 0 + fi + for ns in $ns_list; do + echo "[CLEANUP] Deleting namespace $ns ..." + kubectl delete ns "$ns" --wait=false || true + done + echo "[CLEANUP] Waiting for namespaces to be deleted..." + for ns in $ns_list; do + kubectl wait --for=delete ns/"$ns" --timeout=600s || echo "[WARN] Namespace $ns was not fully deleted within timeout" + done + # ------------------------------------------------------------ # Run E2E # ------------------------------------------------------------ diff --git a/ci/dvp-e2e/charts/cluster-config/values.yaml b/ci/dvp-e2e/charts/cluster-config/values.yaml index 5e62b9f968..79f0062022 100644 --- a/ci/dvp-e2e/charts/cluster-config/values.yaml +++ b/ci/dvp-e2e/charts/cluster-config/values.yaml @@ -1,4 +1,4 @@ -# Cluster configuration values for E2E testing +# Cluster configuration for setting up nested clusters for running nightly E2E tests # Instance configuration instances: From d887e7d6cd8e9c92decdda7c3cf7e930b0fa76d2 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Mon, 17 Nov 2025 11:10:45 +0300 Subject: [PATCH 08/14] =?UTF-8?q?ci(e2e):=20address=20review=20=E2=80=94?= =?UTF-8?q?=20move=20kubeconfig/registry=20steps=20into=20Taskfile=20(pare?= =?UTF-8?q?nt:kubeconfig,=20values:inject-registry);=20workflow=20calls=20?= =?UTF-8?q?tasks;=20keep=20params=20minimal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 15 +++++---------- ci/dvp-e2e/Taskfile.yaml | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index a860d02bf3..55c7e8bef0 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -166,12 +166,10 @@ jobs: mkdir -p "${{ env.TMP_ROOT }}/shared" "${{ env.TMP_ROOT }}/matrix-logs" - name: Build parent kubeconfig from secret - shell: bash + working-directory: ci/dvp-e2e run: | - set -euo pipefail - chmod +x ci/dvp-e2e/scripts/build_parent_kubeconfig.sh KCFG="$HOME/.kube/config" - ci/dvp-e2e/scripts/build_parent_kubeconfig.sh -o "$KCFG" -a "${E2E_K8S_URL}" -t "${{ secrets.E2E_NESTED_SA_SECRET }}" + task parent:kubeconfig OUTPUT="$KCFG" API_URL="${E2E_K8S_URL}" SA_TOKEN="${{ secrets.E2E_NESTED_SA_SECRET }}" echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV" - name: Prepare run values.yaml @@ -197,9 +195,8 @@ jobs: if: ${{ env.REGISTRY_DOCKER_CFG != '' }} working-directory: ci/dvp-e2e run: | - chmod +x scripts/inject_registry_cfg.sh VALS="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" - REGISTRY_DOCKER_CFG="${REGISTRY_DOCKER_CFG}" scripts/inject_registry_cfg.sh -f "$VALS" -v "$REGISTRY_DOCKER_CFG" + task values:inject-registry VALUES_FILE="$VALS" REGISTRY_DOCKER_CFG="${REGISTRY_DOCKER_CFG}" - name: Docker login from DEV_REGISTRY_DOCKER_CFG (optional) if: ${{ env.REGISTRY_DOCKER_CFG != '' }} @@ -336,12 +333,10 @@ jobs: version: "latest" - name: Build parent kubeconfig from secret (cleanup) - shell: bash + working-directory: ci/dvp-e2e run: | - set -euo pipefail - chmod +x 
ci/dvp-e2e/scripts/build_parent_kubeconfig.sh KCFG="$HOME/.kube/config" - ci/dvp-e2e/scripts/build_parent_kubeconfig.sh -o "$KCFG" -a "${E2E_K8S_URL}" -t "${{ secrets.E2E_NESTED_SA_SECRET }}" + task parent:kubeconfig OUTPUT="$KCFG" API_URL="${E2E_K8S_URL}" SA_TOKEN="${{ secrets.E2E_NESTED_SA_SECRET }}" echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV" - name: Install Task diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 07a5ea7db1..6c94355e91 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -954,6 +954,36 @@ tasks: kubectl wait --for=delete ns/"$ns" --timeout=600s || echo "[WARN] Namespace $ns was not fully deleted within timeout" done + # ------------------------------------------------------------ + # CI helpers: kubeconfig + registry + # ------------------------------------------------------------ + parent:kubeconfig: + desc: Build parent kubeconfig from URL + SA token + vars: + OUTPUT: '{{ .OUTPUT | default (env "KUBECONFIG") | default "$HOME/.kube/config" }}' + API_URL: '{{ .API_URL | default (env "E2E_K8S_URL") | default "" }}' + SA_TOKEN: '{{ .SA_TOKEN | default (env "E2E_SA_TOKEN") | default "" }}' + cmds: + - | + set -euo pipefail + if [ -z "{{ .API_URL }}" ] || [ -z "{{ .SA_TOKEN }}" ]; then + echo "[ERR] API_URL/SA_TOKEN is empty" >&2; exit 1; fi + chmod +x ./scripts/build_parent_kubeconfig.sh + ./scripts/build_parent_kubeconfig.sh -o "{{ .OUTPUT }}" -a "{{ .API_URL }}" -t "{{ .SA_TOKEN }}" + + values:inject-registry: + desc: Inject REGISTRY_DOCKER_CFG into values.yaml + vars: + VALUES_FILE: '{{ .VALUES_FILE | default "" }}' + REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' + cmds: + - | + set -euo pipefail + if [ -z "{{ .VALUES_FILE }}" ] || [ -z "{{ .REGISTRY_DOCKER_CFG }}" ]; then + echo "[ERR] VALUES_FILE/REGISTRY_DOCKER_CFG is empty" >&2; exit 1; fi + chmod +x ./scripts/inject_registry_cfg.sh + REGISTRY_DOCKER_CFG='{{ .REGISTRY_DOCKER_CFG }}' ./scripts/inject_registry_cfg.sh -f "{{ .VALUES_FILE }}" -v "{{ .REGISTRY_DOCKER_CFG }}" + # ------------------------------------------------------------ # Run E2E # ------------------------------------------------------------ From 6ca0c245046ff4939435b244c9b9f4e1b1053f22 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Mon, 17 Nov 2025 12:11:21 +0300 Subject: [PATCH 09/14] ci(e2e): fix infra:attach-storage-disks-hotplug stray duplicate loop (caused unmatched 'done'); ensure Task installed before calling tasks in cleanup Signed-off-by: Anton Yachmenev --- .github/workflows/e2e-matrix.yml | 6 +++++ ci/dvp-e2e/Taskfile.yaml | 44 -------------------------------- 2 files changed, 6 insertions(+), 44 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 55c7e8bef0..fae90a57fd 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -327,6 +327,12 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Install Task + uses: arduino/setup-task@v2 + with: + version: 3.x + repo-token: ${{ secrets.GITHUB_TOKEN }} + - name: Install kubectl uses: azure/setup-kubectl@v4 with: diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 6c94355e91..7f66377018 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -382,50 +382,6 @@ tasks: echo "[INFRA] All worker VMs configured with storage disks via hotplug" - - echo "[INFRA] Waiting for hotplug attachment of $vd..." 
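# Context for the deletion below: the duplicated wait-loop body had been pasted
# after its loop already closed, so the file carried one `done` with no matching
# `for`. A minimal sketch of the failure mode (illustrative, not from this repo):
#
#   for vd in a b; do
#     echo "$vd"
#   done
#     echo "$vd"
#   done   # <- bash aborts here: syntax error near unexpected token `done'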
- for i in $(seq 1 30); do - phase=$(kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) - if [ "$phase" = "Attached" ]; then - echo "[INFRA] Disk $vd successfully attached to VM $vm"; break - fi - # Print status approximately every 30 seconds - if [ $((i % 6)) -eq 0 ]; then - echo "[INFRA] Disk $vd phase=$phase; retry $i/30" - fi - sleep 5 - - # Periodic debug snapshot approximately every 60 seconds - if [ $((i % 12)) -eq 0 ]; then - echo "[DEBUG] VMBDA $vd status:" - kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o json | jq -r '.status' || true - fi - done - - if [ "$phase" != "Attached" ]; then - # Fallback on VM events confirming successful hotplug - echo "[DEBUG] Checking VM events for hotplug success fallback..." - if kubectl -n {{ .NAMESPACE }} get events \ - --field-selector involvedObject.kind=VirtualMachine,involvedObject.name="$vm" \ - --sort-by=.lastTimestamp -ojson \ - | jq -r '.items[].message' 2>/dev/null \ - | grep -q -E "Successfully attach hotplugged volume.*\b$vd\b"; then - echo "[WARN] VMBDA phase not Attached, but VM reported success; treating as Attached (fallback)" - else - echo "[ERROR] Disk $vd failed to attach to VM $vm" >&2 - echo "[DEBUG] Final VMBDA status:" - kubectl -n {{ .NAMESPACE }} describe virtualmachineblockdeviceattachment "$vd" || true - # Filter controller/handler logs by our namespace/VM/VD - kubectl -n d8-virtualization logs deploy/virtualization-controller --tail=200 2>/dev/null | grep -E "{{ .NAMESPACE }}|$vm|$vd" || true - for h in $(kubectl -n d8-virtualization get pods -l app=virt-handler -o name 2>/dev/null || true); do - kubectl -n d8-virtualization logs --tail=200 "$h" | grep -E "{{ .NAMESPACE }}|$vm|$vd" || true - done - exit 1 - fi - fi - done - done - # ------------------------------------------------------------ # Kubeconfig for bootstrap and cluster config # ------------------------------------------------------------ From d7ab5558ee64d38a984f1b7bcd36f375512bf307 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Mon, 17 Nov 2025 16:54:53 +0300 Subject: [PATCH 10/14] ci(e2e): address PR review comments - Add parent_storage_class to profiles.json for dynamic SC selection - Use profile-based PARENT_STORAGE_CLASS instead of hardcoded ceph SC - Simplify storageClasses config to single storageClass value - Remove PROFILE override, use dynamic profile variable - Update get_profile_config.sh to export PARENT_STORAGE_CLASS - Remove unused second parameter from get_profile_config.sh - Clarify environment/outputs export comment - Use full profile name 'sds-replicated-volume' everywhere --- .github/workflows/e2e-matrix.yml | 17 ++++++++-------- ci/dvp-e2e/Taskfile.yaml | 20 ++++++++----------- .../templates/cluster-config.yaml | 4 ++-- .../cluster-config/templates/nodegroups.yaml | 2 +- ci/dvp-e2e/charts/cluster-config/values.yaml | 9 ++------- ci/dvp-e2e/profiles.json | 3 ++- ci/dvp-e2e/scripts/get_profile_config.sh | 9 +++++---- 7 files changed, 29 insertions(+), 35 deletions(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index fae90a57fd..b32466e680 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -18,7 +18,6 @@ on: push: branches: - chore/ci/e2e-matrix-skeleton - - ci-e2e-nested-sds pull_request: types: [opened, reopened, synchronize, labeled, unlabeled] branches: @@ -160,8 +159,7 @@ jobs: RUN_ID="${{ needs.setup-nested-envs.outputs.run_id }}" echo "run_id=$RUN_ID" 
>> "$GITHUB_OUTPUT" echo "RUN_ID=$RUN_ID" >> "$GITHUB_ENV" - # Map sds-replicated-volume to sds for profile config - echo "PROFILE=sds" >> "$GITHUB_ENV" + echo "PROFILE=sds-replicated-volume" >> "$GITHUB_ENV" echo "TMP_ROOT=${{ env.TMP_ROOT }}" >> "$GITHUB_ENV" mkdir -p "${{ env.TMP_ROOT }}/shared" "${{ env.TMP_ROOT }}/matrix-logs" @@ -218,7 +216,7 @@ jobs: working-directory: ci/dvp-e2e id: profile-config env: - PROFILE: sds + PROFILE: sds-replicated-volume run: | # Get storage class configuration from profiles.json PROFILE_CONFIG=$(./scripts/get_profile_config.sh "${PROFILE}") @@ -227,25 +225,28 @@ jobs: STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^STORAGE_CLASS=" | cut -d'=' -f2) IMAGE_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^IMAGE_STORAGE_CLASS=" | cut -d'=' -f2) SNAPSHOT_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^SNAPSHOT_STORAGE_CLASS=" | cut -d'=' -f2) + PARENT_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^PARENT_STORAGE_CLASS=" | cut -d'=' -f2) ATTACH_DISK_SIZE=$(echo "$PROFILE_CONFIG" | grep "^ATTACH_DISK_SIZE=" | cut -d'=' -f2) echo "Profile: ${PROFILE}" echo "Storage Class: ${STORAGE_CLASS}" echo "Image Storage Class: ${IMAGE_STORAGE_CLASS}" echo "Snapshot Storage Class: ${SNAPSHOT_STORAGE_CLASS}" + echo "Parent Storage Class: ${PARENT_STORAGE_CLASS}" echo "Attach Disk Size: ${ATTACH_DISK_SIZE}" - # Export variables safely + # Export variables to GitHub Actions environment and outputs echo "STORAGE_CLASS=${STORAGE_CLASS}" >> $GITHUB_ENV echo "IMAGE_STORAGE_CLASS=${IMAGE_STORAGE_CLASS}" >> $GITHUB_ENV echo "SNAPSHOT_STORAGE_CLASS=${SNAPSHOT_STORAGE_CLASS}" >> $GITHUB_ENV + echo "PARENT_STORAGE_CLASS=${PARENT_STORAGE_CLASS}" >> $GITHUB_ENV echo "ATTACH_DISK_SIZE=${ATTACH_DISK_SIZE}" >> $GITHUB_ENV echo "storage_class=$STORAGE_CLASS" >> $GITHUB_OUTPUT echo "image_storage_class=$IMAGE_STORAGE_CLASS" >> $GITHUB_OUTPUT echo "snapshot_storage_class=$SNAPSHOT_STORAGE_CLASS" >> $GITHUB_OUTPUT echo "attach_disk_size=$ATTACH_DISK_SIZE" >> $GITHUB_OUTPUT # Pass storage profile into run values for Helm templates - PROFILE='sds' yq eval --inplace '.storageProfile = strenv(PROFILE)' "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" + yq eval --inplace ".storageProfile = \"${PROFILE}\"" "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" # Effective disk SC used for worker data disks (prefer image SC when set) EFF_DISK_SC=${IMAGE_STORAGE_CLASS:-$STORAGE_CLASS} echo "EFFECTIVE_DISK_SC=${EFF_DISK_SC}" >> $GITHUB_ENV @@ -267,13 +268,13 @@ jobs: - name: Bootstrap nested cluster (via jump-host) working-directory: ci/dvp-e2e run: | - echo "๐Ÿš€ dhctl bootstrap (profile: sds-replicated-volume -> sds)" + echo "๐Ÿš€ dhctl bootstrap (profile: sds-replicated-volume)" task dhctl-bootstrap \ TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ PARENT_KUBECONFIG="${KUBECONFIG}" \ SSH_FILE_NAME="id_ed" \ - TARGET_STORAGE_CLASS="ceph-pool-r2-csi-rbd-immediate" + TARGET_STORAGE_CLASS="${{ env.PARENT_STORAGE_CLASS }}" - name: Attach data disks to worker VMs using hotplug working-directory: ci/dvp-e2e diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 7f66377018..7127bfb240 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -301,10 +301,10 @@ tasks: fi pvc_name="" - for j in $(seq 1 30); do + for j in $(seq 1 50); do pvc_name=$(kubectl -n {{ .NAMESPACE }} get vd "$vd" -o jsonpath='{.status.target.persistentVolumeClaimName}' 2>/dev/null || true) [ -n "$pvc_name" ] && break - 
echo "[INFRA] Waiting for PVC name for VD $vd; retry $j/30"; sleep 3 + echo "[INFRA] Waiting for PVC name for VD $vd; retry $j/50"; sleep 3 done if [ -n "$pvc_name" ]; then echo "[INFRA] Waiting PVC $pvc_name to reach phase=Bound..." @@ -347,7 +347,7 @@ tasks: echo "[INFRA] Waiting for hotplug attachment of $vd..." success_by_vm=0 - for i in $(seq 1 30); do + for i in $(seq 1 50); do phase=$(kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) if [ "$phase" = "Attached" ]; then echo "[INFRA] Disk $vd successfully attached to VM $vm"; break @@ -360,9 +360,9 @@ tasks: break fi - # Print status approximately every 30 seconds (poll interval is 5s) - if [ $((i % 6)) -eq 0 ]; then - echo "[INFRA] Disk $vd phase=$phase; retry $i/30" + # Print status approximately every 50 seconds (poll interval is 5s) + if [ $((i % 10)) -eq 0 ]; then + echo "[INFRA] Disk $vd phase=$phase; retry $i/50" fi sleep 5 @@ -468,10 +468,6 @@ tasks: if [ -n "{{ .TARGET_STORAGE_CLASS | default "" }}" ]; then export _SC='{{ .TARGET_STORAGE_CLASS }}' yq eval --inplace '.storageClass = env(_SC)' {{ .GENERATED_VALUES_FILE }} - yq eval --inplace '.storageClasses.controlPlane.root = env(_SC)' {{ .GENERATED_VALUES_FILE }} - yq eval --inplace '.storageClasses.controlPlane.etcd = env(_SC)' {{ .GENERATED_VALUES_FILE }} - yq eval --inplace '.storageClasses.workers.root = env(_SC)' {{ .GENERATED_VALUES_FILE }} - yq eval --inplace '.storageClasses.workers.data = env(_SC)' {{ .GENERATED_VALUES_FILE }} fi - | export SSH_PUB_KEY="$(cat {{ .SSH_PUB_KEY_FILE }})" @@ -715,12 +711,12 @@ tasks: - | set -euo pipefail echo "[SDS] Waiting for API server to be ready..." - for i in $(seq 1 30); do + for i in $(seq 1 50); do if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes >/dev/null 2>&1; then echo "[SDS] API server is ready!" 
break
           fi
-          echo "[SDS] API server not ready yet, retry $i/30"; sleep 10
+          echo "[SDS] API server not ready yet, retry $i/50"; sleep 10
         done
     - |
       set -euo pipefail
diff --git a/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml
index af38c34fa6..05837e1bc9 100644
--- a/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml
+++ b/ci/dvp-e2e/charts/cluster-config/templates/cluster-config.yaml
@@ -36,13 +36,13 @@ masterNodeGroup:
     virtualMachineClassName: "{{ .Values.namespace }}-cpu"
     rootDisk:
       size: 50Gi
-      storageClass: {{ .Values.storageClasses.controlPlane.root }}
+      storageClass: {{ .Values.storageClass }}
       image:
         kind: VirtualImage
         name: image
     etcdDisk:
       size: 15Gi
-      storageClass: {{ .Values.storageClasses.controlPlane.etcd }}
+      storageClass: {{ .Values.storageClass }}
 provider:
   kubeconfigDataBase64: {{ .Values.kubeconfigDataBase64 }}
   namespace: {{ .Values.namespace }}
diff --git a/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml b/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml
index 3779fb52a3..4025e441b7 100644
--- a/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml
+++ b/ci/dvp-e2e/charts/cluster-config/templates/nodegroups.yaml
@@ -15,7 +15,7 @@ spec:
       bootloader: {{ $.Values.image.bootloader }}
       rootDisk:
         size: 50Gi
-        storageClass: {{ $.Values.storageClasses.workers.root }}
+        storageClass: {{ $.Values.storageClass }}
       image:
         kind: VirtualImage
         name: image
diff --git a/ci/dvp-e2e/charts/cluster-config/values.yaml b/ci/dvp-e2e/charts/cluster-config/values.yaml
index 79f0062022..cb66b6c882 100644
--- a/ci/dvp-e2e/charts/cluster-config/values.yaml
+++ b/ci/dvp-e2e/charts/cluster-config/values.yaml
@@ -37,13 +37,8 @@ ingressHosts:
   - console
   - virtualization

-# Storage classes configuration (for parent cluster)
-storageClasses:
-  controlPlane:
-    root: ceph-pool-r2-csi-rbd-immediate
-    etcd: ceph-pool-r2-csi-rbd-immediate
-  workers:
-    root: ceph-pool-r2-csi-rbd-immediate
+# Storage class for parent cluster VMs (overridden by workflow from profile)
+storageClass: ceph-pool-r2-csi-rbd-immediate

 # Feature flags (only those used by templates)
 features:
diff --git a/ci/dvp-e2e/profiles.json b/ci/dvp-e2e/profiles.json
index 624ce7cfeb..c5b6307ba9 100644
--- a/ci/dvp-e2e/profiles.json
+++ b/ci/dvp-e2e/profiles.json
@@ -1,9 +1,10 @@
 [
   {
-    "name": "sds",
+    "name": "sds-replicated-volume",
     "storage_class": "linstor-thin-r2",
     "image_storage_class": "linstor-thin-r1-immediate",
     "snapshot_storage_class": "linstor-thin-r2",
+    "parent_storage_class": "linstor-thin-r1-immediate",
     "worker_data_disk_size": "10Gi",
     "description": "SDS storage with LINSTOR thin provisioning"
   }
diff --git a/ci/dvp-e2e/scripts/get_profile_config.sh b/ci/dvp-e2e/scripts/get_profile_config.sh
index f29dc17e34..c6f1e7ea8d 100755
--- a/ci/dvp-e2e/scripts/get_profile_config.sh
+++ b/ci/dvp-e2e/scripts/get_profile_config.sh
@@ -20,10 +20,10 @@ set -euo pipefail

 PROFILE="${1:-}"
-PROFILES_FILE="${2:-./profiles.json}"
+PROFILES_FILE="./profiles.json"

 if [[ -z "$PROFILE" ]]; then
-  echo "Usage: $0 <profile> [profiles_file]" >&2
+  echo "Usage: $0 <profile>" >&2
   exit 1
 fi

@@ -35,7 +35,7 @@ fi
 # Use jq to find profile by exact name only
 PROFILE_CONFIG=$(jq -r --arg profile "$PROFILE" '
   .[] | select(.name == $profile) |
-  "\(.storage_class)|\(.image_storage_class)|\(.snapshot_storage_class)|\(.worker_data_disk_size // "10Gi")"
+  "\(.storage_class)|\(.image_storage_class)|\(.snapshot_storage_class)|\(.parent_storage_class)|\(.worker_data_disk_size //
"10Gi")" ' "$PROFILES_FILE") if [[ -z "$PROFILE_CONFIG" || "$PROFILE_CONFIG" == "null" ]]; then @@ -46,9 +46,10 @@ if [[ -z "$PROFILE_CONFIG" || "$PROFILE_CONFIG" == "null" ]]; then fi # Split the result and export variables -IFS='|' read -r SC IMG_SC SNAP_SC ATTACH_SIZE <<< "$PROFILE_CONFIG" +IFS='|' read -r SC IMG_SC SNAP_SC PARENT_SC ATTACH_SIZE <<< "$PROFILE_CONFIG" echo "STORAGE_CLASS=$SC" echo "IMAGE_STORAGE_CLASS=$IMG_SC" echo "SNAPSHOT_STORAGE_CLASS=$SNAP_SC" +echo "PARENT_STORAGE_CLASS=$PARENT_SC" echo "ATTACH_DISK_SIZE=$ATTACH_SIZE" From 43f06996aa1293fbb2e9893cb635286b7b9213ee Mon Sep 17 00:00:00 2001 From: Anton Yacmenev Date: Mon, 17 Nov 2025 20:29:13 +0300 Subject: [PATCH 11/14] Update ci/dvp-e2e/charts/cluster-config/values.yaml Co-authored-by: Nikita Korolev <141920865+universal-itengineer@users.noreply.github.com> Signed-off-by: Anton Yacmenev --- ci/dvp-e2e/charts/cluster-config/values.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/dvp-e2e/charts/cluster-config/values.yaml b/ci/dvp-e2e/charts/cluster-config/values.yaml index cb66b6c882..86c976457e 100644 --- a/ci/dvp-e2e/charts/cluster-config/values.yaml +++ b/ci/dvp-e2e/charts/cluster-config/values.yaml @@ -43,5 +43,3 @@ storageClass: ceph-pool-r2-csi-rbd-immediate # Feature flags (only those used by templates) features: virtualization: true - nfs: - enabled: false From 9fa177ec145612f1a7a9c0de1e5c3e9815777fd2 Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Mon, 17 Nov 2025 22:20:40 +0300 Subject: [PATCH 12/14] chore(ci): refactor E2E workflow and Taskfile - Remove duplicates: kubeconfig in cleanup, run_id outputs, Install Task - Remove hardcoded storage class values, use profile variables - Simplify REGISTRY_DOCKER_CFG usage (direct secret access) - Remove USE_GH_SSH_KEYS variable (always import from GitHub) - Remove fetch-depth: 0 (unused) - Remove unused nfs config from cluster-config/values - Add profile to setup-nested-envs outputs - Replace d8 installation with werf/trdl/actions/setup-app@v0.12.2 - Remove PASSWORD_FILE variable (local only) - Change default SSH_FILE_NAME to id_ed - Create unified task install:nested:env for complete setup - Add Docker registry auth via config.json - Ensure nested dir exists before creating kubeconfig --- .github/workflows/e2e-matrix.yml | 217 ++---- ci/dvp-e2e/Taskfile.yaml | 628 +++--------------- ci/dvp-e2e/charts/cluster-config/Chart.yaml | 2 - .../charts/cluster-config/templates/mc.yaml | 2 +- ci/dvp-e2e/charts/infra/Chart.yaml | 2 - ci/dvp-e2e/charts/support/Chart.yaml | 18 - ci/dvp-e2e/charts/support/values.yaml | 62 -- ci/dvp-e2e/scripts/attach_worker_disks.sh | 206 ++++++ ci/dvp-e2e/scripts/build_nested_kubeconfig.sh | 144 ++++ ci/dvp-e2e/scripts/configure_sds_storage.sh | 184 +++++ ci/dvp-e2e/scripts/get_profile_config.sh | 55 -- ci/dvp-e2e/scripts/inject_registry_cfg.sh | 38 -- 12 files changed, 659 insertions(+), 899 deletions(-) delete mode 100644 ci/dvp-e2e/charts/support/Chart.yaml delete mode 100644 ci/dvp-e2e/charts/support/values.yaml create mode 100755 ci/dvp-e2e/scripts/attach_worker_disks.sh create mode 100755 ci/dvp-e2e/scripts/build_nested_kubeconfig.sh create mode 100755 ci/dvp-e2e/scripts/configure_sds_storage.sh delete mode 100755 ci/dvp-e2e/scripts/get_profile_config.sh delete mode 100755 ci/dvp-e2e/scripts/inject_registry_cfg.sh diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index b32466e680..3dd0c267dc 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -26,11 +26,6 @@ 
on: schedule: - cron: "30 2 * * *" workflow_dispatch: - inputs: - timeout: - description: "Ginkgo timeout (e.g. 2h, 4h)" - required: false - default: "4h" permissions: contents: read @@ -52,10 +47,9 @@ jobs: concurrency: group: setup-nested-envs-${{ github.head_ref || github.ref_name }} cancel-in-progress: true - env: - PROFILE: sds-replicated-volume outputs: run_id: ${{ steps.prep.outputs.run_id }} + profile: ${{ steps.load.outputs.profile }} steps: - uses: actions/checkout@v4 @@ -72,17 +66,14 @@ jobs: id: prep run: | RUN_ID="nightly-nested-e2e-sds-$(date +%H%M%S)" + PROFILE="${{ steps.load.outputs.profile }}" echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" mkdir -p ./tmp/run-context - echo "profile: ${PROFILE}" > ./tmp/run-context/config.yaml - echo "run_id: ${RUN_ID}" >> ./tmp/run-context/config.yaml - echo "timestamp: $(date -Iseconds)" >> ./tmp/run-context/config.yaml - - - name: Upload run context - uses: actions/upload-artifact@v4 - with: - name: run-context-${{ steps.prep.outputs.run_id }} - path: ./tmp/run-context + { + echo "profile: ${PROFILE}" + echo "run_id: ${RUN_ID}" + echo "timestamp: $(date -Iseconds)" + } > ./tmp/run-context/config.yaml # ============================================ # 2. PREPARE - Cluster preparation @@ -93,24 +84,15 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 300 concurrency: - group: prepare-${{ github.head_ref || github.ref_name }}-sds-replicated-volume + group: prepare-${{ github.head_ref || github.ref_name }}-${{ needs.setup-nested-envs.outputs.profile }} cancel-in-progress: true env: - PROFILE: sds-replicated-volume - GO_VERSION: "1.24.6" + PROFILE: ${{ needs.setup-nested-envs.outputs.profile }} TMP_ROOT: ${{ github.workspace }}/ci/dvp-e2e/tmp - - outputs: - run_id: ${{ steps.prep.outputs.run_id }} - storage_class: ${{ steps.profile-config.outputs.storage_class }} - image_storage_class: ${{ steps.profile-config.outputs.image_storage_class }} - snapshot_storage_class: ${{ steps.profile-config.outputs.snapshot_storage_class }} - attach_disk_size: ${{ steps.profile-config.outputs.attach_disk_size }} + REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} steps: - uses: actions/checkout@v4 - with: - fetch-depth: 0 - name: Install Task uses: arduino/setup-task@v2 @@ -128,18 +110,15 @@ jobs: with: version: "latest" - - name: Install Deckhouse CLI - env: - D8_VERSION: v0.13.2 - run: | - set -euo pipefail - echo "Installing d8 ${D8_VERSION}..." - curl -fsSL -o d8.tgz "https://deckhouse.io/downloads/deckhouse-cli/${D8_VERSION}/d8-${D8_VERSION}-linux-amd64.tar.gz" - tar -xzf d8.tgz linux-amd64/bin/d8 - mv linux-amd64/bin/d8 /usr/local/bin/d8 - chmod +x /usr/local/bin/d8 - rm -rf d8.tgz linux-amd64 - d8 --version + - name: Setup d8 + uses: werf/trdl/actions/setup-app@v0.12.2 + with: + repo: d8 + url: https://deckhouse.ru/downloads/deckhouse-cli-trdl/ + root-version: 1 + root-sha512: 343bd5f0d8811254e5f0b6fe292372a7b7eda08d276ff255229200f84e58a8151ab2729df3515cb11372dc3899c70df172a4e54c8a596a73d67ae790466a0491 + group: 0 + channel: stable - name: Install yq run: | @@ -147,19 +126,11 @@ jobs: curl -L -o /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 chmod +x /usr/local/bin/yq - - name: Restore run context - uses: actions/download-artifact@v4 - with: - name: run-context-${{ needs.setup-nested-envs.outputs.run_id }} - path: . 
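# Aside on the grouped redirect used for config.yaml above: writing the block
# through one `{ ...; } > file` opens the file once and keeps the three fields
# together, instead of three separate `echo ... >> file` appends. Equivalent
# standalone sketch (file name assumed):
#
#   {
#     echo "profile: ${PROFILE}"
#     echo "run_id: ${RUN_ID}"
#     echo "timestamp: $(date -Iseconds)"
#   } > config.yaml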
- - name: Prepare environment id: prep run: | RUN_ID="${{ needs.setup-nested-envs.outputs.run_id }}" - echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" echo "RUN_ID=$RUN_ID" >> "$GITHUB_ENV" - echo "PROFILE=sds-replicated-volume" >> "$GITHUB_ENV" echo "TMP_ROOT=${{ env.TMP_ROOT }}" >> "$GITHUB_ENV" mkdir -p "${{ env.TMP_ROOT }}/shared" "${{ env.TMP_ROOT }}/matrix-logs" @@ -177,56 +148,29 @@ jobs: RUN_ID="${{ env.RUN_ID }}" \ RUN_NAMESPACE="${{ env.RUN_ID }}" \ RUN_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" - echo "VALUES_TEMPLATE_FILE=${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" >> $GITHUB_ENV - - name: Configure registry auth (DEV_REGISTRY_DOCKER_CFG) - run: | - dev_cfg_b64='${{ secrets.DEV_REGISTRY_DOCKER_CFG }}' - if [ -n "$dev_cfg_b64" ]; then - echo "::add-mask::$dev_cfg_b64" - echo "REGISTRY_DOCKER_CFG=$dev_cfg_b64" >> "$GITHUB_ENV" - else - echo "[WARN] DEV_REGISTRY_DOCKER_CFG is empty; proceeding without registry cfg" - fi - - - name: Inject REGISTRY_DOCKER_CFG into values.yaml - if: ${{ env.REGISTRY_DOCKER_CFG != '' }} - working-directory: ci/dvp-e2e + - name: Configure registry auth for installer pull run: | - VALS="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" - task values:inject-registry VALUES_FILE="$VALS" REGISTRY_DOCKER_CFG="${REGISTRY_DOCKER_CFG}" - - - name: Docker login from DEV_REGISTRY_DOCKER_CFG (optional) - if: ${{ env.REGISTRY_DOCKER_CFG != '' }} - run: | - set -euo pipefail - cfg=$(printf '%s' "$REGISTRY_DOCKER_CFG" | base64 -d) - reg_list=$(printf '%s' "$cfg" | jq -r '.auths | keys[]') - for reg in $reg_list; do - auth=$(printf '%s' "$cfg" | jq -r --arg r "$reg" '.auths[$r].auth // ""') - [ -z "$auth" ] && continue - creds=$(printf '%s' "$auth" | base64 -d) - user=${creds%%:*} - pass=${creds#*:} - echo "Logging into $reg" - echo "$pass" | docker login "$reg" -u "$user" --password-stdin - done + mkdir -p ~/.docker + printf '%s' "$REGISTRY_DOCKER_CFG" | base64 -d > ~/.docker/config.json - name: Configure storage profile working-directory: ci/dvp-e2e id: profile-config - env: - PROFILE: sds-replicated-volume run: | - # Get storage class configuration from profiles.json - PROFILE_CONFIG=$(./scripts/get_profile_config.sh "${PROFILE}") + PROFILE_JSON=$(jq -c --arg profile "$PROFILE" '.[] | select(.name == $profile)' profiles.json) + if [ -z "$PROFILE_JSON" ]; then + echo "Profile '$PROFILE' not found in profiles.json" >&2 + echo "Available profiles:" >&2 + jq -r '.[] | " - \(.name)"' profiles.json >&2 + exit 1 + fi - # Parse the output more carefully - STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^STORAGE_CLASS=" | cut -d'=' -f2) - IMAGE_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^IMAGE_STORAGE_CLASS=" | cut -d'=' -f2) - SNAPSHOT_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^SNAPSHOT_STORAGE_CLASS=" | cut -d'=' -f2) - PARENT_STORAGE_CLASS=$(echo "$PROFILE_CONFIG" | grep "^PARENT_STORAGE_CLASS=" | cut -d'=' -f2) - ATTACH_DISK_SIZE=$(echo "$PROFILE_CONFIG" | grep "^ATTACH_DISK_SIZE=" | cut -d'=' -f2) + STORAGE_CLASS=$(jq -r '.storage_class // ""' <<<"$PROFILE_JSON") + IMAGE_STORAGE_CLASS=$(jq -r '.image_storage_class // ""' <<<"$PROFILE_JSON") + SNAPSHOT_STORAGE_CLASS=$(jq -r '.snapshot_storage_class // ""' <<<"$PROFILE_JSON") + PARENT_STORAGE_CLASS=$(jq -r '.parent_storage_class // ""' <<<"$PROFILE_JSON") + ATTACH_DISK_SIZE=$(jq -r '.worker_data_disk_size // "10Gi"' <<<"$PROFILE_JSON") echo "Profile: ${PROFILE}" echo "Storage Class: ${STORAGE_CLASS}" @@ -235,124 +179,57 @@ jobs: echo "Parent Storage Class: ${PARENT_STORAGE_CLASS}" echo "Attach 
Disk Size: ${ATTACH_DISK_SIZE}" - # Export variables to GitHub Actions environment and outputs + # Export variables to GitHub Actions environment echo "STORAGE_CLASS=${STORAGE_CLASS}" >> $GITHUB_ENV - echo "IMAGE_STORAGE_CLASS=${IMAGE_STORAGE_CLASS}" >> $GITHUB_ENV - echo "SNAPSHOT_STORAGE_CLASS=${SNAPSHOT_STORAGE_CLASS}" >> $GITHUB_ENV echo "PARENT_STORAGE_CLASS=${PARENT_STORAGE_CLASS}" >> $GITHUB_ENV echo "ATTACH_DISK_SIZE=${ATTACH_DISK_SIZE}" >> $GITHUB_ENV - echo "storage_class=$STORAGE_CLASS" >> $GITHUB_OUTPUT - echo "image_storage_class=$IMAGE_STORAGE_CLASS" >> $GITHUB_OUTPUT - echo "snapshot_storage_class=$SNAPSHOT_STORAGE_CLASS" >> $GITHUB_OUTPUT - echo "attach_disk_size=$ATTACH_DISK_SIZE" >> $GITHUB_OUTPUT # Pass storage profile into run values for Helm templates yq eval --inplace ".storageProfile = \"${PROFILE}\"" "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" # Effective disk SC used for worker data disks (prefer image SC when set) EFF_DISK_SC=${IMAGE_STORAGE_CLASS:-$STORAGE_CLASS} echo "EFFECTIVE_DISK_SC=${EFF_DISK_SC}" >> $GITHUB_ENV - - name: Install infra (namespace/RBAC/ingress) + - name: Install nested environment working-directory: ci/dvp-e2e run: | - USE_GH_SSH_KEYS=true SSH_FILE_NAME=id_ed task render-infra \ - TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ - VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ - PARENT_KUBECONFIG="${KUBECONFIG}" \ - SSH_FILE_NAME="id_ed" - USE_GH_SSH_KEYS=true SSH_FILE_NAME=id_ed task infra-deploy \ + task install:nested:env \ TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ PARENT_KUBECONFIG="${KUBECONFIG}" \ - SSH_FILE_NAME="id_ed" - - - name: Bootstrap nested cluster (via jump-host) - working-directory: ci/dvp-e2e - run: | - echo "๐Ÿš€ dhctl bootstrap (profile: sds-replicated-volume)" - task dhctl-bootstrap \ - TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ - VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ - PARENT_KUBECONFIG="${KUBECONFIG}" \ - SSH_FILE_NAME="id_ed" \ - TARGET_STORAGE_CLASS="${{ env.PARENT_STORAGE_CLASS }}" - - - name: Attach data disks to worker VMs using hotplug - working-directory: ci/dvp-e2e - run: | - task infra:attach-storage-disks-hotplug \ - TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ - VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ - PARENT_KUBECONFIG="${KUBECONFIG}" \ - DISK_SIZE="${ATTACH_DISK_SIZE:-10Gi}" \ - STORAGE_CLASS="ceph-pool-r2-csi-rbd-immediate" \ - DISK_COUNT="2" - - - name: Build nested kubeconfig - working-directory: ci/dvp-e2e - run: | - task nested:kubeconfig \ - TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ - VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ + TARGET_STORAGE_CLASS="${{ env.PARENT_STORAGE_CLASS }}" \ + ATTACH_DISK_SIZE="${{ env.ATTACH_DISK_SIZE }}" \ + EFFECTIVE_DISK_SC="${{ env.EFFECTIVE_DISK_SC }}" \ NAMESPACE="${{ env.RUN_ID }}" \ - SSH_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/ssh" \ - SSH_FILE_NAME="id_ed" \ - NESTED_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested" \ NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \ - PARENT_KUBECONFIG="${KUBECONFIG}" - - - name: Configure SDS in nested cluster - working-directory: ci/dvp-e2e - run: | - echo "๐Ÿ’พ Configuring SDS storage (sds-node-configurator + sds-replicated-volume)" - task nested:storage:sds \ - TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ - 
NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \ - SDS_SC_NAME="${{ steps.profile-config.outputs.storage_class }}" - - - name: Upload run context - if: always() - uses: actions/upload-artifact@v4 - with: - name: run-context-${{ env.RUN_ID }} - path: | - ci/dvp-e2e/tmp/runs/${{ env.RUN_ID }} - ci/dvp-e2e/tmp/shared - if-no-files-found: warn - overwrite: true + SDS_SC_NAME="${{ env.STORAGE_CLASS }}" cleanup: name: Cleanup [skeleton] needs: [setup-nested-envs, prepare] if: always() runs-on: ubuntu-latest + env: + CLEANUP_PREFIX: ${{ vars.CLEANUP_PREFIX || 'nightly-nested-e2e-' }} steps: - uses: actions/checkout@v4 - - name: Install Task - uses: arduino/setup-task@v2 - with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - name: Install kubectl uses: azure/setup-kubectl@v4 with: version: "latest" - - name: Build parent kubeconfig from secret (cleanup) + - name: Install Task + uses: arduino/setup-task@v2 + + - name: Build parent kubeconfig from secret working-directory: ci/dvp-e2e run: | KCFG="$HOME/.kube/config" task parent:kubeconfig OUTPUT="$KCFG" API_URL="${E2E_K8S_URL}" SA_TOKEN="${{ secrets.E2E_NESTED_SA_SECRET }}" echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV" - - name: Install Task - uses: arduino/setup-task@v2 - - name: Cleanup test namespaces working-directory: ci/dvp-e2e - env: - CLEANUP_PREFIX: ${{ vars.CLEANUP_PREFIX || 'nightly-nested-e2e-' }} run: | task cleanup:namespaces PREFIX="${CLEANUP_PREFIX}" PARENT_KUBECONFIG="${KUBECONFIG}" diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml index 7127bfb240..49c179f464 100644 --- a/ci/dvp-e2e/Taskfile.yaml +++ b/ci/dvp-e2e/Taskfile.yaml @@ -7,7 +7,7 @@ vars: TMP_ROOT: sh: git rev-parse --show-toplevel 2>/dev/null | xargs -I{} printf "%s/ci/dvp-e2e/tmp" {} VALUES_TEMPLATE_FILE: values.yaml - SSH_FILE_NAME: cloud + SSH_FILE_NAME: id_ed # Charts INFRA_CHART_PATH: ./charts/infra @@ -22,7 +22,7 @@ tasks: desc: Check required utilities cmds: - | - deps=("kubectl" "jq" "yq" "docker" "helm" "htpasswd" "ssh-keygen" "curl" "d8" "openssl") + deps=("kubectl" "jq" "yq" "docker" "helm" "htpasswd" "curl" "openssl" "d8") for dep in "${deps[@]}"; do if ! command -v "$dep" >/dev/null 2>&1; then echo "Required utility '$dep' not found!" 
>&2 @@ -36,33 +36,15 @@ tasks: silent: true vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' - PASSWORD_FILE: '{{ printf "%s/%s" .TMP_DIR "password.txt" }}' PASSWORD_HASH_FILE: '{{ printf "%s/%s" .TMP_DIR "password-hash.txt" }}' cmds: - mkdir -p {{ .TMP_DIR }} - - openssl rand -base64 20 > {{ .PASSWORD_FILE }} - | - pw="$(cat {{ .PASSWORD_FILE }})" + pw="$(openssl rand -base64 20)" htpasswd -BinC 10 "" <<< "$pw" | cut -d: -f2 | (base64 --wrap=0 2>/dev/null || base64 -w0 2>/dev/null || base64) > {{ .PASSWORD_HASH_FILE }} status: - - test -f "{{ .PASSWORD_FILE }}" - test -f "{{ .PASSWORD_HASH_FILE }}" - ssh-gen: - desc: Generate ssh keypair for bootstrap - vars: - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' - SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' - SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' - SSH_PUB_KEY_FILE: '{{ printf "%s/%s.pub" .SSH_DIR .SSH_FILE_NAME }}' - cmds: - - mkdir -p "{{ .SSH_DIR }}" - - ssh-keygen -t ed25519 -o -a 64 -N "" -C "cloud" -f {{ .SSH_PRIV_KEY_FILE }} -q - - chmod 0600 "{{ .SSH_PRIV_KEY_FILE }}" - - chmod 0644 "{{ .SSH_PUB_KEY_FILE }}" - status: - - test -f "{{ .SSH_PRIV_KEY_FILE }}" - # ------------------------------------------------------------ # Values per run (namespaces, domain, prefix) # ------------------------------------------------------------ @@ -134,6 +116,10 @@ tasks: - | export SSH_PUB_KEY="$(cat {{ .SSH_PUB_KEY_FILE }})" yq eval --inplace '.sshPublicKey = env(SSH_PUB_KEY)' {{ .GENERATED_VALUES_FILE }} + - | + if [ -n "${REGISTRY_DOCKER_CFG:-}" ]; then + yq eval --inplace '.deckhouse.registryDockerCfg = env(REGISTRY_DOCKER_CFG)' {{ .GENERATED_VALUES_FILE }} + fi - | DOMAIN_VALUE="{{ .DOMAIN }}" if [ -n "$DOMAIN_VALUE" ] && [ "$DOMAIN_VALUE" != "null" ]; then @@ -172,47 +158,6 @@ tasks: --from-file=id_ed.pub={{ .SSH_PUB_KEY_FILE }} \ | kubectl apply -f - - infra:create-storage-disks: - desc: Create storage disks for worker VMs before cluster bootstrap (for SDS) - vars: - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' - VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" - NAMESPACE: - sh: yq eval '.namespace' {{ .VALUES_FILE }} - DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' - STORAGE_CLASS: '{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' - DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' - WORKER_COUNT: '{{ .WORKER_COUNT | default "3" }}' - env: - KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' - cmds: - - | - set -euo pipefail - echo "[INFRA] Creating {{ .DISK_COUNT }} storage disks per worker VM ({{ .WORKER_COUNT }} workers) in namespace {{ .NAMESPACE }}" - - # Create VirtualDisks for all expected worker VMs - # We'll use predictable naming based on Deckhouse's naming pattern - for worker_idx in $(seq 0 $(({{ .WORKER_COUNT }} - 1))); do - for disk_num in $(seq 1 {{ .DISK_COUNT }}); do - # Deckhouse generates VM names like: {prefix}-{hash}-worker-{suffix} - vd="storage-disk-${disk_num}-worker-${worker_idx}" - echo "[INFRA] Creating VirtualDisk $vd ({{ .DISK_SIZE }}, sc={{ .STORAGE_CLASS }})" - cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml - echo "[INFRA] VirtualDisk $vd created" - done - done - infra:attach-storage-disks-hotplug: desc: Attach storage disks to worker VMs using hotplug (VirtualMachineBlockDeviceAttachment) vars: @@ -223,164 +168,17 @@ tasks: DISK_SIZE: '{{ .DISK_SIZE | default "10Gi" }}' STORAGE_CLASS: 
'{{ .STORAGE_CLASS | default "linstor-thin-r2" }}' DISK_COUNT: '{{ .DISK_COUNT | default "2" }}' - SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' - SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default "id_ed" }}' - DEFAULT_USER: - sh: yq eval '.image.defaultUser' {{ .VALUES_FILE }} env: KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' cmds: + - chmod +x scripts/attach_worker_disks.sh - | - set -euo pipefail - echo "[INFRA] Attaching {{ .DISK_COUNT }} storage disks to worker VMs using hotplug in namespace {{ .NAMESPACE }}" - - # Wait for worker VMs - for i in $(seq 1 50); do - worker_count=$(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker | wc -l) - if [ "$worker_count" -gt 0 ]; then - echo "[INFRA] Found $worker_count worker VMs"; break - fi - echo "[INFRA] Waiting for worker VMs... ($i/50)"; sleep 10 - done - - workers=() - while IFS= read -r line; do - [ -n "$line" ] && workers+=("$line") - done < <(kubectl -n {{ .NAMESPACE }} get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker || true) - - if [ ${#workers[@]} -eq 0 ]; then - echo "[INFRA] No worker VMs found; nothing to do"; exit 0 - fi - - echo "[INFRA] Found ${#workers[@]} worker VMs: ${workers[*]}" - - for vm in "${workers[@]}"; do - [ -z "$vm" ] && continue - echo "[INFRA] Processing VM: $vm" - - # Wait for VM to be Running - for i in $(seq 1 50); do - phase=$(kubectl -n {{ .NAMESPACE }} get vm "$vm" -o jsonpath='{.status.phase}' 2>/dev/null || true) - if [ "$phase" = "Running" ]; then - echo "[INFRA] VM $vm is Running"; break - fi - echo "[INFRA] VM $vm phase=$phase; retry $i/50"; sleep 10 - done - - for disk_num in $(seq 1 {{ .DISK_COUNT }}); do - vd="storage-disk-${disk_num}-$vm" - echo "[INFRA] Creating VirtualDisk $vd ({{ .DISK_SIZE }}, sc={{ .STORAGE_CLASS }})" - cat > /tmp/vd-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/vd-$vd.yaml - - # Wait for VirtualDisk to be Ready and PVC to be Bound - echo "[INFRA] Waiting for VirtualDisk $vd to be Ready..." - vd_phase="" - for j in $(seq 1 50); do - vd_phase=$(kubectl -n {{ .NAMESPACE }} get vd "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) - if [ "$vd_phase" = "Ready" ]; then - echo "[INFRA] VirtualDisk $vd is Ready"; break - fi - echo "[INFRA] VD $vd phase=$vd_phase; retry $j/50"; sleep 5 - done - if [ "$vd_phase" != "Ready" ]; then - echo "[ERROR] VirtualDisk $vd not Ready" - kubectl -n {{ .NAMESPACE }} get vd "$vd" -o yaml || true - kubectl -n {{ .NAMESPACE }} get events --sort-by=.lastTimestamp | tail -n 100 || true - exit 1 - fi - - pvc_name="" - for j in $(seq 1 50); do - pvc_name=$(kubectl -n {{ .NAMESPACE }} get vd "$vd" -o jsonpath='{.status.target.persistentVolumeClaimName}' 2>/dev/null || true) - [ -n "$pvc_name" ] && break - echo "[INFRA] Waiting for PVC name for VD $vd; retry $j/50"; sleep 3 - done - if [ -n "$pvc_name" ]; then - echo "[INFRA] Waiting PVC $pvc_name to reach phase=Bound..." 
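# The manual Bound-poll removed below can also be expressed with kubectl's
# jsonpath wait (available in reasonably recent kubectl); sketch only, with
# $pvc_name and the namespace taken from the surrounding loop:
#
#   kubectl -n "{{ .NAMESPACE }}" wait "pvc/$pvc_name" \
#     --for=jsonpath='{.status.phase}'=Bound --timeout=240s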
- pvc_phase="" - for j in $(seq 1 120); do - pvc_phase=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.status.phase}' 2>/dev/null || true) - if [ "$pvc_phase" = "Bound" ]; then - break - fi - echo "[INFRA] PVC $pvc_name phase=$pvc_phase; retry $j/120"; sleep 2 - done - if [ "$pvc_phase" != "Bound" ]; then - echo "[ERROR] PVC $pvc_name did not reach Bound" - kubectl -n {{ .NAMESPACE }} describe pvc "$pvc_name" || true - kubectl -n {{ .NAMESPACE }} get events --sort-by=.lastTimestamp | tail -n 100 || true - exit 1 - fi - sc=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.storageClassName}' 2>/dev/null || true) - pv=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.volumeName}' 2>/dev/null || true) - vmode=$(kubectl -n {{ .NAMESPACE }} get pvc "$pvc_name" -o jsonpath='{.spec.volumeMode}' 2>/dev/null || true) - echo "[INFRA] PVC $pvc_name is Bound (sc=${sc:-?}, pv=${pv:-?}, mode=${vmode:-?})" - else - echo "[WARN] PVC name for VD $vd is empty; proceeding with attachment" - fi - - echo "[INFRA] Creating VirtualMachineBlockDeviceAttachment for $vd" - cat > /tmp/attach-$vd.yaml </dev/null 2>&1 || kubectl -n {{ .NAMESPACE }} apply -f /tmp/attach-$vd.yaml - - echo "[INFRA] Waiting for hotplug attachment of $vd..." - success_by_vm=0 - for i in $(seq 1 50); do - phase=$(kubectl -n {{ .NAMESPACE }} get virtualmachineblockdeviceattachment "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) - if [ "$phase" = "Attached" ]; then - echo "[INFRA] Disk $vd successfully attached to VM $vm"; break - fi - # Quick success path: rely on VM status even if VMBDA still InProgress - if kubectl -n {{ .NAMESPACE }} get vm "$vm" -o json \ - | jq -e --arg vd "$vd" '([.status.blockDeviceRefs[]? | select((.virtualMachineBlockDeviceAttachmentName==$vd) or (.name==$vd)) | select((.attached==true) and (.hotplugged==true))] | length) > 0' >/dev/null; then - echo "[INFRA] VM reports disk $vd attached/hotplugged; proceeding" - success_by_vm=1 - break - fi - - # Print status approximately every 50 seconds (poll interval is 5s) - if [ $((i % 10)) -eq 0 ]; then - echo "[INFRA] Disk $vd phase=$phase; retry $i/50" - fi - sleep 5 - - done - - if [ "$phase" != "Attached" ] && [ "${success_by_vm:-0}" -ne 1 ]; then - echo "[ERROR] Disk $vd failed to attach to VM $vm within timeout" >&2 - # final debug snapshots removed - exit 1 - fi - done - - echo "[INFRA] VM $vm configured with hotplug disks" - - - done - - echo "[INFRA] All worker VMs configured with storage disks via hotplug" + scripts/attach_worker_disks.sh \ + -n "{{ .NAMESPACE }}" \ + -s "{{ .STORAGE_CLASS }}" \ + -z "{{ .DISK_SIZE }}" \ + -c "{{ .DISK_COUNT }}" \ + -k "${KUBECONFIG}" # ------------------------------------------------------------ # Kubeconfig for bootstrap and cluster config @@ -477,13 +275,17 @@ tasks: dhctl-bootstrap: desc: Bootstrap Deckhouse over DVP via jump-host (docker dhctl with bastion) deps: - - render-cluster-config + - task: render-cluster-config + vars: + TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" + SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' - GENERATED_VALUES_FILE: '{{ printf "%s/%s" .TMP_DIR "generated-values.yaml" }}' NAMESPACE: sh: yq eval 
'.namespace' {{ .VALUES_FILE }} DEFAULT_USER: @@ -539,48 +341,14 @@ tasks: - test -f "{{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}" ssh:ensure: - desc: Ensure SSH keys exist (import from GH when USE_GH_SSH_KEYS=true) + desc: Ensure SSH keys exist (import from GitHub) vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' - SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default (env "SSH_FILE_NAME") | default "cloud" }}' - USE_GH_SSH_KEYS: '{{ .USE_GH_SSH_KEYS | default (env "USE_GH_SSH_KEYS") | default "false" }}' - cmds: - - | - set -euo pipefail - if [ "{{ .USE_GH_SSH_KEYS }}" = "true" ]; then - echo "[SSH] Importing GH keys to {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}" - task ssh:import-gh SSH_DIR='{{ .SSH_DIR }}' SSH_FILE_NAME='{{ .SSH_FILE_NAME }}' - else - echo "[SSH] Generating new SSH keypair at {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}" - task ssh-gen SSH_DIR='{{ .SSH_DIR }}' SSH_FILE_NAME='{{ .SSH_FILE_NAME }}' - fi - - # ------------------------------------------------------------ - # Local flow wrappers with logs (DVP-over-DVP) - # ------------------------------------------------------------ - local:bootstrap: - desc: Local flow โ€” deploy infra + bootstrap nested (logs saved) - vars: - RUN_ID: '{{ .RUN_ID | default (printf "local-%s" (now | date "20060102-150405")) }}' - RUN_NAMESPACE: '{{ .RUN_NAMESPACE | default (printf "dvp-e2e-local-%s" .RUN_ID) }}' - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' - LOG_FILE: '{{ .LOG_FILE | default (printf "%s/%s" .TMP_DIR "bootstrap.log") }}' - VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" - TARGET_STORAGE_CLASS: '{{ .TARGET_STORAGE_CLASS | default "ceph-pool-r2-csi-rbd-immediate" }}' - USE_GH_SSH_KEYS: '{{ .USE_GH_SSH_KEYS | default (env "USE_GH_SSH_KEYS") | default "true" }}' cmds: - - mkdir -p {{ .TMP_DIR }} - | - set -euo pipefail - echo "[FLOW] Using RUN_ID={{ .RUN_ID }}, namespace={{ .RUN_NAMESPACE }}" - { - task run:values:prepare RUN_ID='{{ .RUN_ID }}' RUN_NAMESPACE='{{ .RUN_NAMESPACE }}' TMP_DIR='{{ .TMP_DIR }}' - task render-infra VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' USE_GH_SSH_KEYS='{{ .USE_GH_SSH_KEYS }}' SSH_FILE_NAME='id_ed' - task infra-deploy VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed' - task render-cluster-config VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' TARGET_STORAGE_CLASS='{{ .TARGET_STORAGE_CLASS }}' SSH_FILE_NAME='id_ed' - task dhctl-bootstrap VALUES_FILE='{{ .VALUES_FILE }}' TMP_DIR='{{ .TMP_DIR }}' SSH_FILE_NAME='id_ed' - } 2>&1 | tee '{{ .LOG_FILE }}' + echo "[SSH] Importing GH keys to {{ .SSH_DIR }}/{{ .SSH_FILE_NAME }}" + task ssh:import-gh SSH_DIR='{{ .SSH_DIR }}' SSH_FILE_NAME='{{ .SSH_FILE_NAME }}' # ------------------------------------------------------------ # Nested cluster helpers (SC + kubeconfig) @@ -596,6 +364,7 @@ tasks: DEFAULT_USER: sh: yq eval '.image.defaultUser' {{ .VALUES_FILE }} SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' + SSH_FILE_NAME: '{{ .SSH_FILE_NAME | default "id_ed" }}' SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' @@ -603,103 +372,21 @@ tasks: cmds: - | set -euo pipefail - if [ ! 
-s "{{ .PARENT_KUBECONFIG_PATH }}" ]; then - echo "[ERR] parent kubeconfig not found at {{ .PARENT_KUBECONFIG_PATH }}" + NESTED_DIR="{{ .NESTED_DIR }}" + NESTED_KUBECONFIG="{{ .NESTED_KUBECONFIG }}" + if ! mkdir -p "${NESTED_DIR}"; then + echo "[ERR] Failed to create nested directory: ${NESTED_DIR}" >&2 exit 1 fi - mkdir -p {{ .NESTED_DIR }} - MASTER_NAME=$(KUBECONFIG={{ .PARENT_KUBECONFIG_PATH }} kubectl -n {{ .NAMESPACE }} get vm -l dvp.deckhouse.io/node-group=master -o jsonpath='{.items[0].metadata.name}') - if [ -z "$MASTER_NAME" ]; then - echo "[ERR] master VM not found in namespace {{ .NAMESPACE }}" >&2 - exit 1 - fi - TOKEN_FILE="{{ .NESTED_DIR }}/token.txt" - rm -f "$TOKEN_FILE" - SSH_OK=0 - for attempt in $(seq 1 6); do - if KUBECONFIG={{ .PARENT_KUBECONFIG_PATH }} d8 v ssh --username={{ .DEFAULT_USER }} --identity-file={{ .SSH_PRIV_KEY_FILE }} --local-ssh=true --local-ssh-opts="-o StrictHostKeyChecking=no" --local-ssh-opts="-o UserKnownHostsFile=/dev/null" "${MASTER_NAME}.{{ .NAMESPACE }}" -c ' - set -euo pipefail - SUDO="sudo /opt/deckhouse/bin/kubectl" - $SUDO -n kube-system get sa e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create sa e2e-admin >/dev/null 2>&1 - $SUDO -n kube-system get clusterrolebinding e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create clusterrolebinding e2e-admin --clusterrole=cluster-admin --serviceaccount=kube-system:e2e-admin >/dev/null 2>&1 - for i in $(seq 1 10); do - TOKEN=$($SUDO -n kube-system create token e2e-admin --duration=24h 2>/dev/null) && echo "$TOKEN" && break - echo "[WARN] Failed to create token (attempt $i/10); retrying in 3s" >&2 - sleep 3 - done - if [ -z "${TOKEN:-}" ]; then - echo "[ERR] Unable to create token for e2e-admin after 10 attempts" >&2 - exit 1 - fi - ' > "$TOKEN_FILE"; then - SSH_OK=1 - break - fi - echo "[WARN] d8 v ssh attempt $attempt failed; retry in 15s..." - sleep 15 - done - if [ "$SSH_OK" -ne 1 ] || [ ! -s "$TOKEN_FILE" ]; then - echo "[ERR] Failed to obtain nested token via d8 v ssh after multiple attempts" >&2 - cat "$TOKEN_FILE" 2>/dev/null || true - exit 1 - fi - NESTED_TOKEN=$(cat {{ .NESTED_DIR }}/token.txt) - SERVER_URL="https://api.{{ .NAMESPACE }}.{{ .DOMAIN }}" - { - printf 'apiVersion: v1\n' - printf 'kind: Config\n' - printf 'clusters:\n' - printf '- cluster:\n' - printf ' insecure-skip-tls-verify: true\n' - printf ' server: %s\n' "${SERVER_URL}" - printf ' name: nested\n' - printf 'contexts:\n' - printf '- context:\n' - printf ' cluster: nested\n' - printf ' user: e2e-admin\n' - printf ' name: nested\n' - printf 'current-context: nested\n' - printf 'users:\n' - printf '- name: e2e-admin\n' - printf ' user:\n' - printf ' token: %s\n' "${NESTED_TOKEN}" - } > {{ .NESTED_KUBECONFIG }} - chmod 600 {{ .NESTED_KUBECONFIG }} - echo "Generated nested kubeconfig at {{ .NESTED_KUBECONFIG }}" - - nested:ensure-sc: - desc: Ensure StorageClass exists in nested cluster - vars: - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' - NAMESPACE: "{{ .NAMESPACE }}" - NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' - NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' - SC_NAME: '{{ .SC_NAME | default "linstor-thin-r2" }}' - cmds: - - | - set -euo pipefail - if ! 
KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get sc "{{ .SC_NAME }}" >/dev/null 2>&1; then - echo "[ERR] StorageClass '{{ .SC_NAME }}' is missing in nested cluster" - exit 1 - fi - - nested:ensure-vmclass-default: - desc: Ensure default VMClass generic-for-e2e exists in nested cluster - vars: - NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" - cmds: + - chmod +x scripts/build_nested_kubeconfig.sh - | - set -euo pipefail - for i in $(seq 1 18); do - if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get vmclass generic >/dev/null 2>&1; then - KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get vmclass generic -o json \ - | jq 'del(.status) | .metadata={"name":"generic-for-e2e","annotations":{"virtualmachineclass.virtualization.deckhouse.io/is-default-class":"true"}}' \ - | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - >/dev/null - break - fi - echo "[INFO] Waiting for vmclass/generic to appear (attempt $i)..." - sleep 10 - done + scripts/build_nested_kubeconfig.sh \ + -o "{{ .NESTED_KUBECONFIG }}" \ + -n "{{ .NAMESPACE }}" \ + -d "{{ .DOMAIN }}" \ + -k "{{ .PARENT_KUBECONFIG_PATH }}" \ + -s "{{ .SSH_PRIV_KEY_FILE }}" \ + -u "{{ .DEFAULT_USER }}" nested:storage:sds: desc: Configure SDS storage profile in nested cluster @@ -708,173 +395,12 @@ tasks: SDS_SC_NAME: '{{ .SDS_SC_NAME | default "linstor-thin-r2" }}' SDS_DVCR_SIZE: '{{ .SDS_DVCR_SIZE | default "5Gi" }}' cmds: + - chmod +x scripts/configure_sds_storage.sh - | - set -euo pipefail - echo "[SDS] Waiting for API server to be ready..." - for i in $(seq 1 50); do - if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes >/dev/null 2>&1; then - echo "[SDS] API server is ready!" - break - fi - echo "[SDS] API server not ready yet, retry $i/50"; sleep 10 - done - - | - set -euo pipefail - echo "[SDS] Step 1: Enabling sds-node-configurator..." - # Ensure ModulePullOverride exists so that releases are available (use main by default) - cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply --validate=false -f - - apiVersion: deckhouse.io/v1alpha2 - kind: ModulePullOverride - metadata: - name: sds-node-configurator - spec: - imageTag: main - scanInterval: 15s - EOF - cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl -n d8-system apply --validate=false -f - - apiVersion: deckhouse.io/v1alpha1 - kind: ModuleConfig - metadata: - name: sds-node-configurator - namespace: d8-system - spec: - enabled: true - version: 1 - settings: - disableDs: false - enableThinProvisioning: true - EOF - - | - set -euo pipefail - echo "[SDS] Step 2: Waiting for sds-node-configurator to be Ready..." - for i in $(seq 1 60); do - STATUS=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get module sds-node-configurator -o json | jq -r '.status.phase // .status.status // ""' 2>/dev/null || true) - if [ "$STATUS" = "Ready" ]; then - echo "[SDS] sds-node-configurator is Ready!" - break - fi - echo "[SDS] sds-node-configurator status: ${STATUS:-NotFound}, retry $i/60" - sleep 10 - done - if [ "$STATUS" != "Ready" ]; then - echo "[WARN] sds-node-configurator not Ready after 10 minutes, proceeding anyway..." >&2 - fi - - | - set -euo pipefail - echo "[SDS] Step 3: Enabling sds-replicated-volume..." 
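# Steps 2 and 4 of this removed inline flow poll module readiness the same way;
# as one helper it would look roughly like this (sketch -- `wait_module_ready`
# is hypothetical, and the real logic now lives in configure_sds_storage.sh):
#
#   wait_module_ready() {  # usage: wait_module_ready <module-name>
#     for i in $(seq 1 60); do
#       s=$(kubectl get module "$1" -o jsonpath='{.status.phase}' 2>/dev/null || true)
#       [ "$s" = "Ready" ] && return 0
#       echo "[SDS] $1 status: ${s:-NotFound}, retry $i/60"; sleep 10
#     done
#     return 1
#   }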
- # Ensure ModulePullOverride exists so that releases are available (use main by default) - cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply --validate=false -f - - apiVersion: deckhouse.io/v1alpha2 - kind: ModulePullOverride - metadata: - name: sds-replicated-volume - spec: - imageTag: main - scanInterval: 15s - EOF - cat <<'EOF' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl -n d8-system apply --validate=false -f - - apiVersion: deckhouse.io/v1alpha1 - kind: ModuleConfig - metadata: - name: sds-replicated-volume - namespace: d8-system - spec: - enabled: true - version: 1 - EOF - - | - set -euo pipefail - echo "[SDS] Step 4: Waiting for sds-replicated-volume to be Ready..." - for i in $(seq 1 60); do - STATUS=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get module sds-replicated-volume -o json | jq -r '.status.phase // .status.status // ""' 2>/dev/null || true) - if [ "$STATUS" = "Ready" ]; then - echo "[SDS] sds-replicated-volume is Ready!" - break - fi - echo "[SDS] sds-replicated-volume status: ${STATUS:-NotFound}, retry $i/60" - sleep 10 - done - if [ "$STATUS" != "Ready" ]; then - echo "[WARN] sds-replicated-volume not Ready after 10 minutes, proceeding anyway..." >&2 - fi - - | - set -euo pipefail - echo "[SDS] Waiting for SDS CRDs to be established..." - for crd in lvmvolumegroups.storage.deckhouse.io replicatedstoragepools.storage.deckhouse.io replicatedstorageclasses.storage.deckhouse.io; do - echo "[SDS] Waiting for CRD '$crd'..." - found=0 - for i in $(seq 1 50); do - if KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get crd "$crd" >/dev/null 2>&1; then - found=1 - break - fi - echo "[SDS] CRD '$crd' not found yet, retry $i/50"; sleep 5 - done - if [ "$found" -eq 1 ]; then - KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl wait --for=condition=Established --timeout=180s crd "$crd" || true - else - echo "[WARN] CRD '$crd' not found after waiting" >&2 - fi - done - - | - set -euo pipefail - echo "[SDS] Creating per-node LVMVolumeGroups (type=Local)..." - NODES=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes -o json \ - | jq -r '.items[] | select(.metadata.labels["node-role.kubernetes.io/control-plane"]!=true and .metadata.labels["node-role.kubernetes.io/master"]!=true) | .metadata.name') - if [ -z "$NODES" ]; then - NODES=$(KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get nodes -o json | jq -r '.items[].metadata.name') - fi - for node in $NODES; do - [ -z "$node" ] && continue - # Build matchExpressions for known device paths per docs (label key is storage.deckhouse.io/device-path) - MATCH_EXPR=$(yq eval -n ' - .key = "storage.deckhouse.io/device-path" | - .operator = "In" | - .values = ["/dev/sdb","/dev/vdb","/dev/xvdb","/dev/sdc","/dev/vdc","/dev/xvdc","/dev/sdd","/dev/vdd","/dev/xvdd"] - ') - NODE="$node" MATCH_EXPR="$MATCH_EXPR" yq eval -n ' - .apiVersion = "storage.deckhouse.io/v1alpha1" | - .kind = "LVMVolumeGroup" | - .metadata.name = "data-" + env(NODE) | - .spec.type = "Local" | - .spec.local.nodeName = env(NODE) | - .spec.actualVGNameOnTheNode = "data" | - .spec.blockDeviceSelector.matchExpressions = [ env(MATCH_EXPR) ] - ' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - - done - echo "[SDS] Creating ReplicatedStoragePool 'data' from LVMVolumeGroups..." 
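# For reference, the pool manifest assembled line-by-line below renders to
# roughly this YAML (two worker nodes assumed for illustration):
#
#   apiVersion: storage.deckhouse.io/v1alpha1
#   kind: ReplicatedStoragePool
#   metadata:
#     name: data
#   spec:
#     type: LVM
#     lvmVolumeGroups:
#       - name: data-<node-1>
#       - name: data-<node-2>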
- LVGS=$(printf "%s\n" $NODES | sed 's/^/ - name: data-/') - { - echo "apiVersion: storage.deckhouse.io/v1alpha1" - echo "kind: ReplicatedStoragePool" - echo "metadata:" - echo " name: data" - echo "spec:" - echo " type: LVM" - echo " lvmVolumeGroups:" - printf "%s\n" "$LVGS" - } | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - - echo "[SDS] Creating ReplicatedStorageClass '{{ .SDS_SC_NAME }}'..." - yq eval -n ' - .apiVersion = "storage.deckhouse.io/v1alpha1" | - .kind = "ReplicatedStorageClass" | - .metadata.name = "{{ .SDS_SC_NAME }}" | - .spec.storagePool = "data" | - .spec.reclaimPolicy = "Delete" | - .spec.topology = "Ignored" | - .spec.volumeAccess = "Local" - ' | KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl apply -f - - - | - set -euo pipefail - if ! KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl get storageclass "{{ .SDS_SC_NAME }}" >/dev/null 2>&1; then - echo "[ERR] StorageClass '{{ .SDS_SC_NAME }}' not found in nested cluster" >&2 - exit 1 - fi - - | - echo "[SDS] Setting {{ .SDS_SC_NAME }} as default StorageClass..." - DEFAULT_STORAGE_CLASS="{{ .SDS_SC_NAME }}" - PATCH=$(jq -cn --arg v "$DEFAULT_STORAGE_CLASS" '[{"op":"replace","path":"/spec/settings/defaultClusterStorageClass","value":$v}]') - KUBECONFIG={{ .NESTED_KUBECONFIG }} kubectl patch mc global --type='json' -p="$PATCH" + scripts/configure_sds_storage.sh \ + -k "{{ .NESTED_KUBECONFIG }}" \ + -s "{{ .SDS_SC_NAME }}" \ + -d "{{ .SDS_DVCR_SIZE }}" # ------------------------------------------------------------ # Cleanup helpers @@ -923,58 +449,58 @@ tasks: chmod +x ./scripts/build_parent_kubeconfig.sh ./scripts/build_parent_kubeconfig.sh -o "{{ .OUTPUT }}" -a "{{ .API_URL }}" -t "{{ .SA_TOKEN }}" - values:inject-registry: - desc: Inject REGISTRY_DOCKER_CFG into values.yaml - vars: - VALUES_FILE: '{{ .VALUES_FILE | default "" }}' - REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' - cmds: - - | - set -euo pipefail - if [ -z "{{ .VALUES_FILE }}" ] || [ -z "{{ .REGISTRY_DOCKER_CFG }}" ]; then - echo "[ERR] VALUES_FILE/REGISTRY_DOCKER_CFG is empty" >&2; exit 1; fi - chmod +x ./scripts/inject_registry_cfg.sh - REGISTRY_DOCKER_CFG='{{ .REGISTRY_DOCKER_CFG }}' ./scripts/inject_registry_cfg.sh -f "{{ .VALUES_FILE }}" -v "{{ .REGISTRY_DOCKER_CFG }}" - # ------------------------------------------------------------ - # Run E2E + # CI: Unified installation task # ------------------------------------------------------------ - nested:e2e: - desc: Run virtualization E2E tests against nested cluster + install:nested:env: + desc: Install complete nested environment (infra + bootstrap + disks + kubeconfig + SDS) vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") }}' + TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" + ATTACH_DISK_SIZE: '{{ .ATTACH_DISK_SIZE | default "10Gi" }}' + EFFECTIVE_DISK_SC: "{{ .EFFECTIVE_DISK_SC }}" NAMESPACE: "{{ .NAMESPACE }}" NESTED_DIR: '{{ .NESTED_DIR | default (printf "%s/nested-%s" .TMP_DIR .NAMESPACE) }}' NESTED_KUBECONFIG: '{{ .NESTED_KUBECONFIG | default (printf "%s/kubeconfig" .NESTED_DIR) }}' - E2E_DIR: '{{ .E2E_DIR | default (env "E2E_DIR") | default "../../tests/e2e" }}' - FOCUS: '{{ or .FOCUS "" }}' - SKIP: '{{ or .SKIP "" }}' - LABELS: '{{ or .LABELS "" }}' - TIMEOUT: '{{ or .TIMEOUT "4h" }}' - JUNIT_PATH: '{{ or .JUNIT_PATH "" }}' - TARGET_STORAGE_CLASS: '{{ .STORAGE_CLASS | 
default "ceph-pool-r2-csi-rbd-immediate" }}' + SDS_SC_NAME: "{{ .SDS_SC_NAME }}" + DATA_DISK_COUNT: '{{ .DATA_DISK_COUNT | default "2" }}' cmds: - - task: nested:kubeconfig + - echo "๐Ÿ“ฆ Installing infra (namespace/RBAC/ingress)" + - task: infra-deploy vars: TMP_DIR: "{{ .TMP_DIR }}" VALUES_FILE: "{{ .VALUES_FILE }}" - NAMESPACE: "{{ .NAMESPACE }}" - NESTED_DIR: "{{ .NESTED_DIR }}" - NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" - PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' - - task: nested:ensure-sc + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" + - echo "๐Ÿš€ Bootstrapping nested cluster" + - task: dhctl-bootstrap vars: TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" + TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" + - echo "๐Ÿ’ฟ Attaching data disks to workers" + - task: infra:attach-storage-disks-hotplug + vars: + TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" + DISK_SIZE: "{{ .ATTACH_DISK_SIZE }}" + STORAGE_CLASS: "{{ .EFFECTIVE_DISK_SC }}" + DISK_COUNT: "{{ .DATA_DISK_COUNT }}" + - echo "๐Ÿ” Building nested kubeconfig" + - task: nested:kubeconfig + vars: + TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" NAMESPACE: "{{ .NAMESPACE }}" NESTED_DIR: "{{ .NESTED_DIR }}" NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" - SC_NAME: "{{ .TARGET_STORAGE_CLASS }}" - - task: nested:ensure-vmclass-default + - echo "๐Ÿ’พ Configuring SDS storage" + - task: nested:storage:sds vars: + TMP_DIR: "{{ .TMP_DIR }}" NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG }}" - - | - set -euo pipefail - export KUBECONFIG="{{ .NESTED_KUBECONFIG }}" - cd {{ .E2E_DIR }} - task run TIMEOUT='{{ .TIMEOUT }}' {{ if .FOCUS }}FOCUS='{{ .FOCUS }}'{{ end }} {{ if .LABELS }}LABELS='{{ .LABELS }}'{{ end }} {{ if .JUNIT_PATH }}JUNIT_PATH='{{ .JUNIT_PATH }}'{{ end }} + SDS_SC_NAME: "{{ .SDS_SC_NAME }}" diff --git a/ci/dvp-e2e/charts/cluster-config/Chart.yaml b/ci/dvp-e2e/charts/cluster-config/Chart.yaml index 344eb6ee44..de10150df7 100644 --- a/ci/dvp-e2e/charts/cluster-config/Chart.yaml +++ b/ci/dvp-e2e/charts/cluster-config/Chart.yaml @@ -14,5 +14,3 @@ sources: - https://github.com/deckhouse/deckhouse maintainers: - name: Deckhouse Team - email: team@deckhouse.io -dependencies: [] diff --git a/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml index 0156178a61..ffbea6ac62 100644 --- a/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml +++ b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml @@ -17,7 +17,7 @@ metadata: spec: version: 1 settings: - defaultClusterStorageClass: ceph-pool-r2-csi-rbd-immediate + defaultClusterStorageClass: {{ .Values.storageClass | quote }} modules: publicDomainTemplate: "%s.{{ .Values.namespace }}.{{ .Values.domain }}" --- diff --git a/ci/dvp-e2e/charts/infra/Chart.yaml b/ci/dvp-e2e/charts/infra/Chart.yaml index 5eb2c3bfc0..29da1942f3 100644 --- a/ci/dvp-e2e/charts/infra/Chart.yaml +++ b/ci/dvp-e2e/charts/infra/Chart.yaml @@ -13,5 +13,3 @@ sources: - https://github.com/deckhouse/deckhouse maintainers: - name: Deckhouse Team - email: team@deckhouse.io -dependencies: [] diff --git a/ci/dvp-e2e/charts/support/Chart.yaml b/ci/dvp-e2e/charts/support/Chart.yaml deleted file mode 100644 index 8eefb78886..0000000000 --- a/ci/dvp-e2e/charts/support/Chart.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: v2 -name: support 
-description: Support components for E2E testing
-type: application
-version: 0.1.0
-appVersion: "1.0.0"
-keywords:
-  - support
-  - utilities
-  - e2e
-  - testing
-home: https://github.com/deckhouse/deckhouse
-sources:
-  - https://github.com/deckhouse/deckhouse
-maintainers:
-  - name: Deckhouse Team
-    email: team@deckhouse.io
-dependencies: []
diff --git a/ci/dvp-e2e/charts/support/values.yaml b/ci/dvp-e2e/charts/support/values.yaml
deleted file mode 100644
index 8d3f37bc5b..0000000000
--- a/ci/dvp-e2e/charts/support/values.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# Support components values for E2E testing
-
-# Namespace configuration
-namespace: nightly-e2e
-
-# Loop integration
-loop:
-  webhook: ""
-  channel: "test-virtualization-loop-alerts"
-  enabled: true
-
-# Logging configuration
-logging:
-  level: "info"
-  format: "json"
-  retention: "7d"
-
-# Notification settings
-notifications:
-  slack:
-    enabled: false
-    webhook: ""
-    channel: ""
-  email:
-    enabled: false
-    smtp:
-      host: ""
-      port: 587
-      username: ""
-      password: ""
-
-# Backup configuration
-backup:
-  enabled: true
-  schedule: "0 2 * * *"
-  retention: "7d"
-  storage:
-    type: "local"
-    path: "/backups"
-
-# Health checks
-healthChecks:
-  enabled: true
-  interval: "30s"
-  timeout: "10s"
-  retries: 3
-
-# Resource monitoring
-monitoring:
-  enabled: true
-  metrics:
-    enabled: true
-    port: 8080
-  alerts:
-    enabled: true
-    rules: []
-
-# Debug settings
-debug:
-  enabled: false
-  verbose: false
-  trace: false
diff --git a/ci/dvp-e2e/scripts/attach_worker_disks.sh b/ci/dvp-e2e/scripts/attach_worker_disks.sh
new file mode 100755
index 0000000000..f6d0b2ca94
--- /dev/null
+++ b/ci/dvp-e2e/scripts/attach_worker_disks.sh
@@ -0,0 +1,206 @@
+#!/usr/bin/env bash
+
+# Copyright 2025 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+# Usage:
+#   attach_worker_disks.sh -n namespace -s storage_class -z disk_size -c disk_count [-k kubeconfig]
+
+namespace=""
+storage_class=""
+disk_size="10Gi"
+disk_count="2"
+kubeconfig="${KUBECONFIG:-}"
+
+while getopts ":n:s:z:c:k:" opt; do
+  case $opt in
+    n) namespace="$OPTARG" ;;
+    s) storage_class="$OPTARG" ;;
+    z) disk_size="$OPTARG" ;;
+    c) disk_count="$OPTARG" ;;
+    k) kubeconfig="$OPTARG" ;;
+    *)
+      echo "Usage: $0 -n <namespace> -s <storage_class> -z <disk_size> -c <disk_count> [-k <kubeconfig>]" >&2
+      exit 2
+      ;;
+  esac
+done
+
+if [ -z "${namespace}" ] || [ -z "${storage_class}" ]; then
+  echo "Usage: $0 -n <namespace> -s <storage_class> -z <disk_size> -c <disk_count> [-k <kubeconfig>]" >&2
+  exit 2
+fi
+
+if [ -n "${kubeconfig}" ]; then
+  export KUBECONFIG="${kubeconfig}"
+fi
+
+echo "[INFRA] Attaching ${disk_count} storage disks to worker VMs using hotplug in namespace ${namespace}"
+
+# Wait for worker VMs
+for i in $(seq 1 50); do
+  worker_count=$(kubectl -n "${namespace}" get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep -c worker || echo "0")
+  if [ "$worker_count" -gt 0 ]; then
+    echo "[INFRA] Found $worker_count worker VMs"
+    break
+  fi
+  echo "[INFRA] Waiting for worker VMs... ($i/50)"
($i/50)" + sleep 10 +done + +# Get worker VMs +mapfile -t workers < <(kubectl -n "${namespace}" get vm -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}' 2>/dev/null | grep worker || true) + +if [ ${#workers[@]} -eq 0 ]; then + echo "[INFRA] No worker VMs found; nothing to do" + exit 0 +fi + +echo "[INFRA] Found ${#workers[@]} worker VMs: ${workers[*]}" + +for vm in "${workers[@]}"; do + [ -z "$vm" ] && continue + echo "[INFRA] Processing VM: $vm" + + # Wait for VM to be Running + for i in $(seq 1 50); do + phase=$(kubectl -n "${namespace}" get vm "$vm" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$phase" = "Running" ]; then + echo "[INFRA] VM $vm is Running" + break + fi + echo "[INFRA] VM $vm phase=$phase; retry $i/50" + sleep 10 + done + + for disk_num in $(seq 1 "${disk_count}"); do + vd="storage-disk-${disk_num}-$vm" + echo "[INFRA] Creating VirtualDisk $vd (${disk_size}, sc=${storage_class})" + + cat > "/tmp/vd-$vd.yaml" </dev/null 2>&1 || kubectl -n "${namespace}" apply -f "/tmp/vd-$vd.yaml" + + # Wait for VirtualDisk to be Ready + echo "[INFRA] Waiting for VirtualDisk $vd to be Ready..." + vd_phase="" + for j in $(seq 1 50); do + vd_phase=$(kubectl -n "${namespace}" get vd "$vd" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$vd_phase" = "Ready" ]; then + echo "[INFRA] VirtualDisk $vd is Ready" + break + fi + echo "[INFRA] VD $vd phase=$vd_phase; retry $j/50" + sleep 5 + done + + if [ "$vd_phase" != "Ready" ]; then + echo "[ERROR] VirtualDisk $vd not Ready" + kubectl -n "${namespace}" get vd "$vd" -o yaml || true + kubectl -n "${namespace}" get events --sort-by=.lastTimestamp | tail -n 100 || true + exit 1 + fi + + # Wait for PVC + pvc_name="" + for j in $(seq 1 50); do + pvc_name=$(kubectl -n "${namespace}" get vd "$vd" -o jsonpath='{.status.target.persistentVolumeClaimName}' 2>/dev/null || true) + [ -n "$pvc_name" ] && break + echo "[INFRA] Waiting for PVC name for VD $vd; retry $j/50" + sleep 3 + done + + if [ -n "$pvc_name" ]; then + echo "[INFRA] Waiting PVC $pvc_name to reach phase=Bound..." + for j in $(seq 1 120); do + pvc_phase=$(kubectl -n "${namespace}" get pvc "$pvc_name" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$pvc_phase" = "Bound" ]; then + break + fi + [ $((j % 10)) -eq 0 ] && echo "[INFRA] PVC $pvc_name phase=$pvc_phase; retry $j/120" + sleep 2 + done + if [ "$pvc_phase" != "Bound" ]; then + echo "[WARN] PVC $pvc_name not Bound after waiting" + fi + fi + + # Create hotplug attachment + att="att-$vd" + echo "[INFRA] Creating VirtualMachineBlockDeviceAttachment $att for VM $vm" + cat > "/tmp/att-$att.yaml" </dev/null 2>&1 || kubectl -n "${namespace}" apply -f "/tmp/att-$att.yaml" + + # Wait for attachment + echo "[INFRA] Waiting for VMBDA $att to be Attached..." + att_phase="" + success_by_vm=0 + for i in $(seq 1 50); do + att_phase=$(kubectl -n "${namespace}" get vmbda "$att" -o jsonpath='{.status.phase}' 2>/dev/null || true) + if [ "$att_phase" = "Attached" ]; then + echo "[INFRA] Disk $vd attached to VM $vm" + break + fi + if kubectl -n "${namespace}" get vm "$vm" -o json 2>/dev/null \ + | jq -e --arg vd "$att" --arg disk "$vd" ' + ([.status.blockDeviceRefs[]? 
+              | select(
+                  (.virtualMachineBlockDeviceAttachmentName == $vd)
+                  or (.name == $disk)
+                )
+              | select((.attached == true) and (.hotplugged == true))
+            ] | length) > 0' >/dev/null; then
+        echo "[INFRA] VM reports disk $vd attached/hotplugged; proceeding"
+        success_by_vm=1
+        break
+      fi
+      [ $((i % 10)) -eq 0 ] && echo "[INFRA] Disk $vd phase=$att_phase; retry $i/50"
+      sleep 5
+    done
+
+    if [ "$att_phase" != "Attached" ] && [ "${success_by_vm:-0}" -ne 1 ]; then
+      echo "[ERROR] VMBDA $att did not reach Attached state"
+      kubectl -n "${namespace}" get vmbda "$att" -o yaml || true
+      kubectl -n "${namespace}" get vm "$vm" -o json || true
+      kubectl -n "${namespace}" get events --sort-by=.lastTimestamp | tail -n 100 || true
+      exit 1
+    fi
+  done
+
+  echo "[INFRA] VM $vm configured with hotplug disks"
+done
+
+echo "[INFRA] All worker VMs configured with storage disks via hotplug"
diff --git a/ci/dvp-e2e/scripts/build_nested_kubeconfig.sh b/ci/dvp-e2e/scripts/build_nested_kubeconfig.sh
new file mode 100755
index 0000000000..f7d9a34d7f
--- /dev/null
+++ b/ci/dvp-e2e/scripts/build_nested_kubeconfig.sh
@@ -0,0 +1,144 @@
+#!/usr/bin/env bash
+
+# Copyright 2025 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+# Usage:
+#   build_nested_kubeconfig.sh -o /path/to/kubeconfig -n namespace -d domain -k parent_kubeconfig -s ssh_key -u user
+
+out=""
+namespace=""
+domain=""
+parent_kubeconfig=""
+ssh_key=""
+ssh_user="ubuntu"
+
+while getopts ":o:n:d:k:s:u:" opt; do
+  case $opt in
+    o) out="$OPTARG" ;;
+    n) namespace="$OPTARG" ;;
+    d) domain="$OPTARG" ;;
+    k) parent_kubeconfig="$OPTARG" ;;
+    s) ssh_key="$OPTARG" ;;
+    u) ssh_user="$OPTARG" ;;
+    *)
+      echo "Usage: $0 -o <output> -n <namespace> -d <domain> -k <parent_kubeconfig> -s <ssh_key> [-u <ssh_user>]" >&2
+      exit 2
+      ;;
+  esac
+done
+
+if [ -z "${out}" ] || [ -z "${namespace}" ] || [ -z "${domain}" ] || [ -z "${parent_kubeconfig}" ] || [ -z "${ssh_key}" ]; then
+  echo "Usage: $0 -o <output> -n <namespace> -d <domain> -k <parent_kubeconfig> -s <ssh_key> [-u <ssh_user>]" >&2
+  exit 2
+fi
+
+if [ ! -s "${parent_kubeconfig}" ]; then
+  echo "[ERR] parent kubeconfig not found at ${parent_kubeconfig}" >&2
+  exit 1
+fi
+
+if [ ! -f "${ssh_key}" ]; then
+  echo "[ERR] SSH key not found at ${ssh_key}" >&2
+  exit 1
+fi
+
+# Create output directory
+OUT_DIR="$(dirname "$out")"
+if ! mkdir -p "${OUT_DIR}"; then
+  echo "[ERR] Failed to create output directory: ${OUT_DIR}" >&2
+  exit 1
+fi
+
+# Find master VM
+echo "[INFO] Finding master VM in namespace ${namespace}..."
+MASTER_NAME=$(KUBECONFIG="${parent_kubeconfig}" kubectl -n "${namespace}" get vm -l dvp.deckhouse.io/node-group=master -o jsonpath='{.items[0].metadata.name}')
+if [ -z "$MASTER_NAME" ]; then
+  echo "[ERR] master VM not found in namespace ${namespace}" >&2
+  exit 1
+fi
+echo "[INFO] Found master VM: ${MASTER_NAME}"
+
+# Get token via SSH
+TOKEN_FILE="$(dirname "$out")/token.txt"
+rm -f "$TOKEN_FILE"
+SSH_OK=0
+
+echo "[INFO] Obtaining token from nested cluster..."
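+# The retry loop below goes through the DVP jump-host via "d8 v ssh" to the
+# master VM, creates a kube-system/e2e-admin ServiceAccount bound to
+# cluster-admin on first use, and mints a 24h token; the outer SSH attempts
+# (6 x 15s) and the inner token creation (10 x 3s) are retried independently.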
+for attempt in $(seq 1 6); do
+  if KUBECONFIG="${parent_kubeconfig}" d8 v ssh \
+    --username="${ssh_user}" \
+    --identity-file="${ssh_key}" \
+    --local-ssh=true \
+    --local-ssh-opts="-o StrictHostKeyChecking=no" \
+    --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \
+    "${MASTER_NAME}.${namespace}" \
+    -c '
+      set -euo pipefail
+      SUDO="sudo /opt/deckhouse/bin/kubectl"
+      $SUDO -n kube-system get sa e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create sa e2e-admin >/dev/null 2>&1
+      $SUDO -n kube-system get clusterrolebinding e2e-admin >/dev/null 2>&1 || $SUDO -n kube-system create clusterrolebinding e2e-admin --clusterrole=cluster-admin --serviceaccount=kube-system:e2e-admin >/dev/null 2>&1
+      for i in $(seq 1 10); do
+        TOKEN=$($SUDO -n kube-system create token e2e-admin --duration=24h 2>/dev/null) && echo "$TOKEN" && break
+        echo "[WARN] Failed to create token (attempt $i/10); retrying in 3s" >&2
+        sleep 3
+      done
+      if [ -z "${TOKEN:-}" ]; then
+        echo "[ERR] Unable to create token for e2e-admin after 10 attempts" >&2
+        exit 1
+      fi
+    ' > "$TOKEN_FILE"; then
+    SSH_OK=1
+    break
+  fi
+  echo "[WARN] d8 v ssh attempt $attempt failed; retry in 15s..."
+  sleep 15
+done
+
+if [ "$SSH_OK" -ne 1 ] || [ ! -s "$TOKEN_FILE" ]; then
+  echo "[ERR] Failed to obtain nested token via d8 v ssh after multiple attempts" >&2
+  cat "$TOKEN_FILE" 2>/dev/null || true
+  exit 1
+fi
+
+NESTED_TOKEN=$(cat "$TOKEN_FILE")
+SERVER_URL="https://api.${namespace}.${domain}"
+
+# Generate kubeconfig
+cat > "$out" <<EOF
+apiVersion: v1
+kind: Config
+clusters:
+  - name: nested
+    cluster:
+      server: ${SERVER_URL}
+      insecure-skip-tls-verify: true
+contexts:
+  - name: nested
+    context:
+      cluster: nested
+      user: e2e-admin
+current-context: nested
+users:
+  - name: e2e-admin
+    user:
+      token: ${NESTED_TOKEN}
+EOF
+
+echo "[INFO] Nested kubeconfig written to ${out}"
diff --git a/ci/dvp-e2e/scripts/configure_sds_storage.sh b/ci/dvp-e2e/scripts/configure_sds_storage.sh
new file mode 100755
--- /dev/null
+++ b/ci/dvp-e2e/scripts/configure_sds_storage.sh
+#!/usr/bin/env bash
+
+# Copyright 2025 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+# Usage:
+#   configure_sds_storage.sh -k kubeconfig -s storage_class [-d dvcr_size]
+
+kubeconfig=""
+storage_class=""
+dvcr_size="5Gi"
+
+while getopts ":k:s:d:" opt; do
+  case $opt in
+    k) kubeconfig="$OPTARG" ;;
+    s) storage_class="$OPTARG" ;;
+    d) dvcr_size="$OPTARG" ;;
+    *)
+      echo "Usage: $0 -k <kubeconfig> -s <storage_class> [-d <dvcr_size>]" >&2
+      exit 2
+      ;;
+  esac
+done
+
+if [ -z "${kubeconfig}" ] || [ ! -f "${kubeconfig}" ]; then
+  echo "Error: kubeconfig is required and must exist" >&2
+  exit 2
+fi
+
+export KUBECONFIG="${kubeconfig}"
+
+# Step 0: Wait for API server
+echo "[SDS] Waiting for API server to be ready..."
+for i in $(seq 1 50); do
+  if kubectl get nodes >/dev/null 2>&1; then
+    echo "[SDS] API server is ready!"
+    break
+  fi
+  echo "[SDS] API server not ready yet, retry $i/50"
+  sleep 10
+done
+
+# Step 1: Enable sds-node-configurator
+echo "[SDS] Step 1: Enabling sds-node-configurator..."
+cat <<'EOF' | kubectl apply -f -
+apiVersion: deckhouse.io/v1alpha2
+kind: ModulePullOverride
+metadata:
+  name: sds-node-configurator
+spec:
+  imageTag: main
+  scanInterval: 15s
+EOF
+
+cat <<'EOF' | kubectl -n d8-system apply -f -
+apiVersion: deckhouse.io/v1alpha1
+kind: ModuleConfig
+metadata:
+  name: sds-node-configurator
+  namespace: d8-system
+spec:
+  enabled: true
+  version: 1
+  settings:
+    disableDs: false
+    enableThinProvisioning: true
+EOF
+
+# Step 2: Wait for sds-node-configurator
+echo "[SDS] Step 2: Waiting for sds-node-configurator to be Ready..."
+if ! kubectl wait module sds-node-configurator --for=jsonpath='{.status.phase}'=Ready --timeout=600s >/dev/null 2>&1; then
+  echo "[WARN] sds-node-configurator did not reach Ready within 10 minutes" >&2
+fi
+
+# Step 3: Enable sds-replicated-volume
+echo "[SDS] Step 3: Enabling sds-replicated-volume..."
+cat <<'EOF' | kubectl apply -f -
+apiVersion: deckhouse.io/v1alpha2
+kind: ModulePullOverride
+metadata:
+  name: sds-replicated-volume
+spec:
+  imageTag: main
+  scanInterval: 15s
+EOF
+
+cat <<'EOF' | kubectl -n d8-system apply -f -
+apiVersion: deckhouse.io/v1alpha1
+kind: ModuleConfig
+metadata:
+  name: sds-replicated-volume
+  namespace: d8-system
+spec:
+  enabled: true
+  version: 1
+EOF
+
+# Step 4: Wait for sds-replicated-volume
+echo "[SDS] Step 4: Waiting for sds-replicated-volume to be Ready..."
+if ! kubectl wait module sds-replicated-volume --for=jsonpath='{.status.phase}'=Ready --timeout=600s >/dev/null 2>&1; then
+  echo "[WARN] sds-replicated-volume did not reach Ready within 10 minutes" >&2
+fi
+
+# Step 5: Create LVMVolumeGroups per node
+echo "[SDS] Creating per-node LVMVolumeGroups (type=Local)..."
+# Select worker nodes: label *presence* (not value) marks control-plane/master nodes.
+NODES=$(kubectl get nodes -o json \
+  | jq -r '.items[] | select((.metadata.labels | has("node-role.kubernetes.io/control-plane") or has("node-role.kubernetes.io/master")) | not) | .metadata.name')
+
+if [ -z "$NODES" ]; then
+  NODES=$(kubectl get nodes -o json | jq -r '.items[].metadata.name')
+fi
+
+for node in $NODES; do
+  [ -z "$node" ] && continue
+  MATCH_EXPR=$(yq eval -n '
+    .key = "storage.deckhouse.io/device-path" |
+    .operator = "In" |
+    .values = ["/dev/sdb","/dev/vdb","/dev/xvdb","/dev/sdc","/dev/vdc","/dev/xvdc","/dev/sdd","/dev/vdd","/dev/xvdd"]
+  ')
+  NODE="$node" MATCH_EXPR="$MATCH_EXPR" yq eval -n '
+    .apiVersion = "storage.deckhouse.io/v1alpha1" |
+    .kind = "LVMVolumeGroup" |
+    .metadata.name = "data-" + env(NODE) |
+    .spec.type = "Local" |
+    .spec.local.nodeName = env(NODE) |
+    .spec.actualVGNameOnTheNode = "data" |
+    .spec.blockDeviceSelector.matchExpressions = [ env(MATCH_EXPR) ]
+  ' | kubectl apply -f -
+done
+
+# Step 6: Create ReplicatedStoragePool
+echo "[SDS] Creating ReplicatedStoragePool 'data' from LVMVolumeGroups..."
+LVGS=$(printf "%s\n" $NODES | sed 's/^/    - name: data-/')
+
+cat <<EOF | kubectl apply -f -
+apiVersion: storage.deckhouse.io/v1alpha1
+kind: ReplicatedStoragePool
+metadata:
+  name: data
+spec:
+  type: LVM
+  lvmVolumeGroups:
+${LVGS}
+EOF
+
+# Step 7: Create ReplicatedStorageClass
+echo "[SDS] Creating ReplicatedStorageClass '${storage_class}'..."
+cat <<EOF | kubectl apply -f -
+apiVersion: storage.deckhouse.io/v1alpha1
+kind: ReplicatedStorageClass
+metadata:
+  name: ${storage_class}
+spec:
+  storagePool: data
+  reclaimPolicy: Delete
+  topology: Ignored
+  volumeAccess: Local
+EOF
+
+# Step 8: Verify the StorageClass exists
+if ! kubectl get storageclass "${storage_class}" >/dev/null 2>&1; then
+  echo "[ERR] StorageClass '${storage_class}' not found in nested cluster" >&2
+  exit 1
+fi
+
+# Step 9: Set default StorageClass
+echo "[SDS] Setting '${storage_class}' as default StorageClass via ModuleConfig global..."
+PATCH=$(jq -cn --arg v "${storage_class}" '[{"op":"replace","path":"/spec/settings/defaultClusterStorageClass","value":$v}]')
+kubectl patch mc global --type='json' -p="$PATCH"
+
+echo "[SDS] SDS storage configuration complete!"
diff --git a/ci/dvp-e2e/scripts/get_profile_config.sh b/ci/dvp-e2e/scripts/get_profile_config.sh
deleted file mode 100755
index c6f1e7ea8d..0000000000
--- a/ci/dvp-e2e/scripts/get_profile_config.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/bash
-
-# Copyright 2025 Flant JSC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Script to get storage class configuration from profiles.json
-# Usage: get_profile_config.sh <profile>
-
-set -euo pipefail
-
-PROFILE="${1:-}"
-PROFILES_FILE="./profiles.json"
-
-if [[ -z "$PROFILE" ]]; then
-  echo "Usage: $0 <profile>" >&2
-  exit 1
-fi
-
-if [[ ! -f "$PROFILES_FILE" ]]; then
-f "$PROFILES_FILE" ]]; then - echo "Profiles file not found: $PROFILES_FILE" >&2 - exit 1 -fi - -# Use jq to find profile by exact name only -PROFILE_CONFIG=$(jq -r --arg profile "$PROFILE" ' - .[] | select(.name == $profile) | - "\(.storage_class)|\(.image_storage_class)|\(.snapshot_storage_class)|\(.parent_storage_class)|\(.worker_data_disk_size // "10Gi")" -' "$PROFILES_FILE") - -if [[ -z "$PROFILE_CONFIG" || "$PROFILE_CONFIG" == "null" ]]; then - echo "Profile '$PROFILE' not found in $PROFILES_FILE" >&2 - echo "Available profiles:" >&2 - jq -r '.[] | " - \(.name)"' "$PROFILES_FILE" >&2 - exit 1 -fi - -# Split the result and export variables -IFS='|' read -r SC IMG_SC SNAP_SC PARENT_SC ATTACH_SIZE <<< "$PROFILE_CONFIG" - -echo "STORAGE_CLASS=$SC" -echo "IMAGE_STORAGE_CLASS=$IMG_SC" -echo "SNAPSHOT_STORAGE_CLASS=$SNAP_SC" -echo "PARENT_STORAGE_CLASS=$PARENT_SC" -echo "ATTACH_DISK_SIZE=$ATTACH_SIZE" diff --git a/ci/dvp-e2e/scripts/inject_registry_cfg.sh b/ci/dvp-e2e/scripts/inject_registry_cfg.sh deleted file mode 100755 index 302e00fad8..0000000000 --- a/ci/dvp-e2e/scripts/inject_registry_cfg.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2025 Flant JSC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -euo pipefail - -# Usage: -# inject_registry_cfg.sh -f /path/to/values.yaml -v - -file="" -val="${REGISTRY_DOCKER_CFG:-}" - -while getopts ":f:v:" opt; do - case $opt in - f) file="$OPTARG" ;; - v) val="$OPTARG" ;; - *) echo "Usage: $0 -f -v " >&2; exit 2 ;; - esac -done - -if [ -z "${file}" ] || [ -z "${val}" ]; then - echo "Usage: $0 -f -v " >&2 - exit 2 -fi - -export VAL="$val" -yq eval --inplace '.deckhouse.registryDockerCfg = strenv(VAL)' "$file" From ff007a7a560e9a6742e289bd6e7ca16e58d3d36b Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Tue, 18 Nov 2025 20:57:26 +0300 Subject: [PATCH 13/14] refactor(ci): consolidate e2e workflow, move bootstrap to task, use matrix include --- .github/workflows/e2e-matrix.yml | 203 +++++------------- ci/dvp-e2e/Taskfile.yaml | 136 +++++++++++- .../cluster-config/templates/e2e-sa.yaml | 8 - .../charts/cluster-config/templates/mc.yaml | 24 +++ .../templates/virtualization.yaml | 26 --- ci/dvp-e2e/profiles.json | 11 - 6 files changed, 205 insertions(+), 203 deletions(-) delete mode 100644 ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml delete mode 100644 ci/dvp-e2e/profiles.json diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 3dd0c267dc..9f4469278b 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-name: E2E Matrix Tests (Skeleton) +name: E2E Storage Matrix on: push: @@ -30,67 +30,34 @@ on: permissions: contents: read -concurrency: - group: e2e-matrix-${{ github.head_ref || github.ref_name }} - cancel-in-progress: true - env: E2E_K8S_URL: https://api.e2e.virtlab.flant.com jobs: - # ============================================ - # 1. SETUP - Environment preparation - # ============================================ - setup-nested-envs: - name: Setup Nested Envs - runs-on: ubuntu-latest - concurrency: - group: setup-nested-envs-${{ github.head_ref || github.ref_name }} - cancel-in-progress: true - outputs: - run_id: ${{ steps.prep.outputs.run_id }} - profile: ${{ steps.load.outputs.profile }} - steps: - - uses: actions/checkout@v4 - - - name: Load storage profile - id: load - run: | - cd ci/dvp-e2e - # Map sds-replicated-volume to sds profile from profiles.json - PROFILE=$(jq -r '.[0].name' profiles.json) - echo "profile=$PROFILE" >> "$GITHUB_OUTPUT" - echo "Will test profile: $PROFILE (mapped from sds-replicated-volume)" - - - name: Prepare run context - id: prep - run: | - RUN_ID="nightly-nested-e2e-sds-$(date +%H%M%S)" - PROFILE="${{ steps.load.outputs.profile }}" - echo "run_id=$RUN_ID" >> "$GITHUB_OUTPUT" - mkdir -p ./tmp/run-context - { - echo "profile: ${PROFILE}" - echo "run_id: ${RUN_ID}" - echo "timestamp: $(date -Iseconds)" - } > ./tmp/run-context/config.yaml - - # ============================================ - # 2. PREPARE - Cluster preparation - # ============================================ - prepare: - name: Prepare Cluster - needs: [setup-nested-envs] + setup: + name: Setup (${{ matrix.profile }}) runs-on: ubuntu-latest - timeout-minutes: 300 + strategy: + matrix: + include: + - profile: sds-replicated-volume + storage_name: sds + storage_class: linstor-thin-r2 + parent_storage_class: linstor-thin-r1-immediate + image_storage_class: linstor-thin-r1-immediate + attach_disk_size: 10Gi + data_disk_count: 2 concurrency: - group: prepare-${{ github.head_ref || github.ref_name }}-${{ needs.setup-nested-envs.outputs.profile }} + group: setup-${{ github.head_ref || github.ref_name }}-${{ matrix.profile }} cancel-in-progress: true env: - PROFILE: ${{ needs.setup-nested-envs.outputs.profile }} - TMP_ROOT: ${{ github.workspace }}/ci/dvp-e2e/tmp - REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} - + RUN_ID: nightly-nested-e2e-${{ matrix.storage_name }}-${{ github.run_number }} + PROFILE: ${{ matrix.profile }} + STORAGE_CLASS: ${{ matrix.storage_class }} + PARENT_STORAGE_CLASS: ${{ matrix.parent_storage_class }} + IMAGE_STORAGE_CLASS: ${{ matrix.image_storage_class }} + ATTACH_DISK_SIZE: ${{ matrix.attach_disk_size }} + DATA_DISK_COUNT: ${{ matrix.data_disk_count }} steps: - uses: actions/checkout@v4 @@ -126,116 +93,56 @@ jobs: curl -L -o /usr/local/bin/yq https://github.com/mikefarah/yq/releases/download/v4.44.1/yq_linux_amd64 chmod +x /usr/local/bin/yq - - name: Prepare environment - id: prep - run: | - RUN_ID="${{ needs.setup-nested-envs.outputs.run_id }}" - echo "RUN_ID=$RUN_ID" >> "$GITHUB_ENV" - echo "TMP_ROOT=${{ env.TMP_ROOT }}" >> "$GITHUB_ENV" - mkdir -p "${{ env.TMP_ROOT }}/shared" "${{ env.TMP_ROOT }}/matrix-logs" - - - name: Build parent kubeconfig from secret - working-directory: ci/dvp-e2e - run: | - KCFG="$HOME/.kube/config" - task parent:kubeconfig OUTPUT="$KCFG" API_URL="${E2E_K8S_URL}" SA_TOKEN="${{ secrets.E2E_NESTED_SA_SECRET }}" - echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV" - - - name: Prepare run values.yaml + - name: Setup nested environment + env: + 
RUN_ID: ${{ env.RUN_ID }} + PROFILE: ${{ env.PROFILE }} + STORAGE_CLASS: ${{ env.STORAGE_CLASS }} + PARENT_STORAGE_CLASS: ${{ env.PARENT_STORAGE_CLASS }} + IMAGE_STORAGE_CLASS: ${{ env.IMAGE_STORAGE_CLASS }} + ATTACH_DISK_SIZE: ${{ env.ATTACH_DISK_SIZE }} + DATA_DISK_COUNT: ${{ env.DATA_DISK_COUNT }} + REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + API_URL: ${{ env.E2E_K8S_URL }} + SA_TOKEN: ${{ secrets.E2E_NESTED_SA_SECRET }} working-directory: ci/dvp-e2e run: | - task run:values:prepare \ - RUN_ID="${{ env.RUN_ID }}" \ - RUN_NAMESPACE="${{ env.RUN_ID }}" \ - RUN_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" - - - name: Configure registry auth for installer pull - run: | - mkdir -p ~/.docker - printf '%s' "$REGISTRY_DOCKER_CFG" | base64 -d > ~/.docker/config.json - - - name: Configure storage profile - working-directory: ci/dvp-e2e - id: profile-config - run: | - PROFILE_JSON=$(jq -c --arg profile "$PROFILE" '.[] | select(.name == $profile)' profiles.json) - if [ -z "$PROFILE_JSON" ]; then - echo "Profile '$PROFILE' not found in profiles.json" >&2 - echo "Available profiles:" >&2 - jq -r '.[] | " - \(.name)"' profiles.json >&2 - exit 1 - fi - - STORAGE_CLASS=$(jq -r '.storage_class // ""' <<<"$PROFILE_JSON") - IMAGE_STORAGE_CLASS=$(jq -r '.image_storage_class // ""' <<<"$PROFILE_JSON") - SNAPSHOT_STORAGE_CLASS=$(jq -r '.snapshot_storage_class // ""' <<<"$PROFILE_JSON") - PARENT_STORAGE_CLASS=$(jq -r '.parent_storage_class // ""' <<<"$PROFILE_JSON") - ATTACH_DISK_SIZE=$(jq -r '.worker_data_disk_size // "10Gi"' <<<"$PROFILE_JSON") - - echo "Profile: ${PROFILE}" - echo "Storage Class: ${STORAGE_CLASS}" - echo "Image Storage Class: ${IMAGE_STORAGE_CLASS}" - echo "Snapshot Storage Class: ${SNAPSHOT_STORAGE_CLASS}" - echo "Parent Storage Class: ${PARENT_STORAGE_CLASS}" - echo "Attach Disk Size: ${ATTACH_DISK_SIZE}" - - # Export variables to GitHub Actions environment - echo "STORAGE_CLASS=${STORAGE_CLASS}" >> $GITHUB_ENV - echo "PARENT_STORAGE_CLASS=${PARENT_STORAGE_CLASS}" >> $GITHUB_ENV - echo "ATTACH_DISK_SIZE=${ATTACH_DISK_SIZE}" >> $GITHUB_ENV - # Pass storage profile into run values for Helm templates - yq eval --inplace ".storageProfile = \"${PROFILE}\"" "${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" - # Effective disk SC used for worker data disks (prefer image SC when set) - EFF_DISK_SC=${IMAGE_STORAGE_CLASS:-$STORAGE_CLASS} - echo "EFFECTIVE_DISK_SC=${EFF_DISK_SC}" >> $GITHUB_ENV - - - name: Install nested environment - working-directory: ci/dvp-e2e - run: | - task install:nested:env \ - TMP_DIR="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}" \ - VALUES_FILE="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/values.yaml" \ - PARENT_KUBECONFIG="${KUBECONFIG}" \ - TARGET_STORAGE_CLASS="${{ env.PARENT_STORAGE_CLASS }}" \ - ATTACH_DISK_SIZE="${{ env.ATTACH_DISK_SIZE }}" \ - EFFECTIVE_DISK_SC="${{ env.EFFECTIVE_DISK_SC }}" \ - NAMESPACE="${{ env.RUN_ID }}" \ - NESTED_KUBECONFIG="${{ env.TMP_ROOT }}/runs/${{ env.RUN_ID }}/nested/kubeconfig" \ - SDS_SC_NAME="${{ env.STORAGE_CLASS }}" + task ci:setup-nested-env cleanup: - name: Cleanup [skeleton] - needs: [setup-nested-envs, prepare] + name: Cleanup (${{ matrix.profile }}) + needs: setup if: always() runs-on: ubuntu-latest + strategy: + matrix: + include: + - profile: sds-replicated-volume + storage_name: sds + storage_class: linstor-thin-r2 + parent_storage_class: linstor-thin-r1-immediate + image_storage_class: linstor-thin-r1-immediate + attach_disk_size: 10Gi + data_disk_count: 2 env: CLEANUP_PREFIX: ${{ 
vars.CLEANUP_PREFIX || 'nightly-nested-e2e-' }}
     steps:
       - uses: actions/checkout@v4
 
+      - name: Install Task
+        uses: arduino/setup-task@v2
+        with:
+          version: 3.x
+
       - name: Install kubectl
         uses: azure/setup-kubectl@v4
         with:
           version: "latest"
 
-      - name: Install Task
-        uses: arduino/setup-task@v2
-
-      - name: Build parent kubeconfig from secret
-        working-directory: ci/dvp-e2e
-        run: |
-          KCFG="$HOME/.kube/config"
-          task parent:kubeconfig OUTPUT="$KCFG" API_URL="${E2E_K8S_URL}" SA_TOKEN="${{ secrets.E2E_NESTED_SA_SECRET }}"
-          echo "KUBECONFIG=$KCFG" >> "$GITHUB_ENV"
-
       - name: Cleanup test namespaces
         working-directory: ci/dvp-e2e
         run: |
-          task cleanup:namespaces PREFIX="${CLEANUP_PREFIX}" PARENT_KUBECONFIG="${KUBECONFIG}"
-
-      - name: Report cleanup results
-        if: always()
-        run: |
-          echo "### Cleanup Results" >> $GITHUB_STEP_SUMMARY
-          echo "✅ Cleanup job completed" >> $GITHUB_STEP_SUMMARY
-          echo "🧹 Attempted to clean up namespaces with prefix '${CLEANUP_PREFIX}'" >> $GITHUB_STEP_SUMMARY
+          task cleanup:namespaces \
+            PREFIX="${CLEANUP_PREFIX}" \
+            API_URL="${E2E_K8S_URL}" \
+            SA_TOKEN="${{ secrets.E2E_NESTED_SA_SECRET }}"
diff --git a/ci/dvp-e2e/Taskfile.yaml b/ci/dvp-e2e/Taskfile.yaml
index 49c179f464..e4ff030c00 100644
--- a/ci/dvp-e2e/Taskfile.yaml
+++ b/ci/dvp-e2e/Taskfile.yaml
@@ -85,6 +85,96 @@ tasks:
         export PREFIX_INPUT
         yq eval --inplace '.clusterConfigurationPrefix = strenv(PREFIX_INPUT)' {{ .TARGET_VALUES_FILE }}
 
+  # ------------------------------------------------------------
+  # CI: Setup nested environment (main entry point)
+  # ------------------------------------------------------------
+  ci:setup-nested-env:
+    desc: Setup complete nested environment for CI (prepare + infra + bootstrap + storage)
+    vars:
+      RUN_ID: '{{ .RUN_ID | default (env "RUN_ID") | default "" }}'
+      PROFILE: '{{ .PROFILE | default (env "PROFILE") | default "" }}'
+      STORAGE_CLASS: '{{ .STORAGE_CLASS | default (env "STORAGE_CLASS") | default "" }}'
+      IMAGE_STORAGE_CLASS: '{{ .IMAGE_STORAGE_CLASS | default (env "IMAGE_STORAGE_CLASS") | default "" }}'
+      PARENT_STORAGE_CLASS: '{{ .PARENT_STORAGE_CLASS | default (env "PARENT_STORAGE_CLASS") | default "" }}'
+      ATTACH_DISK_SIZE: '{{ .ATTACH_DISK_SIZE | default (env "ATTACH_DISK_SIZE") | default "10Gi" }}'
+      DATA_DISK_COUNT: '{{ .DATA_DISK_COUNT | default (env "DATA_DISK_COUNT") | default "2" }}'
+      REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}'
+      API_URL: '{{ .API_URL | default (env "API_URL") | default (env "E2E_K8S_URL") | default "" }}'
+      SA_TOKEN: '{{ .SA_TOKEN | default (env "SA_TOKEN") | default (env "E2E_NESTED_SA_SECRET") | default "" }}'
+      RUN_DIR: '{{ printf "%s/runs/%s" .TMP_ROOT .RUN_ID }}'
+      VALUES_FILE_PATH: '{{ printf "%s/values.yaml" .RUN_DIR }}'
+      PARENT_KUBECONFIG_PATH: '{{ printf "%s/parent.kubeconfig" .RUN_DIR }}'
+      NESTED_KUBECONFIG_PATH: '{{ printf "%s/nested/kubeconfig" .RUN_DIR }}'
+      EFFECTIVE_DISK_SC: "{{ if .IMAGE_STORAGE_CLASS }}{{ .IMAGE_STORAGE_CLASS }}{{ else }}{{ .STORAGE_CLASS }}{{ end }}"
+    cmds:
+      - task: ci:prepare-env
+        vars:
+          RUN_ID: "{{ .RUN_ID }}"
+          RUN_DIR: "{{ .RUN_DIR }}"
+          PROFILE: "{{ .PROFILE }}"
+          STORAGE_CLASS: "{{ .STORAGE_CLASS }}"
+          PARENT_STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS }}"
+          REGISTRY_DOCKER_CFG: "{{ .REGISTRY_DOCKER_CFG }}"
+          API_URL: "{{ .API_URL }}"
+          SA_TOKEN: "{{ .SA_TOKEN }}"
+      - task: install:nested:env
+        vars:
+          TMP_DIR: "{{ .RUN_DIR }}"
+          VALUES_FILE: "{{ .VALUES_FILE_PATH }}"
+          PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG_PATH }}"
+          REGISTRY_DOCKER_CFG: 
"{{ .REGISTRY_DOCKER_CFG }}" + TARGET_STORAGE_CLASS: "{{ .PARENT_STORAGE_CLASS }}" + ATTACH_DISK_SIZE: "{{ .ATTACH_DISK_SIZE }}" + EFFECTIVE_DISK_SC: "{{ .EFFECTIVE_DISK_SC }}" + NAMESPACE: "{{ .RUN_ID }}" + NESTED_KUBECONFIG: "{{ .NESTED_KUBECONFIG_PATH }}" + SDS_SC_NAME: "{{ .STORAGE_CLASS }}" + DATA_DISK_COUNT: "{{ .DATA_DISK_COUNT }}" + + ci:prepare-env: + desc: Prepare environment (values, kubeconfig, infra manifests) + vars: + RUN_ID: '{{ .RUN_ID | default (env "RUN_ID") | default "" }}' + RUN_DIR: '{{ .RUN_DIR | default (printf "%s/runs/%s" .TMP_ROOT .RUN_ID) }}' + PROFILE: '{{ .PROFILE | default (env "PROFILE") | default "" }}' + STORAGE_CLASS: '{{ .STORAGE_CLASS | default (env "STORAGE_CLASS") | default "" }}' + PARENT_STORAGE_CLASS: '{{ .PARENT_STORAGE_CLASS | default (env "PARENT_STORAGE_CLASS") | default "" }}' + REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' + API_URL: '{{ .API_URL | default (env "API_URL") | default (env "E2E_K8S_URL") | default "" }}' + SA_TOKEN: '{{ .SA_TOKEN | default (env "SA_TOKEN") | default (env "E2E_NESTED_SA_SECRET") | default "" }}' + VALUES_FILE_PATH: '{{ printf "%s/values.yaml" .RUN_DIR }}' + PARENT_KUBECONFIG_PATH: '{{ printf "%s/parent.kubeconfig" .RUN_DIR }}' + cmds: + - | + set -euo pipefail + if [ -z "{{ .RUN_ID }}" ] || [ -z "{{ .STORAGE_CLASS }}" ] || [ -z "{{ .PARENT_STORAGE_CLASS }}" ]; then + echo "[ERR] RUN_ID/STORAGE_CLASS/PARENT_STORAGE_CLASS must be set" >&2 + exit 1 + fi + mkdir -p "{{ .RUN_DIR }}" + - task: run:values:prepare + vars: + RUN_ID: "{{ .RUN_ID }}" + RUN_NAMESPACE: "{{ .RUN_ID }}" + RUN_DIR: "{{ .RUN_DIR }}" + - | + set -euo pipefail + VALUES_FILE="{{ .VALUES_FILE_PATH }}" + if [ -n "{{ .REGISTRY_DOCKER_CFG }}" ]; then + REGISTRY_DOCKER_CFG='{{ .REGISTRY_DOCKER_CFG }}' yq eval --inplace '.deckhouse.registryDockerCfg = strenv(REGISTRY_DOCKER_CFG)' "$VALUES_FILE" + fi + yq eval --inplace '.storageProfile = "{{ .PROFILE }}"' "$VALUES_FILE" + - task: parent:kubeconfig + vars: + OUTPUT: "{{ .PARENT_KUBECONFIG_PATH }}" + API_URL: "{{ .API_URL }}" + SA_TOKEN: "{{ .SA_TOKEN }}" + - task: render-infra + vars: + TMP_DIR: "{{ .RUN_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE_PATH }}" + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG_PATH }}" + # ------------------------------------------------------------ # Infra manifests and deployment # ------------------------------------------------------------ @@ -93,7 +183,7 @@ tasks: deps: - task: ssh:ensure vars: - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' + TMP_DIR: "{{ .TMP_DIR }}" SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' @@ -133,9 +223,9 @@ tasks: deps: - task: render-infra vars: - TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' - VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" - PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default "" }}' + TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' @@ -190,17 +280,20 @@ tasks: vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' NAMESPACE: sh: yq eval '.namespace' {{ .VALUES_FILE }} SERVER: sh: | # Use 
external parent cluster API (ingress host) so that both dhctl Job # and components inside the nested cluster can reach the parent API. + export KUBECONFIG='{{ .PARENT_KUBECONFIG }}' HOST=$(kubectl -n d8-user-authn get ingress kubernetes-api -o json | jq -r '.spec.rules[0].host') [ -z "$HOST" -o "$HOST" = "null" ] && { echo "[ERR] kubernetes-api ingress host not found" >&2; exit 1; } echo "https://$HOST" TOKEN: sh: | + export KUBECONFIG='{{ .PARENT_KUBECONFIG }}' for i in $(seq 1 5); do TOKEN=$(kubectl -n {{ .NAMESPACE }} create token dkp-sa --duration=10h 2>/dev/null) && break echo "[WARN] Failed to issue SA token (attempt $i); retrying in 3s" >&2 @@ -209,7 +302,7 @@ tasks: [ -z "${TOKEN:-}" ] && { echo "[ERR] Unable to obtain token for dkp-sa" >&2; exit 1; } echo "$TOKEN" env: - KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" silent: true cmds: - mkdir -p {{ .TMP_DIR }} @@ -240,11 +333,18 @@ tasks: desc: Generate cluster config (helm template) silent: true deps: - - render-kubeconfig - - password-gen + - task: render-kubeconfig + vars: + TMP_DIR: "{{ .TMP_DIR }}" + VALUES_FILE: "{{ .VALUES_FILE }}" + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" + - task: password-gen + vars: + TMP_DIR: "{{ .TMP_DIR }}" vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' GENERATED_VALUES_FILE: '{{ printf "%s/%s" .TMP_DIR "generated-values.yaml" }}' PASSWORD_HASH_FILE: '{{ printf "%s/%s" .TMP_DIR "password-hash.txt" }}' SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' @@ -279,11 +379,13 @@ tasks: vars: TMP_DIR: "{{ .TMP_DIR }}" VALUES_FILE: "{{ .VALUES_FILE }}" + PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" SSH_FILE_NAME: "{{ .SSH_FILE_NAME }}" vars: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" + REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' SSH_DIR: '{{ .SSH_DIR | default (printf "%s/%s" .TMP_DIR "ssh") }}' SSH_PRIV_KEY_FILE: '{{ printf "%s/%s" .SSH_DIR .SSH_FILE_NAME }}' NAMESPACE: @@ -300,6 +402,11 @@ tasks: cmds: - | set -euo pipefail + # Configure registry auth for docker pull + if [ -n "{{ .REGISTRY_DOCKER_CFG }}" ]; then + mkdir -p ~/.docker + echo '{{ .REGISTRY_DOCKER_CFG }}' | base64 -d > ~/.docker/config.json + fi # Pull dhctl image locally (runner authenticated in workflow) docker pull --platform=linux/amd64 "{{ .IMAGE }}" # Run dhctl bootstrap with SSH bastion (jump-host) @@ -409,13 +516,20 @@ tasks: desc: Delete namespaces by prefix and wait for deletion vars: PREFIX: '{{ .PREFIX | default (env "CLEANUP_PREFIX") | default "nightly-nested-e2e-" }}' - PARENT_KUBECONFIG_PATH: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default "" }}' + TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "cleanup") }}' + PARENT_KUBECONFIG_PATH: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") | default (printf "%s/%s" .TMP_ROOT "cleanup/parent.kubeconfig") }}' + API_URL: '{{ .API_URL | default (env "API_URL") | default (env "E2E_K8S_URL") | default "" }}' + SA_TOKEN: '{{ .SA_TOKEN | default (env "SA_TOKEN") | default (env "E2E_NESTED_SA_SECRET") | default "" }}' cmds: - | set -euo pipefail if [ ! 
-s "{{ .PARENT_KUBECONFIG_PATH }}" ]; then - echo "[ERR] parent kubeconfig not found (KUBECONFIG)" >&2 - exit 1 + if [ -z "{{ .API_URL }}" ] || [ -z "{{ .SA_TOKEN }}" ]; then + echo "[ERR] Unable to build parent kubeconfig: API_URL/SA_TOKEN are empty" >&2 + exit 1 + fi + mkdir -p "{{ .TMP_DIR }}" + task parent:kubeconfig OUTPUT='{{ .PARENT_KUBECONFIG_PATH }}' API_URL='{{ .API_URL }}' SA_TOKEN='{{ .SA_TOKEN }}' fi export KUBECONFIG='{{ .PARENT_KUBECONFIG_PATH }}' echo "[CLEANUP] Prefix='{{ .PREFIX }}'" @@ -458,6 +572,7 @@ tasks: TMP_DIR: '{{ .TMP_DIR | default (printf "%s/%s" .TMP_ROOT "default") }}' VALUES_FILE: "{{ .VALUES_FILE | default .VALUES_TEMPLATE_FILE }}" PARENT_KUBECONFIG: '{{ .PARENT_KUBECONFIG | default (env "KUBECONFIG") }}' + REGISTRY_DOCKER_CFG: '{{ .REGISTRY_DOCKER_CFG | default (env "REGISTRY_DOCKER_CFG") | default "" }}' TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" ATTACH_DISK_SIZE: '{{ .ATTACH_DISK_SIZE | default "10Gi" }}' EFFECTIVE_DISK_SC: "{{ .EFFECTIVE_DISK_SC }}" @@ -479,6 +594,7 @@ tasks: TMP_DIR: "{{ .TMP_DIR }}" VALUES_FILE: "{{ .VALUES_FILE }}" PARENT_KUBECONFIG: "{{ .PARENT_KUBECONFIG }}" + REGISTRY_DOCKER_CFG: "{{ .REGISTRY_DOCKER_CFG }}" TARGET_STORAGE_CLASS: "{{ .TARGET_STORAGE_CLASS }}" - echo "๐Ÿ’ฟ Attaching data disks to workers" - task: infra:attach-storage-disks-hotplug diff --git a/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml b/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml index dad2d77cd6..a6e0b11732 100644 --- a/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml +++ b/ci/dvp-e2e/charts/cluster-config/templates/e2e-sa.yaml @@ -17,11 +17,3 @@ subjects: - kind: ServiceAccount name: e2e-runner namespace: kube-system - - - - - - - - diff --git a/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml index ffbea6ac62..cb5872e8b6 100644 --- a/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml +++ b/ci/dvp-e2e/charts/cluster-config/templates/mc.yaml @@ -63,3 +63,27 @@ metadata: spec: imageTag: main scanInterval: 15s +--- +apiVersion: deckhouse.io/v1alpha1 +kind: ModuleConfig +metadata: + name: virtualization +spec: + version: 1 + enabled: true + settings: + dvcr: + storage: + persistentVolumeClaim: + size: 10Gi + type: PersistentVolumeClaim + virtualMachineCIDRs: + - 192.168.10.0/24 +--- +apiVersion: deckhouse.io/v1alpha2 +kind: ModulePullOverride +metadata: + name: virtualization +spec: + imageTag: {{ .Values.virtualization.tag }} + scanInterval: 15s diff --git a/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml b/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml deleted file mode 100644 index 5011c2b2b6..0000000000 --- a/ci/dvp-e2e/charts/cluster-config/templates/virtualization.yaml +++ /dev/null @@ -1,26 +0,0 @@ ---- -{{- if and (hasKey .Values "features") (.Values.features.virtualization) }} -apiVersion: deckhouse.io/v1alpha1 -kind: ModuleConfig -metadata: - name: virtualization -spec: - enabled: true - version: 1 - settings: - dvcr: - storage: - persistentVolumeClaim: - size: 10Gi - type: PersistentVolumeClaim - virtualMachineCIDRs: - - 192.168.10.0/24 ---- -apiVersion: deckhouse.io/v1alpha2 -kind: ModulePullOverride -metadata: - name: virtualization -spec: - imageTag: {{ .Values.virtualization.tag }} - scanInterval: 15s -{{- end }} diff --git a/ci/dvp-e2e/profiles.json b/ci/dvp-e2e/profiles.json deleted file mode 100644 index c5b6307ba9..0000000000 --- a/ci/dvp-e2e/profiles.json +++ /dev/null @@ -1,11 +0,0 @@ -[ - { - "name": 
"sds-replicated-volume", - "storage_class": "linstor-thin-r2", - "image_storage_class": "linstor-thin-r1-immediate", - "snapshot_storage_class": "linstor-thin-r2", - "parent_storage_class": "linstor-thin-r1-immediate", - "worker_data_disk_size": "10Gi", - "description": "SDS storage with LINSTOR thin provisioning" - } -] From 2ec6d93d35dd7bc31f26ed2453068c5535bf45da Mon Sep 17 00:00:00 2001 From: Anton Yachmenev Date: Wed, 19 Nov 2025 14:22:43 +0300 Subject: [PATCH 14/14] chore(ci): read data disk count from matrix --- .github/workflows/e2e-matrix.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml index 9f4469278b..447cc00b76 100644 --- a/.github/workflows/e2e-matrix.yml +++ b/.github/workflows/e2e-matrix.yml @@ -101,7 +101,7 @@ jobs: PARENT_STORAGE_CLASS: ${{ env.PARENT_STORAGE_CLASS }} IMAGE_STORAGE_CLASS: ${{ env.IMAGE_STORAGE_CLASS }} ATTACH_DISK_SIZE: ${{ env.ATTACH_DISK_SIZE }} - DATA_DISK_COUNT: ${{ env.DATA_DISK_COUNT }} + DATA_DISK_COUNT: ${{ matrix.data_disk_count }} REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} API_URL: ${{ env.E2E_K8S_URL }} SA_TOKEN: ${{ secrets.E2E_NESTED_SA_SECRET }}