Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pipe IT: Refactor how IT is categorized to reduce overall runtime #14719

Merged
merged 16 commits into from
Feb 10, 2025
Merged
Original file line number Diff line number Diff line change
@@ -34,7 +34,110 @@ env:
DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}

jobs:
auto-create-schema:
# Runs the integration tests of Maven profile MultiClusterIT1 against a pair
# of cluster configurations (cluster1, cluster2).
# After the exclusions below, the pairs that actually run are:
#   (LightWeightStandaloneMode, HighPerformanceMode)
#   (ScalableSingleNodeMode,    ScalableSingleNodeMode)
#   (HighPerformanceMode,       ScalableSingleNodeMode)
#   (PipeConsensusBatchMode,    ScalableSingleNodeMode)
#   (PipeConsensusStreamMode,   ScalableSingleNodeMode)
single:
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is left out because RatisConsensus is not supported yet.
      cluster1: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
      cluster2: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode]
      os: [ ubuntu-latest ]
      exclude:
        - cluster1: LightWeightStandaloneMode
          cluster2: LightWeightStandaloneMode
        - cluster1: LightWeightStandaloneMode
          cluster2: ScalableSingleNodeMode
        - cluster1: ScalableSingleNodeMode
          cluster2: LightWeightStandaloneMode
        - cluster1: ScalableSingleNodeMode
          cluster2: HighPerformanceMode
        - cluster1: HighPerformanceMode
          cluster2: LightWeightStandaloneMode
        - cluster1: HighPerformanceMode
          cluster2: HighPerformanceMode
        - cluster1: PipeConsensusBatchMode
          cluster2: LightWeightStandaloneMode
        - cluster1: PipeConsensusBatchMode
          cluster2: HighPerformanceMode
        - cluster1: PipeConsensusStreamMode
          cluster2: LightWeightStandaloneMode
        - cluster1: PipeConsensusStreamMode
          cluster2: HighPerformanceMode
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      # Integer division: the delay is a whole number of seconds in 0..10.
      # Presumably this staggers the matrix jobs; confirm with the workflow owners.
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol is skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        # Runs the Maven integration tests, retrying only when the failure log
        # contains "Could not transfer artifact". Returns 0 when a run passes
        # or when every attempt failed on artifact transfer; returns 1 on any
        # other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file

          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            if mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
              -pl integration-test \
              -am -PMultiClusterIT1 \
              -ntp >> "$log_file"; then
              return 0
            fi

            # The step runs under `bash -e`: create the directory first so a
            # build that failed before producing any cluster logs does not
            # abort the script on `mv` and skip the retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            log_file="$log_dir/run-tests-$attempt.log"

            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            cat "$log_file"
            echo "==================== END: ~/run-tests-$attempt.log ======================"

            # grep reads the file directly: with `pipefail`, piping a large log
            # into `grep -q` can report failure even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_file"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        # upload-artifact@v4 refuses a second upload under an existing name in
        # the same run, so the name carries this job's id.
        name: cluster-log-single-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
dual-tree-auto-basic:
strategy:
fail-fast: false
max-parallel: 15
@@ -78,7 +181,7 @@ jobs:
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
-pl integration-test \
-am -PMultiClusterIT2AutoCreateSchema \
-am -PMultiClusterIT2DualTreeAutoBasic \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)

@@ -115,7 +218,110 @@ jobs:
name: cluster-log-auto-create-schema-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
path: integration-test/target/cluster-logs
retention-days: 30
manual-create-schema:
# Runs the integration tests of Maven profile MultiClusterIT2DualTreeAutoEnhanced
# against a pair of cluster configurations (cluster1, cluster2).
# After the exclusions below, the pairs that actually run are:
#   (LightWeightStandaloneMode, HighPerformanceMode)
#   (ScalableSingleNodeMode,    ScalableSingleNodeMode)
#   (HighPerformanceMode,       ScalableSingleNodeMode)
#   (PipeConsensusBatchMode,    ScalableSingleNodeMode)
#   (PipeConsensusStreamMode,   ScalableSingleNodeMode)
dual-tree-auto-enhanced:
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is left out because RatisConsensus is not supported yet.
      cluster1: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
      cluster2: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode]
      os: [ ubuntu-latest ]
      exclude:
        - cluster1: LightWeightStandaloneMode
          cluster2: LightWeightStandaloneMode
        - cluster1: LightWeightStandaloneMode
          cluster2: ScalableSingleNodeMode
        - cluster1: ScalableSingleNodeMode
          cluster2: LightWeightStandaloneMode
        - cluster1: ScalableSingleNodeMode
          cluster2: HighPerformanceMode
        - cluster1: HighPerformanceMode
          cluster2: LightWeightStandaloneMode
        - cluster1: HighPerformanceMode
          cluster2: HighPerformanceMode
        - cluster1: PipeConsensusBatchMode
          cluster2: LightWeightStandaloneMode
        - cluster1: PipeConsensusBatchMode
          cluster2: HighPerformanceMode
        - cluster1: PipeConsensusStreamMode
          cluster2: LightWeightStandaloneMode
        - cluster1: PipeConsensusStreamMode
          cluster2: HighPerformanceMode
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      # Integer division: the delay is a whole number of seconds in 0..10.
      # Presumably this staggers the matrix jobs; confirm with the workflow owners.
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol is skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        # Runs the Maven integration tests, retrying only when the failure log
        # contains "Could not transfer artifact". Returns 0 when a run passes
        # or when every attempt failed on artifact transfer; returns 1 on any
        # other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file

          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            if mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
              -pl integration-test \
              -am -PMultiClusterIT2DualTreeAutoEnhanced \
              -ntp >> "$log_file"; then
              return 0
            fi

            # The step runs under `bash -e`: create the directory first so a
            # build that failed before producing any cluster logs does not
            # abort the script on `mv` and skip the retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            log_file="$log_dir/run-tests-$attempt.log"

            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            cat "$log_file"
            echo "==================== END: ~/run-tests-$attempt.log ======================"

            # grep reads the file directly: with `pipefail`, piping a large log
            # into `grep -q` can report failure even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_file"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        # upload-artifact@v4 refuses a second upload under an existing name in
        # the same run, so the name carries this job's id.
        name: cluster-log-dual-tree-auto-enhanced-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
        path: integration-test/target/cluster-logs
        retention-days: 30
dual-tree-auto-manual:
strategy:
fail-fast: false
max-parallel: 15
@@ -181,7 +387,7 @@ jobs:
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster1 }},${{ matrix.cluster2 }} \
-pl integration-test \
-am -PMultiClusterIT2ManualCreateSchema \
-am -PMultiClusterIT2DualTreeManual \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)

@@ -464,7 +670,7 @@ jobs:
name: cluster-log-subscription-regression-misc-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster1 }}-${{ matrix.cluster2 }}
path: integration-test/target/cluster-logs
retention-days: 30
table-model:
dual-table-manual-basic:
strategy:
fail-fast: false
max-parallel: 15
@@ -508,7 +714,7 @@ jobs:
-DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
-DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
-pl integration-test \
-am -PMultiClusterIT2TableModel \
-am -PMultiClusterIT2DualTableManualBasic \
-ntp >> ~/run-tests-$attempt.log && return 0
test_output=$(cat ~/run-tests-$attempt.log)

@@ -545,3 +751,84 @@ jobs:
name: cluster-log-table-model-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
path: integration-test/target/cluster-logs
retention-days: 30
# Runs the integration tests of Maven profile
# MultiClusterIT2DualTableManualEnhanced. Each matrix entry passes the same
# cluster configuration twice to -DClusterConfigurations.
dual-table-manual-enhanced:
  strategy:
    fail-fast: false
    max-parallel: 15
    matrix:
      java: [17]
      # StrongConsistencyClusterMode is left out because RatisConsensus is not supported yet.
      cluster: [LightWeightStandaloneMode, ScalableSingleNodeMode, HighPerformanceMode, PipeConsensusBatchMode, PipeConsensusStreamMode]
      os: [ ubuntu-latest ]
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v4
    - name: Set up JDK ${{ matrix.java }}
      uses: actions/setup-java@v4
      with:
        distribution: liberica
        java-version: ${{ matrix.java }}
    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-m2-
    - name: Sleep for a random duration between 0 and 10000 milliseconds
      # Integer division: the delay is a whole number of seconds in 0..10.
      # Presumably this staggers the matrix jobs; confirm with the workflow owners.
      run: |
        sleep $(( $(( RANDOM % 10000 + 1 )) / 1000))
    - name: IT Test
      shell: bash
      # client-cpp is not compiled here to save time; it is tested in client.yml.
      # influxdb-protocol is skipped because it is tested separately in influxdb-protocol.yml.
      run: |
        # Runs the Maven integration tests, retrying only when the failure log
        # contains "Could not transfer artifact". Returns 0 when a run passes
        # or when every attempt failed on artifact transfer; returns 1 on any
        # other failure.
        retry() {
          local -i max_attempts=3
          local -i attempt=1
          local -i retry_sleep=5
          local log_dir=integration-test/target/cluster-logs
          local log_file

          while [ $attempt -le $max_attempts ]; do
            log_file="$HOME/run-tests-$attempt.log"
            if mvn clean verify \
              -P with-integration-tests \
              -DskipUTs \
              -DintegrationTest.forkCount=1 -DConfigNodeMaxHeapSize=256 -DDataNodeMaxHeapSize=1024 -DDataNodeMaxDirectMemorySize=768 \
              -DClusterConfigurations=${{ matrix.cluster }},${{ matrix.cluster }} \
              -pl integration-test \
              -am -PMultiClusterIT2DualTableManualEnhanced \
              -ntp >> "$log_file"; then
              return 0
            fi

            # The step runs under `bash -e`: create the directory first so a
            # build that failed before producing any cluster logs does not
            # abort the script on `mv` and skip the retry.
            mkdir -p "$log_dir"
            mv "$log_file" "$log_dir/"
            log_file="$log_dir/run-tests-$attempt.log"

            echo "==================== BEGIN: ~/run-tests-$attempt.log ===================="
            cat "$log_file"
            echo "==================== END: ~/run-tests-$attempt.log ======================"

            # grep reads the file directly: with `pipefail`, piping a large log
            # into `grep -q` can report failure even though the pattern matched.
            if grep -q "Could not transfer artifact" "$log_file"; then
              if [ $attempt -lt $max_attempts ]; then
                echo "Test failed with artifact transfer issue, attempt $attempt. Retrying in $retry_sleep seconds..."
                sleep $retry_sleep
                attempt=$((attempt + 1))
              else
                echo "Test failed after $max_attempts attempts due to artifact transfer issue."
                echo "Treating this as a success because the issue is likely transient."
                return 0
              fi
            else
              echo "Test failed with a different error."
              return 1
            fi
          done
        }
        retry
    - name: Upload Artifact
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        # upload-artifact@v4 refuses a second upload under an existing name in
        # the same run, so the name carries this job's id.
        name: cluster-log-dual-table-manual-enhanced-java${{ matrix.java }}-${{ runner.os }}-${{ matrix.cluster }}-${{ matrix.cluster }}
        path: integration-test/target/cluster-logs
        retention-days: 30
Loading