Skip to content

Commit 4e8e623

Browse files
committed
Stress test
Signed-off-by: Ovidiu Mara <[email protected]>
1 parent b464ee9 commit 4e8e623

File tree

5 files changed

+26
-17
lines changed

5 files changed

+26
-17
lines changed

.ci/jenkins/lib/test-matrix.yaml

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ env:
3838
UCX_VERSION: v1.19.0
3939
NPROC: "16"
4040
# Manual timeout - ci-demo doesn't handle docker exec
41-
TEST_TIMEOUT: 30
41+
TEST_TIMEOUT: 90
4242

4343
steps:
4444
- name: Get Environment Info
@@ -98,15 +98,21 @@ steps:
9898
- name: Test CPP
9999
parallel: false
100100
run: |
101-
timeout ${TEST_TIMEOUT}m docker exec -w ${WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_cpp.sh ${INSTALL_DIR}"
101+
for i in $(seq 1 10)
102+
do
103+
timeout ${TEST_TIMEOUT}m docker exec -w ${WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_cpp.sh ${INSTALL_DIR}"
104+
done
102105
onfail: |
103106
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
104107
docker image rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
105108
106109
- name: Test Python
107110
parallel: false
108111
run: |
109-
timeout ${TEST_TIMEOUT}m docker exec -w ${WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_python.sh ${INSTALL_DIR}"
112+
for i in $(seq 1 10)
113+
do
114+
timeout ${TEST_TIMEOUT}m docker exec -w ${WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_python.sh ${INSTALL_DIR}"
115+
done
110116
onfail: |
111117
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
112118
docker image rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
@@ -122,7 +128,10 @@ steps:
122128
- name: Test Rust
123129
parallel: false
124130
run: |
125-
timeout ${TEST_TIMEOUT}m docker exec -w ${WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_rust.sh ${INSTALL_DIR}"
131+
for i in $(seq 1 10)
132+
do
133+
timeout ${TEST_TIMEOUT}m docker exec -w ${WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_rust.sh ${INSTALL_DIR}"
134+
done
126135
always: |
127136
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
128137
docker image rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"

.gitlab/build.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,11 @@ ninja -C nixl_build && ninja -C nixl_build install
159159
# TODO(kapila): Copy the nixl.pc file to the install directory if needed.
160160
# cp ${BUILD_DIR}/nixl.pc ${INSTALL_DIR}/lib/pkgconfig/nixl.pc
161161

162+
pip3 install --break-system-packages .
163+
pip3 install --break-system-packages pytest
164+
pip3 install --break-system-packages pytest-timeout
165+
pip3 install --break-system-packages zmq
166+
162167
cd benchmark/nixlbench
163168
meson setup nixlbench_build -Dnixl_path=${INSTALL_DIR} -Dprefix=${INSTALL_DIR}
164169
ninja -C nixlbench_build && ninja -C nixlbench_build install

.gitlab/test_cpp.sh

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,6 @@ else
3030
SUDO=""
3131
fi
3232

33-
$SUDO apt-get update
34-
$SUDO apt-get -qq install -y libaio-dev
35-
36-
3733
# Parse commandline arguments with first argument being the install directory.
3834
INSTALL_DIR=$1
3935

@@ -101,8 +97,10 @@ kill -s SIGINT $telePID
10197
nixl_test_port=$(get_next_tcp_port)
10298

10399
./bin/nixl_test target 127.0.0.1 "$nixl_test_port"&
100+
target_pid=$!
104101
sleep 1
105102
./bin/nixl_test initiator 127.0.0.1 "$nixl_test_port"
103+
wait $target_pid
106104

107105
echo "${TEXT_YELLOW}==== Disabled tests==="
108106
echo "./bin/md_streamer disabled"

.gitlab/test_python.sh

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ else
3535
SUDO=""
3636
fi
3737

38-
$SUDO apt-get -qq install liburing-dev
39-
4038
ARCH=$(uname -m)
4139
[ "$ARCH" = "arm64" ] && ARCH="aarch64"
4240

@@ -51,11 +49,6 @@ export NIXL_PREFIX=${INSTALL_DIR}
5149
# Raise exceptions for logging errors
5250
export NIXL_DEBUG_LOGGING=yes
5351

54-
pip3 install --break-system-packages .
55-
pip3 install --break-system-packages pytest
56-
pip3 install --break-system-packages pytest-timeout
57-
pip3 install --break-system-packages zmq
58-
5952
echo "==== Running ETCD server ===="
6053
etcd_port=$(get_next_tcp_port)
6154
etcd_peer_port=$(get_next_tcp_port)
@@ -83,9 +76,11 @@ blocking_send_recv_port=$(get_next_tcp_port)
8376
mkdir -p /tmp/telemetry_test
8477

8578
python3 blocking_send_recv_example.py --mode="target" --ip=127.0.0.1 --port="$blocking_send_recv_port"&
79+
target_pid=$!
8680
sleep 5
8781
NIXL_TELEMETRY_ENABLE=y NIXL_TELEMETRY_DIR=/tmp/telemetry_test \
8882
python3 blocking_send_recv_example.py --mode="initiator" --ip=127.0.0.1 --port="$blocking_send_recv_port"
83+
wait $target_pid
8984

9085
python3 telemetry_reader.py --telemetry_path /tmp/telemetry_test/initiator &
9186
telePID=$!

test/nixl/nixl_test.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,13 @@ static void targetThread(nixlAgent &agent, nixl_opt_args_t *extra_params, int th
105105
if (!rc)
106106
std::this_thread::sleep_for(std::chrono::milliseconds(10));
107107
}
108-
if (!rc)
108+
if (!rc) {
109109
std::cerr << "Thread " << thread_id << " UCX Transfer failed, buffers are different\n";
110-
else
110+
exit(-1);
111+
} else {
111112
std::cout << "Thread " << thread_id << " Transfer completed and Buffers match with Initiator\n"
112113
<< "Thread " << thread_id << " UCX Transfer Success!!!\n";
114+
}
113115

114116
std::cout << "Thread " << thread_id << " Cleanup..\n";
115117
agent.deregisterMem(dram_for_ucx, extra_params);

0 commit comments

Comments
 (0)