
Commit 442386d

infra: Add test stages for sm120 (#3533)
* Add test stages for sm120
* Update chip name and config name
* Split tests to gb202 and gb203
* Don't flash driver for rtx-5090
* Skip the failed cases
* Change the test stage names
* Reduce 5080 jobs and add back gpu list which doesn't support dynamic driver flashing
* Skip failed case on gb202
* Fix condition to dynamic driver flashing

Signed-off-by: EmmaQiaoCh <[email protected]>
Signed-off-by: qqiao <[email protected]>
1 parent: ba4131f

File tree: 3 files changed (+63, -1 lines)

jenkins/L0_Test.groovy (+4, -1)
@@ -279,7 +279,7 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
     targetCould = "kubernetes"

     // The following GPU types doesn't support dynamic driver flashing.
-    if (type == "b100-ts2" || type.contains("dgx-h100") || type.contains("dgx-h200") || type == "gh200" ) {
+    if (type.contains("dgx-h100") || type.contains("dgx-h200") || type in ["b100-ts2", "gh200", "rtx-5080", "rtx-5090"]) {
         selectors = """
             kubernetes.io/arch: ${arch}
             kubernetes.io/os: linux
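The removed equality chain and the new check are equivalent for the old GPU types; the rewrite folds the exact-match cases into a single list-membership test and adds the two new RTX types. A minimal sketch of the resulting behavior, using a hypothetical helper name (the real code inlines this check inside createKubernetesPodConfig):

    // Hypothetical helper illustrating the updated condition; not code from this commit.
    boolean skipsDynamicDriverFlashing(String type) {
        // DGX H100/H200 variants match by substring; the rest match by exact membership,
        // which now also covers the new RTX 5080/5090 runners.
        return type.contains("dgx-h100") || type.contains("dgx-h200") ||
               type in ["b100-ts2", "gh200", "rtx-5080", "rtx-5090"]
    }

    assert skipsDynamicDriverFlashing("rtx-5090")   // newly excluded from driver flashing
    assert skipsDynamicDriverFlashing("gh200")      // unchanged behavior
    assert !skipsDynamicDriverFlashing("a10")       // still uses dynamic driver flashing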
@@ -1219,6 +1219,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         "B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 2],
         "B200_PCIe-TensorRT-1": ["b100-ts2", "l0_b200", 1, 2],
         "B200_PCIe-TensorRT-2": ["b100-ts2", "l0_b200", 2, 2],
+        "RTX5090-PyTorch-1": ["rtx-5090", "l0_gb202", 1, 1],
+        "RTX5080-TensorRT-1": ["rtx-5080", "l0_gb203", 1, 2],
+        "RTX5080-TensorRT-2": ["rtx-5080", "l0_gb203", 2, 2],
         // Currently post-merge test stages only run tests with "stage: post_merge" mako
         // in the test-db. This behavior may change in the future.
         "A10-TensorRT-[Post-Merge]-1": ["a10", "l0_a10", 1, 2],
New file: test-db config defining l0_gb202 (+21)

@@ -0,0 +1,21 @@
+version: 0.0.1
+l0_gb202:
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*gb202*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: pytorch
+  tests:
+  # ------------- PyTorch tests ---------------
+  - unittest/_torch/modeling -k "modeling_mllama"
+  - unittest/_torch/modeling -k "modeling_out_of_tree"
+  # - unittest/_torch/modeling -k "modeling_qwen" # https://nvbugs/5234573
+  - test_e2e.py::test_ptp_quickstart_bert[VANILLA-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
+  - test_e2e.py::test_ptp_quickstart_bert[TRTLLM-BertForSequenceClassification-bert/bert-base-uncased-yelp-polarity]
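The condition block above gates when this list is picked up: exactly one GPU in the system, a GPU name matching '*gb202*', an Ubuntu distribution, and the pre_merge stage with the pytorch backend. A hedged sketch of that matching, assuming a simple glob-to-regex translation (the real test-db evaluator is a separate tool, not part of this commit):

    // Hypothetical matcher for the condition block; not the actual test-db evaluator.
    boolean matchesGlob(String value, String glob) {
        return value ==~ glob.replace("*", ".*")   // translate '*' globs into a full-match regex
    }

    // Placeholder machine context; the keys mirror the YAML fields above.
    def ctx = [system_gpu_count: 1,
               gpu: "rtx-5090-gb202",              // hypothetical GPU name
               linux_distribution_name: "ubuntu22.04"]

    boolean selected = ctx.system_gpu_count >= 1 && ctx.system_gpu_count <= 1 &&
                       matchesGlob(ctx.gpu, "*gb202*") &&
                       matchesGlob(ctx.linux_distribution_name, "ubuntu*")
    assert selected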
New file: test-db config defining l0_gb203 (+38)

@@ -0,0 +1,38 @@
+version: 0.0.1
+l0_gb203:
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*gb203*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: tensorrt
+  tests:
+  # ------------- TRT tests ---------------
+  - unittest/trt/attention/test_gpt_attention.py -k "partition0"
+  - unittest/trt/attention/test_gpt_attention.py -k "partition1"
+  - unittest/trt/attention/test_gpt_attention.py -k "partition2"
+  - unittest/trt/attention/test_gpt_attention.py -k "partition3"
+  - unittest/trt/attention/test_gpt_attention.py -k "xqa_generic"
+  # - unittest/trt/quantization # https://nvbugs/5234573
+  # - unittest/trt/functional # https://nvbugs/5234573
+  - examples/test_llama.py::test_llm_llama_v1_1gpu_kv_cache_reuse_with_prompt_table[llama-7b]
+  - examples/test_llama.py::test_llm_llama_v3_1_1node_single_gpu[llama-3.2-1b-disable_fp8]
+  - examples/test_llama.py::test_llm_llama_wo_1gpu_summary[llama-7b-int4-nb:1]
+  - examples/test_llama.py::test_llm_llama_wo_1gpu_summary[llama-7b-int8-nb:1]
+  - examples/test_llama.py::test_llm_llama_1gpu[llama-3.1-8b-instruct-hf-fp8-enable_fp8-float16-summarization-nb:1]
+  # - examples/test_qwen.py::test_llm_qwen1_5_7b_single_gpu_lora[qwen1.5_7b_chat-Qwen1.5-7B-Chat-750Mb-lora] # https://nvbugs/5234573
+  # - examples/test_qwen.py::test_llm_qwen_single_gpu_summary[qwen2.5_1.5b_instruct-enable_paged_kv_cache-enable_remove_input_padding-enable_weight_only-enable_fmha_fp32_acc] # https://nvbugs/5234573
+  - test_e2e.py::test_llmapi_quickstart
+  - test_e2e.py::test_llmapi_example_inference
+  - test_e2e.py::test_llmapi_example_inference_async
+  - test_e2e.py::test_llmapi_example_inference_async_streaming
+  - test_e2e.py::test_llmapi_example_logits_processor
+  - test_e2e.py::test_llmapi_example_multilora
+  - test_e2e.py::test_llmapi_example_guided_decoding
+  - test_e2e.py::test_llmapi_example_customize
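Each entry in the tests lists reads like a pytest target, optionally with a -k keyword filter (the four partitionN entries slice test_gpt_attention.py into keyword-based groups so they can run as separate chunks). A small sketch of turning one entry into a command line, offered as an assumption about how such entries are consumed rather than as the runner's actual behavior:

    // Hypothetical conversion of a test-db entry into a pytest invocation; not runner code.
    String toPytestCommand(String entry) {
        return "pytest " + entry   // entries already carry their own paths and -k filters
    }

    assert toPytestCommand('unittest/trt/attention/test_gpt_attention.py -k "partition0"') ==
           'pytest unittest/trt/attention/test_gpt_attention.py -k "partition0"'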
