20 changes: 10 additions & 10 deletions .github/workflows/all-tests-metax.yml
@@ -6,13 +6,13 @@ on:
     paths:
       - '.github/workflows/*metax*.yml'
       - 'hardware/Metax_C550/**'
-      - 'tests/**'
+      - 'tests/Metax_C550/**'
   pull_request:
     branches: ["main"]
     paths:
       - '.github/workflows/*metax*.yml'
       - 'hardware/Metax_C550/**'
-      - 'tests/**'
+      - 'tests/Metax_C550/**'

 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.actor }}
@@ -36,15 +36,15 @@ jobs:
     strategy:
       matrix:
         task:
-          - deepseek_r1_distill_qwen-metax
+          - deepseek_r1_distill_qwen
           # TODO: Need update for [email protected]
-          - deepseek_r1_distill_qwen-flaggems-metax
-          - opi_llama3_1_instruct-metax
-          - opi_llama3_1_instruct-flaggems-metax
-          - qwen3-metax
-          - qwen3-flaggems-metax
-          - robobrain2-metax # TP1
-          - robobrain2-flaggems-metax # TP1
+          - deepseek_r1_distill_qwen-flaggems
+          - opi_llama3_1_instruct
+          - opi_llama3_1_instruct-flaggems
+          - qwen3
+          - qwen3-flaggems
+          - robobrain2 # TP1
+          - robobrain2-flaggems # TP1
     name: "inference-${{ matrix.task }}"
     with:
       task: ${{ matrix.task }}
2 changes: 1 addition & 1 deletion .github/workflows/functional-tests-metax.yml
@@ -146,7 +146,7 @@ jobs:
            echo "Unknown backend type: ${{ inputs.type }}"
            exit 1
          fi
-         cd /__w/FlagScale/FlagScale
+         cd /__w/FlagScale/FlagScale/tests/Metax_C550/
          tests/scripts/functional_tests/test_task.sh --type ${{ inputs.type }} --task ${{ inputs.task }}
          exit_code=$?
          echo "Exit code: $exit_code"
@@ -0,0 +1,51 @@
defaults:
  - _self_
  - inference: 7b-tp2

experiment:
  exp_name: deepseek_r1_distill_qwen-flaggems
  exp_dir: tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen-flaggems/results_test/7b-tp2
  task:
    type: inference
    backend: vllm
    entrypoint: flagscale/inference/inference_aquila.py
  runner:
    hostfile: null
  cmds:
    before_start:
      source /opt/conda/bin/activate flagscale-inference
  envs:
    HYDRA_FULL_ERROR: 1
    CUBLAS_WORKSPACE_CONFIG: ":4096:8"
    CUDNN_BENCHMARK: "false"
    CUDNN_DETERMINISTIC: "true"
    USE_FLAGGEMS: "true"
Comment on lines +20 to +22 (Contributor, severity: medium):

There is an inconsistent use of boolean values for environment variables in this and other YAML configuration files. Some are defined as strings (e.g., "true"), while others use YAML's native boolean type (e.g., true on line 42). For consistency and to prevent potential parsing issues, it's best to use native YAML booleans throughout.

    CUDNN_BENCHMARK: false
    CUDNN_DETERMINISTIC: true
    USE_FLAGGEMS: true

    GEMS_VENDOR: "metax"
    # Quantitative perception training related
    NVTE_APPLY_QK_LAYER_SCALING: 0
    NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
    NVTE_FLASH_ATTN: 0
    NVTE_FUSED_ATTN: 0
    # GPU parallel control
    CUDA_VISIBLE_DEVICES: "0,1,2,3,4,5,6,7"
    CUDA_DEVICE_MAX_CONNECTIONS: 1
    NCCL_ALGO: "Ring"
    NCCL_PROTOCOL: LLC
    # Basic randomness control
    SEED: 1234
    PYTHONHASHSEED: 0
    MKL_NUM_THREADS: 1
    OMP_NUM_THREADS: 1
    NUMEXPR_NUM_THREADS: 1
    SCIPY_RDRANDOM: 0
    TF_DETERMINISTIC_OPS: 1
    TORCH_CUDNN_DETERMINISM: true
    CUDA_LAUNCH_BLOCKING: 1
    NCCL_DEBUG: INFO
    MAGIC_CACHE: disabled

action: run

hydra:
  run:
    dir: ${experiment.exp_dir}/hydra
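To make the reviewer's point about quoted versus native booleans concrete, here is a minimal sketch (illustrative only, not part of this PR, and assuming PyYAML is available) showing that the two spellings load as different Python types, which matters once the values are converted back to strings for the process environment:

    import yaml  # PyYAML, assumed available purely for this illustration

    # The same setting spelled two ways, as in the envs blocks above.
    quoted = yaml.safe_load('CUDNN_DETERMINISTIC: "true"')["CUDNN_DETERMINISTIC"]
    unquoted = yaml.safe_load("CUDNN_DETERMINISTIC: true")["CUDNN_DETERMINISTIC"]

    print(type(quoted), quoted)      # <class 'str'> true
    print(type(unquoted), unquoted)  # <class 'bool'> True

    # Environment variables must be strings, so whatever exports these values
    # has to stringify them; a plain str() turns the boolean into 'True', not 'true'.
    print(str(quoted), str(unquoted))  # true True

Picking one spelling throughout, as the reviewer suggests, keeps that conversion step from silently producing mixed 'true'/'True' values.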
@@ -0,0 +1,20 @@
llm:
  model: /home/gitlab-runner/data/DeepSeek-R1-Distill-Qwen-7B
  tokenizer: /home/gitlab-runner/data/DeepSeek-R1-Distill-Qwen-7B
  trust_remote_code: true
  tensor_parallel_size: 2
  pipeline_parallel_size: 1
  gpu_memory_utilization: 0.9
  seed: 1234
  enforce_eager: true

generate:
  prompts: [
    "The president of the United States",
    "The capital of France",
  ]
  sampling:
    top_p: 0.01
    top_k: 1
    temperature: 0.0
    seed: 1234
@@ -0,0 +1,8 @@
**************************************************
output.prompt='The president of the United States'
output.outputs[0].text=', George W. Bush, is known for his leadership in the 19'
output.outputs[0].token_ids=[11, 9857, 467, 13, 14079, 11, 374, 3881, 369, 806, 11438, 304, 279, 220, 16, 24]
**************************************************
output.prompt='The capital of France'
output.outputs[0].text=', Paris, is known for its iconic landmarks such as the Eiffel Tower'
output.outputs[0].token_ids=[11, 12095, 11, 374, 3881, 369, 1181, 26277, 59924, 1741, 438, 279, 468, 3092, 301, 21938]
@@ -0,0 +1,49 @@
defaults:
  - _self_
  - inference: 7b-tp2

experiment:
  exp_name: deepseek_r1_distill_qwen
  exp_dir: tests/functional_tests/test_cases/inference/deepseek_r1_distill_qwen/results_test/7b-tp2
  task:
    type: inference
    backend: vllm
    entrypoint: flagscale/inference/inference_aquila.py
  runner:
    hostfile: null
  cmds:
    before_start:
      source /opt/conda/bin/activate flagscale-inference
  envs:
    HYDRA_FULL_ERROR: 1
    CUBLAS_WORKSPACE_CONFIG: ":4096:8"
    CUDNN_BENCHMARK: "false"
    CUDNN_DETERMINISTIC: "true"
    # Quantitative perception training related
    NVTE_APPLY_QK_LAYER_SCALING: 0
    NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
    NVTE_FLASH_ATTN: 0
    NVTE_FUSED_ATTN: 0
    # GPU parallel control
    CUDA_VISIBLE_DEVICES: "0,1,2,3,4,5,6,7"
    CUDA_DEVICE_MAX_CONNECTIONS: 1
    NCCL_ALGO: "Ring"
    NCCL_PROTOCOL: LLC
    # Basic randomness control
    SEED: 1234
    PYTHONHASHSEED: 0
    MKL_NUM_THREADS: 1
    OMP_NUM_THREADS: 1
    NUMEXPR_NUM_THREADS: 1
    SCIPY_RDRANDOM: 0
    TF_DETERMINISTIC_OPS: 1
    TORCH_CUDNN_DETERMINISM: true
    CUDA_LAUNCH_BLOCKING: 1
    NCCL_DEBUG: INFO
    MAGIC_CACHE: disabled

action: run

hydra:
  run:
    dir: ${experiment.exp_dir}/hydra
@@ -0,0 +1,20 @@
llm:
  model: /home/gitlab-runner/data/DeepSeek-R1-Distill-Qwen-7B
  tokenizer: /home/gitlab-runner/data/DeepSeek-R1-Distill-Qwen-7B
  trust_remote_code: true
  tensor_parallel_size: 2
  pipeline_parallel_size: 1
  gpu_memory_utilization: 0.9
  seed: 1234
  enforce_eager: true

generate:
  prompts: [
    "The president of the United States",
    "The capital of France",
  ]
  sampling:
    top_p: 0.1
    top_k: 1
    temperature: 0.0
    seed: 1234
@@ -0,0 +1,8 @@
**************************************************
output.prompt='The president of the United States'
output.outputs[0].text=', George W. Bush, is known for his leadership in the 19'
output.outputs[0].token_ids=[11, 9857, 467, 13, 14079, 11, 374, 3881, 369, 806, 11438, 304, 279, 220, 16, 24]
**************************************************
output.prompt='The capital of France'
output.outputs[0].text=', Paris, is known for its iconic landmarks such as the Eiffel Tower'
output.outputs[0].token_ids=[11, 12095, 11, 374, 3881, 369, 1181, 26277, 59924, 1741, 438, 279, 468, 3092, 301, 21938]
@@ -0,0 +1,36 @@
defaults:
  - _self_
  - inference: 8b-tp2

experiment:
  exp_name: opi_llama3_1_instruct-flaggems
  exp_dir: tests/functional_tests/test_cases/inference/opi_llama3_1_instruct-flaggems/results_test/8b-tp2
  task:
    type: inference
    backend: vllm
    entrypoint: flagscale/inference/inference_aquila.py
  runner:
    hostfile: null
  cmds:
    before_start:
      source /opt/conda/bin/activate flagscale-inference
  envs:
    HYDRA_FULL_ERROR: 1
    CUDA_VISIBLE_DEVICES: "0,1"
    CUDA_DEVICE_MAX_CONNECTIONS: 1
    CUBLAS_WORKSPACE_CONFIG: ":4096:8"
    NCCL_ALGO: "Ring"
    NVTE_APPLY_QK_LAYER_SCALING: 0
    NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
    NVTE_FLASH_ATTN: 0
    NVTE_FUSED_ATTN: 0
    CUDNN_BENCHMARK: "false"
    CUDNN_DETERMINISTIC: "true"
    USE_FLAGGEMS: "true"
    GEMS_VENDOR: "metax"

action: run

hydra:
  run:
    dir: ${experiment.exp_dir}/hydra
@@ -0,0 +1,19 @@
llm:
  model: /home/gitlab-runner/data/OPI-Llama-3.1-8B-Instruct
  tokenizer: /home/gitlab-runner/data/OPI-Llama-3.1-8B-Instruct
  trust_remote_code: true
  tensor_parallel_size: 2
  pipeline_parallel_size: 1
  max_model_len: 2048
  gpu_memory_utilization: 0.9
  seed: 1234
  enforce_eager: true

generate:
  prompts: [
    "The president of the United States",
    "The capital of France",
  ]
Comment on lines +13 to +16 (Contributor, severity: medium):

The current formatting for the prompts list is unconventional and harms readability. Using a standard YAML block sequence format will make the configuration easier to read and maintain. This comment also applies to other similar files in this pull request, such as tests/Metax_C550/tests/functional_tests/test_cases/inference/opi_llama3_1_instruct/conf/inference/8b-tp2.yaml.

  prompts:
    - "The president of the United States"
    - "The capital of France"

  sampling:
    top_k: 1
    seed: 1234
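As a quick sanity check on the reviewer's suggestion, the short sketch below (illustrative only, assuming PyYAML; not code from this PR) confirms that the flow-style list used in these configs and the proposed block sequence parse to the same Python list, so switching styles is purely a readability change:

    import yaml  # PyYAML, assumed available purely for this illustration

    flow_style = (
        'prompts: [\n'
        '  "The president of the United States",\n'
        '  "The capital of France",\n'
        ']\n'
    )

    block_style = (
        'prompts:\n'
        '  - "The president of the United States"\n'
        '  - "The capital of France"\n'
    )

    # Both spellings yield the same data; only the on-disk layout differs.
    assert yaml.safe_load(flow_style) == yaml.safe_load(block_style)
    print(yaml.safe_load(block_style)["prompts"])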
@@ -0,0 +1,8 @@
**************************************************
output.prompt='The president of the United States'
output.outputs[0].text=' is responsible for the repression of the spread of nuclear arms and the maintenance of the'
output.outputs[0].token_ids=[374, 8647, 369, 279, 72498, 315, 279, 9041, 315, 11499, 11977, 323, 279, 13709, 315, 279]
**************************************************
output.prompt='The capital of France'
output.outputs[0].text=', which is also a major component of the globalized culture, is a hub'
output.outputs[0].token_ids=[11, 902, 374, 1101, 264, 3682, 3777, 315, 279, 3728, 1534, 7829, 11, 374, 264, 19240]
@@ -0,0 +1,35 @@
defaults:
  - _self_
  - inference: 8b-tp2

experiment:
  exp_name: opi_llama3_1_instruct
  exp_dir: tests/functional_tests/test_cases/inference/opi_llama3_1_instruct/results_test/8b-tp2
  task:
    type: inference
    backend: vllm
    entrypoint: flagscale/inference/inference_aquila.py
  runner:
    hostfile: null
  cmds:
    before_start:
      source /opt/conda/bin/activate flagscale-inference
  envs:
    HYDRA_FULL_ERROR: 1
    CUDA_VISIBLE_DEVICES: "0,1,2,3,4,5,6,7"
    CUDA_DEVICE_MAX_CONNECTIONS: 1
    CUBLAS_WORKSPACE_CONFIG: ":4096:8"
    NCCL_ALGO: "Ring"
    NVTE_APPLY_QK_LAYER_SCALING: 0
    NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
    NVTE_FLASH_ATTN: 0
    NVTE_FUSED_ATTN: 0
    CUDNN_BENCHMARK: "false"
    CUDNN_DETERMINISTIC: "true"
    GEMS_VENDOR: "metax"

action: run

hydra:
  run:
    dir: ${experiment.exp_dir}/hydra
@@ -0,0 +1,19 @@
llm:
  model: /home/gitlab-runner/data/OPI-Llama-3.1-8B-Instruct
  tokenizer: /home/gitlab-runner/data/OPI-Llama-3.1-8B-Instruct
  trust_remote_code: true
  tensor_parallel_size: 2
  pipeline_parallel_size: 1
  max_model_len: 2048
  gpu_memory_utilization: 0.9
  seed: 1234
  enforce_eager: true

generate:
  prompts: [
    "The president of the United States",
    "The capital of France",
  ]
  sampling:
    top_k: 1
    seed: 1234
@@ -0,0 +1,8 @@
**************************************************
output.prompt='The president of the United States'
output.outputs[0].text=' is responsible for the repression of the spread of nuclear arms and the maintenance of the'
output.outputs[0].token_ids=[374, 8647, 369, 279, 72498, 315, 279, 9041, 315, 11499, 11977, 323, 279, 13709, 315, 279]
**************************************************
output.prompt='The capital of France'
output.outputs[0].text=', which is also a major component of the globalized culture, is a hub'
output.outputs[0].token_ids=[11, 902, 374, 1101, 264, 3682, 3777, 315, 279, 3728, 1534, 7829, 11, 374, 264, 19240]
@@ -0,0 +1,50 @@
defaults:
  - _self_
  - inference: 4b-tp2

experiment:
  exp_name: qwen3-flaggems
  exp_dir: tests/functional_tests/test_cases/inference/qwen3-flaggems/results_test/4b-tp2
  task:
    type: inference
    backend: vllm
    entrypoint: flagscale/inference/inference_aquila.py
  runner:
    hostfile: null
  cmds:
    before_start:
      source /opt/conda/bin/activate flagscale-inference
  envs:
    HYDRA_FULL_ERROR: 1
    CUBLAS_WORKSPACE_CONFIG: ":4096:8"
    CUDNN_BENCHMARK: "false"
    CUDNN_DETERMINISTIC: "true"
    USE_FLAGGEMS: "true"
    GEMS_VENDOR: "metax"
    # Quantitative perception training related
    NVTE_APPLY_QK_LAYER_SCALING: 0
    NVTE_ALLOW_NONDETERMINISTIC_ALGO: 0
    NVTE_FLASH_ATTN: 0
    NVTE_FUSED_ATTN: 0
    # GPU parallel control
    CUDA_VISIBLE_DEVICES: "0,1,2,3,4,5,6,7"
    CUDA_DEVICE_MAX_CONNECTIONS: 1
    NCCL_ALGO: "Ring"
    NCCL_PROTOCOL: LLC
    # Basic randomness control
    SEED: 1234
    PYTHONHASHSEED: 0
    MKL_NUM_THREADS: 1
    OMP_NUM_THREADS: 1
    NUMEXPR_NUM_THREADS: 1
    SCIPY_RDRANDOM: 0
    TF_DETERMINISTIC_OPS: 1
    TORCH_CUDNN_DETERMINISM: true
    CUDA_LAUNCH_BLOCKING: 1
    MAGIC_CACHE: disabled

action: run

hydra:
  run:
    dir: ${experiment.exp_dir}/hydra