33 commits
759dca3  TEST: add api evaluate (littlegy, Sep 18, 2025)
a955b7d  TEST: rm qwen1.5_7b test (littlegy, Sep 18, 2025)
aa8a0bd  TEST: add evaluate result to github (littlegy, Sep 18, 2025)
71022de  CI: update workflow docker (littlegy, Sep 18, 2025)
88a6836  TEST: update code based on comments (littlegy, Sep 19, 2025)
1d20999  Merge branch 'main' into api_eva (littlegy, Sep 19, 2025)
c68f4d2  TEST: update docker (littlegy, Sep 19, 2025)
96b8c55  Merge branch 'InternLM:main' into api_eva (littlegy, Sep 19, 2025)
fd244e7  add H800 base model eval (zhulinJulia24, Sep 19, 2025)
cbdbeeb  add h800 api eval (zhulinJulia24, Sep 19, 2025)
cf1ddcc  update (zhulinJulia24, Sep 19, 2025)
b50e1f5  update (zhulinJulia24, Sep 19, 2025)
7951325  Update eval_base_config.py (zhulinJulia24, Sep 20, 2025)
a4a903b  update (zhulinJulia24, Sep 20, 2025)
cbed0dc  update (zhulinJulia24, Sep 22, 2025)
a72ea20  update max_out_len (zhulinJulia24, Sep 22, 2025)
a42ac7e  set oc data path (zhulinJulia24, Sep 22, 2025)
94f8b85  update (zhulinJulia24, Sep 22, 2025)
bb37a84  update (zhulinJulia24, Sep 22, 2025)
440a833  update (zhulinJulia24, Sep 22, 2025)
db5ac50  update (zhulinJulia24, Sep 22, 2025)
7dc54bd  update (zhulinJulia24, Sep 22, 2025)
8df42da  update (zhulinJulia24, Sep 22, 2025)
506c0e3  update (zhulinJulia24, Sep 22, 2025)
d16448a  update (zhulinJulia24, Sep 23, 2025)
5a7a1bb  Merge branch 'InternLM:main' into add_h800_eval (zhulinJulia24, Sep 23, 2025)
aa6a8b8  Update evaluate_h800.yml (zhulinJulia24, Sep 23, 2025)
a245005  update (zhulinJulia24, Sep 24, 2025)
5528e5e  Merge branch 'InternLM:main' into add_h800_eval (zhulinJulia24, Sep 24, 2025)
d72ef37  Update eval_base_config.py (zhulinJulia24, Sep 24, 2025)
7cca1ff  update (zhulinJulia24, Sep 25, 2025)
8b64b50  merge main (zhulinJulia24, Sep 25, 2025)
54c9c14  update (zhulinJulia24, Sep 26, 2025)
134 changes: 81 additions & 53 deletions .github/scripts/eval_base_config.py
@@ -39,26 +39,6 @@
    wikibench_datasets  # noqa: F401, E501
from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
    winogrande_datasets  # noqa: F401, E501
from opencompass.configs.models.baichuan.hf_baichuan_7b import models as hf_baichuan_7b # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma_7b import models as hf_gemma_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import models as hf_internlm2_5_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_7b import models as hf_internlm2_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_20b import models as hf_internlm2_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_7b import models as hf_internlm_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_20b import models as hf_internlm_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b import \
    models as lmdeploy_internlm2_5_7b  # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama2_7b import models as hf_llama2_7b # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama3_8b import models as hf_llama3_8b # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mistral_7b_v0_1 import models as hf_mistral_7b_v0_1 # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mixtral_8x7b_v0_1 import \
    models as hf_mixtral_8x7b_v0_1  # noqa: F401, E501
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b import models as lmdeploy_qwen2_5_7b # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen1_5_7b import models as hf_qwen1_5_7b # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen2_7b import models as hf_qwen2_7b # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen_7b import models as hf_qwen_7b # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen1_5_7b import models as lmdeploy_qwen1_5_7b # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import models as lmdeploy_qwen2_7b # noqa: F401, E501
# Summary Groups
from opencompass.configs.summarizers.groups.cmmlu import cmmlu_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.GaokaoBench import GaokaoBench_summary_groups # noqa: F401, E501
@@ -69,6 +49,14 @@

# read models
race_datasets = [race_datasets[1]]
mmlu_datasets = [
    x for x in mmlu_datasets if x['abbr'].replace('lukaemon_mmlu_', '') in [
        'business_ethics', 'clinical_knowledge', 'college_medicine', 'global_facts', 'human_aging', 'management',
        'marketing', 'medical_genetics', 'miscellaneous', 'nutrition', 'professional_accounting',
        'professional_medicine', 'virology'
    ]
]
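The comprehension above keeps only the 13 listed MMLU subsets, matching on each config's abbreviation with the 'lukaemon_mmlu_' prefix stripped. The same subsetting can be written as a reusable helper; a minimal sketch, assuming each OpenCompass dataset config is a dict carrying an 'abbr' key such as 'lukaemon_mmlu_virology' (the helper name is hypothetical, not part of this PR):

def subset_by_abbr(datasets, prefix, keep):
    """Keep only dataset configs whose abbr (minus prefix) is in `keep`."""
    wanted = set(keep)
    return [d for d in datasets if d['abbr'].replace(prefix, '') in wanted]

# e.g. mmlu_datasets = subset_by_abbr(mmlu_datasets, 'lukaemon_mmlu_', ['virology'])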

summarizer = dict(
    dataset_abbrs=[
        ['race-high', 'accuracy'],
@@ -138,49 +126,89 @@
    summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)
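The `summary_groups` line uses a common OpenCompass config idiom: every imported `*_summary_groups` list is collected from the module namespace and flattened with `sum(..., [])`. An explicit equivalent, for readers unfamiliar with the trick (illustrative, not part of the diff):

groups = []
for name, value in list(locals().items()):
    if name.endswith('_summary_groups'):
        groups.extend(value)  # each value is a list of summary-group dicts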

turbomind_qwen1_5_7b = deepcopy(*lmdeploy_qwen1_5_7b)
turbomind_qwen2_7b = deepcopy(*lmdeploy_qwen2_7b)
turbomind_qwen2_5_7b = deepcopy(*lmdeploy_qwen2_5_7b)
turbomind_qwen2_5_14b = deepcopy(*lmdeploy_qwen2_5_7b)
turbomind_qwen2_5_14b['path'] = 'Qwen/Qwen2.5-14B'
turbomind_internlm2_5_7b = deepcopy(*lmdeploy_internlm2_5_7b)
turbomind_internlm2_5_7b_4bits = deepcopy(*lmdeploy_internlm2_5_7b)
turbomind_internlm2_5_7b_batch1 = deepcopy(*lmdeploy_internlm2_5_7b)
turbomind_internlm2_5_7b_batch1_4bits = deepcopy(*lmdeploy_internlm2_5_7b)

base_model = dict(
    type=TurboMindModel,
    engine_config=dict(session_len=7168, max_batch_size=128, tp=1),
    engine_config=dict(session_len=7168, tp=1),
    gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
    max_seq_len=7168,
    max_out_len=1024,
    batch_size=128,
    batch_size=32,
    run_cfg=dict(num_gpus=1),
)
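Every model entry below is produced by deep-copying `base_model` and overriding a handful of keys, with `num_gpus` and `tp` kept in lockstep. A hypothetical helper capturing that pattern (`make_variant` is an illustrative name, not part of this PR):

from copy import deepcopy

def make_variant(template, path, abbr, num_gpus=1):
    """Clone the shared base config and override the per-model fields."""
    cfg = deepcopy(template)
    cfg['path'] = path
    cfg['abbr'] = abbr
    cfg['run_cfg']['num_gpus'] = num_gpus  # tensor parallelism must match GPU count
    cfg['engine_config']['tp'] = num_gpus
    return cfg

# e.g. turbomind_qwen2_5_32b = make_variant(base_model, 'Qwen/Qwen2.5-32B',
#                                           'turbomind_qwen2_5_32b', num_gpus=2)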

turbomind_qwen2_5_1_5b = deepcopy(base_model)
turbomind_qwen2_5_1_5b['path'] = 'Qwen/Qwen2.5-1.5B'
turbomind_qwen2_5_1_5b['abbr'] = 'turbomind_qwen2_5_1_5b'
turbomind_qwen2_5_7b = deepcopy(base_model)
turbomind_qwen2_5_7b['path'] = 'Qwen/Qwen2.5-7B'
turbomind_qwen2_5_7b['abbr'] = 'turbomind_qwen2_5_7b'
turbomind_qwen2_5_32b = deepcopy(base_model)
turbomind_qwen2_5_32b['path'] = 'Qwen/Qwen2.5-32B'
turbomind_qwen2_5_32b['abbr'] = 'turbomind_qwen2_5_32b'
turbomind_qwen2_5_32b['run_cfg']['num_gpus'] = 2
turbomind_qwen2_5_32b['engine_config']['tp'] = 2
turbomind_internlm2_5_7b = deepcopy(base_model)
turbomind_internlm2_5_7b['path'] = 'internlm/internlm2_5-7b-chat'
turbomind_internlm2_5_7b['abbr'] = 'turbomind_internlm2_5_7b'
turbomind_glm_4_9b = deepcopy(base_model)
turbomind_glm_4_9b['path'] = 'THUDM/glm-4-9b'
turbomind_glm_4_9b['abbr'] = 'turbomind_glm_4_9b'
turbomind_llama_3_70b = deepcopy(base_model)
turbomind_llama_3_70b['path'] = 'meta-llama/Meta-Llama-3-70B'
turbomind_llama_3_70b['abbr'] = 'turbomind_llama_3_70b'
turbomind_llama_3_70b['run_cfg']['num_gpus'] = 4
turbomind_llama_3_70b['engine_config']['tp'] = 4
turbomind_llama_3_1_8b = deepcopy(base_model)
turbomind_llama_3_1_8b['path'] = 'meta-llama/Llama-3.1-8B'
turbomind_llama_3_1_8b['abbr'] = 'turbomind_llama_3_1_8b'
turbomind_qwen3_0_6b_base = deepcopy(base_model)
turbomind_qwen3_0_6b_base['path'] = 'Qwen/Qwen3-0.6B-Base'
turbomind_qwen3_0_6b_base['abbr'] = 'turbomind_qwen3_0_6b_base'
turbomind_qwen3_8b_base = deepcopy(base_model)
pytorch_qwen3_8b_base = deepcopy(base_model)
turbomind_qwen3_8b_base_4bits = deepcopy(base_model)
turbomind_qwen3_8b_base_kvint8 = deepcopy(base_model)
for model in [
        v for k, v in locals().items()
        if k.startswith('turbomind_qwen3_8b_base') or k.startswith('pytorch_qwen3_8b_base')
]:
    model['abbr'] = 'qwen3_8b_base_turbomind'
    model['path'] = 'Qwen/Qwen3-8B-Base'
    model['run_cfg']['num_gpus'] = 1
    model['engine_config']['tp'] = 1
turbomind_qwen3_8b_base['path'] = 'Qwen/Qwen3-8B-Base'
turbomind_qwen3_8b_base['abbr'] = 'turbomind_qwen3_8b_base'
turbomind_qwen3_30b_A3B_base = deepcopy(base_model)
turbomind_qwen3_30b_A3B_base['path'] = 'Qwen/Qwen3-30B-A3B-Base'
turbomind_qwen3_30b_A3B_base['abbr'] = 'turbomind_qwen3_30b_A3B_base'
turbomind_qwen3_30b_A3B_base['run_cfg']['num_gpus'] = 2
turbomind_qwen3_30b_A3B_base['engine_config']['tp'] = 2

for model in [v for k, v in locals().items() if k.endswith('_4bits')]:
    model['engine_config']['model_format'] = 'awq'
    model['abbr'] = model['abbr'] + '_4bits'
    model['path'] = model['path'] + '-inner-4bits'

for model in [v for k, v in locals().items() if '_batch1' in k]:
    model['abbr'] = model['abbr'] + '_batch1'
    model['engine_config']['max_batch_size'] = 1
    model['batch_size'] = 1
pytorch_qwen2_5_1_5b = deepcopy(base_model)
pytorch_qwen2_5_1_5b['path'] = 'Qwen/Qwen2.5-1.5B'
pytorch_qwen2_5_1_5b['abbr'] = 'pytorch_qwen2_5_1_5b'
pytorch_qwen2_5_7b = deepcopy(base_model)
pytorch_qwen2_5_7b['path'] = 'Qwen/Qwen2.5-7B'
pytorch_qwen2_5_7b['abbr'] = 'pytorch_qwen2_5_7b'
pytorch_qwen2_5_32b = deepcopy(base_model)
pytorch_qwen2_5_32b['path'] = 'Qwen/Qwen2.5-32B'
pytorch_qwen2_5_32b['abbr'] = 'pytorch_qwen2_5_32b'
pytorch_qwen2_5_32b['run_cfg']['num_gpus'] = 2
pytorch_qwen2_5_32b['engine_config']['tp'] = 2
pytorch_internlm2_5_7b = deepcopy(base_model)
pytorch_internlm2_5_7b['path'] = 'internlm/internlm2_5-7b-chat'
pytorch_internlm2_5_7b['abbr'] = 'pytorch_internlm2_5_7b'
pytorch_gemma_2_9b = deepcopy(base_model)
pytorch_gemma_2_9b['path'] = 'google/gemma-2-9b'
pytorch_gemma_2_9b['abbr'] = 'pytorch_gemma_2_9b'
pytorch_llama_3_70b = deepcopy(base_model)
pytorch_llama_3_70b['path'] = 'meta-llama/Meta-Llama-3-70B'
pytorch_llama_3_70b['abbr'] = 'pytorch_llama_3_70b'
pytorch_llama_3_70b['run_cfg']['num_gpus'] = 4
pytorch_llama_3_70b['engine_config']['tp'] = 4
pytorch_llama_3_1_8b = deepcopy(base_model)
pytorch_llama_3_1_8b['path'] = 'meta-llama/Llama-3.1-8B'
pytorch_llama_3_1_8b['abbr'] = 'pytorch_llama_3_1_8b'
pytorch_qwen3_0_6b_base = deepcopy(base_model)
pytorch_qwen3_0_6b_base['path'] = 'Qwen/Qwen3-0.6B-Base'
pytorch_qwen3_0_6b_base['abbr'] = 'pytorch_qwen3_0_6b_base'
pytorch_qwen3_8b_base = deepcopy(base_model)
pytorch_qwen3_8b_base['path'] = 'Qwen/Qwen3-8B-Base'
pytorch_qwen3_8b_base['abbr'] = 'pytorch_qwen3_8b_base'
pytorch_qwen3_30b_A3B_base = deepcopy(base_model)
pytorch_qwen3_30b_A3B_base['path'] = 'Qwen/Qwen3-30B-A3B-Base'
pytorch_qwen3_30b_A3B_base['abbr'] = 'pytorch_qwen3_30b_A3B_base'
pytorch_qwen3_30b_A3B_base['run_cfg']['num_gpus'] = 2
pytorch_qwen3_30b_A3B_base['engine_config']['tp'] = 2

for model in [v for k, v in locals().items() if k.startswith('pytorch_')]:
    model['abbr'] = model['abbr'].replace('turbomind', 'pytorch')
    model['backend'] = 'pytorch'
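This final loop rebrands every `pytorch_*` entry: the abbreviation is swapped and `backend` is set to `'pytorch'`, presumably so the evaluation harness selects lmdeploy's PyTorch engine instead of TurboMind for those configs. A sketch of the kind of dispatch such a flag implies, using lmdeploy's real engine-config classes (the dispatch helper itself is illustrative):

from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig

def engine_config_for(backend: str, **kwargs):
    """Pick the engine config class matching the 'backend' field (illustrative)."""
    cls = PytorchEngineConfig if backend == 'pytorch' else TurbomindEngineConfig
    return cls(**kwargs)

# engine_config_for('pytorch', tp=2) -> PytorchEngineConfig(tp=2)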
140 changes: 140 additions & 0 deletions .github/workflows/api_eval.yml
@@ -0,0 +1,140 @@
name: api_eval

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"


env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  FAIL_CONFIG: '--lf'
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
  COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache

jobs:
  linux-build:
    if: ${{ !cancelled() }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.4
      OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  test_evaluation:
    needs: linux-build
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, test-140]
    timeout-minutes: 2400
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
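`fromJSON` expands the dispatch input (or the fallback literal) into a list, and one `test_evaluation` job is spawned per element. In Python terms (illustrative; note that `fromJSON` expects strict JSON, i.e. double-quoted strings):

import json

backends = json.loads('["turbomind", "pytorch"]')
for backend in backends:  # one matrix job per entry
    print(f'spawn test_evaluation with matrix.backend={backend}')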
    container:
      image: openmmlab/lmdeploy:latest-cu12
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/shared:/mnt/shared
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
        - /mnt/187:/mnt/187
    steps:
      - name: Create and change to _wk directory
        run: |
          echo "Working directory set to: $(pwd)"
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r requirements_cuda.txt
          python3 -m pip install -r ${{ env.OFFLINE_REQUIREMENTS }}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        run: |
          python3 -m pip install opencompass
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          mkdir -p ${{ env.REPORT_DIR }}/.pytest_cache
          ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest
      - name: Setup paths for evaluation
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
        run: |
          overall_exit=0
          ln -s /mnt/187/opencompass-data/data ./data
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_1 and not pr_test and ${{ matrix.backend }}" -n 8 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_2 and not pr_test and ${{ matrix.backend }}" -n 4 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_4 and not pr_test and ${{ matrix.backend }}" -n 2 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_8 and not pr_test and ${{ matrix.backend }}" -n 1 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          exit $overall_exit
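The step above deliberately runs all four GPU-count shards even when an earlier one fails: each pytest call records its exit code into `overall_exit` (the last failure wins), and the step exits nonzero only at the end. The same pattern in Python (a sketch, with hypothetical commands):

import subprocess
import sys

def run_all(commands):
    """Run every command; fail at the end if any of them failed."""
    overall_exit = 0
    for cmd in commands:
        rc = subprocess.call(cmd)
        if rc != 0:
            overall_exit = rc  # remember the failure, keep going
    sys.exit(overall_exit)

# run_all([['pytest', '-m', 'gpu_num_1'], ['pytest', '-m', 'gpu_num_2']])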
      - name: Clear workspace
        if: always()
        run: |
          export workdir=$(pwd)
          rm -rf $workdir/*