33 commits
759dca3  TEST: add api evaluate (littlegy, Sep 18, 2025)
a955b7d  TEST: rm qwen1.5_7b test (littlegy, Sep 18, 2025)
aa8a0bd  TEST: add evaluate result to github (littlegy, Sep 18, 2025)
71022de  CI: update workflow docker (littlegy, Sep 18, 2025)
88a6836  TEST: update code based on comments (littlegy, Sep 19, 2025)
1d20999  Merge branch 'main' into api_eva (littlegy, Sep 19, 2025)
c68f4d2  TEST: update docker (littlegy, Sep 19, 2025)
96b8c55  Merge branch 'InternLM:main' into api_eva (littlegy, Sep 19, 2025)
fd244e7  add H800 base model eval (zhulinJulia24, Sep 19, 2025)
cbdbeeb  add h800 api eval (zhulinJulia24, Sep 19, 2025)
cf1ddcc  update (zhulinJulia24, Sep 19, 2025)
b50e1f5  update (zhulinJulia24, Sep 19, 2025)
7951325  Update eval_base_config.py (zhulinJulia24, Sep 20, 2025)
a4a903b  update (zhulinJulia24, Sep 20, 2025)
cbed0dc  update (zhulinJulia24, Sep 22, 2025)
a72ea20  update max_out_len (zhulinJulia24, Sep 22, 2025)
a42ac7e  set oc data path (zhulinJulia24, Sep 22, 2025)
94f8b85  update (zhulinJulia24, Sep 22, 2025)
bb37a84  update (zhulinJulia24, Sep 22, 2025)
440a833  update (zhulinJulia24, Sep 22, 2025)
db5ac50  update (zhulinJulia24, Sep 22, 2025)
7dc54bd  update (zhulinJulia24, Sep 22, 2025)
8df42da  update (zhulinJulia24, Sep 22, 2025)
506c0e3  update (zhulinJulia24, Sep 22, 2025)
d16448a  update (zhulinJulia24, Sep 23, 2025)
5a7a1bb  Merge branch 'InternLM:main' into add_h800_eval (zhulinJulia24, Sep 23, 2025)
aa6a8b8  Update evaluate_h800.yml (zhulinJulia24, Sep 23, 2025)
a245005  update (zhulinJulia24, Sep 24, 2025)
5528e5e  Merge branch 'InternLM:main' into add_h800_eval (zhulinJulia24, Sep 24, 2025)
d72ef37  Update eval_base_config.py (zhulinJulia24, Sep 24, 2025)
7cca1ff  update (zhulinJulia24, Sep 25, 2025)
8b64b50  merge main (zhulinJulia24, Sep 25, 2025)
54c9c14  update (zhulinJulia24, Sep 26, 2025)
134 changes: 81 additions & 53 deletions .github/scripts/eval_base_config.py
@@ -39,26 +39,6 @@
    wikibench_datasets  # noqa: F401, E501
from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
    winogrande_datasets  # noqa: F401, E501
from opencompass.configs.models.baichuan.hf_baichuan_7b import models as hf_baichuan_7b # noqa: F401, E501
from opencompass.configs.models.gemma.hf_gemma_7b import models as hf_gemma_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_5_7b import models as hf_internlm2_5_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_7b import models as hf_internlm2_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm2_20b import models as hf_internlm2_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_7b import models as hf_internlm_7b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.hf_internlm_20b import models as hf_internlm_20b # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b import \
    models as lmdeploy_internlm2_5_7b  # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama2_7b import models as hf_llama2_7b # noqa: F401, E501
from opencompass.configs.models.hf_llama.hf_llama3_8b import models as hf_llama3_8b # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mistral_7b_v0_1 import models as hf_mistral_7b_v0_1 # noqa: F401, E501
from opencompass.configs.models.mistral.hf_mixtral_8x7b_v0_1 import \
    models as hf_mixtral_8x7b_v0_1  # noqa: F401, E501
from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b import models as lmdeploy_qwen2_5_7b # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen1_5_7b import models as hf_qwen1_5_7b # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen2_7b import models as hf_qwen2_7b # noqa: F401, E501
from opencompass.configs.models.qwen.hf_qwen_7b import models as hf_qwen_7b # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen1_5_7b import models as lmdeploy_qwen1_5_7b # noqa: F401, E501
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b import models as lmdeploy_qwen2_7b # noqa: F401, E501
# Summary Groups
from opencompass.configs.summarizers.groups.cmmlu import cmmlu_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.GaokaoBench import GaokaoBench_summary_groups # noqa: F401, E501
@@ -69,6 +49,14 @@

# read models
race_datasets = [race_datasets[1]]
mmlu_datasets = [
    x for x in mmlu_datasets if x['abbr'].replace('lukaemon_mmlu_', '') in [
        'business_ethics', 'clinical_knowledge', 'college_medicine', 'global_facts', 'human_aging', 'management',
        'marketing', 'medical_genetics', 'miscellaneous', 'nutrition', 'professional_accounting',
        'professional_medicine', 'virology'
    ]
]
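The comprehension above keeps only the 13 listed MMLU subsets, matching on each config's abbreviation with the 'lukaemon_mmlu_' prefix stripped. The same subsetting can be written as a reusable helper; a minimal sketch, assuming each OpenCompass dataset config is a dict carrying an 'abbr' key such as 'lukaemon_mmlu_virology' (the helper name is hypothetical, not part of this PR):

def subset_by_abbr(datasets, prefix, keep):
    """Keep only dataset configs whose abbr (minus prefix) is in `keep`."""
    wanted = set(keep)
    return [d for d in datasets if d['abbr'].replace(prefix, '') in wanted]

# e.g. mmlu_datasets = subset_by_abbr(mmlu_datasets, 'lukaemon_mmlu_', ['virology'])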

summarizer = dict(
    dataset_abbrs=[
        ['race-high', 'accuracy'],
@@ -138,49 +126,89 @@
    summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)
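The `summary_groups` line uses a common OpenCompass config idiom: every imported `*_summary_groups` list is collected from the module namespace and flattened with `sum(..., [])`. An explicit equivalent, for readers unfamiliar with the trick (illustrative, not part of the diff):

groups = []
for name, value in list(locals().items()):
    if name.endswith('_summary_groups'):
        groups.extend(value)  # each value is a list of summary-group dicts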

turbomind_qwen1_5_7b = deepcopy(*lmdeploy_qwen1_5_7b)
turbomind_qwen2_7b = deepcopy(*lmdeploy_qwen2_7b)
turbomind_qwen2_5_7b = deepcopy(*lmdeploy_qwen2_5_7b)
turbomind_qwen2_5_14b = deepcopy(*lmdeploy_qwen2_5_7b)
turbomind_qwen2_5_14b['path'] = 'Qwen/Qwen2.5-14B'
turbomind_internlm2_5_7b = deepcopy(*lmdeploy_internlm2_5_7b)
turbomind_internlm2_5_7b_4bits = deepcopy(*lmdeploy_internlm2_5_7b)
turbomind_internlm2_5_7b_batch1 = deepcopy(*lmdeploy_internlm2_5_7b)
turbomind_internlm2_5_7b_batch1_4bits = deepcopy(*lmdeploy_internlm2_5_7b)

base_model = dict(
    type=TurboMindModel,
    engine_config=dict(session_len=7168, max_batch_size=128, tp=1),
    engine_config=dict(session_len=7168, tp=1),
    gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=1024),
    max_seq_len=7168,
    max_out_len=1024,
    batch_size=128,
    batch_size=32,
    run_cfg=dict(num_gpus=1),
)
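Every model entry below is produced by deep-copying `base_model` and overriding a handful of keys, with `num_gpus` and `tp` kept in lockstep. A hypothetical helper capturing that pattern (`make_variant` is an illustrative name, not part of this PR):

from copy import deepcopy

def make_variant(template, path, abbr, num_gpus=1):
    """Clone the shared base config and override the per-model fields."""
    cfg = deepcopy(template)
    cfg['path'] = path
    cfg['abbr'] = abbr
    cfg['run_cfg']['num_gpus'] = num_gpus  # tensor parallelism must match GPU count
    cfg['engine_config']['tp'] = num_gpus
    return cfg

# e.g. turbomind_qwen2_5_32b = make_variant(base_model, 'Qwen/Qwen2.5-32B',
#                                           'turbomind_qwen2_5_32b', num_gpus=2)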

turbomind_qwen2_5_1_5b = deepcopy(base_model)
turbomind_qwen2_5_1_5b['path'] = 'Qwen/Qwen2.5-1.5B'
turbomind_qwen2_5_1_5b['abbr'] = 'turbomind_qwen2_5_1_5b'
turbomind_qwen2_5_7b = deepcopy(base_model)
turbomind_qwen2_5_7b['path'] = 'Qwen/Qwen2.5-7B'
turbomind_qwen2_5_7b['abbr'] = 'turbomind_qwen2_5_7b'
turbomind_qwen2_5_32b = deepcopy(base_model)
turbomind_qwen2_5_32b['path'] = 'Qwen/Qwen2.5-32B'
turbomind_qwen2_5_32b['abbr'] = 'turbomind_qwen2_5_32b'
turbomind_qwen2_5_32b['run_cfg']['num_gpus'] = 2
turbomind_qwen2_5_32b['engine_config']['tp'] = 2
turbomind_internlm2_5_7b = deepcopy(base_model)
turbomind_internlm2_5_7b['path'] = 'internlm/internlm2_5-7b-chat'
turbomind_internlm2_5_7b['abbr'] = 'turbomind_internlm2_5_7b'
turbomind_glm_4_9b = deepcopy(base_model)
turbomind_glm_4_9b['path'] = 'THUDM/glm-4-9b'
turbomind_glm_4_9b['abbr'] = 'turbomind_glm_4_9b'
turbomind_llama_3_70b = deepcopy(base_model)
turbomind_llama_3_70b['path'] = 'meta-llama/Meta-Llama-3-70B'
turbomind_llama_3_70b['abbr'] = 'turbomind_llama_3_70b'
turbomind_llama_3_70b['run_cfg']['num_gpus'] = 4
turbomind_llama_3_70b['engine_config']['tp'] = 4
turbomind_llama_3_1_8b = deepcopy(base_model)
turbomind_llama_3_1_8b['path'] = 'meta-llama/Llama-3.1-8B'
turbomind_llama_3_1_8b['abbr'] = 'turbomind_llama_3_1_8b'
turbomind_qwen3_0_6b_base = deepcopy(base_model)
turbomind_qwen3_0_6b_base['path'] = 'Qwen/Qwen3-0.6B-Base'
turbomind_qwen3_0_6b_base['abbr'] = 'turbomind_qwen3_0_6b_base'
turbomind_qwen3_8b_base = deepcopy(base_model)
pytorch_qwen3_8b_base = deepcopy(base_model)
turbomind_qwen3_8b_base_4bits = deepcopy(base_model)
turbomind_qwen3_8b_base_kvint8 = deepcopy(base_model)
for model in [
        v for k, v in locals().items()
        if k.startswith('turbomind_qwen3_8b_base') or k.startswith('pytorch_qwen3_8b_base')
]:
    model['abbr'] = 'qwen3_8b_base_turbomind'
    model['path'] = 'Qwen/Qwen3-8B-Base'
    model['run_cfg']['num_gpus'] = 1
    model['engine_config']['tp'] = 1
turbomind_qwen3_8b_base['path'] = 'Qwen/Qwen3-8B-Base'
turbomind_qwen3_8b_base['abbr'] = 'turbomind_qwen3_8b_base'
turbomind_qwen3_30b_A3B_base = deepcopy(base_model)
turbomind_qwen3_30b_A3B_base['path'] = 'Qwen/Qwen3-30B-A3B-Base'
turbomind_qwen3_30b_A3B_base['abbr'] = 'turbomind_qwen3_30b_A3B_base'
turbomind_qwen3_30b_A3B_base['run_cfg']['num_gpus'] = 2
turbomind_qwen3_30b_A3B_base['engine_config']['tp'] = 2

for model in [v for k, v in locals().items() if k.endswith('_4bits')]:
    model['engine_config']['model_format'] = 'awq'
    model['abbr'] = model['abbr'] + '_4bits'
    model['path'] = model['path'] + '-inner-4bits'

for model in [v for k, v in locals().items() if '_batch1' in k]:
    model['abbr'] = model['abbr'] + '_batch1'
    model['engine_config']['max_batch_size'] = 1
    model['batch_size'] = 1
pytorch_qwen2_5_1_5b = deepcopy(base_model)
pytorch_qwen2_5_1_5b['path'] = 'Qwen/Qwen2.5-1.5B'
pytorch_qwen2_5_1_5b['abbr'] = 'pytorch_qwen2_5_1_5b'
pytorch_qwen2_5_7b = deepcopy(base_model)
pytorch_qwen2_5_7b['path'] = 'Qwen/Qwen2.5-7B'
pytorch_qwen2_5_7b['abbr'] = 'pytorch_qwen2_5_7b'
pytorch_qwen2_5_32b = deepcopy(base_model)
pytorch_qwen2_5_32b['path'] = 'Qwen/Qwen2.5-32B'
pytorch_qwen2_5_32b['abbr'] = 'pytorch_qwen2_5_32b'
pytorch_qwen2_5_32b['run_cfg']['num_gpus'] = 2
pytorch_qwen2_5_32b['engine_config']['tp'] = 2
pytorch_internlm2_5_7b = deepcopy(base_model)
pytorch_internlm2_5_7b['path'] = 'internlm/internlm2_5-7b-chat'
pytorch_internlm2_5_7b['abbr'] = 'pytorch_internlm2_5_7b'
pytorch_gemma_2_9b = deepcopy(base_model)
pytorch_gemma_2_9b['path'] = 'google/gemma-2-9b'
pytorch_gemma_2_9b['abbr'] = 'pytorch_gemma_2_9b'
pytorch_llama_3_70b = deepcopy(base_model)
pytorch_llama_3_70b['path'] = 'meta-llama/Meta-Llama-3-70B'
pytorch_llama_3_70b['abbr'] = 'pytorch_llama_3_70b'
pytorch_llama_3_70b['run_cfg']['num_gpus'] = 4
pytorch_llama_3_70b['engine_config']['tp'] = 4
pytorch_llama_3_1_8b = deepcopy(base_model)
pytorch_llama_3_1_8b['path'] = 'meta-llama/Llama-3.1-8B'
pytorch_llama_3_1_8b['abbr'] = 'pytorch_llama_3_1_8b'
pytorch_qwen3_0_6b_base = deepcopy(base_model)
pytorch_qwen3_0_6b_base['path'] = 'Qwen/Qwen3-0.6B-Base'
pytorch_qwen3_0_6b_base['abbr'] = 'pytorch_qwen3_0_6b_base'
pytorch_qwen3_8b_base = deepcopy(base_model)
pytorch_qwen3_8b_base['path'] = 'Qwen/Qwen3-8B-Base'
pytorch_qwen3_8b_base['abbr'] = 'pytorch_qwen3_8b_base'
pytorch_qwen3_30b_A3B_base = deepcopy(base_model)
pytorch_qwen3_30b_A3B_base['path'] = 'Qwen/Qwen3-30B-A3B-Base'
pytorch_qwen3_30b_A3B_base['abbr'] = 'pytorch_qwen3_30b_A3B_base'
pytorch_qwen3_30b_A3B_base['run_cfg']['num_gpus'] = 2
pytorch_qwen3_30b_A3B_base['engine_config']['tp'] = 2

for model in [v for k, v in locals().items() if k.startswith('pytorch_')]:
    model['abbr'] = model['abbr'].replace('turbomind', 'pytorch')
    model['backend'] = 'pytorch'
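This final loop rebrands every `pytorch_*` entry: the abbreviation is swapped and `backend` is set to `'pytorch'`, presumably so the evaluation harness selects lmdeploy's PyTorch engine instead of TurboMind for those configs. A sketch of the kind of dispatch such a flag implies, using lmdeploy's real engine-config classes (the dispatch helper itself is illustrative):

from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig

def engine_config_for(backend: str, **kwargs):
    """Pick the engine config class matching the 'backend' field (illustrative)."""
    cls = PytorchEngineConfig if backend == 'pytorch' else TurbomindEngineConfig
    return cls(**kwargs)

# engine_config_for('pytorch', tp=2) -> PytorchEngineConfig(tp=2)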
140 changes: 140 additions & 0 deletions .github/workflows/api_eval.yml
@@ -0,0 +1,140 @@
name: api_eval

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch']"


env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  FAIL_CONFIG: '--lf'
  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
  OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
  OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
  DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
  COMPASS_DATA_CACHE: /nvme/qa_test_models/compass_data_cache

jobs:
  linux-build:
    if: ${{ !cancelled() }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.4
      OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  test_evaluation:
    needs: linux-build
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, test-140]
    timeout-minutes: 2400
    strategy:
      fail-fast: false
      matrix:
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]') }}
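`fromJSON` expands the dispatch input (or the fallback literal) into a list, and one `test_evaluation` job is spawned per element. In Python terms (illustrative; note that `fromJSON` expects strict JSON, i.e. double-quoted strings):

import json

backends = json.loads('["turbomind", "pytorch"]')
for backend in backends:  # one matrix job per entry
    print(f'spawn test_evaluation with matrix.backend={backend}')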
    container:
      image: openmmlab/lmdeploy:latest-cu12
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/shared:/mnt/shared
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
        - /mnt/187:/mnt/187
    steps:
      - name: Create and change to _wk directory
        run: |
          echo "Working directory set to: $(pwd)"
      - name: Clone repository
        uses: actions/checkout@v2
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r requirements_cuda.txt
          python3 -m pip install -r ${{ env.OFFLINE_REQUIREMENTS }}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        run: |
          python3 -m pip install opencompass
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          mkdir -p ${{ env.REPORT_DIR }}/.pytest_cache
          ln -s ${{ env.REPORT_DIR }}/.pytest_cache autotest
      - name: Setup paths for evaluation
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
        run: |
          overall_exit=0
          ln -s /mnt/187/opencompass-data/data ./data
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_1 and not pr_test and ${{ matrix.backend }}" -n 8 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_2 and not pr_test and ${{ matrix.backend }}" -n 4 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_4 and not pr_test and ${{ matrix.backend }}" -n 2 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_8 and not pr_test and ${{ matrix.backend }}" -n 1 --run_id ${{ github.run_id }} --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
          exit $overall_exit
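The step above deliberately runs all four GPU-count shards even when an earlier one fails: each pytest call records its exit code into `overall_exit` (the last failure wins), and the step exits nonzero only at the end. The same pattern in Python (a sketch, with hypothetical commands):

import subprocess
import sys

def run_all(commands):
    """Run every command; fail at the end if any of them failed."""
    overall_exit = 0
    for cmd in commands:
        rc = subprocess.call(cmd)
        if rc != 0:
            overall_exit = rc  # remember the failure, keep going
    sys.exit(overall_exit)

# run_all([['pytest', '-m', 'gpu_num_1'], ['pytest', '-m', 'gpu_num_2']])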
      - name: Clear workspace
        if: always()
        run: |
          export workdir=$(pwd)
          rm -rf $workdir/*