diff --git a/.github/packaging/vllm_reqs.txt b/.github/packaging/vllm_reqs.txt
new file mode 100644
index 00000000..929e5a3f
--- /dev/null
+++ b/.github/packaging/vllm_reqs.txt
@@ -0,0 +1,147 @@
+# These requirements were generated by running steps 1-3 of scripts/build_wheels.shell
+# then running pip freeze and manually removing the vllm dependency.
+# The intention of this file is to use these known requirements for a fixed
+# vLLM build to supplement a vLLM install from download.pytorch.org without
+# resorting to --extra-index-url https://download.pytorch.org/whl/nightly to find
+# vLLM dependencies (as this results in a ResolutionTooDeep error from pip).
+# See the file .github/workflows/gpu_test.yaml for an E2E forge installation using this approach.
+# TODO: this should be done way less hackily
+aiohappyeyeballs==2.6.1
+aiohttp==3.13.0
+aiosignal==1.4.0
+annotated-types==0.7.0
+anyio==4.11.0
+astor==0.8.1
+async-timeout==5.0.1
+attrs==25.4.0
+blake3==1.0.7
+cachetools==6.2.0
+cbor2==5.7.0
+certifi==2025.10.5
+cffi==2.0.0
+charset-normalizer==3.4.3
+click==8.3.0
+cloudpickle==3.1.1
+cmake==4.1.0
+compressed-tensors==0.10.2
+cupy-cuda12x==13.6.0
+depyf==0.19.0
+dill==0.4.0
+diskcache==5.6.3
+distro==1.9.0
+dnspython==2.8.0
+einops==0.8.1
+email-validator==2.3.0
+exceptiongroup==1.3.0
+fastapi==0.118.3
+fastapi-cli==0.0.13
+fastapi-cloud-cli==0.3.1
+fastrlock==0.8.3
+filelock==3.19.1
+frozenlist==1.8.0
+fsspec==2025.9.0
+gguf==0.17.1
+h11==0.16.0
+hf-xet==1.1.10
+httpcore==1.0.9
+httptools==0.7.1
+httpx==0.28.1
+huggingface-hub==0.35.3
+idna==3.10
+interegular==0.3.3
+Jinja2==3.1.6
+jiter==0.11.0
+jsonschema==4.25.1
+jsonschema-specifications==2025.9.1
+lark==1.2.2
+llguidance==0.7.30
+llvmlite==0.44.0
+lm-format-enforcer==0.10.12
+markdown-it-py==4.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mistral_common==1.8.5
+mpmath==1.3.0
+msgpack==1.1.2
+msgspec==0.19.0
+multidict==6.7.0
+networkx==3.4.2
+ninja==1.13.0
+numba==0.61.2
+numpy==2.2.6
+nvidia-cublas-cu12==12.9.1.4
+nvidia-cuda-cupti-cu12==12.9.79
+nvidia-cuda-nvrtc-cu12==12.9.86
+nvidia-cuda-runtime-cu12==12.9.79
+nvidia-cudnn-cu12==9.10.2.21
+nvidia-cufft-cu12==11.4.1.4
+nvidia-cufile-cu12==1.14.1.1
+nvidia-curand-cu12==10.3.10.19
+nvidia-cusolver-cu12==11.7.5.82
+nvidia-cusparse-cu12==12.5.10.65
+nvidia-cusparselt-cu12==0.7.1
+nvidia-nccl-cu12==2.27.5
+nvidia-nvjitlink-cu12==12.9.86
+nvidia-nvshmem-cu12==3.3.20
+nvidia-nvtx-cu12==12.9.79
+openai==1.90.0
+opencv-python-headless==4.12.0.88
+outlines_core==0.2.10
+packaging==25.0
+partial-json-parser==0.2.1.1.post6
+pillow==11.3.0
+prometheus-fastapi-instrumentator==7.1.0
+prometheus_client==0.23.1
+propcache==0.4.1
+protobuf==6.32.1
+psutil==7.1.0
+py-cpuinfo==9.0.0
+pybase64==1.4.2
+pycountry==24.6.1
+pycparser==2.23
+pydantic==2.12.0
+pydantic-extra-types==2.10.6
+pydantic_core==2.41.1
+Pygments==2.19.2
+python-dotenv==1.1.1
+python-json-logger==4.0.0
+python-multipart==0.0.20
+pytorch-triton==3.4.0+gitf7888497
+PyYAML==6.0.3
+pyzmq==27.1.0
+ray==2.49.2
+referencing==0.36.2
+regex==2025.9.18
+requests==2.32.5
+rich==14.2.0
+rich-toolkit==0.15.1
+rignore==0.7.0
+rpds-py==0.27.1
+safetensors==0.6.2
+scipy==1.15.3
+sentencepiece==0.2.1
+sentry-sdk==2.41.0
+setuptools-scm==9.2.0
+shellingham==1.5.4
+sniffio==1.3.1
+soundfile==0.13.1
+soxr==1.0.0
+starlette==0.48.0
+sympy==1.14.0
+tiktoken==0.12.0
+tokenizers==0.22.1
+tomli==2.3.0
+torch==2.9.0.dev20250905+cu129
+tqdm==4.67.1
+transformers==4.57.0
+triton==3.4.0
+typer==0.19.2
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+urllib3==2.5.0
+uvicorn==0.37.0
+uvloop==0.21.0
+watchfiles==1.1.0
+websockets==15.0.1
+xgrammar==0.1.21
+yarl==1.22.0
diff --git a/.github/workflows/gpu_test.yaml b/.github/workflows/gpu_test.yaml
new file mode 100644
index 00000000..fb6cf507
--- /dev/null
+++ b/.github/workflows/gpu_test.yaml
@@ -0,0 +1,73 @@
+name: GPU tests
+
+on:
+  schedule:
+    # Runs at midnight every day
+    - cron: '0 0 * * *'
+  push:
+    branches: [ main ]
+  pull_request:
+  workflow_dispatch:
+
+concurrency:
+  # On main every run gets its own group (keyed by run_number); on any other ref
+  # the group is the ref itself, so a newer run cancels the one in progress.
+  group: gpu-test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  id-token: write
+  contents: read
+
+defaults:
+  run:
+    shell: bash -l -eo pipefail {0}
+
+jobs:
+  gpu_test:
+    # Only run when the repository owner is meta-pytorch
+    if: github.repository_owner == 'meta-pytorch'
+    runs-on: linux.g5.12xlarge.nvidia.gpu
+    strategy:
+      matrix:
+        python-version: ['3.10']
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+      - name: Setup conda env
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          auto-update-conda: true
+          miniconda-version: "latest"
+          activate-environment: test
+          python-version: ${{ matrix.python-version }}
+      - name: Update pip
+        run: python -m pip install --upgrade pip
+      - name: Install pinned torch nightly
+        run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129
+      - name: Download and install vLLM and its dependencies
+        # TODO: this honestly could not be hackier if I tried
+        run: |
+          python -m pip install -r .github/packaging/vllm_reqs.txt
+          python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge
+      - name: Download and install monarch and its dependencies
+        run: |
+          python -m pip install -r https://raw.githubusercontent.com/meta-pytorch/monarch/main/requirements.txt
+          python -m pip install torchmonarch --extra-index-url https://download.pytorch.org/whl/preview/forge
+      - name: Install torchtitan and torchstore
+        run: |
+          python -m pip install git+https://github.com/pytorch/torchtitan.git
+          python -m pip install git+https://github.com/meta-pytorch/torchstore.git
+      - name: Install dependencies
+        run: python -m pip install --no-build-isolation -e ".[dev]"
+      - name: Run unit tests with coverage
+        # TODO add all tests
+        run: |
+          # Preload the "test" conda env's libpython and add that env's lib directory
+          # to the dynamic loader search path. LD_LIBRARY_PATH is a colon-separated
+          # list of directories, so it must not point at the .so file itself.
+          export LD_PRELOAD=$CONDA/envs/test/lib/libpython${{ matrix.python-version }}.so.1.0
+          export LD_LIBRARY_PATH=$CONDA/envs/test/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+          pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
+      - name: Upload Coverage to Codecov
+        uses: codecov/codecov-action@v3