Toolchains for aarch64 platforms #3474

Open · wants to merge 8 commits into main

4 changes: 1 addition & 3 deletions .bazelrc
@@ -36,9 +36,7 @@ build:cxx11_abi --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=1"
build:cxx11_abi --linkopt="-D_GLIBCXX_USE_CXX11_ABI=1"
build:cxx11_abi --define=abi=cxx11_abi

build:pre_cxx11_abi --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"
build:pre_cxx11_abi --linkopt="-D_GLIBCXX_USE_CXX11_ABI=0"
build:pre_cxx11_abi --define=abi=pre_cxx11_abi
build:jetpack --//toolchains/dep_collection:compute_libs=jetpack

build:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0"
build:use_precompiled_torchtrt --define=torchtrt_src=prebuilt
2 changes: 1 addition & 1 deletion .bazelversion
@@ -1 +1 @@
7.2.1
8.1.1
11 changes: 9 additions & 2 deletions .github/scripts/filter-matrix.py
@@ -1,13 +1,15 @@
#!/usr/bin/env python3
from typing import List

import argparse
import json
import sys

disabled_python_versions = "3.13"
cpu_tag = "cpu"


def main(args: list[str]) -> None:
def main(args: List[str]) -> None:
parser = argparse.ArgumentParser()
parser.add_argument(
"--matrix",
@@ -24,7 +26,12 @@ def main(args: list[str]) -> None:
includes = matrix_dict["include"]
filtered_includes = []
for item in includes:
if item["python_version"] not in disabled_python_versions:
if all(
[
item["python_version"] not in disabled_python_versions,
item["desired_cuda"] != cpu_tag
]
):
filtered_includes.append(item)
filtered_matrix_dict = {}
filtered_matrix_dict["include"] = filtered_includes
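For reference, a minimal, self-contained sketch of how the updated filter behaves (not part of the diff): the field names come from this change, while the sample entries and CUDA tags are illustrative only.

```python
# Illustrative only: mimics the filter added in this diff on sample entries.
disabled_python_versions = "3.13"
cpu_tag = "cpu"

includes = [
    {"python_version": "3.12", "desired_cuda": "cu126"},  # kept
    {"python_version": "3.13", "desired_cuda": "cu126"},  # dropped: disabled Python
    {"python_version": "3.12", "desired_cuda": "cpu"},    # dropped: CPU-only tag
]

filtered_includes = [
    item
    for item in includes
    if all(
        [
            item["python_version"] not in disabled_python_versions,
            item["desired_cuda"] != cpu_tag,
        ]
    )
]
print(filtered_includes)  # [{'python_version': '3.12', 'desired_cuda': 'cu126'}]
```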
82 changes: 46 additions & 36 deletions .github/scripts/generate_binary_build_matrix.py
@@ -2,6 +9,9 @@

"""Generates a matrix to be utilized through github actions

Important. After making changes to this file please run following command:
python -m tools.tests.test_generate_binary_build_matrix --update-reference-files

Will output a condensed version of the matrix if on a pull request that only
includes the latest version of python we support built on four different
architectures:
@@ -11,13 +14,13 @@
* Latest XPU
"""


import argparse
import json
import os
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple


PYTHON_ARCHES_DICT = {
"nightly": ["3.9", "3.10", "3.11", "3.12"],
"test": ["3.9", "3.10", "3.11", "3.12"],
@@ -26,16 +29,30 @@
CUDA_ARCHES_DICT = {
"nightly": ["11.8", "12.6", "12.8"],
"test": ["11.8", "12.6", "12.8"],
"release": ["11.8", "12.6", "12.8"],
"release": ["11.8", "12.4", "12.6"],
}
ROCM_ARCHES_DICT = {
"nightly": ["6.1", "6.2"],
"test": ["6.1", "6.2"],
"release": ["6.1", "6.2"],
"nightly": ["6.2.4", "6.3"],
"test": ["6.2.4", "6.3"],
"release": ["6.1", "6.2.4"],
}

CUDA_CUDNN_VERSIONS = {
"11.8": {"cuda": "11.8.0", "cudnn": "9"},
"12.4": {"cuda": "12.4.1", "cudnn": "9"},
"12.6": {"cuda": "12.6.3", "cudnn": "9"},
"12.8": {"cuda": "12.8.0", "cudnn": "9"},
}

STABLE_CUDA_VERSIONS = {
"nightly": "12.6",
"test": "12.6",
"release": "12.4",
}

CUDA_AARCH64_ARCHES = ["12.8-aarch64", "12.6-aarch64"]

PACKAGE_TYPES = ["wheel", "conda", "libtorch"]
PRE_CXX11_ABI = "pre-cxx11"
CXX11_ABI = "cxx11-abi"
RELEASE = "release"
DEBUG = "debug"
@@ -59,7 +76,7 @@

CURRENT_NIGHTLY_VERSION = "2.8.0"
CURRENT_CANDIDATE_VERSION = "2.7.0"
CURRENT_STABLE_VERSION = "2.7.0"
CURRENT_STABLE_VERSION = "2.6.0"
CURRENT_VERSION = CURRENT_STABLE_VERSION

# By default use Nightly for CUDA arches
@@ -94,7 +111,7 @@ def arch_type(arch_version: str) -> str:
return ROCM
elif arch_version == CPU_AARCH64:
return CPU_AARCH64
elif arch_version == CUDA_AARCH64:
elif arch_version in CUDA_AARCH64_ARCHES:
return CUDA_AARCH64
elif arch_version == XPU:
return XPU
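As a quick illustration of the dispatch change above, a self-contained sketch (not part of the diff; the constant value below is an assumption for illustration):

```python
# Illustrative only: versioned aarch64 CUDA strings are now resolved by
# membership in CUDA_AARCH64_ARCHES rather than a single equality check.
CUDA_AARCH64_ARCHES = ["12.8-aarch64", "12.6-aarch64"]
CUDA_AARCH64 = "cuda-aarch64"  # assumed constant value, for illustration

def arch_type_sketch(arch_version: str) -> str:
    if arch_version in CUDA_AARCH64_ARCHES:
        return CUDA_AARCH64
    return "other"

print(arch_type_sketch("12.6-aarch64"))  # cuda-aarch64
print(arch_type_sketch("12.6"))          # other
```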
@@ -140,11 +157,14 @@ def initialize_globals(channel: str, build_python_only: bool) -> None:
else:
PYTHON_ARCHES = PYTHON_ARCHES_DICT[channel]
WHEEL_CONTAINER_IMAGES = {
"11.8": "pytorch/manylinux2_28-builder:cuda11.8",
"12.1": "pytorch/manylinux2_28-builder:cuda12.1",
"12.4": "pytorch/manylinux2_28-builder:cuda12.4",
"12.6": "pytorch/manylinux2_28-builder:cuda12.6",
"12.8": "pytorch/manylinux2_28-builder:cuda12.8",
**{
gpu_arch: f"pytorch/manylinux2_28-builder:cuda{gpu_arch}"
for gpu_arch in CUDA_ARCHES
},
**{
gpu_arch: f"pytorch/manylinuxaarch64-builder:cuda{gpu_arch.replace('-aarch64', '')}"
for gpu_arch in CUDA_AARCH64_ARCHES
},
**{
gpu_arch: f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
for gpu_arch in ROCM_ARCHES
@@ -153,26 +173,17 @@ def initialize_globals(channel: str, build_python_only: bool) -> None:
XPU: "pytorch/manylinux2_28-builder:xpu",
# TODO: Migrate CUDA_AARCH64 image to manylinux2_28_aarch64-builder:cuda12.4
CPU_AARCH64: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64",
CUDA_AARCH64: "pytorch/manylinuxaarch64-builder:cuda12.4",
CUDA_AARCH64: "pytorch/manylinuxaarch64-builder:cuda12.6",
}
LIBTORCH_CONTAINER_IMAGES = {
**{
(gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:cuda{gpu_arch}"
for gpu_arch in CUDA_ARCHES
},
**{
(gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}"
for gpu_arch in CUDA_ARCHES
},
**{
(gpu_arch, PRE_CXX11_ABI): f"pytorch/manylinux2_28-builder:rocm{gpu_arch}"
for gpu_arch in ROCM_ARCHES
},
**{
(gpu_arch, CXX11_ABI): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}"
for gpu_arch in ROCM_ARCHES
},
(CPU, PRE_CXX11_ABI): "pytorch/manylinux2_28-builder:cpu",
(CPU, CXX11_ABI): "pytorch/libtorch-cxx11-builder:cpu",
}
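For clarity, a small sketch (not part of the diff) of what the new aarch64 comprehension in WHEEL_CONTAINER_IMAGES evaluates to, using the CUDA_AARCH64_ARCHES values defined earlier in this change:

```python
# Illustrative only: what the aarch64 wheel-image comprehension produces,
# given the CUDA_AARCH64_ARCHES constant defined in this diff.
CUDA_AARCH64_ARCHES = ["12.8-aarch64", "12.6-aarch64"]

aarch64_images = {
    gpu_arch: f"pytorch/manylinuxaarch64-builder:cuda{gpu_arch.replace('-aarch64', '')}"
    for gpu_arch in CUDA_AARCH64_ARCHES
}
print(aarch64_images)
# {'12.8-aarch64': 'pytorch/manylinuxaarch64-builder:cuda12.8',
#  '12.6-aarch64': 'pytorch/manylinuxaarch64-builder:cuda12.6'}
```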

@@ -181,7 +192,7 @@ def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
return {
CPU: "cpu",
CPU_AARCH64: CPU,
CUDA_AARCH64: "cu124",
CUDA_AARCH64: f"cu{gpu_arch_version.replace('-aarch64', '').replace('.', '')}",
CUDA: f"cu{gpu_arch_version.replace('.', '')}",
ROCM: f"rocm{gpu_arch_version}",
XPU: "xpu",
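A short sketch of the new CUDA_AARCH64 translation, extracted from the f-string above into a standalone helper (the helper name is invented for illustration; not part of the diff):

```python
# Illustrative only: the new CUDA_AARCH64 branch strips the "-aarch64"
# suffix and the dot, then prefixes "cu" (mirrors the f-string above).
def cuda_aarch64_tag(gpu_arch_version: str) -> str:  # hypothetical helper name
    return f"cu{gpu_arch_version.replace('-aarch64', '').replace('.', '')}"

print(cuda_aarch64_tag("12.8-aarch64"))  # cu128
print(cuda_aarch64_tag("12.6-aarch64"))  # cu126
```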
@@ -272,7 +283,7 @@ def get_wheel_install_command(
return f"{WHL_INSTALL_BASE} {PACKAGES_TO_INSTALL_WHL} --index-url {get_base_download_url_for_repo('whl', channel, gpu_arch_type, desired_cuda)}_pypi_pkg" # noqa: E501
else:
raise ValueError(
"Split build is not supported for this configuration. It is only supported for CUDA 11.8, 12.4, 12.6, 12.8 on Linux nightly builds." # noqa: E501
"Split build is not supported for this configuration. It is only supported for CUDA 11.8, 12.4, 12.6 on Linux nightly builds." # noqa: E501
)
if (
channel == RELEASE
@@ -343,7 +354,7 @@ def generate_libtorch_matrix(
if os == WINDOWS:
abi_versions = [RELEASE, DEBUG]
elif os == LINUX:
abi_versions = [PRE_CXX11_ABI, CXX11_ABI]
abi_versions = [CXX11_ABI]
elif os in [MACOS_ARM64]:
abi_versions = [CXX11_ABI]
else:
@@ -422,11 +433,6 @@ def generate_wheels_matrix(
# Define default python version
python_versions = list(PYTHON_ARCHES)

# If the list of python versions is set explicitly by the caller, stick with it instead
# of trying to add more versions behind the scene
if channel == NIGHTLY and (os in (LINUX, MACOS_ARM64, LINUX_AARCH64)):
python_versions += ["3.13"]

if os == LINUX:
# NOTE: We only build manywheel packages for linux
package_type = "manywheel"
@@ -442,7 +448,11 @@
if os == LINUX_AARCH64:
# Only want the one arch as the CPU type is different and
# uses different build/test scripts
arches = [CPU_AARCH64, CUDA_AARCH64]
arches = []
if with_cpu == ENABLE:
arches += [CPU_AARCH64]
elif with_cuda == ENABLE:
arches += CUDA_AARCH64_ARCHES

if with_cuda == ENABLE:
upload_to_base_bucket = "no"
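To make the new aarch64 arch selection concrete, a minimal sketch (not part of the diff), under the assumption that ENABLE/DISABLE are the string flags "enable"/"disable" and CPU_AARCH64 is "cpu-aarch64":

```python
# Illustrative only: aarch64 arch selection with the CPU flag taking priority
# over CUDA. Flag and constant values below are assumptions for this sketch.
from typing import List

ENABLE = "enable"
DISABLE = "disable"
CPU_AARCH64 = "cpu-aarch64"
CUDA_AARCH64_ARCHES = ["12.8-aarch64", "12.6-aarch64"]

def linux_aarch64_arches(with_cpu: str, with_cuda: str) -> List[str]:
    arches: List[str] = []
    if with_cpu == ENABLE:
        arches += [CPU_AARCH64]
    elif with_cuda == ENABLE:
        arches += CUDA_AARCH64_ARCHES
    return arches

print(linux_aarch64_arches(ENABLE, ENABLE))   # ['cpu-aarch64']
print(linux_aarch64_arches(DISABLE, ENABLE))  # ['12.8-aarch64', '12.6-aarch64']
```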
@@ -463,15 +473,15 @@
ret: List[Dict[str, Any]] = []
for python_version in python_versions:
for arch_version in arches:
# TODO: Enable Python 3.13 support for ROCM
if arch_version in ROCM_ARCHES and python_version == "3.13":
continue

gpu_arch_type = arch_type(arch_version)
gpu_arch_version = (
"" if arch_version in [CPU, CPU_AARCH64, XPU] else arch_version
)

# TODO: Enable python 3.13t on cpu-s390x or Windows
if (gpu_arch_type == "cpu-s390x") and python_version == "3.13t":
continue

desired_cuda = translate_desired_cuda(gpu_arch_type, gpu_arch_version)
entry = {
"python_version": python_version,