intel_kivi_hybrid/setup.py at main · Rajaykumar12/intel_kivi_hybrid · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
KIVI-SYCL: 2-bit KV Cache Quantization for Intel GPUs
======================================================
Build & install:
    pip install . --no-build-isolation

Requires:
    - Intel oneAPI DPC++ compiler (source setvars.sh first)
    - PyTorch with Intel Extension for PyTorch (IPEX)
"""

import os
import glob
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CppExtension

# --------------------------------------------------------------------------
# Auto-detect oneAPI SYCL include path (portable across versions)
# --------------------------------------------------------------------------
def find_sycl_include(oneapi_root):
    """Return the SYCL include directory below *oneapi_root*, or "" if none.

    Searches ``<oneapi_root>/compiler/<version>/include/sycl``. The ``latest``
    entry is preferred when it exists; otherwise the highest release wins,
    with version components compared numerically so that ``2024.10`` ranks
    above ``2024.2`` (a plain string sort gets this wrong).
    """

    def version_rank(path):
        # <root>/compiler/<version>/include/sycl  ->  <version>
        version = os.path.basename(os.path.dirname(os.path.dirname(path)))
        if version == "latest":
            return (2, (), version)
        parts = version.split(".")
        if all(part.isascii() and part.isdigit() for part in parts):
            return (1, tuple(int(part) for part in parts), version)
        # Unrecognised directory names rank below every numbered release.
        return (0, (), version)

    # Escape the root so glob metacharacters in ONEAPI_ROOT are taken literally.
    pattern = os.path.join(
        glob.escape(oneapi_root), "compiler", "*", "include", "sycl"
    )
    # The wildcard also matches 'latest', so no separate fallback is needed.
    candidates = [path for path in glob.glob(pattern) if os.path.isdir(path)]
    return max(candidates, key=version_rank, default="")


oneapi_root = os.environ.get("ONEAPI_ROOT", "/opt/intel/oneapi")
# An explicit SYCL_INCLUDE_DIR always takes precedence over auto-detection.
sycl_include = (
    os.environ.get("SYCL_INCLUDE_DIR", "") or find_sycl_include(oneapi_root)
)

if sycl_include:
    print(f"[KIVI] SYCL include: {sycl_include}")
else:
    print("[KIVI] WARNING: Could not find SYCL include dir. "
          "Set SYCL_INCLUDE_DIR or source setvars.sh.")

# --------------------------------------------------------------------------
# Build flags
# --------------------------------------------------------------------------
# Extra flags supplied by the caller's environment, split on whitespace.
cxx_flags = os.getenv("CXXFLAGS", "").split()
ld_flags = os.getenv("LDFLAGS", "").split()

# Fixed compiler options first, then whatever CXXFLAGS contributed.
compile_flags = ["-fsycl", "-fPIC", "-std=c++17", "-O3", "-w", *cxx_flags]
if sycl_include:
    # Only add an include path when one was actually found.
    compile_flags += [f"-I{sycl_include}"]

# The link step receives every compile flag followed by the LDFLAGS entries.
link_flags = [*compile_flags, *ld_flags]

# --------------------------------------------------------------------------
# Package metadata
# --------------------------------------------------------------------------
# Locate README.md next to this script rather than in the current working
# directory, so the lookup does not depend on where the build was started.
readme_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "README.md"
)
try:
    with open(readme_path, encoding="utf-8") as f:
        long_description = f.read()
except FileNotFoundError:
    # The README only feeds package metadata; a missing file must not
    # abort the extension build.
    long_description = ""

# Native extension module `kivi_sycl`, built from a single C++ source with
# the compile and link flags computed earlier in this script.
kivi_extension = CppExtension(
    name="kivi_sycl",
    sources=["src/kivi_optimized.cpp"],
    extra_compile_args=compile_flags,
    extra_link_args=link_flags,
)

# Trove classifiers published with the package metadata.
trove_classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: C++",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Operating System :: POSIX :: Linux",
]

setup(
    # --- identity -----------------------------------------------------------
    name="kivi-sycl",
    version="0.1.0",
    author="Rajay",
    url="https://github.com/Rajaykumar12/intel_kivi_hybrid",
    license="MIT",
    # --- descriptions -------------------------------------------------------
    description="KIVI: Tuning-Free Asymmetric 2-bit KV Cache Quantization for Intel GPUs",
    long_description=long_description,
    long_description_content_type="text/markdown",
    keywords="kv-cache quantization 2-bit sycl intel xpu llm inference",
    classifiers=trove_classifiers,
    # --- requirements -------------------------------------------------------
    python_requires=">=3.10",
    install_requires=["torch", "transformers"],
    # --- contents: one pure-Python module plus the compiled extension --------
    py_modules=["kivi_cache"],
    ext_modules=[kivi_extension],
    cmdclass={"build_ext": BuildExtension},
)