--extra-index-url https://download.pytorch.org/whl/cu128
torch==2.7.1+cu128
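# Optional sanity check, assuming the cu128 install above succeeded:
# python -c "import torch; print(torch.__version__, torch.version.cuda, torch.cuda.is_available())"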
numpy>=1.26
transformers>=4.46,<5.0 # vllm 0.10.1 incompatible with transformers 5.x (all_special_tokens_extended removed)
huggingface-hub>=0.26
accelerate>=1.0
sentence-transformers>=5.2.3
sentencepiece>=0.2.0
einops>=0.8.0
# faiss-gpu-cu12-cuvs requires its RAPIDS dependencies to be installed first:
# pip install libcuvs-cu12==25.10.0 librmm-cu12==25.10.0 libraft-cu12==25.10.0 rapids-logger "nvidia-nvjitlink-cu12>=12.9" --extra-index-url https://pypi.nvidia.com
faiss @ https://github.com/Gabrieliam42/faiss-gpu-cu12-cuvs/releases/download/v1.14.0/faiss_gpu_cu12_cuvs-1.14.0-cp312-cp312-linux_x86_64.whl; platform_system == "Linux"
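# Optional check that the GPU faiss build loaded (should report >= 1 on a CUDA machine):
# python -c "import faiss; print(faiss.get_num_gpus())"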
bitsandbytes>=0.49.2; platform_system == "Linux"
causal-conv1d>=1.6.0; platform_system == "Linux"
mamba-ssm>=2.3.0; platform_system == "Linux"
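# Optional import check for the Mamba stack (both packages build CUDA extensions):
# python -c "import causal_conv1d, mamba_ssm"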
python-docx>=1.1.2
# flash-attn prebuilt wheels (official): https://github.com/Dao-AILab/flash-attention/releases/tag/v2.8.3
# Example for this stack (Linux x86_64, Python 3.12, torch 2.7, CUDA 12, cxx11abi TRUE):
# pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp312-cp312-linux_x86_64.whl
flash-attn>=2.8.3; platform_system == "Linux"
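# Optional check that the prebuilt flash-attn wheel matches this torch/CUDA ABI:
# python -c "import flash_attn; print(flash_attn.__version__)"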
# vllm is installed separately; the pin is kept commented out below for reference:
# pip install vllm==0.10.1 --extra-index-url https://download.pytorch.org/whl/cu128
# vllm==0.10.1; platform_system == "Linux"
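# Optional post-install check:
# python -c "import vllm; print(vllm.__version__)"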
# llama-cpp-python + ggml-python cu124 prebuilt wheels (GGUF GPU inference):
# pip install https://github.com/abetlen/ggml-python/releases/download/v0.0.37-cu124/ggml_python-0.0.37-cp312-cp312-linux_x86_64.whl https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp312-cp312-linux_x86_64.whl
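# Optional smoke test once a GGUF model is on disk (model path below is a placeholder;
# n_gpu_layers=-1 offloads all layers to the GPU):
# python -c "from llama_cpp import Llama; Llama(model_path='model.gguf', n_gpu_layers=-1)"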