[shortfin llm integration tests] Add tinystories llama2 25m model as a smoke test #936

Merged: 12 commits, Feb 14, 2025
5 changes: 5 additions & 0 deletions .github/workflows/pkgci_shark_ai.yml
@@ -88,6 +88,11 @@ jobs:
          source ${VENV_DIR}/bin/activate
          uv pip install -r requirements-iree-pinned.txt

      - name: Run LLM Smoke Test
        run: |
          source ${VENV_DIR}/bin/activate
          pytest -v -s --test_device=${{ matrix.test_device }} app_tests/integration_tests/llm/shortfin/tinystories_llama2_25m_test.py --log-cli-level=INFO

      - name: Run LLM Integration Tests
        run: |
          source ${VENV_DIR}/bin/activate
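For reference, the new smoke-test step can be reproduced outside CI with an equivalent pytest invocation. The sketch below is not part of the diff; it assumes a virtual environment with the shark-ai test requirements installed, and the "cpu" device name is a placeholder for whatever ${{ matrix.test_device }} resolves to.

# Sketch only: local equivalent of the "Run LLM Smoke Test" workflow step above.
import sys

import pytest

exit_code = pytest.main(
    [
        "-v",
        "-s",
        "--test_device=cpu",  # placeholder; use your matrix.test_device value
        "app_tests/integration_tests/llm/shortfin/tinystories_llama2_25m_test.py",
        "--log-cli-level=INFO",
    ]
)
sys.exit(int(exit_code))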
@@ -33,7 +33,7 @@
# we can replace this with an import after #890 merges
TEST_MODELS = {
    "llama3.1_8b": ModelConfig(
-        source=ModelSource.HUGGINGFACE,
+        source=ModelSource.HUGGINGFACE_FROM_GGUF,
        repo_id="SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF",
        model_file="meta-llama-3.1-8b-instruct.f16.gguf",
        tokenizer_id="NousResearch/Meta-Llama-3.1-8B",
198 changes: 163 additions & 35 deletions app_tests/integration_tests/llm/model_management.py
@@ -1,9 +1,12 @@
"""Module for managing model artifacts through various processing stages."""
import logging
import tempfile
import zipfile
import urllib.request
from pathlib import Path
import subprocess
from dataclasses import dataclass
-from typing import Optional, Tuple, Dict
+from typing import Optional, Tuple
from enum import Enum, auto

from sharktank.utils.hf_datasets import Dataset, RemoteFile, get_dataset
@@ -13,6 +16,37 @@
logger = logging.getLogger(__name__)


def get_llama_cpp_path() -> Path:
    """Downloads and extracts llama.cpp if needed, returns path to installation."""
    # Use system temp directory as base
    temp_base = Path(tempfile.gettempdir()) / "sharktank_llamacpp"
    llama_cpp_dir = temp_base / "llama.cpp-b4696"

    # Only download and extract if not already present
    if not llama_cpp_dir.exists():
        temp_base.mkdir(parents=True, exist_ok=True)
        zip_path = temp_base / "llama.cpp.zip"

        # Download zip file
        logger.info("Downloading llama.cpp...")
        urllib.request.urlretrieve(
            "https://github.com/ggerganov/llama.cpp/archive/refs/tags/b4696.zip",
            zip_path,
        )

        # Extract zip file
        logger.info("Extracting llama.cpp...")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(temp_base)

        # Clean up zip file
        zip_path.unlink()

    logger.info(f"llama.cpp installed at {llama_cpp_dir}")

    return llama_cpp_dir


class AccuracyValidationException(RuntimeError):
    """Custom exception for accuracy validation failures."""

@@ -32,9 +66,10 @@ def __init__(


class ModelSource(Enum):
-    HUGGINGFACE = auto()
+    HUGGINGFACE_FROM_GGUF = auto()
    LOCAL = auto()
    AZURE = auto()
    HUGGINGFACE_FROM_SAFETENSORS = auto()


@dataclass
@@ -62,7 +97,7 @@ class ModelConfig:
    azure_config: Optional[AzureConfig] = None

    def __post_init__(self):
-        if self.source == ModelSource.HUGGINGFACE:
+        if self.source == ModelSource.HUGGINGFACE_FROM_GGUF:
            if not (self.dataset_name or self.repo_id):
                raise ValueError(
                    "Either dataset_name or repo_id required for HuggingFace models"
@@ -71,6 +106,11 @@ def __post_init__(self):
raise ValueError("local_path required for local models")
elif self.source == ModelSource.AZURE and not self.azure_config:
raise ValueError("azure_config required for Azure models")
elif self.source == ModelSource.HUGGINGFACE_FROM_SAFETENSORS:
if not self.dataset_name:
raise ValueError(
"dataset_name required for HUGGINGFACE_FROM_SAFETENSORS models"
)


@dataclass
@@ -96,7 +136,7 @@ def __init__(self, base_dir: Path, config: ModelConfig):

    def _get_model_dir(self) -> Path:
        """Creates and returns appropriate model directory based on source."""
-        if self.config.source == ModelSource.HUGGINGFACE:
+        if self.config.source == ModelSource.HUGGINGFACE_FROM_GGUF:
            if self.config.dataset_name:
                return self.base_dir / self.config.dataset_name.replace("/", "_")
            return self.base_dir / self.config.repo_id.replace("/", "_")
@@ -108,6 +148,8 @@ def _get_model_dir(self) -> Path:
/ "azure"
/ self.config.azure_config.blob_path.replace("/", "_")
)
elif self.config.source == ModelSource.HUGGINGFACE_FROM_SAFETENSORS:
return self.base_dir / self.config.dataset_name.replace("/", "_")
raise ValueError(f"Unsupported model source: {self.config.source}")

def _download_from_huggingface(self) -> Path:
@@ -143,12 +185,67 @@ def _download_from_huggingface(self) -> Path:

        return model_path

    def _download_and_convert_from_huggingface(self) -> Path:
        """Downloads model from HuggingFace and converts through GGUF to IRPA."""
        irpa_path = self.model_dir / "model.irpa"

        if not irpa_path.exists():
            logger.info(
                f"Processing model {self.config.repo_id} from HuggingFace through GGUF to IRPA"
            )

            # Step 1: Download from HuggingFace
            hf_model_path = self.model_dir / "model_hf_repo_clone"
            if not hf_model_path.exists():
                logger.info(
                    f"Downloading model from HuggingFace: {self.config.repo_id}"
                )
                dataset = get_dataset(self.config.dataset_name)
                downloaded_files = dataset.download(local_dir=self.model_dir)

            # Step 2: Convert to GGUF
            gguf_path = self.model_dir / "model.gguf"
            if not gguf_path.exists():
                logger.info("Converting model to GGUF format")
                subprocess.run(
                    [
                        "python",
                        get_llama_cpp_path() / "convert_hf_to_gguf.py",
                        self.model_dir,
                        "--outfile",
                        str(gguf_path),
                        "--outtype",
                        "f32",
                    ],
                    check=True,
                )

            # Step 3: Convert to IRPA
            logger.info("Converting GGUF to IRPA format")
            subprocess.run(
                [
                    "python",
                    "-m",
                    "sharktank.tools.dump_gguf",
                    f"--gguf-file={gguf_path}",
                    "--save",
                    str(irpa_path),
                ],
                check=True,
            )

            # Cleanup intermediate files if desired
            # shutil.rmtree(hf_model_path)
            # gguf_path.unlink()

        return irpa_path

    def _copy_from_local(self) -> Path:
        """Copies model from local filesystem."""
        import shutil

        model_path = self.model_dir / self.config.model_file
        if not model_path.exists():
            import shutil

            logger.info(f"Copying local model from {self.config.local_path}")
            shutil.copy2(self.config.local_path, model_path)
        return model_path
@@ -267,12 +364,14 @@ def process_model(self, config: ModelConfig) -> ModelArtifacts:
        manager = ModelStageManager(self.base_dir, config)

        # Stage 1: Download weights and tokenizer (cached)
-        if config.source == ModelSource.HUGGINGFACE:
+        if config.source == ModelSource.HUGGINGFACE_FROM_GGUF:
            weights_path = manager._download_from_huggingface()
        elif config.source == ModelSource.LOCAL:
            weights_path = manager._copy_from_local()
        elif config.source == ModelSource.AZURE:
            weights_path = manager._download_from_azure()
        elif config.source == ModelSource.HUGGINGFACE_FROM_SAFETENSORS:
            weights_path = manager._download_and_convert_from_huggingface()
        else:
            raise ValueError(f"Unsupported model source: {config.source}")

@@ -294,33 +393,62 @@ def process_model(self, config: ModelConfig) -> ModelArtifacts:
)


-TEST_MODELS = {
-    "open_llama_3b": ModelConfig(
-        source=ModelSource.HUGGINGFACE,
-        repo_id="SlyEcho/open_llama_3b_v2_gguf",
-        model_file="open-llama-3b-v2-f16.gguf",
-        tokenizer_id="openlm-research/open_llama_3b_v2",
-        batch_sizes=(1, 4),
-        device_settings=None,
-    ),
-    "llama3.1_8b": ModelConfig(
-        source=ModelSource.HUGGINGFACE,
-        repo_id="SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF",
-        model_file="meta-llama-3.1-8b-instruct.f16.gguf",
-        tokenizer_id="NousResearch/Meta-Llama-3.1-8B",
-        batch_sizes=(1, 4),
-        device_settings=None,
-    ),
-    "azure_llama": ModelConfig(
-        source=ModelSource.AZURE,
-        azure_config=AzureConfig(
-            account_name="sharkblobs",
-            container_name="halo-models",
-            blob_path="llm-dev/llama3_8b/8b_f16.irpa",
-        ),
-        model_file="azure-llama.irpa",
-        tokenizer_id="openlm-research/open_llama_3b_v2",
-        batch_sizes=(1, 4),
-        device_settings=None,
-    ),
-}
+TEST_MODELS = {}
+
+TEST_MODELS["open_llama_3b"] = ModelConfig(
+    source=ModelSource.HUGGINGFACE_FROM_GGUF,
+    repo_id="SlyEcho/open_llama_3b_v2_gguf",
+    model_file="open-llama-3b-v2-f16.gguf",
+    tokenizer_id="openlm-research/open_llama_3b_v2",
+    batch_sizes=(1, 4),
+    device_settings=None,
+)
+
+TEST_MODELS["llama3.1_8b"] = ModelConfig(
+    source=ModelSource.HUGGINGFACE_FROM_GGUF,
+    repo_id="SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF",
+    model_file="meta-llama-3.1-8b-instruct.f16.gguf",
+    tokenizer_id="NousResearch/Meta-Llama-3.1-8B",
+    batch_sizes=(1, 4),
+    device_settings=None,
+)
+
+TEST_MODELS[
+    "azure_llama"
+] = ModelConfig(  # This model is currently unused. When you use it, check to make sure the irpa indeed still exist and remove this comment.
+    source=ModelSource.AZURE,
+    azure_config=AzureConfig(
+        account_name="sharkblobs",
+        container_name="halo-models",
+        blob_path="llm-dev/llama3_8b/8b_f16.irpa",
+    ),
+    model_file="azure-llama.irpa",
+    tokenizer_id="openlm-research/open_llama_3b_v2",
+    batch_sizes=(1, 4),
+    device_settings=None,
+)
+
+# TODO: upstream this to sharktank
+Dataset(
+    "Mxode/TinyStories-LLaMA2-25M-256h-4l-GQA",
+    files=[
+        RemoteFile(
+            file_id="model.safetensors",
+            filename="model.safetensors",
+            repo_id="Mxode/TinyStories-LLaMA2-25M-256h-4l-GQA",
+            extra_filenames=(
+                "config.json",
+                "tokenizer.json",
+                "tokenizer_config.json",
+            ),
+        ),
+    ],
+)
+
+TEST_MODELS["tinystories_llama2_25m"] = ModelConfig(
+    source=ModelSource.HUGGINGFACE_FROM_SAFETENSORS,
+    dataset_name="Mxode/TinyStories-LLaMA2-25M-256h-4l-GQA",
+    model_file="model.irpa",  # This will be the final converted file name
+    tokenizer_id="Mxode/TinyStories-LLaMA2-25M-256h-4l-GQA",
+    batch_sizes=(1, 4),
+    device_settings=None,
+)
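The new HUGGINGFACE_FROM_SAFETENSORS path above fetches safetensors weights from HuggingFace, converts them to GGUF with llama.cpp's convert_hf_to_gguf.py, and then dumps the GGUF to IRPA via sharktank.tools.dump_gguf. A minimal sketch of how a test might drive the new tinystories entry end to end (not part of the diff; the import path and the ModelProcessor(base_dir) constructor are assumptions inferred from the code above):

# Sketch only: exercising the new tinystories_llama2_25m config.
from pathlib import Path

from app_tests.integration_tests.llm.model_management import (  # assumed import path
    ModelProcessor,
    TEST_MODELS,
)

processor = ModelProcessor(Path("/tmp/shark_model_artifacts"))  # assumed constructor
# Runs the download/convert/compile stages and returns a ModelArtifacts record
# with the resulting weights, tokenizer, and compiled artifact paths.
artifacts = processor.process_model(TEST_MODELS["tinystories_llama2_25m"])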
2 changes: 1 addition & 1 deletion app_tests/integration_tests/llm/sglang/conftest.py
@@ -40,7 +40,7 @@ def model_artifacts(request, tmp_path_factory):
    tmp_dir = tmp_path_factory.mktemp("sglang_integration_tests")

    model_config = ModelConfig(
-        source=ModelSource.HUGGINGFACE,
+        source=ModelSource.HUGGINGFACE_FROM_GGUF,
        repo_id="SanctumAI/Meta-Llama-3.1-8B-Instruct-GGUF",
        model_file="meta-llama-3.1-8b-instruct.f16.gguf",
        tokenizer_id="NousResearch/Meta-Llama-3.1-8B",
2 changes: 1 addition & 1 deletion app_tests/integration_tests/llm/shortfin/conftest.py
@@ -33,7 +33,7 @@ def test_device(request):
    return ret


-@pytest.fixture(scope="module")
+@pytest.fixture(scope="session")
def model_artifacts(tmp_path_factory, request, test_device):
    """Prepares model artifacts in a cached directory."""
    model_config = TEST_MODELS[request.param]
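The fixture scope change above means the prepared model artifacts are built once per pytest session and shared across all test modules, instead of being rebuilt for each module. A small illustration of the behavior, using hypothetical names that are not from the diff:

# Illustration only: a session-scoped fixture runs its body once per test session.
import pytest

@pytest.fixture(scope="session")
def expensive_artifacts(tmp_path_factory):
    # Built a single time; every test module in the session reuses this directory.
    return tmp_path_factory.mktemp("artifacts")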