Add Dockerfile and sync_hf_space (#12)
* Add Dockerfile and sync_hf_space

* Add reboot_space

* Add README.md

* Check GPU without torch

* Update
daavoo authored Jan 17, 2025
1 parent 17425e7 commit 1486395
Showing 6 changed files with 87 additions and 28 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/sync_hf_space.yaml
@@ -1,6 +1,9 @@
name: Sync to Hugging Face Space

on:
  release:
    types: [published]

  workflow_dispatch:

jobs:
@@ -10,3 +13,29 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - run: git clone https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mozilla-ai/structured-qa hf-space

      - run: |
          cp demo/app.py hf-space/app.py
          cp demo/Dockerfile hf-space/Dockerfile
      - run: |
          cd hf-space
          git config user.name 'github-actions[bot]'
          git config user.email 'github-actions[bot]@users.noreply.github.com'
          git add .
          git commit -m "Sync with https://github.com/mozilla-ai/structured-qa"
      - name: Push to Hugging Face
        run: |
          cd hf-space
          git push https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mozilla-ai/structured-qa main
      - name: Reboot Space
        if: always()
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          pip install huggingface_hub
          python demo/reboot_space.py
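Because the workflow declares a workflow_dispatch trigger in addition to release, the sync can also be started by hand. A minimal sketch, assuming the GitHub CLI (gh) is installed and authenticated for this repository:

    # Kick off the sync workflow manually
    gh workflow run sync_hf_space.yaml

    # Follow the run's progress in the terminal
    gh run watch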
26 changes: 26 additions & 0 deletions demo/Dockerfile
@@ -0,0 +1,26 @@
FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04

RUN apt-get update && apt-get install --no-install-recommends -y \
    build-essential \
    python3.10 \
    python3.10-dev \
    python3-pip \
    git \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

RUN pip3 install https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
RUN pip3 install structured-qa

COPY --chown=user . $HOME/app

EXPOSE 8501
ENTRYPOINT ["streamlit", "run", "app.py", "--server.enableXsrfProtection", "false"]
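For a local smoke test of this image, something like the following should work, assuming Docker is installed and demo/ is used as the build context; the structured-qa-demo tag is a hypothetical name, and --gpus all requires the NVIDIA Container Toolkit (the app still starts on CPU without it, since n_gpu_layers falls back to 0):

    # Build the image from the demo/ directory (hypothetical tag)
    docker build -t structured-qa-demo demo/

    # Run the Streamlit app on http://localhost:8501
    docker run --gpus all -p 8501:8501 structured-qa-demo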
11 changes: 11 additions & 0 deletions demo/README.md
@@ -0,0 +1,11 @@
---
title: Structured Qa
emoji: 📚
colorFrom: green
colorTo: purple
sdk: docker
app_port: 8501
pinned: false
license: apache-2.0
short_description: Question answering for structured documents
---
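This front matter configures the Hugging Face Space: sdk: docker tells it to build from the Dockerfile the workflow syncs to the Space root, and app_port: 8501 has to match the port the Streamlit ENTRYPOINT serves on (the EXPOSE 8501 above).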
11 changes: 11 additions & 0 deletions demo/reboot_space.py
@@ -0,0 +1,11 @@
import os

from huggingface_hub import HfApi

if __name__ == "__main__":
    api = HfApi()
    api.restart_space(
        repo_id="mozilla-ai/structured-qa",
        token=os.getenv("HF_TOKEN"),
        factory_reboot=True,
    )
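The script takes no arguments and reads the token from the environment, so a manual factory reboot from a local checkout looks roughly like this (hf_xxx is a placeholder for a real write-scoped token):

    # Restart the Space from a local checkout
    HF_TOKEN=hf_xxx python demo/reboot_space.py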
26 changes: 0 additions & 26 deletions demo/run.sh

This file was deleted.

12 changes: 10 additions & 2 deletions src/structured_qa/model_loaders.py
@@ -1,7 +1,15 @@
-import torch
+import subprocess
from llama_cpp import Llama


+def gpu_available():
+    try:
+        subprocess.check_output("nvidia-smi")
+        return True
+    except Exception:
+        return False


def load_llama_cpp_model(model_id: str) -> Llama:
"""
Loads the given model_id using Llama.from_pretrained.
@@ -22,6 +30,6 @@ def load_llama_cpp_model(model_id: str) -> Llama:
        filename=filename,
        n_ctx=0,  # 0 means that the model limit will be used, instead of the default (512) or other hardcoded value
        verbose=False,
-        n_gpu_layers=-1 if torch.cuda.is_available() else 0,
+        n_gpu_layers=-1 if gpu_available() else 0,
    )
    return model
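With this change, GPU detection no longer needs torch at all; it just checks whether nvidia-smi runs. A quick sanity check of the new helper, assuming structured-qa is installed in the current environment:

    # Prints True on a machine where nvidia-smi succeeds, False otherwise
    python -c 'from structured_qa.model_loaders import gpu_available; print(gpu_available())'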
