Add Dockerfile and sync_hf_space (#12)
* Add Dockerfile and sync_hf_space

* Add reboot_space

* Add README.md

* Check GPU without torch

* Update
daavoo authored Jan 17, 2025
1 parent 17425e7 commit 1486395
Showing 6 changed files with 87 additions and 28 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/sync_hf_space.yaml
@@ -1,6 +1,9 @@
name: Sync to Hugging Face Space

on:
  release:
    types: [published]

  workflow_dispatch:

jobs:
@@ -10,3 +13,29 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - run: git clone https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mozilla-ai/structured-qa hf-space

      - run: |
          cp demo/app.py hf-space/app.py
          cp demo/Dockerfile hf-space/Dockerfile
      - run: |
          cd hf-space
          git config user.name 'github-actions[bot]'
          git config user.email 'github-actions[bot]@users.noreply.github.com'
          git add .
          git commit -m "Sync with https://github.com/mozilla-ai/structured-qa"
      - name: Push to Hugging Face
        run: |
          cd hf-space
          git push https://${{ secrets.HF_USERNAME }}:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/mozilla-ai/structured-qa main
      - name: Reboot Space
        if: always()
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          pip install huggingface_hub
          python demo/reboot_space.py
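Because the workflow declares a workflow_dispatch trigger in addition to release, the sync can also be started by hand. A minimal sketch, assuming the GitHub CLI (gh) is installed and authenticated for this repository:

    # Kick off the sync workflow manually
    gh workflow run sync_hf_space.yaml

    # Follow the run's progress in the terminal
    gh run watch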
26 changes: 26 additions & 0 deletions demo/Dockerfile
@@ -0,0 +1,26 @@
FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04

RUN apt-get update && apt-get install --no-install-recommends -y \
    build-essential \
    python3.10 \
    python3.10-dev \
    python3-pip \
    git \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

RUN pip3 install https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
RUN pip3 install structured-qa

COPY --chown=user . $HOME/app

EXPOSE 8501
ENTRYPOINT ["streamlit", "run", "app.py", "--server.enableXsrfProtection", "false"]
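For a local smoke test of this image, something like the following should work, assuming Docker is installed and demo/ is used as the build context; the structured-qa-demo tag is a hypothetical name, and --gpus all requires the NVIDIA Container Toolkit (the app still starts on CPU without it, since n_gpu_layers falls back to 0):

    # Build the image from the demo/ directory (hypothetical tag)
    docker build -t structured-qa-demo demo/

    # Run the Streamlit app on http://localhost:8501
    docker run --gpus all -p 8501:8501 structured-qa-demo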
11 changes: 11 additions & 0 deletions demo/README.md
@@ -0,0 +1,11 @@
---
title: Structured Qa
emoji: 📚
colorFrom: green
colorTo: purple
sdk: docker
app_port: 8501
pinned: false
license: apache-2.0
short_description: Question answering for structured documents
---
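This front matter configures the Hugging Face Space: sdk: docker tells it to build from the Dockerfile the workflow syncs to the Space root, and app_port: 8501 has to match the port the Streamlit ENTRYPOINT serves on (the EXPOSE 8501 above).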
11 changes: 11 additions & 0 deletions demo/reboot_space.py
@@ -0,0 +1,11 @@
import os

from huggingface_hub import HfApi

if __name__ == "__main__":
    api = HfApi()
    api.restart_space(
        repo_id="mozilla-ai/structured-qa",
        token=os.getenv("HF_TOKEN"),
        factory_reboot=True,
    )
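The script takes no arguments and reads the token from the environment, so a manual factory reboot from a local checkout looks roughly like this (hf_xxx is a placeholder for a real write-scoped token):

    # Restart the Space from a local checkout
    HF_TOKEN=hf_xxx python demo/reboot_space.py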
26 changes: 0 additions & 26 deletions demo/run.sh

This file was deleted.

12 changes: 10 additions & 2 deletions src/structured_qa/model_loaders.py
@@ -1,7 +1,15 @@
-import torch
+import subprocess
from llama_cpp import Llama


+def gpu_available():
+    try:
+        subprocess.check_output("nvidia-smi")
+        return True
+    except Exception:
+        return False


def load_llama_cpp_model(model_id: str) -> Llama:
"""
Loads the given model_id using Llama.from_pretrained.
@@ -22,6 +30,6 @@ def load_llama_cpp_model(model_id: str) -> Llama:
        filename=filename,
        n_ctx=0,  # 0 means that the model limit will be used, instead of the default (512) or other hardcoded value
        verbose=False,
-        n_gpu_layers=-1 if torch.cuda.is_available() else 0,
+        n_gpu_layers=-1 if gpu_available() else 0,
    )
    return model
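With this change, GPU detection no longer needs torch at all; it just checks whether nvidia-smi runs. A quick sanity check of the new helper, assuming structured-qa is installed in the current environment:

    # Prints True on a machine where nvidia-smi succeeds, False otherwise
    python -c 'from structured_qa.model_loaders import gpu_available; print(gpu_available())'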
