diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 0000000..1168bd9 --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,39 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python application + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + +permissions: + contents: read + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..380e870 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +# syntax=docker/dockerfile:1 + +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PORT=8080 + +# System dependencies required by OCR/image libraries +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + poppler-utils \ + tesseract-ocr \ + libgl1 \ + libglib2.0-0 \ + libsm6 \ + libxrender1 \ + libxext6 \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY pyproject.toml README.md ./ +COPY docstrange ./docstrange +COPY examples ./examples +COPY scripts ./scripts + +RUN pip install --upgrade pip && \ + pip install .[web] gunicorn + +EXPOSE 8080 + +CMD ["gunicorn", "docstrange.web_app:app", "--bind", "0.0.0.0:8080", "--workers", "1", "--threads", "4", "--timeout", "120"] diff --git a/README.md b/README.md index d2c8556..49ad2ec 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,19 @@ Install the library using pip: pip install docstrange ``` +### Deploy the web UI to Fly.io (local-only) + +The repository now ships with a production-ready `Dockerfile` and `fly.toml` so you can spin up the drag-and-drop interface on [Fly.io](https://fly.io) without touching the DocStrange cloud API. By default the container runs `DocumentExtractor(cpu=True)`, keeping every request on the VM you control. + +Follow the step-by-step instructions in [`docs/deployment/fly.md`](docs/deployment/fly.md) to: + +- Build and run the container locally before deploying. +- Launch a Fly app that serves the Flask interface with Gunicorn. +- Call `/api/extract` with `processing_mode=cpu` from your own frontend and keep the workload private. +- (Optional) add secrets if you later decide to use the hosted API. 
+ +The guide opens with a non-technical checklist so you can deploy, test via `flyctl proxy`, and shut the machine down again in just a few commands. + ## **Quick Start** > 💡 **New to DocStrange?** Try the [online demo](https://docstrange.nanonets.com/) first - no installation needed! diff --git a/docs/deployment/fly.md b/docs/deployment/fly.md new file mode 100644 index 0000000..a975476 --- /dev/null +++ b/docs/deployment/fly.md @@ -0,0 +1,131 @@ +# Fly.io Deployment Guide + +This guide shows how to run the DocStrange web interface on [Fly.io](https://fly.io) and keep the workflow friendly for local testing. + +> 🧭 **In a hurry?** Follow the quick checklist below—no coding knowledge required. + +### Quick start (5-minute overview) +1. **Install tooling** – Install [Docker](https://docs.docker.com/get-started/get-docker/) and [`flyctl`](https://fly.io/docs/hands-on/install-flyctl/). Sign in with `flyctl auth login`. +2. **Clone the repo** – `git clone https://github.com/NanoNets/docstrange.git && cd docstrange`. +3. **(Optional) Test locally** – `docker build -t docstrange:local .` then `docker run --rm -p 8080:8080 docstrange:local` and visit `http://localhost:8080`. +4. **Launch your Fly app** – `flyctl launch --name <your-app-name> --copy-config --no-deploy` and keep the generated `fly.toml`. +5. **Deploy** – `flyctl deploy`. No DocStrange API key is needed; everything runs on the VM’s CPU. +6. **Try the API** – With `flyctl proxy 8080` running, execute: + + ```bash + curl -F "file=@/full/path/to/document.pdf" \ + -F "output_format=markdown" \ + -F "processing_mode=cpu" \ + http://127.0.0.1:8080/api/extract + ``` + +7. **Shut down** – When you’re done, run `flyctl scale count 0` (pause) or `flyctl apps destroy <your-app-name>` (delete). + +The rest of this document adds detail, background, and optional customisations once you are comfortable with the basics. + +## Prerequisites + +- A Fly.io account and the [`flyctl`](https://fly.io/docs/hands-on/install-flyctl/) CLI installed locally. 
+- Docker installed locally if you want to build images on your machine. (Fly will also build in its remote builders.) +- Python 3.11+ if you plan to run scripts locally before deploying. +- Optional: A `NANONETS_API_KEY` if you want to use DocStrange's managed cloud extraction mode. Local CPU/GPU extraction works without it—and the bundled Fly deployment defaults to CPU-only processing so nothing leaves your VM. + +## 1. Clone the repository + +```bash +git clone https://github.com/NanoNets/docstrange.git +cd docstrange +``` + +## 2. Review the Docker image + +The included [`Dockerfile`](../../Dockerfile) installs all OCR dependencies (Poppler, Tesseract, and the OpenGL runtime libraries required by EasyOCR) and launches the Flask application with Gunicorn. Adjust the base image or dependency list if you need GPU support. + +If you want to verify the container locally before deploying to Fly.io: + +```bash +docker build -t docstrange:local . +docker run --rm -p 8080:8080 docstrange:local +``` + +Then visit `http://localhost:8080` to access the drag-and-drop UI. + +## 3. Launch a Fly.io app + +Use `flyctl` to create (or link) an application. The provided [`fly.toml`](../../fly.toml) has sane defaults for a single shared-CPU machine. + +```bash +flyctl launch --name <your-app-name> --copy-config --no-deploy +``` + +- Choose a region close to you (defaults to `iad` in the template). + +- The `--copy-config` flag tells `flyctl` to reuse the committed `fly.toml`. +- `--no-deploy` lets you review everything before the first deploy. + +Update `app` (and optionally `primary_region`) in `fly.toml` after running `flyctl launch`. Nothing else needs to change for a private, CPU-only deployment. + +## 4. Configure secrets (optional) + +If you want to use DocStrange's cloud mode, store the API key as a Fly secret: + +```bash +flyctl secrets set NANONETS_API_KEY=your_api_key_here +``` + +You can add other environment variables in the same way. + +## 5. 
Deploy to Fly.io + +```bash +flyctl deploy +``` + +Fly.io will build the Docker image, provision a machine, and release it. Once the command finishes you'll see the deployed URL. By default the Gunicorn server listens on `0.0.0.0:8080`, which Fly maps to HTTPS. The container now instantiates `DocumentExtractor(cpu=True)` so no DocStrange-hosted API calls are made. + +## 6. Test the app locally via Fly proxy + +To exercise the remote app locally without exposing it publicly, use Fly's proxy capability: + +```bash +flyctl proxy 8080 +``` + +This forwards `localhost:8080` to your Fly app's internal port so you can use the UI or call the `/api/extract` endpoint from local scripts. Remember to include `processing_mode=cpu` (already the UI default) when hitting the API from custom clients. + +### Use the Fly-hosted API from your frontend + +Your Fly VM exposes the following request contract. Point your web or mobile frontend at the Fly hostname (or via the proxy when testing locally): + +```bash +curl -F "file=@/path/to/input.pdf" \ + -F "output_format=markdown" \ + -F "processing_mode=cpu" \ + https://<your-app-name>.fly.dev/api/extract  # output_format: markdown | html | json | flat-json | csv; processing_mode=cpu keeps workloads inside your VM +``` + +The response includes the extracted content plus metadata (`processing_mode`, pages processed, processing time). You can store those values in your own database for billing or analytics. + +## 7. Tear down resources + +When you're done testing: + +```bash +flyctl apps destroy <your-app-name> +``` + +or scale to zero: + +```bash +flyctl scale count 0 +``` + +This stops billing while keeping the configuration around for future tests. + +## Troubleshooting + +- **Large model downloads** – The first request triggers model downloads. Keep the machine alive or bake models into a custom image if startup latency is a concern. +- **GPU mode** – Fly Machines with GPUs are required for GPU extraction. 
Update the Dockerfile to include CUDA dependencies and change the `[[vm]]` section in `fly.toml` to request a GPU instance. +- **File size limits** – The web app rejects uploads larger than 100 MB (`MAX_CONTENT_LENGTH`). Adjust `app.config['MAX_CONTENT_LENGTH']` in `docstrange/web_app.py` if needed. +- **Accidentally sending jobs to the cloud** – If a client submits an unknown `processing_mode`, the server now falls back to CPU so you stay within your own Fly VM. + +With these steps you can iterate locally using the Fly.io deployment while still benefiting from the hosted environment. diff --git a/docstrange/templates/index.html b/docstrange/templates/index.html index 8685d7d..336d085 100644 --- a/docstrange/templates/index.html +++ b/docstrange/templates/index.html @@ -995,7 +995,7 @@

Error

const formData = new FormData(); formData.append('file', file); formData.append('output_format', format === 'json' ? 'flat-json' : format); - formData.append('processing_mode', 'cloud'); + formData.append('processing_mode', 'cpu'); // Show loading state extractBtn.textContent = 'Processing...'; diff --git a/docstrange/web_app.py b/docstrange/web_app.py index ed04a97..a359f0b 100644 --- a/docstrange/web_app.py +++ b/docstrange/web_app.py @@ -6,7 +6,6 @@ from pathlib import Path from typing import Optional from flask import Flask, request, jsonify, render_template, send_from_directory -from werkzeug.utils import secure_filename from werkzeug.exceptions import RequestEntityTooLarge from .extractor import DocumentExtractor @@ -60,9 +59,25 @@ def download_models(): if os.path.exists(test_file_path): os.unlink(test_file_path) +def normalize_processing_mode(processing_mode: Optional[str]) -> str: + """Map user-provided processing mode to an internal value.""" + if not processing_mode: + return 'cpu' + + normalized = processing_mode.strip().lower() + + if normalized in {'cpu', 'gpu'}: + return normalized + + # Treat any legacy labels (like "cloud" or "local") as CPU-only processing. + return 'cpu' + + def create_extractor_with_mode(processing_mode): """Create DocumentExtractor with proper error handling for processing mode.""" - if processing_mode == 'gpu': + normalized_mode = normalize_processing_mode(processing_mode) + + if normalized_mode == 'gpu': if not check_gpu_availability(): raise ValueError("GPU mode selected but GPU is not available. 
Please install PyTorch with CUDA support or use CPU mode.") return DocumentExtractor(gpu=True) @@ -70,7 +85,7 @@ def create_extractor_with_mode(processing_mode): return DocumentExtractor(cpu=True) # Initialize the document extractor -extractor = DocumentExtractor() +extractor = DocumentExtractor(cpu=True) @app.route('/') def index(): @@ -96,7 +111,7 @@ def extract_document(): # Get parameters output_format = request.form.get('output_format', 'markdown') - processing_mode = request.form.get('processing_mode', 'cloud') + processing_mode = normalize_processing_mode(request.form.get('processing_mode', 'cpu')) # Create extractor based on processing mode try: diff --git a/fly.toml b/fly.toml new file mode 100644 index 0000000..0badcf8 --- /dev/null +++ b/fly.toml @@ -0,0 +1,21 @@ +# Replace "docstrange-local" with your Fly.io app name once you run `fly launch` +app = "docstrange-local" +primary_region = "iad" + +[build] + dockerfile = "Dockerfile" + +[env] + PORT = "8080" + +[http_service] + internal_port = 8080 + force_https = true + auto_stop_machines = true + auto_start_machines = true + min_machines_running = 0 + +[[vm]] + cpu_kind = "shared" + cpus = 2 + memory = "4096"