ewalid · ewalid · Jan 1, 2026 · Jan 1, 2026 · Jan 1, 2026 · cursor
diff --git a/pyproject.toml b/pyproject.toml
@@ -13,12 +13,17 @@ dependencies = [
     "openpyxl>=3.1.5",
     "click>=8.1.7",
     "python-dotenv>=1.0.0",
+    "fastapi>=0.115.0",
+    "uvicorn>=0.32.0",
+    "python-multipart>=0.0.12",
 ]
 
 [project.optional-dependencies]
 dev = [
     "pytest>=8.0.0",
     "pytest-cov>=4.1.0",
+    "pytest-asyncio>=0.24.0",
+    "httpx>=0.27.0",
     "black>=24.0.0",
     "ruff>=0.6.0",
     "mypy>=1.11.0",

diff --git a/run_api.py b/run_api.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+"""Launch the Rosetta API under uvicorn."""
+
+from dotenv import load_dotenv
+import uvicorn
+
+# Load the .env file into the environment as soon as this module is imported.
+load_dotenv()
+
+
+def main() -> None:
+    """Serve ``rosetta.api:app`` on 0.0.0.0:8000 with uvicorn's reload enabled."""
+    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
+    uvicorn.run("rosetta.api:app", **server_options)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/rosetta/api/__init__.py b/src/rosetta/api/__init__.py
@@ -0,0 +1,5 @@
+"""FastAPI application for Rosetta."""
+
+from rosetta.api.app import app
+
+__all__ = ["app"]
diff --git a/src/rosetta/api/app.py b/src/rosetta/api/app.py
@@ -0,0 +1,138 @@
+"""FastAPI application for Rosetta translation service."""
+
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
+from fastapi.concurrency import run_in_threadpool
+from fastapi.responses import FileResponse
+
+from rosetta.services.translation_service import count_cells, translate_file
+
+# Limits
+MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
+MAX_CELLS = 5000
+
+app = FastAPI(
+    title="Rosetta",
+    description="Excel translation API that preserves formatting, formulas, and data integrity",
+    version="0.1.0",
+)
+
+
+@app.get("/")
+async def root() -> dict:
+    """Health check endpoint."""
+    return {"status": "ok", "service": "rosetta"}
+
+
+@app.post("/translate")
+async def translate(
+    file: UploadFile = File(..., description="Excel file to translate"),
+    target_lang: str = Form(..., description="Target language (e.g., french, spanish)"),
+    source_lang: Optional[str] = Form(None, description="Source language (auto-detect if omitted)"),
+    context: Optional[str] = Form(None, description="Additional context for accurate translations"),
+    sheets: Optional[str] = Form(None, description="Comma-separated sheet names (all if omitted)"),
+) -> FileResponse:
+    """Translate an Excel file.
+
+    Upload an Excel file and receive the translated version.
+    Preserves all formatting, formulas, images, and data validations.
+
+    The spreadsheet work runs in a worker thread so the event loop stays
+    responsive, and both temporary files are removed once they are no
+    longer needed.
+
+    Raises:
+        HTTPException: 400 for a missing filename, an unsupported extension,
+            an oversized upload, too many cells, or no translatable content;
+            500 when counting or translation fails for any other reason.
+    """
+    # Validate file type
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="No filename provided")
+
+    if not file.filename.lower().endswith((".xlsx", ".xlsm", ".xltx", ".xltm")):
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid file type. Only Excel files (.xlsx, .xlsm, .xltx, .xltm) are supported",
+        )
+
+    # Read one byte past the limit at most: enough to detect an oversized
+    # upload without copying the whole body into memory first.
+    content = await file.read(MAX_FILE_SIZE + 1)
+    if len(content) > MAX_FILE_SIZE:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024 * 1024)}MB",
+        )
+
+    # Parse sheets parameter
+    sheets_set = None
+    if sheets:
+        sheets_set = {s.strip() for s in sheets.split(",") if s.strip()}
+
+    # Save to temp file for processing
+    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp_input:
+        tmp_input.write(content)
+        input_path = Path(tmp_input.name)
+    output_path = input_path.with_name(f"{input_path.stem}_translated.xlsx")
+    # True once a response owns output_path and will delete it after sending.
+    output_handed_off = False
+
+    try:
+        # Check cell count (blocking workbook parsing, so off the event loop)
+        cell_count = await run_in_threadpool(count_cells, input_path, sheets_set)
+        if cell_count > MAX_CELLS:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Too many cells ({cell_count}). Maximum is {MAX_CELLS} cells per request",
+            )
+
+        if cell_count == 0:
+            raise HTTPException(
+                status_code=400,
+                detail="No translatable content found in the file",
+            )
+
+        # Translate in a worker thread; translate_file is synchronous
+        result = await run_in_threadpool(
+            translate_file,
+            input_file=input_path,
+            output_file=output_path,
+            target_lang=target_lang,
+            source_lang=source_lang,
+            context=context,
+            sheets=sheets_set,
+        )
+
+        # Tag the download name with the language whatever the extension is;
+        # str.replace(".xlsx", ...) left .xlsm/.xltx/.xltm and upper-case
+        # names untouched.
+        upload_name = Path(file.filename)
+        output_filename = f"{upload_name.stem}_{target_lang}{upload_name.suffix}"
+
+        # Delete the translated file only after the response body is sent
+        cleanup = BackgroundTasks()
+        cleanup.add_task(output_path.unlink, missing_ok=True)
+        response = FileResponse(
+            path=output_path,
+            filename=output_filename,
+            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            headers={"X-Cells-Translated": str(result["cell_count"])},
+            background=cleanup,
+        )
+        output_handed_off = True
+        return response
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}") from e
+    finally:
+        # The input copy is never needed after this point; the output is
+        # removed here only when no response took ownership of it.
+        input_path.unlink(missing_ok=True)
+        if not output_handed_off:
+            output_path.unlink(missing_ok=True)
diff --git a/src/rosetta/services/translation_service.py b/src/rosetta/services/translation_service.py
@@ -0,0 +1,113 @@
+"""High-level translation service for the API."""
+
+from pathlib import Path
+from typing import Optional
+
+from rosetta.core.config import Config
+from rosetta.models import TranslationBatch
+from rosetta.services import ExcelExtractor, Translator
+
+
+def translate_file(
+    input_file: Path,
+    output_file: Path,
+    target_lang: str,
+    source_lang: Optional[str] = None,
+    context: Optional[str] = None,
+    sheets: Optional[set[str]] = None,
+    batch_size: int = 50,
+) -> dict:
+    """Translate a workbook's cells, rich text runs and dropdowns.
+
+    Args:
+        input_file: Path to input Excel file
+        output_file: Path for translated output file
+        target_lang: Target language for translation
+        source_lang: Source language (auto-detected if None)
+        context: Additional context for translations
+        sheets: Set of sheet names to translate (all if None)
+        batch_size: Number of cells per API batch
+
+    Returns:
+        ``{"cell_count": 0, "status": "no_content"}`` when no cells were
+        extracted (nothing is written in that case); otherwise a dict with
+        ``cell_count``, ``rich_text_cells``, ``dropdown_count`` and a
+        ``status`` of ``"completed"``.
+    """
+    from rosetta.main import (
+        _extract_dropdown_validations,
+        _extract_rich_text_info,
+        _translate_dropdowns,
+        _translate_rich_text_runs,
+        write_translations,
+    )
+
+    config = Config.from_env()
+    config.batch_size = batch_size
+    translator = Translator(config)
+
+    # Collect every cell up front, then attach rich text details to them
+    with ExcelExtractor(input_file, sheets=sheets) as extractor:
+        cells = list(extractor.extract_cells())
+    _extract_rich_text_info(input_file, cells, sheets)
+
+    if not cells:
+        return {"cell_count": 0, "status": "no_content"}
+
+    # Send the cells to the translator one fixed-size slice at a time
+    size = config.batch_size
+    translated_cells = []
+    for start in range(0, len(cells), size):
+        chunk = cells[start : start + size]
+        request = TranslationBatch(
+            cells=chunk,
+            source_lang=source_lang,
+            target_lang=target_lang,
+            context=context,
+        )
+        for cell, text in zip(chunk, translator.translate_batch(request)):
+            cell.value = text
+            translated_cells.append(cell)
+
+    # Rich text runs are translated only for cells that carry any
+    rich_text_cells = [cell for cell in translated_cells if cell.rich_text_runs]
+    if rich_text_cells:
+        _translate_rich_text_runs(
+            rich_text_cells,
+            translator,
+            source_lang,
+            target_lang,
+            config.batch_size,
+            context,
+        )
+
+    # Dropdown validations come from the input file, not from the cells
+    dropdowns = _extract_dropdown_validations(input_file, sheets)
+    if dropdowns:
+        _translate_dropdowns(
+            dropdowns,
+            translator,
+            source_lang,
+            target_lang,
+            config.batch_size,
+            context,
+        )
+
+    write_translations(input_file, output_file, translated_cells, dropdowns)
+
+    dropdown_count = len(dropdowns) if dropdowns else 0
+    return {
+        "cell_count": len(translated_cells),
+        "rich_text_cells": len(rich_text_cells),
+        "dropdown_count": dropdown_count,
+        "status": "completed",
+    }
+
+
+def count_cells(input_file: Path, sheets: Optional[set[str]] = None) -> int:
+    """Return how many cells the extractor yields for the chosen sheets."""
+    with ExcelExtractor(input_file, sheets=sheets) as extractor:
+        total = 0
+        for _ in extractor.extract_cells():
+            total += 1
+        return total