diff --git a/pyproject.toml b/pyproject.toml
index 429112f..ed4e292 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,12 +13,17 @@ dependencies = [
     "openpyxl>=3.1.5",
     "click>=8.1.7",
     "python-dotenv>=1.0.0",
+    "fastapi>=0.115.0",
+    "uvicorn>=0.32.0",
+    "python-multipart>=0.0.12",
 ]
 
 [project.optional-dependencies]
 dev = [
     "pytest>=8.0.0",
     "pytest-cov>=4.1.0",
+    "pytest-asyncio>=0.24.0",
+    "httpx>=0.27.0",
     "black>=24.0.0",
     "ruff>=0.6.0",
     "mypy>=1.11.0",
diff --git a/run_api.py b/run_api.py
new file mode 100644
index 0000000..4f9f608
--- /dev/null
+++ b/run_api.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python3
+"""Run the Rosetta API server."""
+
+from dotenv import load_dotenv
+import uvicorn
+
+load_dotenv()  # Load .env file
+
+if __name__ == "__main__":
+    uvicorn.run(
+        "rosetta.api:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True,
+    )
diff --git a/src/rosetta/api/__init__.py b/src/rosetta/api/__init__.py
new file mode 100644
index 0000000..04df963
--- /dev/null
+++ b/src/rosetta/api/__init__.py
@@ -0,0 +1,5 @@
+"""FastAPI application for Rosetta."""
+
+from rosetta.api.app import app
+
+__all__ = ["app"]
diff --git a/src/rosetta/api/app.py b/src/rosetta/api/app.py
new file mode 100644
index 0000000..7232e74
--- /dev/null
+++ b/src/rosetta/api/app.py
@@ -0,0 +1,146 @@
+"""FastAPI application for Rosetta translation service."""
+
+import logging
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile
+from fastapi.concurrency import run_in_threadpool
+from fastapi.responses import FileResponse
+from starlette.background import BackgroundTask
+
+from rosetta.services.translation_service import count_cells, translate_file
+
+logger = logging.getLogger(__name__)
+
+# Limits
+MAX_FILE_SIZE = 10 * 1024 * 1024  # 10MB
+MAX_CELLS = 5000
+
+# Upload extensions accepted by /translate (matched case-insensitively)
+ALLOWED_EXTENSIONS = (".xlsx", ".xlsm", ".xltx", ".xltm")
+
+app = FastAPI(
+    title="Rosetta",
+    description="Excel translation API that preserves formatting, formulas, and data integrity",
+    version="0.1.0",
+)
+
+
+def _localized_filename(filename: str, target_lang: str) -> str:
+    """Insert ``_<target_lang>`` before the extension of the uploaded filename."""
+    # Drop any directory part of the client-supplied name
+    name = Path(filename).name
+    lowered = name.lower()
+    for ext in ALLOWED_EXTENSIONS:
+        if lowered.endswith(ext):
+            # Slice instead of str.replace so case and other ".xlsx" occurrences are kept
+            return f"{name[:-len(ext)]}_{target_lang}{name[-len(ext):]}"
+    return f"{name}_{target_lang}"
+
+
+@app.get("/")
+async def root() -> dict:
+    """Health check endpoint."""
+    return {"status": "ok", "service": "rosetta"}
+
+
+@app.post("/translate")
+async def translate(
+    file: UploadFile = File(..., description="Excel file to translate"),
+    target_lang: str = Form(..., description="Target language (e.g., french, spanish)"),
+    source_lang: Optional[str] = Form(None, description="Source language (auto-detect if omitted)"),
+    context: Optional[str] = Form(None, description="Additional context for accurate translations"),
+    sheets: Optional[str] = Form(None, description="Comma-separated sheet names (all if omitted)"),
+) -> FileResponse:
+    """Translate an Excel file.
+
+    Upload an Excel file and receive the translated version.
+    Preserves all formatting, formulas, images, and data validations.
+
+    Raises:
+        HTTPException: 400 for a missing filename, unsupported extension,
+            oversized upload, too many cells, or no translatable content;
+            500 when translation fails.
+    """
+    # Validate file type
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="No filename provided")
+
+    if not file.filename.lower().endswith(ALLOWED_EXTENSIONS):
+        raise HTTPException(
+            status_code=400,
+            detail="Invalid file type. Only Excel files (.xlsx, .xlsm, .xltx, .xltm) are supported",
+        )
+
+    # Read one byte past the limit at most, so an oversized upload is
+    # rejected without loading all of it into memory
+    content = await file.read(MAX_FILE_SIZE + 1)
+
+    # Check file size
+    if len(content) > MAX_FILE_SIZE:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024 * 1024)}MB",
+        )
+
+    # Parse sheets parameter; a value with no usable names means "all sheets"
+    sheets_set = None
+    if sheets:
+        sheets_set = {s.strip() for s in sheets.split(",") if s.strip()} or None
+
+    # Save to temp file for processing
+    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp_input:
+        tmp_input.write(content)
+        input_path = Path(tmp_input.name)
+    output_path = input_path.with_name(f"{input_path.stem}_translated.xlsx")
+
+    try:
+        # Check cell count (worker thread: blocking file I/O)
+        cell_count = await run_in_threadpool(count_cells, input_path, sheets_set)
+        if cell_count > MAX_CELLS:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Too many cells ({cell_count}). Maximum is {MAX_CELLS} cells per request",
+            )
+
+        if cell_count == 0:
+            raise HTTPException(
+                status_code=400,
+                detail="No translatable content found in the file",
+            )
+
+        # Translate in a worker thread so the synchronous call does not
+        # block the event loop for other requests
+        result = await run_in_threadpool(
+            translate_file,
+            input_file=input_path,
+            output_file=output_path,
+            target_lang=target_lang,
+            source_lang=source_lang,
+            context=context,
+            sheets=sheets_set,
+        )
+
+        # Return translated file; the background task deletes the temp
+        # output once the response body has been sent
+        return FileResponse(
+            path=output_path,
+            filename=_localized_filename(file.filename, target_lang),
+            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            headers={"X-Cells-Translated": str(result["cell_count"])},
+            background=BackgroundTask(output_path.unlink, missing_ok=True),
+        )
+
+    except HTTPException:
+        output_path.unlink(missing_ok=True)
+        raise
+    except Exception as e:
+        # No response will stream the output, so remove it here
+        output_path.unlink(missing_ok=True)
+        logger.exception("Translation failed for %r", file.filename)
+        raise HTTPException(status_code=500, detail=f"Translation failed: {e}") from e
+    finally:
+        # Cleanup input file (never needed after processing)
+        input_path.unlink(missing_ok=True)
diff --git a/src/rosetta/services/translation_service.py b/src/rosetta/services/translation_service.py
new file mode 100644
index 0000000..53c503e
--- /dev/null
+++ b/src/rosetta/services/translation_service.py
@@ -0,0 +1,126 @@
+"""High-level translation service for the API."""
+
+from pathlib import Path
+from typing import Optional
+
+from rosetta.core.config import Config
+from rosetta.models import TranslationBatch
+from rosetta.services import ExcelExtractor, Translator
+
+
+def translate_file(
+    input_file: Path,
+    output_file: Path,
+    target_lang: str,
+    source_lang: Optional[str] = None,
+    context: Optional[str] = None,
+    sheets: Optional[set[str]] = None,
+    batch_size: int = 50,
+) -> dict:
+    """Translate an Excel file.
+
+    Args:
+        input_file: Path to input Excel file
+        output_file: Path for translated output file
+        target_lang: Target language for translation
+        source_lang: Source language (auto-detected if None)
+        context: Additional context for translations
+        sheets: Set of sheet names to translate (all if None)
+        batch_size: Number of cells per API batch (must be >= 1)
+
+    Returns:
+        Dict with translation stats
+
+    Raises:
+        ValueError: If batch_size is less than 1, or the translator returns
+            a different number of translations than cells in a batch
+    """
+    # Imported inside the function; presumably this avoids a circular
+    # import with rosetta.main (TODO confirm)
+    from rosetta.main import (
+        _extract_dropdown_validations,
+        _extract_rich_text_info,
+        _translate_dropdowns,
+        _translate_rich_text_runs,
+        write_translations,
+    )
+
+    if batch_size < 1:
+        # A zero step makes range() raise; a negative one would skip every cell
+        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
+
+    config = Config.from_env()
+    config.batch_size = batch_size
+    translator = Translator(config)
+
+    # Extract cells
+    with ExcelExtractor(input_file, sheets=sheets) as extractor:
+        cells = list(extractor.extract_cells())
+
+    # Enrich with rich text info
+    _extract_rich_text_info(input_file, cells, sheets)
+
+    if not cells:
+        return {"cell_count": 0, "status": "no_content"}
+
+    # Translate in batches
+    translated_cells = []
+    for i in range(0, len(cells), config.batch_size):
+        batch_cells = cells[i : i + config.batch_size]
+        batch = TranslationBatch(
+            cells=batch_cells,
+            source_lang=source_lang,
+            target_lang=target_lang,
+            context=context,
+        )
+        translations = list(translator.translate_batch(batch))
+        if len(translations) != len(batch_cells):
+            # zip() below would silently drop the unmatched cells
+            raise ValueError(
+                f"Translator returned {len(translations)} translations "
+                f"for a batch of {len(batch_cells)} cells"
+            )
+
+        for cell, translation in zip(batch_cells, translations):
+            cell.value = translation
+            translated_cells.append(cell)
+
+    # Translate rich text runs
+    rich_text_cells = [c for c in translated_cells if c.rich_text_runs]
+    if rich_text_cells:
+        _translate_rich_text_runs(
+            rich_text_cells,
+            translator,
+            source_lang,
+            target_lang,
+            config.batch_size,
+            context,
+        )
+
+    # Translate dropdowns
+    dropdowns = _extract_dropdown_validations(input_file, sheets)
+    if dropdowns:
+        _translate_dropdowns(
+            dropdowns,
+            translator,
+            source_lang,
+            target_lang,
+            config.batch_size,
+            context,
+        )
+
+    # Write output
+    write_translations(input_file, output_file, translated_cells, dropdowns)
+
+    return {
+        "cell_count": len(translated_cells),
+        "rich_text_cells": len(rich_text_cells),
+        "dropdown_count": len(dropdowns) if dropdowns else 0,
+        "status": "completed",
+    }
+
+
+def count_cells(input_file: Path, sheets: Optional[set[str]] = None) -> int:
+    """Count translatable cells in a file (for validation before translation)."""
+    with ExcelExtractor(input_file, sheets=sheets) as extractor:
+        return sum(1 for _ in extractor.extract_cells())
diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 0000000..ddb2926
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,253 @@
+"""Tests for the Rosetta API."""
+
+import io
+from unittest.mock import patch
+
+import pytest
+from fastapi.testclient import TestClient
+from openpyxl import Workbook
+
+from rosetta.api import app
+
+
+@pytest.fixture
+def client():
+    """Create a test client for the API."""
+    return TestClient(app)
+
+
+def create_mock_translate_file(sample_excel_bytes):
+    """Create a mock translate_file that creates an actual output file."""
+    def mock_translate(input_file, output_file, target_lang, source_lang=None, context=None, sheets=None):
+        # Create an actual output file (a fresh workbook standing in for the translation)
+        wb = Workbook()
+        ws = wb.active
+        ws["A1"] = "[TR] Hello"
+        ws["A2"] = "[TR] World"
+        wb.save(output_file)
+
+        return {
+            "cell_count": 2,
+            "rich_text_cells": 0,
+            "dropdown_count": 0,
+            "status": "completed",
+        }
+    return mock_translate
+
+
+@pytest.fixture
+def sample_excel_bytes():
+    """Create a simple Excel file in memory."""
+    wb = Workbook()
+    ws = wb.active
+    ws["A1"] = "Hello"
+    ws["A2"] = "World"
+
+    buffer = io.BytesIO()
+    wb.save(buffer)
+    buffer.seek(0)
+    return buffer.getvalue()
+
+
+@pytest.fixture
+def empty_excel_bytes():
+    """Create an Excel file with no text content."""
+    wb = Workbook()
+    ws = wb.active
+    ws["A1"] = 123  # Number, not text
+    ws["A2"] = "=SUM(1,2)"  # Formula
+
+    buffer = io.BytesIO()
+    wb.save(buffer)
+    buffer.seek(0)
+    return buffer.getvalue()
+
+
+class TestHealthCheck:
+    """Tests for the health check endpoint."""
+
+    def test_root_returns_ok(self, client):
+        """GET / should return status ok."""
+        response = client.get("/")
+        assert response.status_code == 200
+        assert response.json() == {"status": "ok", "service": "rosetta"}
+
+
+class TestTranslateEndpoint:
+    """Tests for the /translate endpoint."""
+
+    def test_missing_file_returns_422(self, client):
+        """POST /translate without file should return 422."""
+        response = client.post("/translate", data={"target_lang": "french"})
+        assert response.status_code == 422
+
+    def test_missing_target_lang_returns_422(self, client, sample_excel_bytes):
+        """POST /translate without target_lang should return 422."""
+        response = client.post(
+            "/translate",
+            files={"file": ("test.xlsx", sample_excel_bytes)},
+        )
+        assert response.status_code == 422
+
+    def test_invalid_file_type_returns_400(self, client):
+        """POST /translate with non-Excel file should return 400."""
+        response = client.post(
+            "/translate",
+            files={"file": ("test.txt", b"Hello world")},
+            data={"target_lang": "french"},
+        )
+        assert response.status_code == 400
+        assert "Invalid file type" in response.json()["detail"]
+
+    def test_empty_filename_returns_error(self, client, sample_excel_bytes):
+        """POST /translate with empty filename should return error."""
+        response = client.post(
+            "/translate",
+            files={"file": ("", sample_excel_bytes)},
+            data={"target_lang": "french"},
+        )
+        # FastAPI returns 422 for validation errors
+        assert response.status_code in (400, 422)
+
+    def test_no_translatable_content_returns_400(self, client, empty_excel_bytes):
+        """POST /translate with no text content should return 400."""
+        response = client.post(
+            "/translate",
+            files={"file": ("empty.xlsx", empty_excel_bytes)},
+            data={"target_lang": "french"},
+        )
+        assert response.status_code == 400
+        assert "No translatable content" in response.json()["detail"]
+
+    @patch("rosetta.api.app.translate_file")
+    def test_successful_translation(self, mock_translate, client, sample_excel_bytes):
+        """POST /translate with valid file should return translated file."""
+        # Mock needs to create actual output file
+        mock_translate.side_effect = create_mock_translate_file(sample_excel_bytes)
+
+        response = client.post(
+            "/translate",
+            files={"file": ("test.xlsx", sample_excel_bytes)},
+            data={"target_lang": "french"},
+        )
+
+        assert response.status_code == 200
+        assert response.headers["content-type"] == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+        assert response.headers["x-cells-translated"] == "2"
+        assert "test_french.xlsx" in response.headers.get("content-disposition", "")
+
+    @patch("rosetta.api.app.translate_file")
+    def test_non_xlsx_upload_keeps_extension(self, mock_translate, client, sample_excel_bytes):
+        """Download name should get the language suffix for every accepted extension."""
+        mock_translate.side_effect = create_mock_translate_file(sample_excel_bytes)
+
+        response = client.post(
+            "/translate",
+            files={"file": ("Report.XLSM", sample_excel_bytes)},
+            data={"target_lang": "french"},
+        )
+
+        assert response.status_code == 200
+        assert "Report_french.XLSM" in response.headers.get("content-disposition", "")
+
+    @patch("rosetta.api.app.translate_file")
+    def test_temp_files_removed_after_response(self, mock_translate, client, sample_excel_bytes):
+        """Both temp files should be deleted once the response has been sent."""
+        mock_translate.side_effect = create_mock_translate_file(sample_excel_bytes)
+
+        response = client.post(
+            "/translate",
+            files={"file": ("test.xlsx", sample_excel_bytes)},
+            data={"target_lang": "french"},
+        )
+
+        assert response.status_code == 200
+        call_kwargs = mock_translate.call_args.kwargs
+        assert not call_kwargs["input_file"].exists()
+        assert not call_kwargs["output_file"].exists()
+
+    @patch("rosetta.api.app.translate_file")
+    def test_translation_with_source_lang(self, mock_translate, client, sample_excel_bytes):
+        """POST /translate with source_lang should pass it to translate_file."""
+        mock_translate.side_effect = create_mock_translate_file(sample_excel_bytes)
+
+        response = client.post(
+            "/translate",
+            files={"file": ("test.xlsx", sample_excel_bytes)},
+            data={"target_lang": "french", "source_lang": "english"},
+        )
+
+        assert response.status_code == 200
+        # Verify source_lang was passed
+        call_kwargs = mock_translate.call_args.kwargs
+        assert call_kwargs["source_lang"] == "english"
+
+    @patch("rosetta.api.app.translate_file")
+    def test_translation_with_context(self, mock_translate, client, sample_excel_bytes):
+        """POST /translate with context should pass it to translate_file."""
+        mock_translate.side_effect = create_mock_translate_file(sample_excel_bytes)
+
+        response = client.post(
+            "/translate",
+            files={"file": ("test.xlsx", sample_excel_bytes)},
+            data={
+                "target_lang": "french",
+                "context": "Medical terminology",
+            },
+        )
+
+        assert response.status_code == 200
+        call_kwargs = mock_translate.call_args.kwargs
+        assert call_kwargs["context"] == "Medical terminology"
+
+    @patch("rosetta.api.app.count_cells")
+    @patch("rosetta.api.app.translate_file")
+    def test_translation_with_sheets(self, mock_translate, mock_count, client, sample_excel_bytes):
+        """POST /translate with sheets param should filter sheets."""
+        mock_count.return_value = 2  # Pretend we found cells
+        mock_translate.side_effect = create_mock_translate_file(sample_excel_bytes)
+
+        response = client.post(
+            "/translate",
+            files={"file": ("test.xlsx", sample_excel_bytes)},
+            data={
+                "target_lang": "french",
+                "sheets": "Sheet1, Sheet2",
+            },
+        )
+
+        assert response.status_code == 200
+        call_kwargs = mock_translate.call_args.kwargs
+        assert call_kwargs["sheets"] == {"Sheet1", "Sheet2"}
+
+    @patch("rosetta.api.app.translate_file")
+    def test_translation_error_returns_500(self, mock_translate, client, sample_excel_bytes):
+        """Translation errors should return 500."""
+        mock_translate.side_effect = Exception("API error")
+
+        response = client.post(
+            "/translate",
+            files={"file": ("test.xlsx", sample_excel_bytes)},
+            data={"target_lang": "french"},
+        )
+
+        assert response.status_code == 500
+        assert "Translation failed" in response.json()["detail"]
+
+
+class TestFileSizeLimits:
+    """Tests for file size validation."""
+
+    def test_large_file_returns_400(self, client):
+        """Files over 10MB should be rejected."""
+        # Create a file larger than 10MB
+        large_content = b"x" * (11 * 1024 * 1024)
+
+        response = client.post(
+            "/translate",
+            files={"file": ("large.xlsx", large_content)},
+            data={"target_lang": "french"},
+        )
+
+        assert response.status_code == 400
+        assert "File too large" in response.json()["detail"]