-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add REST API for Excel translation #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| #!/usr/bin/env python3 | ||
| """Run the Rosetta API server.""" | ||
|
|
||
| from dotenv import load_dotenv | ||
| import uvicorn | ||
|
|
||
# Load .env file at import time, before the server is started below.
load_dotenv()


def _serve() -> None:
    """Start uvicorn serving the ``rosetta.api:app`` application."""
    # NOTE(review): binding to all interfaces with auto-reload enabled looks
    # like a development configuration — confirm before deploying as-is.
    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("rosetta.api:app", **server_options)


if __name__ == "__main__":
    _serve()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| """FastAPI application for Rosetta.""" | ||
|
|
||
| from rosetta.api.app import app | ||
|
|
||
| __all__ = ["app"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,115 @@ | ||
| """FastAPI application for Rosetta translation service.""" | ||
|
|
||
| import tempfile | ||
| from pathlib import Path | ||
| from typing import Optional | ||
|
|
||
| from fastapi import FastAPI, File, Form, HTTPException, UploadFile | ||
| from fastapi.responses import FileResponse | ||
|
|
||
| from rosetta.services.translation_service import count_cells, translate_file | ||
|
|
||
# Upper bounds applied to each upload.
MAX_FILE_SIZE = 10 * 1024**2  # 10MB, expressed in bytes
MAX_CELLS = 5_000

# The ASGI application object; route handlers register themselves on it.
app = FastAPI(
    version="0.1.0",
    title="Rosetta",
    description="Excel translation API that preserves formatting, formulas, and data integrity",
)
|
|
||
|
|
||
@app.get("/")
async def root() -> dict:
    """Health check endpoint."""
    # Fixed payload; key order (status, then service) is kept as-is so the
    # serialized body does not change.
    payload = dict(status="ok", service="rosetta")
    return payload
|
|
||
|
|
||
@app.post("/translate")
async def translate(
    file: UploadFile = File(..., description="Excel file to translate"),
    target_lang: str = Form(..., description="Target language (e.g., french, spanish)"),
    source_lang: Optional[str] = Form(None, description="Source language (auto-detect if omitted)"),
    context: Optional[str] = Form(None, description="Additional context for accurate translations"),
    sheets: Optional[str] = Form(None, description="Comma-separated sheet names (all if omitted)"),
) -> FileResponse:
    """Translate an Excel file.

    Upload an Excel file and receive the translated version.
    Preserves all formatting, formulas, images, and data validations.
    """
    # The docstring above is left untouched because FastAPI publishes it as
    # the endpoint description. Maintainer notes:
    #   * 400 is raised for a missing filename, an unsupported extension, an
    #     upload over MAX_FILE_SIZE, more than MAX_CELLS cells, or no cells.
    #   * 500 is raised for any other failure during processing.
    #   * Both temp files are removed on every path: the input in `finally`,
    #     the output either in `finally` (failure) or by a background task
    #     that runs once the response has been sent (success).
    # NOTE(review): count_cells/translate_file are synchronous calls inside an
    # async handler, so they run on the event loop — consider a threadpool.

    # Imported here so the endpoint does not rely on the module import block.
    from fastapi import BackgroundTasks

    # Validate file type
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")

    if not file.filename.lower().endswith((".xlsx", ".xlsm", ".xltx", ".xltm")):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Only Excel files (.xlsx, .xlsm, .xltx, .xltm) are supported",
        )

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large body into memory.
    content = await file.read(MAX_FILE_SIZE + 1)

    # Check file size
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024 * 1024)}MB",
        )

    # Parse sheets parameter. Input such as " " or "," is truthy but yields no
    # names; fall back to None ("all sheets") rather than passing an empty set,
    # which would be an explicit filter that selects nothing.
    sheets_set = None
    if sheets:
        sheets_set = {name.strip() for name in sheets.split(",") if name.strip()} or None

    # Reserve temp paths before entering the try block so that `finally` can
    # always clean them up, even if writing the upload to disk fails.
    tmp_input = tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False)
    input_path = Path(tmp_input.name)
    output_path = input_path.with_name(f"{input_path.stem}_translated.xlsx")
    handed_off = False  # True once the response owns deletion of output_path

    try:
        # Save to temp file for processing
        with tmp_input:
            tmp_input.write(content)

        # Check cell count
        cell_count = count_cells(input_path, sheets_set)
        if cell_count > MAX_CELLS:
            raise HTTPException(
                status_code=400,
                detail=f"Too many cells ({cell_count}). Maximum is {MAX_CELLS} cells per request",
            )

        if cell_count == 0:
            raise HTTPException(
                status_code=400,
                detail="No translatable content found in the file",
            )

        # Translate
        result = translate_file(
            input_file=input_path,
            output_file=output_path,
            target_lang=target_lang,
            source_lang=source_lang,
            context=context,
            sheets=sheets_set,
        )

        # Insert the language before the real extension. A plain
        # str.replace(".xlsx", ...) left .xlsm/.xltx/.xltm and upper-case
        # names without the suffix. The extension check above guarantees the
        # name contains a dot.
        base_name, _, extension = file.filename.rpartition(".")
        output_filename = f"{base_name}_{target_lang}.{extension}"

        # Delete the translated temp file only after the body has been sent.
        cleanup = BackgroundTasks()
        cleanup.add_task(output_path.unlink, missing_ok=True)

        # Return translated file
        response = FileResponse(
            path=output_path,
            filename=output_filename,
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            headers={"X-Cells-Translated": str(result["cell_count"])},
            background=cleanup,
        )
        handed_off = True
        return response

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}") from e
    finally:
        # The input file is never needed past this point.
        input_path.unlink(missing_ok=True)
        # On failure no response will stream the output file, so remove any
        # partially written result here instead of leaking it.
        if not handed_off:
            output_path.unlink(missing_ok=True)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| """High-level translation service for the API.""" | ||
|
|
||
| from pathlib import Path | ||
| from typing import Optional | ||
|
|
||
| from rosetta.core.config import Config | ||
| from rosetta.models import TranslationBatch | ||
| from rosetta.services import ExcelExtractor, Translator | ||
|
|
||
|
|
||
def translate_file(
    input_file: Path,
    output_file: Path,
    target_lang: str,
    source_lang: Optional[str] = None,
    context: Optional[str] = None,
    sheets: Optional[set[str]] = None,
    batch_size: int = 50,
) -> dict:
    """Translate an Excel file.

    Args:
        input_file: Path to input Excel file
        output_file: Path for translated output file
        target_lang: Target language for translation
        source_lang: Source language (auto-detected if None)
        context: Additional context for translations
        sheets: Set of sheet names to translate (all if None)
        batch_size: Number of cells per API batch (must be at least 1)

    Returns:
        Dict with translation stats: ``cell_count``, ``rich_text_cells``,
        ``dropdown_count`` and ``status`` ("completed" or "no_content").
        When the status is "no_content", no output file is written.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    # Reject unusable batch sizes up front. Zero would otherwise fail later
    # with an opaque range() error, and a negative value would skip every
    # batch and still report "completed" with nothing translated.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # Imported at call time rather than module import time.
    # NOTE(review): presumably to avoid a circular import with rosetta.main — confirm.
    from rosetta.main import (
        _extract_dropdown_validations,
        _extract_rich_text_info,
        _translate_dropdowns,
        _translate_rich_text_runs,
        write_translations,
    )

    config = Config.from_env()
    config.batch_size = batch_size
    translator = Translator(config)

    # Extract cells
    with ExcelExtractor(input_file, sheets=sheets) as extractor:
        cells = list(extractor.extract_cells())

    # Enrich with rich text info
    _extract_rich_text_info(input_file, cells, sheets)

    if not cells:
        # Same keys as the "completed" result so callers can read the stats
        # without branching on status.
        return {
            "cell_count": 0,
            "rich_text_cells": 0,
            "dropdown_count": 0,
            "status": "no_content",
        }

    # Translate in batches
    translated_cells = []
    for start in range(0, len(cells), config.batch_size):
        batch_cells = cells[start : start + config.batch_size]
        batch = TranslationBatch(
            cells=batch_cells,
            source_lang=source_lang,
            target_lang=target_lang,
            context=context,
        )
        translations = translator.translate_batch(batch)

        # Cells are updated in place; zip stops at the shorter sequence, so a
        # cell without a returned translation is left out of the result list.
        for cell, translation in zip(batch_cells, translations):
            cell.value = translation
            translated_cells.append(cell)

    # Translate rich text runs
    rich_text_cells = [c for c in translated_cells if c.rich_text_runs]
    if rich_text_cells:
        _translate_rich_text_runs(
            rich_text_cells,
            translator,
            source_lang,
            target_lang,
            config.batch_size,
            context,
        )

    # Translate dropdowns
    dropdowns = _extract_dropdown_validations(input_file, sheets)
    if dropdowns:
        _translate_dropdowns(
            dropdowns,
            translator,
            source_lang,
            target_lang,
            config.batch_size,
            context,
        )

    # Write output
    write_translations(input_file, output_file, translated_cells, dropdowns)

    return {
        "cell_count": len(translated_cells),
        "rich_text_cells": len(rich_text_cells),
        "dropdown_count": len(dropdowns) if dropdowns else 0,
        "status": "completed",
    }
|
|
||
|
|
||
def count_cells(input_file: Path, sheets: Optional[set[str]] = None) -> int:
    """Return the number of cells the extractor yields for ``input_file``.

    Used to validate a file before translating it. ``sheets`` is passed
    through to the extractor unchanged (None means no sheet filter is given).
    """
    with ExcelExtractor(input_file, sheets=sheets) as extractor:
        total = 0
        # Only the count matters; the yielded cells themselves are discarded.
        for _cell in extractor.extract_cells():
            total += 1
        return total
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whitespace-only sheets parameter causes empty filter set
When the `sheets` parameter contains only whitespace (e.g., `" "`), the string is truthy so the if-block executes, but the set comprehension strips and filters all elements, producing an empty set `set()` instead of `None`. The `ExcelExtractor` treats an empty set as "filter to zero sheets" (since it's not `None`), causing all sheets to be skipped. Users would receive a confusing "No translatable content found" error instead of having all sheets translated as likely intended.