Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@ dependencies = [
"openpyxl>=3.1.5",
"click>=8.1.7",
"python-dotenv>=1.0.0",
"fastapi>=0.115.0",
"uvicorn>=0.32.0",
"python-multipart>=0.0.12",
]

[project.optional-dependencies]
dev = [
"pytest>=8.0.0",
"pytest-cov>=4.1.0",
"pytest-asyncio>=0.24.0",
"httpx>=0.27.0",
"black>=24.0.0",
"ruff>=0.6.0",
"mypy>=1.11.0",
Expand Down
15 changes: 15 additions & 0 deletions run_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env python3
"""Run the Rosetta API server."""

import uvicorn
from dotenv import load_dotenv

# Populate os.environ from a local .env file at import time, before the
# server (and the application module it imports) starts.
load_dotenv()


def main() -> None:
    """Start uvicorn serving ``rosetta.api:app`` on all interfaces, port 8000."""
    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        # Auto-reload restarts the server when source files change.
        "reload": True,
    }
    # The app is passed as an import string rather than an object, which
    # uvicorn requires when reload is enabled.
    uvicorn.run("rosetta.api:app", **server_options)


if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions src/rosetta/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""FastAPI application for Rosetta."""

from rosetta.api.app import app

__all__ = ["app"]
115 changes: 115 additions & 0 deletions src/rosetta/api/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""FastAPI application for Rosetta translation service."""

import tempfile
from pathlib import Path
from typing import Optional

from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import FileResponse

from rosetta.services.translation_service import count_cells, translate_file

# Request limits checked by the /translate endpoint before any translation work.
MAX_FILE_SIZE = 10 * 2**20  # 10MB, expressed in bytes
MAX_CELLS = 5000  # maximum translatable cells accepted per request

# The ASGI application object; title/description/version are the metadata
# FastAPI is constructed with.
app = FastAPI(
    version="0.1.0",
    title="Rosetta",
    description="Excel translation API that preserves formatting, formulas, and data integrity",
)


@app.get("/")
async def root() -> dict:
    """Health check endpoint."""
    # Static payload: takes no input and touches no other service.
    payload = dict(status="ok", service="rosetta")
    return payload


def _parse_sheets(sheets: Optional[str]) -> Optional[set[str]]:
    """Convert the comma-separated ``sheets`` form value into a set of names.

    Returns None (meaning "all sheets") when the value is missing, empty, or
    made up solely of whitespace and commas. An empty set is never returned:
    it is not None, so downstream code would read it as a filter that matches
    no sheet, and the request would fail with "No translatable content found".
    """
    if not sheets:
        return None
    names = {name.strip() for name in sheets.split(",") if name.strip()}
    return names or None


def _output_filename(original: str, target_lang: str) -> str:
    """Build the download name ``<stem>_<target_lang><original extension>``.

    Works for every accepted extension and any letter case, unlike a literal
    ``.replace(".xlsx", ...)`` which leaves e.g. ``data.xlsm`` or ``FILE.XLSX``
    without the language tag.
    """
    # Keep only the final path component of the client-supplied name.
    source = Path(Path(original).name)
    return f"{source.stem}_{target_lang}{source.suffix}"


@app.post("/translate")
async def translate(
    file: UploadFile = File(..., description="Excel file to translate"),
    target_lang: str = Form(..., description="Target language (e.g., french, spanish)"),
    source_lang: Optional[str] = Form(None, description="Source language (auto-detect if omitted)"),
    context: Optional[str] = Form(None, description="Additional context for accurate translations"),
    sheets: Optional[str] = Form(None, description="Comma-separated sheet names (all if omitted)"),
) -> FileResponse:
    """Translate an Excel file.

    Upload an Excel file and receive the translated version.
    Preserves all formatting, formulas, images, and data validations.
    """
    # The docstring above is left untouched on purpose: FastAPI publishes it
    # as the operation description in the generated API docs.
    #
    # Responses: 400 for a missing filename, unsupported extension, oversized
    # upload, too many cells, or no translatable content; 500 when the
    # translation itself raises.

    # Local import so this block does not depend on the module import list.
    from fastapi import BackgroundTasks

    # Validate file type
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")

    if not file.filename.lower().endswith((".xlsx", ".xlsm", ".xltx", ".xltm")):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Only Excel files (.xlsx, .xlsm, .xltx, .xltm) are supported",
        )

    # Read file content (the whole upload is held in memory for the size check)
    content = await file.read()

    # Check file size
    if len(content) > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Maximum size is {MAX_FILE_SIZE // (1024 * 1024)}MB",
        )

    # None means "translate every sheet"; see _parse_sheets for the
    # whitespace-only case.
    sheets_set = _parse_sheets(sheets)

    # Save to temp file for processing. delete=False because the file must
    # outlive this `with` block; it is removed in the `finally` below.
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp_input:
        tmp_input.write(content)
        input_path = Path(tmp_input.name)

    # Defined before the try so every failure path can remove it.
    output_path = input_path.with_name(f"{input_path.stem}_translated.xlsx")

    try:
        # NOTE(review): count_cells and translate_file are plain synchronous
        # calls inside an async handler, so they block the event loop while
        # they run. Consider moving them to a worker thread.

        # Check cell count
        cell_count = count_cells(input_path, sheets_set)
        if cell_count > MAX_CELLS:
            raise HTTPException(
                status_code=400,
                detail=f"Too many cells ({cell_count}). Maximum is {MAX_CELLS} cells per request",
            )

        if cell_count == 0:
            raise HTTPException(
                status_code=400,
                detail="No translatable content found in the file",
            )

        # Translate
        result = translate_file(
            input_file=input_path,
            output_file=output_path,
            target_lang=target_lang,
            source_lang=source_lang,
            context=context,
            sheets=sheets_set,
        )

        # FileResponse does not delete the file it serves. Attach a background
        # task so the translated temp file is removed once the response has
        # been sent; otherwise every successful request leaks a file.
        cleanup = BackgroundTasks()
        cleanup.add_task(output_path.unlink, missing_ok=True)

        # Return translated file
        return FileResponse(
            path=output_path,
            filename=_output_filename(file.filename, target_lang),
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            headers={"X-Cells-Translated": str(result["cell_count"])},
            background=cleanup,
        )

    except HTTPException:
        # No response will stream the output, so remove any partial file now.
        output_path.unlink(missing_ok=True)
        raise
    except Exception as e:
        output_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}") from e
    finally:
        # The input copy is never needed after this point, success or failure.
        input_path.unlink(missing_ok=True)
110 changes: 110 additions & 0 deletions src/rosetta/services/translation_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""High-level translation service for the API."""

from pathlib import Path
from typing import Optional

from rosetta.core.config import Config
from rosetta.models import TranslationBatch
from rosetta.services import ExcelExtractor, Translator


def translate_file(
    input_file: Path,
    output_file: Path,
    target_lang: str,
    source_lang: Optional[str] = None,
    context: Optional[str] = None,
    sheets: Optional[set[str]] = None,
    batch_size: int = 50,
) -> dict:
    """Run the translation pipeline on one workbook and write the result.

    Steps: extract cells, translate them batch by batch, translate rich-text
    runs and dropdown validations, then write everything to ``output_file``.

    Args:
        input_file: Path of the workbook to read.
        output_file: Path the translated workbook is written to.
        target_lang: Language to translate into.
        source_lang: Language to translate from (None leaves it unspecified).
        context: Extra context forwarded with every translation batch.
        sheets: Sheet names to restrict processing to (None for no restriction).
        batch_size: Number of cells sent per translation batch.

    Returns:
        ``{"cell_count": 0, "status": "no_content"}`` when extraction yields
        no cells (nothing is written in that case); otherwise a dict with
        ``cell_count``, ``rich_text_cells``, ``dropdown_count`` and
        ``status`` set to ``"completed"``.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import with rosetta.main; confirm).
    from rosetta.main import (
        _extract_dropdown_validations,
        _extract_rich_text_info,
        _translate_dropdowns,
        _translate_rich_text_runs,
        write_translations,
    )

    config = Config.from_env()
    config.batch_size = batch_size
    translator = Translator(config)

    # Pull every translatable cell out of the workbook.
    with ExcelExtractor(input_file, sheets=sheets) as extractor:
        cells = list(extractor.extract_cells())

    # Attach rich-text information to the extracted cells.
    _extract_rich_text_info(input_file, cells, sheets)

    if not cells:
        return {"cell_count": 0, "status": "no_content"}

    # Translate the cells one slice at a time, overwriting each cell's value
    # with its translation.
    translated_cells = []
    for start in range(0, len(cells), config.batch_size):
        chunk = cells[start : start + config.batch_size]
        request = TranslationBatch(
            cells=chunk,
            source_lang=source_lang,
            target_lang=target_lang,
            context=context,
        )
        for cell, text in zip(chunk, translator.translate_batch(request)):
            cell.value = text
            translated_cells.append(cell)

    # Cells carrying rich-text runs get a second, run-level pass.
    rich_text_cells = [cell for cell in translated_cells if cell.rich_text_runs]
    if rich_text_cells:
        _translate_rich_text_runs(
            rich_text_cells,
            translator,
            source_lang,
            target_lang,
            config.batch_size,
            context,
        )

    # Dropdown (data validation) entries are read from the input file and
    # translated separately from the cells.
    dropdowns = _extract_dropdown_validations(input_file, sheets)
    if dropdowns:
        _translate_dropdowns(
            dropdowns,
            translator,
            source_lang,
            target_lang,
            config.batch_size,
            context,
        )

    write_translations(input_file, output_file, translated_cells, dropdowns)

    dropdown_count = len(dropdowns) if dropdowns else 0
    return {
        "cell_count": len(translated_cells),
        "rich_text_cells": len(rich_text_cells),
        "dropdown_count": dropdown_count,
        "status": "completed",
    }


def count_cells(input_file: Path, sheets: Optional[set[str]] = None) -> int:
    """Return how many cells the extractor yields for ``input_file``.

    Intended as a cheap pre-check before translation. ``sheets`` is passed
    straight through to the extractor (None applies no sheet restriction).
    """
    total = 0
    with ExcelExtractor(input_file, sheets=sheets) as extractor:
        # Only the count matters, so the cells are consumed without being kept.
        for _cell in extractor.extract_cells():
            total += 1
    return total
Loading