Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Continuous-integration workflow: run the pytest suite on every push and
# pull request that targets the f25 branch.
name: CI

on:
  push:
    branches: [f25]
  pull_request:
    branches: [f25]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      # Check out the repository at the triggering commit.
      - uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Run tests
        run: pytest test/ -v
4 changes: 3 additions & 1 deletion src/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
class TranslateResponse(BaseModel):
    """Response schema for the translator endpoint.

    # is_english: whether the submitted text was detected as English.
    # translated_content: English translation (or the original text on fallback).
    # language: detected source-language name, or None when detection failed.
    """

    is_english: bool
    translated_content: str
    language: str | None = None


app = FastAPI()


@app.get("/")
def translator_root(content: str = Query(default="")) -> TranslateResponse:
    """Translate ``content`` to English and report the detected language.

    The query parameter is stripped of surrounding whitespace before being
    handed to the translator.
    """
    # translate_content returns (is_english, translated_text, language-or-None).
    is_english, translated_content, language = translate_content(content.strip())
    return TranslateResponse(
        is_english=is_english,
        translated_content=translated_content,
        language=language,
    )
51 changes: 36 additions & 15 deletions src/translator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
from typing import Any

import httpx
Expand Down Expand Up @@ -27,20 +28,34 @@ def _user_prompt(post: str) -> str:
Text: {post}"""


def _parse_model_content(raw: str, post: str) -> tuple[bool, str]:
def _normalize_response_line(line: str) -> str:
"""Strip list markers and leading markdown so LANGUAGE:/TRANSLATION: can be found."""
s = line.strip()
s = re.sub(r"^(\d+\.|[*•-])\s+", "", s)
while s.startswith("*"):
s = s[1:].lstrip()
return s.lstrip()


def _parse_model_content(raw: str, post: str) -> tuple[bool, str, str | None]:
content = raw.strip()
if "</redacted_thinking>" in content:
content = content.split("</redacted_thinking>")[-1].strip()
language = None
detected_language: str | None = None
translation = post
for line in content.splitlines():
if line.startswith("LANGUAGE:"):
language = line[len("LANGUAGE:") :].strip()
elif line.startswith("TRANSLATION:"):
translation = line[len("TRANSLATION:") :].strip()
if language is None:
return (True, post)
return (language.lower() == "english", translation)
norm = _normalize_response_line(line)
low = norm.lower()
if low.startswith("language:"):
detected_language = norm.split(":", 1)[1].strip()
detected_language = detected_language.strip("*").strip()
elif low.startswith("translation:"):
translation = norm.split(":", 1)[1].strip()
translation = translation.strip("*").strip()
if detected_language is None:
return (True, post, None)
is_english = detected_language.lower() == "english"
return (is_english, translation, detected_language)


def _httpx_timeout() -> httpx.Timeout:
Expand All @@ -49,11 +64,17 @@ def _httpx_timeout() -> httpx.Timeout:
return httpx.Timeout(connect=connect, read=read, write=10.0, pool=5.0)


def translate_content(content: str) -> tuple[bool, str]:
return query_llm_robust(content)
def _strip_html(text: str) -> str:
"""Remove HTML tags so the LLM receives plain text (NodeBB sends HTML)."""
return re.sub(r"<[^>]+>", "", text).strip()


def translate_content(content: str) -> tuple[bool, str, str | None]:
plain = _strip_html(content) if content else content
return query_llm_robust(plain or content)


def query_llm_robust(post: str) -> tuple[bool, str]:
def query_llm_robust(post: str) -> tuple[bool, str, str | None]:
url = f"{_ollama_base_url()}/api/chat"
payload: dict[str, Any] = {
"model": _ollama_model(),
Expand All @@ -66,13 +87,13 @@ def query_llm_robust(post: str) -> tuple[bool, str]:
response.raise_for_status()
data = response.json()
except Exception:
return (True, post)
return (True, post, None)

message = data.get("message")
if not isinstance(message, dict):
return (True, post)
return (True, post, None)
content = message.get("content")
if not isinstance(content, str):
return (True, post)
return (True, post, None)

return _parse_model_content(content, post)
66 changes: 57 additions & 9 deletions test/unit/test_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,49 +15,77 @@


@pytest.mark.parametrize(
    ("raw", "post", "expected_english", "expected_text", "expected_language"),
    [
        (
            "LANGUAGE: English\nTRANSLATION: Hello, world.",
            "Hello, world.",
            True,
            "Hello, world.",
            "English",
        ),
        (
            "LANGUAGE: French\nTRANSLATION: Good day.",
            "Bonjour.",
            False,
            "Good day.",
            "French",
        ),
        (
            "TRANSLATION: only this line",
            "some input",
            True,
            "some input",
            None,
        ),
        (
            "</redacted_thinking>\nLANGUAGE: Spanish\nTRANSLATION: Hello.",
            "Hola",
            False,
            "Hello.",
            "Spanish",
        ),
        (
            "LANGUAGE: German\nTRANSLATION: Hi there",
            "src",
            False,
            "Hi there",
            "German",
        ),
        (
            "No LANGUAGE line at all.\nJust prose.",
            "orig",
            True,
            "orig",
            None,
        ),
        (
            "LANGUAGE: english\nTRANSLATION: Same",
            "x",
            True,
            "Same",
            "english",
        ),
        (
            "**LANGUAGE:** French\nTRANSLATION: Hello",
            "Bonjour",
            False,
            "Hello",
            "French",
        ),
        (
            "1. LANGUAGE: French\nTRANSLATION: Hello",
            "Bonjour",
            False,
            "Hello",
            "French",
        ),
        (
            "Language: French\nTranslation: Hello",
            "Bonjour",
            False,
            "Hello",
            "French",
        ),
    ],
)
def test_parse_model_content(
    raw: str,
    post: str,
    expected_english: bool,
    expected_text: str,
    expected_language: str | None,
) -> None:
    """Each reply shape parses to the expected (is_english, text, language) triple."""
    assert _parse_model_content(raw, post) == (
        expected_english,
        expected_text,
        expected_language,
    )


def test_user_prompt_includes_post_text() -> None:
Expand All @@ -94,9 +127,10 @@ def test_query_llm_robust_posts_chat_and_parses_response(monkeypatch: pytest.Mon
)
)

is_english, text = query_llm_robust("in")
is_english, text, language = query_llm_robust("in")
assert is_english is True
assert text == "out"
assert language == "English"
assert route.called
payload = json.loads(route.calls[0].request.content.decode())
assert payload["model"] == "qwen3:0.6b"
Expand All @@ -112,7 +146,7 @@ def test_query_llm_robust_connect_error_returns_original(monkeypatch: pytest.Mon
respx.post(_DEFAULT_CHAT_URL).mock(
side_effect=httpx.ConnectError("refused", request=req),
)
assert query_llm_robust("fall") == (True, "fall")
assert query_llm_robust("fall") == (True, "fall", None)


@respx.mock
Expand All @@ -123,7 +157,7 @@ def test_query_llm_robust_missing_message_dict_returns_original(
respx.post(_DEFAULT_CHAT_URL).mock(
return_value=httpx.Response(200, json={"done": True}),
)
assert query_llm_robust("z") == (True, "z")
assert query_llm_robust("z") == (True, "z", None)


@respx.mock
Expand All @@ -137,18 +171,32 @@ def test_query_llm_robust_non_string_message_content_returns_original(
json={"message": {"role": "assistant", "content": None}, "done": True},
),
)
assert query_llm_robust("y") == (True, "y")
assert query_llm_robust("y") == (True, "y", None)


def test_translate_content_delegates_to_query_llm_robust(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Plain text is forwarded to query_llm_robust unchanged and its result returned."""
    calls: list[str] = []

    def fake(post: str) -> tuple[bool, str, str | None]:
        calls.append(post)
        return (True, "ok", "English")

    monkeypatch.setattr("src.translator.query_llm_robust", fake)
    assert translate_content("hi") == (True, "ok", "English")
    assert calls == ["hi"]


def test_translate_content_strips_html(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """HTML markup is removed before the text reaches the LLM layer."""
    seen: list[str] = []

    def stub(post: str) -> tuple[bool, str, str | None]:
        seen.append(post)
        return (False, "translated", "French")

    monkeypatch.setattr("src.translator.query_llm_robust", stub)
    result = translate_content("<p>Bonjour</p>")
    assert result == (False, "translated", "French")
    assert seen == ["Bonjour"]
Loading