Skip to content
Open
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
b4c0965
Rename LectureSearch DTOs to GlobalSearch and add SearchIntent enum
Nayer-kotry Apr 27, 2026
fd8dce4
Add global search intent classifier and trained ONNX model
Nayer-kotry Apr 27, 2026
8304e54
Remove tokenizer files from repo and load from HuggingFace Hub at run…
Nayer-kotry Apr 27, 2026
ddf5909
Rename LectureSearchAnswerPipeline to GlobalSearchPipeline with inten…
Nayer-kotry Apr 27, 2026
53b6db0
Add GlobalSearchCallback for async webhook pattern
Nayer-kotry Apr 27, 2026
279fd79
Move global search ask to pipeline endpoint and strip search router
Nayer-kotry Apr 27, 2026
098006d
Add onnxruntime, transformers, joblib to project dependencies
Nayer-kotry Apr 27, 2026
b7284af
Fix inconsistent log prefix in GlobalSearchCallback to use [global-se…
Nayer-kotry Apr 27, 2026
7897581
Merge branch 'main' into iris/feature/iris-answer-intent-routing
Nayer-kotry Apr 28, 2026
918bbad
Merge branch 'main' into iris/feature/iris-answer-intent-routing
Nayer-kotry Apr 28, 2026
053f483
chore: regenerate poetry.lock after adding onnxruntime, transformers,…
Nayer-kotry Apr 28, 2026
fb35d7a
fix: address code review comments on global search pipeline
Nayer-kotry Apr 28, 2026
73243a7
fix: address second-round code review comments
Nayer-kotry Apr 28, 2026
0763c2a
fix: suppress repeated warnings when intent model dir is missing
Nayer-kotry Apr 28, 2026
b9876e7
Merge branch 'main' into iris/feature/iris-answer-intent-routing
Nayer-kotry Apr 28, 2026
a196f68
`Logos`: Pin vLLM to 0.20.0
wasnertobias Apr 28, 2026
21cabd1
Athena: Make database optional for llm modules (#526)
maximiliansoelch Apr 30, 2026
623a2d3
Merge remote-tracking branch 'origin/main' into iris/feature/iris-ans…
Nayer-kotry May 3, 2026
fa34433
fix: pass local flag to GlobalSearchPipeline from Artemis LLM selection
Nayer-kotry May 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions athena/athena/athena/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from fastapi.responses import JSONResponse

from . import env
from .database import create_tables
from .database import configure_database, create_tables, is_database_enabled
from .logger import logger
from .module_config import get_module_config
from .metadata import MetaDataMiddleware
Expand All @@ -30,16 +30,26 @@ def __init__(self, *args, **kwargs):
self.add_middleware(ExperimentMiddleware)


def start(self) -> None:
"""Start Athena. You have to ensure to have `app` in your module main scope so that it can be imported."""
def start(self, *, database_required: bool = True) -> None:
"""
Start Athena.

You have to ensure to have `app` in your module main scope so that it can be imported.
Set `database_required=False` for modules that can run without persistent storage. In that mode database
support can still be enabled explicitly via `ATHENA_DATABASE_ENABLED=1`.
"""
LOGGING_CONFIG["formatters"]["default"]["fmt"] = "%(asctime)s %(levelname)s --- [%(name)s] : %(message)s"
LOGGING_CONFIG["formatters"]["access"]["fmt"] = "%(asctime)s %(levelname)s --- [%(name)s] : %(message)s"
logger.info("Starting athena module")

conf = get_module_config()

logger.debug("Creating database tables")
create_tables(conf.type)
configure_database(required=database_required)
if is_database_enabled():
logger.debug("Creating database tables")
create_tables(conf.type)
else:
logger.info("Starting without database support. Set ATHENA_DATABASE_ENABLED=1 to enable it.")

if env.PRODUCTION:
logger.info("Running in PRODUCTION mode")
Expand Down Expand Up @@ -69,4 +79,4 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
return JSONResponse(
status_code=422,
content={"detail": exc.errors()},
)
)
102 changes: 83 additions & 19 deletions athena/athena/athena/database.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,111 @@
import importlib
import os
from contextlib import contextmanager
from typing import Iterator, Optional

from sqlalchemy import create_engine
from sqlalchemy.orm import declarative_base, sessionmaker
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, declarative_base, sessionmaker

from athena import env
from athena.logger import logger

Base = declarative_base()
OPTIONAL_DATABASE_ENV_VAR = "ATHENA_DATABASE_ENABLED"

# SQLite specific configuration
is_sqlite = env.DATABASE_URL.startswith("sqlite:///")
if is_sqlite:
connect_args = {"check_same_thread": False}
# create the data directory if it does not exist
data_dir = os.path.dirname(env.DATABASE_URL[10:])
os.makedirs(data_dir, exist_ok=True)
else:
connect_args = {}

engine = create_engine(
env.DATABASE_URL, connect_args=connect_args
)
class DatabaseDisabledError(RuntimeError):
    """Signal that database access was attempted while database support is turned off."""

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()

def create_tables(exercise_type: str):
_engine: Optional[Engine] = None
SessionLocal: Optional[sessionmaker] = None
_database_enabled = True


def optional_database_enabled_from_env() -> bool:
    """
    Read the opt-in switch for optional database support from the environment.

    Returns True only when the variable named by `OPTIONAL_DATABASE_ENV_VAR` is exactly "1".
    An unset variable is treated as "0", i.e. disabled.
    """
    flag = os.environ.get(OPTIONAL_DATABASE_ENV_VAR, "0")
    return flag == "1"


def configure_database(required: bool = True, enabled: Optional[bool] = None) -> None:
    """
    Decide whether this process uses database-backed storage.

    Args:
        required: When True the database is always enabled and `enabled` is ignored.
        enabled: Only consulted when `required` is False. An explicit True/False is used as-is;
            None defers to `optional_database_enabled_from_env()`.

    When the outcome is "disabled", any previously created engine and session factory references
    held by this module are reset to None.
    """
    global _database_enabled, _engine, SessionLocal

    if required:
        _database_enabled = True
    elif enabled is not None:
        _database_enabled = enabled
    else:
        _database_enabled = optional_database_enabled_from_env()

    if not _database_enabled:
        # Drop the module-level handles so nothing keeps using a database that is now switched off.
        _engine = None
        SessionLocal = None


def is_database_enabled() -> bool:
    """
    Report the current state of the module-level database switch.

    The value is whatever `configure_database` last stored (the module initialises it to True).
    """
    return _database_enabled


def _initialize_database() -> None:
    """
    Lazily create the SQLAlchemy engine and session factory for `env.DATABASE_URL`.

    Calling this again after a successful initialization is a no-op.

    Raises:
        DatabaseDisabledError: If database support is disabled for this process.
    """
    global _engine, SessionLocal

    if not _database_enabled:
        raise DatabaseDisabledError("Database support is disabled for this Athena module.")

    already_initialized = not (_engine is None or SessionLocal is None)
    if already_initialized:
        return

    url = env.DATABASE_URL
    sqlite_prefix = "sqlite:///"
    engine_options: dict = {}

    if url.startswith(sqlite_prefix):
        # SQLite specific configuration, forwarded to the driver via connect_args.
        engine_options["check_same_thread"] = False
        # Everything after the prefix is the database file path; make sure its directory exists.
        # A path without a directory part (e.g. a bare file name) yields "" and is skipped.
        parent_dir = os.path.dirname(url[len(sqlite_prefix):])
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    _engine = create_engine(url, connect_args=engine_options)
    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=_engine)


def create_tables(exercise_type: str) -> None:
    """
    Create all tables for models in athena.models whose name starts with "DB" + exercise_type.title().
    Also create all tables which have been registered previously using `create_additional_table_if_not_exists`.

    When database support is disabled, this only logs a message and returns without touching the database.

    Args:
        exercise_type: Exercise type name; its title-cased form selects the matching "DB..." model classes.
    """
    if not is_database_enabled():
        logger.info("Database support is disabled, skipping table creation")
        return

    _initialize_database()

    model_module = importlib.import_module("athena.models")
    model_class_name_start = "DB" + exercise_type.title()
    for model_class_name in dir(model_module):
        if model_class_name.startswith(model_class_name_start):
            # Get the model class so that Base knows about it
            getattr(model_module, model_class_name)

    # The module-level `engine` no longer exists; tables must be created on the lazily built `_engine`.
    # _initialize_database() either raised or populated it, so the assert only narrows the Optional type.
    assert _engine is not None
    Base.metadata.create_all(_engine)


@contextmanager
def get_db():
db = SessionLocal()
def get_db() -> Iterator[Session]:
if not is_database_enabled():
raise DatabaseDisabledError("Database support is disabled for this Athena module.")

_initialize_database()
assert SessionLocal is not None

db: Session = SessionLocal()
try:
yield db
finally:
Expand Down
5 changes: 5 additions & 0 deletions athena/athena/athena/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from athena.app import app
from athena.authenticate import authenticated
from athena.database import is_database_enabled
from athena.metadata import with_meta
from athena.module_config import get_dynamic_module_config_factory
from athena.logger import logger
Expand Down Expand Up @@ -167,6 +168,10 @@ async def wrapper(request: SubmissionSelectorRequest):
exercise.meta.update(get_stored_exercise_meta(exercise) or {})
store_exercise(exercise)

if not is_database_enabled():
logger.info("%s: Database support is disabled, falling back to the manager's default submission selection.", func.__name__)
return -1

# Get the full submission objects
submissions = list(get_stored_submissions(submission_type, exercise.id, submission_ids))
if len(submission_ids) != len(submissions):
Expand Down
8 changes: 7 additions & 1 deletion athena/athena/athena/storage/exercise_storage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List, Iterable, Optional, Type

from athena.contextvars import get_lms_url
from athena.database import get_db
from athena.database import get_db, is_database_enabled
from athena.schemas import Exercise


Expand All @@ -11,6 +11,8 @@ def get_stored_exercises(exercise_cls: Type[Exercise], lms_url: Optional[str] =
Returns a list of exercises for the given exercise type and exercise ids.
If only_ids is None, returns all exercises for the given exercise type.
"""
if not is_database_enabled():
return ()

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -26,6 +28,8 @@ def get_stored_exercises(exercise_cls: Type[Exercise], lms_url: Optional[str] =

def get_stored_exercise_meta(exercise: Exercise, lms_url: Optional[str] = None, ) -> Optional[dict]:
"""Returns the stored metadata associated with the exercise."""
if not is_database_enabled():
return None

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -38,6 +42,8 @@ def get_stored_exercise_meta(exercise: Exercise, lms_url: Optional[str] = None,

def store_exercises(exercises: List[Exercise], lms_url: Optional[str] = None):
"""Stores the given exercises, all at once."""
if not is_database_enabled():
return

if lms_url is None:
lms_url = get_lms_url()
Expand Down
12 changes: 11 additions & 1 deletion athena/athena/athena/storage/feedback_storage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Iterable, Union, Type, Optional, List

from athena.contextvars import get_lms_url
from athena.database import get_db
from athena.database import get_db, is_database_enabled
from athena.schemas import Feedback


Expand All @@ -12,6 +12,8 @@ def get_stored_feedback(
Returns a list of feedbacks for the given exercise in the given submission.
If submission_id is None, returns all feedbacks for the given exercise.
"""
if not is_database_enabled():
return ()

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -26,6 +28,8 @@ def get_stored_feedback(

def get_stored_feedback_meta(feedback: Feedback, lms_url: Optional[str] = None) -> Optional[dict]:
"""Returns the stored metadata associated with the feedback."""
if not is_database_enabled():
return None

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -46,6 +50,8 @@ def store_feedback(feedback: Feedback, is_lms_id=False, lms_url: Optional[str] =
Returns:
Feedback: The stored feedback with its internal ID assigned.
"""
if not is_database_enabled():
return feedback

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -68,6 +74,8 @@ def get_stored_feedback_suggestions(
feedback_cls: Type[Feedback], exercise_id: int, submission_id: int, lms_url: Optional[str] = None
) -> Iterable[Feedback]:
"""Returns a list of feedback suggestions for the given exercise in the given submission."""
if not is_database_enabled():
return ()

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -87,6 +95,8 @@ def store_feedback_suggestions(feedbacks: List[Feedback], lms_url: Optional[str]
Returns:
List[Feedback]: The stored feedback suggestions with their internal IDs assigned.
"""
if not is_database_enabled():
return feedbacks

if lms_url is None:
lms_url = get_lms_url()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from typing import Optional
from athena.contextvars import get_lms_url
from athena.database import get_db
from athena.database import get_db, is_database_enabled

from athena.models import DBStructuredGradingCriterion
from athena.schemas import StructuredGradingCriterion

def get_structured_grading_criterion(exercise_id: int, current_hash: Optional[str] = None) -> Optional[StructuredGradingCriterion]:
if not is_database_enabled():
return None

lms_url = get_lms_url()
with get_db() as db:
cache_entry = db.query(DBStructuredGradingCriterion).filter(
Expand All @@ -19,6 +22,9 @@ def get_structured_grading_criterion(exercise_id: int, current_hash: Optional[st
def store_structured_grading_criterion(
exercise_id: int, hash: str, structured_instructions: StructuredGradingCriterion
):
if not is_database_enabled():
return

with get_db() as db:
db.merge(
DBStructuredGradingCriterion(
Expand Down
10 changes: 9 additions & 1 deletion athena/athena/athena/storage/submission_storage.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from typing import List, Iterable, Union, Type, Optional

from athena.contextvars import get_lms_url
from athena.database import get_db
from athena.database import get_db, is_database_enabled
from athena.schemas import Submission


def count_stored_submissions(
submission_cls: Type[Submission], exercise_id: int, lms_url: Optional[str] = None
) -> int:
"""Returns the number of submissions for the given exercise."""
if not is_database_enabled():
return 0

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -27,6 +29,8 @@ def get_stored_submissions(
Returns a list of submissions for the given exercise and submission ids.
If only_ids is None, returns all submissions for the given exercise.
"""
if not is_database_enabled():
return ()

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -41,6 +45,8 @@ def get_stored_submissions(

def get_stored_submission_meta(submission: Submission, lms_url: Optional[str] = None) -> Optional[dict]:
"""Returns the stored metadata associated with the submission."""
if not is_database_enabled():
return None

if lms_url is None:
lms_url = get_lms_url()
Expand All @@ -53,6 +59,8 @@ def get_stored_submission_meta(submission: Submission, lms_url: Optional[str] =

def store_submissions(submissions: List[Submission], lms_url: Optional[str] = None):
"""Stores the given submissions, all at once."""
if not is_database_enabled():
return

if lms_url is None:
lms_url = get_lms_url()
Expand Down
8 changes: 6 additions & 2 deletions athena/env_example/module_programming_llm.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
PRODUCTION=1
SECRET=12345abcdef
DATABASE_URL=postgresql://postgres:password@postgres:5432/athena

# Optional for LLM modules. Leave ATHENA_DATABASE_ENABLED unset or set it to 0 to run without a database.
# Set ATHENA_DATABASE_ENABLED=1 to opt into Athena's database-backed storage and caching.
# ATHENA_DATABASE_ENABLED=1
# DATABASE_URL=postgresql://postgres:password@postgres:5432/athena


################################################################
Expand Down Expand Up @@ -31,4 +35,4 @@ REPLICATE_API_TOKEN=
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"
# LANGCHAIN_PROJECT="XXX"
8 changes: 6 additions & 2 deletions athena/env_example/module_text_llm.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
PRODUCTION=1
SECRET=12345abcdef
DATABASE_URL=postgresql://postgres:password@postgres:5432/athena

# Optional for LLM modules. Leave ATHENA_DATABASE_ENABLED unset or set it to 0 to run without a database.
# Set ATHENA_DATABASE_ENABLED=1 to opt into Athena's database-backed storage and caching.
# ATHENA_DATABASE_ENABLED=1
# DATABASE_URL=postgresql://postgres:password@postgres:5432/athena


################################################################
Expand Down Expand Up @@ -31,4 +35,4 @@ REPLICATE_API_TOKEN=
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
# LANGCHAIN_API_KEY="XXX"
# LANGCHAIN_PROJECT="XXX"
# LANGCHAIN_PROJECT="XXX"
Loading
Loading