Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 9 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# CIRCE Python Implementation

[![Python](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/downloads/)
[![Tests](https://img.shields.io/badge/tests-3400%2B%20passed-brightgreen)](tests/)
[![Tests](https://img.shields.io/badge/tests-passing-brightgreen)](tests/)
[![codecov](https://codecov.io/gh/OHDSI/Circepy/graph/badge.svg?token=CODECOV_TOKEN)](https://codecov.io/gh/OHDSI/Circepy)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
[![PyPI](https://img.shields.io/badge/PyPI-ohdsi--circe--python--alpha-blue)](https://pypi.org/project/ohdsi-circe-python-alpha/)
Expand All @@ -27,17 +27,15 @@ CIRCE Python provides a comprehensive toolkit for working with OMOP CDM cohort d
> [!IMPORTANT]
> This package is currently in **Alpha** status and undergoing rigorous parity testing against the Java implementation.

- **Version**: 0.1.0 (Alpha)
- **Tests**: 3,400+ passing
- **Coverage**: 34% (Core logic focus)
- **Version**: 0.2.0 (Alpha)
- **Tests**: Passing in CI
- **Python**: 3.9+
- **License**: Apache 2.0

## Installation

> [!NOTE]
> This package is currently in private development. Install from source using Git.
> The recommended workflow uses `uv` and the checked-in `uv.lock` for a reproducible environment.
> The recommended source workflow uses `uv` and the checked-in `uv.lock` for a reproducible environment.

### From Source (Current Method)

Expand Down Expand Up @@ -148,18 +146,17 @@ An experimental backend-native execution API is available under
`circe.execution`.

```python
from circe.execution import ExecutionOptions, IbisExecutor
from circe.execution import build_cohort

# Requires optional extras, e.g. `pip install ohdsi-circe-python-alpha[ibis-duckdb]`
executor = IbisExecutor(conn, ExecutionOptions(cdm_schema="main"))
events = executor.build(cohort) # lazy ibis relation
events = build_cohort(cohort, backend=conn, cdm_schema="main") # lazy ibis relation
```

## What's Included

This package provides a complete Python implementation of CIRCE-BE with:

- **3,400+ passing tests** with focused coverage on core logic
- **Passing test suite** with focused coverage on core logic
- **18+ SQL builders** for all OMOP CDM domains:
- Condition Occurrence/Era
- Drug Exposure/Era
Expand Down Expand Up @@ -227,7 +224,7 @@ circe/
- [x] Java interoperability with camelCase/snake_case field support
- [x] Cohort expression validation with 40+ checker implementations
- [x] Markdown rendering for print-friendly descriptions
- [x] Full test suite (3,400+ tests)
- [x] Full test suite
- [x] Type hints throughout with py.typed marker
- [x] Concept set expression handling
- [x] Window criteria and correlated criteria support
Expand Down Expand Up @@ -373,7 +370,7 @@ uv run circe --help
uv run pytest
```

All 3,400+ tests should pass.
The full test suite should pass.

### Linting and Formatting

Expand Down
18 changes: 6 additions & 12 deletions circe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,14 @@
)

from .api import (
build_cohort,
build_cohort_query,
cohort_expression_from_json,
cohort_print_friendly,
)
from .execution import (
ExecutionOptions,
IbisExecutor,
build_ibis,
to_polars,
write_cohort,
)

# Main exports
from .io import load_expression
from .vocabulary import Concept, ConceptSet, ConceptSetExpression, ConceptSetItem

Expand Down Expand Up @@ -208,13 +205,10 @@ def get_json_schema() -> dict:
# API functions
"cohort_expression_from_json",
"build_cohort_query",
"build_cohort",
"write_cohort",
"cohort_print_friendly",
"safe_model_rebuild",
# I/O and experimental execution API
# I/O helpers
"load_expression",
"ExecutionOptions",
"IbisExecutor",
"build_ibis",
"to_polars",
"write_cohort",
]
121 changes: 120 additions & 1 deletion circe/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@
This module provides a simple API, modeled on the R package CirceR, for working with cohort definitions:
- cohort_expression_from_json(): Load cohort expression from JSON string
- build_cohort_query(): Generate SQL from cohort expression
- build_cohort(): Build cohort as a relational expression (experimental)
- write_cohort(): Write OHDSI cohort-table rows to a database table
- cohort_print_friendly(): Generate Markdown from cohort expression
"""

from typing import Optional
from typing import Literal, Optional

from .cohortdefinition import (
BuildExpressionQueryOptions,
CohortExpression,
CohortExpressionQueryBuilder,
MarkdownRender,
)
from .execution.typing import IbisBackendLike, Table
from .vocabulary.concept import ConceptSet


Expand Down Expand Up @@ -102,6 +105,122 @@ def build_cohort_query(
return builder.build_expression_query(expression, options)


def build_cohort(
    expression: CohortExpression,
    *,
    backend: IbisBackendLike,
    cdm_schema: str,
    vocabulary_schema: Optional[str] = None,
    results_schema: Optional[str] = None,
) -> Table:
    """Compile a cohort expression into a relational table expression.

    Thin public entry point that forwards every argument, unchanged, to
    ``circe.execution.build_cohort`` (the experimental Ibis execution engine)
    and returns whatever that call produces.

    Args:
        expression: The cohort definition to compile.
        backend: Ibis backend the relation is compiled against.
        cdm_schema: Name of the schema holding the OMOP CDM tables.
        vocabulary_schema: Schema holding the vocabulary tables; when left as
            ``None`` the execution layer is documented to fall back to
            ``cdm_schema``.
        results_schema: Optional schema used when resolving result-side
            tables.

    Returns:
        An Ibis table expression representing the cohort result.

    Raises:
        ExecutionError: If the expression cannot be normalized, lowered, or
            compiled into a relational expression.

    Example:
        >>> import ibis
        >>> backend = ibis.duckdb.connect()
        >>> expression = cohort_expression_from_json(json_str)
        >>> relation = build_cohort(
        ...     expression,
        ...     backend=backend,
        ...     cdm_schema="cdm",
        ...     vocabulary_schema="vocab",
        ... )
    """
    # Function-scope import, aliased so it stays distinct from this wrapper.
    from .execution import build_cohort as _execution_build_cohort

    relation = _execution_build_cohort(
        expression,
        backend=backend,
        cdm_schema=cdm_schema,
        vocabulary_schema=vocabulary_schema,
        results_schema=results_schema,
    )
    return relation


def write_cohort(
    expression: CohortExpression,
    *,
    backend: IbisBackendLike,
    cdm_schema: str,
    cohort_table: str,
    cohort_id: int,
    vocabulary_schema: Optional[str] = None,
    results_schema: Optional[str] = None,
    if_exists: Literal["fail", "replace"] = "fail",
) -> None:
    """Build a cohort and persist it as rows of an OHDSI cohort table.

    Thin public entry point that forwards every argument, unchanged, to
    ``circe.execution.write_cohort``. As documented for that layer, the cohort
    is built via :func:`build_cohort`, projected into the standard OHDSI
    cohort-table shape, and materialized to a backend table; rows that belong
    to other cohort IDs are left in place.

    Args:
        expression: The cohort definition to build.
        backend: Ibis backend used both to compile and to write the cohort.
        cdm_schema: Name of the schema holding the OMOP CDM tables.
        cohort_table: Name of the OHDSI cohort table to create or update.
        cohort_id: Identifier stored in ``cohort_definition_id``.
        vocabulary_schema: Schema holding the vocabulary tables; when left as
            ``None`` the execution layer is documented to fall back to
            ``cdm_schema``.
        results_schema: Optional schema of the target table.
        if_exists: Policy for rows already present for ``cohort_id``:
            ``"fail"`` raises when such rows exist, ``"replace"`` replaces
            only the rows for ``cohort_id``.

    Returns:
        None

    Raises:
        ExecutionError: If the cohort cannot be built or the target table
            cannot be written.

    Example:
        >>> import ibis
        >>> backend = ibis.duckdb.connect()
        >>> expression = cohort_expression_from_json(json_str)
        >>> write_cohort(
        ...     expression,
        ...     backend=backend,
        ...     cdm_schema="cdm",
        ...     cohort_table="cohort",
        ...     cohort_id=1,
        ...     results_schema="results",
        ...     if_exists="replace",
        ... )
    """
    # Function-scope import, aliased so it stays distinct from this wrapper.
    from .execution import write_cohort as _execution_write_cohort

    # Called purely for its side effect; nothing is returned to the caller.
    _execution_write_cohort(
        expression,
        backend=backend,
        cdm_schema=cdm_schema,
        vocabulary_schema=vocabulary_schema,
        results_schema=results_schema,
        cohort_table=cohort_table,
        cohort_id=cohort_id,
        if_exists=if_exists,
    )


def cohort_print_friendly(
expression: CohortExpression,
concept_sets: Optional[list[ConceptSet]] = None,
Expand Down
29 changes: 21 additions & 8 deletions circe/execution/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
"""Experimental backend execution APIs."""
"""New Ibis execution subsystem.

from .ibis import IbisExecutor, build_ibis, to_polars, write_cohort
from .options import ExecutionOptions, SchemaName
This package is intentionally parallel to the existing SQL builder path and does
not modify cohortdefinition model semantics.
"""

from .api import build_cohort, write_cohort
from .databricks_compat import apply_databricks_post_connect_workaround
from .errors import (
CompilationError,
ExecutionError,
ExecutionNormalizationError,
UnsupportedCriterionError,
UnsupportedFeatureError,
)

__all__ = [
"ExecutionOptions",
"SchemaName",
"IbisExecutor",
"build_ibis",
"to_polars",
"build_cohort",
"write_cohort",
"apply_databricks_post_connect_workaround",
"ExecutionError",
"ExecutionNormalizationError",
"UnsupportedCriterionError",
"UnsupportedFeatureError",
"CompilationError",
]
39 changes: 39 additions & 0 deletions circe/execution/_dataclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

import sys
from dataclasses import dataclass
from typing import Any, Callable, TypeVar, cast, overload

from typing_extensions import dataclass_transform

T = TypeVar("T")


@overload
def frozen_slots_dataclass(_cls: type[T], **kwargs: Any) -> type[T]: ...


@overload
def frozen_slots_dataclass(_cls: None = None, **kwargs: Any) -> Callable[[type[T]], type[T]]: ...


@dataclass_transform(frozen_default=True)
def frozen_slots_dataclass(
_cls: type[T] | None = None,
**kwargs: Any,
) -> type[T] | Callable[[type[T]], type[T]]:
"""Compatibility wrapper for frozen+slots dataclasses.

`slots=True` is preferred for memory/layout guarantees, but this wrapper keeps
compatibility with older Python runtimes that do not support dataclass slots.
"""

def wrap(cls: type[T]) -> type[T]:
dataclass_factory = cast(Any, dataclass)
if sys.version_info >= (3, 10):
return cast(type[T], dataclass_factory(frozen=True, slots=True, **kwargs)(cls))
return cast(type[T], dataclass_factory(frozen=True, **kwargs)(cls))

if _cls is None:
return wrap
return wrap(_cls)
Loading
Loading