Skip to content

Commit c2e7067

Browse files
authored
Merge pull request #36 from OHDSI/fix/era-collapse-deterministic
fix(execution): make ERA collapse ordering deterministic
2 parents 76466c5 + b4d9419 commit c2e7067

2 files changed

Lines changed: 45 additions & 2 deletions

File tree

circe/execution/engine/collapse.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,11 @@ def _apply_censor_window(events, censor_window):
2727
def _collapse_era(intervals, era_pad: int):
2828
padded = intervals.mutate(_padded_end_date=(intervals.end_date + ibis.interval(days=int(era_pad))))
2929

30-
ordering = [padded.start_date]
30+
ordering = [
31+
padded.start_date,
32+
padded._padded_end_date.desc(),
33+
padded.end_date.desc(),
34+
]
3135
ordered_window = ibis.window(group_by=padded.person_id, order_by=ordering)
3236
cumulative_window = ibis.cumulative_window(group_by=padded.person_id, order_by=ordering)
3337
with_cummax = padded.mutate(_cummax_padded_end=padded._padded_end_date.max().over(cumulative_window))
@@ -44,7 +48,12 @@ def _collapse_era(intervals, era_pad: int):
4448

4549
grouping_window = ibis.cumulative_window(
4650
group_by=marked.person_id,
47-
order_by=[marked.start_date, marked._is_new_group.desc()],
51+
order_by=[
52+
marked.start_date,
53+
marked._padded_end_date.desc(),
54+
marked.end_date.desc(),
55+
marked._is_new_group.desc(),
56+
],
4857
)
4958
group_index = marked._is_new_group.sum().over(grouping_window)
5059
grouped = marked.mutate(_group_idx=group_index)

tests/execution/test_end_strategy_censoring.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -283,6 +283,40 @@ def test_collapse_settings_era_merges_tied_start_dates_into_one_group():
283283
assert str(result.iloc[0]["end_date"])[:10] == "2020-01-05"
284284

285285

286+
def test_collapse_settings_era_merges_contained_intervals_after_tied_start_dates():
287+
ibis = pytest.importorskip("ibis")
288+
_ = pytest.importorskip("duckdb")
289+
290+
conn = ibis.duckdb.connect()
291+
_seed_common_tables(conn, ibis)
292+
conn.create_table(
293+
"condition_occurrence",
294+
obj=ibis.memtable(
295+
{
296+
"person_id": [1, 1, 1],
297+
"condition_occurrence_id": [100, 101, 102],
298+
"condition_concept_id": [111, 111, 111],
299+
"condition_start_date": ["2020-01-01", "2020-01-01", "2020-01-10"],
300+
"condition_end_date": ["2020-01-02", "2020-02-01", "2020-01-15"],
301+
"visit_occurrence_id": [10, 10, 10],
302+
}
303+
),
304+
overwrite=True,
305+
)
306+
307+
expression = CohortExpression(
308+
concept_sets=[_make_concept_set(1, 111)],
309+
primary_criteria=PrimaryCriteria(criteria_list=[ConditionOccurrence(codeset_id=1)]),
310+
end_strategy=DateOffsetStrategy(offset=0, date_field="end_date"),
311+
collapse_settings=CollapseSettings(era_pad=0),
312+
)
313+
314+
result = build_cohort(expression, backend=conn, cdm_schema="main").execute()
315+
assert len(result) == 1
316+
assert str(result.iloc[0]["start_date"])[:10] == "2020-01-01"
317+
assert str(result.iloc[0]["end_date"])[:10] == "2020-02-01"
318+
319+
286320
def test_apply_end_strategy_rejects_invalid_date_field_and_preserves_fallback_semantics():
287321
ibis_mod = pytest.importorskip("ibis")
288322
_ = pytest.importorskip("duckdb")

0 commit comments

Comments
 (0)