Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 2 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,7 @@ Install and run:
pip install datamimic-ce
```

### Deterministic generation

```python
from datamimic_ce.domains.facade import generate_domain

request = {
"domain": "person",
"version": "v1",
"count": 1,
"seed": "docs-demo",
"locale": "en_US",
"clock": "2025-01-01T00:00:00Z"
}

response = generate_domain(request)
print(response["items"][0]["id"])
```

Same input → same output.
Seeds, clocks, and UUIDv5 namespaces guarantee reproducibility across CI, dev, and analytics pipelines.

---

### Deterministic Data Generation
## Deterministic Data Generation

DATAMIMIC lets you generate the *same* data, every time across machines, environments, or CI pipelines.
Seeds, clocks, and UUIDv5 namespaces ensure your synthetic datasets remain reproducible and traceable, no matter where or when they’re generated.
Expand Down Expand Up @@ -106,7 +82,7 @@ Behind the scenes, every deterministic request combines:
* A **frozen clock** (for time-dependent values), and
* A **UUIDv5 namespace** (for globally consistent identifiers).

Together, they form a reproducibility contract. Ideal for CI/CD pipelines, agent workflows, and analytics verification.
Together, they form a reproducibility contract. Ideal for CI/CD pipelines, agentic pipelines, and analytics verification.

Agents can safely re-invoke the same generation call and receive byte-for-byte identical data.

Expand Down
2 changes: 2 additions & 0 deletions datamimic_ce/domains/common/demographics/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Demographic profile domain package."""

from .api import build_sampler_with_profile_groups
from .loader import DemographicProfileError, load_demographic_profile
from .profile import DemographicProfile, DemographicProfileId, normalize_sex
from .profile_meta import profile_group_refs
Expand All @@ -14,4 +15,5 @@
"load_demographic_profile",
"normalize_sex",
"profile_group_refs",
"build_sampler_with_profile_groups",
]
48 changes: 48 additions & 0 deletions datamimic_ce/domains/common/demographics/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Convenience API for building a demographic sampler with metadata-applied groups.

Provide a simple integration that loads a demographic profile and applies
group references from profile metadata in one call, so callers don't have to
manually stitch loader + profile_meta + sampler wiring.
"""

from __future__ import annotations

from pathlib import Path

from .loader import load_demographic_profile
from .profile_meta import profile_group_refs
from .sampler import DemographicSampler


def build_sampler_with_profile_groups(
    *,
    directory: Path,
    dataset: str,
    version: str,
    profile_id: str,
    request_hash: str,
) -> DemographicSampler:
    """Build a DemographicSampler from profile CSVs with metadata group masks applied.

    One-call wiring of loader + profile_meta + sampler so callers don't have
    to stitch the pieces together themselves.

    Parameters
    - directory: Folder containing age_pyramid.dmgrp.csv and condition_rates.dmgrp.csv
    - dataset: Dataset code matching the CSV rows (e.g., "US")
    - version: Profile version (e.g., "v1")
    - profile_id: Profile metadata row id to look up group refs
    - request_hash: Hash or identifier for error context tracking
    """
    # Pure domain model first: parse the CSVs into a profile and wrap it.
    demographic_profile = load_demographic_profile(directory, dataset, version)
    sampler = DemographicSampler(demographic_profile)

    # Metadata lookup for group references; only apply masks when present.
    group_refs = profile_group_refs(
        dataset=dataset,
        version=version,
        profile_id=profile_id,
        request_hash=request_hash,
    )
    if group_refs:
        sampler.apply_profile_groups(group_refs, dataset, version)

    return sampler
File renamed without changes.
3 changes: 2 additions & 1 deletion docs/examples/person_generation.md
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ from random import Random
patient_service = PatientService(dataset="US", rng=Random(77))
patient = patient_service.generate()
print(f"Patient: {patient.full_name}, ID: {patient.patient_id}")
```

## Reproducible Runs with Seeds

Expand All @@ -195,7 +196,7 @@ svc_a = PersonService(dataset="US", rng=Random(123))
svc_b = PersonService(dataset="US", rng=Random(123))
assert svc_a.generate().to_dict() == svc_b.generate().to_dict()
```
```


Example output:
```
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Integration test for building a sampler with profile metadata group refs applied."""

from __future__ import annotations

from pathlib import Path

import pytest

from datamimic_ce.domains.common.demographics import build_sampler_with_profile_groups


@pytest.fixture()
def demo_profile_dir(tmp_path: Path) -> Path:
    """Populate a temp directory with minimal US/v1 demographic CSV fixtures."""
    age_pyramid_csv = """
dataset,version,sex,age_min,age_max,weight
US,v1,F,0,17,0.3
US,v1,F,18,44,0.5
US,v1,F,45,90,0.2
US,v1,M,0,17,0.3
US,v1,M,18,44,0.5
US,v1,M,45,90,0.2
""".strip()
    condition_rates_csv = """
dataset,version,condition,sex,age_min,age_max,prevalence
US,v1,Hypertension,,0,120,0.2
US,v1,Type 2 Diabetes,,0,120,0.15
""".strip()

    # Write both CSVs with a trailing newline, as the loader expects text files.
    (tmp_path / "age_pyramid.dmgrp.csv").write_text(age_pyramid_csv + "\n", encoding="utf-8")
    (tmp_path / "condition_rates.dmgrp.csv").write_text(condition_rates_csv + "\n", encoding="utf-8")
    return tmp_path


def test_build_sampler_with_profile_groups_applies_masks(demo_profile_dir: Path) -> None:
    """The one-call API should load CSVs and apply metadata group masks."""
    sampler = build_sampler_with_profile_groups(
        directory=demo_profile_dir,
        dataset="US",
        version="v1",
        profile_id="urban_adult",
        request_hash="it",
    )

    # At least one group mask from metadata must land on the sampler.
    # Gender mask is well-formed across keys in repo data (age mask may be rejected in
    # non-strict mode if bounds are violated by single-band groups), so assert on gender.
    assert sampler.group_mask(
        "gender_category"
    ), "Expected gender_category mask to be applied from profile metadata"
    # Provenance must track group file usage
    assert sampler.provenance_hash(), "Expected provenance to be recorded for applied group tables"