Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog for oda_reader

## 1.4.0 (2025-12-19)
- Adds `bulk_download_dac2a()` function for bulk downloading the full DAC2A dataset.
- Auto-detects file types (parquet vs txt/csv) in bulk downloads, removing the need for the `is_txt` parameter.
- Auto-detects CSV delimiters (comma, pipe, tab, semicolon) when reading txt files from bulk downloads.
- Deprecates the `is_txt` parameter in `bulk_download_parquet()`. The parameter is still accepted for backward compatibility but emits a deprecation warning and will be removed in a future major release.
- Adds pytest and pytest-mock to dev dependencies for improved testing support.

## 1.3.5 (2025-12-19)
- Fixes `_get_dataflow_version()` to gracefully handle URLs without a version pattern instead of crashing.

Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "oda_reader"
version = "1.3.5"
version = "1.4.0"
description = "A simple package to import ODA data from the OECD's API and AidData's database"
readme = "README.md"
license = "MIT"
Expand Down Expand Up @@ -42,6 +42,8 @@ build-backend = "uv_build"
[dependency-groups]
dev = [
"pre-commit>=4.0.0",
"pytest>=9.0.2",
"pytest-mock>=3.15.1",
"ruff>=0.14.0",
]
docs = [
Expand Down
3 changes: 2 additions & 1 deletion src/oda_reader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
)
from oda_reader.crs import bulk_download_crs, download_crs, download_crs_file
from oda_reader.dac1 import download_dac1
from oda_reader.dac2a import download_dac2a
from oda_reader.dac2a import bulk_download_dac2a, download_dac2a
from oda_reader.download.query_builder import QueryBuilder
from oda_reader.multisystem import bulk_download_multisystem, download_multisystem
from oda_reader.tools import get_available_filters
Expand All @@ -38,6 +38,7 @@
"QueryBuilder",
"download_dac1",
"download_dac2a",
"bulk_download_dac2a",
"download_multisystem",
"bulk_download_multisystem",
"download_crs",
Expand Down
1 change: 0 additions & 1 deletion src/oda_reader/crs.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def download_crs_file(
return bulk_download_parquet(
file_id=file_id,
save_to_path=save_to_path,
is_txt=True,
as_iterator=as_iterator,
)

Expand Down
54 changes: 53 additions & 1 deletion src/oda_reader/dac2a.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,38 @@
import typing
from pathlib import Path

import pandas as pd

from oda_reader._cache import cache_info
from oda_reader.common import logger
from oda_reader.download.download_tools import download
from oda_reader.download.download_tools import (
DAC2A_FLOW_URL,
bulk_download_parquet,
download,
get_bulk_file_id,
)

DATAFLOW_ID: str = "DSD_DAC2@DF_DAC2A"
DATAFLOW_VERSION: str = "1.6"


def get_full_dac2a_parquet_id() -> str:
    """Look up the bulk-download file ID for the complete DAC2A dataset.

    The OECD dataflow endpoint is queried for the bulk download link that
    points at the full DAC2A dataset in dotStat format.

    Returns:
        str: File ID accepted by the bulk download service.

    Raises:
        RuntimeError: If no matching file ID is found after the maximum
            number of retries.
    """
    # Label used by the OECD flow page to identify the full dotStat export.
    search_label = "DAC2A full dataset (dotStat format)|"
    return get_bulk_file_id(flow_url=DAC2A_FLOW_URL, search_string=search_label)


@cache_info
def download_dac2a(
start_year: int | None = None,
Expand Down Expand Up @@ -52,3 +77,30 @@ def download_dac2a(
)

return df


def bulk_download_dac2a(
    save_to_path: Path | str | None = None,
    *,
    as_iterator: bool = False,
) -> pd.DataFrame | None | typing.Iterator[pd.DataFrame]:
    """Download the full DAC2A dataset via the bulk download service.

    The file is very large, so saving it to disk is strongly recommended.
    When ``save_to_path`` is omitted, the data is returned as a DataFrame
    instead.

    Args:
        save_to_path: Destination path for the downloaded file. Optional;
            when not given, a DataFrame is returned.
        as_iterator: If ``True``, yields ``DataFrame`` chunks rather than a
            single ``DataFrame``.

    Returns:
        pd.DataFrame | Iterator[pd.DataFrame] | None
    """
    # Resolve the current file ID from the OECD dataflow before downloading.
    bulk_file_id = get_full_dac2a_parquet_id()

    return bulk_download_parquet(
        file_id=bulk_file_id,
        save_to_path=save_to_path,
        as_iterator=as_iterator,
    )
Loading