diff --git a/CHANGELOG.md b/CHANGELOG.md index 39dcfa0..7ab9892 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## v1.4.1 (2025-12-05) +- The new SDMX API returns `OBS_VALUE` in units and `SCALE_CODE` as an exponent (e.g. 9), whereas earlier releases stored values in scale and `SCALE_CODE` as a multiplier (e.g. 1000000000). To preserve backwards compatibility, this new version +converts the API data back to the old behaviour. + ## v1.4.0 (2025-12-05) - The October 2025 release of WEO removed bulk downloads and moved everything towards the SDMX API. This update provides a way to parse new releases from the API instead of relying on the XML files. Note that thew new API response does not include observation-level notes or information on when projections start for each country-indicator. diff --git a/pyproject.toml b/pyproject.toml index 980099e..3d026b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "imf-reader" -version = "1.4.0" +version = "1.4.1" description = "A package to access imf data" authors = [{ name = "The ONE Campaign" }] license = { text = "MIT" } diff --git a/src/imf_reader/weo/api.py b/src/imf_reader/weo/api.py index a836ff3..f4c1388 100644 --- a/src/imf_reader/weo/api.py +++ b/src/imf_reader/weo/api.py @@ -18,6 +18,13 @@ 9: "Billions", } +# Map scale exponents to multipliers (for converting to legacy format) +SCALE_MULTIPLIERS = { + 0: 1, + 6: 1_000_000, + 9: 1_000_000_000, +} + # Cache configuration _CACHE_DIR = Path.home() / ".cache" / "imf_reader" / "weo" _CACHE_TTL = 7 * 24 * 60 * 60 # 7 days in seconds @@ -148,6 +155,23 @@ def _align_schema(df: pd.DataFrame) -> pd.DataFrame: df["LASTACTUALDATE"] = pd.array([pd.NA] * len(df), dtype="Int64") df["NOTES"] = pd.array([pd.NA] * len(df), dtype="string") + # Convert values to match legacy format: + # - Legacy format stores OBS_VALUE "in scale" (e.g., 447.416 for 447.416 billion) + # - New API returns OBS_VALUE in units (e.g., 447416000000.0) + # - Legacy SCALE_CODE is the multiplier (e.g., 1000000000), not the exponent (e.g., 9) + + # First, convert OBS_VALUE from units to "in scale" by 
dividing by 10^SCALE_CODE + # Only apply where SCALE_CODE is present and > 0 + scale_exponent = pd.to_numeric(df["SCALE_CODE"], errors="coerce") + has_scale = scale_exponent.notna() & (scale_exponent > 0) + df.loc[has_scale, "OBS_VALUE"] = ( + pd.to_numeric(df.loc[has_scale, "OBS_VALUE"], errors="coerce") + / (10 ** scale_exponent[has_scale]) + ) + + # Convert SCALE_CODE from exponent to multiplier to match legacy format + df["SCALE_CODE"] = scale_exponent.map(SCALE_MULTIPLIERS) + # Fix data types to match old parser # Numeric columns df["OBS_VALUE"] = df["OBS_VALUE"].astype("Float64")