-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathread_announcements.py
More file actions
122 lines (86 loc) · 3.51 KB
/
read_announcements.py
File metadata and controls
122 lines (86 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
"""Module to get SDR data from the IMF website
info: https://www.imf.org/en/About/Factsheets/Sheets/2023/special-drawing-rights-sdr
"""
from functools import lru_cache
import pandas as pd
import calendar
from bs4 import BeautifulSoup
from datetime import datetime
from imf_reader.utils import make_request
from imf_reader.config import logger
# Common prefix of the IMF pages used by this module; the per-date export URL
# (extsdr2.aspx) is built from it in get_holdings_and_allocations_data.
BASE_URL = "https://www.imf.org/external/np/fin/tad/"
# Page that is scraped by get_latest_allocations_holdings_date to find the
# most recent reporting date.
MAIN_PAGE_URL = "https://www.imf.org/external/np/fin/tad/extsdr1.aspx"
def read_tsv(url: str) -> pd.DataFrame:
    """Read the SDR export located at ``url`` into a dataframe.

    The separator handed to pandas is the two-character string ``"/t"``
    (slash + t), not a tab character. Lines that do not contain that sequence
    are therefore kept whole in a single column; ``clean_df`` later splits
    that column on real tab characters.

    Args:
        url: Location of the export. It is passed straight to
            ``pandas.read_csv``, so anything that function accepts works.

    Returns:
        The dataframe produced by ``pandas.read_csv``.

    Raises:
        ValueError: If pandas cannot parse the payload. The original
            ``ParserError`` is attached as ``__cause__``.
    """
    try:
        # NOTE(review): "/t" must stay as-is. clean_df splits the first column
        # on "\t" itself, so switching to a real tab here would break it.
        # Presumably an HTML error page (closing tags contain "/t") is what
        # triggers the ParserError below - confirm against a live response.
        return pd.read_csv(url, delimiter="/t", engine="python")
    except pd.errors.ParserError as err:
        # Chain explicitly so the parsing failure stays visible to callers.
        raise ValueError("SDR data not available for this date") from err
def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape the raw single-column SDR export into a long-format table.

    The first three rows are dropped (presumably title/header lines of the
    export - confirm), the first column is split on tab characters into
    ``entity``, ``holdings`` and ``allocations``, and the two numeric columns
    are converted to numbers before the frame is melted.

    Args:
        df: Raw dataframe whose first column holds tab-separated records.

    Returns:
        A dataframe with columns ``entity``, ``indicator`` (either
        ``"holdings"`` or ``"allocations"``) and ``value``.
    """
    numeric_columns = ["holdings", "allocations"]

    records = df.iloc[3:, 0].str.split("\t", expand=True)
    records.columns = ["entity", *numeric_columns]

    for column in numeric_columns:
        # Keep only digits and dots (drops thousands separators and the like);
        # anything that still fails to parse becomes NaN.
        digits_only = records[column].str.replace(r"[^\d.]", "", regex=True)
        records[column] = pd.to_numeric(digits_only, errors="coerce")

    return pd.melt(
        records,
        id_vars="entity",
        value_vars=numeric_columns,
        var_name="indicator",
    )
def format_date(month: int, year: int) -> str:
    """Build a ``year-month-day`` string for the last day of the given month.

    Note the argument order (month first, then year). Month and day are not
    zero-padded, e.g. ``format_date(2, 2024)`` gives ``"2024-2-29"``.

    Args:
        month: Month number, 1-12.
        year: Four-digit year.

    Returns:
        The date string, with the day set to the final day of the month.
    """
    # monthrange returns (weekday of the 1st, number of days in the month).
    _, days_in_month = calendar.monthrange(year, month)
    return "{}-{}-{}".format(year, month, days_in_month)
@lru_cache
def get_holdings_and_allocations_data(
    year: int,
    month: int,
):
    """Download and tidy SDR holdings and allocations for one month.

    Results are memoised per ``(year, month)`` by ``lru_cache``, so repeated
    calls return the same dataframe object; copy it before mutating.

    Args:
        year: Year of the requested report.
        month: Month of the requested report.

    Returns:
        The cleaned long-format dataframe with an extra ``date`` column set to
        the last day of the requested month.
    """
    month_end = format_date(month, year)
    logger.info(f"Fetching SDR data for date: {month_end}")

    # tsvflag=Y is passed so the response can be read by read_tsv.
    export_url = f"{BASE_URL}extsdr2.aspx?date1key={month_end}&tsvflag=Y"
    tidy = clean_df(read_tsv(export_url))
    tidy["date"] = pd.to_datetime(month_end)
    return tidy
@lru_cache
def get_latest_allocations_holdings_date() -> tuple[int, int]:
    """Look up the most recent SDR allocations/holdings reporting date.

    The main SDR page is requested and parsed, and the date is read from the
    first cell of a fixed table row. The result is memoised by ``lru_cache``.

    Returns:
        tuple[int, int]: The year and month of the latest SDR data.
    """
    logger.info("Fetching latest date")

    page = make_request(MAIN_PAGE_URL)
    parsed_page = BeautifulSoup(page.content, "html.parser")

    # NOTE(review): the positions below are tied to the page layout - the
    # fifth <table> and its second <tr> are presumably the date listing and
    # its most recent entry; confirm if the page changes.
    tables = parsed_page.find_all("table")
    date_row = tables[4].find_all("tr")[1]

    # The cell text is expected in the form "November 30, 2024".
    date_text = date_row.td.text.strip()
    latest = datetime.strptime(date_text, "%B %d, %Y")
    return latest.year, latest.month
def fetch_allocations_holdings(date: tuple[int, int] | None = None) -> pd.DataFrame:
    """
    Fetch SDR holdings and allocations data for a given month.

    When no date is given, or the requested month is later than the latest
    one available, the data for the latest available month is returned
    instead (the fallback is logged at info level).

    Args:
        date: The year and month to get allocations and holdings data for,
            e.g. (2024, 11) for November 2024. If None, the latest available
            data is fetched.

    Returns:
        A dataframe with the SDR allocations and holdings data
    """
    latest_date = get_latest_allocations_holdings_date()

    if date is None:
        return get_holdings_and_allocations_data(*latest_date)

    # Compare on the first day of each month; building the datetime also
    # rejects an out-of-range year or month.
    requested_month_start = datetime(date[0], date[1], 1)
    latest_month_start = datetime(latest_date[0], latest_date[1], 1)

    if requested_month_start > latest_month_start:
        logger.info(
            f"SDR data unavailable for date: {format_date(date[1],date[0])}. Will fetch latest available"
        )
        return get_holdings_and_allocations_data(*latest_date)

    return get_holdings_and_allocations_data(*date)