diff --git a/pycamt/parser.py b/pycamt/parser.py index c4dcaa0..2d1b4e3 100644 --- a/pycamt/parser.py +++ b/pycamt/parser.py @@ -1,7 +1,4 @@ -from io import StringIO - -from defusedxml import ElementTree as ET - +from lxml import etree as ET class Camt053Parser: """ @@ -31,7 +28,7 @@ class Camt053Parser: Extracts statement information like IBAN and balances from the CAMT.053 file. """ - def __init__(self, xml_data): + def __init__(self, xml_data: str | bytes): """ Initializes the Camt053Parser with XML data. @@ -41,7 +38,7 @@ def __init__(self, xml_data): XML data as a string representation of CAMT.053 content. """ self.tree = ET.fromstring(xml_data) - self.namespaces = self._detect_namespaces(xml_data) + self.namespaces = self.tree.nsmap self.version = self._detect_version() @classmethod @@ -59,29 +56,10 @@ def from_file(cls, file_path): Camt053Parser An instance of the parser initialized with the XML content from the file. """ - with open(file_path, encoding="utf-8") as file: + with open(file_path, 'rb') as file: xml_data = file.read() return cls(xml_data) - def _detect_namespaces(self, xml_data): - """ - Detects and extracts namespaces from the XML data for XPath queries. - - Parameters - ---------- - xml_data : str - XML data from which namespaces are to be extracted. - - Returns - ------- - dict - A dictionary of namespace prefixes to namespace URIs. - """ - namespaces = {} - for _, elem in ET.iterparse(StringIO(xml_data), events=("start-ns",)): - namespaces[elem[0]] = elem[1] - return namespaces - def _detect_version(self): """ Detects the CAMT.053 version from the XML root element. @@ -96,6 +74,8 @@ def _detect_version(self): "camt.053.001.02", "camt.053.001.03", "camt.053.001.04", + "camt.053.001.08", + "camt.053.001.12", ]: if version in root.tag: return version @@ -143,12 +123,16 @@ def get_transactions(self): A list of dictionaries, each representing a transaction with its associated data. """ transactions = [] - entries = self.tree.findall(".//Ntry", self.namespaces) - for entry in entries: - transactions.extend(self._extract_transaction(entry)) + statements = self.tree.findall(".//Stmt", self.namespaces) + + for statement in statements: + entries = statement.findall(".//Ntry", self.namespaces) + for entry in entries: + transactions.extend(self._extract_transaction(entry, statement)) + return transactions - def _extract_transaction(self, entry): + def _extract_transaction(self, entry, statement): """ Extracts data from a single transaction entry. @@ -163,7 +147,7 @@ def _extract_transaction(self, entry): A dictionary containing extracted data for the transaction. """ - common_data = self._extract_common_entry_data(entry) + common_data = self._extract_common_entry_data(entry, statement) entry_details = entry.findall(".//NtryDtls", self.namespaces) transactions = [] @@ -195,7 +179,19 @@ def _extract_transaction(self, entry): ) return transactions - def _extract_common_entry_data(self, entry): + def _parse_status(self, entry): + status = None + if entry is not None: + child_element = entry.find(".//Cd", self.namespaces) + + if child_element is not None: + status = child_element.text + else: + status = entry.text + + return status + + def _extract_common_entry_data(self, entry, statement): """ Extracts common data applicable to all transactions within an entry. @@ -211,6 +207,11 @@ def _extract_common_entry_data(self, entry): """ return { "TransactionID": entry.find(".//AcctSvcrRef", self.namespaces).text, + "AccountIBAN": ( + statement.find(".//Acct//Id//IBAN", self.namespaces).text + if statement.find(".//Acct//Id//IBAN", self.namespaces) is not None + else None + ), "Amount": entry.find(".//Amt", self.namespaces).text, "Currency": entry.find(".//Amt", self.namespaces).attrib.get("Ccy"), "CreditDebitIndicator": entry.find(".//CdtDbtInd", self.namespaces).text, @@ -219,13 +220,9 @@ def _extract_common_entry_data(self, entry): if entry.find(".//RvslInd", self.namespaces) is not None else None ), - "Status": ( - entry.find(".//Sts", self.namespaces).text - if entry.find(".//Sts", self.namespaces) is not None - else None - ), - "BookingDate": entry.find(".//BookgDt//Dt", self.namespaces).text, - "ValueDate": entry.find(".//ValDt//Dt", self.namespaces).text, + "Status": self._parse_status(entry=entry.find(".//Sts", self.namespaces)), + "BookingDate": entry.find(".//BookgDt//*", self.namespaces).text, + "ValueDate": entry.find(".//ValDt//*", self.namespaces).text, "BankTransactionCode": ( entry.find(".//BkTxCd//Domn//Cd", self.namespaces).text if entry.find(".//BkTxCd//Domn//Cd", self.namespaces) is not None @@ -320,7 +317,6 @@ def _extract_transaction_details(self, tx_detail): data["RemittanceInformation"] = ref_elem.text if ref_elem is not None else None data["AdditionalRemittanceInformation"] = additional_ref_elem.text if additional_ref_elem is not None else None - return {key: value for key, value in data.items() if value is not None} def get_statement_info(self): @@ -338,12 +334,13 @@ def get_statement_info(self): - ClosingBalanceDate: Date of the closing balance - Currency: Account currency (if available) """ - stmt = self.tree.find(".//Stmt", self.namespaces) - if stmt is None: + statements = [] + stmts = self.tree.findall(".//Stmt", self.namespaces) + if len(stmts) == 0: # Maybe we have a Rpt file - stmt = self.tree.find(".//Rpt", self.namespaces) + stmts = self.tree.findall(".//Rpt", self.namespaces) - if stmt is not None: + for stmt in stmts: # Extract IBAN iban = stmt.find(".//Acct//Id//IBAN", self.namespaces) iban_text = iban.text if iban is not None else None @@ -401,6 +398,6 @@ def get_statement_info(self): result["ClosingBalance"] = amount_text result["ClosingBalanceDate"] = date_text - return result + statements.append(result) - return {} + return statements diff --git a/pyproject.toml b/pyproject.toml index c70b704..410ccfe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ packages = [ [tool.poetry.dependencies] python = ">=3.8,<4.0" -defusedxml = "^0.7.1" +lxml = ">=4.4.1" [tool.poetry.group.dev.dependencies] pytest = "^7.2.0" diff --git a/tests/test_parser.py b/tests/test_parser.py index 4e9742d..6b4e7ad 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -19,6 +19,14 @@ def parser(): + + + OPBD + + +
+
2025-07-31
+ 1000.00
@@ -30,6 +38,7 @@ def parser():
2020-06-23
+ 123 @@ -70,8 +79,12 @@ def test_get_transactions(self, parser): assert transaction["ValueDate"] == "2020-06-23" def test_get_statement_info(self, parser): - expected = { + expected = [{ "IBAN": "GB33BUKB20201555555555", "OpeningBalance": "1000.00", - } + "Currency": None, + "ClosingBalance": None, + "OpeningBalanceDate": "2025-07-31", + "ClosingBalanceDate": None + }] assert parser.get_statement_info() == expected