Skip to content

Commit 743b222

Browse files
committedDec 5, 2023
Add parsing categories from file
Prepopulate the categories file with basic categories. NOTE: Users are expected to modify categories.json according to their own needs (to get the best experience from this app).
1 parent 3377479 commit 743b222

13 files changed

+470
-15
lines changed
 

‎.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ __pycache__
66
.idea
77
unmatched_transactions.html
88
*.xlsx
9+
output

‎pyproject.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ dependencies = [
3434
"html5lib",
3535
"beautifulsoup4",
3636
"py-moneyed",
37-
"openpyxl"
37+
"openpyxl",
38+
"semver",
39+
"importlib-resources"
3840
]
3941

4042
[project.optional-dependencies] # Optional

‎src/banker/__main__.py

+41-13
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,58 @@
1-
from banker.analyzer.analyze import analyze_transactions, deduce_month_year
2-
from banker.data.category import Category, PaymentType
31
import argparse
2+
import os.path
3+
4+
from importlib_resources import files
5+
6+
from banker.analyzer.analyze import analyze_transactions, deduce_month_year
7+
from banker.data.category import Category
48

9+
from banker.data.transaction import Transaction
510
from banker.formatter.month_year_formatter import format_month_year
611
from banker.parser.html_transactions_parser import HtmlTransactionsParser
712
from banker.formatter.html_transactions_formatter import HtmlTransactionsFormatter
13+
from banker.parser.interfaces.categories_parser import ICategoriesParser
14+
from banker.parser.interfaces.transactions_parser import ITransactionsParser
15+
from banker.parser.json_categories_parser import JsonCategoriesParser
816
from banker.writer.excel_categories_writer import ExcelCategoriesWriter
917

1018

19+
def get_supported_categories(categories_parser: ICategoriesParser, categories_filepath: str) -> list[Category]:
    """Read the categories file and hand its content to the categories parser.

    The file is decoded explicitly as UTF-8 so that non-ASCII category names
    are read the same way on every platform instead of depending on the
    locale's default encoding.

    :param categories_parser: parser used to interpret the file content.
    :param categories_filepath: path of the categories file to read.
    :return: the result of ``categories_parser.parse_categories`` for the file content.
    """
    with open(categories_filepath, "r", encoding="utf-8") as file:
        return categories_parser.parse_categories(file.read())
22+
23+
24+
def get_transactions(transactions_parser: ITransactionsParser, transactions_filepath: str) -> list[Transaction]:
    """Read the transactions file and hand its content to the transactions parser.

    The file is decoded explicitly as UTF-8 rather than with the locale's
    default encoding, so the same input is read identically on every platform.

    :param transactions_parser: parser used to interpret the file content.
    :param transactions_filepath: path of the transactions file to read.
    :return: the result of ``transactions_parser.parse_transactions`` for the file content.
    """
    with open(transactions_filepath, "r", encoding="utf-8") as transactions_file:
        return transactions_parser.parse_transactions(transactions_file.read())
27+
28+
29+
def save_to_file(filepath: str, content: str):
    """Write ``content`` to ``filepath``, replacing any existing file.

    The text is encoded as UTF-8 so the output does not depend on the
    platform's default encoding and cannot fail on characters the locale
    encoding is unable to represent.

    :param filepath: path of the file to create or overwrite.
    :param content: text to write.
    """
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(content)
32+
33+
1134
def main():
    """Command-line entry point: categorise the transactions of one HTML file.

    Reads transactions from ``html_file``, matches them against the categories
    read from ``--categories_file`` and writes two files into
    ``--output_directory``: ``unmatched_transactions.html`` and
    ``autogen_budget.xlsx``.
    """
    transactions_parser = HtmlTransactionsParser()
    categories_parser = JsonCategoriesParser()
    transactions_formatter = HtmlTransactionsFormatter()
    categories_writer = ExcelCategoriesWriter()

    # Defaults point into the packaged resources. They are converted to str so
    # the parsed arguments have the same type whether or not the user supplied
    # a value (the helper functions are annotated with str paths).
    resources = files('banker.resources')
    parser = argparse.ArgumentParser()
    parser.add_argument("html_file", help="HTML file with the transactions to analyze")
    parser.add_argument("--categories_file", default=str(resources.joinpath('categories.json')),
                        help="JSON file with category definitions (default: %(default)s)")
    parser.add_argument("--output_directory", default=str(resources.joinpath('output')),
                        help="directory the result files are written to (default: %(default)s)")
    args = parser.parse_args()

    os.makedirs(args.output_directory, exist_ok=True)
    output_unmatched_transactions_filepath = os.path.join(args.output_directory, "unmatched_transactions.html")
    output_matched_categories_filepath = os.path.join(args.output_directory, "autogen_budget.xlsx")

    all_transactions = get_transactions(transactions_parser, args.html_file)
    month_year = deduce_month_year(all_transactions)
    supported_categories = get_supported_categories(categories_parser, args.categories_file)
    analyze_result = analyze_transactions(all_transactions, supported_categories)
    formatted_transactions = transactions_formatter.format_transactions(analyze_result.unmatched_transactions)

    save_to_file(output_unmatched_transactions_filepath, formatted_transactions)
    categories_writer.write_categories(analyze_result.matched_categories, output_matched_categories_filepath,
                                       format_month_year(month_year))

‎src/banker/analyzer/analyze.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import datetime
21
from dataclasses import dataclass
32

43
from moneyed import Money, PLN

‎src/banker/common/naming.py

+5
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,8 @@
33
TRANSACTION_COL_NAME_DESCRIPTION = "Opis"
44
TRANSACTION_COL_NAME_VALUE = "Kwota"
55

6+
CATEGORIES_KEY_NAME_VERSION = "version"
7+
CATEGORIES_KEY_NAME_CATEGORIES = "categories"
8+
CATEGORIES_KEY_NAME_CATEGORY_NAME = "name"
9+
CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE = "payment_type"
10+
CATEGORIES_KEY_NAME_CATEGORY_REGEXES = "matching_regexes"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from abc import ABC, abstractmethod
2+
3+
from banker.data.category import Category
4+
5+
6+
class ICategoriesParser(ABC):
    """Interface for parsers that turn text content into ``Category`` objects."""

    @abstractmethod
    def parse_categories(self, content: str) -> list[Category]:
        """Parse ``content`` and return the categories it describes.

        Subclasses must override this method.
        """
        raise NotImplementedError("Method not implemented in subclass")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import logging
2+
import semver
3+
import json
4+
5+
from banker.common.naming import CATEGORIES_KEY_NAME_VERSION, CATEGORIES_KEY_NAME_CATEGORIES, \
6+
CATEGORIES_KEY_NAME_CATEGORY_NAME, CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE, CATEGORIES_KEY_NAME_CATEGORY_REGEXES
7+
from banker.data.category import Category
8+
from banker.parser.interfaces.categories_parser import ICategoriesParser
9+
from banker.parser.payment_type_parser import parse_payment_type
10+
11+
12+
class CategoriesVersionMissing(Exception):
    """Raised when the categories JSON does not contain the version key."""

    def __str__(self):
        missing_key = CATEGORIES_KEY_NAME_VERSION
        return f"Key {missing_key} is missing in categories JSON file"
15+
16+
17+
class CategoriesVersionInvalid(Exception):
    """Raised when the categories version is not a valid semantic version."""

    def __init__(self, version: str):
        # The rejected value, kept only for the error message.
        self.__version = version

    def __str__(self):
        prefix = "Categories version has invalid format, "
        details = f"expected semantic versioning e.g: 1.0.0, actual: {self.__version}"
        return prefix + details
24+
25+
26+
class CategoriesVersionUnsupported(Exception):
    """Raised when the categories version is not supported by the application."""

    def __init__(self, supported_version: semver.Version, current_version: semver.Version):
        # Both versions are kept only to build the error message.
        self.__supported_version = supported_version
        self.__current_version = current_version

    def __str__(self):
        prefix = "Categories version is unsupported by application, "
        details = f"supported version: {self.__supported_version}, current version: {self.__current_version}"
        return prefix + details
34+
35+
36+
class CategoryNameDuplicate(Exception):
    """Raised when the same category name appears more than once."""

    def __init__(self, name: str):
        # The repeated category name, kept only for the error message.
        self.__name = name

    def __str__(self):
        duplicated = self.__name
        return f"Categories names must be unique, but this category name is used multiple times: {duplicated}"
42+
43+
44+
class JsonCategoriesParser(ICategoriesParser):
    """Parse categories from a versioned JSON document.

    The document is read as an object holding a version and a list of category
    objects. Each category object needs a name, a payment type and matching
    regexes; the key names come from ``banker.common.naming``.
    """

    def __init__(self):
        # Version of the categories format that this parser checks documents against.
        self.__supported_version = semver.Version(major=1, minor=0, patch=0)
        self.__logger = logging.getLogger("JsonCategoriesParser")

    def __validate_version(self, json_dict: dict):
        """Check that the document declares a version this parser accepts.

        :param json_dict: the decoded JSON document.
        :raises CategoriesVersionMissing: the version key is absent (or null).
        :raises CategoriesVersionInvalid: the version is not a semantic-version string.
        :raises CategoriesVersionUnsupported: the version fails the compatibility check
            against the supported version.
        """
        version = json_dict.get(CATEGORIES_KEY_NAME_VERSION)
        if version is None:
            raise CategoriesVersionMissing()
        # semver only handles strings; a JSON number such as 1 would otherwise
        # surface as a TypeError instead of the dedicated exception.
        if not isinstance(version, str) or not semver.Version.is_valid(version):
            raise CategoriesVersionInvalid(version)
        version = semver.Version.parse(version)
        if not self.__supported_version.is_compatible(version):
            raise CategoriesVersionUnsupported(self.__supported_version, version)

    def __contains_required_keys(self, category: dict) -> bool:
        """Return True when ``category`` has every required key; log the first missing one."""
        required_keys = [CATEGORIES_KEY_NAME_CATEGORY_NAME, CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE,
                         CATEGORIES_KEY_NAME_CATEGORY_REGEXES]
        for required_key in required_keys:
            if required_key not in category:
                # Logged as a warning: the caller drops this category, which
                # would otherwise go unnoticed with the default logging level.
                self.__logger.warning("Category object key missing: %s (category skipped: %r)",
                                      required_key, category)
                return False
        return True

    def __valid_payment_type(self, category: dict) -> bool:
        """Return True when the category's payment type is recognised by ``parse_payment_type``."""
        payment_type = category[CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE]
        if parse_payment_type(payment_type) is None:
            # Logged as a warning with the offending values so that a typo in
            # the categories file can be found and fixed.
            self.__logger.warning("Invalid payment type %r in category %r (category skipped)",
                                  payment_type, category.get(CATEGORIES_KEY_NAME_CATEGORY_NAME))
            return False
        return True

    def parse_categories(self, content: str) -> list[Category]:
        """Parse a JSON document into a list of categories.

        Category objects that lack a required key or have an unrecognised
        payment type are skipped with a warning; a repeated name is an error.

        :param content: text of the JSON document.
        :return: the parsed categories, in document order.
        :raises CategoriesVersionMissing: see ``__validate_version``.
        :raises CategoriesVersionInvalid: see ``__validate_version``.
        :raises CategoriesVersionUnsupported: see ``__validate_version``.
        :raises CategoryNameDuplicate: two accepted categories share a name.
        """
        json_dict = json.loads(content)
        self.__validate_version(json_dict)

        # Keyed by name to detect duplicates; dicts keep insertion order.
        result = {}
        for category in json_dict.get(CATEGORIES_KEY_NAME_CATEGORIES, []):
            if not self.__contains_required_keys(category):
                continue
            if not self.__valid_payment_type(category):
                continue
            name = category[CATEGORIES_KEY_NAME_CATEGORY_NAME]
            if name in result:
                raise CategoryNameDuplicate(name)
            payment_type = parse_payment_type(category[CATEGORIES_KEY_NAME_CATEGORY_PAYMENT_TYPE])
            matching_regexes = category[CATEGORIES_KEY_NAME_CATEGORY_REGEXES]
            result[name] = Category(name, payment_type, matching_regexes)
        return list(result.values())
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from banker.data.category import PaymentType
2+
3+
4+
def parse_payment_type(payment_type: str) -> PaymentType | None:
    """Translate a payment-type name into the corresponding ``PaymentType``.

    :param payment_type: one of 'household', 'recurring', 'occasional' or 'optional'.
    :return: the matching ``PaymentType`` member, or ``None`` for any other value.
    """
    if payment_type == 'household':
        return PaymentType.Household
    if payment_type == 'recurring':
        return PaymentType.Recurring
    if payment_type == 'occasional':
        return PaymentType.Occasional
    if payment_type == 'optional':
        return PaymentType.Optional
    return None

‎src/banker/resources/__init__.py

Whitespace-only changes.

‎src/banker/resources/categories.json

+152
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
{
2+
"version": "1.0.0",
3+
"categories": [
4+
{
5+
"name": "Kaufland",
6+
"payment_type": "household",
7+
"matching_regexes": [
8+
"KAUFLAND PL"
9+
]
10+
},
11+
{
12+
"name": "Bilety PKP",
13+
"payment_type": "occasional",
14+
"matching_regexes": [
15+
"intercity\\.pl"
16+
]
17+
},
18+
{
19+
"name": "Leclerc",
20+
"payment_type": "household",
21+
"matching_regexes": [
22+
"eLeclerc"
23+
]
24+
},
25+
{
26+
"name": "Biedronka",
27+
"payment_type": "household",
28+
"matching_regexes": [
29+
"BIEDRONKA"
30+
]
31+
},
32+
{
33+
"name": "Obuwie",
34+
"payment_type": "occasional",
35+
"matching_regexes": [
36+
"eobuwie\\.com\\.pl"
37+
]
38+
},
39+
{
40+
"name": "Netto",
41+
"payment_type": "household",
42+
"matching_regexes": [
43+
"NETTO"
44+
]
45+
},
46+
{
47+
"name": "Paliwo",
48+
"payment_type": "household",
49+
"matching_regexes": [
50+
"ORLEN"
51+
]
52+
},
53+
{
54+
"name": "Darowizny",
55+
"payment_type": "optional",
56+
"matching_regexes": [
57+
"DAROWIZNA"
58+
]
59+
},
60+
{
61+
"name": "Carrefour",
62+
"payment_type": "household",
63+
"matching_regexes": [
64+
"CARREFOUR"
65+
]
66+
},
67+
{
68+
"name": "Piekarnie",
69+
"payment_type": "household",
70+
"matching_regexes": [
71+
"(?i)piekarnia"
72+
]
73+
},
74+
{
75+
"name": "Drogerie",
76+
"payment_type": "household",
77+
"matching_regexes": [
78+
"HEBE"
79+
]
80+
},
81+
{
82+
"name": "Pralnie",
83+
"payment_type": "occasional",
84+
"matching_regexes": [
85+
"PRALNIA"
86+
]
87+
},
88+
{
89+
"name": "Bilety MPK Wrocław",
90+
"payment_type": "household",
91+
"matching_regexes": [
92+
"URBANCARD"
93+
]
94+
},
95+
{
96+
"name": "Spotify",
97+
"payment_type": "recurring",
98+
"matching_regexes": [
99+
"Spotify"
100+
]
101+
},
102+
{
103+
"name": "Action",
104+
"payment_type": "household",
105+
"matching_regexes": [
106+
"Action"
107+
]
108+
},
109+
{
110+
"name": "Lidl",
111+
"payment_type": "household",
112+
"matching_regexes": [
113+
"LIDL"
114+
]
115+
},
116+
{
117+
"name": "RTV Euro AGD",
118+
"payment_type": "occasional",
119+
"matching_regexes": [
120+
"EURO\\-NET"
121+
]
122+
},
123+
{
124+
"name": "Abonament telefoniczny",
125+
"payment_type": "recurring",
126+
"matching_regexes": [
127+
"24\\.play\\.pl"
128+
]
129+
},
130+
{
131+
"name": "Castorama",
132+
"payment_type": "occasional",
133+
"matching_regexes": [
134+
"CASTORAMA"
135+
]
136+
},
137+
{
138+
"name": "McDonalds",
139+
"payment_type": "optional",
140+
"matching_regexes": [
141+
"MCDONALDS"
142+
]
143+
},
144+
{
145+
"name": "Lody",
146+
"payment_type": "optional",
147+
"matching_regexes": [
148+
"(?i)lodziarnia"
149+
]
150+
}
151+
]
152+
}

0 commit comments

Comments
 (0)
Please sign in to comment.