From d706b9dd9395240a0b8a71be6dafe8205252ea87 Mon Sep 17 00:00:00 2001 From: Renne Rocha Date: Thu, 13 Feb 2020 08:35:14 -0300 Subject: [PATCH 1/3] Add test cases for negative amounts --- tests/test_price_parsing.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_price_parsing.py b/tests/test_price_parsing.py index e7fd9bf..d448987 100644 --- a/tests/test_price_parsing.py +++ b/tests/test_price_parsing.py @@ -1976,6 +1976,28 @@ def __eq__(self, other): ] +PRICE_PARSING_NEGATIVE_PRICE = [ + Example(None, 'R$ -2,00', + 'R$', '2,00', -2.0), + Example(None, '- 1.400,00', + None, '1.400,00', -1400), + Example(None, '-3,95', + None, '3,95', -3.95), + Example(None, '-1.649,69', + None, '1.649,69', -1649.69), + Example(None, '-£127.54', + "£", '127.54', -127.54), + Example(None, '-R$127.54', + "R$", '127.54', -127.54), + Example(None, '-127,54 €', + "€", '127,54', -127.54), + Example(None, 'kr-127,54', + "kr", '127,54', -127.54), + Example(None, '€ 127,54-', + "€", '127,54', -127.54), +] + + @pytest.mark.parametrize( ["example"], [[e] for e in PRICE_PARSING_EXAMPLES_BUGS_CAUGHT] + @@ -1986,6 +2008,7 @@ def __eq__(self, other): [[e] for e in PRICE_PARSING_EXAMPLES_NO_PRICE] + [[e] for e in PRICE_PARSING_EXAMPLES_NO_CURRENCY] + [[e] for e in PRICE_PARSING_DECIMAL_SEPARATOR_EXAMPLES] + + [[e] for e in PRICE_PARSING_NEGATIVE_PRICE] + [pytest.param(e, marks=pytest.mark.xfail()) for e in PRICE_PARSING_EXAMPLES_XFAIL] ) From 502014e237dfbf1ffcc778cfb3c674932ad364e5 Mon Sep 17 00:00:00 2001 From: Renne Rocha Date: Thu, 13 Feb 2020 09:34:42 -0300 Subject: [PATCH 2/3] Allow processing of negative amount values --- price_parser/parser.py | 53 +++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/price_parser/parser.py b/price_parser/parser.py index 364ab2c..375648b 100644 --- a/price_parser/parser.py +++ b/price_parser/parser.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import re import string +from
collections import namedtuple from typing import Callable, Optional, Pattern, List, Tuple from decimal import Decimal, InvalidOperation @@ -37,18 +38,21 @@ def fromstring(cls, price: Optional[str], ``price`` string, it could be **preferred** over a value extracted from ``currency_hint`` string. """ - amount_text = extract_price_text(price) if price is not None else None - amount_num = ( - parse_number(amount_text, decimal_separator) - if amount_text is not None else None - ) currency = extract_currency_symbol(price, currency_hint) if currency is not None: currency = currency.strip() + + price_amount = extract_price_text(price, currency) + + amount_num = ( + parse_number(price_amount.text, decimal_separator, price_amount.negative) + if price_amount.text is not None else None + ) + return Price( amount=amount_num, currency=currency, - amount_text=amount_text, + amount_text=price_amount.text, ) @@ -156,7 +160,7 @@ def extract_currency_symbol(price: Optional[str], return None -def extract_price_text(price: str) -> Optional[str]: +def extract_price_text(price: str, currency: Optional[str] = "") -> Optional[str]: """ Extract text of a price from a string which contains price and maybe some other text. 
If multiple price-looking substrings are present, @@ -189,6 +193,24 @@ def extract_price_text(price: str) -> Optional[str]: >>> extract_price_text("50") '50' """ + PriceAmount = namedtuple('PriceAmount', ['text', 'negative']) + + if price is None: + return PriceAmount(text=None, negative=False) + + negative_regexes = [ + r"-\s*?\d[\d.,\d]*", + r"\d[\d.,\d]*\d-", + ] + if currency is not None: + negative_regexes.append(r"-{}\d[\d.,\d]*".format(re.escape(currency))) + negative_amount_search = re.search( + r"({})(?:[^%\d]|$)".format("|".join(negative_regexes)), + price, + re.VERBOSE + ) + negative_amount = bool(negative_amount_search) + if price.count('€') == 1: m = re.search(r""" [\d\s.,]*?\d # number, probably with thousand separators @@ -197,7 +219,8 @@ def extract_price_text(price: str) -> Optional[str]: (?:$|[^\d]) # something which is not a digit """, price, re.VERBOSE) if m: - return m.group(0).replace(' ', '') + return PriceAmount(text=m.group(0).replace(' ', ''), negative=negative_amount) + m = re.search(r""" (\d[\d\s.,]*) # number, probably with thousand separators \s*? # skip whitespace @@ -205,10 +228,12 @@ def extract_price_text(price: str) -> Optional[str]: """, price, re.VERBOSE) if m: - return m.group(1).strip(',.').strip() + return PriceAmount(text=m.group(1).strip(',.').strip(), negative=negative_amount) + if 'free' in price.lower(): - return '0' - return None + return PriceAmount(text="0", negative=negative_amount) + + return PriceAmount(text=None, negative=negative_amount) # NOTE: Keep supported separators in sync with parse_number() @@ -244,7 +269,8 @@ def get_decimal_separator(price: str) -> Optional[str]: def parse_number(num: str, - decimal_separator: Optional[str] = None) -> Optional[Decimal]: + decimal_separator: Optional[str] = None, + is_negative: Optional[bool] = False) -> Optional[Decimal]: """ Parse a string with a number to a Decimal, guessing its format: decimal separator, thousand separator. Return None if parsing fails. 
@@ -294,6 +320,7 @@ def parse_number(num: str, assert decimal_separator == '€' num = num.replace('.', '').replace(',', '').replace('€', '.') try: - return Decimal(num) + multiplier = -1 if is_negative else 1 + return multiplier * Decimal(num) except InvalidOperation: return None From 3c50a592230e01fdc93c2e948b465f749fd3454a Mon Sep 17 00:00:00 2001 From: Renne Rocha Date: Wed, 15 Apr 2020 15:23:46 -0300 Subject: [PATCH 3/3] Created private function to extract text and negative flag from price strings avoiding changes in existing public function return values --- price_parser/parser.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/price_parser/parser.py b/price_parser/parser.py index 375648b..cc360f9 100644 --- a/price_parser/parser.py +++ b/price_parser/parser.py @@ -42,7 +42,7 @@ def fromstring(cls, price: Optional[str], if currency is not None: currency = currency.strip() - price_amount = extract_price_text(price, currency) + price_amount = _extract_price_amount(price, currency) amount_num = ( parse_number(price_amount.text, decimal_separator, price_amount.negative) @@ -160,7 +160,7 @@ def extract_currency_symbol(price: Optional[str], return None -def extract_price_text(price: str, currency: Optional[str] = "") -> Optional[str]: +def extract_price_text(price: str) -> Optional[str]: """ Extract text of a price from a string which contains price and maybe some other text. If multiple price-looking substrings are present, @@ -193,6 +193,16 @@ def extract_price_text(price: str, currency: Optional[str] = "") -> Optional[str >>> extract_price_text("50") '50' """ + price_amount = _extract_price_amount(price, currency) + return price_amount.text + + +def _extract_price_amount(price: str, currency: Optional[str] = "") -> Optional[str]: + """ + Extract from a string the text of a price and a flag indicating + if this is a string of a negative price. + """ + PriceAmount = namedtuple('PriceAmount', ['text', 'negative']) if price is None: