diff --git a/CHANGELOG.md b/CHANGELOG.md index 225557c..f1cf518 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## v0.9.14 (2025-12-09) + +### Refactor + +- remove url_to_str + ## v0.9.13 (2025-12-08) ### Refactor diff --git a/README.md b/README.md index e6513fb..04042f4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Collection acronyms for identification - cafi -[![release: 0.9.13](https://img.shields.io/badge/rel-0.9.13-blue.svg?style=flat-square)](https://github.com/LeibnizDSMZ/cafi) +[![release: 0.9.14](https://img.shields.io/badge/rel-0.9.14-blue.svg?style=flat-square)](https://github.com/LeibnizDSMZ/cafi) [![MIT LICENSE](https://img.shields.io/badge/License-MIT-brightgreen.svg?style=flat-square)](https://choosealicense.com/licenses/mit/) [![DATA LICENSE - CC BY 4.0](https://img.shields.io/badge/Data%20License-CC%20BY%204.0-brightgreen.svg?style=flat-square)](http://creativecommons.org/licenses/by/4.0/) [![Documentation Status](https://img.shields.io/badge/docs-GitHub-blue.svg?style=flat-square)](https://LeibnizDSMZ.github.io/cafi/) diff --git a/pyproject.toml b/pyproject.toml index 84fc8a5..c388271 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cafi" -version = "0.9.13" +version = "0.9.14" description = "This project acts as a registry for acronyms used by culture collections." readme = "README.md" authors = [{ name = "Artur Lissin", email = "artur.lissin@dsmz.de" }] diff --git a/src/cafi/constants/versions.py b/src/cafi/constants/versions.py index c3c24d2..710c56d 100644 --- a/src/cafi/constants/versions.py +++ b/src/cafi/constants/versions.py @@ -1,4 +1,4 @@ from typing import Final -CURRENT_VER: Final[str] = "v0.9.13" +CURRENT_VER: Final[str] = "v0.9.14" STABLE_VER: Final[str] = "main" diff --git a/src/cafi/container/fun/acr_db.py b/src/cafi/container/fun/acr_db.py index 27ea9e6..6c0d606 100644 --- a/src/cafi/container/fun/acr_db.py +++ b/src/cafi/container/fun/acr_db.py @@ -6,7 +6,6 @@ from cafi.constants.types import ACR_DB_T, ACR_MIN_DB_T, CCNO_DB_T from cafi.container.acr_db import AcrDbEntry, CatArgs -from cafi.container.fun.format import url_to_str from cafi.errors.custom_exceptions import ValJsonEx from pydantic import HttpUrl @@ -100,6 +99,16 @@ def _get_domain(url: str, /) -> str: return domain.group(1) +def _unquote_http_url(href: HttpUrl, /) -> str: + return ( + str(href) + .replace("%7B", "{") + .replace("%7D", "}") + .replace("%3C", "<") + .replace("%3E", ">") + ) + + def _check_uri_template(uri: str, /) -> None: sub_parts = defaultdict(list) for param in _VALID_URI.findall(uri): @@ -120,8 +129,8 @@ def _check_uri_template(uri: str, /) -> None: def check_uri_template(uris: list[HttpUrl], /) -> None: domains = set() for uri in uris: - _check_uri_template(url_to_str(uri)) - domains.add(_get_domain(url_to_str(uri))) + _check_uri_template(_unquote_http_url(uri)) + domains.add(_get_domain(str(uri))) if len(domains) > 1: raise ValJsonEx(f"multiple catalogue domains detected [{uris!s}]") @@ -205,11 +214,12 @@ def _fix_opt(href: str, match: tuple[str, str], args: CatArgs, /) -> str: ) -def replace_param_value(href: str, args: CatArgs, /) -> str: - for opt in _OPT_VAL.findall(href): - href = _fix_opt(href, opt, args) +def replace_param_value(href: HttpUrl, args: CatArgs, /) -> str: + href_str = _unquote_http_url(href) + for opt in _OPT_VAL.findall(href_str): + href_str = _fix_opt(href_str, opt, args) for che, repl in _REPL_PARAM.items(): - for mat in che.finditer(href): + for mat in che.finditer(href_str): for to_repl, repl_val in repl(mat.group(0), args).items(): - href = href.replace(to_repl, quote(repl_val, safe="")) - return href + href_str = href_str.replace(to_repl, quote(repl_val, safe="")) + return str(HttpUrl(url=href_str)) diff --git a/src/cafi/container/fun/format.py b/src/cafi/container/fun/format.py index 7b24a66..f298eb5 100644 --- a/src/cafi/container/fun/format.py +++ b/src/cafi/container/fun/format.py @@ -1,16 +1,4 @@ import re -from typing import Protocol -from urllib.parse import unquote_plus - - -class _URLlike(Protocol): - def unicode_string(self) -> str: ... - - -def url_to_str(url: _URLlike | None, /) -> str: - if url is None: - return "" - return unquote_plus(url.unicode_string()) def is_regex(val: str) -> str: diff --git a/src/cafi/library/catalogue.py b/src/cafi/library/catalogue.py index 7439562..0dc4a7e 100644 --- a/src/cafi/library/catalogue.py +++ b/src/cafi/library/catalogue.py @@ -3,12 +3,10 @@ from cafi.container.fun.acr_db import replace_param_value from cafi.container.links import CatalogueLink, LinkLevel -from cafi.container.fun.acr_db import url_to_str - def create_catalogue_link(acr_db: AcrDbEntry, args: CatArgs, /) -> Iterable[str]: for cat in acr_db.catalogue: - yield replace_param_value(url_to_str(cat), args) + yield replace_param_value(cat, args) def _create_link_level(cat_link: list[str], hom_link: str, /) -> LinkLevel: @@ -29,11 +27,9 @@ def create_ccno_links( return CatalogueLink(level=LinkLevel.emp) cat_link, hom_link = [], "" if LinkLevel.cat not in exclude: - cat_link = [ - replace_param_value(url_to_str(cat), args) for cat in acr_db.catalogue - ] + cat_link = [replace_param_value(cat, args) for cat in acr_db.catalogue] if LinkLevel.home not in exclude: - hom_link = url_to_str(acr_db.homepage) + hom_link = "" if acr_db.homepage is None else str(acr_db.homepage) return CatalogueLink( level=_create_link_level(cat_link, hom_link), catalogue=cat_link, diff --git a/src/cafi/library/validate.py b/src/cafi/library/validate.py index fd5400b..75e96bb 100644 --- a/src/cafi/library/validate.py +++ b/src/cafi/library/validate.py @@ -13,7 +13,6 @@ ACR_DB_KEYS, ) from cafi.container.fun.acr_db import check_uri_template, create_acr_db, create_ccno_db -from cafi.container.fun.format import url_to_str from cafi.errors.custom_exceptions import ValJsonEx @@ -50,7 +49,7 @@ def _check_unique_gid( def _check_active(cur_acr_con: AcrDbEntry, /) -> None: - if not cur_acr_con.active and url_to_str(cur_acr_con.homepage) != "": + if not cur_acr_con.active and cur_acr_con.homepage is not None: raise ValJsonEx( f"{cur_acr_con.acr}: 'inactive' BRC can not have a 'homepage' link" ) diff --git a/tests/library/test_ccno_link.py b/tests/library/test_ccno_link.py index 92fa314..d5bc1bc 100644 --- a/tests/library/test_ccno_link.py +++ b/tests/library/test_ccno_link.py @@ -76,5 +76,5 @@ def test_link_simple_catalogue_split_core( cat_id, cat_args = ccno_lmg_1_1 acr_db = parse_acr_db(json.loads(load_fix_acr_db)).get(cat_id, None) assert acr_db is not None - cat = "https://bccm.belspo.be/page/lmg-catalogue-display/fields/name/LMG 1t1" + cat = "https://bccm.belspo.be/page/lmg-catalogue-display/fields/name/LMG%201t1" assert cat in set(create_catalogue_link(acr_db, cat_args)) diff --git a/uv.lock b/uv.lock index c05a478..50a26f7 100644 --- a/uv.lock +++ b/uv.lock @@ -81,7 +81,7 @@ wheels = [ [[package]] name = "cafi" -version = "0.9.13" +version = "0.9.14" source = { editable = "." } dependencies = [ { name = "pydantic" },