diff --git a/CHANGELOG.md b/CHANGELOG.md index 9647925..087635d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,39 @@ +## v0.10.0 (2026-03-12) + +### Feat + +- **acr_db.json,regex_db.json**: complete list of CBS ccno examples for all found suffixes +- **acr_db.json,regex_db.json**: add entry for ICCF Collection of Industrial Microorganisms +- **acr_db.json,regex_db.json**: add entry for MSCL Microbial Strain Collection of Latvia +- **acr_db.json,regex_db.json**: add entry for UNIMORE Microbial Culture Collection (UMCC) +- **acr_db.json,regex_db.json**: add entry for PYCC The portuguese yeast culture collection +- **acr_db.json,regex_db.json**: add entry for UCCCB The University of Coimbra Bacteria Culture Collection +- **acr_db.json,regex_db.json**: add entry for UOA/HCPF Hellenic Collection of Pathogenic Fungi +- **acr_db.json,regex_db.json**: add entry for ACOI Coimbra Collection of Algae +- **acr_db.json,regex_db.json**: add entry for The Athens University Bacterial & Archaea Culture Collection ATHUBA +- **acr_db.json,regex_db.json**: add entry for ITEM Agri-Food Microbial Collection +- add MIRRI acronym + +### Fix + +- **acr_db.json**: fixed ACOI suffix regex correctly +- **acr_db.json**: changed ACOI suffix regex from [] to (|) +- **acr_db.json,catalogue_db.json,regex_db.json**: fixed mismatching and incomplete regexes for ACOI +- **acr_db.json**: fix mismatching regexes for CCUG +- **acr_db.json**: fixed mismatching regexes for UOA/HCPF +- **acr_db.json,catalogue_db.json,regex_db.json**: fixed some issues with regexes and removed MIRRI acronym +- **acr_db.json,regex_db.json**: recognize CBS ccnos with capital letter suffix +- **catalogue_db.json**: add missing ccnos for new entries with catalogue urls +- **acr_db.json,regex_db.json**: recognize LEGE ccnos with one dot +- **acr_db.json,regex_db.json**: recognize suffix 'i' for MUM collection +- **acr_db.json,regex_db.json**: recognize BEA ccnos with one slash +- **regex_db.json**: add private CCUG ccno examples +- **acr_db.json**: Extended CCUG regex to accept ! and # + +### Refactor + +- **validate**: adjust regex patterns and error message + ## v0.9.15 (2026-01-12) ### Refactor diff --git a/README.md b/README.md index 3475261..c6338e3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Collection acronyms for identification - cafi -[![release: 0.9.15](https://img.shields.io/badge/rel-0.9.15-blue.svg?style=flat-square)](https://github.com/LeibnizDSMZ/cafi) +[![release: 0.10.0](https://img.shields.io/badge/rel-0.10.0-blue.svg?style=flat-square)](https://github.com/LeibnizDSMZ/cafi) [![MIT LICENSE](https://img.shields.io/badge/License-MIT-brightgreen.svg?style=flat-square)](https://choosealicense.com/licenses/mit/) [![DATA LICENSE - CC BY 4.0](https://img.shields.io/badge/Data%20License-CC%20BY%204.0-brightgreen.svg?style=flat-square)](http://creativecommons.org/licenses/by/4.0/) [![Documentation Status](https://img.shields.io/badge/docs-GitHub-blue.svg?style=flat-square)](https://LeibnizDSMZ.github.io/cafi/) diff --git a/pyproject.toml b/pyproject.toml index 71bffe6..ead57e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cafi" -version = "0.9.15" +version = "0.10.0" description = "This project acts as a registry for acronyms used by culture collections." readme = "README.md" authors = [{ name = "Artur Lissin", email = "artur.lissin@dsmz.de" }] diff --git a/src/cafi/constants/versions.py b/src/cafi/constants/versions.py index 3f42202..5b2b2df 100644 --- a/src/cafi/constants/versions.py +++ b/src/cafi/constants/versions.py @@ -1,4 +1,4 @@ from typing import Final -CURRENT_VER: Final[str] = "v0.9.15" +CURRENT_VER: Final[str] = "v0.10.0" STABLE_VER: Final[str] = "main" diff --git a/src/cafi/data/acr_db.json b/src/cafi/data/acr_db.json index c4a453c..dc4b84e 100644 --- a/src/cafi/data/acr_db.json +++ b/src/cafi/data/acr_db.json @@ -366,10 +366,10 @@ "country": "PT", "homepage": "https://lege.ciimar.up.pt", "name": "Blue Biotechnology and Ecotoxicology Culture Collection", - "regex_ccno": "^LEGE\\s*(XX?-?)?\\d+M?$", + "regex_ccno": "^LEGE\\s*(XX?-?)?\\d+(\\.\\d+)?M?$", "regex_id": { - "core": "\\d+", - "full": "^(XX?-?)?\\d+M?$", + "core": "\\d+(\\.\\d+)?", + "full": "^(XX?-?)?\\d+(\\.\\d+)?M?$", "pre": "XX?", "suf": "M" }, @@ -486,10 +486,10 @@ "country": "ES", "homepage": "https://marinebiotechnology.org/en/beaen/about-us.html", "name": "Spanish Bank of Algae", - "regex_ccno": "^BEA\\s*\\d+B?$", + "regex_ccno": "^BEA\\s*\\d+(/\\d+)?B?$", "regex_id": { - "core": "\\d+", - "full": "^\\d+B?$", + "core": "\\d+(/\\d+)?", + "full": "^\\d+(/\\d+)?B?$", "suf": "B" }, "ror": "01teme464" @@ -639,6 +639,144 @@ }, "ror": "02y5sbr94" }, + "130": { + "acr": "ITEM", + "acr_synonym": [ + "ISPA:ITEM", + "CNR:ISPA:ITEM" + ], + "active": true, + "code": "CNR:ISPA:ITEM", + "country": "IT", + "gbif": "21c9b326-b756-4391-b302-f3e095b947cb", + "homepage": "https://item.bio-aware.com/", + "name": "ITEM Agri-Food Microbial Collection", + "regex_ccno": "^ITEM\\s*\\d+$", + "regex_id": { + "full": "^\\d+$" + } + }, + "131": { + "acr": "ATHUBA", + "active": true, + "code": "ATHUBA", + "country": "GR", + "gbif": "1a74161f-2a7b-4f40-8e4a-c7d2db88ae2c", + "homepage": "http://m-biotech.biol.uoa.gr/ATHUBstrains.html", + "name": "The Athens University Bacterial & Archaea Culture Collection", + "regex_ccno": "^ATHUBA\\s*\\d+$", + "regex_id": { + "full": "^\\d+$" + } + }, + "132": { + "acr": "ACOI", + "active": true, + "catalogue": [ + "http://acoi.ci.uc.pt/spec_detail.php?cult_id={id}" + ], + "code": "ACOI", + "country": "PT", + "gbif": "e3fba94d-bd32-4a35-9385-a97263f618b1", + "homepage": "http://acoi.ci.uc.pt/index.php", + "name": "ACOI Coimbra Collection of Algae", + "regex_ccno": "^ACOI\\s*(-\\s*)?\\d+(-(A|B))?$", + "regex_id": { + "core": "\\d+", + "full": "^\\d+(-(A|B))?$", + "suf": "(A|B)" + } + }, + "133": { + "acr": "UOA:HCPF", + "active": true, + "code": "UOA:HCPF", + "country": "GR", + "gbif": "e9e55bd6-98d7-41a7-a250-da49ded6dc94", + "homepage": "http://www.med.uoa.gr/~aveleg/index_files/Page596.htm", + "name": "UOA/HCPF Hellenic Collection of Pathogenic Fungi", + "regex_ccno": "^UOA/HCPF\\s*(REF|ENV|AM)?\\s*\\d+([.-]\\d+)*[\\s-]*(A|B|C|a|b)?$", + "regex_id": { + "core": "\\d+([.-]\\d+)*", + "full": "^(REF|ENV|AM)?\\s*\\d+([.-]\\d+)*[\\s-]*(A|B|C|a|b)?$", + "pre": "(REF|ENV|AM)", + "suf": "(A|B|C|a|b)" + } + }, + "134": { + "acr": "UCCCB", + "active": true, + "catalogue": [ + "https://ucccb.uc.pt/strain-details/?detail={acr}{id}" + ], + "code": "UCCCB", + "country": "PT", + "homepage": "https://ucccb.uc.pt/", + "name": "UCCCB The University of Coimbra Bacteria Culture Collection", + "regex_ccno": "^UCCCB\\s*\\d+$", + "regex_id": { + "full": "^\\d+$" + } + }, + "135": { + "acr": "PYCC", + "acr_synonym": [ + "NOVA:PYCC" + ], + "active": true, + "catalogue": [ + "https://pycc.pt/pycc-{id}" + ], + "code": "PYCC", + "country": "PT", + "homepage": "https://pycc.pt/", + "name": "PYCC The portuguese yeast culture collection", + "regex_ccno": "^PYCC\\s*\\d+$", + "regex_id": { + "full": "^\\d+$" + } + }, + "136": { + "acr": "UMCC", + "active": true, + "code": "UMCC", + "country": "IT", + "homepage": "https://umcc.bio-aware.com/page/Homepage", + "name": "UNIMORE Microbial Culture Collection (UMCC)", + "regex_ccno": "^UMCC\\s*\\d+$", + "regex_id": { + "full": "^\\d+$" + } + }, + "137": { + "acr": "MSCL", + "active": true, + "code": "MSCL", + "country": "LV", + "gbif": "26bed235-564f-4c34-a971-dafcc7371b2a", + "homepage": "https://www.lu.lv/en/mbi/laboratories/microbial-strain-collection-of-latvia", + "name": "MSCL Microbial Strain Collection of Latvia", + "regex_ccno": "^MSCL\\s*\\d+$", + "regex_id": { + "full": "^\\d+$" + } + }, + "138": { + "acr": "ICCF", + "acr_synonym": [ + "CMII:ICCF", + "CMII:ICCF:WFCC" + ], + "active": false, + "code": "ICCF", + "country": "RO", + "gbif": "12693a23-a500-41c8-a7c6-61403ea24900", + "name": "ICCF Collection of Industrial Microorganisms", + "regex_ccno": "^ICCF\\s*\\d+$", + "regex_id": { + "full": "^\\d+$" + } + }, "14": { "acr": "JCM", "active": true, @@ -954,11 +1092,11 @@ "gbif": "323cc3eb-16f5-4aa0-8126-f0ccdfe9b56d", "homepage": "https://www.ccug.se/", "name": "Culture Collection University of Gothenburg", - "regex_ccno": "^CCUG\\s*\\d+(\\s*[AaBbCEQDFG]{1,2})?$", + "regex_ccno": "^CCUG\\s*\\d+(\\s*[AaBbCEQDFG!#]{1,2})?$", "regex_id": { "core": "\\d+", - "full": "^\\d+(\\s*[AaBbCEQDFG]{1,2})?$", - "suf": "[AaBbCEQDFG]{1,2}" + "full": "^\\d+(\\s*[AaBbCEQDFG!#]{1,2})?$", + "suf": "[AaBbCEQDFG!#]{1,2}" }, "ror": "01tm6cn81" }, @@ -1359,9 +1497,11 @@ "gbif": "1370bc72-540f-4ff0-aefd-2358971299be", "homepage": "https://wi.knaw.nl/", "name": "Westerdijk Institute: CBS Collection of yeasts, filamentous fungi and bacteria", - "regex_ccno": "^CBS\\s*\\d+(\\.\\d+)?$", + "regex_ccno": "^CBS\\s*\\d+(\\.\\d+)?[A-Z]?$", "regex_id": { - "full": "^\\d+(\\.\\d+)?$" + "core": "\\d+(\\.\\d+)?", + "full": "^\\d+(\\.\\d+)?[A-Z]?$", + "suf": "[A-Z]" }, "ror": "030a5r161" }, @@ -1666,9 +1806,11 @@ "gbif": "dfdd9fce-e4b9-4444-a86b-bb68b2b2ef48", "homepage": "https://www.micoteca.deb.uminho.pt", "name": "Micoteca da Universidade do Minho", - "regex_ccno": "^MUM\\s*\\d+(\\.\\d+)?$", + "regex_ccno": "^MUM\\s*\\d+(\\.\\d+)?i?$", "regex_id": { - "full": "^\\d+(\\.\\d+)?$" + "core": "\\d+(\\.\\d+)?", + "full": "^\\d+(\\.\\d+)?i?$", + "suf": "i" } }, "67": { diff --git a/src/cafi/data/catalogue_db.json b/src/cafi/data/catalogue_db.json index 3048b4e..651d6dd 100644 --- a/src/cafi/data/catalogue_db.json +++ b/src/cafi/data/catalogue_db.json @@ -65,6 +65,17 @@ "129": [ "UIO 457" ], + "132": [ + "ACOI 2754", + "ACOI 2902-A", + "ACOI 2902-B" + ], + "134": [ + "UCCCB 231" + ], + "135": [ + "PYCC 10001" + ], "14": [ "JCM 12345" ], @@ -148,7 +159,27 @@ "NCIM 1001" ], "5": [ - "CBS 101" + "CBS 3", + "CBS 101", + "CBS 107.95", + "CBS 208.77A", + "CBS 528.79B", + "CBS 721.73C", + "CBS 287.70D", + "CBS 226.71E", + "CBS 377.70F", + "CBS 907.72G", + "CBS 378.70H", + "CBS 907.72I", + "CBS 298.70J", + "CBS 287.70K", + "CBS 295.70L", + "CBS 376.70M", + "CBS 295.70N", + "CBS 287.70O", + "CBS 377.70P", + "CBS 377.70Q", + "CBS 287.70R" ], "50": [ "RCC103" diff --git a/src/cafi/data/regex_db.json b/src/cafi/data/regex_db.json index 474d064..c714eaa 100644 --- a/src/cafi/data/regex_db.json +++ b/src/cafi/data/regex_db.json @@ -74,7 +74,8 @@ "LEGE 11473", "LEGE 16694M", "LEGE XX511", - "LEGE X-002" + "LEGE X-002", + "LEGE 06149.2" ], "117": [ "BACA 0749" @@ -96,7 +97,8 @@ ], "122": [ "BEA 0050", - "BEA 0428B" + "BEA 0428B", + "BEA 0140/1" ], "123": [ "BMCC 11" @@ -127,6 +129,50 @@ "IMI 108201aii", "IMI 143318b" ], + "130": [ + "ITEM 4501" + ], + "131": [ + "ATHUBA 282" + ], + "132": [ + "ACOI 2754", + "ACOI 2902-A", + "ACOI 2902-B" + ], + "133": [ + "UOA/HCPF 9683", + "UOA/HCPF REF81", + "UOA/HCPF 9259A", + "UOA/HCPF 9259B", + "UOA/HCPF 0.0493", + "UOA/HCPF10323", + "UOA/HCPF 12658C", + "UOA/HCPF 13481-3", + "UOA/HCPF 12825-B", + "UOA/HCPF REF98782a", + "UOA/HCPF REF98782b", + "UOA/HCPF REF 334", + "UOA/HCPF ENV32-1.2", + "UOA/HCPF ENV49", + "UOA/HCPF AM 61.016", + "UOA/HCPF AM 2699" + ], + "134": [ + "UCCCB 231" + ], + "135": [ + "PYCC 10001" + ], + "136": [ + "UMCC 1733" + ], + "137": [ + "MSCL 1003" + ], + "138": [ + "ICCF 40" + ], "14": [ "JCM 32166" ], @@ -202,7 +248,10 @@ "CCUG 58491 Q", "CCUG 15536 D", "CCUG 5087 F", - "CCUG 42321 G" + "CCUG 42321 G", + "CCUG 53931 !#", + "CCUG 47460 !", + "CCUG 48172 #" ], "30": [ "CCRC 17087" @@ -294,7 +343,26 @@ ], "5": [ "CBS 3", - "CBS 107.95" + "CBS 101", + "CBS 107.95", + "CBS 208.77A", + "CBS 528.79B", + "CBS 721.73C", + "CBS 287.70D", + "CBS 226.71E", + "CBS 377.70F", + "CBS 907.72G", + "CBS 378.70H", + "CBS 907.72I", + "CBS 298.70J", + "CBS 287.70K", + "CBS 295.70L", + "CBS 376.70M", + "CBS 295.70N", + "CBS 287.70O", + "CBS 377.70P", + "CBS 377.70Q", + "CBS 287.70R" ], "50": [ "RCC 4681" @@ -356,7 +424,8 @@ ], "66": [ "MUM 22.66", - "MUM 9718" + "MUM 9718", + "MUM 14.07i" ], "67": [ "HUT 6598" diff --git a/src/cafi/library/validate.py b/src/cafi/library/validate.py index 75e96bb..6c84354 100644 --- a/src/cafi/library/validate.py +++ b/src/cafi/library/validate.py @@ -160,7 +160,9 @@ def _check_regex(r_ccno: str, r_id: AcrCoreReg, bid: int, /) -> None: pre, *_, suf = pre_suf.groups() for typ, fps, rps in [("prefix", pre, r_id.pre), ("suffix", suf, r_id.suf)]: if not isinstance(fps, str) or rps not in fps or (rps == "" and fps != ""): - raise ValJsonEx(f"{typ} defines a different {rps} regex than the full id") + raise ValJsonEx( + f"{typ} defines a different {rps} regex than the full id {fps}!" + ) _check_or_order(r_id.suf, bid) _check_or_order(r_id.pre, bid) diff --git a/uv.lock b/uv.lock index ad0e689..d67e752 100644 --- a/uv.lock +++ b/uv.lock @@ -54,7 +54,7 @@ wheels = [ [[package]] name = "cafi" -version = "0.9.15" +version = "0.10.0" source = { editable = "." } dependencies = [ { name = "pydantic" },