diff --git a/comment_spell_check.py b/comment_spell_check.py index 1c9c25f..b34121e 100755 --- a/comment_spell_check.py +++ b/comment_spell_check.py @@ -25,6 +25,7 @@ import argparse import re from pathlib import Path +from importlib.metadata import version, PackageNotFoundError from enchant.checker import SpellChecker from enchant.tokenize import EmailFilter, URLFilter @@ -32,7 +33,7 @@ from comment_parser import comment_parser -from importlib.metadata import version, PackageNotFoundError +from lib import bibtex_loader __version__ = "unknown" @@ -378,6 +379,13 @@ def parse_args(): help="Set file mime type. File name suffix will be ignored.", ) + parser.add_argument( + "--bibtex", + action="append", + dest="bibtex", + help="Bibtex file to load for additional dictionary words.", + ) + parser.add_argument("--version", action="version", version=f"{__version__}") args = parser.parse_args() @@ -404,11 +412,38 @@ def add_dict(enchant_dict, filename, verbose=False): enchant_dict.add(wrd) +def create_spell_checker(args, output_lvl): + """Create a SpellChecker.""" + + my_dict = Dict("en_US") + + # Load the dictionary files + # + initial_dct = Path(__file__).parent / "additional_dictionary.txt" + if not initial_dct.exists(): + initial_dct = None + else: + add_dict(my_dict, str(initial_dct), any([args.brief, output_lvl >= 0])) + + if args.dict is not None: + for d in args.dict: + add_dict(my_dict, d, any([args.brief, output_lvl >= 0])) + + # Load the bibliography files + # + if args.bibtex is not None: + for bib in args.bibtex: + bibtex_loader.add_bibtex(my_dict, bib, any([args.brief, output_lvl >= 0])) + + # Create the SpellChecker + spell_checker = SpellChecker(my_dict, filters=[EmailFilter, URLFilter]) + + return spell_checker + + def main(): args = parse_args() - sitk_dict = Dict("en_US") - # Set the amount of debugging messages to print. 
output_lvl = 1 if args.brief: @@ -419,19 +454,7 @@ def main(): if args.miss: output_lvl = -1 - # Load the dictionary files - # - initial_dct = Path(__file__).parent / "additional_dictionary.txt" - if not initial_dct.exists(): - initial_dct = None - else: - add_dict(sitk_dict, str(initial_dct), any([args.brief, output_lvl >= 0])) - - if args.dict is not None: - for d in args.dict: - add_dict(sitk_dict, d, any([args.brief, output_lvl >= 0])) - - spell_checker = SpellChecker(sitk_dict, filters=[EmailFilter, URLFilter]) + spell_checker = create_spell_checker(args, output_lvl) file_list = [] if len(args.filenames): diff --git a/lib/bibtex_loader.py b/lib/bibtex_loader.py new file mode 100644 index 0000000..e7d031a --- /dev/null +++ b/lib/bibtex_loader.py @@ -0,0 +1,33 @@ +import bibtexparser + + +def split_bibtex_name(name): + """ + Split a Bibtex name, which is two words separated by a number. + """ + + # map any digit to space + mytable = str.maketrans("0123456789", " ") + new_name = name.translate(mytable) + + # split by space + words = new_name.split() + return words + + +def add_bibtex(enchant_dict, filename, verbose=False): + """Update ``enchant_dict`` spell checking dictionary with names + from ``filename``, a Bibtex file.""" + + if verbose: + print(f"Bibtex file: {filename}") + + with open(filename, "rt", encoding="utf-8") as biblatex_file: + bib_database = bibtexparser.load(biblatex_file) + + for k in bib_database.get_entry_dict().keys(): + words = split_bibtex_name(k) + for w in words: + enchant_dict.add(w) + if verbose: + print("Added Bibtex word:", w) diff --git a/requirements.txt b/requirements.txt index 2f7eb45..1e7beed 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ comment_parser pyenchant +bibtexparser diff --git a/tests/bibtest.py b/tests/bibtest.py new file mode 100644 index 0000000..3d31d67 --- /dev/null +++ b/tests/bibtest.py @@ -0,0 +1,7 @@ +# lowekamp2013design +# yaniv2018simpleitk +# ibanez2003itk +# avants2014insight +# 
yushkevich2017itk + +print("Hello World") diff --git a/tests/itk.bib b/tests/itk.bib new file mode 100644 index 0000000..96058e5 --- /dev/null +++ b/tests/itk.bib @@ -0,0 +1,48 @@ +@article{lowekamp2013design, + title={The design of SimpleITK}, + author={Lowekamp, Bradley C and Chen, David T and Ib{\'a}{\~n}ez, Luis and Blezek, Daniel}, + journal={Frontiers in neuroinformatics}, + volume={7}, + pages={45}, + year={2013}, + publisher={Frontiers Media SA} +} + +@article{yaniv2018simpleitk, + title={SimpleITK image-analysis notebooks: a collaborative environment for education and reproducible research}, + author={Yaniv, Ziv and Lowekamp, Bradley C and Johnson, Hans J and Beare, Richard}, + journal={Journal of digital imaging}, + volume={31}, + number={3}, + pages={290--303}, + year={2018}, + publisher={Springer} +} + +@misc{ibanez2003itk, + title={The ITK software guide}, + author={Ibanez, Luis and Schroeder, Will and Ng, Lydia and Cates, Josh and others}, + year={2003}, + publisher={Kitware, Incorporated Clifton Park, New York} +} + +@article{avants2014insight, + title={The Insight ToolKit image registration framework}, + author={Avants, Brian B and Tustison, Nicholas J and Stauffer, Michael and Song, Gang and Wu, Baohua and Gee, James C}, + journal={Frontiers in neuroinformatics}, + volume={8}, + pages={44}, + year={2014}, + publisher={Frontiers Media SA} +} + +@article{yushkevich2017itk, + title={ITK-SNAP: an intractive medical image segmentation tool to meet the need for expert-guided segmentation of complex medical images}, + author={Yushkevich, Paul A and Gerig, Guido}, + journal={IEEE pulse}, + volume={8}, + number={4}, + pages={54--57}, + year={2017}, + publisher={IEEE} +} diff --git a/tests/test_comment_spell_check.py b/tests/test_comment_spell_check.py index 63f396a..0cb93ac 100644 --- a/tests/test_comment_spell_check.py +++ b/tests/test_comment_spell_check.py @@ -81,3 +81,17 @@ def test_version(self): self.assertNotEqual( version_string, "unknown", "version 
string contains 'unknown'" ) + + def test_bibtex(self): + """Bibtex test""" + runresult = subprocess.run( + [ + "python", + "comment_spell_check.py", + "--bibtex", + "tests/itk.bib", + "tests/bibtest.py", + ], + stdout=subprocess.PIPE, + ) + self.assertEqual(runresult.returncode, 0, runresult.stdout)