def fetch_cif_filenames(hill_formula, timeout=30):
    """Fetch CIF filenames from the COD for a formula in Hill notation.

    Sends a search request to the Crystallography Open Database (COD) and
    builds one filename per entry in the JSON response. In the expected
    Hill notation, elements are separated by whitespace and a count of 1
    is omitted (e.g., "Cl Na").

    Parameters
    ----------
    hill_formula : str
        The chemical formula in Hill notation.
    timeout : float or tuple, optional
        Value forwarded to ``requests.get`` as its ``timeout`` argument.
        Default is 30. Without a timeout the request could block forever
        if the server never answers.

    Returns
    -------
    list of str
        A list of CIF filenames (e.g., ["1000041.cif", "2104025.cif"]).

    Raises
    ------
    requests.exceptions.HTTPError
        If the response status code is anything other than 200.
    ValueError
        If no CIF files are found for the given formula.
    """
    base_url = "https://www.crystallography.net/cod/result.php"
    params = {"formula": hill_formula, "format": "json"}
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        # HTTPError is still an Exception subclass, so existing
        # ``except Exception`` handlers keep working; the message is
        # unchanged and the response is attached for inspection.
        raise requests.exceptions.HTTPError(
            f"Failed to retrieve search results. "
            f"HTTP status code: {response.status_code}.",
            response=response,
        )
    data = response.json()
    # Each entry's "file" value is the COD identifier used as the file stem.
    cif_filenames = [str(entry["file"]) + ".cif" for entry in data]
    if not cif_filenames:
        raise ValueError(
            f"No CIF files found for the given formula: {hill_formula}. "
            "Please ensure it's in Hill notation (e.g., 'Cl Na'). "
            "You can use ``to_hill_notation`` for conversion. "
            "If the formula is correct, it is possible that "
            "no CIF files are available for this formula in the COD."
        )
    return cif_filenames