Skip to content

feat: add function that returns cif filenames based on chemical formula #336

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions news/get-cif.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
**Added:**

* Function that returns a list of CIF filenames from the Crystallography Open Database (COD) for a given chemical formula in Hill notation.

**Changed:**

* <news item>

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* <news item>

**Security:**

* <news item>
42 changes: 42 additions & 0 deletions src/diffpy/utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path

import numpy as np
import requests
from scipy.optimize import dual_annealing
from scipy.signal import convolve
from xraydb import material_mu
Expand Down Expand Up @@ -214,6 +215,47 @@ def get_package_info(package_names, metadata=None):
return metadata


def fetch_cif_filenames(hill_formula, timeout=30):
    """Fetch CIF filenames from the Crystallography Open Database (COD).

    The COD is queried for entries matching the given chemical formula,
    which must be in Hill notation: elements separated by whitespace and
    a count of 1 omitted (e.g., "Cl Na").

    Parameters
    ----------
    hill_formula : str
        The chemical formula in Hill notation.
    timeout : float or tuple, optional
        The number of seconds to wait for the COD server before giving up.
        It is passed unchanged to ``requests.get``. Default is 30.

    Returns
    -------
    list of str
        A list of CIF filenames (e.g., ["1000041.cif", "2104025.cif"]).

    Raises
    ------
    requests.HTTPError
        If the COD server responds with a status code other than 200.
    requests.Timeout
        If the COD server does not respond within ``timeout`` seconds.
    ValueError
        If no CIF files are found for the given formula.
    """
    base_url = "https://www.crystallography.net/cod/result.php"
    params = {"formula": hill_formula, "format": "json"}
    # requests never times out by default, so an unresponsive server would
    # otherwise block the caller forever.
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        # HTTPError is a subclass of Exception, so callers that catch the
        # generic Exception raised previously keep working.
        raise requests.HTTPError(
            f"Failed to retrieve search results. "
            f"HTTP status code: {response.status_code}.",
            response=response,
        )
    data = response.json()
    cif_filenames = [str(entry["file"]) + ".cif" for entry in data]
    if not cif_filenames:
        raise ValueError(
            f"No CIF files found for the given formula: {hill_formula}. "
            "Please ensure it's in Hill notation (e.g., 'Cl Na'). "
            "You can use ``to_hill_notation`` for conversion. "
            "If the formula is correct, it is possible that "
            "no CIF files are available for this formula in the COD."
        )
    return cif_filenames


def get_density_from_cloud(sample_composition, mp_token=""):
"""Function to get material density from the MP or COD database.

Expand Down
41 changes: 41 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import importlib.metadata
import json
import os
import re
from pathlib import Path

import numpy as np
Expand All @@ -11,6 +12,7 @@
check_and_build_global_config,
compute_mu_using_xraydb,
compute_mud,
fetch_cif_filenames,
get_package_info,
get_user_info,
)
Expand Down Expand Up @@ -270,6 +272,45 @@ def test_get_package_info(monkeypatch, inputs, expected):
assert actual_metadata == expected


def test_fetch_cif_filenames():
    """Check that the COD lookup for "Cl Na" returns the expected CIF files.

    NOTE(review): this calls ``fetch_cif_filenames`` without any mocking, so
    it appears to depend on the live COD service and on the pinned list
    below staying in sync with the database -- confirm this is intended.
    """
    actual_cif_filenames = fetch_cif_filenames("Cl Na")
    expected_cif_filenames = [
        "1000041.cif",
        "2104025.cif",
        "2108652.cif",
        "2311042.cif",
        "4300180.cif",
        "4320809.cif",
        "7132177.cif",
        "9000629.cif",
        "9003308.cif",
        "9003309.cif",
        "9003310.cif",
        "9003311.cif",
        "9003312.cif",
        "9003313.cif",
        "9003314.cif",
        "9006369.cif",
        "9006370.cif",
        "9006371.cif",
        "9006372.cif",
        "9006373.cif",
    ]
    # Must be an assert: returning the comparison result is ignored by
    # pytest, which would let the test pass regardless of the outcome.
    assert sorted(actual_cif_filenames) == sorted(expected_cif_filenames)


def test_fetch_cif_filenames_bad():
    """A formula that is not in Hill notation must raise a ValueError."""
    bad_formula = "NaCl"
    message = (
        f"No CIF files found for the given formula: {bad_formula}. "
        "Please ensure it's in Hill notation (e.g., 'Cl Na'). "
        "You can use ``to_hill_notation`` for conversion. "
        "If the formula is correct, it is possible that "
        "no CIF files are available for this formula in the COD."
    )
    # The message contains regex metacharacters, so escape it before
    # handing it to ``match``.
    pattern = re.escape(message)
    with pytest.raises(ValueError, match=pattern):
        fetch_cif_filenames(bad_formula)


@pytest.mark.parametrize(
"inputs",
[
Expand Down
Loading