Skip to content

Commit a4f2802

Browse files
authored
Merge pull request #30 from QuantumChemist/main
added config_type keyword to gapfit
2 parents 5d9e22d + 435bf5a commit a4f2802

File tree

9 files changed

+483
-409
lines changed

9 files changed

+483
-409
lines changed

autoplex/auto/flows.py

+14-15
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@
3636

3737

3838
@dataclass
39-
class CompleteDFTvsMLBenchmarkWorkflow(
40-
Maker
41-
): # merge with complete wf and set another flag for adding data
39+
class CompleteDFTvsMLBenchmarkWorkflow(Maker):
4240
"""
4341
Maker to add more data to existing dataset (.xyz file).
4442
@@ -103,21 +101,18 @@ def make(
103101
fit_input = {}
104102
collect = []
105103

106-
# if xyz_file is None:
107-
# raise Exception("Error. Please provide an existing xyz file.")
108-
109-
for i, structure in enumerate(structure_list):
104+
for structure, mp_id in zip(structure_list, mp_ids):
110105
if self.add_dft_random_struct:
111106
addDFTrand = self.add_dft_random(
112107
structure,
113-
mp_ids[i],
108+
mp_id,
114109
self.phonon_displacement_maker,
115110
self.n_struct,
116111
self.uc,
117112
self.supercell_matrix,
118113
)
119114
flows.append(addDFTrand)
120-
fit_input.update({mp_ids[i]: addDFTrand.output})
115+
fit_input.update({mp_id: addDFTrand.output})
121116
if self.add_dft_phonon_struct:
122117
addDFTphon = self.add_dft_phonons(
123118
structure,
@@ -127,17 +122,20 @@ def make(
127122
self.min_length,
128123
)
129124
flows.append(addDFTphon)
130-
fit_input.update({mp_ids[i]: addDFTphon.output})
125+
fit_input.update({mp_id: addDFTphon.output})
131126
if self.add_dft_random_struct and self.add_dft_phonon_struct:
132-
fit_input.update(
133-
{mp_ids[i]: {**addDFTrand.output, **addDFTphon.output}}
134-
)
127+
fit_input.update({mp_id: {**addDFTrand.output, **addDFTphon.output}})
135128
if self.add_rss_struct:
136129
raise NotImplementedError
137130

138131
isoatoms = get_iso_atom(structure_list)
139132
flows.append(isoatoms)
140133

134+
if xyz_file is None:
135+
fit_input.update(
136+
{"isolated_atom": {"iso_atoms_dir": [isoatoms.output["dirs"]]}}
137+
)
138+
141139
add_data_fit = PhononDFTMLFitFlow().make(
142140
species=isoatoms.output["species"],
143141
isolated_atoms_energy=isoatoms.output["energies"],
@@ -153,7 +151,6 @@ def make(
153151
for ibenchmark_structure, benchmark_structure in enumerate(
154152
benchmark_structures
155153
):
156-
# not sure if it would make sense to put everything from here in its own flow?
157154
add_data_ml_phonon = get_phonon_ml_calculation_jobs(
158155
structure=benchmark_structure,
159156
min_length=self.min_length,
@@ -379,7 +376,9 @@ def make(self, structure: Structure, mp_id):
379376
@dataclass
380377
class PhononDFTMLFitFlow(Maker):
381378
"""
382-
Maker to fit ML potentials based on DFT data.
379+
Maker to fit several types of ML potentials (GAP, ACE etc.) based on DFT data.
380+
381+
(Currently only the subroutines for GAP are implemented).
383382
384383
Parameters
385384
----------

autoplex/auto/jobs.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -314,5 +314,12 @@ def get_iso_atom(structure_list: list[Structure]):
314314
isoatoms = IsoAtomMaker().make(all_species=all_species)
315315
jobs.append(isoatoms)
316316

317-
flow = Flow(jobs, {"species": all_species, "energies": isoatoms.output})
317+
flow = Flow(
318+
jobs,
319+
{
320+
"species": all_species,
321+
"energies": isoatoms.output["energies"],
322+
"dirs": isoatoms.output["dirs"],
323+
},
324+
)
318325
return Response(replace=flow)

autoplex/data/flows.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,8 @@ def make(self, all_species: list[Species]):
357357
list of pymatgen Species objects.
358358
"""
359359
jobs = []
360-
isoatoms = []
360+
isoatoms_energy = []
361+
isoatoms_dirs = []
361362
for species in all_species:
362363
site = Site(species=species, coords=[0, 0, 0])
363364
mol = Molecule.from_sites([site])
@@ -370,6 +371,8 @@ def make(self, all_species: list[Species]):
370371
).make(iso_atom)
371372

372373
jobs.append(isoatom_calcs)
373-
isoatoms.append(isoatom_calcs.output.output.energy_per_atom)
374+
isoatoms_energy.append(isoatom_calcs.output.output.energy_per_atom)
375+
isoatoms_dirs.append(isoatom_calcs.output.dir_name)
376+
374377
# create a flow including all jobs
375-
return Flow(jobs, isoatoms)
378+
return Flow(jobs, {"energies": isoatoms_energy, "dirs": isoatoms_dirs})

autoplex/fitting/gap-defaults.json

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
{
22
"general": {
33
"at_file": "trainGAP.xyz",
4-
"e0": "2",
54
"default_sigma": "{0.01 0.2 0.2 0.0}",
6-
"energy_parameter_name": "energy",
7-
"force_parameter_name": "forces",
8-
"virial_parameter_name": "virial",
5+
"energy_parameter_name": "REF_energy",
6+
"force_parameter_name": "REF_forces",
7+
"virial_parameter_name": "REF_virial",
98
"sparse_jitter": 1.0e-8,
109
"do_copy_at_file": "F",
1110
"openmp_chunk_size": 10000,

autoplex/fitting/jobs.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def gapfit(
5252
bool indicating whether to include soap hyperparameters
5353
xyz_file: str or None
5454
a possibly already existing xyz file
55+
config_types: list[str] or None
56+
list of config_types.
5557
fit_kwargs : dict.
5658
dict including gap fit keyword args.
5759
@@ -60,13 +62,24 @@ def gapfit(
6062
Response.output
6163
Path to the gap fit file.
6264
"""
65+
config_types = []
6366
if fit_kwargs is None:
6467
fit_kwargs = field(default_factory=dict)
6568

6669
list_of_vasp_calc_dirs = get_list_of_vasp_calc_dirs(flow_output=fit_input)
6770

71+
config_types = [
72+
key
73+
for key, value in fit_input.items()
74+
for key2, value2 in value.items()
75+
if key2 != "phonon_data"
76+
for _ in value2[0]
77+
]
78+
6879
outcar_2_extended_xyz(
69-
path_to_vasp_static_calcs=list_of_vasp_calc_dirs, xyz_file=xyz_file
80+
path_to_vasp_static_calcs=list_of_vasp_calc_dirs,
81+
config_types=config_types,
82+
xyz_file=xyz_file,
7083
)
7184

7285
gap_default_hyperparameters = load_gap_hyperparameter_defaults(

autoplex/fitting/utils.py

+30-15
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import shutil
88
from pathlib import Path
99

10-
import numpy as np
10+
from ase.constraints import voigt_6_to_full_3x3_stress
1111
from ase.io import read, write
1212
from atomate2.utils.path import strip_hostname
1313

@@ -62,16 +62,16 @@ def gap_hyperparameter_constructor(
6262
gap fit input parameter string.
6363
"""
6464
# convert gap_parameter_dict to representation compatible with gap
65-
if atoms_energies and atoms_symbols is not None:
66-
e0 = ":".join(
67-
[
68-
f"{iso_atom}:{iso_energy}"
69-
for iso_atom, iso_energy in zip(atoms_symbols, atoms_energies)
70-
]
71-
)
65+
# if atoms_energies and atoms_symbols is not None:
66+
# e0 = ":".join(
67+
# [
68+
# f"{iso_atom}:{iso_energy}"
69+
# for iso_atom, iso_energy in zip(atoms_symbols, atoms_energies)
70+
# ]
71+
# )
7272

73-
# Update the isolated atom energy argument
74-
gap_parameter_dict["general"].update({"e0": e0})
73+
# Update the isolated atom energy argument
74+
# gap_parameter_dict["general"].update({"e0": e0})
7575

7676
general = [f"{key}={value}" for key, value in gap_parameter_dict["general"].items()]
7777

@@ -136,7 +136,11 @@ def get_list_of_vasp_calc_dirs(flow_output):
136136
return list_of_vasp_calc_dirs
137137

138138

139-
def outcar_2_extended_xyz(path_to_vasp_static_calcs: list, xyz_file: str | None = None):
139+
def outcar_2_extended_xyz(
140+
path_to_vasp_static_calcs: list,
141+
config_types: list[str] | None = None,
142+
xyz_file: str | None = None,
143+
):
140144
"""
141145
Parse all VASP vasprun.xml.gz files and generate a trainGAP.xyz.
142146
@@ -149,16 +153,27 @@ def outcar_2_extended_xyz(path_to_vasp_static_calcs: list, xyz_file: str | None
149153
List of VASP static calculation directories.
150154
xyz_file: str or None
151155
a possibly already existing xyz file.
156+
config_types: list[str] or None
157+
list of config_type labels, paired one-to-one with path_to_vasp_static_calcs; if None, every path is labelled "bulk".
152158
"""
153-
for path in path_to_vasp_static_calcs:
159+
if config_types is None:
160+
config_types = ["bulk"] * len(path_to_vasp_static_calcs)
161+
162+
for path, config_type in zip(path_to_vasp_static_calcs, config_types):
154163
# strip hostname if it exists in the path
155-
path_without_hostname = Path(strip_hostname(path)).joinpath("OUTCAR.gz")
164+
path_without_hostname = Path(strip_hostname(path)).joinpath("vasprun.xml.gz")
156165
# read the vasprun.xml.gz file
157166
file = read(path_without_hostname, index=":")
158167
for i in file:
159-
xx, yy, zz, yz, xz, xy = -i.calc.results["stress"] * i.get_volume()
160-
i.info["virial"] = np.array([(xx, xy, xz), (xy, yy, yz), (xz, yz, zz)])
168+
virial_list = -voigt_6_to_full_3x3_stress(i.get_stress()) * i.get_volume()
169+
i.info["REF_virial"] = " ".join(map(str, virial_list.flatten()))
161170
del i.calc.results["stress"]
171+
i.arrays["REF_forces"] = i.calc.results["forces"]
172+
del i.calc.results["forces"]
173+
i.info["REF_energy"] = i.calc.results["free_energy"]
174+
del i.calc.results["energy"]
175+
del i.calc.results["free_energy"]
176+
i.info["config_type"] = config_type
162177
i.pbc = True
163178
if xyz_file is not None:
164179
shutil.copy2(xyz_file, os.getcwd())

tests/fitting/test_fitting_flows.py

+59-21
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,33 @@ def test_mlip_fit_maker(test_dir, clean_dir, memory_jobstore, vasp_test_dir):
6565
]
6666
],
6767
"phonon_data": [],
68+
},
69+
"isolated_atom": {"iso_atoms_dir": [[
70+
(
71+
vasp_test_dir
72+
/ "Li_iso_atoms"
73+
/ "Li-statisoatom"
74+
/ "outputs"
75+
)
76+
.absolute()
77+
.as_posix(),
78+
(
79+
vasp_test_dir
80+
/ "Cl_iso_atoms"
81+
/ "Cl-statisoatom"
82+
/ "outputs"
83+
)
84+
.absolute()
85+
.as_posix(),
86+
]]
6887
}
6988
}
7089

7190
# Test to check if gap fit runs with default hyperparameter sets (i.e. include_two_body and include_soap are True)
7291
gapfit = MLIPFitMaker().make(
7392
species_list=["Li", "Cl"],
7493
iso_atom_energy=[-0.28649227, -0.25638457],
75-
fit_input=fit_input_dict,
94+
fit_input=fit_input_dict
7695
)
7796

7897
responses = run_locally(
@@ -107,26 +126,26 @@ def test_mlip_fit_maker_with_kwargs(
107126
"rand_struc_dir": [
108127
[
109128
(
110-
vasp_test_dir
111-
/ "dft_ml_data_generation"
112-
/ "rand_static_1"
113-
/ "outputs"
129+
vasp_test_dir
130+
/ "dft_ml_data_generation"
131+
/ "rand_static_1"
132+
/ "outputs"
114133
)
115134
.absolute()
116135
.as_posix(),
117136
(
118-
vasp_test_dir
119-
/ "dft_ml_data_generation"
120-
/ "rand_static_2"
121-
/ "outputs"
137+
vasp_test_dir
138+
/ "dft_ml_data_generation"
139+
/ "rand_static_2"
140+
/ "outputs"
122141
)
123142
.absolute()
124143
.as_posix(),
125144
(
126-
vasp_test_dir
127-
/ "dft_ml_data_generation"
128-
/ "rand_static_3"
129-
/ "outputs"
145+
vasp_test_dir
146+
/ "dft_ml_data_generation"
147+
/ "rand_static_3"
148+
/ "outputs"
130149
)
131150
.absolute()
132151
.as_posix(),
@@ -135,24 +154,43 @@ def test_mlip_fit_maker_with_kwargs(
135154
"phonon_dir": [
136155
[
137156
(
138-
vasp_test_dir
139-
/ "dft_ml_data_generation"
140-
/ "phonon_static_1"
141-
/ "outputs"
157+
vasp_test_dir
158+
/ "dft_ml_data_generation"
159+
/ "phonon_static_1"
160+
/ "outputs"
142161
)
143162
.absolute()
144163
.as_posix(),
145164
(
146-
vasp_test_dir
147-
/ "dft_ml_data_generation"
148-
/ "phonon_static_2"
149-
/ "outputs"
165+
vasp_test_dir
166+
/ "dft_ml_data_generation"
167+
/ "phonon_static_2"
168+
/ "outputs"
150169
)
151170
.absolute()
152171
.as_posix(),
153172
]
154173
],
155174
"phonon_data": [],
175+
},
176+
"isolated_atom": {"iso_atoms_dir": [[
177+
(
178+
vasp_test_dir
179+
/ "Li_iso_atoms"
180+
/ "Li-statisoatom"
181+
/ "outputs"
182+
)
183+
.absolute()
184+
.as_posix(),
185+
(
186+
vasp_test_dir
187+
/ "Cl_iso_atoms"
188+
/ "Cl-statisoatom"
189+
/ "outputs"
190+
)
191+
.absolute()
192+
.as_posix(),
193+
]]
156194
}
157195
}
158196

0 commit comments

Comments
 (0)