Skip to content

Commit

Permalink
Merge pull request #22 from JaGeo/add_more_benchmark_structures
Browse files Browse the repository at this point in the history
Add options for more benchmark structures to phonon benchmarks
  • Loading branch information
JaGeo authored Jan 26, 2024
2 parents 7bd29b9 + 169c9a6 commit 2159ba8
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 205 deletions.
248 changes: 64 additions & 184 deletions autoplex/auto/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,138 +38,6 @@
# Volker's idea: provide several default flows with different setting/setups
# TODO TaskDocs

#
# @dataclass
# class CompleteDFTvsMLBenchmarkWorkflow(Maker):
# """
# Maker to calculate harmonic phonons with DFT, fit GAP and benchmark the results.
#
# User has no data.
#
# Parameters
# ----------
# name : str
# Name of the flows produced by this maker
# n_struct: int.
# The total number of randomly displaced structures to be generated.
# displacements: List[float]
# displacement distance for phonons
# symprec : float
# Symmetry precision to use in the
# reduction of symmetry to find the primitive/conventional cell
# (use_primitive_standard_structure, use_conventional_standard_structure)
# and to handle all symmetry-related tasks in phonopy
# uc: bool.
# If True, will generate randomly distorted structures (unitcells)
# and add static computation jobs to the flow
#
# """
#
# name: str = "complete_workflow"
# n_struct: int = 1
# displacements: list[float] = field(default_factory=lambda: [0.01])
# min_length: int = 20
# symprec: float = 1e-4
# uc: bool = False # to get rattled unit cells
# supercell_matrix: Matrix3D | None = None
#
# def make(
# self,
# structure_list: list[Structure],
# mp_ids,
# phonon_displacement_maker,
# benchmark_structure: Structure, # structures
# mp_id, # benchmark_mp_ids
# **fit_kwargs,
# ):
# """
# Make the complete workflow for DFT vs. ML benchmarking.
#
# Parameters
# ----------
# structure_list: List[Structure]
# list of pymatgen structures
# mp_ids : list.
# list of materials project ids
# phonon_displacement_maker : .BaseVaspMaker
# Maker used to compute the forces for a supercell.
# benchmark_structure: Structure.
# Structure used for benchmarking.
# mp_id: str
# materials project ID corresponding to the benchmark structure
# """
# flows = []
# datagen = {}
# collect = []
# isoatoms = get_iso_atom(structure_list)
# flows.append(isoatoms)
#
# for struc_i, structure in enumerate(structure_list):
# autoplex_datagen = DFTDataGenerationFlow(
# name="datagen",
# phonon_displacement_maker=phonon_displacement_maker,
# n_struct=self.n_struct,
# displacements=self.displacements,
# min_length=self.min_length,
# symprec=self.symprec,
# uc=self.uc,
# supercell_matrix=self.supercell_matrix,
# ).make(structure=structure, mp_id=mp_ids[struc_i])
# flows.append(autoplex_datagen)
# datagen.update({mp_ids[struc_i]: autoplex_datagen.output})
#
# autoplex_fit = PhononDFTMLFitFlow().make(
# species=isoatoms.output["species"],
# isolated_atoms_energy=isoatoms.output["energies"],
# fit_input=datagen,
# **fit_kwargs,
# )
# flows.append(autoplex_fit)
#
# autoplex_ml_phonon = get_phonon_ml_calculation_jobs(
# structure=benchmark_structure,
# min_length=self.min_length,
# ml_dir=autoplex_fit.output,
# )
# flows.append(autoplex_ml_phonon)
# if mp_id not in mp_ids:
# dft_phonons = DFTPhononMaker(
# symprec=self.symprec,
# phonon_displacement_maker=phonon_displacement_maker,
# born_maker=None,
# min_length=self.min_length,
# ).make(structure=benchmark_structure)
# dft_phonons = update_user_incar_settings(
# dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
# )
# flows.append(dft_phonons)
#
# dft_reference = dft_phonons.output
# else:
# dft_reference = datagen[mp_id]["phonon_data"][
# "001"
# ] # flag take all phonon runs
# # explanation for 001 = 0.01
#
# autoplex_bm = PhononDFTMLBenchmarkFlow(name="testBM").make(
# structure=benchmark_structure,
# mp_id=mp_id,
# ml_phonon_task_doc=autoplex_ml_phonon.output,
# dft_phonon_task_doc=dft_reference,
# )
# flows.append(autoplex_bm)
# collect.append(autoplex_bm.output)
#
# collect_bm = write_benchmark_metrics(
# benchmark_structure=benchmark_structure,
# mp_id=mp_id,
# rmse=collect,
# displacements=self.displacements,
# )
# flows.append(collect_bm)
#
# return Flow(flows)


@dataclass
class CompleteDFTvsMLBenchmarkWorkflow(
Expand Down Expand Up @@ -211,9 +79,9 @@ def make(
structure_list: list[Structure],
mp_ids,
xyz_file: str | None = None,
dft_reference: PhononBSDOSDoc | None = None,
benchmark_structure: Structure | None = None,
mp_id: str | None = None,
dft_references: PhononBSDOSDoc | None = None,
benchmark_structures: Structure | None = None,
benchmark_mp_ids: str | None = None,
**fit_kwargs,
):
"""
Expand All @@ -227,11 +95,11 @@ def make(
materials project id.
xyz_file:
the already existing training data xyz file.
dft_reference:
dft_references:
DFT reference file containing the PhononBSDOSDoc object.
benchmark_structure: Structure
benchmark_structures: Structure
pymatgen structure for benchmarking.
mp_id:
benchmark_mp_ids:
Materials Project ID of the benchmarking structure.
"""
Expand Down Expand Up @@ -283,50 +151,60 @@ def make(
)
flows.append(add_data_fit)

# not sure if it would make sense to put everything from here in its own flow?
add_data_ml_phonon = get_phonon_ml_calculation_jobs(
structure=benchmark_structure,
min_length=self.min_length,
ml_dir=add_data_fit.output,
)
flows.append(add_data_ml_phonon)

if dft_reference is None:
if (mp_id in mp_ids) and self.add_dft_phonon_struct:
dft_reference = fit_input[mp_id]["phonon_data"]["001"]
elif (mp_id not in mp_ids) or ( # else?
self.add_dft_phonon_struct is False
):
dft_phonons = DFTPhononMaker(
symprec=self.symprec,
phonon_displacement_maker=self.phonon_displacement_maker,
born_maker=None,
min_length=self.min_length,
).make(structure=benchmark_structure)
dft_phonons = update_user_incar_settings(
dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
)
flows.append(dft_phonons)
dft_reference = dft_phonons.output

add_data_bm = PhononDFTMLBenchmarkFlow(name="addDataBM").make(
structure=benchmark_structure,
mp_id=mp_id,
ml_phonon_task_doc=add_data_ml_phonon.output,
dft_phonon_task_doc=dft_reference,
)
flows.append(add_data_bm)
collect.append(add_data_bm.output)

collect_bm = write_benchmark_metrics(
benchmark_structure=benchmark_structure,
mp_id=mp_id,
rmse=collect,
displacements=self.displacements,
)
flows.append(collect_bm)
bm_outputs=[]

return Flow(flows, collect_bm.output)
for ibenchmark_structure, benchmark_structure in enumerate(benchmark_structures):
# not sure if it would make sense to put everything from here in its own flow?
add_data_ml_phonon = get_phonon_ml_calculation_jobs(
structure=benchmark_structure,
min_length=self.min_length,
ml_dir=add_data_fit.output,
)
flows.append(add_data_ml_phonon)

if dft_references is None and benchmark_mp_ids is not None:
if (benchmark_mp_ids[ibenchmark_structure] in mp_ids) and self.add_dft_phonon_struct:
dft_references = fit_input[benchmark_mp_ids[ibenchmark_structure]]["phonon_data"]["001"]
elif (benchmark_mp_ids[ibenchmark_structure] not in mp_ids) or ( # else?
self.add_dft_phonon_struct is False
):
dft_phonons = DFTPhononMaker(
symprec=self.symprec,
phonon_displacement_maker=self.phonon_displacement_maker,
born_maker=None,
min_length=self.min_length,
).make(structure=benchmark_structure)
dft_phonons = update_user_incar_settings(
dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
)
flows.append(dft_phonons)
dft_references = dft_phonons.output

add_data_bm = PhononDFTMLBenchmarkFlow(name="addDataBM").make(
structure=benchmark_structure,
benchmark_mp_id=benchmark_mp_ids[ibenchmark_structure],
ml_phonon_task_doc=add_data_ml_phonon.output,
dft_phonon_task_doc=dft_references,
)
else:
add_data_bm = PhononDFTMLBenchmarkFlow(name="addDataBM").make(
structure=benchmark_structure,
benchmark_mp_id=benchmark_mp_ids[ibenchmark_structure],
ml_phonon_task_doc=add_data_ml_phonon.output,
dft_phonon_task_doc=dft_references[ibenchmark_structure],
)
flows.append(add_data_bm)
collect.append(add_data_bm.output)

collect_bm = write_benchmark_metrics(
benchmark_structure=benchmark_structure,
mp_id=benchmark_mp_ids[ibenchmark_structure],
rmse=collect,
displacements=self.displacements,
)
flows.append(collect_bm)
bm_outputs.append(collect_bm.output)
return Flow(flows, bm_outputs)

def add_dft_phonons(
self,
Expand Down Expand Up @@ -511,6 +389,8 @@ def make(
return Flow(flows, ml_fit_flow.output)


# We need to extend this flow to run over more than one structure.
# I am not sure why it is even a flow.
@dataclass
class PhononDFTMLBenchmarkFlow(Maker):
"""
Expand All @@ -527,7 +407,7 @@ class PhononDFTMLBenchmarkFlow(Maker):
def make(
self,
structure: Structure,
mp_id,
benchmark_mp_id,
ml_phonon_task_doc: PhononBSDOSDoc,
dft_phonon_task_doc: PhononBSDOSDoc,
):
Expand All @@ -538,7 +418,7 @@ def make(
----------
structure: Structure
Structure used for benchmark
mp_id: str.
benchmark_mp_id: str.
Materials Project ID string
ml_phonon_task_doc: PhononBSDOSDoc
Phonon task doc from ML potential consisting of pymatgen band-structure object
Expand All @@ -549,7 +429,7 @@ def make(

benchmark = PhononBenchmarkMaker(name="Benchmark").make(
structure=structure,
mp_id=mp_id,
benchmark_mp_id=benchmark_mp_id,
ml_phonon_bs=ml_phonon_task_doc.phonon_bandstructure, # TODO take BS at top lvl?
dft_phonon_bs=dft_phonon_task_doc.phonon_bandstructure,
)
Expand Down
2 changes: 1 addition & 1 deletion autoplex/auto/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from autoplex.data.flows import IsoAtomMaker, RandomStructuresDataGenerator


# This should be a maker rather than a job in a job
@job
def get_phonon_ml_calculation_jobs(
ml_dir: str,
Expand Down
4 changes: 2 additions & 2 deletions autoplex/benchmark/flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class PhononBenchmarkMaker(Maker):
def make(
self,
structure: Structure,
mp_id: str,
benchmark_mp_id: str,
ml_phonon_bs: PhononBandStructureSymmLine,
dft_phonon_bs: PhononBandStructureSymmLine,
**kwargs,
Expand All @@ -44,7 +44,7 @@ def make(
----------
structure :
Pymatgen structures drawn from the Materials Project.
mp_id: str.
benchmark_mp_id: str.
Materials Project ID for the structure
ml_phonon_bs: PhononBandStructureSymmLine.
ML potential generated pymatgen phonon band-structure object
Expand Down
Loading

0 comments on commit 2159ba8

Please sign in to comment.