From 4a5b50bb20e88185a9cce571bad02471fabe53ee Mon Sep 17 00:00:00 2001
From: JaGeo <janine.george@bam.de>
Date: Fri, 26 Jan 2024 12:40:16 +0100
Subject: [PATCH 1/5] first renaming

---
 autoplex/auto/flows.py                  | 72 ++++++++++++-------------
 autoplex/benchmark/flows.py             |  4 +-
 tests/auto/test_auto_flows.py           | 38 ++++++-------
 tests/benchmark/test_benchmark_flows.py |  2 +-
 tests/data/test_datagen_flows.py        |  2 +-
 5 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/autoplex/auto/flows.py b/autoplex/auto/flows.py
index aab05b41e..ea3e16859 100644
--- a/autoplex/auto/flows.py
+++ b/autoplex/auto/flows.py
@@ -78,8 +78,8 @@
 #         structure_list: list[Structure],
 #         mp_ids,
 #         phonon_displacement_maker,
-#         benchmark_structure: Structure,  # structures
-#         mp_id,  # benchmark_mp_ids
+#         benchmark_structures: Structure,  # structures
+#         benchmark_mp_ids,  # benchmark_mp_ids
 #         **fit_kwargs,
 #     ):
 #         """
@@ -93,9 +93,9 @@
 #             list of materials project ids
 #         phonon_displacement_maker : .BaseVaspMaker
 #             Maker used to compute the forces for a supercell.
-#         benchmark_structure: Structure.
+#         benchmark_structures: Structure.
 #             Structure used for benchmarking.
-#         mp_id: str
+#         benchmark_mp_ids: str
 #             materials project ID corresponding to the benchmark structure
 #         """
 #         flows = []
@@ -114,7 +114,7 @@
 #                 symprec=self.symprec,
 #                 uc=self.uc,
 #                 supercell_matrix=self.supercell_matrix,
-#             ).make(structure=structure, mp_id=mp_ids[struc_i])
+#             ).make(structure=structure, benchmark_mp_ids=mp_ids[struc_i])
 #             flows.append(autoplex_datagen)
 #             datagen.update({mp_ids[struc_i]: autoplex_datagen.output})
 #
@@ -127,42 +127,42 @@
 #         flows.append(autoplex_fit)
 #
 #         autoplex_ml_phonon = get_phonon_ml_calculation_jobs(
-#             structure=benchmark_structure,
+#             structure=benchmark_structures,
 #             min_length=self.min_length,
 #             ml_dir=autoplex_fit.output,
 #         )
 #         flows.append(autoplex_ml_phonon)
-#         if mp_id not in mp_ids:
+#         if benchmark_mp_ids not in mp_ids:
 #             dft_phonons = DFTPhononMaker(
 #                 symprec=self.symprec,
 #                 phonon_displacement_maker=phonon_displacement_maker,
 #                 born_maker=None,
 #                 min_length=self.min_length,
-#             ).make(structure=benchmark_structure)
+#             ).make(structure=benchmark_structures)
 #             dft_phonons = update_user_incar_settings(
 #                 dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
 #             )
 #             flows.append(dft_phonons)
 #
-#             dft_reference = dft_phonons.output
+#             dft_references = dft_phonons.output
 #         else:
-#             dft_reference = datagen[mp_id]["phonon_data"][
+#             dft_references = datagen[benchmark_mp_ids]["phonon_data"][
 #                 "001"
 #             ]  # flag take all phonon runs
 #             # explanation for 001 = 0.01
 #
 #         autoplex_bm = PhononDFTMLBenchmarkFlow(name="testBM").make(
-#             structure=benchmark_structure,
-#             mp_id=mp_id,
+#             structure=benchmark_structures,
+#             benchmark_mp_ids=benchmark_mp_ids,
 #             ml_phonon_task_doc=autoplex_ml_phonon.output,
-#             dft_phonon_task_doc=dft_reference,
+#             dft_phonon_task_doc=dft_references,
 #         )
 #         flows.append(autoplex_bm)
 #         collect.append(autoplex_bm.output)
 #
 #         collect_bm = write_benchmark_metrics(
-#             benchmark_structure=benchmark_structure,
-#             mp_id=mp_id,
+#             benchmark_structures=benchmark_structures,
+#             benchmark_mp_ids=benchmark_mp_ids,
 #             rmse=collect,
 #             displacements=self.displacements,
 #         )
@@ -211,9 +211,9 @@ def make(
         structure_list: list[Structure],
         mp_ids,
         xyz_file: str | None = None,
-        dft_reference: PhononBSDOSDoc | None = None,
-        benchmark_structure: Structure | None = None,
-        mp_id: str | None = None,
+        dft_references: PhononBSDOSDoc | None = None,
+        benchmark_structures: Structure | None = None,
+        benchmark_mp_ids: str | None = None,
         **fit_kwargs,
     ):
         """
@@ -227,11 +227,11 @@ def make(
             materials project id.
         xyz_file:
             the already existing training data xyz file.
-        dft_reference:
+        dft_references:
             DFT reference file containing the PhononBSDOCDoc object.
-        benchmark_structure: Structure
+        benchmark_structures: Structure
             pymatgen structure for benchmarking.
-        mp_id:
+        benchmark_mp_ids:
             Materials Project ID of the benchmarking structure.
 
         """
@@ -285,16 +285,16 @@ def make(
 
         # not sure if it would make sense to put everything from here in its own flow?
         add_data_ml_phonon = get_phonon_ml_calculation_jobs(
-            structure=benchmark_structure,
+            structure=benchmark_structures,
             min_length=self.min_length,
             ml_dir=add_data_fit.output,
         )
         flows.append(add_data_ml_phonon)
 
-        if dft_reference is None:
-            if (mp_id in mp_ids) and self.add_dft_phonon_struct:
-                dft_reference = fit_input[mp_id]["phonon_data"]["001"]
-            elif (mp_id not in mp_ids) or (  # else?
+        if dft_references is None:
+            if (benchmark_mp_ids in mp_ids) and self.add_dft_phonon_struct:
+                dft_references = fit_input[benchmark_mp_ids]["phonon_data"]["001"]
+            elif (benchmark_mp_ids not in mp_ids) or (  # else?
                 self.add_dft_phonon_struct is False
             ):
                 dft_phonons = DFTPhononMaker(
@@ -302,25 +302,25 @@ def make(
                     phonon_displacement_maker=self.phonon_displacement_maker,
                     born_maker=None,
                     min_length=self.min_length,
-                ).make(structure=benchmark_structure)
+                ).make(structure=benchmark_structures)
                 dft_phonons = update_user_incar_settings(
                     dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
                 )
                 flows.append(dft_phonons)
-                dft_reference = dft_phonons.output
+                dft_references = dft_phonons.output
 
         add_data_bm = PhononDFTMLBenchmarkFlow(name="addDataBM").make(
-            structure=benchmark_structure,
-            mp_id=mp_id,
+            structure=benchmark_structures,
+            benchmark_mp_id=benchmark_mp_ids,
             ml_phonon_task_doc=add_data_ml_phonon.output,
-            dft_phonon_task_doc=dft_reference,
+            dft_phonon_task_doc=dft_references,
         )
         flows.append(add_data_bm)
         collect.append(add_data_bm.output)
 
         collect_bm = write_benchmark_metrics(
-            benchmark_structure=benchmark_structure,
-            mp_id=mp_id,
+            benchmark_structure=benchmark_structures,
+            mp_id=benchmark_mp_ids,
             rmse=collect,
             displacements=self.displacements,
         )
@@ -527,7 +527,7 @@ class PhononDFTMLBenchmarkFlow(Maker):
     def make(
         self,
         structure: Structure,
-        mp_id,
+        benchmark_mp_id,
         ml_phonon_task_doc: PhononBSDOSDoc,
         dft_phonon_task_doc: PhononBSDOSDoc,
     ):
@@ -538,7 +538,7 @@ def make(
         ----------
         structure: Structure
             Structure used for benchmark
-        mp_id: str.
+        benchmark_mp_id: str.
             Material project id string
         ml_phonon_task_doc: PhononBSDOSDoc
             Phonon task doc from ML potential consisting of pymatgen band-structure object
@@ -549,7 +549,7 @@ def make(
 
         benchmark = PhononBenchmarkMaker(name="Benchmark").make(
             structure=structure,
-            mp_id=mp_id,
+            benchmark_mp_id=benchmark_mp_id,
             ml_phonon_bs=ml_phonon_task_doc.phonon_bandstructure,  # TODO take BS at top lvl?
             dft_phonon_bs=dft_phonon_task_doc.phonon_bandstructure,
         )
diff --git a/autoplex/benchmark/flows.py b/autoplex/benchmark/flows.py
index fc90aab68..40b515407 100644
--- a/autoplex/benchmark/flows.py
+++ b/autoplex/benchmark/flows.py
@@ -32,7 +32,7 @@ class PhononBenchmarkMaker(Maker):
     def make(
         self,
         structure: Structure,
-        mp_id: str,
+        benchmark_mp_id: str,
         ml_phonon_bs: PhononBandStructureSymmLine,
         dft_phonon_bs: PhononBandStructureSymmLine,
         **kwargs,
@@ -44,7 +44,7 @@ def make(
         ----------
         structure :
             Pymatgen structures drawn from the Materials Project.
-        mp_id: str.
+        benchmark_mp_ids: str.
             Materials project IDs for the structure
         ml_phonon_bs: PhononBandStructureSymmLine.
             ML potential generated pymatgen phonon band-structure object
diff --git a/tests/auto/test_auto_flows.py b/tests/auto/test_auto_flows.py
index 898514411..420cfbc8d 100644
--- a/tests/auto/test_auto_flows.py
+++ b/tests/auto/test_auto_flows.py
@@ -27,8 +27,8 @@ def test_complete_dft_vs_ml_benchmark_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        mp_id="mp-22905",
-        benchmark_structure=structure,
+        benchmark_mp_id="mp-22905",
+        benchmark_structures=structure,
         phonon_displacement_maker=PhononDisplacementMaker(),
     )
 
@@ -108,10 +108,10 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        mp_id="mp-22905",
-        benchmark_structure=structure,
+        benchmark_mp_ids="mp-22905",
+        benchmark_structures=structure,
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
-        dft_reference=None,
+        dft_references=None,
     )
 
     add_data_workflow_with_dft_reference = CompleteDFTvsMLBenchmarkWorkflow(
@@ -124,10 +124,10 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        mp_id="mp-22905",
-        benchmark_structure=structure,
+        benchmark_mp_ids="mp-22905",
+        benchmark_structures=structure,
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
-        dft_reference=dft_reference,
+        dft_references=dft_reference,
     )
 
     add_data_workflow_add_phonon_false = CompleteDFTvsMLBenchmarkWorkflow(
@@ -140,10 +140,10 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        mp_id="mp-22905",
-        benchmark_structure=structure,
+        benchmark_mp_ids="mp-22905",
+        benchmark_structures=structure,
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
-        dft_reference=None,
+        dft_references=None,
     )
 
     add_data_workflow_add_random_false = CompleteDFTvsMLBenchmarkWorkflow(
@@ -156,10 +156,10 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        mp_id="mp-22905",
-        benchmark_structure=structure,
+        benchmark_mp_ids="mp-22905",
+        benchmark_structures=structure,
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
-        dft_reference=None,
+        dft_references=None,
     )
 
     add_data_workflow_with_same_mpid = CompleteDFTvsMLBenchmarkWorkflow(
@@ -171,10 +171,10 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["mp-22905"],
-        mp_id="mp-22905",
-        benchmark_structure=structure,
+        benchmark_mp_ids="mp-22905",
+        benchmark_structures=structure,
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
-        dft_reference=None,
+        dft_references=None,
     )
 
     ref_paths = {
@@ -255,11 +255,11 @@ def test_phonon_dft_ml_data_generation_flow(
 
     flow_data_generation = DFTDataGenerationFlow(
         n_struct=3, min_length=10, symprec=1e-2
-    ).make(structure=structure, mp_id="mp-22905")
+    ).make(structure=structure, benchmark_mp_ids="mp-22905")
 
     flow_data_generation_without_rattled_structures = DFTDataGenerationFlow(
         n_struct=0, min_length=10, symprec=1e-2
-    ).make(structure=structure, mp_id="mp-22905")
+    ).make(structure=structure, benchmark_mp_ids="mp-22905")
 
     ref_paths = {
         "tight relax 1": "dft_ml_data_generation/tight_relax_1/",
diff --git a/tests/benchmark/test_benchmark_flows.py b/tests/benchmark/test_benchmark_flows.py
index f09dbc0f3..1bf160da0 100644
--- a/tests/benchmark/test_benchmark_flows.py
+++ b/tests/benchmark/test_benchmark_flows.py
@@ -21,7 +21,7 @@ def test_benchmark(test_dir, clean_dir):
     ml_bs = get_ph_bs_symm_line(bands_path=ml_bs_file_path)
 
     benchmark_flow = PhononBenchmarkMaker().make(
-        structure=df_bs.structure, ml_phonon_bs=ml_bs, dft_phonon_bs=df_bs, mp_id="test"
+        structure=df_bs.structure, ml_phonon_bs=ml_bs, dft_phonon_bs=df_bs, benchmark_mp_ids="test"
     )
     assert len(benchmark_flow.jobs) == 1
 
diff --git a/tests/data/test_datagen_flows.py b/tests/data/test_datagen_flows.py
index a3fd5fa39..e5fb1f0b6 100644
--- a/tests/data/test_datagen_flows.py
+++ b/tests/data/test_datagen_flows.py
@@ -34,7 +34,7 @@ def test_data_generation(vasp_test_dir, mock_vasp, clean_dir):
             "check_inputs": ["incar", "kpoints", "potcar"],
         },
     }
-    data_gen = RandomStructuresDataGenerator(n_struct=3).make(structure=structure, mp_id=test_mpid)
+    data_gen = RandomStructuresDataGenerator(n_struct=3).make(structure=structure, benchmark_mp_ids=test_mpid)
 
     data_gen = update_user_incar_settings(data_gen, {"ISMEAR": 0})
 

From 62a71d96a6de960c5a98bf48b427d814eed0e519 Mon Sep 17 00:00:00 2001
From: JaGeo <janine.george@bam.de>
Date: Fri, 26 Jan 2024 13:59:23 +0100
Subject: [PATCH 2/5] fix more benchmark structures

---
 autoplex/auto/flows.py           | 230 ++++++++-----------------------
 autoplex/auto/jobs.py            |   2 +-
 tests/auto/test_auto_flows.py    |  26 ++--
 tests/data/test_datagen_flows.py |   2 +-
 4 files changed, 70 insertions(+), 190 deletions(-)

diff --git a/autoplex/auto/flows.py b/autoplex/auto/flows.py
index ea3e16859..befa91980 100644
--- a/autoplex/auto/flows.py
+++ b/autoplex/auto/flows.py
@@ -38,138 +38,6 @@
 # Volker's idea: provide several default flows with different setting/setups
 # TODO TaskDocs
 
-#
-# @dataclass
-# class CompleteDFTvsMLBenchmarkWorkflow(Maker):
-#     """
-#     Maker to calculate harmonic phonons with DFT, fit GAP and benchmark the results.
-#
-#     User has no data.
-#
-#     Parameters
-#     ----------
-#     name : str
-#         Name of the flows produced by this maker
-#     n_struct: int.
-#         The total number of randomly displaced structures to be generated.
-#     displacements: List[float]
-#         displacement distance for phonons
-#     symprec : float
-#         Symmetry precision to use in the
-#         reduction of symmetry to find the primitive/conventional cell
-#         (use_primitive_standard_structure, use_conventional_standard_structure)
-#         and to handle all symmetry-related tasks in phonopy
-#     uc: bool.
-#         If True, will generate randomly distorted structures (unitcells)
-#         and add static computation jobs to the flow
-#
-#     """
-#
-#     name: str = "complete_workflow"
-#     n_struct: int = 1
-#     displacements: list[float] = field(default_factory=lambda: [0.01])
-#     min_length: int = 20
-#     symprec: float = 1e-4
-#     uc: bool = False  # to get rattled unit cells
-#     supercell_matrix: Matrix3D | None = None
-#
-#     def make(
-#         self,
-#         structure_list: list[Structure],
-#         mp_ids,
-#         phonon_displacement_maker,
-#         benchmark_structures: Structure,  # structures
-#         benchmark_mp_ids,  # benchmark_mp_ids
-#         **fit_kwargs,
-#     ):
-#         """
-#         Make the complete workflow for DFT vs. ML benchmarking.
-#
-#         Parameters
-#         ----------
-#         structure_list: List[Structure]
-#             list of pymatgen structures
-#         mp_ids : list.
-#             list of materials project ids
-#         phonon_displacement_maker : .BaseVaspMaker
-#             Maker used to compute the forces for a supercell.
-#         benchmark_structures: Structure.
-#             Structure used for benchmarking.
-#         benchmark_mp_ids: str
-#             materials project ID corresponding to the benchmark structure
-#         """
-#         flows = []
-#         datagen = {}
-#         collect = []
-#         isoatoms = get_iso_atom(structure_list)
-#         flows.append(isoatoms)
-#
-#         for struc_i, structure in enumerate(structure_list):
-#             autoplex_datagen = DFTDataGenerationFlow(
-#                 name="datagen",
-#                 phonon_displacement_maker=phonon_displacement_maker,
-#                 n_struct=self.n_struct,
-#                 displacements=self.displacements,
-#                 min_length=self.min_length,
-#                 symprec=self.symprec,
-#                 uc=self.uc,
-#                 supercell_matrix=self.supercell_matrix,
-#             ).make(structure=structure, benchmark_mp_ids=mp_ids[struc_i])
-#             flows.append(autoplex_datagen)
-#             datagen.update({mp_ids[struc_i]: autoplex_datagen.output})
-#
-#         autoplex_fit = PhononDFTMLFitFlow().make(
-#             species=isoatoms.output["species"],
-#             isolated_atoms_energy=isoatoms.output["energies"],
-#             fit_input=datagen,
-#             **fit_kwargs,
-#         )
-#         flows.append(autoplex_fit)
-#
-#         autoplex_ml_phonon = get_phonon_ml_calculation_jobs(
-#             structure=benchmark_structures,
-#             min_length=self.min_length,
-#             ml_dir=autoplex_fit.output,
-#         )
-#         flows.append(autoplex_ml_phonon)
-#         if benchmark_mp_ids not in mp_ids:
-#             dft_phonons = DFTPhononMaker(
-#                 symprec=self.symprec,
-#                 phonon_displacement_maker=phonon_displacement_maker,
-#                 born_maker=None,
-#                 min_length=self.min_length,
-#             ).make(structure=benchmark_structures)
-#             dft_phonons = update_user_incar_settings(
-#                 dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
-#             )
-#             flows.append(dft_phonons)
-#
-#             dft_references = dft_phonons.output
-#         else:
-#             dft_references = datagen[benchmark_mp_ids]["phonon_data"][
-#                 "001"
-#             ]  # flag take all phonon runs
-#             # explanation for 001 = 0.01
-#
-#         autoplex_bm = PhononDFTMLBenchmarkFlow(name="testBM").make(
-#             structure=benchmark_structures,
-#             benchmark_mp_ids=benchmark_mp_ids,
-#             ml_phonon_task_doc=autoplex_ml_phonon.output,
-#             dft_phonon_task_doc=dft_references,
-#         )
-#         flows.append(autoplex_bm)
-#         collect.append(autoplex_bm.output)
-#
-#         collect_bm = write_benchmark_metrics(
-#             benchmark_structures=benchmark_structures,
-#             benchmark_mp_ids=benchmark_mp_ids,
-#             rmse=collect,
-#             displacements=self.displacements,
-#         )
-#         flows.append(collect_bm)
-#
-#         return Flow(flows)
-
 
 @dataclass
 class CompleteDFTvsMLBenchmarkWorkflow(
@@ -283,50 +151,60 @@ def make(
         )
         flows.append(add_data_fit)
 
-        # not sure if it would make sense to put everything from here in its own flow?
-        add_data_ml_phonon = get_phonon_ml_calculation_jobs(
-            structure=benchmark_structures,
-            min_length=self.min_length,
-            ml_dir=add_data_fit.output,
-        )
-        flows.append(add_data_ml_phonon)
-
-        if dft_references is None:
-            if (benchmark_mp_ids in mp_ids) and self.add_dft_phonon_struct:
-                dft_references = fit_input[benchmark_mp_ids]["phonon_data"]["001"]
-            elif (benchmark_mp_ids not in mp_ids) or (  # else?
-                self.add_dft_phonon_struct is False
-            ):
-                dft_phonons = DFTPhononMaker(
-                    symprec=self.symprec,
-                    phonon_displacement_maker=self.phonon_displacement_maker,
-                    born_maker=None,
-                    min_length=self.min_length,
-                ).make(structure=benchmark_structures)
-                dft_phonons = update_user_incar_settings(
-                    dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
-                )
-                flows.append(dft_phonons)
-                dft_references = dft_phonons.output
-
-        add_data_bm = PhononDFTMLBenchmarkFlow(name="addDataBM").make(
-            structure=benchmark_structures,
-            benchmark_mp_id=benchmark_mp_ids,
-            ml_phonon_task_doc=add_data_ml_phonon.output,
-            dft_phonon_task_doc=dft_references,
-        )
-        flows.append(add_data_bm)
-        collect.append(add_data_bm.output)
-
-        collect_bm = write_benchmark_metrics(
-            benchmark_structure=benchmark_structures,
-            mp_id=benchmark_mp_ids,
-            rmse=collect,
-            displacements=self.displacements,
-        )
-        flows.append(collect_bm)
+        bm_outputs=[]
 
-        return Flow(flows, collect_bm.output)
+        for ibenchmark_structure, benchmark_structure in enumerate(benchmark_structures):
+            # TODO: consider moving everything from here on into its own flow.
+            add_data_ml_phonon = get_phonon_ml_calculation_jobs(
+                structure=benchmark_structure,
+                min_length=self.min_length,
+                ml_dir=add_data_fit.output,
+            )
+            flows.append(add_data_ml_phonon)
+
+            if dft_references is None and benchmark_mp_ids is not None:
+                    if (benchmark_mp_ids[ibenchmark_structure] in mp_ids) and self.add_dft_phonon_struct:
+                        dft_references = fit_input[benchmark_mp_ids[ibenchmark_structure]]["phonon_data"]["001"]
+                    elif (benchmark_mp_ids[ibenchmark_structure] not in mp_ids) or (  # else?
+                        self.add_dft_phonon_struct is False
+                    ):
+                        dft_phonons = DFTPhononMaker(
+                            symprec=self.symprec,
+                            phonon_displacement_maker=self.phonon_displacement_maker,
+                            born_maker=None,
+                            min_length=self.min_length,
+                        ).make(structure=benchmark_structure)
+                        dft_phonons = update_user_incar_settings(
+                            dft_phonons, {"NPAR": 4, "ISPIN": 1, "LAECHG": False, "ISMEAR": 0}
+                        )
+                        flows.append(dft_phonons)
+                        dft_references = dft_phonons.output
+
+                    add_data_bm = PhononDFTMLBenchmarkFlow(name="addDataBM").make(
+                        structure=benchmark_structure,
+                        benchmark_mp_id=benchmark_mp_ids[ibenchmark_structure],
+                        ml_phonon_task_doc=add_data_ml_phonon.output,
+                        dft_phonon_task_doc=dft_references,
+                    )
+            else:
+                add_data_bm = PhononDFTMLBenchmarkFlow(name="addDataBM").make(
+                    structure=benchmark_structure,
+                    benchmark_mp_id=benchmark_mp_ids[ibenchmark_structure],
+                    ml_phonon_task_doc=add_data_ml_phonon.output,
+                    dft_phonon_task_doc=dft_references[ibenchmark_structure],
+                )
+            flows.append(add_data_bm)
+            collect.append(add_data_bm.output)
+
+            collect_bm = write_benchmark_metrics(
+                benchmark_structure=benchmark_structure,
+                mp_id=benchmark_mp_ids[ibenchmark_structure],
+                rmse=collect,
+                displacements=self.displacements,
+            )
+            flows.append(collect_bm)
+            bm_outputs.append(collect_bm.output)
+        return Flow(flows, bm_outputs)
 
     def add_dft_phonons(
         self,
@@ -511,6 +389,8 @@ def make(
         return Flow(flows, ml_fit_flow.output)
 
 
+# TODO: extend this flow to run over more than one structure.
+# It is also unclear why this needs to be a flow at all.
 @dataclass
 class PhononDFTMLBenchmarkFlow(Maker):
     """
diff --git a/autoplex/auto/jobs.py b/autoplex/auto/jobs.py
index 840110c30..96a485ab5 100644
--- a/autoplex/auto/jobs.py
+++ b/autoplex/auto/jobs.py
@@ -17,7 +17,7 @@
 
 from autoplex.data.flows import IsoAtomMaker, RandomStructuresDataGenerator
 
-
+# TODO: this should be a Maker rather than a job nested inside a job.
 @job
 def get_phonon_ml_calculation_jobs(
     ml_dir: str,
diff --git a/tests/auto/test_auto_flows.py b/tests/auto/test_auto_flows.py
index 420cfbc8d..8ca85d9a5 100644
--- a/tests/auto/test_auto_flows.py
+++ b/tests/auto/test_auto_flows.py
@@ -27,8 +27,8 @@ def test_complete_dft_vs_ml_benchmark_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        benchmark_mp_id="mp-22905",
-        benchmark_structures=structure,
+        benchmark_mp_ids=["mp-22905"],
+        benchmark_structures=[structure],
         phonon_displacement_maker=PhononDisplacementMaker(),
     )
 
@@ -108,8 +108,8 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        benchmark_mp_ids="mp-22905",
-        benchmark_structures=structure,
+        benchmark_mp_ids=["mp-22905"],
+        benchmark_structures=[structure],
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
         dft_references=None,
     )
@@ -124,10 +124,10 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        benchmark_mp_ids="mp-22905",
-        benchmark_structures=structure,
+        benchmark_mp_ids=["mp-22905"],
+        benchmark_structures=[structure],
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
-        dft_references=dft_reference,
+        dft_references=[dft_reference],
     )
 
     add_data_workflow_add_phonon_false = CompleteDFTvsMLBenchmarkWorkflow(
@@ -140,8 +140,8 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        benchmark_mp_ids="mp-22905",
-        benchmark_structures=structure,
+        benchmark_mp_ids=["mp-22905"],
+        benchmark_structures=[structure],
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
         dft_references=None,
     )
@@ -156,8 +156,8 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["test"],
-        benchmark_mp_ids="mp-22905",
-        benchmark_structures=structure,
+        benchmark_mp_ids=["mp-22905"],
+        benchmark_structures=[structure],
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
         dft_references=None,
     )
@@ -171,8 +171,8 @@ def test_add_data_to_dataset_workflow(
     ).make(
         structure_list=[structure],
         mp_ids=["mp-22905"],
-        benchmark_mp_ids="mp-22905",
-        benchmark_structures=structure,
+        benchmark_mp_ids=["mp-22905"],
+        benchmark_structures=[structure],
         xyz_file=test_dir / "fitting" / "ref_files" / "trainGAP.xyz",
         dft_references=None,
     )
diff --git a/tests/data/test_datagen_flows.py b/tests/data/test_datagen_flows.py
index e5fb1f0b6..a3fd5fa39 100644
--- a/tests/data/test_datagen_flows.py
+++ b/tests/data/test_datagen_flows.py
@@ -34,7 +34,7 @@ def test_data_generation(vasp_test_dir, mock_vasp, clean_dir):
             "check_inputs": ["incar", "kpoints", "potcar"],
         },
     }
-    data_gen = RandomStructuresDataGenerator(n_struct=3).make(structure=structure, benchmark_mp_ids=test_mpid)
+    data_gen = RandomStructuresDataGenerator(n_struct=3).make(structure=structure, mp_id=test_mpid)
 
     data_gen = update_user_incar_settings(data_gen, {"ISMEAR": 0})
 

From 827ae81da9eb3a02a594178a747b2d42fed6b937 Mon Sep 17 00:00:00 2001
From: JaGeo <janine.george@bam.de>
Date: Fri, 26 Jan 2024 14:13:44 +0100
Subject: [PATCH 3/5] fix unintended side effect

---
 tests/auto/test_auto_flows.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/auto/test_auto_flows.py b/tests/auto/test_auto_flows.py
index 8ca85d9a5..4d9e547f4 100644
--- a/tests/auto/test_auto_flows.py
+++ b/tests/auto/test_auto_flows.py
@@ -255,11 +255,11 @@ def test_phonon_dft_ml_data_generation_flow(
 
     flow_data_generation = DFTDataGenerationFlow(
         n_struct=3, min_length=10, symprec=1e-2
-    ).make(structure=structure, benchmark_mp_ids="mp-22905")
+    ).make(structure=structure, mp_id="mp-22905")
 
     flow_data_generation_without_rattled_structures = DFTDataGenerationFlow(
         n_struct=0, min_length=10, symprec=1e-2
-    ).make(structure=structure, benchmark_mp_ids="mp-22905")
+    ).make(structure=structure, mp_id="mp-22905")
 
     ref_paths = {
         "tight relax 1": "dft_ml_data_generation/tight_relax_1/",

From 67a35810b79828e841f2585e03bc131cd6c84541 Mon Sep 17 00:00:00 2001
From: JaGeo <janine.george@bam.de>
Date: Fri, 26 Jan 2024 14:16:55 +0100
Subject: [PATCH 4/5] Fix further side effect, comments

---
 autoplex/benchmark/flows.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autoplex/benchmark/flows.py b/autoplex/benchmark/flows.py
index 40b515407..02fd94eaf 100644
--- a/autoplex/benchmark/flows.py
+++ b/autoplex/benchmark/flows.py
@@ -44,7 +44,7 @@ def make(
         ----------
         structure :
             Pymatgen structures drawn from the Materials Project.
-        benchmark_mp_ids: str.
+        benchmark_mp_id: str.
             Materials project IDs for the structure
         ml_phonon_bs: PhononBandStructureSymmLine.
             ML potential generated pymatgen phonon band-structure object

From 169c9a65f8be35505b6fe088a71d7c1852fa6b1e Mon Sep 17 00:00:00 2001
From: JaGeo <janine.george@bam.de>
Date: Fri, 26 Jan 2024 14:21:16 +0100
Subject: [PATCH 5/5] fix further side effects

---
 tests/benchmark/test_benchmark_flows.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/benchmark/test_benchmark_flows.py b/tests/benchmark/test_benchmark_flows.py
index 1bf160da0..2a6f576ce 100644
--- a/tests/benchmark/test_benchmark_flows.py
+++ b/tests/benchmark/test_benchmark_flows.py
@@ -21,7 +21,7 @@ def test_benchmark(test_dir, clean_dir):
     ml_bs = get_ph_bs_symm_line(bands_path=ml_bs_file_path)
 
     benchmark_flow = PhononBenchmarkMaker().make(
-        structure=df_bs.structure, ml_phonon_bs=ml_bs, dft_phonon_bs=df_bs, benchmark_mp_ids="test"
+        structure=df_bs.structure, ml_phonon_bs=ml_bs, dft_phonon_bs=df_bs, benchmark_mp_id="test"
     )
     assert len(benchmark_flow.jobs) == 1