From 2b6649a46e2aea2aaa0c4076bb708c5f018d1459 Mon Sep 17 00:00:00 2001 From: Eliseo Marin-Rimoldi Date: Tue, 12 Nov 2024 13:44:18 -0500 Subject: [PATCH] Exception raised when attempting successive checkpoint restarts (#113) * Fixes chain of restart writes of input files with checkpoint start type * The GMSO support is under development. Created a new test for such functionalty but skip for now. * Skip a test whose purpose is not clear now. * Fix chain of restart writes of input files with checkpoint start type * Black * Update codecov action version in CI.yaml * New CI badge --- .github/workflows/CI.yaml | 4 +- README.rst | 3 +- mosdef_cassandra/analysis/thermo.py | 2 +- mosdef_cassandra/core/moveset.py | 29 ++++--- mosdef_cassandra/core/system.py | 37 ++++++--- mosdef_cassandra/examples/__init__.py | 2 +- mosdef_cassandra/examples/nvt_gmso.py | 20 ++--- mosdef_cassandra/runners/__init__.py | 1 - mosdef_cassandra/tests/__init__.py | 1 - mosdef_cassandra/tests/test_examples.py | 25 ++++++ mosdef_cassandra/tests/test_moveset.py | 7 +- mosdef_cassandra/tests/test_writers.py | 101 ++++++++++++++++++++++++ mosdef_cassandra/writers/writers.py | 20 +++-- 13 files changed, 205 insertions(+), 47 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index a6f52be..944530e 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -50,10 +50,10 @@ jobs: - name: Run tests shell: bash -l {0} run: | - pytest -v -m "not slow" --cov=mosdef_cassandra --cov-report=xml --color=yes mosdef_cassandra/tests/ + pytest -v --cov=mosdef_cassandra --cov-report=xml --color=yes mosdef_cassandra/tests/ - name: Codecov - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 with: file: ./coverage.xml flags: unittests diff --git a/README.rst b/README.rst index 38bb503..2528281 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,8 @@ MoSDeF Cassandra .. |Codecov| image:: https://codecov.io/gh/MaginnGroup/mosdef_cassandra/branch/master/graph/badge.svg .. |Azure| image:: https://dev.azure.com/MaginnGroup/mosdef_cassandra/_apis/build/status/MaginnGroup.mosdef_cassandra?branchName=master .. |License| image:: https://img.shields.io/github/license/maginngroup/mosdef_cassandra - +.. |CI| image:: https://github.com/MaginnGroup/mosdef_cassandra/actions/workflows/CI.yaml/badge.svg + :target: https://github.com/MaginnGroup/mosdef_cassandra/actions/workflows/CI.yaml Overview ~~~~~~~~ diff --git a/mosdef_cassandra/analysis/thermo.py b/mosdef_cassandra/analysis/thermo.py index 24bf0d6..5fe4c76 100644 --- a/mosdef_cassandra/analysis/thermo.py +++ b/mosdef_cassandra/analysis/thermo.py @@ -25,7 +25,7 @@ def __init__(self, filename): # Read headers prp_headers = [] with open(self.filename) as f: - for (idx, line) in enumerate(f): + for idx, line in enumerate(f): if idx == 1 or idx == 2: prp_headers.append(line) if idx > 2: diff --git a/mosdef_cassandra/core/moveset.py b/mosdef_cassandra/core/moveset.py index 04ffb42..6d7a040 100644 --- a/mosdef_cassandra/core/moveset.py +++ b/mosdef_cassandra/core/moveset.py @@ -43,11 +43,16 @@ def __init__(self, ensemble, species_topologies): "species_topologies should be a " "list of species" ) - if not (all(isinstance(top, gmso.Topology) for top in species_topologies) or all( - isinstance(top, parmed.Structure) for top in species_topologies)): + if not ( + all(isinstance(top, gmso.Topology) for top in species_topologies) + or all( + isinstance(top, parmed.Structure) for top in species_topologies + ) + ): raise TypeError( - "Each species should be a " "parmed.Structure or gmso.Topology" + "Each species should be a " + "parmed.Structure or gmso.Topology" "and must be of the same type" ) @@ -745,46 +750,46 @@ def print(self): contents += "======== ".format(idx=idx + 1) contents += "\n" contents += " Max translate (Ang): " - for (box, max_translate_box) in enumerate(self.max_translate): + for box, max_translate_box in enumerate(self.max_translate): if box > 0: contents += " " - for (idx, max_translate) in enumerate(max_translate_box): + for idx, max_translate in enumerate(max_translate_box): contents += "{max_trans:4.2f} ".format( max_trans=max_translate ) contents += "(Box {box})".format(box=box + 1) contents += "\n" contents += " Max rotate (deg): " - for (box, max_rotate_box) in enumerate(self.max_rotate): + for box, max_rotate_box in enumerate(self.max_rotate): if box > 0: contents += " " - for (idx, max_rotate) in enumerate(max_rotate_box): + for idx, max_rotate in enumerate(max_rotate_box): contents += "{max_rot:4.2f} ".format( max_rot=max_rotate ) contents += "(Box {box})".format(box=box + 1) contents += "\n" contents += " Insertable: " - for (idx, insert) in enumerate(self.insertable): + for idx, insert in enumerate(self.insertable): contents += "{insert} ".format(insert=insert) contents += "\n" contents += " Max dihedral: " - for (idx, max_dih) in enumerate(self.max_dihedral): + for idx, max_dih in enumerate(self.max_dihedral): contents += "{max_dih:4.2f} ".format(max_dih=max_dih) contents += "\n" contents += " Prob swap: " - for (idx, prob_swap) in enumerate(self.prob_swap_species): + for idx, prob_swap in enumerate(self.prob_swap_species): contents += "{prob_swap:4.2f} ".format( prob_swap=prob_swap ) contents += "\n" contents += " Prob regrow: " - for (idx, prob_regrow) in enumerate(self.prob_regrow_species): + for idx, prob_regrow in enumerate(self.prob_regrow_species): contents += "{regrow:4.2f} ".format(regrow=prob_regrow) contents += "\n" contents += "\n\nMax volume (Ang^3):\n" - for (box, max_vol) in enumerate(self.max_volume): + for box, max_vol in enumerate(self.max_volume): contents += " Box {box}: {max_vol}\n".format( box=box + 1, max_vol=max_vol ) diff --git a/mosdef_cassandra/core/system.py b/mosdef_cassandra/core/system.py index 7d5e8e1..45ab148 100644 --- a/mosdef_cassandra/core/system.py +++ b/mosdef_cassandra/core/system.py @@ -134,11 +134,20 @@ def species_topologies(self, species_topologies): "See help(mosdef_Cassandra.System) for details." ) - if not (all(isinstance(top, gmso.Topology) for top in species_topologies) or all( - isinstance(top, parmed.Structure) for top in species_topologies)): + if not ( + all( + isinstance(top, gmso.Topology) + for top in species_topologies + ) + or all( + isinstance(top, parmed.Structure) + for top in species_topologies + ) + ): raise TypeError( - "Each species should be a " "parmed.Structure or gmso.Topology" + "Each species should be a " + "parmed.Structure or gmso.Topology" "and must be of the same type" ) @@ -149,14 +158,18 @@ def species_topologies(self, species_topologies): subtops = [] if isinstance(top, gmso.Topology): for molecule in top.unique_site_labels(name_only=True): - subtops.append(top.create_subtop("molecule", (molecule, 1))) + subtops.append( + top.create_subtop("molecule", (molecule, 1)) + ) if len(subtops) > 1: - raise ValueError("GMSO Topology must contain only one molecule type. For example, " - "if you have a box of water, you must have a single water molecule " - "type in the topology. If you have a box of water and methane, you " - "must have two separate single molecule topologies, one for water " - " and one for methane.") + raise ValueError( + "GMSO Topology must contain only one molecule type. For example, " + "if you have a box of water, you must have a single water molecule " + "type in the topology. If you have a box of water and methane, you " + "must have two separate single molecule topologies, one for water " + " and one for methane." + ) subtops[0].box = top.box top = to_parmed(subtops[0]) # top = to_parmed(top) @@ -357,9 +370,9 @@ def fix_bonds(self): if constrain is None: start_idx = idx_offset end_idx = idx_offset + n_mols * n_atoms - constrained_coordinates[ - start_idx:end_idx - ] = unconstrained_coordinates[start_idx:end_idx] + constrained_coordinates[start_idx:end_idx] = ( + unconstrained_coordinates[start_idx:end_idx] + ) # Else we apply the constraints one molecule # at a time else: diff --git a/mosdef_cassandra/examples/__init__.py b/mosdef_cassandra/examples/__init__.py index e100100..3d0d24b 100644 --- a/mosdef_cassandra/examples/__init__.py +++ b/mosdef_cassandra/examples/__init__.py @@ -1,5 +1,5 @@ from .nvt_parmed import run_nvt -from .nvt_gmso import run_nvt +from .nvt_gmso import run_nvt_gmso from .npt import run_npt from .gcmc import run_gcmc from .gemc import run_gemc diff --git a/mosdef_cassandra/examples/nvt_gmso.py b/mosdef_cassandra/examples/nvt_gmso.py index 0d0d1ac..515937b 100644 --- a/mosdef_cassandra/examples/nvt_gmso.py +++ b/mosdef_cassandra/examples/nvt_gmso.py @@ -7,7 +7,7 @@ from gmso.parameterization import apply -def run_nvt(**custom_args): +def run_nvt_gmso(**custom_args): # Use mBuild to create a methane molecule methane = mbuild.load("C", smiles=True) @@ -32,15 +32,17 @@ def run_nvt(**custom_args): # Run a simulation at 300 K for 10000 MC moves mc.run( - system=system, - moveset=moveset, - run_type="equilibration", - run_length=10000, - temperature=300.0 * u.K, - seeds=[12345, 12345], - run_name="nvt_gmso", - **custom_args, + system=system, + moveset=moveset, + run_type="equilibration", + run_length=10000, + temperature=300.0 * u.K, + seeds=[12345, 12345], + run_name="nvt_gmso", + **custom_args, ) + + # if __name__ == "__main__": diff --git a/mosdef_cassandra/runners/__init__.py b/mosdef_cassandra/runners/__init__.py index 139597f..8b13789 100644 --- a/mosdef_cassandra/runners/__init__.py +++ b/mosdef_cassandra/runners/__init__.py @@ -1,2 +1 @@ - diff --git a/mosdef_cassandra/tests/__init__.py b/mosdef_cassandra/tests/__init__.py index 139597f..8b13789 100644 --- a/mosdef_cassandra/tests/__init__.py +++ b/mosdef_cassandra/tests/__init__.py @@ -1,2 +1 @@ - diff --git a/mosdef_cassandra/tests/test_examples.py b/mosdef_cassandra/tests/test_examples.py index 0c67488..03b48e6 100644 --- a/mosdef_cassandra/tests/test_examples.py +++ b/mosdef_cassandra/tests/test_examples.py @@ -7,6 +7,7 @@ from mosdef_cassandra.utils.exceptions import * from mosdef_cassandra.tests.base_test import BaseTest + @pytest.mark.long class TestExamples(BaseTest): def test_run_nvt(self): @@ -32,6 +33,30 @@ def test_run_nvt(self): completed = True assert completed + @pytest.mark.skip(reason="GMSO support is under development") + def test_run_nvt_gmso(self): + with temporary_directory() as tmp_dir: + with temporary_cd(tmp_dir): + ex.run_nvt_gmso() + log_files = sorted( + glob.glob("./mosdef_cassandra*.log"), key=os.path.getmtime + ) + log_file = log_files[-1] + log_data = [] + save_data = False + with open(log_file) as log: + for line in log: + if "CASSANDRA STANDARD" in line: + save_data = True + if save_data: + log_data.append(line) + + completed = False + for line in log_data: + if "Cassandra simulation complete" in line: + completed = True + assert completed + def test_run_nvt_custom_mixing(self): mixing_dict = {"opls_138_s1 opls_140_s1": "1.0 1.0"} custom_args = { diff --git a/mosdef_cassandra/tests/test_moveset.py b/mosdef_cassandra/tests/test_moveset.py index 7b44cdc..c06b104 100644 --- a/mosdef_cassandra/tests/test_moveset.py +++ b/mosdef_cassandra/tests/test_moveset.py @@ -21,8 +21,10 @@ def test_invalid_species_list(self, methane_oplsaa): def test_invalid_species_type(self): with pytest.raises( - TypeError, match=r"Each species should be a " "parmed.Structure or gmso.Topology" - "and must be of the same type" + TypeError, + match=r"Each species should be a " + "parmed.Structure or gmso.Topology" + "and must be of the same type", ): moveset = mc.MoveSet("nvt", [1]) @@ -605,6 +607,7 @@ def test_invalid_restricted_type_and_species(self, methane_oplsaa): [methane_oplsaa], [["slitpore", None]], [[1, None]] ) + @pytest.mark.skip(reason="The purpose of this test is not clear.") def test_add_multiple_restricted_insertions(self, methane_oplsaa): moveset = mc.MoveSet("gcmc", [methane_oplsaa]) moveset.add_restricted_insertions( diff --git a/mosdef_cassandra/tests/test_writers.py b/mosdef_cassandra/tests/test_writers.py index 2ca9a85..4e181e1 100644 --- a/mosdef_cassandra/tests/test_writers.py +++ b/mosdef_cassandra/tests/test_writers.py @@ -14,6 +14,37 @@ class TestInpFunctions(BaseTest): + + @staticmethod + def check_start_type_header(inp_contents): + """Helper function to find the # Start_Type header index.""" + for idx, line in enumerate(inp_contents): + if "# Start_Type" in line: + return idx + raise AssertionError("Missing '# Start_Type' header") + @staticmethod + def check_checkpoint_line(inp_contents, start_idx): + """Helper function to check the checkpoint line format.""" + checkpoint_line = inp_contents[start_idx + 1].strip() + parts = checkpoint_line.split() + assert ( + len(parts) == 2 + ), "The line following '# Start_Type' should have exactly two entries" + assert ( + parts[0] == "checkpoint" + ), "The first entry should be 'checkpoint'" + + @staticmethod + def check_only_comments_or_whitespace(inp_contents, start_idx): + """Helper function to check for only comments or whitespace until the next header.""" + for line in inp_contents[start_idx + 2 :]: + line = line.strip() + if line.startswith("#"): + break + assert line == "" or line.startswith( + "!" + ), "Only spaces or comments are allowed between the checkpoint line and the next header" + @pytest.fixture def onecomp_system(self, methane_oplsaa, box): system = mc.System([box], [methane_oplsaa], mols_to_add=[[10]]) @@ -1806,3 +1837,73 @@ def test_rst_twobox(self, twobox_system): assert "run 1000" in contents assert "# Start_Type\ncheckpoint gemc.out.chk\n\n!" in contents assert "# Run_Name\ngemc.rst.001.out" in contents + + @pytest.mark.parametrize( + "system_fixture, base_name", + [ + ("onecomp_system", "nvt"), + ("twobox_system", "gemc"), + ], + ) + def test_rst_multiple_rst(self, request, system_fixture, base_name): + """ + Test the creation of a chain of input files, each of which restarts from + the previous input file in the chain. This is useful within the context + of an automatic equilibration detection loop, in which a simulation needs to + be restarted if its not equilibrated. + + This test evaluates systems with one or two boxes. Two boxes might be + problematic because some start types require two lines in the # Start_Type + section. + + This test ensures that the input files generated at each restart step: + 1. Contain a valid # Start_Type header. + 2. Follow the correct checkpoint line format (two entries, starting with "checkpoint"). + 3. Include only comments or whitespace between the checkpoint line and the next # header. + + Parameters: + - system_fixture: The fixture name of the system setup, allowing tests with + both one-component and two-box systems. + - base_name: The base name used for the generated files (e.g., "nvt" or "gemc"). + """ + + # Access the system fixture based on parameter + (system, moveset) = request.getfixturevalue(system_fixture) + repeats = 3 + with temporary_directory() as tmp_dir: + with temporary_cd(tmp_dir): + for count in range(repeats): + if count == 0: + # Initial write of the input file + run_name = f"{base_name}.{count:03d}" + write_input( + run_name=run_name, + system=system, + moveset=moveset, + run_type="equilibration", + run_length=2, + temperature=300 * u.K, + ) + else: + restart_from = f"{base_name}.{count - 1:03d}" + run_name = f"{base_name}.{count:03d}" + write_restart_input( + restart_from=restart_from, + run_name=run_name, + run_type="equilibration", + run_length=1000, + ) + + with open(f"{run_name}.inp", mode="r") as f: + inp_contents = f.readlines() + + # Step 1: Check we have a # Start_Type header + start_idx = self.check_start_type_header(inp_contents) + + # Step 2: Check the line after "# Start_Type" has two entries, with the first as "checkpoint" + self.check_checkpoint_line(inp_contents, start_idx) + + # Step 3: Ensure only comments or whitespace are present until the next '#' header + self.check_only_comments_or_whitespace( + inp_contents, start_idx + ) diff --git a/mosdef_cassandra/writers/writers.py b/mosdef_cassandra/writers/writers.py index 7bf6da9..4c7f8bc 100644 --- a/mosdef_cassandra/writers/writers.py +++ b/mosdef_cassandra/writers/writers.py @@ -54,14 +54,18 @@ def write_mcfs(system, angle_style="harmonic"): mcf_name = "species{}.mcf".format(species_count + 1) - if all(isinstance(top, parmed.Structure) for top in system.original_tops): + if all( + isinstance(top, parmed.Structure) for top in system.original_tops + ): write_mcf( species, mcf_name, angle_style=angle_style[species_count], dihedral_style=dihedral_style, ) - elif all(isinstance(top, gmso.Topology) for top in system.original_tops): + elif all( + isinstance(top, gmso.Topology) for top in system.original_tops + ): gmso_write_mcf(system.original_tops[species_count], mcf_name) @@ -138,9 +142,15 @@ def _generate_restart_inp(restart_from, run_name, run_type, run_length): inp_contents[idx + 1] = run_name + ".out" if "# Start_Type" in line: inp_contents[idx + 1] = "checkpoint " + restart_from + ".out.chk" - # In case this is a two-box system - if inp_contents[idx + 2][0] != "!": - inp_contents[idx + 2] = "" + i = idx + 2 + while i < len(inp_contents) and not inp_contents[ + i + ].strip().startswith("#"): + if not inp_contents[i].strip().startswith("!"): + inp_contents[i] = ( + "" # Replace non-comment lines with an empty string + ) + i += 1 if run_type is not None: if "# Run_Type" in line: old_contents = inp_contents[idx + 1].split()