From c426c3ab94f25aef9c5822cd880ef8f4b2a6b4bf Mon Sep 17 00:00:00 2001 From: Chris Iacovella Date: Fri, 20 Oct 2023 14:06:02 -0400 Subject: [PATCH] Add export to RDKIT to allow for chemdraw style visualization. (#1137) * fixed freud bond issue * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update mbuild/compound.py * added to_rdkit function with bond order * updated bond functions to optionally accept/return bond order * added tests for bond order and converting to rdkit * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed typo in test * fixed typo in test that was missed in last fix * Check value of bond_order and throw error if invalid, fix typos and string formatting * move rdkit conv code to conversion * replace self with compound * update keys of bond order dict * fix dictionary names and keys * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add new unit tests, check for type in add_bond * expand doc strings * add link to rdkit getting started page * add unit test for error in rdkit --------- Co-authored-by: Christopher Iacovella Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com> Co-authored-by: chrisjonesbsu --- mbuild/compound.py | 83 +++++++++++++++++++++++++++++++---- mbuild/conversion.py | 73 +++++++++++++++++++++++++++++- mbuild/tests/test_compound.py | 74 +++++++++++++++++++++++++++++++ 3 files changed, 220 insertions(+), 10 deletions(-) diff --git a/mbuild/compound.py b/mbuild/compound.py index 5c2e7d000..2fe64ba94 100644 --- a/mbuild/compound.py +++ b/mbuild/compound.py @@ -1189,9 +1189,16 @@ def direct_bonds(self): for b1, b2 in self.root.bond_graph.edges(self): yield b2 - def bonds(self): + def bonds(self, return_bond_order=False): """Return all bonds in the Compound and sub-Compounds. + Parameters + ---------- + return_bond_order : bool, optional, default=False + Return the bond order of the bond as the 3rd argument in the tuple. + bond order is returned as a dictionary with 'bo' as the key. + If bond order is not set, the value will be set to 'default'. + Yields ------ tuple of mb.Compound @@ -1204,9 +1211,11 @@ def bonds(self): """ if self.root.bond_graph: if self.root == self: - return self.root.bond_graph.edges() + return self.root.bond_graph.edges(data=return_bond_order) else: - return self.root.bond_graph.subgraph(self.particles()).edges() + return self.root.bond_graph.subgraph(self.particles()).edges( + data=return_bond_order + ) else: return iter(()) @@ -1246,18 +1255,42 @@ def n_bonds(self): ) return sum(1 for _ in self.bonds()) - def add_bond(self, particle_pair): + def add_bond(self, particle_pair, bond_order=None): """Add a bond between two Particles. Parameters ---------- particle_pair : indexable object, length=2, dtype=mb.Compound The pair of Particles to add a bond between + bond_order : float, optional, default=None + Bond order of the bond. + Available options include "default", "single", "double", + "triple", "aromatic" or "unspecified" """ if self.root.bond_graph is None: self.root.bond_graph = BondGraph() - - self.root.bond_graph.add_edge(particle_pair[0], particle_pair[1]) + if bond_order is None: + bond_order = "default" + else: + if not isinstance(bond_order, str) or bond_order.lower() not in [ + "default", + "single", + "double", + "triple", + "aromatic", + "unspecified", + ]: + raise ValueError( + "Invalid bond_order given. Available bond orders are: " + "single", + "double", + "triple", + "aromatic", + "unspecified", + ) + self.root.bond_graph.add_edge( + particle_pair[0], particle_pair[1], bond_order=bond_order + ) def generate_bonds(self, name_a, name_b, dmin, dmax): """Add Bonds between all pairs of types a/b within [dmin, dmax]. @@ -2654,7 +2687,7 @@ def _energy_minimize_openbabel( particle._element = element_from_symbol(particle.name) except ElementError: try: - element_from_name(particle.name) + particle._element = element_from_name(particle.name) except ElementError: raise MBuildError( "No element assigned to {}; element could not be" @@ -3230,6 +3263,35 @@ def from_parmed(self, structure, coords_only=False, infer_hierarchy=True): infer_hierarchy=infer_hierarchy, ) + def to_rdkit(self): + """Create an RDKit RWMol from an mBuild Compound. + + Returns + ------- + rdkit.Chem.RWmol + + Notes + ----- + Use this method to utilzie rdkit funcitonality. + This method only works when the mBuild compound + contains accurate element information. As a result, + this method is not compatible with compounds containing + abstract particles (e.g. coarse-grained systems) + + Example + ------- + >>> import mbuild + >>> from rdkit.Chem import Draw + + >>> benzene = mb.load("c1cccc1", smiles=True) + >>> benzene_rdkmol = benzene.to_rdkit() + >>> img = Draw.MolToImage(benzene_rdkmol) + + See https://www.rdkit.org/docs/GettingStartedInPython.html + + """ + return conversion.to_rdkit(self) + def to_parmed( self, box=None, @@ -3554,9 +3616,12 @@ def _clone_bonds(self, clone_of=None): newone.bond_graph = BondGraph() for particle in self.particles(): newone.bond_graph.add_node(clone_of[particle]) - for c1, c2 in self.bonds(): + for c1, c2, data in self.bonds(return_bond_order=True): try: - newone.add_bond((clone_of[c1], clone_of[c2])) + # bond order is added to the data dictionary as 'bo' + newone.add_bond( + (clone_of[c1], clone_of[c2]), bond_order=data["bond_order"] + ) except KeyError: raise MBuildError( "Cloning failed. Compound contains bonds to " diff --git a/mbuild/conversion.py b/mbuild/conversion.py index 2263d6c0e..f79302487 100644 --- a/mbuild/conversion.py +++ b/mbuild/conversion.py @@ -884,10 +884,18 @@ def from_rdkit(rdkit_mol, compound=None, coords_only=False, smiles_seed=0): part_list.append(part) comp.add(part_list) + bond_order_dict = { + Chem.BondType.SINGLE: "single", + Chem.BondType.DOUBLE: "double", + Chem.BondType.TRIPLE: "triple", + Chem.BondType.AROMATIC: "aromatic", + Chem.BondType.UNSPECIFIED: "unspecified", + } for bond in mymol.GetBonds(): comp.add_bond( - [comp[bond.GetBeginAtomIdx()], comp[bond.GetEndAtomIdx()]] + [comp[bond.GetBeginAtomIdx()], comp[bond.GetEndAtomIdx()]], + bond_order=bond_order_dict[bond.GetBondType()], ) return comp @@ -1749,6 +1757,69 @@ def to_pybel( return pybelmol +def to_rdkit(compound): + """Create an RDKit RWMol from an mBuild Compound. + + Parameters + ---------- + compound : mbuild.Compound; required + The compound to convert to a Chem.RWmol + + Returns + ------- + rdkit.Chem.RWmol + """ + rdkit = import_("rdkit") + from rdkit import Chem + from rdkit.Chem import AllChem + + for particle in compound.particles(): + if particle.element is None: + try: + particle._element = element_from_symbol(particle.name) + except ElementError: + try: + particle._element = element_from_name(particle.name) + except ElementError: + raise MBuildError( + f"No element assigned to {particle};" + "element could not be" + f"inferred from particle name {particle.name}." + " Cannot perform an energy minimization." + ) + + temp_mol = Chem.RWMol() + p_dict = {particle: i for i, particle in enumerate(compound.particles())} + + bond_order_dict = { + "single": Chem.BondType.SINGLE, + "double": Chem.BondType.DOUBLE, + "triple": Chem.BondType.TRIPLE, + "aromatic": Chem.BondType.AROMATIC, + "unspecified": Chem.BondType.UNSPECIFIED, + "default": Chem.BondType.SINGLE, + } + + for particle in compound.particles(): + temp_atom = Chem.Atom(particle.element.atomic_number) + + # this next line is necessary to prevent rdkit from adding hydrogens + # this will also set the label to be the element with particle index + temp_atom.SetProp( + "atomLabel", f"{temp_atom.GetSymbol()}:{p_dict[particle]}" + ) + + temp_mol.AddAtom(temp_atom) + + for bond in compound.bonds(return_bond_order=True): + bond_indices = (p_dict[bond[0]], p_dict[bond[1]]) + temp_mol.AddBond(*bond_indices) + rdkit_bond = temp_mol.GetBondBetweenAtoms(*bond_indices) + rdkit_bond.SetBondType(bond_order_dict[bond[2]["bond_order"]]) + + return temp_mol + + def to_smiles(compound, backend="pybel"): """Create a SMILES string from an mbuild compound. diff --git a/mbuild/tests/test_compound.py b/mbuild/tests/test_compound.py index c06f6f528..3c6a00519 100644 --- a/mbuild/tests/test_compound.py +++ b/mbuild/tests/test_compound.py @@ -115,6 +115,80 @@ def test_direct_bonds(self, methane): for H in methane.particles_by_name("H"): assert H in bond_particles + def test_bond_order(self, methane): + # test the default behavior + bonds = [bond for bond in methane.bonds(return_bond_order=True)] + assert bonds[0][2]["bond_order"] == "default" + + # test setting bond order when adding a bond + carbon = mb.Compound(pos=[0, 0, 0], name="C") + carbon_clone = mb.clone(carbon) + C2 = mb.Compound([carbon, carbon_clone]) + C2.add_bond([carbon, carbon_clone], bond_order="double") + bonds = [bond for bond in C2.bonds(return_bond_order=True)] + + assert bonds[0][2]["bond_order"] == "double" + + # ensure we propagate bond order information when we clone a compound + C2_clone = mb.clone(C2) + bonds = [bond for bond in C2_clone.bonds(return_bond_order=True)] + + assert bonds[0][2]["bond_order"] == "double" + + @pytest.mark.parametrize( + "bond_order", + ["single", "double", "triple", "aromatic"], + ) + def test_add_bond_order(self, bond_order): + A_bead = mb.Compound(name="A", pos=[0, 0, 0]) + B_bead = mb.Compound(name="B", pos=[0.5, 0.5, 0]) + comp = mb.Compound([A_bead, B_bead]) + comp.add_bond([A_bead, B_bead], bond_order=bond_order) + for bond in comp.bonds(return_bond_order=True): + assert bond[2]["bond_order"] == bond_order + + def test_add_bad_bond_order(self): + A_bead = mb.Compound(name="A", pos=[0, 0, 0]) + B_bead = mb.Compound(name="B", pos=[0.5, 0.5, 0]) + comp = mb.Compound([A_bead, B_bead]) + with pytest.raises(ValueError): + comp.add_bond([A_bead, B_bead], bond_order=1.0) + with pytest.raises(ValueError): + comp.add_bond([A_bead, B_bead], bond_order="quadruple") + + @pytest.mark.skipif(not has_rdkit, reason="RDKit is not installed") + def test_to_rdkit(self, methane): + # check basic conversion + rdkit = import_("rdkit") + rdmol = methane.to_rdkit() + assert isinstance(rdmol, rdkit.Chem.rdchem.RWMol) + + from rdkit import Chem + + atoms = [atom for atom in rdmol.GetAtoms()] + bonds = [bond for bond in rdmol.GetBonds()] + + bond_order_dict = { + Chem.BondType.SINGLE: 1.0, + Chem.BondType.DOUBLE: 2.0, + Chem.BondType.TRIPLE: 3.0, + Chem.BondType.AROMATIC: 1.5, + Chem.BondType.UNSPECIFIED: 0.0, + } + assert len(atoms) == 5 + assert len(bonds) == 4 + + assert bond_order_dict[bonds[0].GetBondType()] == 1.0 + assert bond_order_dict[bonds[1].GetBondType()] == 1.0 + assert bond_order_dict[bonds[2].GetBondType()] == 1.0 + assert bond_order_dict[bonds[3].GetBondType()] == 1.0 + + @pytest.mark.skipif(not has_rdkit, reason="RDKit is not installed") + def test_to_rdkit_no_elements(self): + comp = mb.Compound(name="_A") + with pytest.raises(MBuildError): + comp.to_rdkit() + def test_n_direct_bonds_parent(self, ethane): with pytest.raises(MBuildError): ethane.n_direct_bonds