From 8be35163af67c2201e357ac72ab9fee080b07fe7 Mon Sep 17 00:00:00 2001 From: Marcin Wojdyr Date: Mon, 16 Sep 2024 15:43:31 +0200 Subject: [PATCH] extend CID selection syntax with: "[metals]" and "[nonmetals]" "[nonmetals]" is the same as "[!metals]", but they differ when combined with other elements, for example, "[nonmetals,Fe]" includes Fe but "[!metals,Fe]" doesn't. --- docs/analysis.rst | 2 ++ src/select.cpp | 28 ++++++++++++++++++++-------- tests/test_misc.py | 4 ++++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/docs/analysis.rst b/docs/analysis.rst index 4b230dd0..9abc8f81 100644 --- a/docs/analysis.rst +++ b/docs/analysis.rst @@ -575,6 +575,7 @@ Let us go through the individual filters first: * `//*/(ALA)` (or `(ALA)`) -- selects residues with a given name. * `//*//CB` (or `CB:*` or `CB[*]`) -- selects atoms with a given name. * `//*//[P]` (or just `[P]`) -- selects phosphorus atoms. +* `[metals]`, `[nonmetals]` -- selects metal or non-metal atoms. * `//*//:B` (or `:B`) -- selects atoms with altloc B. * `//*//:` (or `:`) -- selects atoms without altloc. * `//*//;q<0.5` (or `;q<0.5`) -- selects atoms with occupancy below 0.5 @@ -590,6 +591,7 @@ The syntax supports also comma-separated lists and negations with `!`: * `(!ALA)` -- all residues but alanine, * `[C,N,O]` -- all C, N and O atoms, * `[!C,N,O]` -- all atoms except C, N and O, +* `[metals,Si]` -- all metal and Si atoms, * `:,A` -- altloc either empty or A (which makes one conformation), * `/1/A,B/20-40/CA[C]:,A` -- multiple selection criteria, all of them must be fulfilled. 
diff --git a/src/select.cpp b/src/select.cpp index 6d3691ea..020c600b 100644 --- a/src/select.cpp +++ b/src/select.cpp @@ -63,16 +63,28 @@ inline void parse_cid_elements(const std::string& cid, size_t pos, elements.resize((size_t)El::END, char(inverted)); for (;;) { size_t sep = cid.find_first_of(",]", pos); - if (sep == pos || sep > pos + 2) - wrong_syntax(cid, 0, "in [...]"); - char elem_str[2] = {cid[pos], sep > pos+1 ? cid[pos+1] : '\0'}; - Element el = find_element(elem_str); - if (el == El::X && (alpha_up(elem_str[0]) != 'X' || elem_str[1] != '\0')) - wrong_syntax(cid, 0, " (invalid element in [...])"); - elements[el.ordinal()] = char(!inverted); - pos = sep + 1; + if (sep == pos || sep > pos + 2) { + if (sep == pos + 6 && cid.compare(pos, 6, "metals", 6) == 0) { + for (size_t i = 0; i < elements.size(); ++i) + if (is_metal(static_cast<El>(i))) + elements[i] = char(!inverted); + } else if (sep == pos + 9 && cid.compare(pos, 9, "nonmetals", 9) == 0) { + for (size_t i = 0; i < elements.size(); ++i) + if (!is_metal(static_cast<El>(i))) + elements[i] = char(!inverted); + } else { + wrong_syntax(cid, 0, "in [...]"); + } + } else { + char elem_str[2] = {cid[pos], sep > pos+1 ? 
cid[pos+1] : '\0'}; + Element el = find_element(elem_str); + if (el == El::X && (alpha_up(elem_str[0]) != 'X' || elem_str[1] != '\0')) + wrong_syntax(cid, 0, " (invalid element in [...])"); + elements[el.ordinal()] = char(!inverted); + } if (cid[sep] == ']') break; + pos = sep + 1; } } diff --git a/tests/test_misc.py b/tests/test_misc.py index a84d8334..f3d7d9c0 100755 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -54,6 +54,10 @@ def selstr(s): self.assertEqual(selstr('/1/C/-72(DA)'), '/1/C/-72.(DA)/') self.assertEqual(selstr('C/-72--30'), '//C/-72.--30./') self.assertEqual(selstr('C/-72-2'), '//C/-72.-2./') + self.assertEqual(selstr('[metals]'), selstr('[!nonmetals]')) + self.assertEqual(selstr('[!metals]'), selstr('[nonmetals]')) + self.assertTrue('[X,H,B' in selstr('[!metals,He]')) + self.assertTrue('[!X,H,B' in selstr('[metals,He]')) if __name__ == '__main__': unittest.main()