From f33833e0c58bb8384241641de2bfd4ad4fc54af4 Mon Sep 17 00:00:00 2001 From: David Doty Date: Wed, 6 Sep 2023 18:07:31 -0700 Subject: [PATCH 1/9] bumped version --- nuad/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuad/__version__.py b/nuad/__version__.py index 4685c67f..20faead3 100644 --- a/nuad/__version__.py +++ b/nuad/__version__.py @@ -1 +1 @@ -version = '0.4.4' # version line; WARNING: do not remove or change this line or comment +version = '0.4.5' # version line; WARNING: do not remove or change this line or comment From d64cdadfd159bac33b88ed7913bdb72dfb2eca78 Mon Sep 17 00:00:00 2001 From: David Doty Date: Sun, 1 Oct 2023 12:25:41 -0700 Subject: [PATCH 2/9] added `_normalize_domains_pairs_disjoint_parameters` --- nuad/constraints.py | 154 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 136 insertions(+), 18 deletions(-) diff --git a/nuad/constraints.py b/nuad/constraints.py index bd14c1c3..120f12c7 100644 --- a/nuad/constraints.py +++ b/nuad/constraints.py @@ -5620,14 +5620,8 @@ def _strand_pairs_constraints_by_number_matching_domains( # and # rna_cofold_strand_pairs_constraints_by_number_matching_domains - if strands is None and pairs is None: - raise ValueError('exactly one of strands or pairs must be specified, but neither is') - elif strands is not None and pairs is not None: - raise ValueError('exactly one of strands or pairs must be specified, but both are') - - if strands is not None: - assert pairs is None - pairs = itertools.combinations_with_replacement(strands, 2) + check_strand_against_itself = True + pairs = _normalize_strands_pairs_disjoint_parameters(strands, pairs, check_strand_against_itself) pairs_by_matching_domains = strand_pairs_by_number_matching_domains(pairs=pairs) keys = set(pairs_by_matching_domains.keys()) @@ -5664,6 +5658,50 @@ def _strand_pairs_constraints_by_number_matching_domains( return constraints +def _normalize_domains_pairs_disjoint_parameters( + domains: Iterable[Domain] | None, + pairs: Iterable[Tuple[Domain, Domain]], + check_domain_against_itself: bool) -> Iterable[Tuple[Domain, Domain]]: + # Enforce that exactly one of domains or pairs is not None, and if domains is specified, + # set pairs to be all pairs from domains. Return those pairs; if pairs is specified, + # just return it. Also normalize to return a tuple. + if domains is None and pairs is None: + raise ValueError('exactly one of domains or pairs must be specified, but neither is') + elif domains is not None and pairs is not None: + raise ValueError('exactly one of domains or pairs must be specified, but both are') + if domains is not None: + assert pairs is None + if check_domain_against_itself: + pairs = itertools.combinations_with_replacement(domains, 2) + else: + pairs = itertools.combinations(domains, 2) + + pairs_tuple = pairs if isinstance(pairs, tuple) else tuple(pairs) + return pairs_tuple + + +def _normalize_strands_pairs_disjoint_parameters( + strands: Iterable[Strand] | None, + pairs: Iterable[Tuple[Strand, Strand]], + check_strand_against_itself: bool) -> Iterable[Tuple[Strand, Strand]]: + # Enforce that exactly one of strands or pairs is not None, and if strands is specified, + # set pairs to be all pairs from strands. Return those pairs; if pairs is specified, + # just return it. Also normalize to return a tuple. + if strands is None and pairs is None: + raise ValueError('exactly one of strands or pairs must be specified, but neither is') + elif strands is not None and pairs is not None: + raise ValueError('exactly one of strands or pairs must be specified, but both are') + if strands is not None: + assert pairs is None + if check_strand_against_itself: + pairs = itertools.combinations_with_replacement(strands, 2) + else: + pairs = itertools.combinations(strands, 2) + + pairs_tuple = pairs if isinstance(pairs, tuple) else tuple(pairs) + return pairs_tuple + + def rna_cofold_strand_pairs_constraints_by_number_matching_domains( *, thresholds: Dict[int, float], @@ -5975,6 +6013,8 @@ def longest_complementary_subsequences(arr1: np.ndarray, arr2: np.ndarray, gc_do for each i, storing in returned list `result[i]`. Unlike :func:`longest_complementary_subsequences_two_loops`, this uses only one Python loop, + by using the "anti-diagonal" method for evaluating the dynamic programming table, + calculating a whole anti-diagonal from the previous two in O(1) numpy commands. When used for DNA sequences, this assumes `arr2` has been reversed along axis 1, i.e., the sequences in `arr1` are assumed to be oriented 5' --> 3', and the sequences in `arr2` @@ -5986,7 +6026,8 @@ def longest_complementary_subsequences(arr1: np.ndarray, arr2: np.ndarray, gc_do oriented 5' --> 3'. arr2: 2D array of DNA sequences, with each row being a single DNA sequence oriented 3' --> 5'. - gc_double: Whether to double the score for G-C base pairs. + gc_double: Whether to double the score for G-C base pairs. (assumes that integers 1,2 represent + C,G respectively) Returns: list `ret` of ints, where `ret[i]` is the length of the longest complementary subsequence @@ -6121,6 +6162,79 @@ def lcs_loop(s1: str, s2: str, gc_double: bool) -> int: return longest_complementary_subsequences_python_loop(arr1, arr2, gc_double)[0] +def lcs_domain_pairs_constraint( + *, + threshold: int, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = default_score_transfer_function, + description: str | None = None, + short_description: str = 'lcs domain pairs', + domains: Iterable[Domain] | None = None, + pairs: Iterable[Tuple[Domain, Domain]] | None = None, + check_domain_against_itself: bool = True, + gc_double: bool = True, +) -> DomainPairsConstraint: + """ + Checks pairs of domain sequences for longest complementary subsequences. + This can be thought of as a very rough heuristic for "binding energy" that is much less + accurate than NUPACK or ViennaRNA, but much faster to evaluate. + + Args + threshold: Max length of complementary subsequence allowed. + + weight: How much to weigh this :any:`Constraint`. + + score_transfer_function: See :py:data:`Constraint.score_transfer_function`. + + description: Long description of constraint suitable for putting into constraint report. + + short_description: Short description of constraint suitable for logging to stdout. + + pairs: Pairs of :any:`Strand`'s to compare; if not specified, checks all pairs in design. + + check_domain_against_itself: Whether to check domain `x` against `x*`. (Obviously `x` has a maximal + common subsequence with `x`, so we don't check that.) + + gc_double: Whether to weigh G-C base pairs as double (i.e., they count for 2 instead of 1). + + Returns + A :any:`DomainPairsConstraint` that checks given pairs of :any:`Domain`'s for excessive + interaction due to having long complementary subsequences. + """ + if description is None: + description = f'Longest complementary subsequence between domains is > {threshold}' + + def evaluate_bulk(pairs: Iterable[DomainPair]) -> List[Result]: + seqs1 = [pair.domain1.sequence() for pair in pairs] + seqs2 = [pair.domain2.sequence() for pair in pairs] + arr1 = nn.seqs2arr(seqs1) + arr2 = nn.seqs2arr(seqs2) + arr2_rev = np.flip(arr2, axis=1) + + lcs_sizes = longest_complementary_subsequences(arr1, arr2_rev, gc_double) + + results = [] + for lcs_size in lcs_sizes: + excess = lcs_size - threshold + value = f'{lcs_size}' + result = Result(excess=excess, value=value) + results.append(result) + + return results + + pairs = _normalize_domains_pairs_disjoint_parameters(domains, pairs, check_domain_against_itself) + + return DomainPairsConstraint( + description=description, + short_description=short_description, + weight=weight, + score_transfer_function=score_transfer_function, + evaluate_bulk=evaluate_bulk, + pairs=pairs, + check_domain_against_itself=check_domain_against_itself, + ) + + def lcs_strand_pairs_constraint( *, threshold: int, @@ -6133,25 +6247,29 @@ def lcs_strand_pairs_constraint( gc_double: bool = True, ) -> StrandPairsConstraint: """ - TODO: describe + Checks pairs of strand sequences for longest complementary subsequences. + This can be thought of as a very rough heuristic for "binding energy" that is much less + accurate than NUPACK or ViennaRNA, but much faster to evaluate. Args - threshold: + threshold: Max length of complementary subsequence allowed. + + weight: How much to weigh this :any:`Constraint`. - weight: + score_transfer_function: See :py:data:`Constraint.score_transfer_function`. - score_transfer_function: + description: Long description of constraint suitable for putting into constraint report. - description: + short_description: Short description of constraint suitable for logging to stdout. - short_description: + pairs: Pairs of :any:`Strand`'s to compare; if not specified, checks all pairs in design. - pairs: + check_strand_against_itself: Whether to check a strand against itself. gc_double: Whether to weigh G-C base pairs as double (i.e., they count for 2 instead of 1). Returns - A :any: StrandPairsConstraint` that checks given pairs of :any:`Strand`'s for excessive + A :any:`StrandPairsConstraint` that checks given pairs of :any:`Strand`'s for excessive interaction due to having long complementary subsequences. """ if description is None: @@ -6222,7 +6340,7 @@ def lcs_strand_pairs_constraints_by_number_matching_domains( if parameters_filename != '': raise ValueError('should not specify parameters_filename when calling ' 'lcs_strand_pairs_constraints_by_number_matching_domains; ' - 'it is only listed as a parameter for technical reasons relating to code resuse ' + 'it is only listed as a parameter for technical reasons relating to code reuse ' 'with other constraints that use that parameter') def lcs_strand_pairs_constraint_with_dummy_parameters( From 85536d32588fbb9ba9ce066edb8dd755e9ee7d96 Mon Sep 17 00:00:00 2001 From: David Doty Date: Sun, 1 Oct 2023 13:15:17 -0700 Subject: [PATCH 3/9] added `rna_plex_domain_pairs_nonorthogonal_constraint` and cleaned up docstrings --- nuad/constraints.py | 337 ++++++++++++++++++++++++++++++++------------ 1 file changed, 245 insertions(+), 92 deletions(-) diff --git a/nuad/constraints.py b/nuad/constraints.py index 120f12c7..55f77642 100644 --- a/nuad/constraints.py +++ b/nuad/constraints.py @@ -393,7 +393,7 @@ class NumpyFilter(ABC): A :any:`NumpyFilter` is one that can be efficiently encoded as numpy operations on 2D arrays of bytes representing DNA sequences, through the class - :any:`np.DNASeqList` (which uses such a 2D array as the field :py:data:`np.DNASeqList.seqarr`). + :any:`np.DNASeqList` (which uses such a 2D array as the field :data:`np.DNASeqList.seqarr`). Subclasses should set the value :data:`NumpyFilter.name`, inherited from this class. @@ -401,7 +401,7 @@ class NumpyFilter(ABC): such as :any:`RestrictBasesFilter` or :any:`NearestNeighborEnergyFilter`, are dataclasses (https://docs.python.org/3/library/dataclasses.html). There is no requirement that custom subclasses be dataclasses, but since the subclasses will - inherit the field :py:data:`NumpyFilter.name`, you can easily make them dataclasses to get, + inherit the field :data:`NumpyFilter.name`, you can easily make them dataclasses to get, for example, free ``repr`` and ``str`` implementations. See the source code for examples. The related type :any:`SequenceFilter` (which is just an alias for a Python function with @@ -485,8 +485,8 @@ class NearestNeighborEnergyFilter(NumpyFilter): (https://www.annualreviews.org/doi/abs/10.1146/annurev.biophys.32.110601.141800, see Table 1, and example on page 419). It rejects any sequences whose energy according to this sum is outside the range - [:py:data:`NearestNeighborEnergyFilter.low_energy`, - :py:data:`NearestNeighborEnergyFilter.high_energy`]. + [:data:`NearestNeighborEnergyFilter.low_energy`, + :data:`NearestNeighborEnergyFilter.high_energy`]. """ low_energy: float @@ -523,12 +523,12 @@ class BaseCountFilter(NumpyFilter): high_count: int | None = None """ - Count of :py:data:`BaseCountFilter.base` must be at most :py:data:`BaseCountFilter.high_count`. + Count of :data:`BaseCountFilter.base` must be at most :data:`BaseCountFilter.high_count`. """ low_count: int | None = None """ - Count of :py:data:`BaseCountFilter.base` must be at least :py:data:`BaseCountFilter.low_count`. + Count of :data:`BaseCountFilter.base` must be at least :data:`BaseCountFilter.low_count`. """ def __post_init__(self) -> None: @@ -550,7 +550,7 @@ def remove_violating_sequences(self, seqs: nn.DNASeqList) -> nn.DNASeqList: class BaseEndFilter(NumpyFilter): """ Restricts the sequence to contain only certain bases on - (or near, if :py:data:`BaseEndFilter.distance` > 0) each end. + (or near, if :data:`BaseEndFilter.distance` > 0) each end. """ bases: Collection[str] @@ -624,11 +624,11 @@ class BaseAtPositionFilter(NumpyFilter): bases: str | Collection[str] """ - Base(s) to require at position :py:data:`BasePositionConstraint.position`. + Base(s) to require at position :data:`BasePositionConstraint.position`. Can either be a single base, or a collection (e.g., list, tuple, set). - If several bases are specified, the base at :py:data:`BasePositionConstraint.position` - must be one of the bases in :py:data:`BasePositionConstraint.bases`. + If several bases are specified, the base at :data:`BasePositionConstraint.position` + must be one of the bases in :data:`BasePositionConstraint.bases`. """ position: int @@ -709,7 +709,7 @@ def length(self) -> int: return len(first_substring) def remove_violating_sequences(self, seqs: nn.DNASeqList) -> nn.DNASeqList: - """Remove sequences that have a string in :py:data:`ForbiddenSubstringFilter.substrings` + """Remove sequences that have a string in :data:`ForbiddenSubstringFilter.substrings` as a substring.""" assert isinstance(self.substrings, list) sub_len = len(self.substrings[0]) @@ -742,7 +742,7 @@ class RunsOfBasesFilter(NumpyFilter): bases: Collection[str] """ - Bases to forbid in runs of length :py:data:`RunsOfBasesFilter.length`. + Bases to forbid in runs of length :data:`RunsOfBasesFilter.length`. """ length: int @@ -980,10 +980,10 @@ class DomainPool(JSONSerializable): This is used to choose potential sequences to assign to the :any:`Domain`'s in this :any:`DomainPool` in the method :py:meth:`DomainPool.generate_sequence`. - The difference with :py:data:`DomainPool.sequence_filters` is that these constraints can be applied + The difference with :data:`DomainPool.sequence_filters` is that these constraints can be applied efficiently to many sequences at once, represented as a numpy 2D array of bytes (via the class :any:`np.DNASeqList`), so they are done in large batches in advance. - In contrast, the constraints in :py:data:`DomainPool.sequence_filters` are done on Python strings + In contrast, the constraints in :data:`DomainPool.sequence_filters` are done on Python strings representing DNA sequences, and they are called one at a time when a new sequence is requested in :py:meth:`DomainPool.generate_sequence`. @@ -997,9 +997,9 @@ class DomainPool(JSONSerializable): This is used to choose potential sequences to assign to the :any:`Domain`'s in this :any:`DomainPool` in the method :py:meth:`DomainPool.generate`. - See :py:data:`DomainPool.numpy_filters` for an explanation of the difference between them. + See :data:`DomainPool.numpy_filters` for an explanation of the difference between them. - See :py:data:`DomainPool.domain_constraints` for an explanation of the difference between them. + See :data:`DomainPool.domain_constraints` for an explanation of the difference between them. Optional; default is empty. """ @@ -1158,11 +1158,11 @@ def satisfies_sequence_constraints(self, sequence: str) -> bool: def generate_sequence(self, rng: np.random.Generator, previous_sequence: str | None = None) -> str: """ - Returns a DNA sequence of given length satisfying :py:data:`DomainPool.numpy_filters` and - :py:data:`DomainPool.sequence_filters` + Returns a DNA sequence of given length satisfying :data:`DomainPool.numpy_filters` and + :data:`DomainPool.sequence_filters` **Note:** By default, there is no check that the sequence returned is unequal to one already - assigned somewhere in the design, since both :py:data:`DomainPool.numpy_filters` and + assigned somewhere in the design, since both :data:`DomainPool.numpy_filters` and :data:`DomainPool.sequence_filters` do not have access to the whole :any:`Design`. But the :any:`DomainPairConstraint` returned by :meth:`domains_not_substrings_of_each_other_constraint` @@ -1172,7 +1172,7 @@ def generate_sequence(self, rng: np.random.Generator, previous_sequence: str | N and instead a sequence is chosen randomly to be returned from that list. :param rng: - numpy random number generator to use. To use a default, pass :py:data:`np.default_rng`. + numpy random number generator to use. To use a default, pass :data:`np.default_rng`. :param previous_sequence: previously generated sequence to be replaced by a new sequence; None if no previous sequence exists. Used to choose a new sequence "close" to itself in Hamming distance, @@ -1182,8 +1182,8 @@ def generate_sequence(self, rng: np.random.Generator, previous_sequence: str | N picking a Hamming distance from :data:`DomainPool.hamming_probability` with weighted probabilities of choosing each distance. :return: - DNA sequence of given length satisfying :py:data:`DomainPool.numpy_filters` and - :py:data:`DomainPool.sequence_filters` + DNA sequence of given length satisfying :data:`DomainPool.numpy_filters` and + :data:`DomainPool.sequence_filters` """ if self.possible_sequences is not None: if isinstance(self.possible_sequences, list): @@ -1419,7 +1419,7 @@ class Domain(Part, JSONSerializable): If two domains are complementary, they are represented by the same :any:`Domain` object. They are distinguished only by whether the :any:`Strand` object containing them has the - :any:`Domain` in its set :py:data:`Strand.starred_domains` or not. + :any:`Domain` in its set :data:`Strand.starred_domains` or not. A :any:`Domain` uses only its name to compute hash and equality checks, not its sequence. This allows a :any:`Domain` to be used in sets and dicts while modifying the sequence assigned to it, @@ -1449,7 +1449,7 @@ class Domain(Part, JSONSerializable): """ DNA sequence assigned to this :any:`Domain`. This is assumed to be the sequence of the unstarred variant; the starred variant has the Watson-Crick complement, - accessible via :py:data:`Domain.starred_sequence`. + accessible via :data:`Domain.starred_sequence`. """ weight: float = 1.0 @@ -1655,7 +1655,7 @@ def pool(self) -> DomainPool: def pool(self, new_pool: DomainPool) -> None: """ :param new_pool: new :any:`DomainPool` to set - :raises ValueError: if :py:data:`Domain.pool_` is not None and is not same object as `new_pool` + :raises ValueError: if :data:`Domain.pool_` is not None and is not same object as `new_pool` """ if self._pool is not None and new_pool is not self._pool: raise ValueError(f'Assigning pool {new_pool} to domain ' @@ -1827,7 +1827,7 @@ def set_fixed_sequence(self, fixed_sequence: str) -> None: @property def starred_name(self) -> str: """ - :return: The value :py:data:`Domain.name` with `*` appended to it. + :return: The value :data:`Domain.name` with `*` appended to it. """ return self._starred_name @@ -1844,7 +1844,7 @@ def starred_sequence(self) -> str: def get_name(self, starred: bool) -> str: """ :param starred: whether to return the starred or unstarred version of the name - :return: The value :py:data:`Domain.name` or :py:data:`Domain.starred_name`, depending on + :return: The value :data:`Domain.name` or :data:`Domain.starred_name`, depending on the value of parameter `starred`. """ return self._starred_name if starred else self._name @@ -1852,7 +1852,7 @@ def get_name(self, starred: bool) -> str: def concrete_sequence(self, starred: bool) -> str: """ :param starred: whether to return the starred or unstarred version of the sequence - :return: The value :py:data:`Domain.sequence` or :py:data:`Domain.starred_sequence`, depending on + :return: The value :data:`Domain.sequence` or :data:`Domain.starred_sequence`, depending on the value of parameter `starred`. :raises ValueError: if this :any:`Domain` does not have a sequence assigned """ @@ -2170,7 +2170,7 @@ class VendorFields(JSONSerializable): This data is used when automatically generating files used to order DNA from IDT. When exporting to IDT files via :meth:`Design.write_idt_plate_excel_file` - or :meth:`Design.write_idt_bulk_input_file`, the field :py:data:`Strand.name` is used for the + or :meth:`Design.write_idt_bulk_input_file`, the field :data:`Strand.name` is used for the name if it exists, otherwise a reasonable default is chosen.""" scale: str = default_vendor_scale @@ -2255,7 +2255,7 @@ class Strand(Part, JSONSerializable): """The :any:`Domain`'s on this :any:`Strand`, in order from 5' end to 3' end.""" starred_domain_indices: FrozenSet[int] - """Set of positions of :any:`Domain`'s in :py:data:`Strand.domains` + """Set of positions of :any:`Domain`'s in :data:`Strand.domains` on this :any:`Strand` that are starred.""" group: str @@ -2586,14 +2586,14 @@ def __repr__(self) -> str: def unstarred_domains(self) -> List[Domain]: """ :return: list of unstarred :any:`Domain`'s in this :any:`Strand`, in order they appear in - :py:data:`Strand.domains` + :data:`Strand.domains` """ return [domain for idx, domain in enumerate(self.domains) if idx not in self.starred_domain_indices] def starred_domains(self) -> List[Domain]: """ :return: list of starred :any:`Domain`'s in this :any:`Strand`, in order they appear in - :py:data:`Strand.domains` + :data:`Strand.domains` """ return [domain for idx, domain in enumerate(self.domains) if idx in self.starred_domain_indices] @@ -2650,7 +2650,7 @@ def fixed(self) -> bool: def unfixed_domains(self) -> Tuple[Domain]: """ - :return: all :any:`Domain`'s in this :any:`Strand` where :py:data:`Domain.fixed` is False + :return: all :any:`Domain`'s in this :any:`Strand` where :data:`Domain.fixed` is False """ return tuple(domain for domain in self.domains if not domain.fixed) @@ -3062,28 +3062,28 @@ class Design(JSONSerializable): """ List of all :any:`Domain`'s in this :any:`Design`. (without repetitions) - Computed from :py:data:`Design.strands`, so not specified in constructor. + Computed from :data:`Design.strands`, so not specified in constructor. """ strands_by_group_name: Dict[str, List[Strand]] = field(init=False) """ Dict mapping each group name to a list of the :any:`Strand`'s in this :any:`Design` in the group. - Computed from :py:data:`Design.strands`, so not specified in constructor. + Computed from :data:`Design.strands`, so not specified in constructor. """ domain_pools_to_domain_map: Dict[DomainPool, List[Domain]] = field(init=False) """ Dict mapping each :any:`DomainPool` to a list of the :any:`Domain`'s in this :any:`Design` in the pool. - Computed from :py:data:`Design.strands`, so not specified in constructor. + Computed from :data:`Design.strands`, so not specified in constructor. """ domains_by_name: Dict[str, Domain] = field(init=False) """ Dict mapping each name of a :any:`Domain` to the :any:`Domain`'s in this :any:`Design`. - Computed from :py:data:`Design.strands`, so not specified in constructor. + Computed from :data:`Design.strands`, so not specified in constructor. """ def __init__(self, strands: Iterable[Strand] = ()) -> None: @@ -3281,15 +3281,15 @@ def add_strand(self, :param domain_names: Names of the :any:`Domain`'s on this :any:`Strand`. - Mutually exclusive with :py:data:`Strand.domains` and :py:data:`Strand.starred_domain_indices`. + Mutually exclusive with :data:`Strand.domains` and :data:`Strand.starred_domain_indices`. :param domains: list of :any:`Domain`'s on this :any:`Strand`. - Mutually exclusive with :py:data:`Strand.domain_names`, and must be specified jointly with - :py:data:`Strand.starred_domain_indices`. + Mutually exclusive with :data:`Strand.domain_names`, and must be specified jointly with + :data:`Strand.starred_domain_indices`. :param starred_domain_indices: Indices of :any:`Domain`'s in `domains` that are starred. - Mutually exclusive with :py:data:`Strand.domain_names`, and must be specified jointly with - :py:data:`Strand.domains`. + Mutually exclusive with :data:`Strand.domain_names`, and must be specified jointly with + :data:`Strand.domains`. :param group: name of group of this :any:`Strand`. :param name: @@ -3522,7 +3522,7 @@ def write_idt_plate_excel_file(self, *, strands: Iterable[Strand] | None = None) -> None: """ Write ``.xls`` (Microsoft Excel) file encoding the strands of this :any:`Design` with the field - :py:data:`Strand.vendor_fields`, suitable for uploading to IDT + :data:`Strand.vendor_fields`, suitable for uploading to IDT (Integrated DNA Technologies, Coralville, IA, https://www.idtdna.com/) to describe a 96-well or 384-well plate (https://www.idtdna.com/site/order/plate/index/dna/), @@ -3563,7 +3563,7 @@ def write_idt_plate_excel_file(self, *, the parameter `only_strands_with_idt` must be True. :param warn_using_default_plates: specifies whether, if `use_default_plates` is True, to print a warning for strands whose - :data:`Strand.vendor_fields` has the fields :py:data:`VendorFields.plate` and :py:data:`VendorFields.well`, + :data:`Strand.vendor_fields` has the fields :data:`VendorFields.plate` and :data:`VendorFields.well`, since `use_default_plates` directs these fields to be ignored. :param plate_type: a :any:`PlateType` specifying whether to use a 96-well plate or a 384-well plate @@ -3621,7 +3621,7 @@ def from_scadnano_file( Converts a scadnano Design stored in file named `sc_filename` to a a :any:`Design` for doing DNA sequence design. Each Strand name and Domain name from the scadnano Design are assigned as - :py:data:`Strand.name` and :py:data:`Domain.name` in the obvious way. + :data:`Strand.name` and :data:`Domain.name` in the obvious way. Assumes each Strand label is a string describing the strand group. The scadnano package must be importable. @@ -3651,7 +3651,7 @@ def from_scadnano_design(sc_design: sc.Design, """ Converts a scadnano Design `sc_design` to a a :any:`Design` for doing DNA sequence design. Each Strand name and Domain name from the scadnano Design are assigned as - :py:data:`Strand.name` and :py:data:`Domain.name` in the obvious way. + :data:`Strand.name` and :data:`Domain.name` in the obvious way. Assumes each Strand label is a string describing the strand group. The scadnano package must be importable. @@ -3801,7 +3801,7 @@ def assign_sequences_to_scadnano_design(self, sc_design: sc.Design, to assign to this key. If the scadnano strand label is already a dict, it adds this key. If the strand label is not None or a dict, an exception is raised. - Assumes that each domain name in domains in `sc_design` is a :py:data:`Domain.name` of a + Assumes that each domain name in domains in `sc_design` is a :data:`Domain.name` of a :any:`Domain` in this :any:`Design`. If multiple strands in `sc_design` share the same name, then all of them are assigned the @@ -4744,7 +4744,7 @@ def verify_designs_match(design1: Design, design2: Design, check_fixed: bool = T Here is what is checked: - strand names and group names appear in the same order - domain names and pool names appear in the same order in strands with the same name - - :py:data:`Domain.fixed` matches between :any:`Domain`'s + - :data:`Domain.fixed` matches between :any:`Domain`'s """ for idx, (strand1, strand2) in enumerate(zip(design1.strands, design2.strands)): if strand1.name != strand2.name: @@ -4833,7 +4833,7 @@ def nupack_domain_free_energy_constraint( :param weight: how much to weigh this :any:`Constraint` :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param parallel: Whether to use parallelization by running constraint evaluation in separate processes to take advantage of multiple cores. @@ -4902,7 +4902,7 @@ def nupack_strand_free_energy_constraint( :param weight: how much to weigh this :any:`Constraint` :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param parallel: Whether to use parallelization by running constraint evaluation in separate processes to take advantage of multiple cores. @@ -4968,15 +4968,15 @@ def nupack_domain_pair_constraint( molarity of magnesium (Mg++) in moles per liter :param parallel: Whether to test the each pair of :any:`Domain`'s in parallel (i.e., sets field - :py:data:`Constraint.parallel`) + :data:`Constraint.parallel`) :param weight: - How much to weigh this :any:`Constraint`. + See :data:`Constraint.weight`. :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param description: Detailed description of constraint suitable for summary report. :param short_description: - Short description of constraint suitable for logging to stdout. + See :data:`Constraint.short_description` :param pairs: Pairs of :any:`Domain`'s to compare; if not specified, checks all pairs (including a :any:`Domain` against itself). @@ -5098,8 +5098,8 @@ def nupack_strand_pair_constraints_by_number_matching_domains( temperature: Temperature in Celsius. sodium: concentration of Na+ in molar magnesium: concentration of Mg++ in molar - weight: How much to weigh this :any:`Constraint`. - score_transfer_function: See :py:data:`Constraint.score_transfer_function`. + weight: See :data:`Constraint.weight`. + score_transfer_function: See :data:`Constraint.score_transfer_function`. descriptions: Long descriptions of constraint suitable for putting into constraint report. short_descriptions: Short descriptions of constraint suitable for logging to stdout. parallel: Whether to test the each pair of :any:`Strand`'s in parallel. @@ -5182,16 +5182,16 @@ def nupack_strand_pair_constraint( :param magnesium: concentration of Mg++ in molar :param weight: - How much to weigh this :any:`Constraint`. + See :data:`Constraint.weight`. :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param parallel: Whether to use parallelization by running constraint evaluation in separate processes to take advantage of multiple cores. :param description: Detailed description of constraint suitable for report. :param short_description: - Short description of constraint suitable for logging to stdout. + See :data:`Constraint.short_description` :param pairs: Pairs of :any:`Strand`'s to compare; if not specified, checks all pairs (including a :any:`Strand` against itself). @@ -5323,7 +5323,7 @@ def rna_duplex_domain_pairs_constraint( :param weight: how much to weigh this :any:`Constraint` :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param description: long description of constraint suitable for printing in report file :param short_description: @@ -5405,7 +5405,7 @@ def rna_plex_domain_pairs_constraint( -> DomainPairsConstraint: """ Returns constraint that checks given pairs of :any:`Domain`'s for excessive interaction using - Vienna RNA's RNAduplex executable. + Vienna RNA's RNAplex executable. :param threshold: energy threshold @@ -5414,7 +5414,7 @@ def rna_plex_domain_pairs_constraint( :param weight: how much to weigh this :any:`Constraint` :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param description: long description of constraint suitable for printing in report file :param short_description: @@ -5484,6 +5484,159 @@ def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: pairs=pairs_tuple) +def rna_plex_domain_pairs_nonorthogonal_constraint( + thresholds: Dict[Tuple[Domain, bool, Domain, bool] | Tuple[Domain, Domain], Tuple[float, float]], + temperature: float = nv.default_temperature, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = lambda x: x, + description: str | None = None, + short_description: str = 'rna_plex_dom_pairs', + parameters_filename: str = nv.default_vienna_rna_parameter_filename) \ + -> DomainPairsConstraint: + """ + Returns constraint that checks given pairs of :any:`Domain`'s for interaction energy in a given interval, + using ViennaRNA's RNAplex executable. + + This can be used to implement "nonorthogonal" domains as described in these papers: + + - https://drops.dagstuhl.de/opus/volltexte/2023/18787/pdf/LIPIcs-DNA-29-4.pdf + - https://www.nature.com/articles/s41557-022-01111-y + + The "binding affinity table" as described in the first paper is implemented as the `thresholds` + parameter of this function. + + :param thresholds: + dict mapping pairs of :any:`Domain`'s, along with Booleans to indicate whether either :any:`Domain` + is starred, to pairs of energy thresholds. Alternately, the key can be a pair of :any:`Domain`'s + ``(a,b)`` without any Booleans; in this case only the unstarred versions of the domains are checked; + this is equivalent to the key ``(a, False, b, False)``. + + For example, if the dict is + + .. code-block:: python + + { + (a, False, b, False): (-9.0, -8.0), + (a, False, b, True): (-5.0, -3.0), + (a, True, c, False): (-1.0, 0.0), + (a, d): (-7.0, -6.0), + } + + then the constraint ensures that RNAplex energy for + + - ``(a,b)`` is between -9.0 and -8.0 kcal/mol, + - ``(a,b*)`` is between -5.0 and -3.0 kcal/mol, + - ``(a*,c)`` is between -1.0 and 0.0 kcal/mol, and + - ``(a,d)`` is between -7.0 and -6.0 kcal/mol. + + For all other pairs of domains not listed as keys in the dict (including starred/unstarred versions + not specified such as (a*,b*), the constraint is not checked. + :param temperature: + temperature in Celsius + :param weight: + See :data:`Constraint.weight`. + :param score_transfer_function: + See :data:`Constraint.score_transfer_function`. + :param description: + short_description + :param short_description: + See :data:`Constraint.short_description`. + :param parameters_filename: + name of parameters file for ViennaRNA; default is + same as :py:meth:`vienna_nupack.rna_duplex_multiple` + :return: + constraint + """ + _check_vienna_rna_installed() + + if description is None: + description = f'domain pair RNAplex energies for nonorthogonal domains at {temperature}C' + + def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: + sequence_pairs: List[Tuple[str, str]] = [] + names: List[Tuple[str, str]] = [] + domains: List[Tuple[Domain, Domain]] = [] + for pair in domain_pairs: + d1, d2 = pair.individual_parts() + if d1 == d2: + # don't check d-d* or d*-d in this case, but do check d-d and d*-d* + starred_each = [(False, False), (True, True)] + else: + starred_each = [(False, False), (True, True), (False, True), (True, False)] + for starred1, starred2 in starred_each: + seq1 = d1.concrete_sequence(starred1) + seq2 = d2.concrete_sequence(starred2) + name1 = d1.get_name(starred1) + name2 = d2.get_name(starred2) + sequence_pairs.append((seq1, seq2)) + names.append((name1, name2)) + domains.append((d1, d2)) + + energies = nv.rna_plex_multiple(sequence_pairs, logger, temperature, parameters_filename) + + # several consecutive items are from same domain pair but with different wc's; + # group them together in the summary + groups = defaultdict(list) + for (d1, d2), energy, name_pair in zip(domain_tuples, energies, name_pairs): + domain_pair = DomainPair(d1, d2) + groups[domain_pair.name].append((energy, name_pair)) + + # one Result per domain pair + results = [] + for _, energies_and_name_pairs in groups.items(): + energies, name_pairs = zip(*energies_and_name_pairs) + excesses: List[float] = [] + for energy, (name1, name2) in energies_and_name_pairs: + if name1 is not None and name2 is not None: + logger.debug( + f'domain pair threshold: {threshold:6.2f} ' + f'rna_plex({name1}, {name2}, {temperature}) = {energy:6.2f} ') + excess = max(0.0, (threshold - energy)) + excesses.append(excess) + max_excess = max(excesses) + + max_name_length = max(len(name) for name in flatten(name_pairs)) + lines_and_energies = [(f'{name1:{max_name_length}}, ' + f'{name2:{max_name_length}}: ' + f' {energy:6.2f} kcal/mol', energy) + for energy, (name1, name2) in energies_and_name_pairs] + lines_and_energies.sort(key=lambda line_and_energy: line_and_energy[1]) + lines = [line for line, _ in lines_and_energies] + summary = '\n ' + '\n '.join(lines) + max_excess = max(0.0, max_excess) + result = Result(excess=max_excess, summary=summary, value=max_excess) + results.append(result) + + return results + + # gather pairs of domains referenced in `thresholds` + pairs = [] + pairs_set = set() + for key, _ in thresholds.items(): + if len(key) == 2: + d1, d2 = key + else: + if len(key) != 4: + raise ValueError(f'key {key} in thresholds dict must have length 2, if a pair of domains, ' + f'or 4, if a tuple (domain1, starred1, domain2, starred2)') + d1, _, d2, _ = key + if (d1, d2) in thresholds.keys(): + raise ValueError(f'cannot have key (d1,d2) in `thresholds` if (d1, starred1, d2, starred2) ' + f'is also a key in `thresholds`, but I found these keys:' + f'\n {(d1, d2)}' + f'\n {key}') + if (d1, d2) not in pairs_set: + pairs.append((d1, d2)) + pairs_set.add((d1, d2)) + + return DomainPairsConstraint(description=description, + short_description=short_description, + weight=weight, + score_transfer_function=score_transfer_function, + evaluate_bulk=evaluate_bulk, + pairs=pairs) + + def _populate_strand_list_and_pairs(strands: Iterable[Strand] | None, pairs: Iterable[Tuple[Strand, Strand]] | None) \ -> Tuple[List[Strand], List[Tuple[Strand, Strand]]]: @@ -5779,8 +5932,8 @@ def rna_duplex_strand_pairs_constraints_by_number_matching_domains( thresholds: Energy thresholds in kcal/mol. If `k` domains are complementary between the strands, then use threshold `thresholds[k]`. temperature: Temperature in Celsius. - weight: How much to weigh this :any:`Constraint`. - score_transfer_function: See :py:data:`Constraint.score_transfer_function`. + weight: See :data:`Constraint.weight`. + score_transfer_function: See :data:`Constraint.score_transfer_function`. descriptions: Long descriptions of constraint suitable for putting into constraint report. short_descriptions: Short descriptions of constraint suitable for logging to stdout. parallel: Whether to test the each pair of :any:`Strand`'s in parallel. @@ -5866,8 +6019,8 @@ def rna_plex_strand_pairs_constraints_by_number_matching_domains( thresholds: Energy thresholds in kcal/mol. If `k` domains are complementary between the strands, then use threshold `thresholds[k]`. temperature: Temperature in Celsius. - weight: How much to weigh this :any:`Constraint`. - score_transfer_function: See :py:data:`Constraint.score_transfer_function`. + weight: See :data:`Constraint.weight`. + score_transfer_function: See :data:`Constraint.score_transfer_function`. descriptions: Long descriptions of constraint suitable for putting into constraint report. short_descriptions: Short descriptions of constraint suitable for logging to stdout. parallel: Whether to test the each pair of :any:`Strand`'s in parallel. @@ -6182,13 +6335,13 @@ def lcs_domain_pairs_constraint( Args threshold: Max length of complementary subsequence allowed. - weight: How much to weigh this :any:`Constraint`. + weight: See :data:`Constraint.weight`. - score_transfer_function: See :py:data:`Constraint.score_transfer_function`. + score_transfer_function: See :data:`Constraint.score_transfer_function`. - description: Long description of constraint suitable for putting into constraint report. + description: See :data:`Constraint.description` - short_description: Short description of constraint suitable for logging to stdout. + short_description: See :data:`Constraint.short_description` pairs: Pairs of :any:`Strand`'s to compare; if not specified, checks all pairs in design. @@ -6254,13 +6407,13 @@ def lcs_strand_pairs_constraint( Args threshold: Max length of complementary subsequence allowed. - weight: How much to weigh this :any:`Constraint`. + weight: See :data:`Constraint.weight`. - score_transfer_function: See :py:data:`Constraint.score_transfer_function`. + score_transfer_function: See :data:`Constraint.score_transfer_function`. - description: Long description of constraint suitable for putting into constraint report. + description: See :data:`Constraint.description`. - short_description: Short description of constraint suitable for logging to stdout. + short_description: See :data:`Constraint.short_description`. pairs: Pairs of :any:`Strand`'s to compare; if not specified, checks all pairs in design. @@ -6424,13 +6577,13 @@ def rna_duplex_strand_pairs_constraint( :param temperature: Temperature in Celsius. :param weight: - How much to weigh this :any:`Constraint`. + See :data:`Constraint.weight`. :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param description: - Long description of constraint suitable for putting into constraint report. + See :data:`Constraint.description` :param short_description: - Short description of constraint suitable for logging to stdout. + See :data:`Constraint.short_description` :param parallel: Whether to test the each pair of :any:`Strand`'s in parallel. :param pairs: @@ -6511,13 +6664,13 @@ def rna_plex_strand_pairs_constraint( :param temperature: Temperature in Celsius. :param weight: - How much to weigh this :any:`Constraint`. + See :data:`Constraint.weight`. :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param description: - Long description of constraint suitable for putting into constraint report. + See :data:`Constraint.description` :param short_description: - Short description of constraint suitable for logging to stdout. + See :data:`Constraint.short_description` :param parallel: Whether to test the each pair of :any:`Strand`'s in parallel. :param pairs: @@ -6609,13 +6762,13 @@ def rna_cofold_strand_pairs_constraint( :param temperature: Temperature in Celsius. :param weight: - How much to weigh this :any:`Constraint`. + See :data:`Constraint.weight`. :param score_transfer_function: - See :py:data:`Constraint.score_transfer_function`. + See :data:`Constraint.score_transfer_function`. :param description: - Long description of constraint suitable for putting into constraint report. + See :data:`Constraint.description` :param short_description: - Short description of constraint suitable for logging to stdout. + See :data:`Constraint.short_description` :param parallel: Whether to test the each pair of :any:`Strand`'s in parallel. :param pairs: @@ -7437,7 +7590,7 @@ class StrandDomainAddress: """ domain_idx: int - """order in which domain appears in :py:data:`StrandDomainAddress.strand` + """order in which domain appears in :data:`StrandDomainAddress.strand` """ def neighbor_5p(self) -> StrandDomainAddress | None: @@ -8408,7 +8561,7 @@ def nupack_complex_base_pair_probability_constraint( :param base_pair_prob_by_type: Probability lower bounds for each :py:class:`BasePairType`. All :py:class:`BasePairType` comes with a default - such as :py:data:`default_interior_to_strand_probability` which will be + such as :data:`default_interior_to_strand_probability` which will be used if a lower bound is not specified for a particular type. **Note**: Despite the name of this parameter, set thresholds for unpaired @@ -8450,7 +8603,7 @@ def nupack_complex_base_pair_probability_constraint( :type base_unpaired_prob_upper_bound: Optional[Dict[BaseAddress, float]] :param temperature: - Temperature specified in °C, defaults to :py:data:`vienna_nupack.default_temperature`. + Temperature specified in °C, defaults to :data:`vienna_nupack.default_temperature`. :type temperature: float, optional :param sodium: molarity of sodium (more generally, monovalent ions such as Na+, K+, NH4+) @@ -8458,7 +8611,7 @@ def nupack_complex_base_pair_probability_constraint( :param magnesium: molarity of magnesium (Mg++) in moles per liter :param weight: - See :py:data:`Constraint.weight`, defaults to 1.0 + See :data:`Constraint.weight`, defaults to 1.0 :type weight: float, optional :param score_transfer_function: @@ -8467,11 +8620,11 @@ def nupack_complex_base_pair_probability_constraint( threshold. :type score_transfer_function: Callable[[float], float], optional :param description: - See :py:data:`Constraint.description`, defaults to None + See :data:`Constraint.description`, defaults to None :type description: str | None, optional :param short_description: - See :py:data:`Constraint.short_description` defaults to 'complex_secondary_structure_nupack' + See :data:`Constraint.short_description` defaults to 'complex_secondary_structure_nupack' :type short_description: str, optional :param parallel: From 3f19333b4a661be8e1cbdfd6bbd7c37bd451c0d2 Mon Sep 17 00:00:00 2001 From: David Doty Date: Sun, 1 Oct 2023 13:17:38 -0700 Subject: [PATCH 4/9] Update constraints.py --- nuad/constraints.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/nuad/constraints.py b/nuad/constraints.py index 55f77642..b52f156f 100644 --- a/nuad/constraints.py +++ b/nuad/constraints.py @@ -4190,12 +4190,14 @@ class Constraint(Generic[DesignPart], ABC): description: str """ - Description of the constraint, e.g., 'strand has secondary structure exceeding -2.0 kcal/mol'. + Description of the constraint, e.g., 'strand has secondary structure exceeding -2.0 kcal/mol' suitable + for printing in a long text report. """ short_description: str = '' """ - Very short description of the constraint suitable for compactly logging to the screen, e.g., 'strand_ss' + Very short description of the constraint suitable for compactly logging to the screen, where + many of these short descriptions must fit onto one line, e.g., 'strand ss' or 'dom pair nupack' """ weight: float = 1.0 @@ -5538,7 +5540,7 @@ def rna_plex_domain_pairs_nonorthogonal_constraint( :param score_transfer_function: See :data:`Constraint.score_transfer_function`. :param description: - short_description + See :data:`Constraint.description`. :param short_description: See :data:`Constraint.short_description`. :param parameters_filename: From 613676679cbfbc9919907bc4b808be63198ae58c Mon Sep 17 00:00:00 2001 From: David Doty Date: Sun, 1 Oct 2023 14:08:38 -0700 Subject: [PATCH 5/9] Update constraints.py --- nuad/constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nuad/constraints.py b/nuad/constraints.py index b52f156f..9b392a8b 100644 --- a/nuad/constraints.py +++ b/nuad/constraints.py @@ -5532,7 +5532,7 @@ def rna_plex_domain_pairs_nonorthogonal_constraint( - ``(a,d)`` is between -7.0 and -6.0 kcal/mol. For all other pairs of domains not listed as keys in the dict (including starred/unstarred versions - not specified such as (a*,b*), the constraint is not checked. + not specified such as ``(a*,b*)``, the constraint is not checked. :param temperature: temperature in Celsius :param weight: From 69972f83a55348ca4ff3af8434b479dc6a8ceb7b Mon Sep 17 00:00:00 2001 From: David Doty Date: Sun, 1 Oct 2023 20:50:29 -0700 Subject: [PATCH 6/9] closes #240: constraint to specify NUPACK/ViennaRNA non-complementary domain binding affinities --- examples/many_strands_no_common_domains.py | 2 +- examples/nonorthogonal_domains.py | 125 ++++++++ nuad/constraints.py | 321 +++++++++++++++------ nuad/vienna_nupack.py | 101 ++++++- 4 files changed, 445 insertions(+), 104 deletions(-) create mode 100644 examples/nonorthogonal_domains.py diff --git a/examples/many_strands_no_common_domains.py b/examples/many_strands_no_common_domains.py index 1e1ec189..23b3786a 100644 --- a/examples/many_strands_no_common_domains.py +++ b/examples/many_strands_no_common_domains.py @@ -55,7 +55,7 @@ def main() -> None: num_strands = 3 # num_strands = 5 - # num_strands = 10 + # num_many_strands_no_common_domains.pystrands = 10 # num_strands = 50 # num_strands = 100 # num_strands = 355 diff --git a/examples/nonorthogonal_domains.py b/examples/nonorthogonal_domains.py new file mode 100644 index 00000000..1bdba786 --- /dev/null +++ b/examples/nonorthogonal_domains.py @@ -0,0 +1,125 @@ +from __future__ import annotations + +import itertools +from typing import NamedTuple +import argparse +import os +import logging + +import nuad.constraints as nc # type: ignore +import nuad.vienna_nupack as nv # type: ignore +import nuad.search as ns # type: ignore + + +def main() -> None: + args: CLArgs = parse_command_line_arguments() + + # dc.logger.setLevel(logging.DEBUG) + nc.logger.setLevel(logging.INFO) + + random_seed = 1 + + domain_length = 20 + + # many 1-domain strands with no common domains, every domain length = 20 + + # num_strands = 3 + # num_strands = 5 + num_strands = 10 + # num_strands = 11 + # num_strands = 50 + # num_strands = 100 + # num_strands = 355 + + design = nc.Design() + # di + # strand i is [------------------> + for i in range(num_strands): + design.add_strand([f'd{i}']) + + some_fixed = False + # some_fixed = True + if some_fixed: + # fix domain of strand 0 + for domain in design.strands[0].domains: + domain.set_fixed_sequence('A' * domain_length) + + domain_pool_20 = nc.DomainPool(f'length-20_domains', domain_length) + + for i, strand in enumerate(design.strands): + if some_fixed and i == 0: + continue + for domain in strand.domains: + domain.pool = domain_pool_20 + + min_energy = -domain_length / 4 + max_distance = num_strands + + epsilon = 1.0 + thresholds = {} + for d1, d2 in itertools.combinations_with_replacement(design.domains, 2): + if d1.name > d2.name: + d1, d2 = d2, d1 + name1 = d1.name + name2 = d2.name + idx1 = int(name1[1:]) + idx2 = int(name2[1:]) + target_distance = abs(idx1 - idx2) if idx1 != idx2 else -min_energy + 4 + target_energy = (target_distance / max_distance) * min_energy + energy_low = target_energy - epsilon + energy_high = target_energy + thresholds[(d1, d2)] = (energy_low, energy_high) + + domain_nupack_pair_nonorth_constraint = nc.nupack_domain_pairs_nonorthogonal_constraint( + thresholds=thresholds, temperature=37, short_description='dom pairs nonorth nupack') + + domain_rna_plex_pair_nonorth_constraint = nc.rna_plex_domain_pairs_nonorthogonal_constraint( + thresholds=thresholds, temperature=37, short_description='dom pairs nonorth plex') + + params = ns.SearchParameters(constraints=[ + domain_nupack_pair_nonorth_constraint, + # domain_rna_plex_pair_nonorth_constraint, + ], + out_directory=args.directory, + restart=args.restart, + random_seed=random_seed, + max_iterations=None, + # save_sequences_for_all_updates=True, + save_report_for_all_updates=True, + # save_design_for_all_updates=True, + force_overwrite=True, + # log_time=True, + scrolling_output=False, + ) + ns.search_for_sequences(design, params) + + +# command-line arguments +class CLArgs(NamedTuple): + directory: str + restart: bool + + +def parse_command_line_arguments() -> CLArgs: + default_directory = os.path.join('output', ns.script_name_no_ext()) + + parser = argparse.ArgumentParser( # noqa + description='Small example using nonorthogonal domains.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('-o', '--output-dir', type=str, default=default_directory, + help='directory in which to place output files') + parser.add_argument('-r', '--restart', action='store_true', + help='If true, then assumes output directory contains output of search that was ' + 'cancelled, to restart ' + 'from. Similar to -i option, but will automatically find the most recent design ' + '(assuming they are numbered with a number such as -84), and will start the ' + 'numbering from there (i.e., the next files to be written upon improving the ' + 'design will have -85).') + + args = parser.parse_args() + + return CLArgs(directory=args.output_dir, restart=args.restart) + + +if __name__ == '__main__': + main() diff --git a/nuad/constraints.py b/nuad/constraints.py index 9b392a8b..23653fe1 100644 --- a/nuad/constraints.py +++ b/nuad/constraints.py @@ -2805,12 +2805,22 @@ def set_fixed_sequence(self, seq: str) -> None: @dataclass class DomainPair(Part, Iterable[Domain]): domain1: Domain + "First domain" + domain2: Domain + "Second domain" + + starred1: bool = False + "Whether first domain is starred (not used in most constraints)" + + starred2: bool = False + "Whether second domain is starred (not used in most constraints)" def __post_init__(self) -> None: - # make this symmetric so make dict lookups work + # make this symmetric so dict lookups work no matter the order if self.domain1.name > self.domain2.name: self.domain1, self.domain2 = self.domain2, self.domain1 + self.starred1, self.starred2 = self.starred2, self.starred1 # needed to avoid unhashable type error; see # https://docs.python.org/3/reference/datamodel.html#object.__hash__ @@ -2818,10 +2828,10 @@ def __post_init__(self) -> None: @property def name(self) -> str: - return f'{self.domain1.name}, {self.domain2.name}' + return self.domain1.get_name(self.starred1) + ", " + self.domain2.get_name(self.starred2) def key(self) -> str: - return f'DomainPair[{self.domain1.name}, {self.domain2.name}]' + return f'DomainPair[{self.name}]' @staticmethod def name_of_part_type(self) -> str: @@ -4461,8 +4471,8 @@ def call_evaluate(self, seqs: Tuple[str, ...], part: DesignPart | None) -> Resul @dataclass(eq=False) class BulkConstraint(Constraint[DesignPart], Generic[DesignPart], ABC): - evaluate_bulk: Callable[[Sequence[DesignPart]], - List[Result]] = lambda _: _raise_unreachable() + evaluate_bulk: Callable[[Sequence[DesignPart]], List[Result]] = \ + lambda _: _raise_unreachable() def call_evaluate_bulk(self, parts: Sequence[DesignPart]) -> List[Result]: results: List[Result[DesignPart]] = (self.evaluate_bulk)(parts) # noqa @@ -5486,15 +5496,166 @@ def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: pairs=pairs_tuple) +def get_domain_pairs_from_thresholds_dict(thresholds): + # gather pairs of domains referenced in `thresholds` + domain_pairs = [] + for key, _ in thresholds.items(): + if len(key) == 2: + d1, d2 = key + starred1 = starred2 = False + else: + if len(key) != 4: + raise ValueError(f'key {key} in thresholds dict must have length 2, if a pair of domains, ' + f'or 4, if a tuple (domain1, starred1, domain2, starred2)') + d1, starred1, d2, starred2 = key + if (d1, d2) in thresholds.keys(): + raise ValueError(f'cannot have key (d1,d2) in `thresholds` if (d1, starred1, d2, starred2) ' + f'is also a key in `thresholds`, but I found these keys:' + f'\n {(d1, d2)}' + f'\n {key}') + domain_pair = DomainPair(d1, d2, starred1, starred2) + domain_pairs.append(domain_pair) + domain_pairs = tuple(domain_pairs) + return domain_pairs + + +PairsEvaluationFunction = Callable[ + [Sequence[Tuple[str, str]], logging.Logger, float, str, float], + Tuple[float] +] + + +def domain_pairs_nonorthogonal_constraint( + evaluation_function: PairsEvaluationFunction, + tool_name: str, + thresholds: Dict[Tuple[Domain, bool, Domain, bool] | Tuple[Domain, Domain], Tuple[float, float]], + temperature: float = nv.default_temperature, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = lambda x: x, + description: str | None = None, + short_description: str = 'rna_plex_dom_pairs_nonorth', + max_energy: float = 0.0, + parameters_filename: str = nv.default_vienna_rna_parameter_filename +) -> DomainPairsConstraint: + # common code for evaluating nonorthogonal domain energies using RNAduplex, RNAplex, RNAcofold + + if description is None: + description = f'domain pair {tool_name} energies for nonorthogonal domains at {temperature}C' + + domain_pairs = get_domain_pairs_from_thresholds_dict(thresholds) + + # normalize thresholds dict so all keys are 4-tuples + thresholds_normalized = {} + for key, interval in thresholds.items(): + if len(key) == 2: + thresholds_normalized[(key[0], False, key[1], False)] = interval + else: + assert len(key) == 4 + thresholds_normalized[key] = interval + + thresholds = thresholds_normalized + + def evaluate_bulk(dom_pairs: Iterable[DomainPair]) -> List[Result]: + sequence_pairs: List[Tuple[str, str]] = [] + name_pairs: List[Tuple[str, str]] = [] + domain_tuples: List[Tuple[Domain, Domain]] = [] + for pair in dom_pairs: + dom1, dom2 = pair.individual_parts() + star1 = pair.starred1 + star2 = pair.starred2 + + seq1 = dom1.concrete_sequence(star1) + seq2 = dom2.concrete_sequence(star2) + name1 = dom1.get_name(star1) + name2 = dom2.get_name(star2) + sequence_pairs.append((seq1, seq2)) + name_pairs.append((name1, name2)) + domain_tuples.append((dom1, dom2)) + + energies = evaluation_function(sequence_pairs, logger, temperature, parameters_filename, max_energy) + + results = [] + for dom_pair, energy in zip(dom_pairs, energies): + dom1, dom2 = dom_pair.individual_parts() + star1 = dom_pair.starred1 + star2 = dom_pair.starred2 + if (dom1, star1, dom2, star2) in thresholds: + low_threshold, high_threshold = thresholds[(dom1, star1, dom2, star2)] + elif (dom2, star2, dom1, star1) in thresholds: + low_threshold, high_threshold = thresholds[(dom2, star2, dom1, star1)] + else: + raise ValueError(f'could not find threshold for domain pair ({dom1.name}, {dom2.name})') + if energy < low_threshold: + excess = low_threshold - energy + elif energy > high_threshold: + excess = energy - high_threshold + else: + excess = 0 + value = f'{energy:6.2f} kcal/mol' + summary = (f'{value}; target interval: [{low_threshold}, {high_threshold}]') + result = Result(excess=excess, value=value, summary=summary) + results.append(result) + + return results + + return DomainPairsConstraint(description=description, + short_description=short_description, + weight=weight, + score_transfer_function=score_transfer_function, + evaluate_bulk=evaluate_bulk, + domain_pairs=domain_pairs) + + +def nupack_domain_pairs_nonorthogonal_constraint( + thresholds: Dict[Tuple[Domain, bool, Domain, bool] | Tuple[Domain, Domain], Tuple[float, float]], + temperature: float = nv.default_temperature, + sodium: float = nv.default_sodium, + magnesium: float = nv.default_magnesium, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = default_score_transfer_function, + description: str | None = None, + short_description: str = 'dom_pair_nupack_nonorth', + parameters_filename: str = nv.default_vienna_rna_parameter_filename, + max_energy: float = 0.0, +) -> DomainPairsConstraint: + """ + Similar to :meth:`rna_plex_domain_pairs_nonorthogonal_constraint`, but uses NUPACK instead of RNAplex. + Only two parameters `sodium` and `magnesium` are different; documented here. + + :param sodium: + concentration of sodium (more generally, monovalent ions such as Na+, K+, NH4+) + in moles per liter + :param magnesium: + concentration of magnesium (Mg++) in moles per liter + """ + _check_nupack_installed() + + eval_func = nv.nupack_multiple_with_sodium_magnesium(sodium=sodium, magnesium=magnesium) + + return domain_pairs_nonorthogonal_constraint( + evaluation_function=eval_func, + tool_name='NUPACK', + thresholds=thresholds, + temperature=temperature, + weight=weight, + score_transfer_function=score_transfer_function, + description=description, + short_description=short_description, + parameters_filename=parameters_filename, + max_energy=max_energy, + ) + + def rna_plex_domain_pairs_nonorthogonal_constraint( thresholds: Dict[Tuple[Domain, bool, Domain, bool] | Tuple[Domain, Domain], Tuple[float, float]], temperature: float = nv.default_temperature, weight: float = 1.0, score_transfer_function: Callable[[float], float] = lambda x: x, description: str | None = None, - short_description: str = 'rna_plex_dom_pairs', - parameters_filename: str = nv.default_vienna_rna_parameter_filename) \ - -> DomainPairsConstraint: + short_description: str = 'rna_plex_dom_pairs_nonorth', + parameters_filename: str = nv.default_vienna_rna_parameter_filename, + max_energy: float = 0.0, +) -> DomainPairsConstraint: """ Returns constraint that checks given pairs of :any:`Domain`'s for interaction energy in a given interval, using ViennaRNA's RNAplex executable. @@ -5551,92 +5712,76 @@ def rna_plex_domain_pairs_nonorthogonal_constraint( """ _check_vienna_rna_installed() - if description is None: - description = f'domain pair RNAplex energies for nonorthogonal domains at {temperature}C' - - def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: - sequence_pairs: List[Tuple[str, str]] = [] - names: List[Tuple[str, str]] = [] - domains: List[Tuple[Domain, Domain]] = [] - for pair in domain_pairs: - d1, d2 = pair.individual_parts() - if d1 == d2: - # don't check d-d* or d*-d in this case, but do check d-d and d*-d* - starred_each = [(False, False), (True, True)] - else: - starred_each = [(False, False), (True, True), (False, True), (True, False)] - for starred1, starred2 in starred_each: - seq1 = d1.concrete_sequence(starred1) - seq2 = d2.concrete_sequence(starred2) - name1 = d1.get_name(starred1) - name2 = d2.get_name(starred2) - sequence_pairs.append((seq1, seq2)) - names.append((name1, name2)) - domains.append((d1, d2)) - - energies = nv.rna_plex_multiple(sequence_pairs, logger, temperature, parameters_filename) + return domain_pairs_nonorthogonal_constraint( + evaluation_function=nv.rna_plex_multiple, + tool_name='RNAplex', + thresholds=thresholds, + temperature=temperature, + weight=weight, + score_transfer_function=score_transfer_function, + description=description, + short_description=short_description, + parameters_filename=parameters_filename, + max_energy=max_energy, + ) - # several consecutive items are from same domain pair but with different wc's; - # group them together in the summary - groups = defaultdict(list) - for (d1, d2), energy, name_pair in zip(domain_tuples, energies, name_pairs): - domain_pair = DomainPair(d1, d2) - groups[domain_pair.name].append((energy, name_pair)) - # one Result per domain pair - results = [] - for _, energies_and_name_pairs in groups.items(): - energies, name_pairs = zip(*energies_and_name_pairs) - excesses: List[float] = [] - for energy, (name1, name2) in energies_and_name_pairs: - if name1 is not None and name2 is not None: - logger.debug( - f'domain pair threshold: {threshold:6.2f} ' - f'rna_plex({name1}, {name2}, {temperature}) = {energy:6.2f} ') - excess = max(0.0, (threshold - energy)) - excesses.append(excess) - max_excess = max(excesses) +def rna_duplex_domain_pairs_nonorthogonal_constraint( + thresholds: Dict[Tuple[Domain, bool, Domain, bool] | Tuple[Domain, Domain], Tuple[float, float]], + temperature: float = nv.default_temperature, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = lambda x: x, + description: str | None = None, + short_description: str = 'rna_plex_dom_pairs_nonorth', + parameters_filename: str = nv.default_vienna_rna_parameter_filename, + max_energy: float = 0.0, +) -> DomainPairsConstraint: + """ + Similar to :meth:`rna_plex_domain_pairs_nonorthogonal_constraint`, but uses RNAduplex instead of RNAplex. + """ + _check_vienna_rna_installed() - max_name_length = max(len(name) for name in flatten(name_pairs)) - lines_and_energies = [(f'{name1:{max_name_length}}, ' - f'{name2:{max_name_length}}: ' - f' {energy:6.2f} kcal/mol', energy) - for energy, (name1, name2) in energies_and_name_pairs] - lines_and_energies.sort(key=lambda line_and_energy: line_and_energy[1]) - lines = [line for line, _ in lines_and_energies] - summary = '\n ' + '\n '.join(lines) - max_excess = max(0.0, max_excess) - result = Result(excess=max_excess, summary=summary, value=max_excess) - results.append(result) + return domain_pairs_nonorthogonal_constraint( + evaluation_function=nv.rna_duplex_multiple, + tool_name='RNAduplex', + thresholds=thresholds, + temperature=temperature, + weight=weight, + score_transfer_function=score_transfer_function, + description=description, + short_description=short_description, + parameters_filename=parameters_filename, + max_energy=max_energy, + ) - return results - # gather pairs of domains referenced in `thresholds` - pairs = [] - pairs_set = set() - for key, _ in thresholds.items(): - if len(key) == 2: - d1, d2 = key - else: - if len(key) != 4: - raise ValueError(f'key {key} in thresholds dict must have length 2, if a pair of domains, ' - f'or 4, if a tuple (domain1, starred1, domain2, starred2)') - d1, _, d2, _ = key - if (d1, d2) in thresholds.keys(): - raise ValueError(f'cannot have key (d1,d2) in `thresholds` if (d1, starred1, d2, starred2) ' - f'is also a key in `thresholds`, but I found these keys:' - f'\n {(d1, d2)}' - f'\n {key}') - if (d1, d2) not in pairs_set: - pairs.append((d1, d2)) - pairs_set.add((d1, d2)) +def rna_cofold_domain_pairs_nonorthogonal_constraint( + thresholds: Dict[Tuple[Domain, bool, Domain, bool] | Tuple[Domain, Domain], Tuple[float, float]], + temperature: float = nv.default_temperature, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = lambda x: x, + description: str | None = None, + short_description: str = 'rna_plex_dom_pairs_nonorth', + parameters_filename: str = nv.default_vienna_rna_parameter_filename, + max_energy: float = 0.0, +) -> DomainPairsConstraint: + """ + Similar to :meth:`rna_plex_domain_pairs_nonorthogonal_constraint`, but uses RNAcofold instead of RNAplex. + """ + _check_vienna_rna_installed() - return DomainPairsConstraint(description=description, - short_description=short_description, - weight=weight, - score_transfer_function=score_transfer_function, - evaluate_bulk=evaluate_bulk, - pairs=pairs) + return domain_pairs_nonorthogonal_constraint( + evaluation_function=nv.rna_cofold_multiple, + tool_name='RNAcofold', + thresholds=thresholds, + temperature=temperature, + weight=weight, + score_transfer_function=score_transfer_function, + description=description, + short_description=short_description, + parameters_filename=parameters_filename, + max_energy=max_energy, + ) def _populate_strand_list_and_pairs(strands: Iterable[Strand] | None, diff --git a/nuad/vienna_nupack.py b/nuad/vienna_nupack.py index 4029f054..d1ea22ac 100644 --- a/nuad/vienna_nupack.py +++ b/nuad/vienna_nupack.py @@ -19,7 +19,7 @@ import subprocess as sub import sys from multiprocessing.pool import ThreadPool -from typing import Sequence, Tuple, List, Iterable +from typing import Sequence, Tuple, List, Iterable, Callable import numpy as np @@ -65,12 +65,13 @@ def calculate_strand_association_penalty(temperature: float, num_seqs: int) -> f return adjust * (num_seqs - 1) -def pfunc(seqs: str | Tuple[str, ...], - temperature: float = default_temperature, - sodium: float = default_sodium, - magnesium: float = default_magnesium, - strand_association_penalty: bool = True, - ) -> float: +def pfunc( + seqs: str | Tuple[str, ...], + temperature: float = default_temperature, + sodium: float = default_sodium, + magnesium: float = default_magnesium, + strand_association_penalty: bool = True, +) -> float: """ Calls pfunc from NUPACK 4 (http://www.nupack.org/) on a complex consisting of the unique strands in seqs, returns energy ("delta G"), i.e., generally a negative number. @@ -480,6 +481,62 @@ def rna_plex_multiple(pairs: Sequence[Tuple[str, str]], return tuple(energies) +def nupack_multiple_with_sodium_magnesium( + sodium: float = default_sodium, + magnesium: float = default_magnesium, +) -> nc.PairsEvaluationFunction: + """ + Used when we want a :any:`BulkConstraint` using NUPACK + (even though most of them are :any:`SingularConstraint`s). + + Calls NUPACK (specifically, the function :meth:`binding`) on a list of pairs: + [ (seq1, seq2), (seq2, seq3), (seq4, seq5), ... ] + where seqi is a string over {A,C,T,G}. Temperature is in Celsius. + Returns a list (in the same order as seqpairs) of free energies. + + :param sodium: + molarity of sodium in moles per liter + :param magnesium: + molarity of magnesium in moles per liter + :return: + list of free energies, in the same order as `pairs` + """ + + def nupack_multiple( + pairs: Sequence[Tuple[str, str]], + logger: logging.Logger = logging.root, + temperature: float = default_temperature, + parameters_filename: str = default_vienna_rna_parameter_filename, + max_energy: float = 0.0, + ) -> Tuple[float]: + """ + :param pairs: + sequence (list or tuple) of pairs of DNA sequences + :param logger: + logger to use for printing error messages + :param temperature: + temperature in Celsius + :param parameters_filename: + name of parameters file for NUPACK + :param max_energy: + This is the maximum energy possible to assign. If NUPACK reports any energies larger than this, + they will be changed to `max_energy`. This is useful in case two sequences have no possible + base pairs between them (e.g., CCCC and TTTT), in which case RNAplex assigns a free energy + of 100000 (perhaps its approximation of infinity). But for meaningful comparison and particularly + for graphing energies, it's nice if there's not some value several orders of magnitude larger + than all the rest. + """ + energies = [] + for seq1, seq2 in pairs: + energy = binding(seq1, seq2, temperature=temperature, sodium=sodium, magnesium=magnesium) + energy = min(energy, max_energy) + energies.append(energy) + + return tuple(energies) + + return nupack_multiple + + def rna_plex_multiple_parallel( thread_pool: ThreadPool, pairs: Sequence[Tuple[str, str]], @@ -535,11 +592,13 @@ def _fix_filename_windows(parameters_filename: str) -> str: return parameters_filename -def rna_cofold_multiple(seq_pairs: Sequence[Tuple[str, str]], - logger: logging.Logger = logging.root, - temperature: float = default_temperature, - parameters_filename: str = default_vienna_rna_parameter_filename, - ) -> List[float]: +def rna_cofold_multiple( + seq_pairs: Sequence[Tuple[str, str]], + logger: logging.Logger = logging.root, + temperature: float = default_temperature, + parameters_filename: str = default_vienna_rna_parameter_filename, + max_energy: float = 0.0, +) -> Tuple[float]: """ Calls RNAcofold (from ViennaRNA package: https://www.tbi.univie.ac.at/RNA/) on a list of pairs, specifically: @@ -555,8 +614,15 @@ def rna_cofold_multiple(seq_pairs: Sequence[Tuple[str, str]], temperature in Celsius :param parameters_filename: name of NUPACK parameters file + :param max_energy: + This is the maximum energy possible to assign. If RNAcofold reports any energies larger than this, + they will be changed to `max_energy`. This is useful in case two sequences have no possible + base pairs between them (e.g., CCCC and TTTT), in which case RNAcofold assigns a free energy + of 100000 (perhaps its approximation of infinity). But for meaningful comparison and particularly + for graphing energies, it's nice if there's not some value several orders of magnitude larger + than all the rest. :return: - list of free energies, in the same order as `seq_pairs` + tuple of free energies, in the same order as `seq_pairs` """ # NB: the string NA_parameter_set needs to be exactly the intended filename; @@ -590,11 +656,16 @@ def rna_cofold_multiple(seq_pairs: Sequence[Tuple[str, str]], lines = output.split('\n') dg_list: List[float] = [] for line in lines[:-1]: - dg_list.append(-float(line.split(':')[1].split('(')[1].split(')')[0])) + energy = -float(line.split(':')[1].split('(')[1].split(')')[0]) + energy = min(energy, max_energy) + dg_list.append(energy) + if len(lines) - 1 != len(seq_pairs): raise AssertionError(f'lengths do not match: #lines:{len(lines) - 1} #seqpairs:{len(seq_pairs)}') - return dg_list + dg_tuple = tuple(dg_list) + + return dg_tuple _wctable = str.maketrans('ACGTacgt', 'TGCAtgca') From 52718c68327f6935b14ebcfa00aae9f90dd00330 Mon Sep 17 00:00:00 2001 From: David Doty Date: Sun, 1 Oct 2023 20:58:09 -0700 Subject: [PATCH 7/9] fixed docstring --- examples/nonorthogonal_domains.py | 3 ++- nuad/vienna_nupack.py | 42 +++++++++++++++---------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/examples/nonorthogonal_domains.py b/examples/nonorthogonal_domains.py index 1bdba786..861de1ad 100644 --- a/examples/nonorthogonal_domains.py +++ b/examples/nonorthogonal_domains.py @@ -57,7 +57,8 @@ def main() -> None: epsilon = 1.0 thresholds = {} - for d1, d2 in itertools.combinations_with_replacement(design.domains, 2): + # for d1, d2 in itertools.combinations_with_replacement(design.domains, 2): + for d1, d2 in itertools.combinations(design.domains, 2): if d1.name > d2.name: d1, d2 = d2, d1 name1 = d1.name diff --git a/nuad/vienna_nupack.py b/nuad/vienna_nupack.py index d1ea22ac..d21b6b18 100644 --- a/nuad/vienna_nupack.py +++ b/nuad/vienna_nupack.py @@ -6,7 +6,9 @@ :meth:`secondary_structure_single_strand` and :meth:`binding`), :meth:`nupack_complex_base_pair_probabilities` (for calculating base pair probabilities with NUPACK), :meth:`rna_duplex_multiple` (for calculating an approximation to two-strand complex free energy -that is much faster than calling :meth:`pfunc` on the same pair of strands). +that is much faster than calling :meth:`pfunc` on the same pair of strands), +and +:meth:`rna_plex_multiple` (which is even faster than :meth:`rna_duplex_multiple`). """ # noqa from __future__ import annotations @@ -487,12 +489,11 @@ def nupack_multiple_with_sodium_magnesium( ) -> nc.PairsEvaluationFunction: """ Used when we want a :any:`BulkConstraint` using NUPACK - (even though most of them are :any:`SingularConstraint`s). + (even though most of them are :any:`SingularConstraint`'s). Calls NUPACK (specifically, the function :meth:`binding`) on a list of pairs: [ (seq1, seq2), (seq2, seq3), (seq4, seq5), ... ] - where seqi is a string over {A,C,T,G}. Temperature is in Celsius. - Returns a list (in the same order as seqpairs) of free energies. + where seqi is a string over {A,C,T,G}. :param sodium: molarity of sodium in moles per liter @@ -509,23 +510,22 @@ def nupack_multiple( parameters_filename: str = default_vienna_rna_parameter_filename, max_energy: float = 0.0, ) -> Tuple[float]: - """ - :param pairs: - sequence (list or tuple) of pairs of DNA sequences - :param logger: - logger to use for printing error messages - :param temperature: - temperature in Celsius - :param parameters_filename: - name of parameters file for NUPACK - :param max_energy: - This is the maximum energy possible to assign. If NUPACK reports any energies larger than this, - they will be changed to `max_energy`. This is useful in case two sequences have no possible - base pairs between them (e.g., CCCC and TTTT), in which case RNAplex assigns a free energy - of 100000 (perhaps its approximation of infinity). But for meaningful comparison and particularly - for graphing energies, it's nice if there's not some value several orders of magnitude larger - than all the rest. - """ + # :param pairs: + # sequence (list or tuple) of pairs of DNA sequences + # :param logger: + # logger to use for printing error messages + # :param temperature: + # temperature in Celsius + # :param parameters_filename: + # name of parameters file for NUPACK + # :param max_energy: + # This is the maximum energy possible to assign. If NUPACK reports any energies larger than this, + # they will be changed to `max_energy`. This is useful in case two sequences have no possible + # base pairs between them (e.g., CCCC and TTTT), in which case RNAplex assigns a free energy + # of 100000 (perhaps its approximation of infinity). But for meaningful comparison and particularly + # for graphing energies, it's nice if there's not some value several orders of magnitude larger + # than all the rest. + energies = [] for seq1, seq2 in pairs: energy = binding(seq1, seq2, temperature=temperature, sodium=sodium, magnesium=magnesium) From a89ff57b1361b40e160fad50645235ffd2ec9bb1 Mon Sep 17 00:00:00 2001 From: Dave Doty Date: Mon, 2 Oct 2023 13:50:53 -0700 Subject: [PATCH 8/9] modified docstrings --- nuad/constraints.py | 48 ++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/nuad/constraints.py b/nuad/constraints.py index 23653fe1..3b479227 100644 --- a/nuad/constraints.py +++ b/nuad/constraints.py @@ -4553,13 +4553,15 @@ class ConstraintWithDomainPairs(Constraint[DesignPart], Generic[DesignPart]): # """ List of :any:`DomainPair`'s to check; if not specified, all pairs in :any:`Design` are checked. - This is set internally in the constructor based on the optional ``__init__`` parameter `pairs`. + This can be specified manmually, or alternately is set internally in the constructor based on + the optional ``__init__`` parameter `pairs`. """ pairs: InitVar[Iterable[Tuple[Domain, Domain], ...] | None] = None """ Init-only variable (specified in constructor, but is not a field in the class) for specifying - pairs of domains to check; if not specified, all pairs in :any:`Design` are checked. + pairs of domains to check; if not specified, all pairs in :any:`Design` are checked, unless + :data:`ConstraintWithDomainPairs.domain_pairs` is specified. """ check_domain_against_itself: bool = True @@ -4569,8 +4571,11 @@ class ConstraintWithDomainPairs(Constraint[DesignPart], Generic[DesignPart]): # """ def __post_init__(self, pairs: Iterable[Tuple[Strand, Strand]] | None) -> None: - domain_pairs = None if pairs is None else tuple(DomainPair(d1, d2) for d1, d2 in pairs) - object.__setattr__(self, 'domain_pairs', domain_pairs) + if self.domain_pairs is None: + if self.pairs is not None: + raise ValueError(f'can only specify at most one of parameters pairs or domain_pairs') + domain_pairs = None if pairs is None else tuple(DomainPair(d1, d2) for d1, d2 in pairs) + object.__setattr__(self, 'domain_pairs', domain_pairs) @dataclass(eq=False) @@ -4579,13 +4584,15 @@ class ConstraintWithStrandPairs(Constraint[DesignPart], Generic[DesignPart]): # """ List of :any:`StrandPair`'s to check; if not specified, all pairs in :any:`Design` are checked. - This is set internally in the constructor based on the optional ``__init__`` parameter `pairs`. + This can be specified manmually, or alternately is set internally in the constructor based on + the optional ``__init__`` parameter `pairs`. """ pairs: InitVar[Iterable[Tuple[Strand, Strand], ...] | None] = None """ Init-only variable (specified in constructor, but is not a field in the class) for specifying - pairs of strands; if not specified, all pairs in :any:`Design` are checked. + pairs of strands; if not specified, all pairs in :any:`Design` are checked, unless + :data:`ConstraintWithStrandPairs.strand_pairs` is specified. """ check_strand_against_itself: bool = True @@ -4598,8 +4605,11 @@ class ConstraintWithStrandPairs(Constraint[DesignPart], Generic[DesignPart]): # # or it may be simplest just to remove the frozen and eq from annotation and use default id-based hash def __post_init__(self, pairs: Iterable[Tuple[Strand, Strand]] | None) -> None: - strand_pairs = None if pairs is None else tuple(StrandPair(s1, s2) for s1, s2 in pairs) - object.__setattr__(self, 'strand_pairs', strand_pairs) + if self.strand_pairs is None: + if self.pairs is not None: + raise ValueError(f'can only specify at most one of parameters pairs or strand_pairs') + strand_pairs = None if pairs is None else tuple(StrandPair(s1, s2) for s1, s2 in pairs) + object.__setattr__(self, 'strand_pairs', strand_pairs) @dataclass(eq=False) # type: ignore @@ -5579,31 +5589,37 @@ def evaluate_bulk(dom_pairs: Iterable[DomainPair]) -> List[Result]: dom1, dom2 = dom_pair.individual_parts() star1 = dom_pair.starred1 star2 = dom_pair.starred2 + if (dom1, star1, dom2, star2) in thresholds: low_threshold, high_threshold = thresholds[(dom1, star1, dom2, star2)] elif (dom2, star2, dom1, star1) in thresholds: low_threshold, high_threshold = thresholds[(dom2, star2, dom1, star1)] else: - raise ValueError(f'could not find threshold for domain pair ({dom1.name}, {dom2.name})') + raise ValueError(f'could not find threshold for domain pair ' + f'({dom1.get_name(star1)}, {dom2.get_name(star2)})') + if energy < low_threshold: excess = low_threshold - energy elif energy > high_threshold: excess = energy - high_threshold else: excess = 0 + value = f'{energy:6.2f} kcal/mol' - summary = (f'{value}; target interval: [{low_threshold}, {high_threshold}]') + summary = (f'{value}; target: [{low_threshold}, {high_threshold}]') result = Result(excess=excess, value=value, summary=summary) results.append(result) return results - return DomainPairsConstraint(description=description, - short_description=short_description, - weight=weight, - score_transfer_function=score_transfer_function, - evaluate_bulk=evaluate_bulk, - domain_pairs=domain_pairs) + constraint = DomainPairsConstraint(description=description, + short_description=short_description, + weight=weight, + score_transfer_function=score_transfer_function, + evaluate_bulk=evaluate_bulk, + domain_pairs=domain_pairs) + + return constraint def nupack_domain_pairs_nonorthogonal_constraint( From c36b4ee15b00ee928b2739493e09059a041ba153 Mon Sep 17 00:00:00 2001 From: Dave Doty Date: Mon, 2 Oct 2023 13:53:44 -0700 Subject: [PATCH 9/9] Update many_strands_no_common_domains.py --- examples/many_strands_no_common_domains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/many_strands_no_common_domains.py b/examples/many_strands_no_common_domains.py index 23b3786a..1e1ec189 100644 --- a/examples/many_strands_no_common_domains.py +++ b/examples/many_strands_no_common_domains.py @@ -55,7 +55,7 @@ def main() -> None: num_strands = 3 # num_strands = 5 - # num_many_strands_no_common_domains.pystrands = 10 + # num_strands = 10 # num_strands = 50 # num_strands = 100 # num_strands = 355