diff --git a/.github/workflows/docs-check.yml b/.github/workflows/docs-check.yml index 1fffaa1f..2d33ffdb 100644 --- a/.github/workflows/docs-check.yml +++ b/.github/workflows/docs-check.yml @@ -1,4 +1,3 @@ - name: "Docs Check" on: pull_request @@ -8,25 +7,25 @@ jobs: container: image: unhumbleben/nupack:latest steps: - - uses: actions/checkout@v2.3.4 - - name: Set up Python - uses: actions/setup-python@v2 - with: + - uses: actions/checkout@v2.3.4 + - name: Set up Python + uses: actions/setup-python@v3 + with: python-version: '3.9' - - name: Upgrade pip - run: python -m pip install --upgrade pip - - name: Install NUPACK - run: | - python -m pip install -U nupack -f /nupack/package - # got from here: https://github.com/marketplace/actions/python-dependency-installation - # should install dependencies in requirements.txt - - name: Install Python dependencies - uses: py-actions/py-dependency-install@v2 - - name: Install Sphinx - run: | - python -m pip install sphinx sphinx_rtd_theme - - name: Move to docs folder and build - run: | - cd doc - pwd - sphinx-build -T -E -W -b html . _build + - name: Upgrade pip + run: python -m pip install --upgrade pip + - name: Install NUPACK + run: | + python -m pip install -U nupack -f /nupack/package + # got from here: https://github.com/marketplace/actions/python-dependency-installation + # should install dependencies in requirements.txt + - name: Install Python dependencies + uses: py-actions/py-dependency-install@v2 + - name: Install Sphinx + run: | + python -m pip install sphinx sphinx_rtd_theme + - name: Move to docs folder and build + run: | + cd doc + pwd + sphinx-build -T -E -W -b html . 
_build diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9230b6a2..ffeb3796 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,9 +24,9 @@ jobs: LICENSE.txt # Publish to PyPI - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: '3.9' - name: Upgrade pip diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml index c78912ea..4a267824 100644 --- a/.github/workflows/run_unit_tests.yml +++ b/.github/workflows/run_unit_tests.yml @@ -13,22 +13,22 @@ jobs: image: unhumbleben/nupack:latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [ 3.7, 3.8, 3.9 ] steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: python -m pip install --upgrade pip - - name: Install NUPACK - run: | - python -m pip install -U nupack -f /nupack/package - - name: Install dependencies - run: | - if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi - - name: Test with unittest - run: | - python -m unittest -v tests/test.py + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Upgrade pip + run: python -m pip install --upgrade pip + - name: Install NUPACK + run: | + python -m pip install -U nupack -f /nupack/package + - name: Install dependencies + run: | + if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi + - name: Test with unittest + run: | + python -m unittest -v tests/test.py diff --git a/examples/many_strands_no_common_domains.py b/examples/many_strands_no_common_domains.py index c9f1e411..1e1ec189 100644 --- 
a/examples/many_strands_no_common_domains.py +++ b/examples/many_strands_no_common_domains.py @@ -12,7 +12,8 @@ def f(x: int | float) -> float: - return x/2 + return x / 2 + # command-line arguments class CLArgs(NamedTuple): @@ -52,8 +53,8 @@ def main() -> None: # many 4-domain strands with no common domains, 4 domains each, every domain length = 10 # just for testing parallel processing - # num_strands = 3 - num_strands = 5 + num_strands = 3 + # num_strands = 5 # num_strands = 10 # num_strands = 50 # num_strands = 100 @@ -141,7 +142,10 @@ def main() -> None: parallel=parallel) strand_pairs_rna_duplex_constraint = nc.rna_duplex_strand_pairs_constraint( - threshold=-1.0, temperature=52, short_description='StrandPairRNA', parallel=parallel) + threshold=-1.0, temperature=52, short_description='RNAduplex', parallel=parallel) + + strand_pairs_rna_plex_constraint = nc.rna_plex_strand_pairs_constraint( + threshold=-1.0, temperature=52, short_description='RNAplex', parallel=parallel) strand_individual_ss_constraint = nc.nupack_strand_free_energy_constraint( threshold=-1.0, temperature=52, short_description='StrandSS', parallel=parallel) @@ -151,8 +155,9 @@ def main() -> None: params = ns.SearchParameters(constraints=[ # domain_nupack_ss_constraint, - strand_individual_ss_constraint, + # strand_individual_ss_constraint, strand_pairs_rna_duplex_constraint, + strand_pairs_rna_plex_constraint, # strand_pair_nupack_constraint, # domain_pair_nupack_constraint, # domain_pairs_rna_duplex_constraint, diff --git a/nuad/__version__.py b/nuad/__version__.py index 55a4d029..4685c67f 100644 --- a/nuad/__version__.py +++ b/nuad/__version__.py @@ -1 +1 @@ -version = '0.4.3' # version line; WARNING: do not remove or change this line or comment +version = '0.4.4' # version line; WARNING: do not remove or change this line or comment diff --git a/nuad/constraints.py b/nuad/constraints.py index b673a8ba..bd14c1c3 100644 --- a/nuad/constraints.py +++ b/nuad/constraints.py @@ -4,7 +4,7 @@ The 
key classes are :any:`Design`, :any:`Strand`, :any:`Domain` to define a DNA design, and various subclasses of :any:`Constraint`, such as :any:`StrandConstraint` or :any:`StrandPairConstraint`, to define constraints on the sequences assigned to each :any:`Domain` when calling -:meth:`search.search_for_dna_sequences`. +:meth:`search.search_for_dna_sequences`. Also important are two other types of constraints (not subclasses of :any:`Constraint`), which are used prior to the search to determine if it is even @@ -93,14 +93,14 @@ num_sequences_to_generate_key = 'num_sequences_to_generate' rng_state_key = 'rng_state' -idt_key = 'idt' -idt_scale_key = 'scale' -idt_purification_key = 'purification' -idt_plate_key = 'plate' -idt_well_key = 'well' +vendor_fields_key = 'vendor_fields' +vendor_scale_key = 'scale' +vendor_purification_key = 'purification' +vendor_plate_key = 'plate' +vendor_well_key = 'well' -default_idt_scale = "25nm" -default_idt_purification = "STD" +default_vendor_scale = "25nm" +default_vendor_purification = "STD" T = TypeVar('T') KeyFunction = Callable[[T], Any] @@ -2164,16 +2164,16 @@ def evaluate(seqs: Tuple[str, ...], @dataclass -class IDTFields(JSONSerializable): - """Data required when ordering DNA strands from the synthesis company +class VendorFields(JSONSerializable): + """Data required when ordering DNA strands from a synthesis company such as `IDT (Integrated DNA Technologies) `_. This data is used when automatically generating files used to order DNA from IDT. 
- When exporting to IDT files via :py:meth:`Design.write_idt_plate_excel_file` - or :py:meth:`Design.write_idt_bulk_input_file`, the field :py:data:`Strand.name` is used for the + When exporting to IDT files via :meth:`Design.write_idt_plate_excel_file` + or :meth:`Design.write_idt_bulk_input_file`, the field :py:data:`Strand.name` is used for the name if it exists, otherwise a reasonable default is chosen.""" - scale: str = default_idt_scale + scale: str = default_vendor_scale """Synthesis scale at which to synthesize the strand (third field in IDT bulk input: https://www.idtdna.com/site/order/oligoentry). Choices supplied by IDT at the time this was written: @@ -2181,7 +2181,7 @@ class IDTFields(JSONSerializable): ``"10um"``, ``"4nmU"``, ``"20nmU"``, ``"PU"``, ``"25nmS"``. """ - purification: str = default_idt_purification + purification: str = default_vendor_purification """Purification options (fourth field in IDT bulk input: https://www.idtdna.com/site/order/oligoentry). Choices supplied by IDT at the time this was written: @@ -2191,24 +2191,25 @@ class IDTFields(JSONSerializable): plate: str | None = None """Name of plate in case this strand will be ordered on a 96-well or 384-well plate. - Optional field, but non-optional if :data:`IDTFields.well` is not ``None``. + Optional field, but non-optional if :data:`VendorFields.well` + is not ``None``. """ well: str | None = None """Well position on plate in case this strand will be ordered on a 96-well or 384-well plate. - Optional field, but non-optional if :data:`IDTFields.plate` is not ``None``. + Optional field, but non-optional if :data:`VendorFields.plate` is not ``None``. 
""" def __post_init__(self) -> None: - _check_idt_string_not_none_or_empty(self.scale, 'scale') - _check_idt_string_not_none_or_empty(self.purification, 'purification') + _check_vendor_string_not_none_or_empty(self.scale, 'scale') + _check_vendor_string_not_none_or_empty(self.purification, 'purification') if self.plate is None and self.well is not None: - raise ValueError(f'IDTFields.plate cannot be None if IDTFields.well is not None\n' - f'IDTFields.well = {self.well}') + raise ValueError(f'VendorFields.plate cannot be None if VendorFields.well is not None\n' + f'VendorFields.well = {self.well}') if self.plate is not None and self.well is None: - raise ValueError(f'IDTFields.well cannot be None if IDTFields.plate is not None\n' - f'IDTFields.plate = {self.plate}') + raise ValueError(f'VendorFields.well cannot be None if VendorFields.plate is not None\n' + f'VendorFields.plate = {self.plate}') def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> NoIndent | Dict[str, Any]: @@ -2220,27 +2221,27 @@ def to_json_serializable(self, suppress_indent: bool = True, return NoIndent(dct) if suppress_indent else dct @staticmethod - def from_json_serializable(json_map: Dict[str, Any]) -> IDTFields: - scale = mandatory_field(IDTFields, json_map, idt_scale_key) - purification = mandatory_field(IDTFields, json_map, idt_purification_key) - plate = json_map.get(idt_plate_key) - well = json_map.get(idt_well_key) - return IDTFields(scale=scale, purification=purification, plate=plate, well=well) - - def clone(self) -> IDTFields: - return IDTFields(scale=self.scale, purification=self.purification, - plate=self.plate, well=self.well) - - def to_scadnano_idt(self) -> sc.IDTFields: - return sc.IDTFields(scale=self.scale, purification=self.purification, + def from_json_serializable(json_map: Dict[str, Any]) -> VendorFields: + scale = mandatory_field(VendorFields, json_map, vendor_scale_key) + purification = mandatory_field(VendorFields, json_map, 
vendor_purification_key) + plate = json_map.get(vendor_plate_key) + well = json_map.get(vendor_well_key) + return VendorFields(scale=scale, purification=purification, plate=plate, well=well) + + def clone(self) -> VendorFields: + return VendorFields(scale=self.scale, purification=self.purification, plate=self.plate, well=self.well) + def to_scadnano_vendor_fields(self) -> sc.VendorFields: + return sc.VendorFields(scale=self.scale, purification=self.purification, + plate=self.plate, well=self.well) -def _check_idt_string_not_none_or_empty(value: str, field_name: str) -> None: + +def _check_vendor_string_not_none_or_empty(value: str, field_name: str) -> None: if value is None: - raise ValueError(f'field {field_name} in IDTFields cannot be None') + raise ValueError(f'field {field_name} in VendorFields cannot be None') if len(value) == 0: - raise ValueError(f'field {field_name} in IDTFields cannot be empty') + raise ValueError(f'field {field_name} in VendorFields cannot be empty') default_strand_group = 'default_strand_group' @@ -2267,11 +2268,11 @@ class Strand(Part, JSONSerializable): _hash_domain_names_concatenated: int """Hash value of _domain_names_concatenated; cached for efficiency.""" - idt: IDTFields | None = None + vendor_fields: VendorFields | None = None """ - Fields used when ordering strands from the synthesis company IDT + Fields used when ordering strands from a synthesis company such as IDT (Integrated DNA Technologies, Coralville, IA). 
If present (i.e., not equal to :const:`None`) - then the method :py:meth:`Design.write_idt_bulk_input_file` can be called to automatically + then the method :meth:`Design.write_idt_bulk_input_file` can be called to automatically generate an text file for ordering strands in test tubes: https://www.idtdna.com/site/order/oligoentry, as can the method :py:meth:`Design.write_idt_plate_excel_file` for writing a Microsoft Excel @@ -2321,7 +2322,7 @@ def __init__(self, group: str = default_strand_group, name: str | None = None, label: str | None = None, - idt: IDTFields | None = None, + vendor_fields: VendorFields | None = None, ) -> None: """ A :any:`Strand` can be created only by listing explicit :any:`Domain` objects @@ -2338,8 +2339,8 @@ def __init__(self, Name of this :any:`Strand`. :param label: Label to associate with this :any:`Strand`. - :param idt: - :any:`IDTFields` object to associate with this :any:`Strand`; needed to call + :param vendor_fields: + :any:`VendorFields` object to associate with this :any:`Strand`; needed to call methods for exporting to IDT formats (e.g., :meth:`Strand.write_idt_bulk_input_file`) """ self._all_intersecting_domains = None @@ -2357,7 +2358,7 @@ def __init__(self, self.domains = list(domains) # type: ignore self.starred_domain_indices = frozenset(starred_domain_indices) # type: ignore self.label = label - self.idt = idt + self.vendor_fields = vendor_fields # don't know why we have to do this, but we get a missing attribute error otherwise # https://stackoverflow.com/questions/70986725/python-dataclass-attribute-missing-when-using-explicit-init-constructor-and @@ -2396,9 +2397,9 @@ def clone(self, name: str | None) -> Strand: domains = list(self.domains) starred_domain_indices = list(self.starred_domain_indices) name = name if name is not None else self.name - idt = None if self.idt is None else self.idt.clone() + vendor_fields = None if self.vendor_fields is None else self.vendor_fields.clone() return Strand(domains=domains, 
starred_domain_indices=starred_domain_indices, name=name, - group=self.group, label=self.label, idt=idt) + group=self.group, label=self.label, vendor_fields=vendor_fields) def compute_derived_fields(self): """ @@ -2469,37 +2470,37 @@ def domain_names_tuple(self) -> Tuple[str, ...]: domain_names.append(domain.get_name(is_starred)) return tuple(domain_names) - def idt_dna_sequence(self) -> str: + def vendor_dna_sequence(self) -> str: """ - :return: DNA sequence as it needs to be typed to order from IDT, with - :py:data:`Modification5Prime`'s, - :py:data:`Modification3Prime`'s, + :return: DNA sequence as it needs to be typed to order from a synthesis company, with + :data:`Modification5Prime`'s, + :data:`Modification3Prime`'s, and - :py:data:`ModificationInternal`'s represented with text codes, e.g., "/5Biosg/ACGT" for sequence - ACGT with a 5' biotin modification. + :data:`ModificationInternal`'s represented with text codes, e.g., "/5Biosg/ACGT" for sequence + ACGT with a 5' biotin modification to order from IDT. 
""" self._ensure_modifications_legal(check_offsets_legal=True) ret_list: List[str] = [] - if self.modification_5p is not None and self.modification_5p.idt_text is not None: - ret_list.append(self.modification_5p.idt_text) + if self.modification_5p is not None and self.modification_5p.vendor_code is not None: + ret_list.append(self.modification_5p.vendor_code) for offset, base in enumerate(self.sequence(delimiter='')): ret_list.append(base) if offset in self.modifications_int: # if internal mod attached to base, replace base mod = self.modifications_int[offset] - if mod.idt_text is not None: + if mod.vendor_code is not None: if mod.allowed_bases is not None: if base not in mod.allowed_bases: msg = f'internal modification {mod} can only replace one of these bases: ' \ f'{",".join(mod.allowed_bases)}, but the base at offset {offset} is {base}' raise ValueError(msg) - ret_list[-1] = mod.idt_text # replace base with modified base + ret_list[-1] = mod.vendor_code # replace base with modified base else: - ret_list.append(mod.idt_text) # append modification between two bases + ret_list.append(mod.vendor_code) # append modification between two bases - if self.modification_3p is not None and self.modification_3p.idt_text is not None: - ret_list.append(self.modification_3p.idt_text) + if self.modification_3p is not None and self.modification_3p.vendor_code is not None: + ret_list.append(self.modification_3p.vendor_code) return ''.join(ret_list) @@ -2534,8 +2535,8 @@ def to_json_serializable(self, suppress_indent: bool = True) -> NoIndent | Dict[ if self.label is not None: dct[label_key] = NoIndent(self.label) if suppress_indent else self.label - if self.idt is not None: - dct[idt_key] = self.idt.to_json_serializable(suppress_indent) + if self.vendor_fields is not None: + dct[vendor_fields_key] = self.vendor_fields.to_json_serializable(suppress_indent) if self.modification_5p is not None: dct[nm.modification_5p_key] = self.modification_5p.id @@ -2569,14 +2570,14 @@ def 
from_json_serializable(json_map: Dict[str, Any], label: str = json_map.get(label_key) - idt_json = json_map.get(idt_key) + idt_json = json_map.get(vendor_fields_key) idt = None if idt_json is not None: - idt = IDTFields.from_json_serializable(idt_json) + idt = VendorFields.from_json_serializable(idt_json) strand: Strand = Strand( domains=domains, starred_domain_indices=starred_domain_indices, - group=group, name=name, label=label, idt=idt) + group=group, name=name, label=label, vendor_fields=idt) return strand def __repr__(self) -> str: @@ -2955,7 +2956,7 @@ def _export_dummy_scadnano_design_for_idt_export(strands: Iterable[Strand]) -> s helices = [sc.Helix(max_offset=strand.length()) for strand in strands] sc_strands = [] for helix_idx, strand in enumerate(strands): - idt_export = strand.idt.to_scadnano_idt() if strand.idt is not None else None + vendor_fields_export = strand.vendor_fields.to_scadnano_vendor_fields() if strand.vendor_fields is not None else None sc_domains = [] prev_end = 0 for domain in strand.domains: @@ -2963,21 +2964,22 @@ def _export_dummy_scadnano_design_for_idt_export(strands: Iterable[Strand]) -> s start=prev_end, end=prev_end + domain.get_length()) prev_end = sc_domain.end sc_domains.append(sc_domain) - sc_strand = sc.Strand(domains=sc_domains, idt=idt_export, + sc_strand = sc.Strand(domains=sc_domains, vendor_fields=vendor_fields_export, dna_sequence=strand.sequence(), name=strand.name) # handle modifications if strand.modification_5p is not None: mod = strand.modification_5p - sc_mod = sc.Modification5Prime(idt_text=mod.idt_text, id=mod.id, display_text=mod.idt_text) + sc_mod = sc.Modification5Prime(idt_text=mod.vendor_code, id=mod.id, display_text=mod.vendor_code) sc_strand.modification_5p = sc_mod if strand.modification_3p is not None: mod = strand.modification_3p - sc_mod = sc.Modification3Prime(idt_text=mod.idt_text, id=mod.id, display_text=mod.idt_text) + sc_mod = sc.Modification3Prime(idt_text=mod.vendor_code, id=mod.id, 
display_text=mod.vendor_code) sc_strand.modification_3p = sc_mod if len(strand.modifications_int) > 0: for offset, mod in strand.modifications_int.items(): - sc_mod = sc.ModificationInternal(idt_text=mod.idt_text, id=mod.id, display_text=mod.idt_text, + sc_mod = sc.ModificationInternal(idt_text=mod.vendor_code, id=mod.id, + display_text=mod.vendor_code, allowed_bases=mod.allowed_bases) sc_strand.modifications_int[offset] = sc_mod @@ -3262,7 +3264,7 @@ def add_strand(self, group: str = default_strand_group, name: str | None = None, label: str | None = None, - idt: IDTFields | None = None, + idt: VendorFields | None = None, ) -> Strand: """ This is an alternative way to create strands instead of calling the :any:`Strand` constructor @@ -3295,7 +3297,7 @@ def add_strand(self, :param label: Label to associate with this :any:`Strand`. :param idt: - :any:`IDTFields` object to associate with this :any:`Strand`; needed to call + :any:`VendorFields` object to associate with this :any:`Strand`; needed to call methods for exporting to IDT formats (e.g., :meth:`Strand.write_idt_bulk_input_file`) :return: the :any:`Strand` that is created @@ -3334,7 +3336,7 @@ def add_strand(self, group=group, name=name, label=label, - idt=idt) + vendor_fields=idt) for existing_strand in self.strands: if strand.name == existing_strand.name: @@ -3423,6 +3425,7 @@ def _ensure_mods_unique_names(all_mods: Set[nm.Modification]) -> None: def to_idt_bulk_input_format(self, delimiter: str = ',', + domain_delimiter: str = '', key: KeyFunction[Strand] | None = None, warn_duplicate_name: bool = False, only_strands_with_idt: bool = False, @@ -3438,7 +3441,13 @@ def to_idt_bulk_input_format(self, if strands is None: strands = self.strands sc_design = _export_dummy_scadnano_design_for_idt_export(strands) - return sc_design.to_idt_bulk_input_format(delimiter, key, warn_duplicate_name, only_strands_with_idt) + return sc_design.to_idt_bulk_input_format( + delimiter=delimiter, + 
domain_delimiter=domain_delimiter, + key=key, + warn_duplicate_name=warn_duplicate_name, + only_strands_with_idt=only_strands_with_idt, + ) def write_idt_bulk_input_file(self, *, filename: str = None, @@ -3446,11 +3455,12 @@ def write_idt_bulk_input_file(self, *, key: KeyFunction[Strand] | None = None, extension: str | None = None, delimiter: str = ',', + domain_delimiter: str = '', warn_duplicate_name: bool = True, only_strands_with_idt: bool = False, strands: Iterable[Strand] | None = None) -> None: """Write ``.idt`` text file encoding the strands of this :any:`Design` with the field - :any:`Strand.idt`, suitable for pasting into the "Bulk Input" field of IDT + :data:`Strand.vendor_fields`, suitable for pasting into the "Bulk Input" field of IDT (Integrated DNA Technologies, Coralville, IA, https://www.idtdna.com/), with the output file having the same name as the running script but with ``.py`` changed to ``.idt``, unless `filename` is explicitly specified. @@ -3474,22 +3484,25 @@ def write_idt_bulk_input_file(self, *, alternate filename extension to use (instead of idt) :param delimiter: is the symbol to delimit the four IDT fields name,sequence,scale,purification. + :param domain_delimiter: + symbol(s) to put in between DNA sequences of different domains in a strand. :param warn_duplicate_name: if ``True`` prints a warning when two different :any:`Strand`'s have the same - :data:`IDTFields.name` and the same :meth:`Strand.sequence`. A ValueError + :data:`VendorFields.name` and the same :meth:`Strand.sequence`. A ValueError is raised (regardless of the value of this parameter) if two different :any:`Strand`'s have the same name but different sequences, IDT scales, or IDT purifications. :param only_strands_with_idt: If False (the default), all non-scaffold sequences are output, with reasonable default values - chosen if the field :data:`Strand.idt` is missing. - If True, then strands lacking the field :data:`Strand.idt` will not be exported. 
+ chosen if the field :data:`Strand.vendor_fields` is missing. + If True, then strands lacking the field :data:`Strand.vendor_fields` will not be exported. :param strands: strands to export; if not specified, all strands in design are exported. NOTE: it is not checked that each :any:`Strand` in `strands` is actually contained in this any:`Design` """ contents = self.to_idt_bulk_input_format(delimiter=delimiter, + domain_delimiter=domain_delimiter, key=key, warn_duplicate_name=warn_duplicate_name, only_strands_with_idt=only_strands_with_idt, @@ -3509,7 +3522,7 @@ def write_idt_plate_excel_file(self, *, strands: Iterable[Strand] | None = None) -> None: """ Write ``.xls`` (Microsoft Excel) file encoding the strands of this :any:`Design` with the field - :py:data:`Strand.idt`, suitable for uploading to IDT + :py:data:`Strand.vendor_fields`, suitable for uploading to IDT (Integrated DNA Technologies, Coralville, IA, https://www.idtdna.com/) to describe a 96-well or 384-well plate (https://www.idtdna.com/site/order/plate/index/dna/), @@ -3534,23 +3547,23 @@ def write_idt_plate_excel_file(self, *, :meth:`strand_order_key_function` :param warn_duplicate_name: if ``True`` prints a warning when two different :any:`Strand`'s have the same - :data:`IDTFields.name` and the same :meth:`Strand.sequence`. A ValueError is + :data:`VendorFields.name` and the same :meth:`Strand.sequence`. A ValueError is raised (regardless of the value of this parameter) if two different :any:`Strand`'s have the same name but different sequences, IDT scales, or IDT purifications. :param only_strands_with_idt: If False (the default), all non-scaffold sequences are output, with reasonable default values - chosen if the field :data:`Strand.idt` is missing. + chosen if the field :data:`Strand.vendor_fields` is missing. (though scaffold is included if `export_scaffold` is True). - If True, then strands lacking the field :data:`Strand.idt` will not be exported. 
+ If True, then strands lacking the field :data:`Strand.vendor_fields` will not be exported. If False, then `use_default_plates` must be True. :param use_default_plates: Use default values for plate and well (ignoring those in idt fields, which may be None). - If False, each Strand to export must have the field :data:`Strand.idt`, so in particular + If False, each Strand to export must have the field :data:`Strand.vendor_fields`, so in particular the parameter `only_strands_with_idt` must be True. :param warn_using_default_plates: specifies whether, if `use_default_plates` is True, to print a warning for strands whose - :data:`Strand.idt` has the fields :py:data:`IDTFields.plate` and :py:data:`IDTFields.well`, + :data:`Strand.vendor_fields` has the fields :py:data:`VendorFields.plate` and :py:data:`VendorFields.well`, since `use_default_plates` directs these fields to be ignored. :param plate_type: a :any:`PlateType` specifying whether to use a 96-well plate or a 384-well plate @@ -3768,7 +3781,7 @@ def assign_fields_to_scadnano_design(self, sc_design: sc.Design, ignored_strands: Iterable[Strand] = (), overwrite: bool = False): """ - Assigns DNA sequence, IDTFields, and StrandGroups (as a key in a scadnano String.label dict + Assigns DNA sequence, VendorFields, and StrandGroups (as a key in a scadnano String.label dict under key "group"). TODO: document more """ @@ -3886,7 +3899,7 @@ def assign_idt_fields_to_scadnano_design(self, sc_design: sc.Design, ignored_strands: Iterable[Strand] = (), overwrite: bool = False) -> None: """ - Assigns :any:`IDTFields` from this :any:`Design` into `sc_design`. + Assigns :any:`VendorFields` from this :any:`Design` into `sc_design`. If multiple strands in `sc_design` share the same name, then all of them are assigned the IDT fields of the dsd :any:`Strand` with that name. 
@@ -3905,13 +3918,13 @@ def assign_idt_fields_to_scadnano_design(self, sc_design: sc.Design, for nuad_strand, sc_strands in strand_pairs: for sc_strand in sc_strands: - if nuad_strand.idt is not None: - if sc_strand.idt is not None and not overwrite: + if nuad_strand.vendor_fields is not None: + if sc_strand.vendor_fields is not None and not overwrite: raise ValueError(f'Cannot assign IDT fields from dsd strand to scadnano strand ' f'{sc_strand.name} because the scadnano strand already has ' - f'IDT fields assigned:\n{sc_strand.idt}. ' + f'IDT fields assigned:\n{sc_strand.vendor_fields}. ' f'Set overwrite to True to force an overwrite.') - sc_strand.idt = nuad_strand.idt.to_scadnano_idt() + sc_strand.vendor_fields = nuad_strand.vendor_fields.to_scadnano_vendor_fields() def assign_modifications_to_scadnano_design(self, sc_design: sc.Design, ignored_strands: Iterable[Strand] = (), @@ -5380,6 +5393,97 @@ def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: pairs=pairs_tuple) +def rna_plex_domain_pairs_constraint( + threshold: float, + temperature: float = nv.default_temperature, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = lambda x: x, + description: str | None = None, + short_description: str = 'rna_plex_dom_pairs', + pairs: Iterable[Tuple[Domain, Domain]] | None = None, + parameters_filename: str = nv.default_vienna_rna_parameter_filename) \ + -> DomainPairsConstraint: + """ + Returns constraint that checks given pairs of :any:`Domain`'s for excessive interaction using + Vienna RNA's RNAplex executable. + + :param threshold: + energy threshold + :param temperature: + temperature in Celsius + :param weight: + how much to weigh this :any:`Constraint` + :param score_transfer_function: + See :py:data:`Constraint.score_transfer_function`. 
+ :param description: + long description of constraint suitable for printing in report file + :param short_description: + short description of constraint suitable for logging to stdout + :param pairs: + pairs of :any:`Domain`'s to compare; if not specified, checks all pairs + :param parameters_filename: + name of parameters file for ViennaRNA; default is + same as :py:meth:`vienna_nupack.rna_plex_multiple` + :return: + constraint + """ + _check_vienna_rna_installed() + + if description is None: + description = _pair_default_description('domain', 'RNAplex', threshold, temperature) + + def evaluate_bulk(domain_pairs: Iterable[DomainPair]) -> List[Result]: + sequence_pairs, name_pairs, domain_tuples = _all_pairs_domain_sequences_complements_names_from_domains( + domain_pairs) + energies = nv.rna_plex_multiple(sequence_pairs, logger, temperature, parameters_filename) + + # several consecutive items are from same domain pair but with different wc's; + # group them together in the summary + groups = defaultdict(list) + for (d1, d2), energy, name_pair in zip(domain_tuples, energies, name_pairs): + domain_pair = DomainPair(d1, d2) + groups[domain_pair.name].append((energy, name_pair)) + + # one Result per domain pair + results = [] + for _, energies_and_name_pairs in groups.items(): + energies, name_pairs = zip(*energies_and_name_pairs) + excesses: List[float] = [] + for energy, (name1, name2) in energies_and_name_pairs: + if name1 is not None and name2 is not None: + logger.debug( + f'domain pair threshold: {threshold:6.2f} ' + f'rna_plex({name1}, {name2}, {temperature}) = {energy:6.2f} ') + excess = max(0.0, (threshold - energy)) + excesses.append(excess) + max_excess = max(excesses) + + max_name_length = max(len(name) for name in flatten(name_pairs)) + lines_and_energies = [(f'{name1:{max_name_length}}, ' + f'{name2:{max_name_length}}: ' + f' {energy:6.2f} kcal/mol', energy) + for energy, (name1, name2) in energies_and_name_pairs] + lines_and_energies.sort(key=lambda 
line_and_energy: line_and_energy[1]) + lines = [line for line, _ in lines_and_energies] + summary = '\n ' + '\n '.join(lines) + max_excess = max(0.0, max_excess) + result = Result(excess=max_excess, summary=summary, value=max_excess) + results.append(result) + + return results + + pairs_tuple = None + if pairs is not None: + pairs_tuple = tuple(pairs) + + return DomainPairsConstraint(description=description, + short_description=short_description, + weight=weight, + score_transfer_function=score_transfer_function, + evaluate_bulk=evaluate_bulk, + pairs=pairs_tuple) + + def _populate_strand_list_and_pairs(strands: Iterable[Strand] | None, pairs: Iterable[Tuple[Strand, Strand]] | None) \ -> Tuple[List[Strand], List[Tuple[Strand, Strand]]]: @@ -5690,6 +5794,93 @@ def rna_duplex_strand_pairs_constraints_by_number_matching_domains( ) +def rna_plex_strand_pairs_constraints_by_number_matching_domains( + *, + thresholds: Dict[int, float], + temperature: float = nv.default_temperature, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = default_score_transfer_function, + descriptions: Dict[int, str] | None = None, + short_descriptions: Dict[int, str] | None = None, + parallel: bool = False, + strands: Iterable[Strand] | None = None, + pairs: Iterable[Tuple[Strand, Strand]] | None = None, + parameters_filename: str = nv.default_vienna_rna_parameter_filename, + ignore_missing_thresholds: bool = False, +) -> List[StrandPairsConstraint]: + """ + Convenience function for creating many constraints as returned by + :func:`rna_plex_strand_pairs_constraint`, one for each threshold specified in parameter `thresholds`, + based on number of matching (complementary) domains between pairs of strands. + + Optional parameters `description` and `short_description` are also dicts keyed by the same keys. + + Exactly one of `strands` or `pairs` must be specified. 
If `strands`, then all pairs of strands + (including a strand with itself) will be checked; otherwise only those pairs in `pairs` will be checked. + + It is also common to set different thresholds according to the lengths of the strands. + This can be done by calling :meth:`strand_pairs_by_lengths` to separate first by lengths + in a dict mapping length pairs to strand pairs, + then calling this function once for each (key, value) in that dict, giving the value + (which is a list of pairs of strands) as the `pairs` parameter to this function. + + Args: + thresholds: Energy thresholds in kcal/mol. If `k` domains are complementary between the strands, + then use threshold `thresholds[k]`. + temperature: Temperature in Celsius. + weight: How much to weigh this :any:`Constraint`. + score_transfer_function: See :py:data:`Constraint.score_transfer_function`. + descriptions: Long descriptions of constraint suitable for putting into constraint report. + short_descriptions: Short descriptions of constraint suitable for logging to stdout. + parallel: Whether to test the each pair of :any:`Strand`'s in parallel. + strands: :any:`Strand`'s to compare; if not specified, checks all in design. + Mutually exclusive with `pairs`. + pairs: Pairs of :any:`Strand`'s to compare; if not specified, checks all pairs in `strands`, + including each strand with itself. + Mutually exclusive with `strands`. + parameters_filename: Name of parameters file for ViennaRNA; + default is same as :py:meth:`vienna_nupack.rna_plex_multiple` + ignore_missing_thresholds: + If True, then a key `num` left out of `thresholds` dict will cause no constraint to be + returned for pairs of strands with `num` complementary domains. + If False, then a ValueError is raised. 
+ + Returns: + list of constraints, one per threshold in `thresholds` + """ + rna_plex_with_parameters_filename: _StrandPairsConstraintCreator = \ + functools.partial(rna_plex_strand_pairs_constraint, # type:ignore + parameters_filename=parameters_filename) + + if descriptions is None: + descriptions = { + num_matching: (_pair_default_description('strand', 'RNAplex', threshold, temperature) + + f'\nfor strands with {num_matching} complementary ' + f'{"domain" if num_matching == 1 else "domains"}') + for num_matching, threshold in thresholds.items() + } + + if short_descriptions is None: + short_descriptions = { + num_matching: f'RNAplex{num_matching}comp' + for num_matching, threshold in thresholds.items() + } + + return _strand_pairs_constraints_by_number_matching_domains( + constraint_creator=rna_plex_with_parameters_filename, + thresholds=thresholds, + temperature=temperature, + weight=weight, + score_transfer_function=score_transfer_function, + descriptions=descriptions, + short_descriptions=short_descriptions, + parallel=parallel, + strands=strands, + pairs=pairs, + ignore_missing_thresholds=ignore_missing_thresholds, + ) + + def longest_complementary_subsequences_python_loop(arr1: np.ndarray, arr2: np.ndarray, gc_double: bool) -> List[int]: """ @@ -6175,6 +6366,93 @@ def evaluate_bulk(strand_pairs: Iterable[StrandPair]) -> List[Result]: pairs=pairs_tuple) +def rna_plex_strand_pairs_constraint( + *, + threshold: float, + temperature: float = nv.default_temperature, + weight: float = 1.0, + score_transfer_function: Callable[[float], float] = default_score_transfer_function, + description: str | None = None, + short_description: str = 'rna_plex_strand_pairs', + parallel: bool = False, + pairs: Iterable[Tuple[Strand, Strand]] | None = None, + parameters_filename: str = nv.default_vienna_rna_parameter_filename +) -> StrandPairsConstraint: + """ + Returns constraint that checks given pairs of :any:`Strand`'s for excessive interaction using + Vienna RNA's RNAplex 
executable. + + Often one wishes to let the threshold depend on how many domains match between a pair of strands. + The function :meth:`rna_plex_strand_pairs_constraints_by_number_matching_domains` is useful + for this purpose, returning a list of :any:`StrandPairsConstraint`'s such as those returned by this + function, one for each possible number of matching domains. + + :param threshold: + Energy threshold in kcal/mol. If a float, this is used for all pairs of strands. + If a dict[int, float], interpreted to mean that + :param temperature: + Temperature in Celsius. + :param weight: + How much to weigh this :any:`Constraint`. + :param score_transfer_function: + See :py:data:`Constraint.score_transfer_function`. + :param description: + Long description of constraint suitable for putting into constraint report. + :param short_description: + Short description of constraint suitable for logging to stdout. + :param parallel: + Whether to test the each pair of :any:`Strand`'s in parallel. + :param pairs: + Pairs of :any:`Strand`'s to compare; if not specified, checks all pairs in design. + :param parameters_filename: + Name of parameters file for ViennaRNA; + default is same as :py:meth:`vienna_nupack.rna_plex_multiple` + :return: + The :any:`StrandPairsConstraint`. 
+ """ + _check_vienna_rna_installed() + + if description is None: + description = _pair_default_description('strand', 'RNAduplex', threshold, temperature) + + num_cores = max(cpu_count(), 1) + + # we use ThreadPool instead of pathos because we're farming this out to processes through + # subprocess module anyway, no need for pathos to boot up separate processes or serialize through dill + thread_pool = ThreadPool(processes=num_cores) + + def calculate_energies(seq_pairs: Sequence[Tuple[str, str]]) -> Tuple[float]: + if parallel: + energies = nv.rna_plex_multiple_parallel(thread_pool, seq_pairs, logger, temperature, + parameters_filename) + else: + energies = nv.rna_plex_multiple(seq_pairs, logger, temperature, parameters_filename) + return energies + + def evaluate_bulk(strand_pairs: Iterable[StrandPair]) -> List[Result]: + sequence_pairs = [(pair.strand1.sequence(), pair.strand2.sequence()) for pair in strand_pairs] + energies = calculate_energies(sequence_pairs) + + results = [] + for pair, energy in zip(strand_pairs, energies): + excess = threshold - energy + value = f'{energy:6.2f} kcal/mol' + result = Result(excess=excess, value=value) + results.append(result) + return results + + pairs_tuple = None + if pairs is not None: + pairs_tuple = tuple(pairs) + + return StrandPairsConstraint(description=description, + short_description=short_description, + weight=weight, + score_transfer_function=score_transfer_function, + evaluate_bulk=evaluate_bulk, + pairs=pairs_tuple) + + def energy_excess(energy: float, threshold: float) -> float: excess = threshold - energy return excess diff --git a/nuad/modifications.py b/nuad/modifications.py index 3c1d1678..3fb54e29 100644 --- a/nuad/modifications.py +++ b/nuad/modifications.py @@ -23,7 +23,7 @@ mod_location_key = 'location' mod_display_text_key = 'display_text' mod_id_key = 'id' -mod_idt_text_key = 'idt_text' +mod_vendor_code_key = 'vendor_code' mod_font_size_key = 'font_size' mod_display_connector_key = 'display_connector' 
mod_allowed_bases_key = 'allowed_bases' @@ -51,14 +51,14 @@ class Modification(JSONSerializable, ABC): :any:`Modification3Prime`, :any:`Modification5Prime`, or :any:`ModificationInternal` to instantiate. - If :data:`Modification.id` is not specified, then :data:`Modification.idt_text` is used as + If :data:`Modification.id` is not specified, then :data:`Modification.vendor_code` is used as the unique ID. Each :data:`Modification.id` must be unique. For example if you create a 5' "modification" to represent 6 T bases: ``t6_5p = Modification5Prime(display_text='6T', idt_text='TTTTTT')`` (this is a useful hack for putting single-stranded extensions on strands until loopouts on the end of a strand are supported; see https://github.com/UC-Davis-molecular-computing/scadnano-python-package/issues/2), then this would clash with a similar 3' modification without specifying unique IDs for them: - ``t6_3p = Modification3Prime(display_text='6T', idt_text='TTTTTT') # ERROR``. + ``t6_3p = Modification3Prime(display_text='6T', vendor_code='TTTTTT') # ERROR``. In general it is recommended to create a single :any:`Modification` object for each *type* of modification in the design. For example, if many strands have a 5' biotin, then it is recommended to @@ -66,27 +66,27 @@ class Modification(JSONSerializable, ABC): .. code-block:: python - biotin_5p = Modification5Prime(display_text='B', idt_text='/5Biosg/') + biotin_5p = Modification5Prime(display_text='B', vendor_code='/5Biosg/') design.strand(0, 0).move(8).with_modification_5p(biotin_5p) design.strand(1, 0).move(8).with_modification_5p(biotin_5p) """ - idt_text: str - """IDT text string specifying this modification (e.g., '/5Biosg/' for 5' biotin). optional""" + vendor_code: str + """Text string specifying this modification (e.g., '/5Biosg/' for 5' biotin). 
optional""" id: str = _default_modification_id """ Representation as a string; used to write in :any:`Strand` json representation, while the full description of the modification is written under a global key in the :any:`Design`. - If not specified, but :py:data:`Modification.idt_text` is specified, then it will be set equal to that. + If not specified, but :data:`Modification.idt_text` is specified, then it will be set equal to that. """ def __post_init__(self) -> None: if self.id == _default_modification_id: - object.__setattr__(self, 'id', self.idt_text) + object.__setattr__(self, 'id', self.vendor_code) def to_json_serializable(self, suppress_indent: bool = True, **kwargs: Any) -> Dict[str, Any]: - ret = {mod_idt_text_key: self.idt_text, mod_id_key: self.id} + ret = {mod_vendor_code_key: self.vendor_code, mod_id_key: self.id} return ret @staticmethod @@ -123,15 +123,15 @@ def from_json(json_map: Dict[str, Any]) -> 'Modification5Prime': id_ = json_map[mod_id_key] location = json_map[mod_location_key] assert location == "5'" - idt_text = json_map.get(mod_idt_text_key) - return Modification5Prime(idt_text=idt_text, id=id_) + idt_text = json_map.get(mod_vendor_code_key) + return Modification5Prime(vendor_code=idt_text, id=id_) @staticmethod def modification_type() -> ModificationType: return ModificationType.five_prime def to_scadnano_modification(self) -> sc.Modification5Prime: - return sc.Modification5Prime(display_text=self.idt_text, idt_text=self.idt_text, id=self.id) + return sc.Modification5Prime(display_text=self.vendor_code, idt_text=self.vendor_code, id=self.id) @dataclass(frozen=True, eq=True) @@ -149,15 +149,15 @@ def from_json(json_map: Dict[str, Any]) -> 'Modification3Prime': id_ = json_map[mod_id_key] location = json_map[mod_location_key] assert location == "3'" - idt_text = json_map.get(mod_idt_text_key) - return Modification3Prime(idt_text=idt_text, id=id_) + idt_text = json_map.get(mod_vendor_code_key) + return 
Modification3Prime(vendor_code=idt_text, id=id_) @staticmethod def modification_type() -> ModificationType: return ModificationType.three_prime def to_scadnano_modification(self) -> sc.Modification3Prime: - return sc.Modification3Prime(display_text=self.idt_text, idt_text=self.idt_text, id=self.id) + return sc.Modification3Prime(display_text=self.vendor_code, idt_text=self.vendor_code, id=self.id) @dataclass(frozen=True, eq=True) @@ -190,15 +190,15 @@ def from_json(json_map: Dict[str, Any]) -> 'ModificationInternal': id_ = json_map[mod_id_key] location = json_map[mod_location_key] assert location == "internal" - idt_text = json_map.get(mod_idt_text_key) + idt_text = json_map.get(mod_vendor_code_key) allowed_bases_list = json_map.get(mod_allowed_bases_key) allowed_bases = frozenset(allowed_bases_list) if allowed_bases_list is not None else None - return ModificationInternal(idt_text=idt_text, id=id_, allowed_bases=allowed_bases) + return ModificationInternal(vendor_code=idt_text, id=id_, allowed_bases=allowed_bases) @staticmethod def modification_type() -> ModificationType: return ModificationType.internal def to_scadnano_modification(self) -> sc.ModificationInternal: - return sc.ModificationInternal(display_text=self.idt_text, idt_text=self.idt_text, id=self.id, + return sc.ModificationInternal(display_text=self.vendor_code, idt_text=self.vendor_code, id=self.id, allowed_bases=self.allowed_bases) diff --git a/nuad/vienna_nupack.py b/nuad/vienna_nupack.py index d3b1d562..4029f054 100644 --- a/nuad/vienna_nupack.py +++ b/nuad/vienna_nupack.py @@ -139,6 +139,7 @@ def tupleize(seqs: str | Iterable[str]) -> Tuple[str, ...]: try: from pathos.pools import ProcessPool + def pfunc_parallel( pool: ProcessPool, all_seqs: Sequence[str | Tuple[str, ...]], @@ -337,12 +338,12 @@ def rna_duplex_multiple(pairs: Sequence[Tuple[str, str]], 'is a different error that I don\'t know how to handle. Exiting...' 
f'\nerror:\n{error}') - lines = output.split('\n') - if len(lines) - 1 != len(pairs): + lines = [line for line in output.split('\n') if line.strip() != ''] + if len(lines) != len(pairs): raise ValueError(f'lengths do not match: #lines:{len(lines) - 1} #seqpairs:{len(pairs)}') energies = [] - for line in lines[:-1]: + for line in lines: energy = float(line.split(':')[1].split('(')[1].split(')')[0]) energy = min(energy, max_energy) energies.append(energy) @@ -394,6 +395,135 @@ def calculate_energies_sequential(seq_pairs: Sequence[Tuple[str, str]]) -> Tuple return tuple(energies) +def rna_plex_multiple(pairs: Sequence[Tuple[str, str]], + logger: logging.Logger = logging.root, + temperature: float = default_temperature, + parameters_filename: str = default_vienna_rna_parameter_filename, + max_energy: float = 0.0, + ) -> Tuple[float]: + """ + Calls RNAplex (from ViennaRNA package: https://www.tbi.univie.ac.at/RNA/) + on a list of pairs, specifically: + [ (seq1, seq2), (seq2, seq3), (seq4, seq5), ... ] + where seqi is a string over {A,C,T,G}. Temperature is in Celsius. + Returns a list (in the same order as seqpairs) of free energies. + + RNAplex is supposedly faster than RNAduplex, but less accurate since, compared to RNAduplex, + " loop energy is an affine function of the loop size instead of a logarithmic function" + (https://doi.org/10.1093/bioinformatics/btn193). + However, in testing with random sequences, RNAplex can actually be SLOWER, for instance + with 1000 pairs of DNA sequences each of length 64, RNAduplex takes 1.93 s on average, + compared to 2.28 s for RNAplex. (tested using %timeit in Jupyter lab) + This is with default parameters; using "-f 1" speeds up the computing time by about factor 5; + in this case RNAplex takes only 0.41 s on average instead of 2.28 s. 
+ + :param pairs: + sequence (list or tuple) of pairs of DNA sequences + :param logger: + logger to use for printing error messages + :param temperature: + temperature in Celsius + :param parameters_filename: + name of parameters file for ViennaRNA + :param max_energy: + This is the maximum energy possible to assign. If RNAplex reports any energies larger than this, + they will be changed to `max_energy`. This is useful in case two sequences have no possible + base pairs between them (e.g., CCCC and TTTT), in which case RNAplex assigns a free energy + of 100000 (perhaps its approximation of infinity). But for meaningful comparison and particularly + for graphing energies, it's nice if there's not some value several orders of magnitude larger + than all the rest. + :return: + list of free energies, in the same order as `pairs` + """ + # NB: the string NA_parameter_set needs to be exactly the intended filename; + # e.g. any extra whitespace characters cause RNAplex to default to RNA parameter set + # without warning the user! 
+ + # Note that loading parameter set dna_mathews2004.par throws a warning encoded in that parameter set: + # WARNING: stacking enthalpies not symmetric + + # https://stackoverflow.com/questions/10174211/how-to-make-an-always-relative-to-current-module-file-path + full_parameters_filename = os.path.join(os.path.dirname(__file__), + parameter_set_directory, parameters_filename) + + if os_is_windows: + full_parameters_filename = _fix_filename_windows(full_parameters_filename) + + command_strs: List[str] = ['RNAplex', + '-P', full_parameters_filename, + '-T', str(temperature), + '-f', '1', + ] + + # DNA sequences to type after RNAplex starts up + user_input = '\n'.join(f'{seq1}\n{seq2}' for seq1, seq2 in pairs) + '\n@\n' + + output, error = call_subprocess(command_strs, user_input) + + if error.strip() != '': + logger.warning('error from RNAplex: ', error) + if error.split('\n')[0] != 'WARNING: stacking enthalpies not symmetric': + raise ValueError('I will ignore errors about "stacking enthalpies not symmetric", but this ' + 'is a different error that I don\'t know how to handle. Exiting...' + f'\nerror:\n{error}') + + lines = [line for line in output.split('\n') if line.strip() != ''] + if len(lines) != len(pairs): + raise ValueError(f'lengths do not match: #lines:{len(lines)} #seqpairs:{len(pairs)}') + + energies = [] + for line in lines: + energy = float(line.split(':')[1].split('(')[1].split(')')[0]) + energy = min(energy, max_energy) + energies.append(energy) + + return tuple(energies) + + +def rna_plex_multiple_parallel( + thread_pool: ThreadPool, + pairs: Sequence[Tuple[str, str]], + logger: logging.Logger = logging.root, + temperature: float = default_temperature, + parameters_filename: str = default_vienna_rna_parameter_filename, + max_energy: float = 0.0, +) -> Tuple[float]: + """ + Parallel version of :meth:`rna_plex_multiple`. 
TODO document this + """ + num_pairs = len(pairs) + if num_pairs == 0: + return tuple() + + bases = len(pairs[0][0] + pairs[0][1]) + num_cores = nc.cpu_count(logical=True) + + # these thresholds were measured empirically; see notebook nuad_parallel_time_trials.ipynb + call_sequential = (len(pairs) == 1 + or (bases <= 10 and num_pairs <= 20000) + or (bases <= 15 and num_pairs <= 10000) + or (bases <= 20 and num_pairs <= 5000) + or (bases <= 30 and num_pairs <= 2000) + or (bases <= 40 and num_pairs <= 1000) + or (bases <= 50 and num_pairs <= 800) + or (bases <= 75 and num_pairs <= 200) + or (bases <= 100 and num_pairs <= 150) + or (num_pairs < num_cores) + ) + + def calculate_energies_sequential(seq_pairs: Sequence[Tuple[str, str]]) -> Tuple[float]: + return rna_plex_multiple(pairs=seq_pairs, logger=logger, temperature=temperature, + parameters_filename=parameters_filename, max_energy=max_energy) + + if call_sequential: + return calculate_energies_sequential(pairs) + + lists_of_sequence_pairs = nc.chunker(pairs, num_chunks=num_cores) + lists_of_energies = thread_pool.map(calculate_energies_sequential, lists_of_sequence_pairs) + energies = nc.flatten(lists_of_energies) + return tuple(energies) + + def _fix_filename_windows(parameters_filename: str) -> str: # FIXME: Here's the story: when developing in Windows, os.path will construct the path with Windows # absolute paths. 
But we need to pass off the computation to wsl.exe (Windows Subsystem for Linux), diff --git a/papers/Fast and effective prediction of microRNA-target duplexes.pdf b/papers/Fast and effective prediction of microRNA-target duplexes.pdf new file mode 100644 index 00000000..cb2c7f4f Binary files /dev/null and b/papers/Fast and effective prediction of microRNA-target duplexes.pdf differ diff --git a/papers/RNAplex - A fast tool for RNA-RNA interaction search.pdf b/papers/RNAplex - A fast tool for RNA-RNA interaction search.pdf new file mode 100644 index 00000000..d261d43a Binary files /dev/null and b/papers/RNAplex - A fast tool for RNA-RNA interaction search.pdf differ diff --git a/requirements.txt b/requirements.txt index 0b29c3ac..af87cbdc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,6 @@ numpy psutil ordered_set pathos -xlwt -xlrd nupack tabulate pint diff --git a/tests/test.py b/tests/test.py index 822b75e7..fbe791cf 100644 --- a/tests/test.py +++ b/tests/test.py @@ -3,9 +3,8 @@ import os import numpy -import xlrd +import openpyxl -from nuad import constraints import nuad.constraints as nc import nuad.search as ns import scadnano as sc @@ -279,7 +278,7 @@ def test_two_instances_of_domain(self) -> None: class TestExportDNASequences(unittest.TestCase): def test_idt_bulk_export(self) -> None: - custom_idt = nc.IDTFields(scale='100nm', purification='PAGE') + custom_idt = nc.VendorFields(scale='100nm', purification='PAGE') design = nc.Design() design.add_strand(domain_names=['a', 'b*', 'c', 'd*'], name='s0', idt=custom_idt) design.add_strand(domain_names=['d', 'c*', 'e', 'f'], name='s1') @@ -310,21 +309,21 @@ def test_write_idt_plate_excel_file(self) -> None: # add 10 strands in excess of 3 plates for plate_type in [sc.PlateType.wells96, sc.PlateType.wells384]: - filename = f'test_excel_export_{plate_type.num_wells_per_plate()}.xls' + filename = f'test_excel_export_{plate_type.num_wells_per_plate()}.xlsx' design = nc.Design() for strand_idx in 
range(3 * plate_type.num_wells_per_plate() + 10): - idt = nc.IDTFields() + idt = nc.VendorFields() strand = design.add_strand(name=f's{strand_idx}', domain_names=[f'd{strand_idx}'], idt=idt) strand.domains[0].set_fixed_sequence('T' * strand_len) design.write_idt_plate_excel_file(filename=filename, plate_type=plate_type) - book = xlrd.open_workbook(filename) - self.assertEqual(4, book.nsheets) + book = openpyxl.load_workbook(filename=filename) + self.assertEqual(4, len(book.worksheets)) for plate in range(4): - sheet = book.sheet_by_index(plate) - self.assertEqual(3, sheet.ncols) + sheet = book.worksheets[plate] + self.assertEqual(3, sheet.max_column) if plate == 2: # penultimate plate expected_wells = plate_type.num_wells_per_plate() - plate_type.min_wells_per_plate() + 10 @@ -333,7 +332,7 @@ def test_write_idt_plate_excel_file(self) -> None: else: expected_wells = plate_type.num_wells_per_plate() - self.assertEqual(expected_wells + 1, sheet.nrows) + self.assertEqual(expected_wells + 1, sheet.max_row) os.remove(filename) diff --git a/tests/test_excel_export_96.xls b/tests/test_excel_export_96.xls new file mode 100644 index 00000000..63d95a3e Binary files /dev/null and b/tests/test_excel_export_96.xls differ