From aa8001c96538b5c1828b6c852d9446b966368270 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 31 Aug 2023 11:15:47 +0200 Subject: [PATCH 01/30] nb2galaxy prototype --- nb2workflow/galaxy.py | 237 ++++++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 2 files changed, 238 insertions(+) create mode 100644 nb2workflow/galaxy.py diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py new file mode 100644 index 00000000..309e4f7d --- /dev/null +++ b/nb2workflow/galaxy.py @@ -0,0 +1,237 @@ +#%% +import xml.etree.ElementTree as ET +import os +import re +from textwrap import dedent +import argparse +import logging + +from cdci_data_analysis.analysis.ontology import Ontology +# TODO: ontology module must be separated from the dispatcher +from nb2workflow.nbadapter import NotebookAdapter + +import nbformat +from nbconvert.exporters import ScriptExporter + +logger = logging.getLogger() + +#%% +onto = Ontology('http://odahub.io/ontology/ontology.ttl') + +#%% + +class GalaxyParameter: + def __init__(self, + name, + python_type, + description=None, + default_value=None, + min_value=None, + max_value=None, + allowed_values=None): + + partype_lookup = {str: 'text', + bool: 'boolean', + float: 'float', + int: 'integer'} + + partype = partype_lookup[python_type] + + if allowed_values is not None: + partype = 'select' + + self.name = name + self.partype = partype + self.description=description + self.default_value = default_value + self.min_value = min_value + self.max_value = max_value + self.allowed_values = allowed_values + + + @classmethod + def from_owl(cls, name, owl_uri, python_type, extra_ttl=None, default_value=None): + if extra_ttl is not None: + onto.parse_extra_triples(extra_ttl) + parameter_hierarchy = onto.get_parameter_hierarchy(owl_uri) + par_format = onto.get_parameter_format(owl_uri) + par_unit = onto.get_parameter_unit(owl_uri) + min_value, max_value = onto.get_limits(owl_uri) + allowed_values = onto.get_allowed_values(owl_uri) + + description = 
f"type: {owl_uri}" + if par_format is not None: + description += f"; format: {par_format}" + if par_unit is not None: + description += f"; units: {par_unit}" + + return cls(name, + python_type, #TODO: type is fully defined by owl_type. Use it instead? + description=description, + default_value=default_value, + min_value=min_value, + max_value=max_value, + allowed_values=allowed_values) + + def to_xml_tree(self): + attrs = {'name': self.name, + 'type': self.partype} + if self.default_value is not None and self.partype != 'select': + attrs['value'] = str(self.default_value) + if self.description is not None: + attrs['label'] = self.description + + if self.min_value is not None: + attrs['min'] = str(self.min_value) + if self.max_value is not None: + attrs['max'] = str(self.max_value) + + element = ET.Element('param', + **attrs) + + if self.allowed_values is not None: + for val in self.allowed_values: + attrs = {'value': str(val)} + if val == self.default_value: + attrs['selected'] = 'true' + option = ET.SubElement(element, 'option', *attrs) + option.text = str(val) + + # TODO: do we need additional validation? 
+ + return element + +def _nb2script(nba): + input_nb = nba.notebook_fn + mynb = nbformat.read(input_nb, as_version=4) + outputs = nba.extract_output_declarations() + + inject_pos = 0 + for pos, cell in enumerate(mynb.cells): + if 'parameters' in cell['metadata'].get('tags', []): + inject_pos = pos + 1 + break + # NOTE: validation of args is external + inject_read = nbformat.v4.new_code_cell( + dedent(""" + import json + import sys + with open(sys.argv[1], 'r') as fd: + inp_dic = json.load(fd) + for vn, vv in inp_dic.items(): + globals()[vn] = type(globals()[vn])(vv) + """)) + inject_read.metadata['tags'] = ['injected-input'] + mynb.cells.insert(inject_pos, inject_read) + + outp_code = dedent(""" + from json_tricks import dump as e_dump + output_dic = {} + """) + for vn in outputs.keys(): + outp_code += f"output_dic['{vn}']={vn}\n" + outp_code += "e_dump(output_dic, sys.argv[2])\n" + + inject_write = nbformat.v4.new_code_cell(outp_code) + inject_write.metadata['tags'] = ['injected-output'] + mynb.cells.append(inject_write) + + exporter = ScriptExporter() + script, resources = exporter.from_notebook_node(mynb) + + return script + +# TODO: several notebooks +def to_galaxy(input_nb, toolname, requirements_path, out_dir): + nba = NotebookAdapter(input_nb) + inputs = nba.input_parameters + + script_str = _nb2script(nba) + + with open(os.path.join(out_dir, 'script.py'), 'w') as fd: + fd.write(script_str) + + tool_root = ET.Element('tool', + id=toolname.replace(' ', '_'), + name=toolname, + version='0.1.0+galaxy0', #TODO: + profile='23.0') + + reqs = ET.SubElement(tool_root, 'requirements') + + for greq in ['json_tricks']: + req = ET.SubElement(reqs, + 'requirement', + type='package' + ) + req.text = greq + if requirements_path is not None: + with open(requirements_path, 'r') as fd: + for line in fd: + # TODO: this is just an example as galaxy doesn't use pip for resolving + # we still want to find correspondance in conda-forge and also use envitronment.yml + # also 
package version (does galaxy allow gt/lt?) + m = re.match(r'[^#(git)]\S+', line) + if m is not None: + req = ET.SubElement(reqs, + 'requirement', + type='package' + ) + req.text = m.group(0) + + comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') + comm.text = "python '$__tool_directory__/script.py' inputs.json '$output'" + # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 + + conf = ET.SubElement(tool_root, 'configfiles') + inp = ET.SubElement(conf, 'inputs', name='inputs', filename='inputs.json') + + inps = ET.SubElement(tool_root, 'inputs') + for pn, pv in inputs.items(): + galaxy_par = GalaxyParameter.from_owl(pn, + pv['owl_type'], + pv['python_type'], + extra_ttl=pv['extra_ttl'], + default_value=pv['default_value'] + ) + inps.append(galaxy_par.to_xml_tree()) + + outps = ET.SubElement(tool_root, 'outputs') + outp = ET.SubElement(outps, 'data', name='output', format='json') + + #TODO: tests + + help_block = ET.SubElement(tool_root, 'help') + help_block.text = 'help me!' 
# TODO: + + citats = ET.SubElement(tool_root, 'citations') + citate = ET.SubElement(citats, 'citation', type='doi') + citate.text = '10.5281/zenodo.6299481' # TODO: + + tree = ET.ElementTree(tool_root) + ET.indent(tree) + + out_xml_path = os.path.join(out_dir, f"{toolname}.xml") + tree.write(out_xml_path) + +# %% +def main(): + parser = argparse.ArgumentParser( + description='Convert python notebook to galaxy tool' + ) + parser.add_argument('notebook', type=str) + parser.add_argument('outdir', type=str) + parser.add_argument('--name', type=str, default='example') + parser.add_argument('--requirements_path', required=False) + args = parser.parse_args() + + input_nb = args.notebook + output_dir = args.outdir + toolname = args.name + requirements_path = args.requirements_path + + to_galaxy(input_nb, toolname, requirements_path, output_dir) + +if __name__ == '__main__': + main() + diff --git a/setup.py b/setup.py index 0d704d71..9573113c 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ 'nbrun=nb2workflow.nbadapter:main', 'nbinspect=nb2workflow.nbadapter:main_inspect', 'nbreduce=nb2workflow.nbadapter:main_reduce', + 'nb2galaxy=nb2workflow.galaxy:main', ] }, From 1e4de7d6e3bb251647a396853b235d2c8db959bd Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Sun, 3 Sep 2023 00:14:09 +0200 Subject: [PATCH 02/30] split outputs --- nb2workflow/galaxy.py | 202 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 166 insertions(+), 36 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 309e4f7d..021c4fe2 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -1,4 +1,4 @@ -#%% + import xml.etree.ElementTree as ET import os import re @@ -15,10 +15,46 @@ logger = logging.getLogger() -#%% -onto = Ontology('http://odahub.io/ontology/ontology.ttl') +# NOTE: to include into the base class when separated from dispatcher +class ModOntology(Ontology): + def get_oda_label(self, param_uri): + if param_uri.startswith("http"): param_uri = 
f"<{param_uri}>" + + query = "SELECT ?label WHERE {%s oda:label ?label}" % (param_uri) + + qres = self.g.query(query) + + if len(qres) == 0: return None + + label = " ".join([str(x[0]) for x in qres]) + + return label + + def is_data_product(self, owl_uri, include_parameter_products=True): + if owl_uri.startswith("http"): owl_uri = f"<{owl_uri}>" + + filt_param = 'MINUS{?cl rdfs:subClassOf* oda:ParameterProduct. }' if not include_parameter_products else '' + query = dedent(""" + SELECT (count(?cl) as ?count) WHERE { + VALUES ?cl { %s } + ?cl rdfs:subClassOf* oda:DataProduct. + %s + } + """ % (owl_uri, filt_param)) + qres = self.g.query(query) + + if int(list(qres)[0][0]) == 0: return False + + return True + + +# TODO: configurable +#ontology_path = 'http://odahub.io/ontology/ontology.ttl' +ontology_path = '/home/dsavchenko/Projects/MMODA/ontology/ontology.ttl' + + +global_req = [] -#%% class GalaxyParameter: def __init__(self, @@ -30,11 +66,11 @@ def __init__(self, max_value=None, allowed_values=None): + #TODO: type is fully defined by owl_type. Use it instead? 
partype_lookup = {str: 'text', bool: 'boolean', float: 'float', int: 'integer'} - partype = partype_lookup[python_type] if allowed_values is not None: @@ -50,25 +86,29 @@ def __init__(self, @classmethod - def from_owl(cls, name, owl_uri, python_type, extra_ttl=None, default_value=None): - if extra_ttl is not None: - onto.parse_extra_triples(extra_ttl) - parameter_hierarchy = onto.get_parameter_hierarchy(owl_uri) + def from_inspect(cls, par_details): + onto = ModOntology(ontology_path) + + owl_uri = par_details['owl_type'] + + if par_details.get('extra_ttl') is not None: + onto.parse_extra_triples(par_details['extra_ttl']) par_format = onto.get_parameter_format(owl_uri) par_unit = onto.get_parameter_unit(owl_uri) min_value, max_value = onto.get_limits(owl_uri) allowed_values = onto.get_allowed_values(owl_uri) - - description = f"type: {owl_uri}" + label = onto.get_oda_label(owl_uri) + + description = label if label is not None else par_details['name'] if par_format is not None: - description += f"; format: {par_format}" + description += f" Format: {par_format}" if par_unit is not None: - description += f"; units: {par_unit}" + description += f" Units: {par_unit}" - return cls(name, - python_type, #TODO: type is fully defined by owl_type. Use it instead? + return cls(par_details['name'], + par_details['python_type'], description=description, - default_value=default_value, + default_value=par_details['default_value'], min_value=min_value, max_value=max_value, allowed_values=allowed_values) @@ -94,18 +134,71 @@ def to_xml_tree(self): attrs = {'value': str(val)} if val == self.default_value: attrs['selected'] = 'true' - option = ET.SubElement(element, 'option', *attrs) + option = ET.SubElement(element, 'option', **attrs) option.text = str(val) # TODO: do we need additional validation? 
return element + +class GalaxyOutput: + def __init__(self, name, is_oda): + self.name = name + self.dataname = f"out_{self.name}" + self.is_oda = is_oda + self.outfile_name = f"{name}_galaxy.output" + @classmethod + def from_inspect(cls, outp_details): + onto = ModOntology(ontology_path) + + owl_uri = outp_details['owl_type'] + if outp_details['extra_ttl'] is not None: + onto.parse_extra_triples(outp_details['extra_ttl']) + + if onto.is_data_product(owl_uri, include_parameter_products=False): + is_oda = True + else: + is_oda = False + + return cls(outp_details['name'], is_oda) + + def to_xml_tree(self): + attrs = {'label': "${tool.name} -> %s"%self.name, + 'name': self.dataname, + #'auto_format': 'true', + 'format': 'auto', + 'from_work_dir': self.outfile_name} + + element = ET.Element('data', **attrs) + + return element + + + def _nb2script(nba): input_nb = nba.notebook_fn mynb = nbformat.read(input_nb, as_version=4) outputs = nba.extract_output_declarations() - + + inject_import = nbformat.v4.new_code_cell( + dedent( """ + import json + import os + import shutil + import sys + + try: + import numpy as np + _numpy_available = True + except: + _numpy_available = False + + _galaxy_wd = os.getcwd() + """)) + inject_import.metadata['tags'] = ['injected_import'] + mynb.cells.insert(0, inject_import) + inject_pos = 0 for pos, cell in enumerate(mynb.cells): if 'parameters' in cell['metadata'].get('tags', []): @@ -114,8 +207,6 @@ def _nb2script(nba): # NOTE: validation of args is external inject_read = nbformat.v4.new_code_cell( dedent(""" - import json - import sys with open(sys.argv[1], 'r') as fd: inp_dic = json.load(fd) for vn, vv in inp_dic.items(): @@ -123,14 +214,55 @@ def _nb2script(nba): """)) inject_read.metadata['tags'] = ['injected-input'] mynb.cells.insert(inject_pos, inject_read) + + outp_code = "_simple_outs, _oda_outs = [], []\n" + outp_code += "_galaxy_meta_data = {}\n" + + for vn, vv in outputs.items(): + outp = GalaxyOutput.from_inspect(vv) + if 
outp.is_oda: + outp_code += f"_oda_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" + else: + outp_code += f"_simple_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" + + outp_code += dedent(""" + for _outn, _outfn, _outv in _oda_outs: + _galaxy_outfile_name = os.path.join(_galaxy_wd, _outfn) + if isinstance(_outv, str) and os.path.isfile(_outv): + shutil.move(_outv, _galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': '_sniff_'} + elif getattr(_outv, "write_fits_file", None): + _outv.write_fits_file(_galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': 'fits'} + elif getattr(_outv, "write_file", None): + _outv.write_file(_galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': '_sniff_'} + else: + with open(_galaxy_outfile_name, 'w') as fd: + json.dump(_outv.encode(), fd) + _galaxy_meta_data[_outn] = {'ext': 'json'} + """) + + outp_code += dedent(""" + for _outn, _outfn, _outv in _simple_outs: + _galaxy_outfile_name = os.path.join(_galaxy_wd, _outfn) + if isinstance(_outv, str) and os.path.isfile(_outv): + shutil.move(_outv, _galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': '_sniff_'} + elif _numpy_available and isinstance(_outv, np.ndarray): + with open(_galaxy_outfile_name, 'wb') as fd: + np.savez(fd, _outv) + _galaxy_meta_data[_outn] = {'ext': 'npz'} + else: + with open(_galaxy_outfile_name, 'w') as fd: + json.dump(_outv, fd) + _galaxy_meta_data[_outn] = {'ext': 'expression.json'} + """) - outp_code = dedent(""" - from json_tricks import dump as e_dump - output_dic = {} - """) - for vn in outputs.keys(): - outp_code += f"output_dic['{vn}']={vn}\n" - outp_code += "e_dump(output_dic, sys.argv[2])\n" + outp_code += dedent(""" + with open(os.path.join(_galaxy_wd, 'galaxy.json'), 'w') as fd: + json.dump(_galaxy_meta_data, fd) + """) inject_write = nbformat.v4.new_code_cell(outp_code) inject_write.metadata['tags'] = ['injected-output'] @@ -159,7 +291,7 @@ def to_galaxy(input_nb, toolname, requirements_path, 
out_dir): reqs = ET.SubElement(tool_root, 'requirements') - for greq in ['json_tricks']: + for greq in global_req: req = ET.SubElement(reqs, 'requirement', type='package' @@ -180,7 +312,7 @@ def to_galaxy(input_nb, toolname, requirements_path, out_dir): req.text = m.group(0) comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') - comm.text = "python '$__tool_directory__/script.py' inputs.json '$output'" + comm.text = "python '$__tool_directory__/script.py' inputs.json" # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 conf = ET.SubElement(tool_root, 'configfiles') @@ -188,16 +320,14 @@ def to_galaxy(input_nb, toolname, requirements_path, out_dir): inps = ET.SubElement(tool_root, 'inputs') for pn, pv in inputs.items(): - galaxy_par = GalaxyParameter.from_owl(pn, - pv['owl_type'], - pv['python_type'], - extra_ttl=pv['extra_ttl'], - default_value=pv['default_value'] - ) + galaxy_par = GalaxyParameter.from_inspect(pv) inps.append(galaxy_par.to_xml_tree()) outps = ET.SubElement(tool_root, 'outputs') - outp = ET.SubElement(outps, 'data', name='output', format='json') + outputs = nba.extract_output_declarations() + for outn, outv in outputs.items(): + outp = GalaxyOutput.from_inspect(outv) + outps.append(outp.to_xml_tree()) #TODO: tests From c5a55ade70d2e31260fc3f6610578f16885aa24b Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Mon, 4 Sep 2023 16:22:32 +0200 Subject: [PATCH 03/30] fix regex --- nb2workflow/galaxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 021c4fe2..857ad6ad 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -303,7 +303,7 @@ def to_galaxy(input_nb, toolname, requirements_path, out_dir): # TODO: this is just an example as galaxy doesn't use pip for resolving # we still want to find correspondance in conda-forge and also use envitronment.yml # also package version (does galaxy allow gt/lt?) 
- m = re.match(r'[^#(git)]\S+', line) + m = re.match(r'(?!#|git)\S+', line) if m is not None: req = ET.SubElement(reqs, 'requirement', From c55c802685200388170fe6fa542c7d08de471ba9 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Mon, 4 Sep 2023 17:22:03 +0200 Subject: [PATCH 04/30] run with ipython --- nb2workflow/galaxy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 857ad6ad..66a79504 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -53,7 +53,7 @@ def is_data_product(self, owl_uri, include_parameter_products=True): ontology_path = '/home/dsavchenko/Projects/MMODA/ontology/ontology.ttl' -global_req = [] +global_req = ['ipython'] class GalaxyParameter: @@ -312,7 +312,7 @@ def to_galaxy(input_nb, toolname, requirements_path, out_dir): req.text = m.group(0) comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') - comm.text = "python '$__tool_directory__/script.py' inputs.json" + comm.text = "ipython '$__tool_directory__/script.py' inputs.json" # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 conf = ET.SubElement(tool_root, 'configfiles') From ef496c8e3f3d2d196cdfb70121812c97a780027b Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 7 Sep 2023 19:11:45 +0200 Subject: [PATCH 05/30] fix encoding oda-annotated basic types --- nb2workflow/galaxy.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 66a79504..3a4ce2ee 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -191,9 +191,14 @@ def _nb2script(nba): try: import numpy as np _numpy_available = True - except: + except ImportError: _numpy_available = False + try: + from oda_api.json import CustomJSONEncoder + except ImportError: + from json import JSONEncoder as CustomJSONEncoder + _galaxy_wd = os.getcwd() """)) inject_import.metadata['tags'] = ['injected_import'] @@ -239,7 +244,7 @@ def 
_nb2script(nba): _galaxy_meta_data[_outn] = {'ext': '_sniff_'} else: with open(_galaxy_outfile_name, 'w') as fd: - json.dump(_outv.encode(), fd) + json.dump(_outv, fd, cls=CustomJSONEncoder) _galaxy_meta_data[_outn] = {'ext': 'json'} """) From 103f05fe01645263221b4c0cdf0b0de2432f509e Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 21 Sep 2023 18:40:02 +0200 Subject: [PATCH 06/30] multiple data products --- nb2workflow/galaxy.py | 72 ++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 3a4ce2ee..e2c4cd6d 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -8,7 +8,7 @@ from cdci_data_analysis.analysis.ontology import Ontology # TODO: ontology module must be separated from the dispatcher -from nb2workflow.nbadapter import NotebookAdapter +from nb2workflow.nbadapter import NotebookAdapter, find_notebooks import nbformat from nbconvert.exporters import ScriptExporter @@ -142,14 +142,18 @@ def to_xml_tree(self): return element class GalaxyOutput: - def __init__(self, name, is_oda): + def __init__(self, name, is_oda, dprod=None): self.name = name - self.dataname = f"out_{self.name}" + if dprod is None: + dprod='' + else: + dprod += '_' + self.dataname = f"out_{dprod}{self.name}" self.is_oda = is_oda self.outfile_name = f"{name}_galaxy.output" @classmethod - def from_inspect(cls, outp_details): + def from_inspect(cls, outp_details, dprod=None): onto = ModOntology(ontology_path) owl_uri = outp_details['owl_type'] @@ -161,7 +165,7 @@ def from_inspect(cls, outp_details): else: is_oda = False - return cls(outp_details['name'], is_oda) + return cls(outp_details['name'], is_oda, dprod) def to_xml_tree(self): attrs = {'label': "${tool.name} -> %s"%self.name, @@ -212,10 +216,11 @@ def _nb2script(nba): # NOTE: validation of args is external inject_read = nbformat.v4.new_code_cell( dedent(""" - with open(sys.argv[1], 'r') as fd: + with open('inputs.json', 
'r') as fd: inp_dic = json.load(fd) - for vn, vv in inp_dic.items(): - globals()[vn] = type(globals()[vn])(vv) + for vn, vv in inp_dic['data_product'].items(): + if vn != '_selector': + globals()[vn] = type(globals()[vn])(vv) """)) inject_read.metadata['tags'] = ['injected-input'] mynb.cells.insert(inject_pos, inject_read) @@ -224,7 +229,7 @@ def _nb2script(nba): outp_code += "_galaxy_meta_data = {}\n" for vn, vv in outputs.items(): - outp = GalaxyOutput.from_inspect(vv) + outp = GalaxyOutput.from_inspect(vv, nba.name) if outp.is_oda: outp_code += f"_oda_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" else: @@ -267,6 +272,7 @@ def _nb2script(nba): outp_code += dedent(""" with open(os.path.join(_galaxy_wd, 'galaxy.json'), 'w') as fd: json.dump(_galaxy_meta_data, fd) + print('*** Job finished successfully ***') """) inject_write = nbformat.v4.new_code_cell(outp_code) @@ -280,14 +286,8 @@ def _nb2script(nba): # TODO: several notebooks def to_galaxy(input_nb, toolname, requirements_path, out_dir): - nba = NotebookAdapter(input_nb) - inputs = nba.input_parameters - - script_str = _nb2script(nba) + nbas = find_notebooks(input_nb) - with open(os.path.join(out_dir, 'script.py'), 'w') as fd: - fd.write(script_str) - tool_root = ET.Element('tool', id=toolname.replace(' ', '_'), name=toolname, @@ -317,22 +317,43 @@ def to_galaxy(input_nb, toolname, requirements_path, out_dir): req.text = m.group(0) comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') - comm.text = "ipython '$__tool_directory__/script.py' inputs.json" + comm.text = "ipython '$__tool_directory__/${data_product._selector}.py'" # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 conf = ET.SubElement(tool_root, 'configfiles') inp = ET.SubElement(conf, 'inputs', name='inputs', filename='inputs.json') inps = ET.SubElement(tool_root, 'inputs') - for pn, pv in inputs.items(): - galaxy_par = GalaxyParameter.from_inspect(pv) - inps.append(galaxy_par.to_xml_tree()) - + 
dprod_cond = ET.SubElement(inps, 'conditional', name='data_product') + dprod_sel = ET.SubElement(dprod_cond, 'param', name="_selector", type="select", label = "Data Product") + sflag = True + for name in nbas.keys(): + opt = ET.SubElement(dprod_sel, 'option', value=name, selected='true' if sflag else 'false') + opt.text = name + sflag = False + outps = ET.SubElement(tool_root, 'outputs') - outputs = nba.extract_output_declarations() - for outn, outv in outputs.items(): - outp = GalaxyOutput.from_inspect(outv) - outps.append(outp.to_xml_tree()) + + for nb_name, nba in nbas.items(): + when = ET.SubElement(dprod_cond, 'when', value=nb_name) + inputs = nba.input_parameters + + script_str = _nb2script(nba) + with open(os.path.join(out_dir, f'{nb_name}.py'), 'w') as fd: + fd.write(script_str) + + for pv in inputs.values(): + galaxy_par = GalaxyParameter.from_inspect(pv) + when.append(galaxy_par.to_xml_tree()) + + outputs = nba.extract_output_declarations() + for outv in outputs.values(): + outp = GalaxyOutput.from_inspect(outv, nb_name) + outp_tree = outp.to_xml_tree() + fltr = ET.SubElement(outp_tree, 'filter') + fltr.text = f"data_product['_selector'] == '{nb_name}'" + outps.append(outp_tree) + #TODO: tests @@ -365,6 +386,7 @@ def main(): toolname = args.name requirements_path = args.requirements_path + os.makedirs(output_dir, exist_ok=True) to_galaxy(input_nb, toolname, requirements_path, output_dir) if __name__ == '__main__': From 9f1f37cc277f43bee984a5066b6cddfd535ecbdd Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 22 Sep 2023 16:42:45 +0200 Subject: [PATCH 07/30] either one dataproduct or several --- nb2workflow/galaxy.py | 46 ++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index e2c4cd6d..382e1f84 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -218,7 +218,12 @@ def _nb2script(nba): dedent(""" with open('inputs.json', 'r') as fd: 
inp_dic = json.load(fd) - for vn, vv in inp_dic['data_product'].items(): + if '_data_product' in inp_dic.keys(): + inp_pdic = inp_dic['_data_product'] + else: + inp_pdic = inp_dic + + for vn, vv in inp_pdic.items(): if vn != '_selector': globals()[vn] = type(globals()[vn])(vv) """)) @@ -285,8 +290,8 @@ def _nb2script(nba): return script # TODO: several notebooks -def to_galaxy(input_nb, toolname, requirements_path, out_dir): - nbas = find_notebooks(input_nb) +def to_galaxy(input_path, toolname, requirements_path, out_dir): + nbas = find_notebooks(input_path) tool_root = ET.Element('tool', id=toolname.replace(' ', '_'), @@ -317,25 +322,32 @@ def to_galaxy(input_nb, toolname, requirements_path, out_dir): req.text = m.group(0) comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') - comm.text = "ipython '$__tool_directory__/${data_product._selector}.py'" + if len(nbas) > 1: + comm.text = "ipython '$__tool_directory__/${_data_product._selector}.py'" + else: + comm.text = f"ipython '$__tool_directory__/{list(nbas.keys())[0]}.py'" # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 conf = ET.SubElement(tool_root, 'configfiles') inp = ET.SubElement(conf, 'inputs', name='inputs', filename='inputs.json') inps = ET.SubElement(tool_root, 'inputs') - dprod_cond = ET.SubElement(inps, 'conditional', name='data_product') - dprod_sel = ET.SubElement(dprod_cond, 'param', name="_selector", type="select", label = "Data Product") - sflag = True - for name in nbas.keys(): - opt = ET.SubElement(dprod_sel, 'option', value=name, selected='true' if sflag else 'false') - opt.text = name - sflag = False - outps = ET.SubElement(tool_root, 'outputs') + + if len(nbas) > 1: + dprod_cond = ET.SubElement(inps, 'conditional', name='_data_product') + dprod_sel = ET.SubElement(dprod_cond, 'param', name="_selector", type="select", label = "Data Product") + sflag = True + for name in nbas.keys(): + opt = ET.SubElement(dprod_sel, 'option', value=name, selected='true' if sflag 
else 'false') + opt.text = name + sflag = False - for nb_name, nba in nbas.items(): - when = ET.SubElement(dprod_cond, 'when', value=nb_name) + for nb_name, nba in nbas.items(): + if len(nbas) > 1: + when = ET.SubElement(dprod_cond, 'when', value=nb_name) + else: + when = inps inputs = nba.input_parameters script_str = _nb2script(nba) @@ -346,12 +358,14 @@ def to_galaxy(input_nb, toolname, requirements_path, out_dir): galaxy_par = GalaxyParameter.from_inspect(pv) when.append(galaxy_par.to_xml_tree()) + outputs = nba.extract_output_declarations() for outv in outputs.values(): outp = GalaxyOutput.from_inspect(outv, nb_name) outp_tree = outp.to_xml_tree() - fltr = ET.SubElement(outp_tree, 'filter') - fltr.text = f"data_product['_selector'] == '{nb_name}'" + if len(nbas) > 1: + fltr = ET.SubElement(outp_tree, 'filter') + fltr.text = f"_data_product['_selector'] == '{nb_name}'" outps.append(outp_tree) From 65826b0e48c28e4065f0ec5b9576353aa55b021a Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 5 Oct 2023 11:50:09 +0200 Subject: [PATCH 08/30] remove todo comment --- nb2workflow/galaxy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 382e1f84..81aea9c5 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -289,7 +289,6 @@ def _nb2script(nba): return script -# TODO: several notebooks def to_galaxy(input_path, toolname, requirements_path, out_dir): nbas = find_notebooks(input_path) From c4bbb61fe7a7c37ef71ef8074d6fd16002df1dc8 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Tue, 24 Oct 2023 17:33:21 +0200 Subject: [PATCH 09/30] basic requrements parsing --- nb2workflow/galaxy.py | 97 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 17 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 81aea9c5..a94b0c6f 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -6,6 +6,8 @@ import argparse import logging +import yaml + from 
cdci_data_analysis.analysis.ontology import Ontology # TODO: ontology module must be separated from the dispatcher from nb2workflow.nbadapter import NotebookAdapter, find_notebooks @@ -289,7 +291,73 @@ def _nb2script(nba): return script -def to_galaxy(input_path, toolname, requirements_path, out_dir): +def _parse_environment_yml(filepath, available_channels): + + match_spec = re.compile(r'^(?P<pac>[^=<> ]+)\s*(?P<eq>={0,2})(?P<uneq>[<>]?=?)(?P<ver>.*)$') + # TODO: currently only basic version spec + # see https://github.com/conda/conda/blob/d58be31dadac66a14a7c488eab41004eaf578f50/conda/models/match_spec.py#L74 + # https://docs.conda.io/projects/conda-build/en/stable/resources/package-spec.html#package-match-specifications + with open(filepath, 'r') as fd: + env_yaml = yaml.safe_load(fd) + + if env_yaml.get('dependencies'): + if env_yaml.get('channels'): + extra_channels = set(env_yaml['channels']) - set(available_channels) + if extra_channels: + raise ValueError('Conda channels %s are not supported by galaxy instance', extra_channels) + else: + logger.warning('Conda channels are not defined in evironment file.') + + reqs_elements = [] + for dep in env_yaml['dependencies']: + m = match_spec.match(dep) + if m is None: + raise ValueError('Dependency spec not recognised for %s', dep) + + varg = {} + if m.group('ver'): + varg['version'] = m.group('uneq') + m.group('ver') if m.group('uneq') else m.group('ver') + reqs_elements.append(ET.Element('requirement', type='package', **varg)) + reqs_elements[-1].text = m.group('pac') + return reqs_elements + else: + return [] + +def _parse_requirements_txt(filepath): + + match_spec = re.compile(r'^(?P<pac>[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\s*(?:\[.*\])?\s*(?P<eq>[~=]{0,2})(?P<uneq>[<>]?=?)\s*(?P<ver>[0-9.\*]*)', re.I) + # TODO: basic, see https://pip.pypa.io/en/stable/reference/requirement-specifiers/ + + logger.warning('Package names in PyPI may not coincide with those in conda. 
Please revise galaxy tool requirements after generation.') + + with open(filepath, 'r') as fd: + reqs_elements = [] + for line in fd: + if line.startswith('#') or re.match(r'^\s*$', line): + continue + elif line.startswith('git+'): + logger.warning('Guessing package name from git repository name') + pac = line.split('/')[-1] + pac = pac.split('.')[0] + reqs_elements.append(ET.Element('requirement', type='package')) + reqs_elements[-1].text = pac + else: + m = match_spec.match(line) + if m is None: + raise ValueError('Dependency spec not recognised for %s', line) + varg = {} + if m.group('ver'): + varg['version'] = m.group('uneq') + m.group('ver') if m.group('uneq') else m.group('ver') + reqs_elements.append(ET.Element('requirement', type='package', **varg)) + reqs_elements[-1].text = m.group('pac') + + return reqs_elements + + +def to_galaxy(input_path, toolname, out_dir, + requirements_file = None, + conda_environment_file = None, + available_channels = ['default', 'conda-forge', 'bioconda', 'fermi']): nbas = find_notebooks(input_path) tool_root = ET.Element('tool', @@ -306,19 +374,12 @@ def to_galaxy(input_path, toolname, requirements_path, out_dir): type='package' ) req.text = greq - if requirements_path is not None: - with open(requirements_path, 'r') as fd: - for line in fd: - # TODO: this is just an example as galaxy doesn't use pip for resolving - # we still want to find correspondance in conda-forge and also use envitronment.yml - # also package version (does galaxy allow gt/lt?) 
- m = re.match(r'(?!#|git)\S+', line) - if m is not None: - req = ET.SubElement(reqs, - 'requirement', - type='package' - ) - req.text = m.group(0) + + if requirements_file is not None: + reqs.extend(_parse_requirements_txt(requirements_file)) + + if conda_environment_file is not None: + reqs.extend(_parse_environment_yml(conda_environment_file, available_channels)) comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') if len(nbas) > 1: @@ -391,16 +452,18 @@ def main(): parser.add_argument('notebook', type=str) parser.add_argument('outdir', type=str) parser.add_argument('--name', type=str, default='example') - parser.add_argument('--requirements_path', required=False) + parser.add_argument('--requirements_txt', required=False) + parser.add_argument('--environment_yml', required=False) args = parser.parse_args() input_nb = args.notebook output_dir = args.outdir toolname = args.name - requirements_path = args.requirements_path + requirements_txt = args.requirements_txt + environment_yml = args.environment_yml os.makedirs(output_dir, exist_ok=True) - to_galaxy(input_nb, toolname, requirements_path, output_dir) + to_galaxy(input_nb, toolname, output_dir, requirements_txt, environment_yml) if __name__ == '__main__': main() From ec36449d12b6882d8fdc56cf3ddafb42eb8d147a Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 26 Oct 2023 16:34:34 +0200 Subject: [PATCH 10/30] default tests --- .gitignore | 3 +- nb2workflow/galaxy.py | 87 +++++++++++++++++++------------------------ 2 files changed, 40 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index 34d87571..17ddf8ff 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,5 @@ callback.json function.xml *_output.html *_output.json -.nb2workflow/cache \ No newline at end of file +.nb2workflow/cache +tests/testfiles/.nb2workflow \ No newline at end of file diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index a94b0c6f..8056f90c 100644 --- a/nb2workflow/galaxy.py +++ 
b/nb2workflow/galaxy.py @@ -8,46 +8,13 @@ import yaml -from cdci_data_analysis.analysis.ontology import Ontology -# TODO: ontology module must be separated from the dispatcher +from oda_api.ontology_helper import Ontology from nb2workflow.nbadapter import NotebookAdapter, find_notebooks import nbformat from nbconvert.exporters import ScriptExporter logger = logging.getLogger() - -# NOTE: to include into the base class when separated from dispatcher -class ModOntology(Ontology): - def get_oda_label(self, param_uri): - if param_uri.startswith("http"): param_uri = f"<{param_uri}>" - - query = "SELECT ?label WHERE {%s oda:label ?label}" % (param_uri) - - qres = self.g.query(query) - - if len(qres) == 0: return None - - label = " ".join([str(x[0]) for x in qres]) - - return label - - def is_data_product(self, owl_uri, include_parameter_products=True): - if owl_uri.startswith("http"): owl_uri = f"<{owl_uri}>" - - filt_param = 'MINUS{?cl rdfs:subClassOf* oda:ParameterProduct. }' if not include_parameter_products else '' - query = dedent(""" - SELECT (count(?cl) as ?count) WHERE { - VALUES ?cl { %s } - ?cl rdfs:subClassOf* oda:DataProduct. 
- %s - } - """ % (owl_uri, filt_param)) - qres = self.g.query(query) - - if int(list(qres)[0][0]) == 0: return False - - return True # TODO: configurable @@ -57,6 +24,7 @@ def is_data_product(self, owl_uri, include_parameter_products=True): global_req = ['ipython'] +_success_text = '*** Job finished successfully ***' class GalaxyParameter: def __init__(self, @@ -89,7 +57,7 @@ def __init__(self, @classmethod def from_inspect(cls, par_details): - onto = ModOntology(ontology_path) + onto = Ontology(ontology_path) owl_uri = par_details['owl_type'] @@ -156,7 +124,7 @@ def __init__(self, name, is_oda, dprod=None): @classmethod def from_inspect(cls, outp_details, dprod=None): - onto = ModOntology(ontology_path) + onto = Ontology(ontology_path) owl_uri = outp_details['owl_type'] if outp_details['extra_ttl'] is not None: @@ -276,10 +244,10 @@ def _nb2script(nba): _galaxy_meta_data[_outn] = {'ext': 'expression.json'} """) - outp_code += dedent(""" + outp_code += dedent(f""" with open(os.path.join(_galaxy_wd, 'galaxy.json'), 'w') as fd: json.dump(_galaxy_meta_data, fd) - print('*** Job finished successfully ***') + print("{_success_text}") """) inject_write = nbformat.v4.new_code_cell(outp_code) @@ -291,6 +259,8 @@ def _nb2script(nba): return script +# FIXME: seems galaxy only support exact versions. So e.g. 'oda-api>=1.44' will not work, resulting in `conda install 'oda-api=>=1.44'`. +# at least warn about it, but better resolve automatically somehow. (In both parsers.) 
def _parse_environment_yml(filepath, available_channels): match_spec = re.compile(r'^(?P[^=<> ]+)\s*(?P={0,2})(?P[<>]?=?)(?P.*)$') @@ -354,16 +324,20 @@ def _parse_requirements_txt(filepath): return reqs_elements -def to_galaxy(input_path, toolname, out_dir, +def to_galaxy(input_path, + toolname, + out_dir, + tool_version = '0.1.0+galaxy0', requirements_file = None, conda_environment_file = None, - available_channels = ['default', 'conda-forge', 'bioconda', 'fermi']): + available_channels = ['default', 'conda-forge', 'bioconda', 'fermi'], + ): nbas = find_notebooks(input_path) tool_root = ET.Element('tool', id=toolname.replace(' ', '_'), name=toolname, - version='0.1.0+galaxy0', #TODO: + version=tool_version, #TODO: profile='23.0') reqs = ET.SubElement(tool_root, 'requirements') @@ -389,10 +363,12 @@ def to_galaxy(input_path, toolname, out_dir, # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 conf = ET.SubElement(tool_root, 'configfiles') - inp = ET.SubElement(conf, 'inputs', name='inputs', filename='inputs.json') + conf.append(ET.Element('inputs', name='inputs', filename='inputs.json')) inps = ET.SubElement(tool_root, 'inputs') outps = ET.SubElement(tool_root, 'outputs') + tests = ET.SubElement(tool_root, 'tests') + if len(nbas) > 1: dprod_cond = ET.SubElement(inps, 'conditional', name='_data_product') @@ -404,11 +380,18 @@ def to_galaxy(input_path, toolname, out_dir, sflag = False for nb_name, nba in nbas.items(): + inputs = nba.input_parameters + outputs = nba.extract_output_declarations() + + default_test = ET.SubElement(tests, 'test', expect_num_outputs=str(len(outputs))) + if len(nbas) > 1: when = ET.SubElement(dprod_cond, 'when', value=nb_name) + test_par_root = ET.SubElement(default_test, 'conditional', name='_data_product') + test_par_root.append(ET.Element('param', name='_selector', value=nb_name)) else: when = inps - inputs = nba.input_parameters + test_par_root = default_test script_str = _nb2script(nba) with open(os.path.join(out_dir, 
f'{nb_name}.py'), 'w') as fd: @@ -417,9 +400,8 @@ def to_galaxy(input_path, toolname, out_dir, for pv in inputs.values(): galaxy_par = GalaxyParameter.from_inspect(pv) when.append(galaxy_par.to_xml_tree()) + test_par_root.append(ET.Element('param', name=galaxy_par.name, value=str(galaxy_par.default_value))) - - outputs = nba.extract_output_declarations() for outv in outputs.values(): outp = GalaxyOutput.from_inspect(outv, nb_name) outp_tree = outp.to_xml_tree() @@ -428,9 +410,9 @@ def to_galaxy(input_path, toolname, out_dir, fltr.text = f"_data_product['_selector'] == '{nb_name}'" outps.append(outp_tree) - - #TODO: tests - + assert_stdout = ET.SubElement(default_test, 'assert_stdout') + assert_stdout.append(ET.Element('has_text', text=_success_text)) + help_block = ET.SubElement(tool_root, 'help') help_block.text = 'help me!' # TODO: @@ -452,6 +434,7 @@ def main(): parser.add_argument('notebook', type=str) parser.add_argument('outdir', type=str) parser.add_argument('--name', type=str, default='example') + parser.add_argument('--tool_version', type=str, default='0.1.0+galaxy0') parser.add_argument('--requirements_txt', required=False) parser.add_argument('--environment_yml', required=False) args = parser.parse_args() @@ -461,9 +444,15 @@ def main(): toolname = args.name requirements_txt = args.requirements_txt environment_yml = args.environment_yml + tool_version = args.tool_version os.makedirs(output_dir, exist_ok=True) - to_galaxy(input_nb, toolname, output_dir, requirements_txt, environment_yml) + to_galaxy(input_nb, + toolname, + output_dir, + tool_version=tool_version, + requirements_file=requirements_txt, + conda_environment_file=environment_yml) if __name__ == '__main__': main() From 378a82e42af8f5dc4cea6a3421ffd6fa3f246f49 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Mon, 30 Oct 2023 21:03:49 +0100 Subject: [PATCH 11/30] combine rrequirements --- nb2workflow/galaxy.py | 232 +++++++++++++++++++++++++++--------------- setup.py | 4 + 2 files changed, 
156 insertions(+), 80 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 8056f90c..08fcf7a1 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -7,6 +7,7 @@ import logging import yaml +import json from oda_api.ontology_helper import Ontology from nb2workflow.nbadapter import NotebookAdapter, find_notebooks @@ -14,13 +15,15 @@ import nbformat from nbconvert.exporters import ScriptExporter +from ensureconda.api import ensureconda +import subprocess as sp + +import tempfile + logger = logging.getLogger() -# TODO: configurable -#ontology_path = 'http://odahub.io/ontology/ontology.ttl' -ontology_path = '/home/dsavchenko/Projects/MMODA/ontology/ontology.ttl' - +default_ontology_path = 'http://odahub.io/ontology/ontology.ttl' global_req = ['ipython'] @@ -56,7 +59,7 @@ def __init__(self, @classmethod - def from_inspect(cls, par_details): + def from_inspect(cls, par_details, ontology_path): onto = Ontology(ontology_path) owl_uri = par_details['owl_type'] @@ -123,7 +126,7 @@ def __init__(self, name, is_oda, dprod=None): self.outfile_name = f"{name}_galaxy.output" @classmethod - def from_inspect(cls, outp_details, dprod=None): + def from_inspect(cls, outp_details, ontology_path, dprod=None): onto = Ontology(ontology_path) owl_uri = outp_details['owl_type'] @@ -150,7 +153,7 @@ def to_xml_tree(self): -def _nb2script(nba): +def _nb2script(nba, ontology_path): input_nb = nba.notebook_fn mynb = nbformat.read(input_nb, as_version=4) outputs = nba.extract_output_declarations() @@ -204,7 +207,7 @@ def _nb2script(nba): outp_code += "_galaxy_meta_data = {}\n" for vn, vv in outputs.items(): - outp = GalaxyOutput.from_inspect(vv, nba.name) + outp = GalaxyOutput.from_inspect(vv, ontology_path=ontology_path, dprod=nba.name) if outp.is_oda: outp_code += f"_oda_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" else: @@ -259,69 +262,141 @@ def _nb2script(nba): return script -# FIXME: seems galaxy only support exact versions. So e.g. 
'oda-api>=1.44' will not work, resulting in `conda install 'oda-api=>=1.44'`. -# at least warn about it, but better resolve automatically somehow. (In both parsers.) -def _parse_environment_yml(filepath, available_channels): - - match_spec = re.compile(r'^(?P[^=<> ]+)\s*(?P={0,2})(?P[<>]?=?)(?P.*)$') - # TODO: currently only basic version spec - # see https://github.com/conda/conda/blob/d58be31dadac66a14a7c488eab41004eaf578f50/conda/models/match_spec.py#L74 - # https://docs.conda.io/projects/conda-build/en/stable/resources/package-spec.html#package-match-specifications - with open(filepath, 'r') as fd: - env_yaml = yaml.safe_load(fd) + +class Requirements: - if env_yaml.get('dependencies'): - if env_yaml.get('channels'): - extra_channels = set(env_yaml['channels']) - set(available_channels) - if extra_channels: - raise ValueError('Conda channels %s are not supported by galaxy instance', extra_channels) + def __init__(self, available_channels, conda_env_yml = None, requirements_txt = None): + self.tmpdir = tempfile.TemporaryDirectory() + self.fullenv_file_path = os.path.join(self.tmpdir.name, 'environment.yml') + + self.micromamba = self._get_micromamba_binary() + + boilerplate_env_dict = {'channels': available_channels, 'dependencies': global_req} + + if conda_env_yml is not None: + with open(conda_env_yml, 'r') as fd: + self.env_dict = yaml.safe_load(fd) + + if self.env_dict.get('dependencies'): + if self.env_dict.get('channels'): + extra_channels = set(self.env_dict['channels']) - set(available_channels) + if extra_channels: + raise ValueError('Conda channels %s are not supported by galaxy instance', extra_channels) + else: + logger.warning('Conda channels are not defined in evironment file.') + self.env_dict['channels'] = available_channels + self.env_dict['dependencies'].extend(global_req) + else: + self.env_dict = boilerplate_env_dict else: - logger.warning('Conda channels are not defined in evironment file.') + self.env_dict = boilerplate_env_dict - 
reqs_elements = [] - for dep in env_yaml['dependencies']: + match_spec = re.compile(r'^(?P[^=<> ]+)') + self._direct_dependencies = [] + for dep in self.env_dict['dependencies']: m = match_spec.match(dep) - if m is None: - raise ValueError('Dependency spec not recognised for %s', dep) - - varg = {} - if m.group('ver'): - varg['version'] = m.group('uneq') + m.group('ver') if m.group('uneq') else m.group('ver') - reqs_elements.append(ET.Element('requirement', type='package', **varg)) - reqs_elements[-1].text = m.group('pac') - return reqs_elements - else: - return [] + self._direct_dependencies.append((m.group('pac'), '', 0, '')) + + if requirements_txt is not None: + pip_reqs = self._parse_requirements_txt(requirements_txt) -def _parse_requirements_txt(filepath): - - match_spec = re.compile(r'^(?P[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\s*(?:\[.*\])?\s*(?P[~=]{0,2})(?P[<>]?=?)\s*(?P[0-9.\*]*)', re.I) - # TODO: basic, see https://pip.pypa.io/en/stable/reference/requirement-specifiers/ - - logger.warning('Package names in PyPI may not coincide with those in conda. 
Please revise galaxy tool requirements after generation.') - - with open(filepath, 'r') as fd: - reqs_elements = [] - for line in fd: - if line.startswith('#') or re.match(r'^\s*$', line): + channels_cl = [] + for ch in self.env_dict['channels']: + channels_cl.append('-c') + channels_cl.append(ch) + + for req in pip_reqs: + run_cmd = [self.micromamba, 'search', '--json'] + run_cmd.extend(channels_cl) + run_cmd.append(req[0]+req[1]) + + search_res = sp.run(run_cmd, check=True, capture_output=True, text=True) + search_json = json.loads(search_res.stdout) + if search_json['result']['pkgs']: + self._direct_dependencies.append(req) + self.env_dict['dependencies'].append(req[0]+req[1]) + else: + self._direct_dependencies.append((req[0], req[1], 2, req[3])) + + with open(self.fullenv_file_path, 'w') as fd: + yaml.dump(self.env_dict, fd) + + resolved_env = self._resolve_environment_yml() + + self.final_dependencies = {} + for dep in self._direct_dependencies: + if dep[2] == 2: + self.final_dependencies[dep[0]] = (dep[1], dep[2], dep[3]) + elif dep[0] in self.final_dependencies.keys(): continue - elif line.startswith('git+'): - logger.warning('Guessing package name from git repository name') - pac = line.split('/')[-1] - pac = pac.split('.')[0] - reqs_elements.append(ET.Element('requirement', type='package')) - reqs_elements[-1].text = pac else: - m = match_spec.match(line) - if m is None: - raise ValueError('Dependency spec not recognised for %s', line) - varg = {} - if m.group('ver'): - varg['version'] = m.group('uneq') + m.group('ver') if m.group('uneq') else m.group('ver') - reqs_elements.append(ET.Element('requirement', type='package', **varg)) - reqs_elements[-1].text = m.group('pac') + self.final_dependencies[dep[0]] = (resolved_env[dep[0]], dep[2], dep[3]) + + def _resolve_environment_yml(self): + + run_command = [str(self.micromamba), + 'env', 'create', + '-n', '__temp_env_name', + '--dry-run', + '--json', + '-f', str(self.fullenv_file_path)] + run_proc = 
sp.run(run_command, capture_output=True, check=True, text=True) + resolved_env = json.loads(run_proc.stdout)['actions']['FETCH'] + resolved_env = {x['name']: x['version'] for x in resolved_env} + + return resolved_env + + def to_xml_tree(self): + reqs_elements = [] + for name, det in self.final_dependencies.items(): + if det[1] == 2: + reqs_elements.append(ET.Comment( + f"Requirements string {det[2]} can't be resolved with conda!")) + else: + reqs_elements.append(ET.Element('requirement', + type='package', + version = det[0])) + reqs_elements[-1].text = name + + return reqs_elements - return reqs_elements + @staticmethod + def _parse_requirements_txt(filepath): + + match_spec = re.compile(r'^(?P[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\s*(?:\[.*\])?\s*(?P[~=]{0,2})(?P[<>]?=?)\s*(?P[0-9.\*]*)', re.I) + match_from_url = re.compile(r'^(?P[A-Z0-9][A-Z0-9._-]*[A-Z0-9])\s*@(?P.*)', re.I) + + # TODO: basic, see https://pip.pypa.io/en/stable/reference/requirement-specifiers/ + + with open(filepath, 'r') as fd: + reqs_str_list = [] + for line in fd: + if line.startswith('#') or re.match(r'^\s*$', line): + continue + elif line.startswith('git+'): + raise ValueError('Dependency from git repo is not supported: %s', line) + elif match_from_url.match(line): + raise ValueError('Dependency from url is not supported %s', line) + else: + m = match_spec.match(line) + if m is None: + raise ValueError('Dependency spec not recognised for %s', line) + if m.group('ver'): + ver = m.group('uneq') + m.group('ver') if m.group('uneq') else m.group('eq') + m.group('ver') + else: + ver = '' + reqs_str_list.append((m.group('pac'), ver, 1, line)) + + return reqs_str_list + + @staticmethod + def _get_micromamba_binary(): + mamba_bin = ensureconda(no_install=False, + micromamba=True, + mamba=False, + conda=False, + conda_exe=False) + return mamba_bin def to_galaxy(input_path, @@ -330,7 +405,8 @@ def to_galaxy(input_path, tool_version = '0.1.0+galaxy0', requirements_file = None, conda_environment_file = None, - 
available_channels = ['default', 'conda-forge', 'bioconda', 'fermi'], + available_channels = ['default', 'conda-forge'], + ontology_path = default_ontology_path, ): nbas = find_notebooks(input_path) @@ -342,18 +418,9 @@ def to_galaxy(input_path, reqs = ET.SubElement(tool_root, 'requirements') - for greq in global_req: - req = ET.SubElement(reqs, - 'requirement', - type='package' - ) - req.text = greq - - if requirements_file is not None: - reqs.extend(_parse_requirements_txt(requirements_file)) - - if conda_environment_file is not None: - reqs.extend(_parse_environment_yml(conda_environment_file, available_channels)) + reqs.extend(Requirements(available_channels=available_channels, + conda_env_yml=conda_environment_file, + requirements_txt=requirements_file).to_xml_tree()) comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') if len(nbas) > 1: @@ -393,17 +460,17 @@ def to_galaxy(input_path, when = inps test_par_root = default_test - script_str = _nb2script(nba) + script_str = _nb2script(nba, ontology_path) with open(os.path.join(out_dir, f'{nb_name}.py'), 'w') as fd: fd.write(script_str) for pv in inputs.values(): - galaxy_par = GalaxyParameter.from_inspect(pv) + galaxy_par = GalaxyParameter.from_inspect(pv, ontology_path=ontology_path) when.append(galaxy_par.to_xml_tree()) test_par_root.append(ET.Element('param', name=galaxy_par.name, value=str(galaxy_par.default_value))) for outv in outputs.values(): - outp = GalaxyOutput.from_inspect(outv, nb_name) + outp = GalaxyOutput.from_inspect(outv, ontology_path=ontology_path, dprod=nb_name) outp_tree = outp.to_xml_tree() if len(nbas) > 1: fltr = ET.SubElement(outp_tree, 'filter') @@ -437,6 +504,7 @@ def main(): parser.add_argument('--tool_version', type=str, default='0.1.0+galaxy0') parser.add_argument('--requirements_txt', required=False) parser.add_argument('--environment_yml', required=False) + parser.add_argument('--ontology_path', required=False) args = parser.parse_args() input_nb = args.notebook @@ 
-445,6 +513,9 @@ def main(): requirements_txt = args.requirements_txt environment_yml = args.environment_yml tool_version = args.tool_version + ontology_path = args.ontology_path + if ontology_path is None: + ontology_path = default_ontology_path os.makedirs(output_dir, exist_ok=True) to_galaxy(input_nb, @@ -452,7 +523,8 @@ def main(): output_dir, tool_version=tool_version, requirements_file=requirements_txt, - conda_environment_file=environment_yml) + conda_environment_file=environment_yml, + ontology_path=ontology_path) if __name__ == '__main__': main() diff --git a/setup.py b/setup.py index 5b885659..6900595b 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,10 @@ ], "k8s":[ 'kubernetes' + ], + 'galaxy':[ + 'oda_api', + 'ensureconda', ] }, From be14c51bb74cbdb22239224bb3f7100ef2516141 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Tue, 31 Oct 2023 18:41:14 +0100 Subject: [PATCH 12/30] no exception for unhandled requirements --- nb2workflow/galaxy.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 08fcf7a1..0e8c7052 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -305,7 +305,10 @@ def __init__(self, available_channels, conda_env_yml = None, requirements_txt = channels_cl.append('-c') channels_cl.append(ch) - for req in pip_reqs: + for req in pip_reqs: + if req[2] == 2: + self._direct_dependencies.append(req) + continue run_cmd = [self.micromamba, 'search', '--json'] run_cmd.extend(channels_cl) run_cmd.append(req[0]+req[1]) @@ -316,6 +319,7 @@ def __init__(self, available_channels, conda_env_yml = None, requirements_txt = self._direct_dependencies.append(req) self.env_dict['dependencies'].append(req[0]+req[1]) else: + logger.warning(f'Dependency {req[0]} not found in conda channels.') self._direct_dependencies.append((req[0], req[1], 2, req[3])) with open(self.fullenv_file_path, 'w') as fd: @@ -331,7 +335,8 @@ def __init__(self, available_channels, 
conda_env_yml = None, requirements_txt = continue else: self.final_dependencies[dep[0]] = (resolved_env[dep[0]], dep[2], dep[3]) - + + def _resolve_environment_yml(self): run_command = [str(self.micromamba), @@ -351,7 +356,8 @@ def to_xml_tree(self): for name, det in self.final_dependencies.items(): if det[1] == 2: reqs_elements.append(ET.Comment( - f"Requirements string {det[2]} can't be resolved with conda!")) + (f"Requirements string '{det[2]}' can't be converted automatically. " + "Please add the galaxy/conda requirement manually or modify the requirements file!"))) else: reqs_elements.append(ET.Element('requirement', type='package', @@ -374,13 +380,16 @@ def _parse_requirements_txt(filepath): if line.startswith('#') or re.match(r'^\s*$', line): continue elif line.startswith('git+'): - raise ValueError('Dependency from git repo is not supported: %s', line) + logger.warning('Dependency from git repo is not supported: %s', line) + reqs_str_list.append((line, '', 2, line)) elif match_from_url.match(line): - raise ValueError('Dependency from url is not supported %s', line) + logger.warning('Dependency from url is not supported %s', line) + reqs_str_list.append((line, '', 2, line)) else: m = match_spec.match(line) if m is None: - raise ValueError('Dependency spec not recognised for %s', line) + logger.warning('Dependency spec not recognised for %s', line) + reqs_str_list.append((line, '', 2, line)) if m.group('ver'): ver = m.group('uneq') + m.group('ver') if m.group('uneq') else m.group('eq') + m.group('ver') else: From 4bc343d3686627dd4ed3fb9f70ffb7bbaefea1c3 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 3 Nov 2023 12:54:29 +0100 Subject: [PATCH 13/30] no label duplication --- nb2workflow/galaxy.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 0e8c7052..6bf00287 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -119,7 +119,9 @@ def __init__(self, name, 
is_oda, dprod=None): self.name = name if dprod is None: dprod='' + self.dprod = dprod else: + self.dprod = dprod dprod += '_' self.dataname = f"out_{dprod}{self.name}" self.is_oda = is_oda @@ -141,7 +143,12 @@ def from_inspect(cls, outp_details, ontology_path, dprod=None): return cls(outp_details['name'], is_oda, dprod) def to_xml_tree(self): - attrs = {'label': "${tool.name} -> %s"%self.name, + if self.dprod is None: + label = "${tool.name} -> %s"%self.name + else: + label = "${tool.name} -> %s %s"%(self.dprod, self.name) + + attrs = {'label': label, 'name': self.dataname, #'auto_format': 'true', 'format': 'auto', From 1ee87061f406fa04b25a8bbae1764ebd8c4389b4 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 3 Nov 2023 14:53:05 +0100 Subject: [PATCH 14/30] citations and help --- nb2workflow/galaxy.py | 60 ++++++++++++++++++++++++++++++++++++------- setup.py | 2 ++ 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 6bf00287..607b4c8a 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -18,6 +18,9 @@ from ensureconda.api import ensureconda import subprocess as sp +import bibtexparser as bib +import pypandoc + import tempfile logger = logging.getLogger() @@ -413,7 +416,33 @@ def _get_micromamba_binary(): conda=False, conda_exe=False) return mamba_bin - + +def _split_bibfile(filepath): + # parse bibfile and return only entries (no preamble/comments/strings) as list of strings + biblib = bib.parse_file(filepath) + + out = [] + for ent in biblib.entries: + tmplib = bib.Library() + tmplib.add(ent) + outstr = bib.write_string(tmplib) + outstr = outstr.replace('\n', '\n\t\t') + if outstr.endswith('\n\t\t'): + outstr = outstr[:-3] + out.append(outstr) + return out + +def _read_help_file(filepath): + if filepath.endswith('.rst'): + with open(filepath, 'r') as fd: + help_text = fd.read() + elif filepath.endswith('.md'): + with open(filepath, 'r') as fd: + help_md = fd.read() + help_text = 
pypandoc.convert_file(filepath, 'rst') + else: + NotImplementedError('Unknown help file extension.') + return help_text def to_galaxy(input_path, toolname, @@ -421,6 +450,8 @@ def to_galaxy(input_path, tool_version = '0.1.0+galaxy0', requirements_file = None, conda_environment_file = None, + citations_bibfile = None, + help_file = None, available_channels = ['default', 'conda-forge'], ontology_path = default_ontology_path, ): @@ -429,7 +460,7 @@ def to_galaxy(input_path, tool_root = ET.Element('tool', id=toolname.replace(' ', '_'), name=toolname, - version=tool_version, #TODO: + version=tool_version, profile='23.0') reqs = ET.SubElement(tool_root, 'requirements') @@ -495,13 +526,18 @@ def to_galaxy(input_path, assert_stdout = ET.SubElement(default_test, 'assert_stdout') assert_stdout.append(ET.Element('has_text', text=_success_text)) - - help_block = ET.SubElement(tool_root, 'help') - help_block.text = 'help me!' # TODO: - - citats = ET.SubElement(tool_root, 'citations') - citate = ET.SubElement(citats, 'citation', type='doi') - citate.text = '10.5281/zenodo.6299481' # TODO: + + if help_file is not None: + help_block = ET.SubElement(tool_root, 'help') + help_text = _read_help_file(help_file) + help_block.text = help_text + + if citations_bibfile is not None: + citats = ET.SubElement(tool_root, 'citations') + bibentries = _split_bibfile(citations_bibfile) + for entry in bibentries: + citate = ET.SubElement(citats, 'citation', type='bibtex') + citate.text = entry tree = ET.ElementTree(tool_root) ET.indent(tree) @@ -521,6 +557,8 @@ def main(): parser.add_argument('--requirements_txt', required=False) parser.add_argument('--environment_yml', required=False) parser.add_argument('--ontology_path', required=False) + parser.add_argument('--citations_bibfile', required=False) + parser.add_argument('--help_file', required=False) args = parser.parse_args() input_nb = args.notebook @@ -532,6 +570,8 @@ def main(): ontology_path = args.ontology_path if ontology_path is None: 
ontology_path = default_ontology_path + bibfile = args.citations_bibfile + help_file = args.help_file os.makedirs(output_dir, exist_ok=True) to_galaxy(input_nb, @@ -540,6 +580,8 @@ def main(): tool_version=tool_version, requirements_file=requirements_txt, conda_environment_file=environment_yml, + citations_bibfile=bibfile, + help_file=help_file, ontology_path=ontology_path) if __name__ == '__main__': diff --git a/setup.py b/setup.py index d9de7ef4..c1e0de48 100644 --- a/setup.py +++ b/setup.py @@ -68,6 +68,8 @@ 'galaxy':[ 'oda_api', 'ensureconda', + 'bibtexparser >= 2.0.0b3', + 'pypandoc_binary', ] }, From 9e79463760cd24119f6e5d17ab97a03ab46232a9 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 3 Nov 2023 15:09:04 +0100 Subject: [PATCH 15/30] remove unused md + todo --- nb2workflow/galaxy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 607b4c8a..c974d52d 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -437,8 +437,7 @@ def _read_help_file(filepath): with open(filepath, 'r') as fd: help_text = fd.read() elif filepath.endswith('.md'): - with open(filepath, 'r') as fd: - help_md = fd.read() + # TODO: test and adapt formatting arguments help_text = pypandoc.convert_file(filepath, 'rst') else: NotImplementedError('Unknown help file extension.') From 61807971e687d13acf9a2619cb96971657f073d4 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 3 Nov 2023 18:58:09 +0100 Subject: [PATCH 16/30] fix no dir --- nb2workflow/galaxy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index c974d52d..8c3c52f4 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -454,6 +454,9 @@ def to_galaxy(input_path, available_channels = ['default', 'conda-forge'], ontology_path = default_ontology_path, ): + + os.makedirs(out_dir, exist_ok=True) + nbas = find_notebooks(input_path) tool_root = ET.Element('tool', From 
a3291f062eb6bb1fc76fb73b3b81fc6e2041bff6 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 9 Nov 2023 13:58:00 +0100 Subject: [PATCH 17/30] comment about galaxyxml package --- nb2workflow/galaxy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 8c3c52f4..6040ef3c 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -90,6 +90,9 @@ def from_inspect(cls, par_details, ontology_path): allowed_values=allowed_values) def to_xml_tree(self): + + # TODO: consider using https://github.com/hexylena/galaxyxml + attrs = {'name': self.name, 'type': self.partype} if self.default_value is not None and self.partype != 'select': From 7068a1cc20cd81a5e66774d835729833c0339d02 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Wed, 20 Dec 2023 11:57:07 +0100 Subject: [PATCH 18/30] tool id --- nb2workflow/galaxy.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 6040ef3c..ae764e02 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -449,6 +449,7 @@ def _read_help_file(filepath): def to_galaxy(input_path, toolname, out_dir, + tool_id = None, tool_version = '0.1.0+galaxy0', requirements_file = None, conda_environment_file = None, @@ -462,8 +463,13 @@ def to_galaxy(input_path, nbas = find_notebooks(input_path) + if tool_id is not None: + tid = tool_id + else: + tid = re.sub(r'[^a-z0-9_]', '_', toolname.lower()) + tool_root = ET.Element('tool', - id=toolname.replace(' ', '_'), + id=tid, name=toolname, version=tool_version, profile='23.0') From 1d234c930a749012f1d1b7725ffbf8d645505638 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Wed, 20 Dec 2023 12:12:26 +0100 Subject: [PATCH 19/30] xml filename --- nb2workflow/galaxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index ae764e02..91a0b89c 100644 --- a/nb2workflow/galaxy.py +++ 
b/nb2workflow/galaxy.py @@ -553,7 +553,7 @@ def to_galaxy(input_path, tree = ET.ElementTree(tool_root) ET.indent(tree) - out_xml_path = os.path.join(out_dir, f"{toolname}.xml") + out_xml_path = os.path.join(out_dir, f"{tid}.xml") tree.write(out_xml_path) # %% From c375d94bbe109cbc7ed506ddbf0aa35d948058b9 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Wed, 31 Jan 2024 15:27:15 +0100 Subject: [PATCH 20/30] preferably use doi in galaxy xml --- nb2workflow/galaxy.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 91a0b89c..254e2e06 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -426,13 +426,16 @@ def _split_bibfile(filepath): out = [] for ent in biblib.entries: - tmplib = bib.Library() - tmplib.add(ent) - outstr = bib.write_string(tmplib) - outstr = outstr.replace('\n', '\n\t\t') - if outstr.endswith('\n\t\t'): - outstr = outstr[:-3] - out.append(outstr) + if 'doi' in [x.key for x in ent.fields if x.value]: + out.append(('doi', [x.value for x in ent.fields if x.key=='doi'][0])) + else: + tmplib = bib.Library() + tmplib.add(ent) + outstr = bib.write_string(tmplib) + outstr = outstr.replace('\n', '\n\t\t') + if outstr.endswith('\n\t\t'): + outstr = outstr[:-3] + out.append(('bibtex', outstr)) return out def _read_help_file(filepath): @@ -547,8 +550,8 @@ def to_galaxy(input_path, citats = ET.SubElement(tool_root, 'citations') bibentries = _split_bibfile(citations_bibfile) for entry in bibentries: - citate = ET.SubElement(citats, 'citation', type='bibtex') - citate.text = entry + citate = ET.SubElement(citats, 'citation', type=entry[0]) + citate.text = entry[1] tree = ET.ElementTree(tool_root) ET.indent(tree) From 840961d562e3b2ea6cc86c32e807380adcad1b9a Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Wed, 7 Feb 2024 16:32:28 +0100 Subject: [PATCH 21/30] optional ipython --- nb2workflow/galaxy.py | 40 +++++++++++++++++++++++++--------------- 1 file 
changed, 25 insertions(+), 15 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 254e2e06..70dbce25 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -28,7 +28,7 @@ default_ontology_path = 'http://odahub.io/ontology/ontology.ttl' -global_req = ['ipython'] +global_req = [] _success_text = '*** Job finished successfully ***' @@ -278,13 +278,13 @@ def _nb2script(nba, ontology_path): class Requirements: - def __init__(self, available_channels, conda_env_yml = None, requirements_txt = None): + def __init__(self, available_channels, conda_env_yml = None, requirements_txt = None, extra_req = []): self.tmpdir = tempfile.TemporaryDirectory() self.fullenv_file_path = os.path.join(self.tmpdir.name, 'environment.yml') self.micromamba = self._get_micromamba_binary() - boilerplate_env_dict = {'channels': available_channels, 'dependencies': global_req} + boilerplate_env_dict = {'channels': available_channels, 'dependencies': extra_req} if conda_env_yml is not None: with open(conda_env_yml, 'r') as fd: @@ -298,7 +298,7 @@ def __init__(self, available_channels, conda_env_yml = None, requirements_txt = else: logger.warning('Conda channels are not defined in evironment file.') self.env_dict['channels'] = available_channels - self.env_dict['dependencies'].extend(global_req) + self.env_dict['dependencies'].extend(extra_req) else: self.env_dict = boilerplate_env_dict else: @@ -478,18 +478,13 @@ def to_galaxy(input_path, profile='23.0') reqs = ET.SubElement(tool_root, 'requirements') - - reqs.extend(Requirements(available_channels=available_channels, - conda_env_yml=conda_environment_file, - requirements_txt=requirements_file).to_xml_tree()) + extra_req = global_req + # will be populated after script generation to check if ipython is needed comm = ET.SubElement(tool_root, 'command', detect_errors='exit_code') - if len(nbas) > 1: - comm.text = "ipython '$__tool_directory__/${_data_product._selector}.py'" - else: - comm.text = f"ipython 
'$__tool_directory__/{list(nbas.keys())[0]}.py'" - # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 - + python_binary = 'python' + # the same to decide python/ipython + conf = ET.SubElement(tool_root, 'configfiles') conf.append(ET.Element('inputs', name='inputs', filename='inputs.json')) @@ -522,6 +517,11 @@ def to_galaxy(input_path, test_par_root = default_test script_str = _nb2script(nba, ontology_path) + if 'get_ipython()' in script_str: + python_binary = 'ipython' + if 'ipython' not in extra_req: + extra_req.append('ipython') + with open(os.path.join(out_dir, f'{nb_name}.py'), 'w') as fd: fd.write(script_str) @@ -540,7 +540,17 @@ def to_galaxy(input_path, assert_stdout = ET.SubElement(default_test, 'assert_stdout') assert_stdout.append(ET.Element('has_text', text=_success_text)) - + + reqs.extend(Requirements(available_channels=available_channels, + conda_env_yml=conda_environment_file, + requirements_txt=requirements_file, + extra_req=extra_req).to_xml_tree()) + if len(nbas) > 1: + comm.text = python_binary + " '$__tool_directory__/${_data_product._selector}.py'" + else: + comm.text = f"{python_binary} '$__tool_directory__/{list(nbas.keys())[0]}.py'" + # NOTE: CDATA if needed https://gist.github.com/zlalanne/5711847 + if help_file is not None: help_block = ET.SubElement(tool_root, 'help') help_text = _read_help_file(help_file) From 1ae0156d8a9acc92aafe5c627a1931f577e90e3f Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 15 Feb 2024 11:44:00 +0100 Subject: [PATCH 22/30] script styling --- nb2workflow/galaxy.py | 16 ++++++++++++++++ setup.py | 1 + 2 files changed, 17 insertions(+) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 70dbce25..3feffefd 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -22,6 +22,7 @@ import pypandoc import tempfile +import black logger = logging.getLogger() @@ -273,6 +274,21 @@ def _nb2script(nba, ontology_path): exporter = ScriptExporter() script, resources = 
exporter.from_notebook_node(mynb) + # restyling + script = re.sub(r'^# In\[[\d\s]*\]:$', '', script) + + if 'get_ipython' in script: + script = 'from IPython import get_ipython\n' + script + + BLACK_MODE = black.Mode(target_versions={black.TargetVersion.PY37}, line_length=79) + try: + script = black.format_file_contents(script, fast=False, mode=BLACK_MODE) + except black.NothingChanged: + pass + finally: + if script[-1] != "\n": + script += "\n" + return script diff --git a/setup.py b/setup.py index c1e0de48..260413eb 100644 --- a/setup.py +++ b/setup.py @@ -70,6 +70,7 @@ 'ensureconda', 'bibtexparser >= 2.0.0b3', 'pypandoc_binary', + 'black' ] }, From 749ff081dee374c056b3c6bc799798f36ef0d94c Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 15 Feb 2024 12:20:11 +0100 Subject: [PATCH 23/30] ignore get_ipython undefined with noqa --- nb2workflow/galaxy.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 3feffefd..98b22795 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -276,9 +276,7 @@ def _nb2script(nba, ontology_path): # restyling script = re.sub(r'^# In\[[\d\s]*\]:$', '', script) - - if 'get_ipython' in script: - script = 'from IPython import get_ipython\n' + script + script = re.sub(r'^(.*get_ipython.*)$', '\1 # noqa: F821', script) BLACK_MODE = black.Mode(target_versions={black.TargetVersion.PY37}, line_length=79) try: From 33f0030399af7ecfce85fce7ffad31a5a6d94fc8 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 15 Feb 2024 17:22:44 +0100 Subject: [PATCH 24/30] more styling --- nb2workflow/galaxy.py | 61 +++++++++++++++++++++++++++---------------- setup.py | 4 ++- 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 98b22795..bafa4860 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -23,6 +23,8 @@ import tempfile import black +import autoflake +import isort logger = 
logging.getLogger() @@ -178,19 +180,6 @@ def _nb2script(nba, ontology_path): import os import shutil import sys - - try: - import numpy as np - _numpy_available = True - except ImportError: - _numpy_available = False - - try: - from oda_api.json import CustomJSONEncoder - except ImportError: - from json import JSONEncoder as CustomJSONEncoder - - _galaxy_wd = os.getcwd() """)) inject_import.metadata['tags'] = ['injected_import'] mynb.cells.insert(0, inject_import) @@ -203,6 +192,8 @@ def _nb2script(nba, ontology_path): # NOTE: validation of args is external inject_read = nbformat.v4.new_code_cell( dedent(""" + _galaxy_wd = os.getcwd() + with open('inputs.json', 'r') as fd: inp_dic = json.load(fd) if '_data_product' in inp_dic.keys(): @@ -216,9 +207,33 @@ def _nb2script(nba, ontology_path): """)) inject_read.metadata['tags'] = ['injected-input'] mynb.cells.insert(inject_pos, inject_read) + + exporter = ScriptExporter() + script, resources = exporter.from_notebook_node(mynb) + + outp_code = dedent(""" + # output gathering + try: + from oda_api.json import CustomJSONEncoder + except ImportError: + from json import JSONEncoder as CustomJSONEncoder + """) + + if re.search(r'^import numpy as np', script, flags=re.M): + outp_code += "_numpy_available = True\n" + else: + outp_code += dedent(""" + try: + import numpy as np + _numpy_available = True + except ImportError: + _numpy_available = False + """) - outp_code = "_simple_outs, _oda_outs = [], []\n" - outp_code += "_galaxy_meta_data = {}\n" + outp_code += dedent(""" + _simple_outs, _oda_outs = [], [] + _galaxy_meta_data = {} + """) for vn, vv in outputs.items(): outp = GalaxyOutput.from_inspect(vv, ontology_path=ontology_path, dprod=nba.name) @@ -266,18 +281,16 @@ def _nb2script(nba, ontology_path): json.dump(_galaxy_meta_data, fd) print("{_success_text}") """) - - inject_write = nbformat.v4.new_code_cell(outp_code) - inject_write.metadata['tags'] = ['injected-output'] - mynb.cells.append(inject_write) - exporter = 
ScriptExporter() - script, resources = exporter.from_notebook_node(mynb) + script += outp_code # restyling - script = re.sub(r'^# In\[[\d\s]*\]:$', '', script) - script = re.sub(r'^(.*get_ipython.*)$', '\1 # noqa: F821', script) + script = re.sub(r'^# In\[[\d\s]*\]:$', '', script, flags=re.M) + + script = autoflake.fix_code(script, remove_all_unused_imports=True, remove_unused_variables=True) + script = isort.api.sort_code_string(script, config=isort.Config(profile="black")) + BLACK_MODE = black.Mode(target_versions={black.TargetVersion.PY37}, line_length=79) try: script = black.format_file_contents(script, fast=False, mode=BLACK_MODE) @@ -286,6 +299,8 @@ def _nb2script(nba, ontology_path): finally: if script[-1] != "\n": script += "\n" + + script = re.sub(r'^(.*get_ipython.*)$', r'\1 # noqa: F821', script, flags=re.M) return script diff --git a/setup.py b/setup.py index 260413eb..4f75982a 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,9 @@ 'ensureconda', 'bibtexparser >= 2.0.0b3', 'pypandoc_binary', - 'black' + 'black', + 'isort', + 'autoflake' ] }, From 736354b3713ca5e9dd29cbc6852a9e78cdad7688 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 16 Feb 2024 10:58:19 +0100 Subject: [PATCH 25/30] better code injection --- nb2workflow/galaxy.py | 131 +++++++++++++++++++++--------------------- 1 file changed, 67 insertions(+), 64 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index bafa4860..ce266c9b 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -173,14 +173,27 @@ def _nb2script(nba, ontology_path): input_nb = nba.notebook_fn mynb = nbformat.read(input_nb, as_version=4) outputs = nba.extract_output_declarations() + + has_oda_outs = False + has_simple_outs = False + for vn, vv in outputs.items(): + outp = GalaxyOutput.from_inspect(vv, ontology_path=ontology_path, dprod=nba.name) + if outp.is_oda: + has_oda_outs = True + outp_code += f"_oda_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" + else: + 
outp_code += f"_simple_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" + has_simple_outs = True - inject_import = nbformat.v4.new_code_cell( - dedent( """ + import_code = dedent( """ import json import os import shutil - import sys - """)) + """) + if has_oda_outs: + import_code += "from oda_api.json import CustomJSONEncoder\n" + + inject_import = nbformat.v4.new_code_cell(import_code) inject_import.metadata['tags'] = ['injected_import'] mynb.cells.insert(0, inject_import) @@ -212,69 +225,58 @@ def _nb2script(nba, ontology_path): script, resources = exporter.from_notebook_node(mynb) outp_code = dedent(""" - # output gathering - try: - from oda_api.json import CustomJSONEncoder - except ImportError: - from json import JSONEncoder as CustomJSONEncoder - """) + # output gathering + _galaxy_meta_data = {} + """) - if re.search(r'^import numpy as np', script, flags=re.M): - outp_code += "_numpy_available = True\n" - else: + if has_oda_outs: outp_code += dedent(""" - try: - import numpy as np - _numpy_available = True - except ImportError: - _numpy_available = False - """) - - outp_code += dedent(""" - _simple_outs, _oda_outs = [], [] - _galaxy_meta_data = {} - """) - - for vn, vv in outputs.items(): - outp = GalaxyOutput.from_inspect(vv, ontology_path=ontology_path, dprod=nba.name) - if outp.is_oda: - outp_code += f"_oda_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" + _oda_outs = [] + for _outn, _outfn, _outv in _oda_outs: + _galaxy_outfile_name = os.path.join(_galaxy_wd, _outfn) + if isinstance(_outv, str) and os.path.isfile(_outv): + shutil.move(_outv, _galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': '_sniff_'} + elif getattr(_outv, "write_fits_file", None): + _outv.write_fits_file(_galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': 'fits'} + elif getattr(_outv, "write_file", None): + _outv.write_file(_galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': '_sniff_'} + else: + with open(_galaxy_outfile_name, 'w') 
as fd: + json.dump(_outv, fd, cls=CustomJSONEncoder) + _galaxy_meta_data[_outn] = {'ext': 'json'} + """) + if has_simple_outs: + outp_code += "_simple_outs = []\n" + + if re.search(r'^\s*import numpy as np', script, flags=re.M): + outp_code += "_numpy_available = True\n" else: - outp_code += f"_simple_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" - - outp_code += dedent(""" - for _outn, _outfn, _outv in _oda_outs: - _galaxy_outfile_name = os.path.join(_galaxy_wd, _outfn) - if isinstance(_outv, str) and os.path.isfile(_outv): - shutil.move(_outv, _galaxy_outfile_name) - _galaxy_meta_data[_outn] = {'ext': '_sniff_'} - elif getattr(_outv, "write_fits_file", None): - _outv.write_fits_file(_galaxy_outfile_name) - _galaxy_meta_data[_outn] = {'ext': 'fits'} - elif getattr(_outv, "write_file", None): - _outv.write_file(_galaxy_outfile_name) - _galaxy_meta_data[_outn] = {'ext': '_sniff_'} - else: - with open(_galaxy_outfile_name, 'w') as fd: - json.dump(_outv, fd, cls=CustomJSONEncoder) - _galaxy_meta_data[_outn] = {'ext': 'json'} - """) - - outp_code += dedent(""" - for _outn, _outfn, _outv in _simple_outs: - _galaxy_outfile_name = os.path.join(_galaxy_wd, _outfn) - if isinstance(_outv, str) and os.path.isfile(_outv): - shutil.move(_outv, _galaxy_outfile_name) - _galaxy_meta_data[_outn] = {'ext': '_sniff_'} - elif _numpy_available and isinstance(_outv, np.ndarray): - with open(_galaxy_outfile_name, 'wb') as fd: - np.savez(fd, _outv) - _galaxy_meta_data[_outn] = {'ext': 'npz'} - else: - with open(_galaxy_outfile_name, 'w') as fd: - json.dump(_outv, fd) - _galaxy_meta_data[_outn] = {'ext': 'expression.json'} - """) + outp_code += dedent(""" + try: + import numpy as np # noqa: E402 + _numpy_available = True + except ImportError: + _numpy_available = False + """) + + outp_code += dedent(""" + for _outn, _outfn, _outv in _simple_outs: + _galaxy_outfile_name = os.path.join(_galaxy_wd, _outfn) + if isinstance(_outv, str) and os.path.isfile(_outv): + 
shutil.move(_outv, _galaxy_outfile_name) + _galaxy_meta_data[_outn] = {'ext': '_sniff_'} + elif _numpy_available and isinstance(_outv, np.ndarray): + with open(_galaxy_outfile_name, 'wb') as fd: + np.savez(fd, _outv) + _galaxy_meta_data[_outn] = {'ext': 'npz'} + else: + with open(_galaxy_outfile_name, 'w') as fd: + json.dump(_outv, fd) + _galaxy_meta_data[_outn] = {'ext': 'expression.json'} + """) outp_code += dedent(f""" with open(os.path.join(_galaxy_wd, 'galaxy.json'), 'w') as fd: @@ -301,6 +303,7 @@ def _nb2script(nba, ontology_path): script += "\n" script = re.sub(r'^(.*get_ipython.*)$', r'\1 # noqa: F821', script, flags=re.M) + script = re.sub(r'(?<=^\n)\n', '', script, flags=re.M) return script From d9200940222fc9458e2e276fb0ede26dd6b9934f Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 16 Feb 2024 11:12:57 +0100 Subject: [PATCH 26/30] fix undefined outp_code --- nb2workflow/galaxy.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index ce266c9b..c38cdd11 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -174,23 +174,21 @@ def _nb2script(nba, ontology_path): mynb = nbformat.read(input_nb, as_version=4) outputs = nba.extract_output_declarations() - has_oda_outs = False - has_simple_outs = False + oda_outp_code = '' + simple_outp_code = '' for vn, vv in outputs.items(): outp = GalaxyOutput.from_inspect(vv, ontology_path=ontology_path, dprod=nba.name) if outp.is_oda: - has_oda_outs = True - outp_code += f"_oda_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" + oda_outp_code += f"_oda_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" else: - outp_code += f"_simple_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" - has_simple_outs = True + simple_outp_code += f"_simple_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" import_code = dedent( """ import json import os import shutil """) - if 
has_oda_outs: + if oda_outp_code: import_code += "from oda_api.json import CustomJSONEncoder\n" inject_import = nbformat.v4.new_code_cell(import_code) @@ -229,9 +227,10 @@ def _nb2script(nba, ontology_path): _galaxy_meta_data = {} """) - if has_oda_outs: + if oda_outp_code: + outp_code += "_oda_outs = []\n" + outp_code += oda_outp_code outp_code += dedent(""" - _oda_outs = [] for _outn, _outfn, _outv in _oda_outs: _galaxy_outfile_name = os.path.join(_galaxy_wd, _outfn) if isinstance(_outv, str) and os.path.isfile(_outv): @@ -248,8 +247,9 @@ def _nb2script(nba, ontology_path): json.dump(_outv, fd, cls=CustomJSONEncoder) _galaxy_meta_data[_outn] = {'ext': 'json'} """) - if has_simple_outs: + if simple_outp_code: outp_code += "_simple_outs = []\n" + outp_code += simple_outp_code if re.search(r'^\s*import numpy as np', script, flags=re.M): outp_code += "_numpy_available = True\n" From 2abc70f6b1867d458ed5cc9415df5c0e5ccab3ac Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 16 Feb 2024 17:58:07 +0100 Subject: [PATCH 27/30] ignore linting for now --- nb2workflow/galaxy.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index c38cdd11..3e365387 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -184,6 +184,8 @@ def _nb2script(nba, ontology_path): simple_outp_code += f"_simple_outs.append(('{outp.dataname}', '{outp.outfile_name}', {vn}))\n" import_code = dedent( """ + # flake8: noqa + import json import os import shutil @@ -256,7 +258,7 @@ def _nb2script(nba, ontology_path): else: outp_code += dedent(""" try: - import numpy as np # noqa: E402 + import numpy as np # noqa: E402 _numpy_available = True except ImportError: _numpy_available = False @@ -302,7 +304,8 @@ def _nb2script(nba, ontology_path): if script[-1] != "\n": script += "\n" - script = re.sub(r'^(.*get_ipython.*)$', r'\1 # noqa: F821', script, flags=re.M) + script = re.sub(r'^(.*get_ipython\(\).*)$', r'\1 # noqa: 
F821', script, flags=re.M) + script = re.sub(r'^(\s*display\(.*)$', r'\1 # noqa: F821', script, flags=re.M) script = re.sub(r'(?<=^\n)\n', '', script, flags=re.M) return script @@ -549,7 +552,7 @@ def to_galaxy(input_path, test_par_root = default_test script_str = _nb2script(nba, ontology_path) - if 'get_ipython()' in script_str: + if 'get_ipython()' in script_str or re.search(r'^\s*display\(', script_str): python_binary = 'ipython' if 'ipython' not in extra_req: extra_req.append('ipython') From 3cc9c8c99cf16882fb34131ffa7b862f70523590 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Fri, 16 Feb 2024 21:39:14 +0100 Subject: [PATCH 28/30] file input --- nb2workflow/galaxy.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 3e365387..71f6929f 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -34,11 +34,14 @@ global_req = [] _success_text = '*** Job finished successfully ***' +_ontology_base = 'http://odahub.io/ontology#' +_dataset_term = _ontology_base + 'POSIXPath' class GalaxyParameter: def __init__(self, name, python_type, + ontology_parameter_hierarchy, description=None, default_value=None, min_value=None, @@ -55,6 +58,11 @@ def __init__(self, if allowed_values is not None: partype = 'select' + if _dataset_term in ontology_parameter_hierarchy: + partype = 'data' + default_value = None + # TODO: dataset type when in ontology + self.name = name self.partype = partype self.description=description @@ -72,6 +80,7 @@ def from_inspect(cls, par_details, ontology_path): if par_details.get('extra_ttl') is not None: onto.parse_extra_triples(par_details['extra_ttl']) + par_hierarchy = onto.get_parameter_hierarchy(owl_uri) par_format = onto.get_parameter_format(owl_uri) par_unit = onto.get_parameter_unit(owl_uri) min_value, max_value = onto.get_limits(owl_uri) @@ -86,6 +95,7 @@ def from_inspect(cls, par_details, ontology_path): return cls(par_details['name'], par_details['python_type'], + 
par_hierarchy, description=description, default_value=par_details['default_value'], min_value=min_value, From da8eb0f51a791708f1c9201499039862adc29bd8 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Mon, 19 Feb 2024 17:02:49 +0100 Subject: [PATCH 29/30] {data_style: paths} for data parameter --- nb2workflow/galaxy.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 71f6929f..75c8c939 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -46,7 +46,8 @@ def __init__(self, default_value=None, min_value=None, max_value=None, - allowed_values=None): + allowed_values=None, + additional_attrs=None): #TODO: type is fully defined by owl_type. Use it instead? partype_lookup = {str: 'text', @@ -61,6 +62,7 @@ def __init__(self, if _dataset_term in ontology_parameter_hierarchy: partype = 'data' default_value = None + additional_attrs = {"data_style": "paths"} # TODO: dataset type when in ontology self.name = name @@ -70,6 +72,7 @@ def __init__(self, self.min_value = min_value self.max_value = max_value self.allowed_values = allowed_values + self.additional_attrs = additional_attrs @classmethod @@ -89,9 +92,9 @@ def from_inspect(cls, par_details, ontology_path): description = label if label is not None else par_details['name'] if par_format is not None: - description += f" Format: {par_format}" + description += f" (format: {par_format})" if par_unit is not None: - description += f" Units: {par_unit}" + description += f" (unit: {par_unit})" return cls(par_details['name'], par_details['python_type'], @@ -118,6 +121,9 @@ def to_xml_tree(self): if self.max_value is not None: attrs['max'] = str(self.max_value) + if self.additional_attrs is not None: + attrs.update(self.additional_attrs) + element = ET.Element('param', **attrs) @@ -128,7 +134,6 @@ def to_xml_tree(self): attrs['selected'] = 'true' option = ET.SubElement(element, 'option', **attrs) option.text = str(val) - # TODO: do we 
need additional validation? return element From d6f52919a4cfeba1efc0fbb279719f524bac671d Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Mon, 19 Feb 2024 17:19:33 +0100 Subject: [PATCH 30/30] data_style in the right place --- nb2workflow/galaxy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nb2workflow/galaxy.py b/nb2workflow/galaxy.py index 75c8c939..abf1595c 100644 --- a/nb2workflow/galaxy.py +++ b/nb2workflow/galaxy.py @@ -62,7 +62,6 @@ def __init__(self, if _dataset_term in ontology_parameter_hierarchy: partype = 'data' default_value = None - additional_attrs = {"data_style": "paths"} # TODO: dataset type when in ontology self.name = name @@ -536,7 +535,7 @@ def to_galaxy(input_path, # the same to decide python/ipython conf = ET.SubElement(tool_root, 'configfiles') - conf.append(ET.Element('inputs', name='inputs', filename='inputs.json')) + conf.append(ET.Element('inputs', name='inputs', filename='inputs.json', data_style='paths')) inps = ET.SubElement(tool_root, 'inputs') outps = ET.SubElement(tool_root, 'outputs')