diff --git a/anvio/parsers/hmmer.py b/anvio/parsers/hmmer.py index 7dbd0828a9..db37e72663 100644 --- a/anvio/parsers/hmmer.py +++ b/anvio/parsers/hmmer.py @@ -233,7 +233,6 @@ def find_line(self, condition): def read_lines_until(self, condition, include_last=False, store=True): lines = [] - return_value = lines if store else True for line in self.query_lines[self.line_no:]: self.line_no += 1 @@ -535,7 +534,7 @@ class HMMERTableOutput(Parser): Which HMMER program was used to generate the output we are parsing? Pick from {'hmmscan', 'hmmsearch'} """ - def __init__(self, hmmer_table_txt, alphabet='AA', context='GENE', program='hmmscan', run=terminal.Run()): + def __init__(self, hmmer_table_txt, alphabet='AA', context='GENE', program='hmmscan', no_header=True, run=terminal.Run()): self.alphabet = alphabet self.context = context self.program = program @@ -563,7 +562,7 @@ def __init__(self, hmmer_table_txt, alphabet='AA', context='GENE', program='hmms 'col_names': col_names, 'col_mapping': col_mapping, 'indexing_field': -1, - 'no_header': True, + 'no_header': no_header, }, } diff --git a/anvio/tests/run_workflow_tests_for_ecophylo.sh b/anvio/tests/run_workflow_tests_for_ecophylo.sh index ea5cafc1e3..77616be5f5 100755 --- a/anvio/tests/run_workflow_tests_for_ecophylo.sh +++ b/anvio/tests/run_workflow_tests_for_ecophylo.sh @@ -12,8 +12,7 @@ cp $files/data/genomes/bacteria/*.db $output_ cp $files/data/genomes/archaea/*.db $output_dir/workflow_test cp $files/data/input_files/metagenomes.txt $output_dir/workflow_test cp $files/data/input_files/external-genomes.txt $output_dir/workflow_test -cp $files/data/input_files/hmm_list.txt $output_dir/workflow_test -cp $files/data/input_files/hmm_list_external.txt $output_dir/workflow_test +cp $files/data/input_files/hmm_list* $output_dir/workflow_test cd $output_dir/workflow_test INFO "Creating a default config for ecophylo workflow" @@ -50,7 +49,7 @@ anvi-run-workflow -w ecophylo -c only-metagenomes-txt-config.json -A --dry-run INFO "Running ecophylo workflow with ecophylo dry-run: only external-genomes.txt" anvi-run-workflow -w ecophylo -c only-external-genomes-txt-config.json -A --dry-run -INFO "Running ecophylo workflow" +INFO "EcoPhylo: profiling evolution AND ecology with Ribosomal_L16 on metagenomes and genomes" anvi-run-workflow -w ecophylo -c default-config.json INFO "Running ecophylo workflow interactive" @@ -63,11 +62,11 @@ rm -rf $output_dir/workflow_test/ECOPHYLO_WORKFLOW/ INFO "Saving a workflow graph - no samples.txt" anvi-run-workflow -w ecophylo -c no-samples-txt-config.json --save-workflow-graph -INFO "Running ecophylo workflow with ecophylo dry-run - no samples.txt" +INFO "EcoPhylo: profiling just evolution with Ribosomal_L16 and Ribosomal_L2 on metagenomes and genomes - no samples.txt (dry-run)" anvi-run-workflow -w ecophylo -c no-samples-txt-config.json -A --dry-run -INFO "Running ecophylo workflow - no samples.txt" -anvi-run-workflow -w ecophylo -c no-samples-txt-config.json +INFO "EcoPhylo: profiling just evolution with Ribosomal_L16 and Ribosomal_L2 on metagenomes and genomes - no samples.txt" +anvi-run-workflow -w ecophylo -c no-samples-txt-config.json INFO "Running ecophylo workflow interactive" HMM="Ribosomal_L16" @@ -82,6 +81,5 @@ anvi-run-workflow -w ecophylo -c only-external-genomes-txt-config.json INFO "Running ecophylo workflow interactive from external HMM" HMM="Ribosomal_L16" -anvi-interactive -t ECOPHYLO_WORKFLOW/05_TREES/"${HMM}"/"${HMM}"_renamed.nwk \ - -p ECOPHYLO_WORKFLOW/05_TREES/"${HMM}"/"${HMM}"-PROFILE.db \ - --manual \ No newline at end of file +anvi-interactive -c ECOPHYLO_WORKFLOW/METAGENOMICS_WORKFLOW/03_CONTIGS/"${HMM}"-contigs.db \ + -p ECOPHYLO_WORKFLOW/METAGENOMICS_WORKFLOW/06_MERGED/"${HMM}"/PROFILE.db \ No newline at end of file diff --git a/anvio/tests/sandbox/data/input_files/hmm_list-no-samples-txt.txt b/anvio/tests/sandbox/data/input_files/hmm_list-no-samples-txt.txt new file mode 100644 index 0000000000..56b506ee2c --- /dev/null +++ b/anvio/tests/sandbox/data/input_files/hmm_list-no-samples-txt.txt @@ -0,0 +1,3 @@ +name source path +Ribosomal_L16 Bacteria_71 INTERNAL +Ribosomal_L2 Bacteria_71 INTERNAL diff --git a/anvio/tests/sandbox/workflows/ecophylo/no-samples-txt-config.json b/anvio/tests/sandbox/workflows/ecophylo/no-samples-txt-config.json index dc959d1b47..9bd6e10f41 100644 --- a/anvio/tests/sandbox/workflows/ecophylo/no-samples-txt-config.json +++ b/anvio/tests/sandbox/workflows/ecophylo/no-samples-txt-config.json @@ -1,7 +1,7 @@ { "metagenomes": "metagenomes.txt", "external_genomes": "external-genomes.txt", - "hmm_list": "hmm_list.txt", + "hmm_list": "hmm_list-no-samples-txt.txt", "cluster_representative_method": { "method": "mmseqs" }, diff --git a/anvio/workflows/ecophylo/Snakefile b/anvio/workflows/ecophylo/Snakefile index bccd64cb32..8617c27884 100644 --- a/anvio/workflows/ecophylo/Snakefile +++ b/anvio/workflows/ecophylo/Snakefile @@ -44,7 +44,7 @@ rule anvi_run_hmms_hmmsearch: log: os.path.join(dirs_dict['LOGS_DIR'], "anvi_run_hmms_hmmsearch-{sample_name}-{HMM}.log") input: output: - done = os.path.join(dirs_dict['EXTRACTED_RIBO_PROTEINS_DIR'], "{sample_name}-{HMM}-dom_hmmsearch/contigs-hmmsearch.done"), + done = os.path.join(dirs_dict['EXTRACTED_RIBO_PROTEINS_DIR'], "{sample_name}-{HMM}-contigs-hmmsearch.done"), params: hmm_source = M.get_param_value_from_config(['anvi_run_hmms_hmmsearch', '--installed-hmm-profile']), additional_params = M.get_param_value_from_config(['trim_alignment', 'additional_params']) @@ -116,20 +116,59 @@ rule filter_hmm_hits_by_query_coverage: HMM_dir = os.path.join(M.HMM_path_dict[wildcards.HMM]) HMM_source = M.HMM_source_dict[wildcards.HMM] domtblout = os.path.join(dirs_dict['EXTRACTED_RIBO_PROTEINS_DIR'], f"{wildcards.sample_name}-{HMM_source}-dom_hmmsearch/hmm.domtable") + hmmer_output_dir = os.path.join(dirs_dict['EXTRACTED_RIBO_PROTEINS_DIR'], f"{wildcards.sample_name}-{HMM_source}-dom_hmmsearch") + + # Check if anvi-run-scg-taxonomy and/or anvi-script-filter-hmm-hits-table has been run already + contigs_db = ContigsDatabase(contigsDB) + try: + HMM_dom_filter_sources = contigs_db.meta['HMM_dom_filter_sources'] + HMM_dom_filter_target_coverage = contigs_db.meta['HMM_dom_filter_target_coverage'] + HMM_dom_filter_query_coverage = contigs_db.meta['HMM_dom_filter_query_coverage'] + + HMM_dom_filter_sources_list = HMM_dom_filter_sources.split(",") + HMM_dom_filter_target_coverage_list = HMM_dom_filter_target_coverage.split(",") + HMM_dom_filter_query_coverage_list = HMM_dom_filter_query_coverage.split(",") + + source_domain_filter_values = list(zip(HMM_dom_filter_sources_list, HMM_dom_filter_target_coverage_list, HMM_dom_filter_query_coverage_list)) + + domain_filter_values_dict = {} + for item in source_domain_filter_values: + domain_filter_values_dict[item[0]] = item + except: + domain_filter_values_dict = {} + + contigs_db.disconnect() if HMM_source in M.internal_HMM_sources: - shell(f"anvi-script-filter-hmm-hits-table -c {contigsDB} \ - --domain-hits-table {domtblout} \ - --hmm-source {HMM_source} \ - --query-coverage {params.query_coverage} \ - {params.additional_params} 2> {log}") + if HMM_source in domain_filter_values_dict.keys(): + if float(params.query_coverage) > float(domain_filter_values_dict[HMM_source][2]): + shell(f"anvi-script-filter-hmm-hits-table -c {contigsDB} \ + --domain-hits-table {domtblout} \ + --hmm-source {HMM_source} \ + --query-coverage {params.query_coverage} \ + {params.additional_params} 2> {log}") + else: + print(f"The HMM source {HMM_source} has already been filtered with more stringent query coverage value: {domain_filter_values_dict[HMM_source][2]}, skipping filter_hmm_hits_by_query_coverage!") + pass + else: + shell(f"anvi-script-filter-hmm-hits-table -c {contigsDB} \ + --domain-hits-table {domtblout} \ + --hmm-source {HMM_source} \ + --query-coverage {params.query_coverage} \ + {params.additional_params} 2> {log}") else: - shell(f"anvi-script-filter-hmm-hits-table -c {contigsDB} \ - --domain-hits-table {domtblout} \ - --hmm-profile-dir {HMM_dir} \ - --hmm-source {HMM_source} \ - --query-coverage {params.query_coverage} \ - {params.additional_params} 2> {log}") + if HMM_source in domain_filter_values_dict.keys(): + if float(params.query_coverage) > float(domain_filter_values_dict[HMM_source][2]): + shell(f"anvi-script-filter-hmm-hits-table -c {contigsDB} \ + --domain-hits-table {domtblout} \ + --hmm-profile-dir {HMM_dir} \ + --hmm-source {HMM_source} \ + --query-coverage {params.query_coverage} \ + {params.additional_params} 2> {log}") + else: + print(f"The HMM source {HMM_source} has already been filtered with more stringent query coverage value: {domain_filter_values_dict[HMM_source][2]}, skipping filter_hmm_hits_by_query_coverage!") + pass + shell('touch {output.done}') @@ -968,7 +1007,7 @@ if M.samples_txt_file: # basics state_dict['version'] = '3' state_dict['tree-type'] = 'phylogram' - state_dict['current-view'] = 'single' + state_dict['current-view'] = 'mean_coverage' # height and width # FIXME: It's unclear to me how the interactive interface determines @@ -1066,22 +1105,23 @@ if M.samples_txt_file: # views views_dict = {} - single_dict = {} + mean_coverage_dict = {} + false = False percent_identity = { "normalization": "none", "min": { "value": "90", - "disabled": "false" + "disabled": false }, "max": { "value": "100", - "disabled": "false" + "disabled": false } } - single_dict['percent_identity'] = percent_identity - views_dict['single'] = single_dict + mean_coverage_dict['percent_identity'] = percent_identity + views_dict['mean_coverage'] = mean_coverage_dict state_dict['views'] = views_dict with open(output.state_file, "w") as outfile: @@ -1177,23 +1217,16 @@ else: # layer-orders - first_layers = ["__parent__", "length", "gc_content"] - - layer_order = first_layers + misc_layers_list + if HMM_source in M.internal_HMM_sources: + layer_order = misc_layers_list + scg_taxonomy_layers_list + else: + layer_order = misc_layers_list state_dict['layer-order'] = layer_order # layers layers_dict = {} - layer_attributes_parent = { - "color": "#000000", - "height": "0", - "margin": "15", - "type": "color", - "color-start": "#FFFFFF" - } - length = { "color": "#000000", "height": "0", @@ -1202,22 +1235,6 @@ else: "color-start": "#FFFFFF" } - gc_content = { - "color": "#000000", - "height": "0", - "margin": "15", - "type": "color", - "color-start": "#FFFFFF" - } - - identifier = { - "color": "#000000", - "height": "0", - "margin": "15", - "type": "color", - "color-start": "#FFFFFF" - } - names = { "color": "#000000", "height": "0", @@ -1234,12 +1251,7 @@ else: "color-start": "#FFFFFF" } - layers_dict['__parent__'] = layer_attributes_parent - layers_dict['length'] = length - layers_dict['gc_content'] = gc_content - layers_dict['identifier'] = identifier layers_dict['percent_identity'] = percent_identity - state_dict['layers'] = layers_dict # views diff --git a/bin/anvi-run-hmms b/bin/anvi-run-hmms index d46491c872..384259b65a 100755 --- a/bin/anvi-run-hmms +++ b/bin/anvi-run-hmms @@ -7,14 +7,15 @@ import sys import anvio import anvio.utils as utils import anvio.terminal as terminal +import anvio.filesnpaths as filesnpaths with terminal.SuppressAllOutput(): import anvio.data.hmm as hmm_data available_hmm_sources = list(hmm_data.sources.keys()) -from anvio.errors import ConfigError, FilesNPathsError from anvio.terminal import time_program +from anvio.errors import ConfigError, FilesNPathsError from anvio.tables.hmmhits import TablesForHMMHits from anvio.tables.trnahits import TablesForTransferRNAs @@ -90,6 +91,18 @@ def main(args): raise ConfigError("We can see that you have requested --domain-hits-table but you haven't asked us to store " "this output in a directory with --hmmer-output-dir. There is no point to requesting this output " "if you are never going to see it, so we figured we'd stop you right there. :)") + + if args.domain_hits_table and args.hmmer_program != "hmmsearch": + run.warning("You requested to save the --domtblout without using hmmsearch. We wanted to kindly warn you that if you " + "that if you plan on using `anvi-script-filter-hmm-hits-table` later to remove weak hits, you must use " + "`hmmsearch` instead of `hmmscan`. You can instruct anvi'o to switch to `hmmsearch` by including the " + "parameter `--hmmer-program hmmsearch` to your `anvi-run-hmms` command.") + + domtable_path = os.path.join(args.hmmer_output_dir + "/hmm.table") + if filesnpaths.is_file_exists(domtable_path, dont_raise=True): + raise ConfigError(f"The file {domtable_path} already exists, and anvi'o does not like to " + "to overwrite things. Please either remove the file or rename your " + "desired output.") search_tables = TablesForHMMHits(args.contigs_db, num_threads_to_use=args.num_threads, just_do_it=args.just_do_it, hmm_program_to_use=args.hmmer_program, hmmer_output_directory=args.hmmer_output_dir, diff --git a/sandbox/anvi-script-filter-hmm-hits-table b/sandbox/anvi-script-filter-hmm-hits-table index 734b5b5344..cf4e1c0d8d 100755 --- a/sandbox/anvi-script-filter-hmm-hits-table +++ b/sandbox/anvi-script-filter-hmm-hits-table @@ -8,21 +8,20 @@ is done using query and/or target coverage import sys import os -import numpy as np import pandas as pd import anvio -import anvio.data.hmm import anvio.db as db import anvio.utils as utils import anvio.hmmops as hmmops -import anvio.bamops as bamops import anvio.terminal as terminal import anvio.filesnpaths as filesnpaths +with terminal.SuppressAllOutput(): + import anvio.data.hmm as hmm_data + from anvio.dbops import ContigsDatabase from anvio.parsers import parser_modules -from anvio.argparse import ArgumentParser from anvio.tables.hmmhits import TablesForHMMHits from anvio.errors import ConfigError, FilesNPathsError @@ -35,7 +34,7 @@ __authors__ = ['mschecht'] __provides__ = ["hmm-hits"] __requires__ = ["contigs-db","hmm-source", "hmm-hits"] __description__ = ("Filter weak HMM hits from a given contigs database using a domain hits table " - "reported by `anvi-run-hmms`.") + "reported by hmmsearch in `anvi-run-hmms`.") pp = terminal.pretty_print @@ -64,7 +63,8 @@ class FilterHmmHitsTable(object): if self.hmm_profile_dir: self.sources = utils.get_HMM_sources_dictionary([args.hmm_profile_dir]) else: - self.sources = anvio.data.hmm.sources + self.sources = hmm_data.sources + def sanity_checks(self): """Sanity checks for program inputs.""" @@ -76,13 +76,13 @@ class FilterHmmHitsTable(object): self.run.info("Domtblout Path", self.domtblout) if not self.hmm_source: - raise ConfigError("Please provide a hmm-source :)") + raise ConfigError("Please provide an hmm-source :)") info_table = hmmops.SequencesForHMMHits(self.contigs_db_path).hmm_hits_info if self.hmm_source not in info_table: - raise ConfigError(f"Whoa there, the HMM source you provided, '{self.hmm_source}', is not in your contigsDB: " - f"{self.contigs_db_path}. Maybe you misspelled it? Maybe you never added it to your contigsDB??" + raise ConfigError(f"Whoa there, the HMM source you provided, '{self.hmm_source}', is not in your contigs-db: " + f"{self.contigs_db_path}. Maybe you misspelled it? Maybe you never added it to your contigs-db??" f"Please use --list-hmm-sources to see which HMM sources you have available. If you don't see the HMMs you " f"need then try re-running anvi-run-hmms and make sure to specify your HMM source of interest.") @@ -93,6 +93,14 @@ class FilterHmmHitsTable(object): f"anvi-script-filter-hmm-hit-table currently can only work with hmm-sources " f"from protein sequences.") + if self.target_coverage: + if 0.0 < float(self.target_coverage) > 1.0: + raise ConfigError("Target coverage needs to be between 0 and 1") + + if self.query_coverage: + if 0.0 < float(self.query_coverage) > 1.0: + raise ConfigError("Query coverage needs to be between 0 and 1") + def process(self): """Method to run the functions of this program""" @@ -144,6 +152,15 @@ class FilterHmmHitsTable(object): ('description', str), # description of target ] + try: + [int(l.split()[0]) for l in open(self.domtblout)][1:] + except ValueError as e: + raise ConfigError(f"The data in the first column of your DOM table output ('{self.domtblout}') " + f"do not look like anv'o gene caller ids (we know that because Python complained " + f"saying '{e}', which should never be the case with proper anvi'o gene caller ids). " + f"This outcome is only possible if you run `anvi-run-hmms` without the parameter " + f"`--hmmer-program hmmsearch`. No filters for you.") + try: colnames_coltypes_list = list(zip(*col_info)) colnames_coltypes_dict = dict(zip(colnames_coltypes_list[0], colnames_coltypes_list[1])) @@ -156,7 +173,6 @@ class FilterHmmHitsTable(object): header=None, index_col=False) except Exception as e: - print(e) raise ConfigError(f"Doesn't look like a --domtblout... anvi'o can't even... " f"Please look at this error message to find out what happened: " f"{e}") @@ -164,6 +180,7 @@ class FilterHmmHitsTable(object): self.df['query_coverage'] = ((self.df['hmm_stop'] - self.df['hmm_start'])/ self.df['hmm_length']) self.df['target_coverage'] = ((self.df['gene_stop'] - self.df['gene_start'])/ self.df['gene_length']) + self.hmm_names_set = set(self.df.hmm_name.to_list()) def filter_domtblout(self): """Filter the hmm_hits table based on query and/or target coverage""" @@ -199,22 +216,38 @@ class FilterHmmHitsTable(object): context= 'DOMAIN' hmm_program = 'hmmsearch' - parser = parser_modules['search']['hmmer_table_output'](hmmsearch_tbl, alphabet=alphabet, context=context, program=hmm_program) + parser = parser_modules['search']['hmmer_table_output'](hmmsearch_tbl, alphabet=alphabet, context=context, program=hmm_program, no_header=False) search_results_dict = parser.get_search_results() return search_results_dict def append_search_results_dict_to_hmm_tables(self, search_results_dict=None): - """Put in the new filtered hmm_hits table to the contigsDB""" + """Put in the new filtered hmm_hits table to the contigs-db""" + + contigs_db = ContigsDatabase(self.contigs_db_path) + hmm_hits_dict = contigs_db.db.get_table_as_dict("hmm_hits") + contigs_db.disconnect() + + gene_list = [] + for key,value in hmm_hits_dict.items(): + if value['source'] == self.hmm_source: + gene_list.append(value['gene_name']) + + gene_set = set(gene_list) + + if not gene_set.issubset(self.hmm_names_set): + raise ConfigError(f"The genes in {self.domtblout} don't seem to be in the hmm_hits table " + f"from your contigs-db: {self.contigs_db_path}. " + f"Please double check you are filtering with the same HMM_source that you used " + f"to create {self.domtblout} when you ran anvi-run-hmms.") # Remove old hmm_hits contigs_db_path hmm_tables = TablesForHMMHits(self.contigs_db_path) hmm_tables.remove_source(self.hmm_source) - # Re-write hmm_hits table to contigsDB - - internal_sources = list(anvio.data.hmm.sources.keys()) + # Re-write hmm_hits table to contigs-db + internal_sources = list(hmm_data.sources.keys()) source = self.hmm_source if self.sources not in internal_sources: @@ -223,7 +256,7 @@ class FilterHmmHitsTable(object): all_genes_searched_against = self.sources[source]['genes'] reference = self.sources[source]['ref'] else: - sources = anvio.data.hmm.sources + sources = hmm_data.sources source = self.hmm_source kind_of_search = sources[source]['kind'] domain = sources[source]['domain'] @@ -232,6 +265,53 @@ class FilterHmmHitsTable(object): hmm_tables.append_to_hmm_hits_table(source, reference, kind_of_search, domain, all_genes_searched_against, search_results_dict) + # add contigs-db self attributes for HMM_source, target_coverage, and query_coverage + self.db = db.DB(self.contigs_db_path, anvio.__contigs__version__, new_database=False) + + HMM_dom_filter_sources = self.db.get_meta_value('HMM_dom_filter_sources', return_none_if_not_in_table=True) + HMM_dom_filter_target_coverage = self.db.get_meta_value('HMM_dom_filter_target_coverage', return_none_if_not_in_table=True) + HMM_dom_filter_query_coverage = self.db.get_meta_value('HMM_dom_filter_query_coverage', return_none_if_not_in_table=True) + + if HMM_dom_filter_sources == None: + self.db.set_meta_value('HMM_dom_filter_sources', source) + if self.target_coverage == None: + self.db.set_meta_value('HMM_dom_filter_target_coverage', 0.00) + else: + self.db.set_meta_value('HMM_dom_filter_target_coverage', self.target_coverage) + if self.query_coverage == None: + self.db.set_meta_value('HMM_dom_filter_query_coverage', 0.00) + else: + self.db.set_meta_value('HMM_dom_filter_query_coverage', self.query_coverage) + else: + HMM_dom_filter_sources_list = HMM_dom_filter_sources.split(",") + HMM_dom_filter_target_coverage_list = str(HMM_dom_filter_target_coverage).split(",") + HMM_dom_filter_query_coverage_list = str(HMM_dom_filter_query_coverage).split(",") + + source_domain_filter_values = list(zip(HMM_dom_filter_sources_list, HMM_dom_filter_target_coverage_list, HMM_dom_filter_query_coverage_list)) + + if self.target_coverage == None: + self.target_coverage = 0.00 + if self.query_coverage == None: + self.query_coverage = 0.00 + + new_filtering_parameters = (source, self.target_coverage, self.query_coverage) + + for i, item in enumerate(source_domain_filter_values): + if new_filtering_parameters[0] == item[0]: + if float(item[1]) < float(new_filtering_parameters[1]) or float(item[2]) < float(new_filtering_parameters[2]): + source_domain_filter_values[i] = new_filtering_parameters + + if new_filtering_parameters[0] not in HMM_dom_filter_sources_list: + source_domain_filter_values.append(new_filtering_parameters) + + updated_HMM_dom_filter_attributes = list(zip(*source_domain_filter_values)) + + self.db.set_meta_value('HMM_dom_filter_sources', ','.join(str(s) for s in updated_HMM_dom_filter_attributes[0])) + self.db.set_meta_value('HMM_dom_filter_target_coverage', ','.join(str(s) for s in updated_HMM_dom_filter_attributes[1])) + self.db.set_meta_value('HMM_dom_filter_query_coverage', ','.join(str(s) for s in updated_HMM_dom_filter_attributes[2])) + + self.db.disconnect() + @terminal.time_program def main(args): @@ -248,10 +328,12 @@ if __name__ == '__main__': parser.add_argument(*anvio.A('list-hmm-sources'), **anvio.K('list-hmm-sources')) parser.add_argument(*anvio.A('hmm-profile-dir'), **anvio.K('hmm-profile-dir')) parser.add_argument('--domain-hits-table', metavar='PATH', help="Please provide the path to the domain-table-output. You can get this file from running anvi-run-hmms with the flag --domain-hits-table.") - parser.add_argument('--target-coverage', - help=" (ali_coord_to - ali_coord_from)/target_length") - parser.add_argument('--query-coverage', - help=" (hmm_coord_to - hmm_coord_from)/hmm_length") + parser.add_argument('--target-coverage', type=float, + help="The percent length (0.0-1.0) of the query sequence that must be aligned to the HMM model. " + "Here's the formula using columns from --domain-hits-table: (ali_coord_to - ali_coord_from)/target_length") + parser.add_argument('--query-coverage', type=float, + help="The percent length (0.0-1.0) of the target HMM model that is aligned query genes in the contigs-db. " + "Here's the formula using columns from --domain-hits-table: (hmm_coord_to - hmm_coord_from)/hmm_length") args, unknown = parser.parse_known_args()