Skip to content

Commit

Permalink
Allow tsv2exprofiledb to not compress outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Jul 23, 2024
1 parent 62975ca commit a146887
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 11 deletions.
32 changes: 22 additions & 10 deletions data/workflow/tsv2exprofiledb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,37 @@ OUT="$2"
[ -d "${OUT}.tsv" ] && echo "${OUT} is a directory!" && exit 1;

if notExists "${OUT}_h.dbtype"; then
"$MMSEQS" tsv2db "${IN}_h.tsv" "${OUT}_h" --output-dbtype 12 ${VERBOSITY}
MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}_h.tsv" "${OUT}_h" --output-dbtype 12 ${VERBOSITY}
fi

if notExists "${OUT}.dbtype"; then
"$MMSEQS" tsv2db "${IN}.tsv" "${OUT}_tmp" --output-dbtype 0 ${VERBOSITY}
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
"$MMSEQS" rmdb "${OUT}_tmp" ${VERBOSITY}
# Build the "${OUT}" database from "${IN}.tsv" (dbtype 0).
# COMPRESSED non-empty: write an intermediate "${OUT}_tmp" database, run
# `compress` on it to produce "${OUT}", then remove the intermediate.
if [ -n "${COMPRESSED}" ]; then
"$MMSEQS" tsv2db "${IN}.tsv" "${OUT}_tmp" --output-dbtype 0 ${VERBOSITY}
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
"$MMSEQS" rmdb "${OUT}_tmp" ${VERBOSITY}
else
# COMPRESSED empty: tsv2db writes "${OUT}" directly; no compress/rmdb step.
MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}.tsv" "${OUT}" --output-dbtype 0 ${VERBOSITY}
fi
fi

if notExists "${OUT}_seq.dbtype"; then
"$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq_tmp" --output-dbtype 0 ${VERBOSITY}
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
"$MMSEQS" rmdb "${OUT}_seq_tmp" ${VERBOSITY}
# Build the "${OUT}_seq" database from "${IN}_seq.tsv" (dbtype 0).
# COMPRESSED non-empty: write an intermediate "${OUT}_seq_tmp" database, run
# `compress` on it to produce "${OUT}_seq", then remove the intermediate.
if [ -n "${COMPRESSED}" ]; then
"$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq_tmp" --output-dbtype 0 ${VERBOSITY}
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
"$MMSEQS" rmdb "${OUT}_seq_tmp" ${VERBOSITY}
else
# COMPRESSED empty: tsv2db writes the final "${OUT}_seq" directly, so it is the
# command that must carry MMSEQS_FORCE_MERGE=1, exactly like the uncompressed
# branches for "${OUT}" and "${OUT}_aln" in this script.
# NOTE(review): MMSEQS_FORCE_MERGE presumably forces a single merged data
# file for the written database; confirm against the writer implementation.
MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq" --output-dbtype 0 ${VERBOSITY}
fi
fi

if notExists "${OUT}_aln.dbtype"; then
"$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln_tmp" --output-dbtype 5 ${VERBOSITY}
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
"$MMSEQS" rmdb "${OUT}_aln_tmp" ${VERBOSITY}
# Build the "${OUT}_aln" database from "${IN}_aln.tsv" (dbtype 5).
# COMPRESSED non-empty: write an intermediate "${OUT}_aln_tmp" database, run
# `compress` on it to produce "${OUT}_aln", then remove the intermediate.
if [ -n "${COMPRESSED}" ]; then
"$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln_tmp" --output-dbtype 5 ${VERBOSITY}
MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
"$MMSEQS" rmdb "${OUT}_aln_tmp" ${VERBOSITY}
else
# COMPRESSED empty: tsv2db writes "${OUT}_aln" directly; no compress/rmdb step.
MMSEQS_FORCE_MERGE=1 "$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln" --output-dbtype 5 ${VERBOSITY}
fi
fi

if notExists "${OUT}_seq_h.dbtype"; then
Expand Down
2 changes: 1 addition & 1 deletion src/MMseqsBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ std::vector<Command> baseCommands = {
"<i:hhsuiteHHMDB> <o:profileDB>",
CITATION_MMSEQS2,{{"",DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, NULL}}},

{"tsv2exprofiledb", tsv2exprofiledb, &par.onlyverbosity, COMMAND_PROFILE_PROFILE,
{"tsv2exprofiledb", tsv2exprofiledb, &par.verbandcompression, COMMAND_PROFILE_PROFILE,
"Create a expandable profile db from TSV files",
NULL,
"Milot Mirdita <[email protected]>",
Expand Down
6 changes: 6 additions & 0 deletions src/util/tsv2exprofiledb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,20 @@

#include "tsv2exprofiledb.sh.h"

// Applies the tsv2exprofiledb-specific default to the given parameter set:
// the `compressed` flag is switched on.
void setTsv2ExProfileDbDefaults(Parameters *p) {
    Parameters &defaults = *p;
    defaults.compressed = true;
}

int tsv2exprofiledb(int argc, const char **argv, const Command &command) {
Parameters &par = Parameters::getInstance();
setTsv2ExProfileDbDefaults(&par);
par.parseParameters(argc, argv, command, true, 0, 0);

std::string program = par.db2 + ".sh";
FileUtil::writeFile(program, tsv2exprofiledb_sh, tsv2exprofiledb_sh_len);

CommandCaller cmd;
cmd.addVariable("COMPRESSED", par.compressed ? "TRUE" : NULL);
cmd.addVariable("VERBOSITY", par.createParameterString(par.onlyverbosity).c_str());
cmd.execProgram(FileUtil::getRealPathFromSymLink(program).c_str(), par.filenames);

Expand Down

0 comments on commit a146887

Please sign in to comment.