diff --git a/src/alignment/PSSMCalculator.cpp b/src/alignment/PSSMCalculator.cpp index 79bafaf8..4faa1c89 100644 --- a/src/alignment/PSSMCalculator.cpp +++ b/src/alignment/PSSMCalculator.cpp @@ -239,6 +239,23 @@ void PSSMCalculator::printPSSM(size_t queryLength){ } } +void PSSMCalculator::profileToString(std::string& result, size_t queryLength){ + result.append(5, ' '); + for (size_t aa = 0; aa < Sequence::PROFILE_AA_SIZE; aa++) { + result.append(SSTR(subMat->num2aa[aa])); + result.append(6, ' '); + } + result.append(1, '\n'); + for (size_t i = 0; i < queryLength; i++) { + for (size_t aa = 0; aa < Sequence::PROFILE_AA_SIZE; aa++) { + result.append(SSTR(profile[i * Sequence::PROFILE_AA_SIZE + aa], 4)); + result.append(1, ' '); + } + result.append(1, '\n'); + } + result.append(1, '\n'); +} + void PSSMCalculator::computeLogPSSM(BaseMatrix *subMat, char *pssm, const float *profile, float bitFactor, size_t queryLength, float scoreBias) { for(size_t pos = 0; pos < queryLength; pos++) { for(size_t aa = 0; aa < Sequence::PROFILE_AA_SIZE; aa++) { diff --git a/src/alignment/PSSMCalculator.h b/src/alignment/PSSMCalculator.h index 146475b2..e07d7d9b 100644 --- a/src/alignment/PSSMCalculator.h +++ b/src/alignment/PSSMCalculator.h @@ -54,6 +54,7 @@ class PSSMCalculator { void printProfile(size_t queryLength); void printPSSM(size_t queryLength); + void profileToString(std::string& result, size_t queryLength); // prepare pseudocounts static void preparePseudoCounts(float *frequency, float *frequency_with_pseudocounts, size_t entrySize, size_t queryLength, const float **R); diff --git a/src/commons/Parameters.cpp b/src/commons/Parameters.cpp index 0e79376b..ab56965c 100644 --- a/src/commons/Parameters.cpp +++ b/src/commons/Parameters.cpp @@ -137,6 +137,7 @@ Parameters::Parameters(): PARAM_PC_MODE(PARAM_PC_MODE_ID, "--pseudo-cnt-mode", "Pseudo count mode", "use 0: substitution-matrix or 1: context-specific pseudocounts", typeid(int), (void *) &pcmode, "^[0-1]{1}$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT), PARAM_PCA(PARAM_PCA_ID, "--pca", "Pseudo count a", "Pseudo count admixture strength", typeid(MultiParam), (void *) &pca, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT), PARAM_PCB(PARAM_PCB_ID, "--pcb", "Pseudo count b", "Pseudo counts: Neff at half of maximum admixture (range 0.0-inf)", typeid(MultiParam), (void *) &pcb, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT), + PARAM_PROFILE_OUTPUT_MODE(PARAM_PROFILE_OUTPUT_MODE_ID, "--profile-output-mode", "Profile output mode", "Profile output mode: 0: binary log-odds 1: human-readable frequencies", typeid(int), (void *) &profileOutputMode, "^[0-1]{1}$", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT), // sequence2profile PARAM_NEFF(PARAM_NEFF_ID, "--neff", "Neff", "Neff included into context state profile (1.0,20.0)", typeid(float), (void *) &neff, "^[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE), PARAM_TAU(PARAM_TAU_ID, "--tau", "Tau", "Tau: context state pseudo count mixture (0.0,1.0)", typeid(float), (void *) &tau, "[0-9]*(\\.[0-9]+)?$", MMseqsParameter::COMMAND_PROFILE), @@ -570,6 +571,7 @@ Parameters::Parameters(): result2profile.push_back(&PARAM_THREADS); result2profile.push_back(&PARAM_COMPRESSED); result2profile.push_back(&PARAM_V); + result2profile.push_back(&PARAM_PROFILE_OUTPUT_MODE); // createtsv createtsv.push_back(&PARAM_FIRST_SEQ_REP_SEQ); @@ -2437,6 +2439,7 @@ void Parameters::setDefaults() { pcmode = PCMODE_SUBSTITUTION_SCORE; pca = MultiParam(PseudoCounts(1.1, 1.4)); pcb = MultiParam(PseudoCounts(4.1, 5.8)); + profileOutputMode = 0; // sequence2profile neff = 1.0; diff --git a/src/commons/Parameters.h b/src/commons/Parameters.h index 58af407d..79a168dd 100644 --- a/src/commons/Parameters.h +++ b/src/commons/Parameters.h @@ -536,6 +536,7 @@ class Parameters { int pcmode; MultiParam pca; MultiParam pcb; + int profileOutputMode; // sequence2profile float neff; @@ -855,6 +856,7 @@ class Parameters { PARAMETER(PARAM_PC_MODE) PARAMETER(PARAM_PCA) PARAMETER(PARAM_PCB) + PARAMETER(PARAM_PROFILE_OUTPUT_MODE) // sequence2profile PARAMETER(PARAM_NEFF) diff --git a/src/commons/Util.cpp b/src/commons/Util.cpp index 4fbc6eec..ca622ae2 100644 --- a/src/commons/Util.cpp +++ b/src/commons/Util.cpp @@ -659,7 +659,17 @@ std::string SSTR(double x) { return fmt::format("{:.3E}", x); } +template<> +std::string SSTR(double x, int precision) { + return fmt::format("{:.{}E}", x, precision); +} + template<> std::string SSTR(float x) { return fmt::format("{:.3f}", x); +} + +template<> +std::string SSTR(float x, int precision) { + return fmt::format("{:.{}f}", x, precision); } \ No newline at end of file diff --git a/src/commons/Util.h b/src/commons/Util.h index 0dd67adf..0d02e322 100644 --- a/src/commons/Util.h +++ b/src/commons/Util.h @@ -28,6 +28,12 @@ std::string SSTR(T) { return ""; } +template +std::string SSTR(T, int) { + static_assert(assert_false::value , "Not implemented for requested type"); + return ""; +} + template<> std::string SSTR(const char*); template<> std::string SSTR(char*); template<> std::string SSTR(bool); @@ -45,6 +51,8 @@ template<> std::string SSTR(long long); template<> std::string SSTR(unsigned long long); template<> std::string SSTR(double); template<> std::string SSTR(float); +template<> std::string SSTR(double, int precision); +template<> std::string SSTR(float, int precision); #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) diff --git a/src/util/result2profile.cpp b/src/util/result2profile.cpp index 34759b17..e11720a6 100644 --- a/src/util/result2profile.cpp +++ b/src/util/result2profile.cpp @@ -104,7 +104,11 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret } int type = Parameters::DBTYPE_HMM_PROFILE; - if (returnAlnRes) { + const int writePlain = par.profileOutputMode == 1; + if (par.profileOutputMode == 1) { + type = Parameters::DBTYPE_OMIT_FILE; + par.compressed = false; + } else if (returnAlnRes) { type = Parameters::DBTYPE_ALIGNMENT_RES; if (needSrcIndex) { type = DBReader::setExtendedDbtype(type, Parameters::DBTYPE_EXTENDED_INDEX_NEED_SRC); @@ -261,18 +265,25 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret alnResults, #endif par.wg, 0.0); - if (par.compBiasCorrection == true){ - SubstitutionMatrix::calcGlobalAaBiasCorrection(&subMat, pssmRes.pssm, pNullBuffer, - Sequence::PROFILE_AA_SIZE, - res.centerLength); - } - - if (par.maskProfile == true) { - masker.mask(centerSequence, par.maskProb, pssmRes); - } - pssmRes.toBuffer(centerSequence, subMat, result); + if (writePlain) { + result.clear(); + result.append("Query profile of sequence "); + result.append(SSTR(queryKey)); + result.push_back('\n'); + calculator.profileToString(result, res.centerLength); + } else { + if (par.compBiasCorrection == true){ + SubstitutionMatrix::calcGlobalAaBiasCorrection(&subMat, pssmRes.pssm, pNullBuffer, + Sequence::PROFILE_AA_SIZE, + res.centerLength); + } + if (par.maskProfile == true) { + masker.mask(centerSequence, par.maskProb, pssmRes); + } + pssmRes.toBuffer(centerSequence, subMat, result); + } } - resultWriter.writeData(result.c_str(), result.length(), queryKey, thread_idx); + resultWriter.writeData(result.c_str(), result.length(), queryKey, thread_idx, writePlain == false); result.clear(); alnResults.clear(); @@ -281,7 +292,10 @@ int result2profile(int argc, const char **argv, const Command &command, bool ret } delete[] pNullBuffer; } - resultWriter.close(returnAlnRes == false); + resultWriter.close(returnAlnRes == false || writePlain == true); + if (writePlain) { + FileUtil::remove(par.db4Index.c_str()); + } resultReader.close(); if (!sameDatabase) {