Skip to content

Commit

Permalink
revert to single-iteration splitting of diamond results
Browse files Browse the repository at this point in the history
  • Loading branch information
raufs committed May 19, 2024
1 parent 10e9057 commit 6cb93e7
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 35 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

setup(name='zol',
version='1.4.0',
version='1.4.1',
description='',
url='http://github.com/Kalan-Lab/zol/',
author='Rauf Salamzade',
Expand Down
20 changes: 4 additions & 16 deletions zol/orthologs/findOrthologs.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,24 +396,12 @@ def findOrthologs():
logObject.error(e)
raise RuntimeError(e)

split_diamond_cmds = []
with open(sample_listing_file) as oslf:
for line in oslf:
line = line.strip()
sample, sample_outfile = line.split('\t')
split_diamond_cmds.append([split_diamond_results_prog, alignment_result_file, sample, sample_outfile, logObject])

with concurrent.futures.ThreadPoolExecutor(max_workers=cpus) as executor:
executor.map(runCmd, split_diamond_cmds)

split_diamond_cmd = [split_diamond_results_prog, alignment_result_file, sample_listing_file]
try:
with open(sample_listing_file) as oslf:
for line in oslf:
line = line.strip()
sample, sample_outfile = line.split('\t')
assert(os.path.isfile(sample_outfile))
subprocess.call(' '.join(split_diamond_cmd), shell=True, stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL, executable='/bin/bash')
except Exception as e:
logObject.error("Issue with validating proper splitting of DIAMOND results.")
logObject.error("Issue with running: %s" % ' '.join(split_diamond_cmd))
logObject.error(e)
raise RuntimeError(e)

Expand Down
109 changes: 91 additions & 18 deletions zol/orthologs/splitDiamondResults.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,54 @@ Large
#include <math.h>
using namespace std;


/*
NOT USED - memory limits are instead applied in Python, per subprocess job.
The code below for limiting memory is taken from Raul Salinas-Monteagudo's answer at:
https://stackoverflow.com/questions/3774858/artificially-limit-c-c-memory-usage
class RLimit {
public:
RLimit(int cmd) : mCmd(cmd) {
}
void set(rlim_t value) {
clog << "Setting " << mCmd << " to " << value << endl;
struct rlimit rlim;
rlim.rlim_cur = value;
//rlim.rlim_max = value;
int ret = setrlimit(mCmd, &rlim);
if (ret) {
clog << "Error setting rlimit" << endl;
}
}
rlim_t getCurrent() {
struct rlimit rlim = {0, 0};
if (getrlimit(mCmd, &rlim)) {
clog << "Error in getrlimit" << endl;
return 0;
}
return rlim.rlim_cur;
}
rlim_t getMax() {
struct rlimit rlim = {0, 0};
if (getrlimit(mCmd, &rlim)) {
clog << "Error in getrlimit" << endl;
return 0;
}
return rlim.rlim_max;
}
private:
int mCmd;
};
// Code that would be placed in the main function:
RLimit dataLimit(RLIMIT_DATA);
dataLimit.set(X*1024*1024); // X = MB
clog << "soft: " << dataLimit.getCurrent() << " hard: " << dataLimit.getMax() << endl;
*/

string delim = "\t";

/*
Expand All @@ -46,29 +94,48 @@ vector<string> split (string s, string delimiter) {
int main (int argc, char* argv[]) {
if ( argv[1]==NULL || (argv[1][0]=='-' && argv[1][1]=='h') || (argv[1][0]=='-' && argv[1][1]=='-' && argv[1][2]=='h') ) {
cout << "Usage:" << endl;
cout << "./splitDiamondResults <BLAST/DIAMOND output> <Focal Sample> <Focal Sample Outfile>" << endl;
cout << "./splitDiamondResults <BLAST/DIAMOND output> <Sample Listing>" << endl;
return 0;
}
else {
/*
Read in sample listing and create map of sample names to out-files
*/
string focal_sample = argv[2];
string focal_sample_outfile = argv[3];

ofstream outfile_handle;

outfile_handle.open(focal_sample_outfile, std::ios_base::app);

string line, query, subject, query_sample, subject_sample;

map<string, string> sample_to_outfile_path;
string line, sample, outfile_path, query, subject, query_sample, subject_sample;
int split_counter;
vector<string> v;
ifstream input_file;
input_file.open (argv[2]);
set<string> all_samples;
if (input_file.is_open()) {
while (input_file.good()) {
getline (input_file,line);
if (!line.empty()) {
split_counter = 0;
v = split (line, delim);
for (auto i : v) {
if (split_counter == 0) {
sample = i;
}
else if (split_counter == 1) {
outfile_path = i;
}
split_counter++;
}
sample_to_outfile_path[sample] = outfile_path;
all_samples.insert(sample);
}
}
} else {
cout << "ERROR: Unable to open file " + (string)argv[2] << endl;
}
input_file.close();

/*
Parse DIAMOND results and start writing to individual files.
*/
ofstream outfile_query, outfile_subject, outfile;
input_file.open (argv[1]);
string line_with_newline;
string sid;
Expand All @@ -89,20 +156,26 @@ int main (int argc, char* argv[]) {
split_counter++;
}
line_with_newline = line + '\n';
if (query_sample.compare(subject_sample) == 0) {
outfile_handle << line_with_newline;
}
else if (query_sample.compare(focal_sample) == 0 || subject_sample.compare(focal_sample) == 0) {
outfile_handle << line_with_newline;
if (query_sample.compare(subject_sample) == 0) {
for (auto sid: all_samples) {
outfile.open(sample_to_outfile_path[sid], std::ios_base::app);
outfile << line_with_newline;
outfile.close();
}
} else {
outfile_subject.open(sample_to_outfile_path[subject_sample], std::ios_base::app);
outfile_query.open(sample_to_outfile_path[query_sample], std::ios_base::app);
outfile_subject << line_with_newline;
outfile_query << line_with_newline;
outfile_subject.close();
outfile_query.close();
}
}
}
} else {
cout << "ERROR: Unable to open file " + (string)argv[1] << endl;
}

outfile_handle.close();

return 0;
}
}
}

0 comments on commit 6cb93e7

Please sign in to comment.