Skip to content

Commit

Permalink
Merge pull request #13 from avilab/zen
Browse files Browse the repository at this point in the history
Zen
  • Loading branch information
tpall authored Nov 16, 2018
2 parents 8e5c061 + 6d7d44a commit 382734a
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 10 deletions.
14 changes: 5 additions & 9 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,14 @@ __license__ = "MIT"

include: "rules/common.smk"

## Main output files
# Chunk indices 1..n_files, one per split file of a sample
file_ids = list(range(1, n_files + 1))

# Every per-sample, per-chunk result file, followed by the taxonomy tables
outputs = expand(
    [
        "results/{sample}_phages_{n}.csv",
        "results/{sample}_unassigned_{n}.fa",
        "results/{sample}_phages_blasted_{n}.csv",
        "results/{sample}_viruses_blasted_{n}.csv",
    ],
    sample=sample_ids,
    n=file_ids,
) + expand(
    "taxonomy/{file}.csv",
    file=["names", "nodes", "division"],
)

## Target rules
# Default target rule: lists the files the workflow has to produce.
rule all:
input:
expand([
"results/{sample}_phages_{n}.csv",
"results/{sample}_unassigned_{n}.fa",
"results/{sample}_phages_blasted_{n}.csv",
"results/{sample}_viruses_blasted_{n}.csv"
],
sample = sample_ids,
n = list(range(1, n_files + 1, 1))),
expand("taxonomy/{file}.csv", file = ["names", "nodes", "division"])
# The regular outputs are always requested; the per-sample phages archive is
# requested in addition only when config["zenodo"]["deposition_id"] is set.
# NOTE(review): the conditional expression binds to the second item only, so
# `outputs` is listed twice when no deposition id is configured - presumably
# harmless duplicates; confirm.
outputs, expand("results/{sample}_phages.csv.tar.gz", sample = sample_ids) if config["zenodo"]["deposition_id"] else outputs

## Modules
include: "rules/munge.smk"
Expand Down
5 changes: 5 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
# Option to save time by not running blastx
run_blastx: False

# Upload results to Zenodo (leave deposition_id empty to skip the upload).
# For upload, set the ZENODO_PAT environment variable to your Zenodo API access token.
zenodo:
deposition_id:

# path or URL to sample sheet (CSV format, columns: sample, condition, ...)
samples: samples.tsv

Expand Down
8 changes: 8 additions & 0 deletions envs/upload.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
name: upload
channels:
- anaconda
- conda-forge
- defaults
dependencies:
- python=3.6
- requests
37 changes: 37 additions & 0 deletions rules/blast.smk
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,18 @@ rule filter_viruses:
script:
"../scripts/filter_viruses.R"

# Hand all per-chunk phage tables of one sample to scripts/zenodo_upload.py.
rule upload_phages:
input:
# One CSV per chunk index in N for the given sample
expand("results/{{sample}}_phages_{n}.csv", n = N)
output:
# With a deposition id configured: a temporary tar.gz archive of the inputs.
# Without one, the output list is identical to the input list.
temp("results/{sample}_phages.csv.tar.gz") if config["zenodo"]["deposition_id"] else expand("results/{{sample}}_phages_{n}.csv", n = N)
params:
# Zenodo deposition id; the script reads it positionally as snakemake.params[0]
config["zenodo"]["deposition_id"]
conda:
"../envs/upload.yml"
script:
"../scripts/zenodo_upload.py"

## Get unmasked candidate viral sequences
rule unmasked_viral:
input:
Expand Down Expand Up @@ -272,3 +284,28 @@ rule filter_blasted_viruses:
"../envs/tidyverse.yml"
script:
"../scripts/filter_viruses.R"

# Hand all per-chunk blasted phage tables of one sample to scripts/zenodo_upload.py.
rule upload_phages_blasted:
input:
# One CSV per chunk index in N for the given sample
expand("results/{{sample}}_phages_blasted_{n}.csv", n = N)
output:
# With a deposition id configured: a temporary tar.gz archive of the inputs.
# Without one, the output list is identical to the input list.
temp("results/{sample}_phages_blasted.csv.tar.gz") if config["zenodo"]["deposition_id"] else expand("results/{{sample}}_phages_blasted_{n}.csv", n = N)
params:
# Zenodo deposition id; the script reads it positionally as snakemake.params[0]
config["zenodo"]["deposition_id"]
conda:
"../envs/upload.yml"
script:
"../scripts/zenodo_upload.py"

# Hand all per-chunk blasted virus tables of one sample to scripts/zenodo_upload.py.
rule upload_viruses_blasted:
input:
# One CSV per chunk index in N for the given sample
expand("results/{{sample}}_viruses_blasted_{n}.csv", n = N)
output:
# With a deposition id configured: a temporary tar.gz archive of the inputs.
# Without one, the output list is identical to the input list.
temp("results/{sample}_viruses_blasted.csv.tar.gz") if config["zenodo"]["deposition_id"] else expand("results/{{sample}}_viruses_blasted_{n}.csv", n = N)
params:
# Zenodo deposition id; the script reads it positionally as snakemake.params[0]
config["zenodo"]["deposition_id"]
conda:
"../envs/upload.yml"
script:
"../scripts/zenodo_upload.py"

1 change: 1 addition & 0 deletions rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ configfile: "config.yml"
# Sample sheet: whitespace-separated table indexed by its "sample" column,
# with every value read as a string. The separator is a raw string so that
# "\s" is not treated as an (invalid) string escape sequence.
samples = pd.read_table(config["samples"], sep = r"\s+", index_col = "sample", dtype = str)
sample_ids = samples.index.values.tolist()
# Number of split files per sample, taken from the split_fasta config section
n_files = config["split_fasta"]["n_files"]
# Chunk indices 1..n_files, shared by the rules that expand over the split files
N = list(range(1, n_files + 1))

# Create slurm logs dir
if not os.path.exists("logs/slurm"):
Expand Down
2 changes: 1 addition & 1 deletion rules/mask.smk
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ rule split_fasta:
input:
rules.tantan_good.output
output:
expand("mask/{{sample}}_repeatmasker_{n}.fa", n = list(range(1, n_files + 1, 1)))
expand("mask/{{sample}}_repeatmasker_{n}.fa", n = N)
params:
config["split_fasta"]["n_files"]
conda:
Expand Down
54 changes: 54 additions & 0 deletions scripts/zenodo_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os
import requests
import gzip
import re
from subprocess import Popen, PIPE
import hashlib

# Fail early with a clear message when the Zenodo API token is not configured.
# os.environ.get() is used so that an unset variable reaches the ValueError
# below instead of raising a bare KeyError; an empty value is rejected as well.
if not os.environ.get('ZENODO_PAT'):
    raise ValueError("Missing ZENODO_PAT environment variable with zenodo API access token!")

def md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is opened in binary mode and consumed in 4096-byte blocks,
    so it is never held in memory as a whole.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

# Zenodo deposition id, passed by the Snakemake rule as its first (unnamed) param
deposition_id = snakemake.params[0]

# Create tar.gz file for upload.
# The archive is named after the inputs with the numeric chunk suffix removed,
# e.g. results/sample_phages_1.csv -> results/sample_phages.csv.tar.gz.
# Only the suffix right before the file extension is stripped, so sample names
# that themselves contain "_<digits>" are left intact; sorting makes the pick
# deterministic should the inputs ever reduce to more than one name.
files = list(snakemake.input)
stems = sorted({re.sub(r"_\d+(?=\.[^./]+$)", "", file) for file in files})
archive = stems[0] + ".tar.gz"
cmd = ['tar', '-cvzf', archive] + files
p = Popen(cmd, stdout = PIPE, stderr = PIPE)
stdout, stderr = p.communicate()
# Do not continue with a missing or truncated archive
if p.returncode != 0:
    raise RuntimeError("tar failed with exit code {}: {}".format(p.returncode, stderr.decode(errors = "replace")))

# Calculate md5 checksum for the local archive.
# NOTE(review): the checksum is not compared against the remote copy yet;
# computing it still makes an unreadable archive fail before any network call.
local_checksum = md5(archive)

# Compose files query and upload url (plain string formatting: URLs always
# use "/", whereas os.path.join would use the platform path separator)
base_url = 'https://zenodo.org/api'
url = '{}/deposit/depositions/{}/files'.format(base_url, deposition_id)

# Setup access token
params = {'access_token': os.environ['ZENODO_PAT']}

# Get info for remote files; an error response (bad token, unknown deposition)
# is raised here instead of being parsed as a file listing
r = requests.get(url, params = params)
r.raise_for_status()
remote_names = [deposit['filename'] for deposit in r.json()]

# Upload, if file is not present
if os.path.basename(archive) not in remote_names:
    with open(archive, "rb") as handle:
        r = requests.post(url, params = params,
                          data = {'filename': str(archive)},
                          files = {'file': handle})

    if r.status_code != 201:
        # The error body is not guaranteed to be JSON with a 'message' key
        try:
            message = r.json()['message']
        except (ValueError, KeyError, TypeError):
            message = r.text
        raise requests.HTTPError(f"Error in data upload, status code: {r.status_code} {message}")

else:
    print("Doing nothing. File {} is already uploaded!\nPlease delete local and remote copy of the file\nif you wish to upload new version.".format(os.path.basename(archive)))

0 comments on commit 382734a

Please sign in to comment.