Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
4ctrl-alt-del committed Jan 29, 2021
2 parents feeaf4f + 4f318d1 commit 708f737
Show file tree
Hide file tree
Showing 55 changed files with 3,000 additions and 2,330 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ wheels/
*.egg
MANIFEST
.cache/
*.user
16 changes: 0 additions & 16 deletions .travis.yml

This file was deleted.

160 changes: 57 additions & 103 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,111 +1,65 @@
# Use a prepared image with python 3.6.
FROM python:3.6-slim
FROM ubuntu:20.04

# Dockerfile information.
LABEL maintainer='[email protected]'

################## METADATA ######################
LABEL base_image="ubuntu:20.04"
LABEL software="Pynome Image"
LABEL software.version="1.0.0"
LABEL about.summary="Docker image for Pynome"
LABEL about.home=""
LABEL about.documentation=""
LABEL about.license_file=""

# Update package manager and install basics.
RUN apt-get update \
&& apt-get -y install git wget unzip build-essential

################## MAINTAINER ######################
MAINTAINER Josh Burns <[email protected]>

# Install IRODs
RUN apt-get -y install libfuse2 expect jq tree \
&& wget ftp://ftp.renci.org/pub/irods/releases/4.1.11/ubuntu14/irods-icommands-4.1.11-ubuntu14-x86_64.deb \
&& dpkg -i irods-icommands-4.1.11-ubuntu14-x86_64.deb \
&& chmod 755 /usr/local/share/*

# Set noninteractive mode for apt-get
ENV DEBIAN_FRONTEND noninteractive

# Install hisat2

# Update and install basic packages
RUN apt-get update -qq \
&& apt-get install -qq -y wget unzip python python3 python3-pip


# Copy this repository and install pynome
COPY . /tmp/pynome
RUN cd /tmp/pynome \
&& pip3 install . \
&& cd .. \
&& rm -fr pynome


# Install hisat2 version 2.1.0
RUN wget -q ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2/downloads/hisat2-2.1.0-Linux_x86_64.zip \
&& unzip hisat2-2.1.0-Linux_x86_64.zip \
&& mv hisat2-2.1.0 /opt/hisat2
ENV PATH="/opt/hisat2:$PATH"

# Install Cufflinks
RUN wget -q http://cole-trapnell-lab.github.io/cufflinks/assets/downloads/cufflinks-2.2.1.Linux_x86_64.tar.gz \
&& tar -xzf cufflinks-2.2.1.Linux_x86_64.tar.gz \
&& mv cufflinks-2.2.1.Linux_x86_64 /opt/cufflinks
ENV PATH="/opt/cufflinks:$PATH"


# Install gffread
WORKDIR /opt
RUN git clone https://github.com/gpertea/gclib \
&& git clone https://github.com/gpertea/gffread
WORKDIR /opt/gffread
RUN make
ENV PATH="/opt/gffread:$PATH"

# set user/group IDs for irods account
RUN groupadd -r irods --gid=998 \
&& useradd -r -g irods -d /var/lib/irods --uid=998 irods


# grab gosu for easy step-down from root
ENV GOSU_VERSION 1.10
RUN set -x \
&& apt-get update && apt-get install -y --no-install-recommends ca-certificates wget && rm -rf /var/lib/apt/lists/* \
&& wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)" \
&& wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture).asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --keyserver ha.pool.sks-keyservers.net --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
&& gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
&& rm -rf "$GNUPGHOME" /usr/local/bin/gosu.asc \
&& chmod +x /usr/local/bin/gosu \
&& gosu nobody true \
&& apt-get purge -y --auto-remove wget

# default iRODS env
ENV IRODS_PORT=1247
ENV IRODS_PORT_RANGE_BEGIN=20000
ENV IRODS_PORT_RANGE_END=20199
ENV IRODS_CONTROL_PLANE_PORT=1248
# iinit parameters
ENV IRODS_HOST=scidas-irods.cahnrs.wsu.edu
ENV IRODS_USER_NAME=biggst
ENV IRODS_ZONE_NAME=scidasZone
ENV IRODS_CWD=/ScidasZone/Sysbio/testgenomes
# TODO: remove pw -- convert to docker secret.
ENV IRODS_PASSWORD=PASSWORD
# UID / GID settings
ENV UID_IRODS=998
ENV GID_IRODS=998

EXPOSE $IRODS_PORT $IRODS_CONTROL_PLANE_PORT $IRODS_PORT_RANGE_BEGIN-$IRODS_PORT_RANGE_END

COPY /scripts/docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh
# RUN /docker-entrypoint.sh
COPY . /tmp
RUN cp /tmp/scripts/docker-entrypoint.sh /docker-entrypoint.sh \
&& rm -rf /tmp/*


# Create the directories for pynome.
RUN mkdir /opt/pynome \
&& mkdir /pynome
VOLUME /pynome

# Copy the pynome dir.
COPY . /opt/pynome/

# Install Pynome requirements.
RUN python -m pip install -r /opt/pynome/requirements.txt

# Change the working directory -- required for the install.py to function.
WORKDIR /opt/pynome

# Install Pynome.
RUN python /opt/pynome/setup.py install

# Set the default command to be launched for this container.
WORKDIR /pynome

# Disable SSL verification...
ENV PYTHONHTTPSVERIFY=0

ENTRYPOINT ["/docker-entrypoint.sh"]
# CMD ["ls", "-lsa"]
CMD ["pynome", "--config=/opt/pynome/pynome_config.json", "discover", "download", "sra", "prepare", "putirods"]
&& unzip -q hisat2-2.1.0-Linux_x86_64.zip \
&& mv hisat2-2.1.0 /usr/local \
&& rm hisat2-2.1.0-Linux_x86_64.zip
ENV PATH "$PATH:/usr/local/hisat2-2.1.0"


# Install kallisto version 0.45.0
RUN wget -q https://github.com/pachterlab/kallisto/releases/download/v0.45.0/kallisto_linux-v0.45.0.tar.gz \
&& tar -xf kallisto_linux-v0.45.0.tar.gz \
&& mv kallisto_linux-v0.45.0 /usr/local/kallisto-0.45.0 \
&& chmod 755 /usr/local/kallisto-0.45.0/kallisto \
&& ln -sfn /usr/local/kallisto-0.45.0/kallisto /usr/bin/kallisto \
&& rm kallisto_linux-v0.45.0.tar.gz


# Install salmon version 0.12.0
RUN wget -q https://github.com/COMBINE-lab/salmon/releases/download/v0.12.0/salmon-0.12.0_linux_x86_64.tar.gz \
&& tar -xf salmon-0.12.0_linux_x86_64.tar.gz \
&& mv salmon-0.12.0_linux_x86_64 /usr/local/salmon-0.12.0 \
&& chmod 755 /usr/local/salmon-0.12.0/bin/salmon \
&& ln -sfn /usr/local/salmon-0.12.0/bin/salmon /usr/bin/salmon \
&& rm salmon-0.12.0_linux_x86_64.tar.gz


# Install gffread version 0.12.2
RUN wget -q http://ccb.jhu.edu/software/stringtie/dl/gffread-0.12.2.Linux_x86_64.tar.gz \
&& tar -xf gffread-0.12.2.Linux_x86_64.tar.gz \
&& cp gffread-0.12.2.Linux_x86_64/gffread /usr/bin/gffread \
&& rm -fr gffread-0.12.2.Linux_x86_64 gffread-0.12.2.Linux_x86_64.tar.gz
55 changes: 38 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,66 @@

Pynome, a Python command line interface tool, provides the user with a way to
download desired genome assembly files from the
[Ensembl](https://www.ensembl.org/) database.
[Ensembl](https://www.ensembl.org/) and [NCBI](https://www.ncbi.nlm.nih.gov/) databases.


## Installation

Pip can be used for installation.

```bash
$ python setup.py install
$ pip install .
```


## Usage

Commands can be run one at a time:
Commands can be run one at a time for crawling and mirroring.

Crawling:

```bash
$ pynome -c
```

Mirroring:

```bash
$ pynome discover
$ pynome list
$ pynome download
$ pynome prepare
$ pynome -m
```

Or strung together.

```bash
pynome discover download prepare
pynome -cm
```

## Docker Implementation
## Parallel indexing

Pynome can be run as a Docker application. For iRODs integration you must
create a docker secret named `irods_password` with the password of the iRODs
user to be used to `put` files into iRODs.
Indexing is designed to be done in parallel because of the large volume of assemblies that are mirrored.
Parallel execution is done in two steps. The first step generates a list of pynome job files, each
file representing a single assembly whose indexes require updating. The second step processes a single
job file and is meant to be run in parallel.

The docker node used must be part of a swarm.
Generate job files:

```bash
$ docker swarm init
pynome -I
```

## TODO
Process a job file:

- [ ] Add delete() functions.
```bash
pynome -i -f <PATH>
```

Where `<PATH>` is the path to a generated pynome job file whose name has the format
`pynome_job_#####.txt`, where `#####` is the job number.

## Local database root directory

The default root directory where assemblies are stored is `$HOME/species`. To use a
different directory, pass the `-d` argument. For example, to crawl using a custom directory:

```bash
$ pynome -d /custom -c
```
6 changes: 0 additions & 6 deletions docker-compose.yml

This file was deleted.

11 changes: 3 additions & 8 deletions pynome/__init__.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
"""Initialize Pynome.
Set up the logger.
"""


import logging

logging.basicConfig(filename='pynome.log', level=logging.DEBUG)
This is the pynome command line application used for crawling and downloading
genetic assembly data.
"""
Loading

0 comments on commit 708f737

Please sign in to comment.