Skip to content

Commit

Permalink
Merge pull request #42 from workflowhub-eu/refactor-workflow
Browse files Browse the repository at this point in the history
Refactored Snakemake workflow and generate example output
  • Loading branch information
alexhambley authored Aug 6, 2024
2 parents e4433cc + f5ebc3f commit 351ae98
Show file tree
Hide file tree
Showing 15 changed files with 473,580 additions and 1,276 deletions.
51 changes: 33 additions & 18 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,44 @@
FROM python:3.11-slim
# Stage 1: Build environment
FROM python:3.11-slim AS build-stage

RUN pip install poetry
# Install build tools and Poetry.
# --no-install-recommends and removing the apt lists in the same layer keep the
# layer small; --no-cache-dir stops pip from leaving its download cache behind.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && rm -rf /var/lib/apt/lists/* \
    && pip install --no-cache-dir poetry

# Set the working directory
WORKDIR /app

# Install build tools for Snakemake (gcc, make, etc.)
RUN apt-get update && apt-get install -y build-essential
# Copy dependency files and install dependencies
COPY pyproject.toml poetry.lock /app/
RUN poetry config virtualenvs.create false \
&& poetry install --no-interaction --no-ansi

# Copy the pyproject.toml file
COPY pyproject.toml /app/
# Copy and install the application
COPY . /app
RUN poetry install

# Install the dependencies
RUN poetry install --no-root
# Stage 2: Snakemake runtime environment
# NOTE(review): `:latest` is a floating tag, so a rebuild may pick up a different
# Snakemake release than the one this workflow was tested with; consider pinning
# a specific version tag (or digest) once the intended release is confirmed.
FROM snakemake/snakemake:latest

# Copy the rest of the application files
COPY . /app
# Install Poetry
RUN pip install poetry

# Install the package
RUN poetry install
WORKDIR /app

# Copy the application from the build stage
COPY --from=build-stage /app /app

# Install dependencies.
# The requirements are exported to a temporary file instead of being fed to pip
# through bash process substitution (`<(...)`): RUN commands execute with
# `/bin/sh -c` by default, and POSIX sh does not define that syntax.
# The temporary file is removed in the same layer so it never reaches the image.
RUN poetry export --format requirements.txt --without-hashes --output /tmp/requirements.txt \
    && pip install --no-cache-dir -r /tmp/requirements.txt \
    && pip install --no-cache-dir -e . \
    && rm /tmp/requirements.txt

# Set up non-root user
RUN groupadd -r snakemake && useradd -r -g snakemake snakemake \
&& chown -R snakemake:snakemake /app

# Install Snakemake using Poetry
RUN poetry add snakemake
USER snakemake

# Set the entry point for the container
ENTRYPOINT ["poetry", "run"]
# Configure Python path.
# Any inherited PYTHONPATH is appended only when it is set and non-empty, so an
# unset value does not leave a trailing ":" (an empty search-path entry) behind.
ENV PYTHONPATH="/app${PYTHONPATH:+:${PYTHONPATH}}"

CMD ["help"]
# Set the entry point
ENTRYPOINT ["snakemake"]
66 changes: 50 additions & 16 deletions Snakefile
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
# TODO - Refactor to input args to the Snakemake file
WORKFLOW_IDS = range(1,11)
from snakemake.io import directory

VERSIONS = ['1']
OUTPUT_DIRS = "data"
MERGED_FILE = "merged.ttl"


def list_expected_files():
files = []
for wf_id in WORKFLOW_IDS:
for ver in VERSIONS:
files.append(f"{OUTPUT_DIRS}/{wf_id}_{ver}_ro-crate-metadata.json")
return files
ro_crate_metadata_dir = "ro-crate-metadata/"

rule all:
input:
MERGED_FILE
"ro-crate-metadata"

rule source_ro_crates:
output:
Expand All @@ -23,12 +16,20 @@ rule source_ro_crates:
"""
# Create the output directory if it doesn't exist:
mkdir -p {OUTPUT_DIRS}
# Add the current directory to PYTHONPATH, creating it if it doesn't exist
export PYTHONPATH="${{PYTHONPATH:+$PYTHONPATH:}}$(pwd)"
# Run the source_crates script to download the RO Crate metadata:
python workflowhub_graph/source_crates.py --workflow-ids 1-10 --prod --all-versions
# After sourcing, check which files were actually created:
python workflowhub_graph/check_outputs.py --workflow-ids 1-10 --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
# Run the source_crates script to download the RO Crate metadata,
# then check the output files and generate created_files.json:
# - all versions of all workflows:
# python workflowhub_graph/source_crates.py --prod --all-versions
# python workflowhub_graph/check_outputs.py --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
# - all versions of workflows with IDs 1-20:
python workflowhub_graph/source_crates.py --workflow-ids 1-20 --prod --all-versions
python workflowhub_graph/check_outputs.py --workflow-ids 1-20 --versions {VERSIONS} --output-dir {OUTPUT_DIRS}
"""

rule report_created_files:
Expand Down Expand Up @@ -65,3 +66,36 @@ rule merge_files:
shell(f"""
python workflowhub_graph/merge.py {output[0]} -p "data/*.json"
""")

# Runs workflowhub_graph/create_ro_crate.py on the merged file.
# Input:  MERGED_FILE.
# Params: workflow_file, the path "Snakefile", passed to the script as its
#         second argument.
# Output: the "ro-crate-metadata/" directory (declared with directory()),
#         passed to the script as its third argument.
# The shell block creates a throwaway virtual environment (rocrate_env),
# installs requests, urllib3, rocrate and rocrate-zenodo into it with pip, runs
# the script, then deactivates and deletes the environment.
# NOTE(review): the packages are installed unpinned on every run, so this step
# presumably needs network access at workflow run time — confirm this is intended.
# NOTE(review): the cleanup commands sit at the end of the script; if an earlier
# command aborts the shell, rocrate_env is presumably left behind — verify.
rule create_ro_crate:
input:
MERGED_FILE
params:
workflow_file = "Snakefile"
output:
directory("ro-crate-metadata/")
shell:
"""
# Create a new virtual environment
python -m venv rocrate_env
# Activate the virtual environment
source rocrate_env/bin/activate
# Upgrade pip to avoid any potential issues
pip install --upgrade pip
# pip uninstall urllib3
# Install required packages
pip install requests urllib3 rocrate rocrate-zenodo
# Run the create_ro_crate script
python workflowhub_graph/create_ro_crate.py {input} {params.workflow_file} {output}
# Deactivate the virtual environment
deactivate
# Remove the virtual environment to clean up
rm -rf rocrate_env
"""
Loading

0 comments on commit 351ae98

Please sign in to comment.