Merge pull request #83 from AlexVCaron/hotfix
[FIX] Test data caching and unzipping
arnaudbore authored Dec 18, 2024
2 parents 9bd2a96 + 0be48f9 commit 8702891
Showing 7 changed files with 144 additions and 33 deletions.
4 changes: 3 additions & 1 deletion .devcontainer/devops/devcontainer.json
@@ -4,7 +4,9 @@
         "dockerfile": "Dockerfile",
         "args": {
             "NFTEST_VERSION": "0.9.0",
-            "POETRY_VERSION": "1.8.*"
+            "POETRY_VERSION": "1.8.*",
+            "NFT_DIFF": "pdiff",
+            "NFT_DIFF_ARGS": "--line-numbers --width 120 --expand-tabs=2"
         }
     },
     "forwardPorts": [3000],
3 changes: 2 additions & 1 deletion docs/cspell/neuroscience.txt
@@ -29,6 +29,7 @@
 *metrics
 *morph*
 *mov*
+*neuro*
 *normalise
 *pack
 *par
@@ -64,11 +65,11 @@ fsl*
 gagnon*
 interp
 medde
+mkdirs
 mppca
 mrdegibbs
 mrtrix
 msmt
-neuro*
 nextflow
 nf*
 nifti
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -38,3 +38,4 @@ python = "<3.11,>=3.9"
 nf-core = "~2.14.1"
 black = "^24.1.1"
 isort = "^5.13.2"
+pdiff = "^1.1.4"
161 changes: 135 additions & 26 deletions subworkflows/nf-neuro/load_test_data/main.nf
@@ -1,36 +1,147 @@
-import java.nio.file.Files
-
-def fetch_archive ( name, destination, remote, database, data_identifiers ) {
-    // Find cache location for test archives
+def locate_local_cache () {
+    // Find cache location for test archives, in order of preference:
+    // 1. Using environment variable $NFNEURO_TEST_DATA_HOME
+    // 2. Using environment variable $XDG_DATA_HOME
+    // 3. Using default location $HOME/.local/share
+    //
+    // Location selected is appended with 'nf-neuro-test-archives'.
+    // If the location does not exist, it is created.
+
     def storage = file(
         System.getenv('NFNEURO_TEST_DATA_HOME') ?:
         System.getenv('XDG_DATA_HOME') ?:
         "${System.getenv('HOME')}/.local/share"
     )
     def cache_location = file("$storage/nf-neuro-test-archives")
-    if ( !cache_location.exists() ) cache_location.mkdirs()
 
-    // Fetch file from remote if not present in cache
-    def data_id = data_identifiers[name]
-    if ( !data_id ) {
-        error "Invalid test data identifier supplied: $name"
+    if ( !cache_location.exists() ) {
+        try {
+            cache_location.mkdirs()
+        }
+        catch (Exception _e) {
+            error "Failed to create cache location: $cache_location"
+        }
     }
 
+    return cache_location
+}
+
+def locate_remote_cache () {
+    return "$params.test_data_remote/$params.test_database_path"
+}
+
+def load_manifest () {
+    // Load test data associations from params.test_data_associations
+    // which must be a map of test data identifiers [filename: identifier]
+
+    if ( ! params.test_data_associations ) {
+        error """
+        No test data associations provided, cannot create cache manifest. Please
+        provide a map of test data identifiers [filename: identifier] using
+        params.test_data_associations.
+        """
+    }
+
+    return params.test_data_associations
+}
+
+def validate_cache_entry ( name, manager ) {
+    // Check if the cache entry is present in the manifest
+
+    if ( !manager.manifest[name] ) {
+        error "Invalid cache entry supplied : $name"
+    }
+
+}
+
+def add_cache_entry ( name, manager ) {
+    // Add the test data archive as an entry in the cache. The archive is
+    // fetched from the remote location and stored in the cache location.
+    // The given name is validated against the manifest before adding.
+
+    manager.validate_entry(name)
+
+    def identifier = "${manager.manifest[name]}"
+    def cache_entry = file("${manager.cache_location}/$identifier")
+    def remote_subpath = "${identifier[0..1]}/${identifier[2..-1]}"
+    def remote_entry = file("$manager.remote_location/$remote_subpath")
+
+    try {
+        remote_entry.copyTo(cache_entry)
+    }
+    catch (Exception _e) {
+        manager.delete_entry(name)
+        error "Failed to fetch test data archive: $name | $_e"
+    }
 
-    def cache_entry = file("$cache_location/$data_id")
-    if ( !cache_entry.exists() ) {
+    return cache_entry
+}
+
+def get_cache_entry ( name, manager ) {
+    // Retrieve the cache entry for the given test data archive name.
+    // If the entry does not exist, it is added to the cache. The add
+    // operation will validate the name against the manifest.
+
+    def identifier = "${manager.manifest[name]}"
+    def cache_entry = file("${manager.cache_location}/$identifier")
+
+    if ( !cache_entry.exists() ) manager.add_entry(name)
+
+    return cache_entry
+}
+
+def delete_cache_entry ( name, manager ) {
+    // Delete the cache entry for the given test data archive name.
+
+    def identifier = "${manager.manifest[name]}"
+    def cache_entry = file("${manager.cache_location}/$identifier")
+    if ( cache_entry.exists() ) {
         try {
-            def remote_entry = "${data_id[0..1]}/${data_id[2..-1]}"
-            file("$remote/$database/$remote_entry").copyTo(cache_entry)
+            cache_entry.delete()
         }
-        catch (Exception e) {
-            error "Failed to fetch test data archive: $name"
-            file("$remote/$database/$remote_entry").delete()
+        catch (Exception _e) {
+            error "Failed to delete test data archive: $name"
         }
     }
+}
 
+def update_cache_entry ( name, manager ) {
+    // Update the cache entry for the given test data archive name. The
+    // procedure uses add to carry the update, but deletes the entry first
+    // if it exists. The add operation will validate the name against
+    // the manifest.
+
+    manager.delete_entry(name)
+    manager.add_entry(name)
+}
+
+def setup_cache () {
+    // Build a cache manager to encapsulate interaction with the test data cache.
+    // The manager follows simple CRUD operation to handle update and retrieval of
+    // test data archives from the cache and the remote location.
+
+    def cache_manager = new Expando(
+        remote_location: locate_remote_cache(),
+        cache_location: locate_local_cache(),
+        manifest: load_manifest()
+    )
+    cache_manager.validate_entry = { v -> validate_cache_entry( v, cache_manager ) }
+    cache_manager.add_entry = { v -> add_cache_entry(v, cache_manager) }
+    cache_manager.get_entry = { v -> get_cache_entry(v, cache_manager) }
+    cache_manager.delete_entry = { v -> delete_cache_entry(v, cache_manager) }
+    cache_manager.update_entry = { v -> update_cache_entry(v, cache_manager) }
+
+    return cache_manager
+}
+
+
+def fetch_archive ( name, destination, manager ) {
     // Unzip all archive content to destination
-    def content = new java.util.zip.ZipFile("$cache_entry")
+    def content = null
     try {
+        content = new java.util.zip.ZipFile("${manager.get_entry(name)}")
         content.entries().each{ entry ->
             def local_target = file("$destination/${entry.getName()}")
             if (entry.isDirectory()) {
@@ -42,11 +153,14 @@ def fetch_archive ( name, destination, remote, database, data_identifiers ) {
                 }
             }
         }
         content.close()
 
         return destination.resolve("${name.take(name.lastIndexOf('.'))}")
     }
-    finally {
-        content.close()
+    catch (Exception _e) {
+        if (content) content.close()
+        manager.delete_entry(name)
+        error "Failed to extract test data archive: $name | $_e"
     }
 }

@@ -57,16 +171,11 @@ workflow LOAD_TEST_DATA {
     test_data_prefix
 
     main:
+    manager = setup_cache()
+
     ch_versions = Channel.empty()
-    test_data_path = Files.createTempDirectory("$test_data_prefix")
+    test_data_path = java.nio.file.Files.createTempDirectory("$test_data_prefix")
     ch_test_data_directory = ch_archive.map{ archive ->
-        fetch_archive(
-            archive, test_data_path,
-            params.test_data_remote,
-            params.test_database_path,
-            params.test_data_associations
-        )
+        fetch_archive(archive, test_data_path, manager)
     }
 
     emit:
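
For reviewers trying the change locally, a minimal sketch of how the reworked subworkflow might be driven from a test workflow. The include path and the emitted channel name are assumptions (the emit block is truncated in this diff), so adjust to the actual module layout:

// Sketch only: the include path and emission name below are assumed, not shown in the diff.
include { LOAD_TEST_DATA } from '../../../../subworkflows/nf-neuro/load_test_data/main'

workflow run_load_test_data {
    // One archive name per element; each is resolved through the cache manager
    // (downloaded on first use), then unzipped into a shared temporary directory.
    ch_archive = Channel.of( "dicom.zip" )
    LOAD_TEST_DATA ( ch_archive, "test.load-test-data" )
    LOAD_TEST_DATA.out.test_data_directory.view()
}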
3 changes: 1 addition & 2 deletions tests/nextflow.config
@@ -1,4 +1,3 @@
-import groovy.json.JsonSlurper
 
 params {
     outdir = "output/"
@@ -7,7 +6,7 @@ params {
 
     test_data_remote = "https://scil.usherbrooke.ca"
     test_database_path = "scil_test_data/dvc-store/files/md5"
-    test_data_associations = new JsonSlurper().parse(
+    test_data_associations = new groovy.json.JsonSlurper().parse(
         new File("$projectDir/tests/test_data.json")
     )
 }
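
For context on this change: params.test_data_associations is the manifest returned by load_manifest() above, a plain filename-to-identifier map parsed from tests/test_data.json. A standalone Groovy sketch of the same parse and a single lookup (illustration only, path relative to the project root):

// Parse the manifest the same way tests/nextflow.config does, then look up
// one archive's identifier (the md5 of the DVC-tracked file).
def manifest = new groovy.json.JsonSlurper().parse(
    new File("tests/test_data.json")
)
assert manifest instanceof Map
println manifest["dicom.zip"]   // 234913cbad53c19aa19aef9eda0a3839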
1 change: 0 additions & 1 deletion tests/test_data.json
@@ -57,6 +57,5 @@
     "freesurfer_nifti.zip": "adb5ac4cf5c45040339e04e7c142e8c9",
     "transform.zip": "148afd665ddbd2bb80493208480571a9",
     "dicom.zip": "234913cbad53c19aa19aef9eda0a3839",
-    "freesurfer_nifti.zip": "adb5ac4cf5c45040339e04e7c142e8c9",
     "TOPUP.zip": "da11914087a1a4ed1d21d478540d41b0"
 }
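
The identifiers above double as storage keys: add_cache_entry() splits an identifier into a two-character prefix and the remainder, mirroring the md5 layout of the DVC store configured in tests/nextflow.config. A small sketch of the resulting remote path, using only values shown in this commit:

// Derive the download URL for one archive the way add_cache_entry() does.
def remote     = "https://scil.usherbrooke.ca"            // params.test_data_remote
def database   = "scil_test_data/dvc-store/files/md5"     // params.test_database_path
def identifier = "234913cbad53c19aa19aef9eda0a3839"       // manifest entry for dicom.zip

def remote_subpath = "${identifier[0..1]}/${identifier[2..-1]}"
assert remote_subpath == "23/4913cbad53c19aa19aef9eda0a3839"

println "$remote/$database/$remote_subpath"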
