diff --git a/ome_data/mirax_files.py b/ome_data/mirax_files.py new file mode 100644 index 0000000..cfb05be --- /dev/null +++ b/ome_data/mirax_files.py @@ -0,0 +1,54 @@ +# Copyright (c) 2021, CRS4 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +from .. 
class ServerConfigError(Exception):
    """Raised when the MIRAX default folder setting is not configured."""


class InvalidMiraxFile(Exception):
    """Raised when the ``.mrxs`` index file of a slide is not on disk."""


class InvalidMiraxFolder(Exception):
    """Raised when the data folder paired with a ``.mrxs`` file is missing."""


def get_mirax_files_paths(slide_base_name):
    """Return the ``(index_file, data_folder)`` paths of a MIRAX slide.

    Both paths are built by joining ``settings.MIRAX_FOLDER`` with
    ``<slide_base_name>.mrxs`` and ``<slide_base_name>`` respectively.

    :param slide_base_name: slide name without the ``.mrxs`` extension
    :return: tuple ``(path to the .mrxs file, path to the data folder)``
    :raises ServerConfigError: if ``settings.MIRAX_FOLDER`` is ``None``
    :raises InvalidMiraxFile: if the ``.mrxs`` path is not an existing file
    :raises InvalidMiraxFolder: if the data folder path is not a directory
    """
    if settings.MIRAX_FOLDER is None:
        raise ServerConfigError('MIRAX default folder was not configured properly')
    index_file = os.path.join(settings.MIRAX_FOLDER, '{0}.mrxs'.format(slide_base_name))
    data_folder = os.path.join(settings.MIRAX_FOLDER, slide_base_name)
    # The index file is checked first, so a slide missing both parts is
    # reported as a missing file.
    if not os.path.isfile(index_file):
        raise InvalidMiraxFile('File {0} not found'.format(index_file))
    if not os.path.isdir(data_folder):
        raise InvalidMiraxFolder('Path {0} not found'.format(data_folder))
    return index_file, data_folder


def is_valid_filename(fname):
    """Tell whether ``fname`` contains only word characters, ``-`` and ``.``.

    :param fname: candidate file name; ``None`` and other non-string values
        (e.g. a missing request parameter) are reported as invalid instead
        of raising ``TypeError``
    :return: ``True`` if the whole name matches ``[\\w\\-.]+``, else ``False``
    """
    if not isinstance(fname, str):
        return False
    # fullmatch instead of match + '$': '$' also matches right before a
    # trailing newline, which would let 'name\n' pass the validation.
    # NOTE(review): '.' and '..' still satisfy the pattern - confirm whether
    # callers that join the name to a folder should reject them.
    return re.fullmatch(r'[\w\-.]+', fname) is not None
def main(argv):
    """Parse ``argv`` and run the batch MIRAX importer.

    :param argv: list of command line arguments, program name excluded
    """
    options = get_parser().parse_args(argv)
    batch_importer = MiraxBatchImporter(
        options.source_folder,
        options.ome_base_url,
        options.chunk_size,
        log_level=options.log_level,
        log_file=options.log_file,
    )
    batch_importer.run(options.clear)
class ServerError(Exception):
    """Raised when the OMERO.web server answers a request with an error."""


class MiraxImporter(object):
    """Register a single MIRAX slide (index file plus data folder) on OMERO.web.

    The slide is described by its ``.mrxs`` index file; the data folder is
    expected next to it, with the same name minus the extension.
    """

    # Mimetypes sent to the server for the two parts of a slide.
    INDEX_FILE_MT = 'mirax/index'
    DATA_FOLDER_MT = 'mirax/datafolder'

    def __init__(self, mirax_file, ome_base_url, chunk_size, log_level='INFO', log_file=None):
        """
        :param mirax_file: path to the ``.mrxs`` index file
        :param ome_base_url: base URL the ``mirax/...`` endpoints are joined to
        :param chunk_size: size, in MB, of the chunks read while hashing
        :param log_level: logging level, as a name or as an int
        :param log_file: log file path; a stream handler is used when falsy
        :raises ValueError: if ``chunk_size`` is not positive or the log level
            name is unknown
        """
        if chunk_size <= 0:
            # read(0) returns b'' immediately, which would silently produce
            # the SHA1 of empty data for every file.
            raise ValueError('chunk_size must be a positive number of MB, got %r' % chunk_size)
        self.mirax_file = mirax_file
        # urljoin() replaces the last path segment of a base that does not end
        # with '/', e.g. 'http://host/app' + 'mirax/' -> 'http://host/mirax/'.
        base_url = ome_base_url if ome_base_url.endswith('/') else ome_base_url + '/'
        self.ome_save_url = urljoin(base_url, 'mirax/register_file/')
        self.ome_delete_url = urljoin(base_url, 'mirax/delete_files/')
        self.big_files_chunk_size = chunk_size * 1024 * 1024
        self.logger = self.get_logger(log_level, log_file)

    def get_logger(self, log_level='INFO', log_file=None, mode='a'):
        """Configure and return the ``mirax_importer`` logger.

        Handlers already attached to that logger are dropped and replaced
        by a single file handler (when ``log_file`` is given) or stream
        handler.

        :raises ValueError: if ``log_level`` is a name unknown to ``logging``
        """
        LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
        LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'

        logger = logging.getLogger('mirax_importer')
        if not isinstance(log_level, int):
            try:
                log_level = getattr(logging, log_level)
            except AttributeError:
                raise ValueError(
                    'Unsupported literal log level: %s' % log_level)
        logger.setLevel(log_level)
        logger.handlers = []
        if log_file:
            handler = logging.FileHandler(log_file, mode=mode)
        else:
            handler = logging.StreamHandler()
        formatter = logging.Formatter(LOG_FORMAT, datefmt=LOG_DATEFMT)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        return logger

    def _check_mirax_dataset(self, mirax_file_path):
        """Return the data folder paired with ``mirax_file_path``.

        :raises ValueError: if the index file or its data folder is missing
        """
        if not path.isfile(mirax_file_path):
            raise ValueError('%s is not a valid file' % mirax_file_path)
        data_folder = path.splitext(mirax_file_path)[0]
        if not path.isdir(data_folder):
            raise ValueError(
                'Cannot find MIRAX data folder for file %s' % mirax_file_path)
        return data_folder

    def _get_sha1(self, file_name):
        """Return the hex SHA1 of a file, or of all the files in a folder.

        Folder entries are hashed in sorted name order so that the digest
        does not depend on the order ``listdir`` happens to return. A path
        that is neither a file nor a folder yields the SHA1 of empty data.
        """
        if path.isfile(file_name):
            targets = [file_name]
        elif path.isdir(file_name):
            targets = [path.join(file_name, f) for f in sorted(listdir(file_name))]
        else:
            targets = []
        hasher = sha1()
        for target in targets:
            with open(target, 'rb') as fp:
                # Read in chunks: both index files and data files can be big.
                for chunk in iter(lambda: fp.read(self.big_files_chunk_size), b''):
                    hasher.update(chunk)
        return hasher.hexdigest()

    def _get_file_details(self, file_name):
        """Build the request parameters describing an index file or data folder.

        :return: ``(label, details)``; ``(None, None)`` when ``file_name`` is
            neither a ``.mrxs`` file nor a folder
        """
        if path.isfile(file_name):
            label, ext = path.splitext(path.basename(file_name))
            if ext.lower() != '.mrxs':
                # Checked before hashing, so unrelated files are not read.
                label = None
            else:
                mimetype = self.INDEX_FILE_MT
                size = path.getsize(file_name)
        elif path.isdir(file_name):
            label = path.basename(file_name)
            mimetype = self.DATA_FOLDER_MT
            size = sum(path.getsize(path.join(file_name, f))
                       for f in listdir(file_name))
        else:
            label = None
        if label is None:
            details = None
        else:
            details = {
                'path': path.realpath(file_name),
                'sha1': self._get_sha1(file_name),
                'name': label,
                'mimetype': mimetype,
                'size': size,
            }
        self.logger.debug('Details for file %s: %r', file_name, details)
        return label, details

    def _save(self, file_details):
        """Send ``file_details`` to the save URL.

        :return: ``(True, omero_id)`` on success, ``(False, status_code)``
            otherwise
        """
        self.logger.debug('Saving data for file %s', file_details['name'])
        response = requests.get(self.ome_save_url, params=file_details)
        if response.status_code != requests.codes.ok:
            self.logger.debug('Status code: %d', response.status_code)
            return False, response.status_code
        omero_id = response.json()['omero_id']
        self.logger.debug('File saved, assigned ID: %s', omero_id)
        return True, omero_id

    def _clear(self, base_label):
        """Ask the server to delete the files named ``base_label``.

        :return: the HTTP status code of the delete request
        """
        # NOTE(review): the joined URL has no trailing slash - confirm it
        # matches the server side URL pattern.
        response = requests.get(urljoin(self.ome_delete_url, base_label))
        return response.status_code

    def run(self):
        """Register the index file, then the data folder, on the server.

        If the data folder cannot be saved, the index file registered just
        before is removed again so no half-imported slide is left behind.

        :raises ValueError: if the slide files are missing or the input is
            not a ``.mrxs`` file
        :raises ServerError: if the server refuses a save or the rollback
        """
        self.logger.info('Importing file %s', self.mirax_file)
        mirax_data_folder = self._check_mirax_dataset(self.mirax_file)
        mirax_file_label, mirax_file_details = self._get_file_details(self.mirax_file)
        if mirax_file_details is None:
            raise ValueError('%s is not a MIRAX index file (.mrxs)' % self.mirax_file)
        _, mirax_df_details = self._get_file_details(mirax_data_folder)

        saved, response = self._save(mirax_file_details)
        if not saved:
            raise ServerError(
                'Unable to save MIRAX file, server returned error code %s' % response)

        saved, response = self._save(mirax_df_details)
        if not saved:
            self.logger.warning(
                'Error while saving MIRAX data folder, removing MIRAX file from database')
            d0_status = self._clear(mirax_file_label)
            if d0_status != requests.codes.ok:
                raise ServerError('MIRAX file %s not cleaned properly' % mirax_file_label)
            # The rollback worked but the import still failed: report it
            # instead of logging a successful completion.
            raise ServerError(
                'Unable to save MIRAX data folder, server returned error code %s' % response)
        self.logger.info('Job completed')


def get_parser():
    """Build the command line parser of the single slide importer."""
    # The text must be passed as ``description``: the first positional
    # argument of ArgumentParser is ``prog``, the program name.
    parser = ArgumentParser(
        description='Import a single MIRAX file and related data folder to OMERO')
    parser.add_argument('--mirax-file', type=str, required=True,
                        help='the path to MIRAX file to be imported, MIRAX data folder with the same name must be in the same path')
    parser.add_argument('--ome-base-url', type=str, required=True,
                        help='the base URL of the OMERO.web server')
    parser.add_argument('--chunk-size', type=int, default=50,
                        help='size in MB of chunks that will be read to calculate the SHA1 for big files (default 50MB)')
    parser.add_argument('--log-level', type=str, default='INFO',
                        help='log level (default=INFO)')
    parser.add_argument('--log-file', type=str, default=None,
                        help='log file (default=stderr)')
    return parser


def main(argv):
    """Parse ``argv`` and import the requested MIRAX slide."""
    parser = get_parser()
    args = parser.parse_args(argv)
    importer = MiraxImporter(args.mirax_file, args.ome_base_url, args.chunk_size,
                             args.log_level, args.log_file)
    importer.run()


if __name__ == '__main__':
    main(sys.argv[1:])
# 3DHISTECH FILES HANDLING --- DATA MANAGEMENT url(r'^mirax/register_file/$', views.register_original_file, name='ome_seadragon_mrxs_save'), + url(r'mirax/register_slide/$', views.register_mirax_slide, name='ome_seadragon_register_mirax'), url(r'^mirax/file_info/(?P[\w\-.]+)/$', views.get_original_file_infos, name='ome_seadragon_mrxs_file_info'), url(r'^mirax/delete_file/(?P[\w\-.]+)/$', views.delete_original_file, diff --git a/views.py b/views.py index 2cca268..4079f56 100644 --- a/views.py +++ b/views.py @@ -17,13 +17,13 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from .ome_data import tags_data, projects_datasets, original_files +from .ome_data import tags_data, projects_datasets, original_files, mirax_files from .ome_data.original_files import DuplicatedEntryError +from .ome_data.mirax_files import InvalidMiraxFile, InvalidMiraxFolder, ServerConfigError from . import settings from .slides_manager import RenderingEngineFactory import logging -import re from distutils.util import strtobool try: import simplejson as json @@ -32,7 +32,7 @@ from omeroweb.webclient.decorators import login_required -from django.http import HttpResponse, HttpResponseNotFound, HttpResponseServerError +from django.http import HttpResponse, HttpResponseNotFound, HttpResponseServerError, HttpResponseBadRequest from django.shortcuts import render logger = logging.getLogger(__name__) @@ -406,12 +406,12 @@ def get_slide_bounds(request, image_id, fetch_original_file=False, file_mimetype def register_original_file(request, conn=None, **kwargs): try: fname = request.GET.get('name') - if not re.match(r'^[\w\-.]+$', fname): - return HttpResponseServerError('Invalid file name received: %s' % fname) + if not original_files.is_valid_filename(fname): + return HttpResponseBadRequest('Invalid file name received: %s' % fname) fpath = request.GET.get('path') fmtype = request.GET.get('mimetype') 
@login_required()
def register_mirax_slide(request, conn=None, **kwargs):
    """Register the index file and data folder of a MIRAX slide.

    The slide name is read from the ``slide_name`` GET parameter and the
    two paths are resolved with ``mirax_files.get_mirax_files_paths``.

    :return: a JSON response with the ``mirax_index_omero_id`` and
        ``mirax_folder_omero_id`` keys on success; a 400 response when the
        slide name is missing or invalid; a 500 response when the slide
        files cannot be resolved or an entry is duplicated
    """
    sname = request.GET.get('slide_name')
    # A missing parameter is None: reject it here instead of letting the
    # regex based validation raise TypeError.
    if not sname or not original_files.is_valid_filename(sname):
        # Client error, reported as 400 like register_original_file does.
        # The rejected value is untrusted, so it is echoed as plain text.
        return HttpResponseBadRequest('Invalid slide name received: %s' % sname,
                                      content_type='text/plain')
    try:
        mirax_paths = mirax_files.get_mirax_files_paths(sname)
    except (InvalidMiraxFile, InvalidMiraxFolder, ServerConfigError) as err:
        return HttpResponseServerError('{0}'.format(err))
    try:
        mirax_file_id = original_files.save_original_file(conn, sname, mirax_paths[0],
                                                          'mirax/index', -1, 'UNKNOWN')
    except DuplicatedEntryError as dee:
        return HttpResponseServerError('{0}'.format(dee))
    try:
        mirax_folder_id = original_files.save_original_file(conn, sname, mirax_paths[1],
                                                            'mirax/datafolder', -1, 'UNKNOWN')
    except DuplicatedEntryError as dee:
        # Roll back the index entry saved just above so the slide is not
        # left half registered.
        original_files.delete_original_files(conn, sname, 'mirax/index')
        return HttpResponseServerError('{0}'.format(dee))
    return HttpResponse(
        json.dumps({
            'mirax_index_omero_id': mirax_file_id,
            'mirax_folder_omero_id': mirax_folder_id
        }),
        content_type='application/json'
    )