Skip to content

Commit

Permalink
Merge pull request #837 from cortex-lab/dataset_qc
Browse files Browse the repository at this point in the history
Dataset QC
  • Loading branch information
k1o0 authored Mar 14, 2024
2 parents f59dc44 + 8c067a8 commit 53950c6
Show file tree
Hide file tree
Showing 22 changed files with 212 additions and 46 deletions.
9 changes: 5 additions & 4 deletions alyx/actions/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ def is_water_restricted(self, obj):
class WeighingForm(BaseActionForm):
def __init__(self, *args, **kwargs):
super(WeighingForm, self).__init__(*args, **kwargs)
self.fields['subject'].queryset = self.current_user.get_allowed_subjects()
if 'subject' in self.fields:
self.fields['subject'].queryset = self.current_user.get_allowed_subjects()
if self.fields.keys():
self.fields['weight'].widget.attrs.update({'autofocus': 'autofocus'})

Expand Down Expand Up @@ -455,10 +456,10 @@ class DatasetInline(BaseInlineAdmin):
show_change_link = True
model = Dataset
extra = 1
fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'created_by',
'created_datetime')
fields = ('name', 'dataset_type', 'collection', '_online', 'version', 'qc',
'created_by', 'created_datetime')
readonly_fields = fields
ordering = ("name",)
ordering = ('name',)

def _online(self, obj):
return obj.is_online
Expand Down
18 changes: 18 additions & 0 deletions alyx/actions/migrations/0021_alter_session_extended_qc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.10 on 2024-03-12 13:55

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated (Django 4.2.10). Only updates the help_text of
    # Session.extended_qc — the models.py diff adds a missing space after
    # "QC," — so this is a metadata-only change; the column type (JSONField,
    # null/blank allowed) is unchanged.

    dependencies = [
        ('actions', '0020_alter_notification_notification_type_and_more'),
    ]

    operations = [
        migrations.AlterField(
            model_name='session',
            name='extended_qc',
            field=models.JSONField(blank=True, help_text='Structured data about session QC, formatted in a user-defined way', null=True),
        ),
    ]
18 changes: 7 additions & 11 deletions alyx/actions/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from datetime import timedelta
import structlog
from math import inf

import structlog
from one.alf.spec import QC

from django.conf import settings
from django.core.validators import MinValueValidator
from django.db import models
Expand Down Expand Up @@ -253,18 +255,12 @@ class Session(BaseAction):
n_trials = models.IntegerField(blank=True, null=True)
n_correct_trials = models.IntegerField(blank=True, null=True)

QC_CHOICES = [
(50, 'CRITICAL',),
(40, 'FAIL',),
(30, 'WARNING',),
(0, 'NOT_SET',),
(10, 'PASS',),
]

qc = models.IntegerField(default=0, choices=QC_CHOICES,
QC_CHOICES = [(e.value, e.name) for e in QC]
qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES,
help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES]))

extended_qc = models.JSONField(null=True, blank=True,
help_text="Structured data about session QC,"
help_text="Structured data about session QC, "
"formatted in a user-defined way")

auto_datetime = models.DateTimeField(auto_now=True, blank=True, null=True,
Expand Down
3 changes: 2 additions & 1 deletion alyx/actions/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,13 @@ class SessionDatasetsSerializer(serializers.ModelSerializer):
queryset=DatasetType.objects.all(),
)
default_revision = serializers.CharField(source='default_dataset')
qc = BaseSerializerEnumField(required=False)

class Meta:
list_serializer_class = FilterDatasetSerializer
model = Dataset
fields = ('id', 'name', 'dataset_type', 'data_url', 'url', 'file_size',
'hash', 'version', 'collection', 'revision', 'default_revision')
'hash', 'version', 'collection', 'revision', 'default_revision', 'qc')


class SessionWaterAdminSerializer(serializers.ModelSerializer):
Expand Down
22 changes: 20 additions & 2 deletions alyx/actions/tests_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,10 @@ def test_sessions(self):
# test dataset type filters
dtype1, _ = DatasetType.objects.get_or_create(name='trials.table')
dtype2, _ = DatasetType.objects.get_or_create(name='wheel.position')
Dataset.objects.create(session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1)
Dataset.objects.create(session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2)
Dataset.objects.create(
session=ses, name='_ibl_trials.table.pqt', dataset_type=dtype1, qc=40)
Dataset.objects.create(
session=ses, name='_ibl_wheel.position.npy', dataset_type=dtype2, qc=30)
d = self.ar(self.client.get(reverse('session-list') + '?dataset_types=wheel.position'))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d))
q = '?dataset_types=wheel.position,trials.table' # Check with list
Expand All @@ -280,6 +282,22 @@ def test_sessions(self):
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d))
q = '?datasets=wheel.position'
self.assertFalse(self.ar(self.client.get(reverse('session-list') + q)))
# multiple datasets
q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt'
d = self.ar(self.client.get(reverse('session-list') + q))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d))
# datasets + qc (expect to return sessions where defined datasets have correct QC)
q = '?datasets=_ibl_wheel.position.npy,_ibl_trials.table.pqt&dataset_qc_lte=WARNING'
self.assertFalse(self.ar(self.client.get(reverse('session-list') + q)))
q = '?datasets=_ibl_wheel.position.npy&dataset_qc_lte=WARNING'
d = self.ar(self.client.get(reverse('session-list') + q))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session')
# qc alone (expect to return sessions where any dataset has correct QC)
q = '?dataset_qc_lte=WARNING'
d = self.ar(self.client.get(reverse('session-list') + q))
self.assertCountEqual([str(ses.pk)], (x['id'] for x in d), 'failed to return session')
q = '?dataset_qc_lte=10'
self.assertFalse(self.ar(self.client.get(reverse('session-list') + q)))

def test_surgeries(self):
from actions.models import Surgery
Expand Down
25 changes: 20 additions & 5 deletions alyx/actions/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import timedelta, date
from operator import itemgetter

from one.alf.spec import QC
from django.contrib.postgres.fields import JSONField
from django.db.models import Count, Q, F, ExpressionWrapper, FloatField
from django.db.models.deletion import Collector
Expand Down Expand Up @@ -223,9 +224,11 @@ class ProcedureTypeList(generics.ListCreateAPIView):


class SessionFilter(BaseActionFilter):
dataset_types = django_filters.CharFilter(field_name='dataset_types',
method='filter_dataset_types')
dataset_types = django_filters.CharFilter(
field_name='dataset_types', method='filter_dataset_types')
datasets = django_filters.CharFilter(field_name='datasets', method='filter_datasets')
dataset_qc_lte = django_filters.CharFilter(
field_name='dataset_qc', method='filter_dataset_qc_lte')
performance_gte = django_filters.NumberFilter(field_name='performance',
method='filter_performance_gte')
performance_lte = django_filters.NumberFilter(field_name='performance',
Expand Down Expand Up @@ -284,13 +287,23 @@ def filter_dataset_types(self, queryset, _, value):

def filter_datasets(self, queryset, _, value):
# Note this may later be modified to include collections, e.g. ?datasets=alf/obj.attr.ext
qc = QC.validate(self.request.query_params.get('dataset_qc_lte', QC.FAIL))
dsets = value.split(',')
queryset = queryset.filter(data_dataset_session_related__name__in=dsets)
queryset = queryset.filter(data_dataset_session_related__name__in=dsets,
data_dataset_session_related__qc__lte=qc)
queryset = queryset.annotate(
dsets_count=Count('data_dataset_session_related', distinct=True))
queryset = queryset.filter(dsets_count__gte=len(dsets))
return queryset

def filter_dataset_qc_lte(self, queryset, _, value):
    """Filter sessions to those with at least one related dataset whose QC
    outcome is less than or equal to `value`.

    `value` may be a QC name or numeric code (tests use 'WARNING' and '10');
    it is normalised through one.alf.spec.QC.validate before comparison.
    """
    # If filtering on datasets too, `filter_datasets` handles both QC and Datasets
    if 'datasets' in self.request.query_params:
        return queryset
    qc = QC.validate(value)
    # NOTE(review): without an accompanying distinct()/annotate this relies on
    # the related-lookup semantics of the queryset — presumably duplicates are
    # handled downstream; confirm against the view's final queryset.
    queryset = queryset.filter(data_dataset_session_related__qc__lte=qc)
    return queryset

def filter_performance_gte(self, queryset, name, perf):
queryset = queryset.exclude(n_trials__isnull=True)
pf = ExpressionWrapper(100 * F('n_correct_trials') / F('n_trials'),
Expand Down Expand Up @@ -326,6 +339,8 @@ class SessionAPIList(generics.ListCreateAPIView):
- **subject**: subject nickname `/sessions?subject=Algernon`
- **dataset_types**: dataset type(s) `/sessions?dataset_types=trials.table,camera.times`
- **datasets**: dataset name(s) `/sessions?datasets=_ibl_leftCamera.times.npy`
- **dataset_qc_lte**: dataset QC values less than or equal to this
`/sessions?dataset_qc_lte=WARNING`
- **number**: session number
- **users**: experimenters (exact)
- **date_range**: date `/sessions?date_range=2020-01-12,2020-01-16`
Expand Down Expand Up @@ -354,9 +369,9 @@ class SessionAPIList(generics.ListCreateAPIView):
- **histology**: returns sessions for which the subject has an histology session:
`/sessions?histology=True`
- **django**: generic filter allowing lookups (same syntax as json filter)
`/sessions?django=project__name__icontains,matlab
`/sessions?django=project__name__icontains,matlab`
filters sessions that have matlab in the project name
`/sessions?django=~project__name__icontains,matlab
`/sessions?django=~project__name__icontains,matlab`
does the exclusive set: filters sessions that do not have matlab in the project name
[===> session model reference](/admin/doc/models/actions.session)
Expand Down
2 changes: 1 addition & 1 deletion alyx/alyx/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = __version__ = '1.18.2'
VERSION = __version__ = '2.0.0'
11 changes: 6 additions & 5 deletions alyx/data/admin.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.db.models import Count, ProtectedError
from django.contrib import admin, messages
from django.utils.html import format_html
from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter
from django_admin_listfilter_dropdown.filters import RelatedDropdownFilter, ChoiceDropdownFilter
from rangefilter.filters import DateRangeFilter

from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType,
Expand Down Expand Up @@ -84,16 +84,17 @@ class FileRecordInline(BaseInlineAdmin):
class DatasetAdmin(BaseExperimentalDataAdmin):
fields = ['name', '_online', 'version', 'dataset_type', 'file_size', 'hash',
'session_ro', 'collection', 'auto_datetime', 'revision_', 'default_dataset',
'_protected', '_public', 'tags']
'_protected', '_public', 'tags', 'qc']
readonly_fields = ['name_', 'session_ro', '_online', 'auto_datetime', 'revision_',
'_protected', '_public', 'tags']
'_protected', '_public', 'tags', 'qc']
list_display = ['name_', '_online', 'version', 'collection', 'dataset_type_', 'file_size',
'session_ro', 'created_by', 'created_datetime']
'session_ro', 'created_by', 'created_datetime', 'qc']
inlines = [FileRecordInline]
list_filter = [('created_by', RelatedDropdownFilter),
('created_datetime', DateRangeFilter),
('dataset_type', RelatedDropdownFilter),
('tags', RelatedDropdownFilter)
('tags', RelatedDropdownFilter),
('qc', ChoiceDropdownFilter)
]
search_fields = ('session__id', 'name', 'collection', 'dataset_type__name',
'dataset_type__filename_pattern', 'version')
Expand Down
11 changes: 11 additions & 0 deletions alyx/data/fixtures/data.datasettype.json
Original file line number Diff line number Diff line change
Expand Up @@ -2220,5 +2220,16 @@
"description": "Look up table from photometry ROI, to fiber name registered in the database and Allen brain location",
"filename_pattern": "*photometryROI.locations*"
}
},
{
"model": "data.datasettype",
"pk": "140cd2a9-91c1-45ee-9d19-77e8d39abb5f",
"fields": {
"json": null,
"name": "laserStimulation.intervals",
"created_by": null,
"description": "The start and end times of the laser stimulation period.",
"filename_pattern": ""
}
}
]
18 changes: 18 additions & 0 deletions alyx/data/migrations/0019_dataset_qc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.10 on 2024-02-13 15:16

from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated (Django 4.2.10). Adds the integer `qc` column to
    # data.Dataset; the choices mirror the one.alf.spec.QC enum used in
    # data/models.py (default 0 = NOT_SET). The choices/help_text are baked
    # in here as literals, as is standard for generated migrations.

    dependencies = [
        ('data', '0018_alter_dataset_collection_alter_revision_name'),
    ]

    operations = [
        migrations.AddField(
            model_name='dataset',
            name='qc',
            field=models.IntegerField(choices=[(50, 'CRITICAL'), (40, 'FAIL'), (30, 'WARNING'), (0, 'NOT_SET'), (10, 'PASS')], default=0, help_text='50: CRITICAL / 40: FAIL / 30: WARNING / 0: NOT_SET / 10: PASS'),
        ),
    ]
5 changes: 5 additions & 0 deletions alyx/data/models.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import structlog
from one.alf.spec import QC

from django.core.validators import RegexValidator
from django.db import models
Expand Down Expand Up @@ -351,6 +352,10 @@ class Dataset(BaseExperimentalData):
help_text="Whether this dataset is the default "
"latest revision")

QC_CHOICES = [(e.value, e.name) for e in QC]
qc = models.IntegerField(default=QC.NOT_SET, choices=QC_CHOICES,
help_text=' / '.join([str(q[0]) + ': ' + q[1] for q in QC_CHOICES]))

@property
def is_online(self):
fr = self.file_records.filter(data_repository__globus_is_personal=False)
Expand Down
9 changes: 8 additions & 1 deletion alyx/data/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
from rest_framework import serializers
from django.db.models import Count, Q, BooleanField

from one.alf.spec import QC

from .models import (DataRepositoryType, DataRepository, DataFormat, DatasetType,
Dataset, Download, FileRecord, Revision, Tag)
from .transfers import _get_session, _change_default_dataset
from alyx.base import BaseSerializerEnumField
from actions.models import Session
from subjects.models import Subject
from misc.models import LabMember
Expand Down Expand Up @@ -142,6 +145,7 @@ class DatasetSerializer(serializers.HyperlinkedModelSerializer):
default_dataset = serializers.BooleanField(required=False, allow_null=True)
public = serializers.ReadOnlyField()
protected = serializers.ReadOnlyField()
qc = BaseSerializerEnumField(required=False)
file_records = DatasetFileRecordsSerializer(read_only=True, many=True)

experiment_number = serializers.SerializerMethodField()
Expand Down Expand Up @@ -178,6 +182,9 @@ def create(self, validated_data):
name = validated_data.get('name', None)
default = validated_data.get('default_dataset', None)
session = validated_data.get('session', None)
# validate QC value
if 'qc' in validated_data:
validated_data['qc'] = QC.validate(validated_data['qc'])

if session:
if default is not False:
Expand Down Expand Up @@ -213,7 +220,7 @@ class Meta:
'session', 'file_size', 'hash', 'version',
'experiment_number', 'file_records',
'subject', 'date', 'number', 'auto_datetime', 'revision',
'default_dataset', 'protected', 'public', 'tags')
'default_dataset', 'protected', 'public', 'tags', 'qc', 'json')
extra_kwargs = {
'subject': {'write_only': True},
'date': {'write_only': True},
Expand Down
23 changes: 23 additions & 0 deletions alyx/data/tests_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def test_dataset_filerecord(self):
self.assertTrue(new_mod_date > mod_date)

def test_dataset(self):
# Test dataset creation via the datasets endpoint
data = {
'name': 'some-dataset',
'dataset_type': 'dst',
Expand All @@ -145,6 +146,8 @@ def test_dataset(self):
self.assertEqual(r.data['collection'], None)
# Check that it has been set as the default dataset
self.assertEqual(r.data['default_dataset'], True)
# Check QC value is NOT_SET by default
self.assertEqual(r.data['qc'], 'NOT_SET')
# Make sure a session has been created.
session = r.data['session']
r = self.client.get(session)
Expand All @@ -162,13 +165,15 @@ def test_dataset(self):
'date': '2018-01-01',
'number': 2,
'collection': 'test_path',
'qc': 'PASS'
}

r = self.post(reverse('dataset-list'), data)
self.ar(r, 201)
self.assertEqual(r.data['revision'], None)
self.assertEqual(r.data['collection'], data['collection'])
self.assertEqual(r.data['default_dataset'], True)
self.assertEqual(r.data['qc'], 'PASS')
data_url = r.data['url']

# But if we change the collection, we are okay
Expand Down Expand Up @@ -342,6 +347,24 @@ def test_register_files_hostname(self):
self.assertEqual(ds0.version, '1.1.1')
self.assertEqual(ds1.version, '2.2.2')

def test_qc_validation(self):
    """Test QC outcome validation when registering files.

    Registers two datasets via the register-file endpoint and checks that
    the `qc` field accepts both numeric codes and (case-insensitive) QC
    names, and that a single QC value is broadcast to all registered
    datasets.
    """
    # this tests the validation of dataset QC outcomes
    data = {
        'path': '%s/2018-01-01/2/dir' % self.subject,
        'filenames': 'a.b.e1,a.c.e2',
        'hostname': 'hostname',
        'qc': '10,critical'  # Both numerical and string QC values should be parsed
    }
    r = self.post(reverse('register-file'), data)
    records = self.ar(r, 201)  # expect HTTP 201 Created with one record per file
    # '10' -> PASS (10); 'critical' -> CRITICAL (50), i.e. names are upper-cased
    self.assertEqual([10, 50], [rec['qc'] for rec in records])
    self._assert_registration(r, data)
    # a single QC value should be applied to all datasets
    data['qc'] = 'FAIL'
    r = self.post(reverse('register-file'), data)
    records = self.ar(r, 201)
    self.assertEqual([40, 40], [rec['qc'] for rec in records])

def test_register_files_hash(self):
# this is old use case where we register one dataset according to the hostname, no need
# for a lab in this case
Expand Down
Loading

0 comments on commit 53950c6

Please sign in to comment.