-
Notifications
You must be signed in to change notification settings - Fork 2
/
list_subjects.py
executable file
·137 lines (122 loc) · 7.08 KB
/
list_subjects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python
"""
Query a DICOM node using findscu and print the matching entries as a JSON list. To
support long queries, the script first queries for all study instance UIDs and then, for
each of those, performs a second query for its series. No patient-level query is used
because OsiriX, for example, only supports Study and Series level findscu queries.
This script depends on the dcmtk tools dump2dcm and findscu being available on the PATH (for example by adding '/Applications/dcmtk/bin/' to it).
Usage:
> python list_subjects.py 128.54.39.72 11112
[
{
"StudyInstanceUID": "1.3.12.2.1107.5.2.6.23556.30000010060722383567100000623",
"SeriesInstanceUID": "1.3.6.1.4.1.21767.1276273464547.1000153281.4258",
"PatientID": "0370543 "
},
...
]
Examples:
Get a list of subjects only:
> python list_subjects.py 128.54.39.72 11112 | jq ".[].PatientID" | sort | uniq
Get studyinstanceuid for a single subject:
> python list_subjects.py 128.54.39.72 11112 | jq '[.[] | select(.PatientID|match("^9071628"))] | [.[].StudyInstanceUID] | unique'
or to get a list of all series that start with "3D-T1" in their series description:
> python list_subjects.py 128.54.39.72 11112 | jq '[.[] | select(.SeriesDescription| match("^3D-T1"))]'
or to get a string of studyinstance uid etc from the matching entries:
> python list_subjects.py 128.54.39.72 11112 | jq '[.[] | select(.SeriesDescription| match("^3D-T1"))]' | jq ".[] | [.StudyInstanceUID, .SeriesInstanceUID, .PatientID, .SeriesDescription]" | jq 'map("\(.) ") | add'
TODO: A single query for all study instance UIDs could be too large (it may hit the maximum number of returned entries).
In that case we should query for subsets of studies - can this be done by time? (findscu question)
"""
import json
import os
import re
import string
import subprocess
import sys
import tempfile
from subprocess import Popen, PIPE

import dicom
# Application entity titles handed to findscu: -aec is the called (remote)
# title, -aet the calling (our) title.
_CALLED_AE_TITLE = "myself"
_CALLING_AE_TITLE = "OsiriX"

# Text dump that dump2dcm converts into the DICOM query file. Every attribute
# listed here with an empty value is requested back from the node.
# SeriesDescription (0008,103e) is declared with VR "LO": the previous "LD" is
# not a DICOM value representation, and the response parser below matches "LO".
_QUERY_TEMPLATE = (
    "# Query all patient names and IDs with Patient Root model\n"
    "#\n"
    "(0008,0052) CS [PATIENT] # QueryRetrieveLevel\n"
    "(0010,0010) PN [] # PatientsName\n"
    "(0010,0020) LO [] # PatientID\n"
    "(0020,000d) UI [] # StudyInstanceUID\n"
    "(0020,000e) UI [] # SeriesInstanceUID\n"
    "(0008,103e) LO [] # SeriesDescription\n"
    "(0008,0020) DA [] # StudyDate"
)

# Marker that separates the individual data sets in the findscu log output.
_DATASET_MARKER = 'W: # Dicom-Data-Set'

_STUDY_UID_RE = re.compile(r'\(0020,000d\) UI \[([^\]]+)\]')

# Output key -> pattern extracting that attribute's value from one data set.
# The order here is the key order of each emitted record.
_FIELD_PATTERNS = (
    ('PatientName', re.compile(r'\(0010,0010\) PN \[([^\]]+)\]')),
    ('PatientID', re.compile(r'\(0010,0020\) LO \[([^\]]+)\]')),
    ('StudyInstanceUID', _STUDY_UID_RE),
    ('SeriesInstanceUID', re.compile(r'\(0020,000e\) UI \[([^\]]+)\]')),
    ('SeriesDescription', re.compile(r'\(0008,103e\) LO \[([^\]]+)\]')),
    ('StudyDate', re.compile(r'\(0008,0020\) DA \[([^\]]+)\]')),
)


def extract_study_uids(findscu_output):
    """Return every StudyInstanceUID found in findscu log text.

    UIDs are returned in order of appearance with embedded NUL characters
    removed. An input without matches yields an empty list.
    """
    return [uid.replace('\x00', '')
            for uid in _STUDY_UID_RE.findall(findscu_output)]


def parse_series_records(findscu_output):
    """Turn findscu log text into a list of per-data-set dictionaries.

    The text is split at each 'W: # Dicom-Data-Set' marker. For every section
    the six attributes in _FIELD_PATTERNS are looked up; attributes without a
    value are stored as None. Sections that carry neither a study date, a
    patient id nor a patient name are dropped.
    """
    records = []
    for section in findscu_output.split(_DATASET_MARKER):
        record = {}
        for key, pattern in _FIELD_PATTERNS:
            found = pattern.search(section)
            record[key] = found.group(1) if found else None
        if (record['StudyDate'] is None and record['PatientID'] is None
                and record['PatientName'] is None):
            continue
        records.append(record)
    return records


def _remove_quietly(path):
    """Delete *path*, ignoring the error if it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup of a temporary file; nothing useful to do here.
        pass


def _build_query_file():
    """Write the query template to disk and convert it with dump2dcm.

    Returns the path of the generated DICOM query file, or None (after
    reporting on stderr) when dump2dcm could not be run or exited with an
    error. The intermediate text dump is always removed.
    """
    dump_file = tempfile.NamedTemporaryFile(mode='w+t', delete=False)
    dcm_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        dump_file.write(_QUERY_TEMPLATE)
    finally:
        # Both files must be closed before dump2dcm accesses them by name.
        dump_file.close()
        dcm_file.close()
    try:
        subprocess.check_output(
            ["dump2dcm", "+te", dump_file.name, dcm_file.name])
    except (OSError, subprocess.CalledProcessError) as err:
        sys.stderr.write("Execution of dump2dcm failed: %s\n" % err)
        _remove_quietly(dcm_file.name)
        return None
    finally:
        _remove_quietly(dump_file.name)
    return dcm_file.name


def _run_findscu(keys, host, port, query_file):
    """Run findscu against host/port and return its stderr output as text.

    *keys* is a list of "gggg,eeee=value" strings, each passed with its own
    -k option. The command is executed as an argument list (no shell), so the
    host, port and UID values cannot be interpreted as shell syntax.
    Returns None (after reporting on stderr) if findscu could not be started.
    """
    cmd = ["findscu", "-v", "-S"]
    for key in keys:
        cmd.extend(["-k", key])
    cmd.extend(["-aec", _CALLED_AE_TITLE, "-aet", _CALLING_AE_TITLE,
                host, port, query_file])
    try:
        proc = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, close_fds=True)
        _, log_output = proc.communicate()
    except (OSError, TypeError, ValueError) as err:
        sys.stderr.write("Execution of findscu failed: %s on \"%s\"\n"
                         % (err, " ".join(cmd)))
        return None
    if isinstance(log_output, bytes):
        # Decode leniently so unexpected bytes in names cannot abort the run.
        log_output = log_output.decode("utf-8", "replace")
    return log_output


def main(argv=None):
    """List all series of a DICOM node as JSON on stdout.

    *argv* defaults to sys.argv and must hold the script name, the node's IP
    and its port. A study-level findscu query collects the study instance
    UIDs; one series-level query per study then collects the records.
    Returns 0 on success and 1 on a usage error or when the query file or the
    study-level query could not be produced. A failing series-level query is
    reported and skipped.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        sys.stderr.write(
            "Error: wrong number of arguments, we require an IP and a PORT\n")
        return 1
    host, port = argv[1], argv[2]

    query_file = _build_query_file()
    if query_file is None:
        return 1
    try:
        study_log = _run_findscu(["0008,0052=STUDY"], host, port, query_file)
        if study_log is None:
            return 1
        records = []
        for study_uid in extract_study_uids(study_log):
            series_log = _run_findscu(
                ["0008,0052=SERIES", "0020,000d=" + study_uid],
                host, port, query_file)
            if series_log is None:
                # Skip this study instead of re-parsing stale output.
                continue
            records.extend(parse_series_records(series_log))
    finally:
        _remove_quietly(query_file)

    print(json.dumps(records, indent=2))
    return 0


if __name__ == "__main__":
    sys.exit(main())