-
Notifications
You must be signed in to change notification settings - Fork 0
/
categorization.py
83 lines (76 loc) · 4.06 KB
/
categorization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import pickle
import commonDB
import pandas as pd
# import waveformUtil as wfutil TODO: remove these circular dependencies
# from matplotlib import pyplot as plt
## @author Mohammed Saqib
## This file is responsible for categorizing sepsis according to the Angus criteria
def getQSofaCategorization(hadm_ids = None, subject_ids = None):
    """
    Unfinished categorization based on the quick sepsis organ failure assessment (qSOFA), as
    defined by the third international consensus for sepsis
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4968574/

    The three chartevents queries below are executed, but their results are not yet combined
    into a score, so this function currently always returns None.
    TODO: Complete this later

    :param hadm_ids a list of hadm_ids which to apply standard to; if None, the admissions are
        looked up from subject_ids (or all admissions are used when subject_ids is None too)
    :param subject_ids a list of subject_ids which to apply standard to. if hadm_ids is set, this doesn't do anything
    :return None for now; intended to be a dataframe with hadm_ids as index and a column with sepsis
    """
    if hadm_ids is None:
        admissionsQuery = "SELECT hadm_id FROM admissions"
        if subject_ids is not None:
            admissionsQuery += " WHERE subject_id in " + commonDB.convertListToSQL(subject_ids)
        # The original passed an undefined `conn` here (NameError); every other read_sql call
        # in this function takes only the query, so the same form is used.
        # NOTE(review): the read_sql result is handed straight to convertListToSQL below --
        # confirm that helper accepts whatever read_sql returns.
        hadm_ids = commonDB.read_sql(admissionsQuery)
    idList = commonDB.convertListToSQL(hadm_ids)

    # qSOFA criterion: altered mentation -- check and see if glasgow coma score is below 15
    # (itemid 227013 is presumably the GCS chart item -- verify against d_items)
    alteredMentalVerbal = commonDB.read_sql("SELECT hadm_id FROM chartevents " \
        + "WHERE valuenum < 15 and itemid = 227013 and hadm_id in " \
        + idList)
    # qSOFA criterion: respiratory rate of 22 or more
    # (itemid 224690 is presumably a respiratory rate chart item -- verify against d_items)
    increasedRespiration = commonDB.read_sql("SELECT hadm_id FROM chartevents " \
        + "WHERE valuenum >= 22 and itemid = 224690 and hadm_id in" \
        + idList)
    # qSOFA criterion: systolic blood pressure of 100 or less
    # (the itemids are presumably the systolic BP chart items -- verify against d_items)
    lowSystolicBP = commonDB.read_sql("SELECT hadm_id FROM chartevents " \
        + 'WHERE valuenum <= 100 and itemid in (228152, 220050, 442, 455, 6, 51, 3313, 3317, 3319, 3321, 3323) and hadm_id in ' \
        + idList)
    #TODO: Complete this later -- combine the three criteria into a per-admission result
    return None
def getCategorizationsBySubjectID(catDF = None):
    """
    This is a function to return the categorizations by subject instead of hadm_id
    in other words, if a subject had a sepsis diagnosis ever (angus == 1 on any row),
    he/she will be classified as such

    :param catDF optional categorization DataFrame to use that uses hadm_id based categorization;
        it must have "subject_id" and "angus" columns. If None, data/sql/angus.sql is run
        (with its "<INSERT IDS HERE>" placeholder blanked out) to build it.
    :return DataFrame indexed by subject_id with a single integer column "angus" (1 or 0),
        one row per subject in order of first appearance; empty if catDF has no rows
    """
    # `is None` rather than `== None`: comparing a DataFrame with == is elementwise and its
    # truth value is ambiguous, so a caller-supplied frame used to raise ValueError here.
    if catDF is None:
        conn = commonDB.getConnection()
        with open("data/sql/angus.sql") as f:
            query = f.read()
        query = query.replace("<INSERT IDS HERE>", "")
        catDF = pd.read_sql(query, conn)
        catDF.set_index(["hadm_id"], inplace=True)

    # One grouped pass instead of re-filtering the whole frame for every subject.
    # sort=False keeps subjects in order of first appearance, like Series.unique() did.
    hasSepsis = catDF["angus"].eq(1)
    bySubject = hasSepsis.groupby(catDF["subject_id"], sort=False).any()
    return bySubject.astype(int).to_frame("angus")
def getCategorizations(hadm_ids = None):
    """
    Run the query stored in data/sql/angus.sql, which is responsible for categorizing patients

    :param hadm_ids the hadm_ids to restrict the query to; if None, run it on all
    :return the query result as a DataFrame indexed by hadm_id
    """
    connection = commonDB.getConnection()
    with open("data/sql/angus.sql") as sqlFile:
        template = sqlFile.read()

    # The SQL file carries a placeholder that becomes either nothing or a WHERE clause.
    if hadm_ids is None:
        idFilter = ""
    else:
        idFilter = "WHERE hadm_id IN " + commonDB.convertListToSQL(hadm_ids)

    categorized = pd.read_sql(template.replace("<INSERT IDS HERE>", idFilter), connection)
    return categorized.set_index(["hadm_id"])
if __name__ == "__main__":
    # Run as a script: categorize every admission and dump the result to CSV.
    allCategorizations = getCategorizations()
    allCategorizations.to_csv("data/rawdatafiles/categorizations.csv")