-
Notifications
You must be signed in to change notification settings - Fork 0
/
pfi.py
383 lines (278 loc) · 16.5 KB
/
pfi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
'''A module for analyzing FloZF programmable Flow Injection data'''
import pfi
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
import matplotlib.axis as ax
import numpy as np
from scipy import stats
from tabulate import tabulate
from outliers import smirnov_grubbs as grubbs
from numpy.polynomial import Polynomial as P
def plot_spectra(index):
'''
INPUTS: Value of sample index used for plotting absorbance spectra
OUTPUTS: Absorbance spectra plot
'''
df = pd.read_csv('master_data/sample_'+ str(index) + '.spectrum.pfl', delimiter='\t', skiprows=3) #imports spectrum file
spectra = df[(df['Wavelength (nm)'] > 500) & (df['Wavelength (nm)'] <1000)] #keeps data from wavelengths only between 500-1000nm.
#plots spectra
plt.figure()
plt.plot(spectra['Wavelength (nm)'],spectra['Absorbance'])
plt.xlabel('Wavelength[nm]')
plt.ylabel('Absorbance[mAU]')
plt.title('Index ' + str(index) + ' Absorbance Spectrum')
plt.ylim(min(spectra['Absorbance'] - 0.01),max(spectra['Absorbance']+0.01))
plt.show()
#################################################################################
def plot_timeseries(index,reflambda):
'''
INPUTS:
- Value of sample index used for plotting time-series monitoring
- Reference wavelength used for plotting time-series (e.g. A880-A510 or A880-A975)
OUTPUTS: Time-series plot
'''
df = pd.read_csv('master_data/sample_'+ str(index) + '.pfl', delimiter='\t', skiprows=31) #imports time-series file
#plots time-series
plt.figure()
plt.plot(df['Time (s)'],df[str(reflambda)])
plt.xlabel('Time[s]')
plt.ylabel('Absorbance[mAU]')
plt.title('Index ' + str(index) + ' Absorbance Time Series')
plt.show()
#################################################################################
def abs_lookup(index):
'''
INPUTS: Value of sample index used for idenfiying absorbance value
OUTPUTS: Absorbance value of given sample index at 880-510nm
'''
file_list = glob('master_data/*.pfl') #produces list of all pfl files in master_data directory
indices = []
absorbances = dict()
A880_A510 = dict()
for file in file_list: #extracts the index numbers from each file as unique idenfiers and stores them in a list "indices"
short_title = file.split('_')[2]
name = short_title.split('.')
number = int(name[0])
indices.append(number)
#Cleaning master_data time-series files to correspond to correct reference wavelength in first column of file
absorbances[number] = pd.read_csv(file, delimiter='\t', nrows=30)
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][23], value='A880-A510', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][16], value='A880-A775', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][9], value='A880-A975', inplace=False, limit=None, regex=False, method='pad')
Abs = absorbances[number]['sample name'] == "A880-A510" #Extracting absorbance values that only correspond to the 510nm reference wavelength.
A880_A510[number] = (absorbances[number][Abs]['sample']) #Dictionary which contains absorbance values at 880-510nm for any index.
return 'Absorbance[mAU] @ 880-510nm = ' + A880_A510[index]
#################################################################################
def multispectra(sampleindices):
'''
INPUTS: Values of sample indices used for plotting multiple absorbance spectra (1-D array)
OUTPUTS: Absorbance spectra plot of all specified samples together
'''
spectra_list = glob('master_data/*.spectrum.pfl')
indices = []
spectra = dict()
A880 = dict()
for file in spectra_list:
short_title = file.split('_')[2]
name = short_title.split('.')
number = int(name[0])
indices.append(number) #produces list of all pfl files in master_data directory
spectra[number] = pd.read_csv(file, delimiter='\t', skiprows=3) #imports all spectra from master_data directory
#plots all specified spectra in one figure
for i in sampleindices:
plt.plot(spectra[i]['Wavelength (nm)'],spectra[i]['Absorbance'],label=str(i))
plt.xlabel('Wavelength [$\lambda$]')
plt.ylabel('Absorbance [mAU]')
plt.title('Absorbance Spectra')
plt.xlim(500,1000)
plt.ylim(-0.1,0.5)
plt.legend()
plt.show()
#################################################################################
def calib_plots(indices):
'''
Inputs:
indices - List of sample indices used in calibration. Note: must be same length as the x variable [1-D array]
Note: assumes calibration uses standards of 0uM, 0.5uM, 1.5uM, 3uM run in triplicates.
Returns:
Linear least-squares regression plot + residuals from model
2nd degree Polynomial least-squares regression plot + residuals from model
'''
x = np.array([0, 0, 0, 0.5, 0.5, 0.5, 1.5, 1.5, 1.5, 3, 3, 3])
absorbances = dict()
A880_A510 = dict()
y = []
for number in indices:
absorbances[number] = pd.read_csv('master_data/sample_'+str(number)+'.pfl', delimiter='\t', nrows=30)
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][23], value='A880-A510', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][16], value='A880-A775', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][9], value='A880-A975', inplace=False, limit=None, regex=False, method='pad')
Abs = absorbances[number]['sample name'] == "A880-A510" #Extracting absorbance values that only correspond to the 510nm reference wavelength.
A880_A510[number] = float(absorbances[number][Abs]['sample']) #Completed dictionary which contains absorbance values at 880-510nm for any index.
val = A880_A510[number]
y.append(val) #list of absorbance values for all specified indices in input
results = stats.linregress(x,y) #calcualting linear least-squares regression from inputs
linear = (results[0]*x) + results[1]
residual = y - linear
fit = np.polyfit(x,y,2) #calcualting 2nd degree polynomial from inputs
a = fit[0]
b = fit[1]
c = fit[2]
fit_equation = a * np.square(x) + b * x + c
#PLOTTING LINEAR REGRESSION
plt.figure(figsize=(12,10))
plt.subplot(2,2,1)
plt.plot(x,y,'.',ms=10)
plt.plot(x,linear)
plt.title('Calibration: Linear Fit')
plt.xlabel('PO4 [uM]')
plt.ylabel('Absorbance [mAU]')
plt.grid()
#RESIDUALS OF LINEAR CALIBRATION
plt.subplot(2,2,2)
plt.plot(x, y-linear, '.',ms=10)
plt.axhline(y=0, color='black', linestyle='-', lw=1)
plt.ylabel('Residuals')
plt.xlabel('PO4 [uM]')
plt.title('Residuals: Linear Fit')
plt.grid()
#PLOTTING 2ND DEG POLYNOMIAL FIT
plt.subplot(2,2,3)
plt.plot(x,y,'.',ms=10)
plt.plot(x,fit_equation)
plt.ylabel('Absorbance [mAU]')
plt.xlabel('PO4 [uM]')
plt.title('Calibration: Polynomial Fit')
plt.grid()
#RESIDUALS OF POLYNOMIAL FIT
plt.subplot(2,2,4)
plt.plot(y,y-fit_equation,'.',ms=10)
plt.axhline(y=0, color='black', linestyle='-', lw=1)
plt.ylabel('Residuals')
plt.xlabel('PO4 [uM]')
plt.title('Residuals: Polynomial Fit')
plt.grid()
plt.show()
#################################################################################
def calib_stats(indices):
'''
Inputs:
indices - List of sample indices used in calibration. Note: must be same length as the x-array (array of known PO4 concentrations used in calibration)
Outputs:
Table containing figures of merit: calibration slope, calibration y-intercept, coefficient of determination (r-squared), standard deviation of the slope, 95% confidence intervals of the slope (using alpha = 0.05), Limit of Detection (nM).
'''
x = np.array([0, 0, 0, 0.5, 0.5, 0.5, 1.5, 1.5, 1.5, 3, 3, 3])
absorbances = dict()
A880_A510 = dict()
y = []
#Cleaning master_data time-series files to correspond to correct reference wavelength in first column of file
for number in indices:
absorbances[number] = pd.read_csv('master_data/sample_'+str(number)+'.pfl', delimiter='\t', nrows=30)
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][23], value='A880-A510', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][16], value='A880-A775', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][9], value='A880-A975', inplace=False, limit=None, regex=False, method='pad')
Abs = absorbances[number]['sample name'] == "A880-A510" #Extracting absorbance values that only correspond to the 510nm reference wavelength.
A880_A510[number] = float(absorbances[number][Abs]['sample']) #Completed dictionary which contains absorbance values at 880-510nm for any index.
abslist = A880_A510[number]
y.append(abslist) #list of absorbance values for all specified indices in input
lin_results = stats.linregress(x,y) #linear least-squares regression fit
a2 = lin_results[0] #linear slope
intercept = lin_results [1] #intercept of linear model
y_hat = (a2*x) + intercept
poly_results = np.polyfit(x,y,deg=2) #polynomial fit
r_squared = lin_results.rvalue**2
N=len(x)
mean_x = np.mean(x)
mean_y = np.mean(y)
s_yx = np.sqrt(sum((y-y_hat)**2)/(N-2)) #random error in the y-direction
s_m = s_yx / (np.sqrt(sum((x-mean_x)**2))) #standard deviation of the slope
blanks = y[0:3] #specifies blank standards. Note: assumes that first 3 datapoints in calibration are blanks in triplicate
LOD = 1000 * ((3*np.std(blanks))/a2) #Limit of detection (nm)
####Calculating 95% Confidence interval of the slope####
variance_xy = sum((x-mean_x)*(y-mean_y))/(N-1) #covariance of x and y
variance_x = (sum((x-mean_x)**2))/(N-1) #variance of x
variance_y = (sum((y-mean_y)**2))/(N-1) #variance of y
r_xy = variance_xy / (np.sqrt(variance_x)*np.sqrt(variance_y)) #Calculates Pearson correlation (r)
alpha=0.05
t = stats.t.ppf(1-alpha/2,N-2) #calculates critical t-value using alpha and degrees of freedom
SE_a2 = (sum((y-y_hat)**2) / sum((x-mean_x)**2)) / np.sqrt(N-2) #standard error of the slope
CI = t*SE_a2 #95% Confidence Interval of the Slope
table = [['Slope', 'Intercept', 'R-squared', 'Standard Deviation of the Slope', '95% CI of the Slope','Limit of Detection (nm)'], [a2, intercept, r_squared, s_m, CI, LOD]]
return print(tabulate(table, headers='firstrow', tablefmt='fancy_grid'))
#################################################################################
def outlier_test(values):
'''
Inputs: Absorbance to be used in outlier test
Outputs: If outlier exists, will print "outlier exists" statment + value that is deemed an outlier. If no outlier exists, "No outlier" statement is printed.
'''
relstdev = 100 * (np.std(values)/np.mean(values))
if (relstdev > 10):
outlier = grubbs.max_test_outliers(values, alpha=.05)
return print('Outlier exists:',outlier)
else: print('No outlier')
#################################################################################
def slope_from_index(indices):
'''
INPUTS:
indices = List of indices used in a calibration [1-D array]
OUTPUTS: Slope of linear regression model from calibration.
'''
x = [0,0,0,0.5,0.5,0.5,1.5,1.5,1.5,3,3,3]
absorbances = []
y = []
for i in indices:
absorbances = pd.read_csv('master_data/sample_'+ str(i) + '.pfl', delimiter='\t', nrows=30)
absorbances = absorbances.replace(to_replace=absorbances['sample name'][23], value='A880-A510', inplace=False, limit=None, regex=False, method='pad')
absorbances = absorbances.replace(to_replace=absorbances['sample name'][16], value='A880-A775', inplace=False, limit=None, regex=False, method='pad')
absorbances = absorbances.replace(to_replace=absorbances['sample name'][9], value='A880-A975', inplace=False, limit=None, regex=False, method='pad')
Abs = absorbances['sample name'] == "A880-A510"
A880_A510 = float(absorbances[Abs]['sample'])
y.append(A880_A510)
lin_results = stats.linregress(x,y)
slope = lin_results[0]
return slope
#################################################################################
def solve_for_conc(calib_indices, sample_indices):
'''INPUTS:
Phosphate concentrations of calibration [1D array]
Absorbance values (mAU) [1D array]
OUTPUTS:
Concentration values corresponding to absorbance readings, using 2-degree polynomial calibration curve [1D array)]
'''
x = np.array([0, 0, 0, 0.5, 0.5, 0.5, 1.5, 1.5, 1.5, 3, 3, 3]) #concentrations of standards used in calibration (in triplicates)
absorbances = dict()
A880_A510 = dict()
calib_y = []
#Extracting list of absorbance values from calibration curve, using x and calib_indices inputs.
for number in calib_indices:
absorbances[number] = pd.read_csv('master_data/sample_'+str(number)+'.pfl', delimiter='\t', nrows=30)
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][23], value='A880-A510', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][16], value='A880-A775', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][9], value='A880-A975', inplace=False, limit=None, regex=False, method='pad')
Abs = absorbances[number]['sample name'] == "A880-A510" #Extracting absorbance values that only correspond to the 510nm reference wavelength.
A880_A510[number] = float(absorbances[number][Abs]['sample']) #Completed dictionary which contains absorbance values at 880-510nm for any index.
abslist = A880_A510[number]
calib_y.append(abslist)
#Extracting list of absorbance values from measured sample with unknown PO4 concentration, using sample_indices inputs.
absorbances = dict()
A880_A510 = dict()
sample_y = []
for number in sample_indices:
absorbances[number] = pd.read_csv('master_data/sample_'+str(number)+'.pfl', delimiter='\t', nrows=30)
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][23], value='A880-A510', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][16], value='A880-A775', inplace=False, limit=None, regex=False, method='pad')
absorbances[number] = absorbances[number].replace(to_replace=absorbances[number]['sample name'][9], value='A880-A975', inplace=False, limit=None, regex=False, method='pad')
Abs = absorbances[number]['sample name'] == "A880-A510" #Extracting absorbance values that only correspond to the 510nm reference wavelength.
A880_A510[number] = float(absorbances[number][Abs]['sample']) #Completed dictionary which contains absorbance values at 880-510nm for any index.
abslist = A880_A510[number]
sample_y.append(abslist)
p = P.fit(x, calib_y, 2)
conc_list = []
for value in sample_y:
conc = (p - value).roots()[1] #calculates the roots of the polynomial model - for this application, we are only interested in the 2nd root.
conc_list.append(conc)
return conc_list
#################################################################################
if __name__ == '__main__':
print('Run pFi functions if run as a script')