-
Notifications
You must be signed in to change notification settings - Fork 4
/
streamlit_app.py
639 lines (539 loc) · 33.9 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
import streamlit as st
import numpy as np
import datetime
import pickle
import gdown
import time
abstract_content = '''
<p>This project focuses on developing a comprehensive online payment fraud detection system using advanced machine learning \
techniques. Leveraging the IEEE-CIS fraud detection dataset, the system is designed to accurately identify potential \
fraudulent transactions while maintaining a streamlined user experience. Key features of the project include \
dimensionality reduction through Principal Component Analysis (PCA) for optimizing feature input, particularly \
focusing on V1-V339, D-type features, and C-type features. The project employs a variety of models, including \
XGBoost and KMeans clustering, with hyperparameter tuning to ensure robust performance.</p>
<p>The application integrates user input through a simple, intuitive Streamlit interface, where users are prompted with \
targeted yes/no and multiple-choice questions. This input is dynamically transformed using pre-trained PCA models and \
processed through the XGBoost classifier for fraud prediction. The system handles null values, optimizes feature \
importance to reduce user inputs, and seamlessly incorporates clustering for behavioral and transactional data \
analysis. The application also adapts to device-related features and behavioral data through Vesta’s proprietary \
feature set, ensuring comprehensive coverage of all aspects of fraud detection.</p>
<p>Significant improvements have been made in handling categorical and numerical features, scaling models, and minimizing \
user data input while retaining prediction accuracy. The project ensures model portability with saved PCA and clustering \
models in pickle format. The entire system is designed to be both highly accurate and computationally efficient, making \
it suitable for real-world deployment in environments requiring fraud detection in online payments.</p>
'''
disclaimer_content = '''
<p>The information and tools provided by this application are intended for educational and informational \
purposes only. The results and predictions generated by the machine learning models should not be used as the sole basis \
for making financial decisions or assessing risk. Users should exercise caution and consult with financial experts or \
professionals before acting on the information provided. The developer of this application does not assume any \
responsibility for any financial losses or damages that may result from the use of this tool. The accuracy \
of the predictions depends on the quality of the input data and the assumptions made during model training.</p>
'''
@st.cache_resource
def downloadFiles():
gdown.download(f"https://drive.google.com/uc?id={st.secrets['LABELS']}", f'labels.pkl', quiet=False)
gdown.download(f"https://drive.google.com/uc?id={st.secrets['KM_C_FEATURES']}", f'km_C_features.pkl', quiet=False)
gdown.download(f"https://drive.google.com/uc?id={st.secrets['KM_D_FEATURES']}", f'km_D_features.pkl', quiet=False)
gdown.download(f"https://drive.google.com/uc?id={st.secrets['KM_V_FEATURES']}", f'km_V_features.pkl', quiet=False)
gdown.download(f"https://drive.google.com/uc?id={st.secrets['PCA_C_FEATURES']}", f'PCA_C_features.pkl', quiet=False)
gdown.download(f"https://drive.google.com/uc?id={st.secrets['PCA_D_FEATURES']}", f'PCA_D_features.pkl', quiet=False)
gdown.download(f"https://drive.google.com/uc?id={st.secrets['PCA_V_FEATURES']}", f'PCA_V_features.pkl', quiet=False)
gdown.download(f"https://drive.google.com/uc?id={st.secrets['MODEL']}", f'model.pkl', quiet=False)
# User Interface System
def userDetails():
st.subheader('Provide your details')
col1, col2 = st.columns(2)
with col1:
name = st.text_input("Enter your name", key="name", placeholder="John Doe")
with col2:
age = st.number_input("Enter your age", min_value=1, key="age")
col3, col4 = st.columns(2)
with col3:
phone_extension = st.text_input("Enter your phone extension", key="phone_extension", placeholder="+91")
with col4:
phone = st.text_input("Enter your phone number", key="phone", placeholder="1234567890")
phone_no = f"{phone_extension} {phone}"
st.markdown("#### Your Home Address")
col5, col6 = st.columns(2)
with col5:
street = st.text_input("Street", key="street", placeholder="123, Example Street")
with col6:
city = st.text_input("City", key="city", placeholder="Example City")
col7, col8, col9 = st.columns(3)
with col7:
state = st.text_input("State", key="state", placeholder="Example State")
with col8:
country = st.text_input("Country", key="country", placeholder="Example Country")
with col9:
postal_code = st.number_input("Postal Code", min_value=10000, max_value=999999, step=1, value=123456, key="postal_code")
my_address_data = {"Street": street, "City": city, "State": state, "Country": country, "Postal Code": postal_code}
my_address = f"{street}, {city}, {state}, {country}, {postal_code}"
if st.checkbox("Correspondence Address same as Home Address", key="corr_address"):
corr_address_data = my_address_data
corr_address = my_address
else:
st.markdown("#### Your Correspondence Address")
col10, col11 = st.columns(2)
with col10:
corr_street = st.text_input("Street", key="corr_street", placeholder="123, Example Street")
with col11:
corr_city = st.text_input("City", key="corr_city", placeholder="Example City")
col12, col13, col14 = st.columns(3)
with col12:
corr_state = st.text_input("State", key="corr_state", placeholder="Example State")
with col13:
corr_country = st.text_input("Country", key="corr_country", placeholder="Example Country")
with col14:
corr_postal_code = st.number_input("Postal Code", min_value=10000, max_value=999999, step=1, value=123456, key="corr_postal_code")
corr_address_data = {"Street": corr_street, "City": corr_city, "State": corr_state, "Country": corr_country, "Postal Code": corr_postal_code}
corr_address = f"{corr_street}, {corr_city}, {corr_state}, {corr_country}, {corr_postal_code}"
st.divider() ############################################################################################################
return name, age, phone_no, my_address_data, my_address, corr_address_data, corr_address
def transactionDetails(labels, my_address_data, my_address):
st.subheader('Provide the transaction details')
# For TransactionDT
START_DATE = '2017-12-01'
StartDate = datetime.datetime.strptime(START_DATE, "%Y-%m-%d")
col1, col2 = st.columns(2)
with col1:
TransactionDT_date = st.date_input("Transaction Date", key="TransactionDT_date")
with col2:
TransactionDT_time = st.time_input("Transaction Time", step=60, key="TransactionDT_time")
TransactionDateTime = datetime.datetime.combine(TransactionDT_date, TransactionDT_time)
TransactionDT = int((TransactionDateTime - StartDate).total_seconds())
# For TransactionID, TransactionAmt
cols = st.columns(2)
with cols[0]:
TransactionID = st.number_input("Transaction ID", key="TransactionID", min_value=1000000, max_value=999999999999, value=123456789012, placeholder="7-12 Digits")
with cols[1]:
TransactionAmt = np.log(st.number_input("Transaction Amount", min_value=0.1, value=1.0, step=1.0, key="TransactionAmt"))
# For min_last, max_last, mean_last, std_last
min_last, max_last = st.slider("Your usual transaction amount range", min_value=1, max_value=100000, value=(10500, 70000), step=10, key="min_max_last")
mean_last = np.log(np.mean([min_last, max_last]))
std_last = mean_last / np.log(np.std([min_last, max_last]))
# For dist1
dist1 = st.slider("Distance from the home location (in km)", min_value=0, max_value=10000, value=100, step=1, key="dist1")
# For _Weekdays, _Hours, _Days
_Weekdays, _Hours, _Days = TransactionDateTime.weekday(), TransactionDateTime.hour, TransactionDateTime.day
# For ProductCD
ProductCD = labels[st.selectbox("What type of item is being purchased?", ["Widgets", "Clothing", "Retail", "Healthcare", "Subscription"], key="ProductCD").lower()[0]]
# For P_emaildomain, R_emaildomain
col3, col4 = st.columns(2)
with col3:
purchaser_email = st.text_input("Purchaser Email Address", key="puchaser_email", placeholder="[email protected]")
P_emaildomain = purchaser_email.split('@')[-1] or 'nan'
if P_emaildomain not in list(labels.keys()):
P_emaildomain = 'nan'
P_emaildomain = labels[P_emaildomain]
with col4:
recipient_email = st.text_input("Recipient Email Address", key="recipient_email", placeholder="[email protected]")
R_emaildomain = recipient_email.split('@')[-1] or 'nan'
if R_emaildomain not in list(labels.keys()):
R_emaildomain = 'nan'
R_emaildomain = labels[R_emaildomain]
col5, col6 = st.columns(2)
with col5:
tx_phone_extension = st.text_input("Enter the phone extension", key="tx_phone_extension", placeholder="+91")
with col6:
tx_phone = st.text_input("Enter transaction phone number", key="tx_phone", placeholder="1234567890")
tx_phone_no = f"{tx_phone_extension} {tx_phone}"
if st.checkbox("Transaction Address same as Home Address", key="tx_address", value=True):
tx_address_data = my_address_data
tx_address = my_address
else:
st.markdown("##### From where did you conduct your transaction?")
col7, col8 = st.columns(2)
with col7:
tx_street = st.text_input("Street", key="tx_street", placeholder="123, Example Street")
with col8:
tx_city = st.text_input("City", key="tx_city", placeholder="Example City")
col9, col10, col11 = st.columns(3)
with col9:
tx_state = st.text_input("State", key="tx_state", placeholder="Example State")
with col10:
tx_country = st.text_input("Country", key="tx_country", placeholder="Example Country")
with col11:
tx_postal_code = st.number_input("Postal Code", min_value=10000, max_value=999999, step=1, value=123456, key="tx_postal_code")
tx_address_data = {"Street": tx_street, "City": tx_city, "State": tx_state, "Country": tx_country, "Postal Code": tx_postal_code}
tx_address = f"{tx_street}, {tx_city}, {tx_state}, {tx_country}, {tx_postal_code}"
# For addr1, addr2, first_value_addr1
addr1 = float(str(tx_address_data['Postal Code'])[3:])
addr2 = float(str(tx_address_data['Postal Code'])[:3])
first_value_addr1 = float(str(tx_address_data['Postal Code'])[0])
st.divider() ############################################################################################################
return TransactionDT, TransactionID, TransactionAmt, mean_last, min_last, max_last, std_last, dist1, _Weekdays, _Days, _Hours, \
ProductCD, purchaser_email, P_emaildomain, recipient_email, R_emaildomain, tx_phone_no, tx_address_data, tx_address, addr1, addr2, \
first_value_addr1
def cardDetails(labels, TransactionAmt):
st.subheader("Provide the card details")
# For Card_ID
cols = st.columns(5)
with cols[0]:
st.text_input("Card ID", key="Card_ID", label_visibility="hidden", value="Card ID", disabled=True)
with cols[1]:
card1 = st.text_input("Card ID 1", key="card1", label_visibility="hidden", value="1000", placeholder="1000")
first_value_card1 = float(card1[0])
with cols[2]:
card2 = st.text_input("Card ID 2", key="card2", label_visibility="hidden", value="5552", placeholder="5552")
with cols[3]:
card3 = st.text_input("Card ID 3", key="card3", label_visibility="hidden", value="1835", placeholder="1835")
with cols[4]:
card5 = st.text_input("Card ID 5", key="card5", label_visibility="hidden", value="2246", placeholder="2246")
Card_ID = f"{card1} {card2} {card3} {card5}"
# Card Holder Name
card_holder_name = st.text_input("Card Holder Name", key="card_holder_name", placeholder="John Doe")
# For TransactionAmt_to_mean_card_id, TransactionAmt_to_mean_card1, TransactionAmt_to_mean_card4
max_limit = st.number_input("Maximum transaction limit of this card", min_value=1, value=100000, step=10, key="max_limit")
min_last, max_last = st.slider("Your usual transaction amount range through this card", min_value=1, max_value=100000 if max_limit == 1 else max_limit, value=(1, max_limit), step=1, key="min_max_last_card")
std_last = np.log(np.mean([min_last, max_last])) / np.log(np.std([min_last, max_last]))
TransactionAmt_to_mean_card_id = np.exp(TransactionAmt) - np.mean([min_last, max_last])
TransactionAmt_to_mean_card1 = np.exp(TransactionAmt) / np.mean([min_last, max_last])
TransactionAmt_to_mean_card4 = abs((np.exp(TransactionAmt) / np.mean([min_last, max_last])) - std_last)
# For card1, card2, card4
payment_method = st.selectbox("Have you made the payment using a card?", ['Select One', 'Yes', 'No'], key="payment_method")
card4, card6 = -999, -999
if payment_method == 'Yes':
card4 = labels[st.selectbox("Brand of the card", ['Visa', 'Mastercard', 'American Express', 'Discover'], key="card4").lower()]
card6 = labels[st.selectbox("Usage of the card", ['Credit', 'Debit', 'Debit or Credit', 'Charge Card'], key="card6").lower()]
card_data = {"Card1": card1, "Card2": card2, "Card3": card3, "Card4": card4, "Card5": card5, "Card6": card6}
st.divider() ############################################################################################################
return card_data, Card_ID, first_value_card1, card_holder_name, TransactionAmt_to_mean_card_id, TransactionAmt_to_mean_card1, TransactionAmt_to_mean_card4
def billingDetails(purchaser_email, card_holder_name, name, phone_no, tx_phone_no, my_address, tx_address, country, tx_country):
st.subheader("Provide the billing details")
# For M1, M4, M5, M6, M7
# M1 -> Billing Address == Shipping Address
M1 = st.selectbox("Is the billing address the same as the shipping address?", ['Select One', 'Yes', 'No'], key="M1")
M1 = 1 if M1 == 'Yes' else 2 if M1 == 'No' else -999
# M2 -> Email Address (at T/X) == Email Address (Owner)
email_tx = st.text_input("Email Address at the Transaction Receipt", key="email_tx", placeholder="[email protected]")
M2 = -999 if email_tx == '' or purchaser_email == '' else 1 if email_tx == purchaser_email else 2
# M3 -> Card Holder Name == Name of the Owner
M3 = -999 if card_holder_name == '' or name == '' else 1 if card_holder_name == name else 2
# M4 -> T/X from same device as last transaction
M4 = st.selectbox("Have you done the transaction from the same device as last time?", ['Select One', 'Yes', 'No'], key="M4_device")
if M4 == 'Yes':
M4 = st.selectbox("Have you done the transaction from a nearby home location?", ['Select One', 'Yes', 'No'], key="M4_home")
M4 = 4 if M4 == 'Yes' else 5 if M4 == 'No' else -999
elif M4 == 'No':
M4 = 3
else:
M4 = -999
# M5 -> Phone Number (at T/X) == Phone Number of the Owner
M5 = -999 if phone_no == ' ' or len(phone_no) < 14 or tx_phone_no == ' ' or len(tx_phone_no) < 14 else 1 if phone_no == tx_phone_no else 2
# M6 -> Address (at T/X) == Address of the Owner
M6 = -999 if my_address == ", , , , 123456" or tx_address == ", , , , 123456" else 1 if my_address == tx_address else 2
# M7 -> Country (at T/X) == Country of the Owner
M7 = -999 if country == '' or tx_country == '' else 1 if country == tx_country else 2
M_data = {"M1": M1, "M2": M2, "M3": M3, "M4": M4, "M5": M5, "M6": M6, "M7": M7}
st.divider() ############################################################################################################
return M_data
def behavioralDetails(TransactionAmt, min_last, max_last, _Hours, my_address_data, tx_address_data):
# For V1, V12, V14, V35, V41, V65, V69, V75, V88, V94, V241
st.subheader("Provide the transactional usage details")
cols = st.columns(3)
with cols[0]:
V1 = st.selectbox("Have you recently changed your account information?", ['Select One', 'Yes', 'No'], key="V1")
V1 = 1 if V1 == 'No' else 0
with cols[1]:
V14 = st.selectbox("Have you encountered any other fraud scenarios?", ['Select One', 'Yes', 'No'], key="V14")
V14 = 1 if V14 == 'No' else 0
with cols[2]:
V88 = st.selectbox("Is this transaction done from your own device?", ['Select One', 'Yes', 'No'], key="V88")
V88 = 1 if V88 == 'Yes' else 0
# Is the transaction amount significantly higher than the user's maximum?
V41 = 1 if (np.exp(TransactionAmt) > max_last) else 0
# Was this transaction made within normal business hours?
V65 = 1 if (8 <= _Hours <= 18) else 0
# Is the user's transaction history within the expected range for their account?
V241 = 1 if (min_last <= np.exp(TransactionAmt) <= max_last) else 0
V94 = st.selectbox("What is the nature of your purchase history?", ['Select One', 'Mostly small transactions', 'A mix of small and large transactions', 'Mostly large transactions'], key="V94")
V94 = (0 if V94 == 'Mostly small transactions' else 1 if V94 == 'A mix of small and large transactions' else 2) / 2
# 0 Exactly the same address, 1 Same city but different street, 2 Same state but different city, 3 Same country but different state
V12 = abs(0 if my_address_data == tx_address_data else 1 if my_address_data['City'] == tx_address_data['City'] else 2 if my_address_data['State'] == tx_address_data['State'] else 3 if my_address_data['Country'] == tx_address_data['Country'] else -1) / 3
V35 = st.selectbox("How long has your account been active?", ['Select One', 'Less than 1 month', '1-6 months', '6-12 months', 'More than a year'], key="V35")
V35 = (0 if V35 == 'More than a year' else 1 if V35 == '6-12 months' else 2 if V35 == '1-6 months' else 3) / 3
V75 = st.selectbox("How often do you change your password?", ['Select One', 'Less than once a year', 'Once a year', 'Every 6 months', 'Every 3 months', 'Monthly'], key="V75")
V75 = (0 if V75 == 'Less than once a year' else 1 if V75 == 'Once a year' else 2 if V75 == 'Every 6 months' else 3 if V75 == 'Every 3 months' else 4) / 4
V69 = st.selectbox("How frequently do you make transactions online?", ['Select One', 'Daily', 'Weekly', 'Bi-weekly', 'Monthly', 'Every few months', 'Rarely'], key="V69")
V69 = (0 if V69 == 'Daily' else 1 if V69 == 'Weekly' else 2 if V69 == 'Bi-weekly' else 3 if V69 == 'Monthly' else 4 if V69 == 'Every few months' else 5) / 5
V_data = {"V1": V1, "V12": V12, "V14": V14, "V35": V35, "V41": V41, "V65": V65, "V69": V69, "V75": V75, "V88": V88, "V94": V94, "V241": V241}
# For C5, C6, C7, C9, C12, C14
st.divider() ############################################################################################################
time_tx = st.expander("Transaction Details", expanded=True)
with time_tx:
st.markdown("<div style='text-align: justify; margin: 1rem;'>\
<p style='margin-bottom: 5px;'>Ranges of the transaction amount:</p>\
<b>Small Transactions:</b> 1 - 1000<br />\
<b>Medium Transactions:</b> 1000 - 20000<br />\
<b>Large Transactions:</b> 20000 - 100000<br />\
<b>Very Large Transactions:</b> 100000 - more than 100000<br />\
</div>", unsafe_allow_html=True)
st.subheader("Provide the transactional behavior details")
C7 = st.slider("How frequently do small transactions occur per day?", min_value=0, max_value=100, value=5, step=1, key="C7") / 2256
C12 = st.slider("How frequently do large transactions occur per day?", min_value=0, max_value=10, value=2, step=1, key="C12") / 3188
C6 = st.slider("How frequently do small transactions occur per week?", min_value=0, max_value=1000, value=30, step=1, key="C6") / 2252
C14 = st.slider("How frequently do large transactions occur per week?", min_value=0, max_value=100, value=13, step=1, key="C14") / 1429
C5 = st.slider("How frequently do small transactions occur per month?", min_value=0, max_value=10000, value=200, step=1, key="C5") / 376
C9 = st.slider("How frequently do large transactions occur per month?", min_value=0, max_value=1000, value=30, step=1, key="C9") / 572
C_data = {"C5": C5, "C6": C6, "C7": C7, "C9": C9, "C12": C12, "C14": C14}
# For D2, D3, D4, D5, D11
st.divider() ############################################################################################################
st.subheader("Provide the transaction time behavioral details")
D2 = st.slider("How many days between small transactions?", min_value=0, max_value=100, value=2, step=1, key="D2") / 641
D11 = st.slider("How many days between medium transactions?", min_value=0, max_value=365, value=15, step=1, key="D11") / 936
D3 = st.slider("How many days between large transactions?", min_value=0, max_value=730, value=40, step=1, key="D3") / 1076
D5 = st.slider("How many days between very large transactions?", min_value=0, max_value=730, value=80, step=1, key="D5") / 1088
D4 = st.slider("How many days between exceptional transactions?", min_value=0, max_value=1095, value=200, step=1, key="D4") / 1213
D_data = {"D2": D2, "D3": D3, "D4": D4, "D5": D5, "D11": D11}
st.divider() ############################################################################################################
return V_data, C_data, D_data
def deviceInfo(labels):
st.subheader("Provide the device details")
# For DeviceType
DeviceType = st.selectbox("Device Type", ['Select One', 'Desktop', 'Mobile'], key="DeviceType")
DeviceType = 1 if DeviceType == 'Desktop' else 2 if DeviceType == 'Mobile' else -999
# For DeviceInfo
DeviceInfo = st.selectbox("Device Info", list(labels.keys()), key="DeviceInfo")
DeviceInfo = labels[DeviceInfo]
device_data = {"DeviceType": DeviceType, "DeviceInfo": DeviceInfo}
st.divider() ############################################################################################################
return device_data
# Model Prediction System
def preprocessing(data):
# Preprocessing of C features
with open('PCA_C_features.pkl', 'rb') as f:
PCA_C_features = pickle.load(f)
C_data = data['Transactional Usage Data']
C_data = np.array([C_data['C5'], C_data['C9'], C_data['C14'], C_data['C7'], C_data['C12'], C_data['C6']]).reshape(1, -1)
PCA_C_data = PCA_C_features.transform(C_data)
with open('km_C_features.pkl', 'rb') as f:
km_C_features = pickle.load(f)
clusters_C = km_C_features.predict(PCA_C_data)
clusters_C = clusters_C[0]
# Preprocessing of D features
with open('PCA_D_features.pkl', 'rb') as f:
PCA_D_features = pickle.load(f)
D_data = data['Transaction Time Behavioral Data']
D_data = np.array([D_data['D5'], D_data['D4'], D_data['D3'], D_data['D11'], D_data['D2']]).reshape(1, -1)
PCA_D_data = PCA_D_features.transform(D_data)
with open('km_D_features.pkl', 'rb') as f:
km_D_features = pickle.load(f)
clusters_D = km_D_features.predict(PCA_D_data)
clusters_D = clusters_D[0]
# Preprocessing of V features
with open('PCA_V_features.pkl', 'rb') as f:
PCA_V_features = pickle.load(f)
V_data = data['Behavioral Data']
V_data = np.array([V_data['V88'], V_data['V14'], V_data['V1'], V_data['V65'], V_data['V41'], V_data['V94'], V_data['V35'], V_data['V12'], V_data['V241'], V_data['V69'], V_data['V75']]).reshape(1, -1)
PCA_V_data = PCA_V_features.transform(V_data)
with open('km_V_features.pkl', 'rb') as f:
km_V_features = pickle.load(f)
clusters_V = km_V_features.predict(PCA_V_data)
clusters_V = clusters_V[0]
# For count_cluster
count_cluster = clusters_C + clusters_D + clusters_V
data['PCA_C_data'] = PCA_C_data
data['PCA_D_data'] = PCA_D_data
data['PCA_V_data'] = PCA_V_data
data['clusters_C'] = clusters_C
data['clusters_D'] = clusters_D
data['clusters_V'] = clusters_V
data['count_cluster'] = count_cluster
return data
def predict(data):
prepared_data = np.array([
int(data['Transaction ID']),
float(data['Transaction Amount']),
int(data['ProductCD']) - 66,
int(data['Card Data']['Card1']),
float(data['Card Data']['Card2']),
data['Card Data']['Card4'],
data['Card Data']['Card6'],
float(data['Address 1']),
float(data['Address 2']),
float(data['Distance']),
data['Purchaser Email Domain'],
data['Recipient Email Domain'],
data['Billing Data']['M1'],
data['Billing Data']['M4'],
data['Billing Data']['M5'],
data['Billing Data']['M6'],
data['Billing Data']['M7'],
float(data['PCA_V_data'][0][0]),
float(data['PCA_V_data'][0][1]),
float(data['PCA_V_data'][0][2]),
float(data['PCA_V_data'][0][3]),
float(data['PCA_V_data'][0][4]),
float(data['PCA_V_data'][0][5]),
float(data['PCA_V_data'][0][6]),
float(data['PCA_V_data'][0][7]),
float(data['PCA_V_data'][0][8]),
float(data['PCA_V_data'][0][9]),
int(data['clusters_V']),
float(data['PCA_C_data'][0][0]),
float(data['PCA_C_data'][0][1]),
float(data['PCA_C_data'][0][2]),
int(data['clusters_C']),
float(data['Device Data']['DeviceType']),
float(data['Device Data']['DeviceInfo']),
data['Weekdays'],
data['Hours'],
data['Days'],
float(data['Mean Transaction Amount']),
float(data['Minimum Transaction Amount']),
float(data['Maximum Transaction Amount']),
float(data['Standard Deviation Transaction Amount']),
float(data['PCA_D_data'][0][0]),
float(data['PCA_D_data'][0][1]),
float(data['PCA_D_data'][0][2]),
float(data['PCA_D_data'][0][3]),
float(data['PCA_D_data'][0][4]),
int(data['clusters_D']),
int(data['count_cluster']),
float(data['First Value Address 1']),
float(data['Transaction Amount with Card ID']),
float(data['Transaction Amount with Card1']),
float(data['Transaction Amount with Card4']),
float(data['First Value Card1'])
]).reshape(1, -1)
with open('model.pkl', 'rb') as f:
model = pickle.load(f)
prediction = model.predict_proba(prepared_data)
return prediction
def app():
st.write("This is a simple web app to predict whether a transaction is fraudulent or not.")
st.write("Please provide the necessary details to classify the transaction.")
# Load labels from file
with open('labels.pkl', 'rb') as f:
labels = pickle.load(f)
# Details of the User
name, age, phone_no, my_address_data, my_address, corr_address_data, corr_address = userDetails()
# Details of the Transaction
(TransactionDT, TransactionID, TransactionAmt, mean_last, min_last, max_last, std_last, dist1, _Weekdays, _Days, _Hours, ProductCD,
purchaser_email, P_emaildomain, recipient_email, R_emaildomain, tx_phone_no, tx_address_data, tx_address, addr1, addr2,
first_value_addr1) = transactionDetails(labels, my_address_data, my_address)
# Details of the Card
(card_data, Card_ID, first_value_card1, card_holder_name, TransactionAmt_to_mean_card_id, TransactionAmt_to_mean_card1,
TransactionAmt_to_mean_card4) = cardDetails(labels, TransactionAmt)
# Details of the Billing
M_data = billingDetails(purchaser_email, card_holder_name, name, phone_no, tx_phone_no, my_address, tx_address,
my_address_data['Country'], tx_address_data['Country'])
# Details of the Behavior
V_data, C_data, D_data = behavioralDetails(TransactionAmt, min_last, max_last, _Hours, my_address_data, tx_address_data)
# Details of the Device
device_data = deviceInfo(labels)
if st.button("Predict"):
data = {
"Name": name, "Age": age, "Phone Number": phone_no, "My Address Data": my_address_data, "Address": my_address,
"Correspondence Address Data": corr_address_data, "Correspondence Address": corr_address, "Transaction Date": TransactionDT,
"Transaction ID": TransactionID, "Transaction Amount": TransactionAmt, "Mean Transaction Amount": mean_last,
"Minimum Transaction Amount": min_last, "Maximum Transaction Amount": max_last, "Standard Deviation Transaction Amount": std_last,
"Distance": dist1, "Weekdays": _Weekdays, "Days": _Days, "Hours": _Hours, "ProductCD": ProductCD,
"Purchaser Email": purchaser_email, "Purchaser Email Domain": P_emaildomain, "Recipient Email": recipient_email,
"Recipient Email Domain": R_emaildomain, "Transaction Phone Number": tx_phone_no, "Transaction Address Data": tx_address_data,
"Transaction Address": tx_address, "Address 1": addr1, "Address 2": addr2, "First Value Address 1": first_value_addr1,
"Card Data": card_data, "Card ID": Card_ID, "First Value Card1": first_value_card1, "Card Holder Name": card_holder_name,
"Transaction Amount with Card ID": TransactionAmt_to_mean_card_id, "Transaction Amount with Card1": TransactionAmt_to_mean_card1,
"Transaction Amount with Card4": TransactionAmt_to_mean_card4, "Billing Data": M_data, "Behavioral Data": V_data,
"Transactional Usage Data": C_data, "Transaction Time Behavioral Data": D_data, "Device Data": device_data
}
st.toast("Pre-processing has started...", icon="⏳")
with st.spinner('Model is processing...'):
progress_bar = st.progress(0)
for percent_complete in range(100):
time.sleep(0.03)
progress_bar.progress(percent_complete + 1)
data = preprocessing(data)
st.toast("Pre-processing is complete...", icon="✅")
time.sleep(1)
results = st.container()
with results:
st.subheader("Results")
st.toast("Model is working now...", icon="⏳")
with st.spinner('Model is processing...'):
progress_bar = st.progress(0)
for percent_complete in range(100):
time.sleep(0.03)
progress_bar.progress(percent_complete + 1)
st.toast("Prediction is complete...", icon="✅")
try:
prediction = predict(data)
st.write(f'Hello {name}!')
st.write('Based on the machine learning model, the risk of this transaction being fraudulent is:')
if prediction[:,1] >= 0.5:
st.error("**HIGH**", icon="🚫")
st.toast("The transaction is classified as fraudulent.", icon="🚫")
else:
st.success("**LOW**", icon="✅")
st.toast("The transaction is classified as non-fraudulent.", icon="🎉")
st.warning('Disclaimer: **The results from this test are not intended for any financial or legal advice.** \
The model was trained on 590,540 data points with personal attributes only. Additionally, the analysis of \
this model indicates that attributes such as the transaction amount, maximum and minimum transaction limits, \
and the distance of the transaction location are of high importance in determining if a transaction is fraudulent or not.', icon="⚠️")
st.info('Accuracy: The machine learning model used for prediction was initially evaluated on an unknown dataset \
consisting of 506,691 records. The model correctly classified 89.54% of transactions as fraudulent and \
88.66% of transactions as non-fraudulent. The model has an AUC-ROC score of 0.94. It also considers the cost of \
misclassifying a non-fraudulent transaction as fraudulent, which is why the model is more likely to classify a transaction \
as non-fraudulent rather than fraudulent.', icon="ℹ️")
details = st.expander("More Details", expanded=False)
with details:
st.write('According to the ML model:')
st.write('The probability of the transaction being classified as fraudulent is:')
st.error(f'**{prediction[:,1][0]:.2%}**')
st.write('The probability of the transaction being classified as non-fraudulent is:')
st.success(f'**{prediction[:,0][0]:.2%}**')
st.write('**Note:** If the probability of classifying the transaction as fraudulent is over 0.5, then the model will classify it as a fraudulent transaction.')
except:
st.error("Enter valid values to show the results.", icon="🚨")
st.toast("Oops... Something went wrong. Please try again.", icon="🚨")
if st.button("Go Back"):
st.toast("Thanks for using the application. Hope you have a great day!", icon="👋")
time.sleep(2)
st.session_state.button_clicked = False
st.rerun()
if __name__ == '__main__':
st.set_page_config(page_title="Online Payment Fraud Detection App", page_icon="https://github.com/user-attachments/assets/662c2283-a6c9-471a-a2cd-b4173c64cd54")
if 'button_clicked' not in st.session_state:
st.session_state.button_clicked = False
title = st.container()
with title:
col1, col2 = st.columns([1, 7])
with col1:
st.image('https://github.com/user-attachments/assets/662c2283-a6c9-471a-a2cd-b4173c64cd54', width=100)
with col2:
st.title("Payment Fraud Detection App")
if not st.session_state.button_clicked:
st.toast("Welcome to the Payment Fraud Detection App!", icon="🎉")
time.sleep(2)
st.toast("Please wait while we load the application...", icon="⏳")
downloadFiles()
st.toast("Files downloaded...", icon="✅")
time.sleep(2)
st.toast("Application is ready to use...", icon="🚀")
if st.session_state.button_clicked:
st.success("You have agreed to the terms. You can now proceed with using the application.", icon="✅")
app()
else:
abstract = st.expander(label="Abstract", expanded=False)
with abstract:
st.write('**Read the full article** [ **here!** ](https://fraud-detection-handbook.github.io/fraud-detection-handbook/Foreword.html)')
st.markdown(f'<div style="text-align: justify; margin: 1rem;">\
<h3 style="text-align: center; font-weight: 900;">Fraud Detection Using Machine Learning</h3>\
{abstract_content}\
</div>', unsafe_allow_html=True)
youtube_video = st.expander("Watch the Video", expanded=False)
with youtube_video:
st.video(f"https://youtu.be/{st.secrets['YOUTUBE_VIDEO_ID']}")
disclaimer = st.expander("Disclaimer", expanded=True)
with disclaimer:
st.markdown(f'<div style="text-align: justify; margin: 1rem;">{disclaimer_content}</div>', unsafe_allow_html=True)
cols = st.columns(7)
with cols[0]:
if st.button('Agree'):
st.session_state.button_clicked = True
st.rerun()
with cols[6]:
st.button('Disagree')
st.toast("You must agree to the terms to proceed with the application.", icon="🚨")
st.error("If you have any concerns, please contact support. [ **here!** ](https://avdhesh-portfolio.vercel.app/)", icon="🚨")