-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
87 lines (73 loc) · 2.76 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import re
import streamlit as st
import pickle as pi
# Download the NLTK 'punkt' and 'stopwords' resources at import time.
# NOTE(review): nothing in the visible code uses these resources directly --
# confirm they are still required before keeping the startup cost.
nltk.download('punkt')
nltk.download('stopwords')

# Loading models
# NOTE: unpickling can execute arbitrary code embedded in the file, so
# clf.pkl and tfidf.pkl must only ever come from a trusted source.
# The context managers guarantee both file handles are closed once the
# objects are loaded (the bare open() calls used to leak them).
with open('clf.pkl', 'rb') as clf_file:
    clf = pi.load(clf_file)
with open('tfidf.pkl', 'rb') as tfidf_file:
    tfidf = pi.load(tfidf_file)
# Substitutions applied, in this order, by cleanresume(); every match is
# replaced with a single space. Raw strings keep the regex escapes (\S, \s,
# \]) from being parsed as (invalid) Python string escapes.
_RESUME_CLEANUP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'http\S+\s',       # "http..." tokens that are followed by whitespace
        r'RT|cc+ ',         # "RT" anywhere, or two-or-more "c" before a space
        r'@\S+',            # "@..." tokens
        r'#\S+\s',          # "#..." tokens that are followed by whitespace
        # Punctuation characters; note that the hyphen is NOT in this set.
        '[%s]' % re.escape(r"""!"#$%&'()*+,./:;<=>?@[\]^_`{|}~"""),
        r'[^\x00-\x7f]',    # any non-ASCII character
        r'\s+',             # collapse whitespace runs
    )
)


def cleanresume(txt):
    """Return *txt* with URLs, mentions, hashtags, punctuation and non-ASCII removed.

    Each pattern in ``_RESUME_CLEANUP_PATTERNS`` is applied in order and every
    match is replaced by a single space; the final pattern collapses runs of
    whitespace into one space. The result is not stripped, so a leading or
    trailing space may remain.

    NOTE(review): presumably this mirrors the preprocessing used when the
    pickled vectorizer/classifier were trained -- confirm before changing any
    pattern, since a mismatch would silently degrade predictions.

    Args:
        txt: Raw resume text (``str``).

    Returns:
        The cleaned text.
    """
    cleantxt = txt
    for pattern in _RESUME_CLEANUP_PATTERNS:
        cleantxt = pattern.sub(" ", cleantxt)
    return cleantxt
# Web app
# Maps the integer class id returned by clf.predict to its category name.
_CATEGORY_NAMES = {
    0: "Advocate",
    1: "Arts",
    2: "Automation Testing",
    3: "Blockchain",
    4: "Business Analyst",
    5: "Civil Engineer",
    6: "Data Science",
    7: "Database",
    8: "DevOps Engineer",
    9: "DotNet Developer",
    10: "ETL Developer",
    11: "Electrical Engineering",
    12: "HR",
    13: "Hadoop",
    14: "Health and fitness",
    15: "Java Developer",
    16: "Mechanical Engineer",
    17: "Network Security Engineer",
    18: "Operations Manager",
    19: "PMO",
    20: "Python Developer",
    21: "SAP Developer",
    22: "Sales",
    23: "Testing",
    24: "Web Designing",
}


def _decode_resume(resume_bytes):
    """Decode uploaded bytes as UTF-8, falling back to Latin-1 on failure."""
    try:
        return resume_bytes.decode('utf-8')
    except UnicodeDecodeError:
        # Latin-1 maps every byte value to a character, so this cannot fail.
        return resume_bytes.decode('latin-1')


def main():
    """Render the resume-screening page and show the predicted category.

    Reads the uploaded file, decodes it to text, cleans it with
    ``cleanresume``, vectorizes it with the module-level ``tfidf`` and
    classifies it with the module-level ``clf``. The predicted id is mapped
    to a category name ("Unknown" when the id has no entry) and written to
    the page. Nothing happens beyond rendering the uploader until a file is
    provided.
    """
    st.title("Resume Screening App")
    uploaded_resume = st.file_uploader("Upload Resume", type=['txt', 'pdf'])
    if uploaded_resume is None:
        return

    resume_bytes = uploaded_resume.read()
    if resume_bytes.startswith(b'%PDF'):
        # No PDF text extraction happens here: the raw bytes are decoded as
        # if they were text, so say so instead of silently predicting on noise.
        st.warning(
            "PDF text extraction is not supported; the raw file content is "
            "classified as-is, so the prediction may be unreliable. "
            "Upload a plain-text resume for best results."
        )
    resume_text = _decode_resume(resume_bytes)

    # Clean the input resume
    cleaned_resume = cleanresume(resume_text)
    # Transform the cleaned resume using the trained TfidfVectorizer
    input_features = tfidf.transform([cleaned_resume])
    # Make the prediction using the loaded classifier
    prediction_id = clf.predict(input_features)[0]

    category_name = _CATEGORY_NAMES.get(prediction_id, "Unknown")
    st.write("Prediction Category : ", category_name)
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()