hpratapsingh committed
Update app.py
Added Streamlit

app.py CHANGED
@@ -1,24 +1,27 @@
-import gradio as gr
+import streamlit as st
import pickle
import docx
import PyPDF2
import re
+from PIL import Image

# Load pre-trained model and TF-IDF vectorizer
svc_model = pickle.load(open('clf.pkl', 'rb'))  # Update with your model path
tfidf = pickle.load(open('tfidf.pkl', 'rb'))  # Update with your vectorizer path
le = pickle.load(open('encoder.pkl', 'rb'))  # Update with your encoder path

+
# Function to clean resume text
-def cleanResume(txt):
-    ...
+def cleanResume(txt):
+    cleanText = re.sub('http\S+\s', ' ', txt)
+    cleanText = re.sub('RT|cc', ' ', cleanText)
+    cleanText = re.sub('#\S+\s', ' ', cleanText)
+    cleanText = re.sub('@\S+', ' ', cleanText)
+    cleanText = re.sub('[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
+    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
+    cleanText = re.sub('\s+', ' ', cleanText)
+    return cleanText
+

# Function to extract text from PDF
def extract_text_from_pdf(file):

@@ -28,6 +31,7 @@ def extract_text_from_pdf(file):
        text += page.extract_text()
    return text

+
# Function to extract text from DOCX
def extract_text_from_docx(file):
    doc = docx.Document(file)

@@ -36,6 +40,7 @@ def extract_text_from_docx(file):
        text += paragraph.text + '\n'
    return text

+
# Function to extract text from TXT
def extract_text_from_txt(file):
    try:

@@ -44,6 +49,7 @@ def extract_text_from_txt(file):
        text = file.read().decode('latin-1')
    return text

+
# Function to handle file upload and extraction
def handle_file_upload(uploaded_file):
    file_extension = uploaded_file.name.split('.')[-1].lower()

@@ -57,27 +63,71 @@ def handle_file_upload(uploaded_file):
        raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")
    return text

+
# Function to predict the category of a resume
-def predict_category(file):
-    ...
-    return f"Error: {str(e)}"
-
-# Define Gradio interface
-inputs = gr.File(label="Upload Resume (PDF, DOCX, TXT)")
-outputs = gr.Textbox(label="Prediction")
-
-interface = gr.Interface(fn=predict_category, inputs=inputs, outputs=outputs, title="Resume Classifier",
-                         description="Upload your resume to predict its job category using an AI model.")
-
-# Launch the interface
-if __name__ == "__main__":
-    interface.launch(share=True)
+def pred(input_resume):
+    cleaned_text = cleanResume(input_resume)
+    vectorized_text = tfidf.transform([cleaned_text])
+    vectorized_text = vectorized_text.toarray()
+    predicted_category = svc_model.predict(vectorized_text)
+    predicted_category_name = le.inverse_transform(predicted_category)
+    return predicted_category_name[0]
+
+
+def main():

+    st.set_page_config(page_title="Resume Classifier", page_icon="π", layout="wide")
+
+    # Sidebar design
+    # st.sidebar.image("sidebar_logo.png", use_column_width=True)  # Add your sidebar logo
+    st.sidebar.title("Navigation")
+    st.sidebar.write("π Welcome to the Resume Classifier!")
+    st.sidebar.info("Use this tool to predict the category of resumes.")
+    st.sidebar.markdown("---")
+    st.sidebar.header("Instructions")
+    st.sidebar.write("1. Upload a resume file (PDF, DOCX, or TXT).")
+    st.sidebar.write("2. View the extracted resume text.")
+    st.sidebar.write("3. Get the predicted job category.")
+
+    # Main page
+    st.title("π Resume Classifier")
+    st.markdown("Upload your resume and get an AI-powered prediction of the job category.")
+
+    # File upload
+    uploaded_file = st.file_uploader("Upload a Resume", type=["pdf", "docx", "txt"])
+    if uploaded_file is not None:
+        try:
+            resume_text = handle_file_upload(uploaded_file)
+            st.success("Successfully extracted the text from the uploaded resume.")
+
+            # Display extracted text
+            with st.expander("View Extracted Text"):
+                st.text_area("Extracted Resume Text", resume_text, height=300)
+
+            # Display prediction
+            st.subheader("Predicted Category")
+            category = pred(resume_text)
+            st.write(f"The predicted category is: **{category}**")
+
+        except Exception as e:
+            st.error(f"Error: {str(e)}")
+
+    # Footer with copyright and developer credits
+    st.markdown("---")
+    col1, col2 = st.columns([1, 3])
+
+
+
+    with col2:
+        st.markdown(
+            """
+            <p style='text-align: center;'>
+            © 2025 Resume Classifier. All rights reserved.<br>
+            Developed with ❤️ by <a href="https://github.com/" target="_blank">Aashish</a>.
+            </p>
+            """,
+            unsafe_allow_html=True
+        )
+
+if __name__ == "__main__":
+    main()
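
After this change the app is started with Streamlit's runner (streamlit run app.py) instead of Gradio's interface.launch(). For a quick check of the classification path outside the browser, the new pred() helper can also be called directly. A minimal sketch, assuming clf.pkl, tfidf.pkl and encoder.pkl sit next to app.py (they are unpickled at import time) and using a made-up snippet of resume text:

# Minimal local check of pred(); assumes the three .pkl files are present.
from app import pred

sample_resume = (
    "Data analyst with four years of experience in Python, SQL and Tableau. "
    "Built dashboards, cleaned large datasets and automated reporting pipelines."
)
print(pred(sample_resume))  # prints the label decoded by the encoder, i.e. a job category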
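
handle_file_upload() only needs an object that carries a .name attribute and behaves like a seekable binary file, which is what Streamlit's uploader hands it. For testing from a terminal without the UI, a small stand-in can play that role; LocalUpload and the path resume.pdf below are hypothetical, not part of app.py:

# Hypothetical helper for command-line testing; 'resume.pdf' is a placeholder path.
import io
from app import handle_file_upload, pred

class LocalUpload(io.BytesIO):
    """Mimics Streamlit's UploadedFile: a seekable bytes buffer with a .name."""
    def __init__(self, path):
        with open(path, 'rb') as fh:
            super().__init__(fh.read())
        self.name = path

resume_text = handle_file_upload(LocalUpload('resume.pdf'))
print(pred(resume_text))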
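
The commit only touches the inference side; clf.pkl, tfidf.pkl and encoder.pkl are shipped ready-made and their training code is not part of this change. Judging from the names svc_model, tfidf and le, they look like a scikit-learn SVC, TfidfVectorizer and LabelEncoder, so compatible artifacts could plausibly be regenerated along these lines (illustrative toy data, not the author's training script):

# Illustrative sketch only -- the real training data and script are not in this commit.
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

texts = ["python pandas machine learning sql", "recruitment onboarding payroll policies"]  # toy corpus
labels = ["Data Science", "HR"]  # toy category names

tfidf = TfidfVectorizer()
X = tfidf.fit_transform(texts)

le = LabelEncoder()
y = le.fit_transform(labels)

clf = SVC()
clf.fit(X, y)

# Same file names that app.py expects to load
pickle.dump(clf, open('clf.pkl', 'wb'))
pickle.dump(tfidf, open('tfidf.pkl', 'wb'))
pickle.dump(le, open('encoder.pkl', 'wb'))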