hpratapsingh committed on
Commit
4a3863e
·
verified ·
1 Parent(s): b271f2c

Update app.py

Browse files

Added streamlit

Files changed (1) hide show
  1. app.py +82 -32
app.py CHANGED
@@ -1,24 +1,27 @@
1
- import gradio as gr
2
  import pickle
3
  import docx
4
  import PyPDF2
5
  import re
 
6
 
7
  # Load pre-trained model and TF-IDF vectorizer
8
  svc_model = pickle.load(open('clf.pkl', 'rb')) # Update with your model path
9
  tfidf = pickle.load(open('tfidf.pkl', 'rb')) # Update with your vectorizer path
10
  le = pickle.load(open('encoder.pkl', 'rb')) # Update with your encoder path
11
 
 
12
  # Function to clean resume text
13
def clean_resume(txt):
    """Normalise raw resume text before it is vectorised.

    The substitutions run in this order, each replacing a match with a
    single space:

    1. ``http...`` tokens that are followed by a whitespace character.
    2. The literal substrings ``RT`` and ``cc`` (anywhere, even inside words).
    3. ``#...`` tokens that are followed by a whitespace character.
    4. ``@...`` tokens.
    5. ASCII punctuation characters.
    6. Non-ASCII characters.
    7. Runs of whitespace (collapsed to one space).

    NOTE(review): the patterns are kept exactly as they were, quirks
    included (e.g. "account" becomes "a ount"), because the pickled
    vectorizer was presumably fitted on text cleaned the same way -
    confirm before tightening any of them.

    Args:
        txt: Raw resume text.

    Returns:
        The cleaned text. Leading/trailing whitespace is collapsed to a
        single space but not stripped.
    """
    # Raw strings keep regex escapes such as \S and \s from being parsed as
    # (invalid) Python string escapes, which warn on Python 3.12+.
    punctuation_class = '[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""")
    substitutions = (
        r'http\S+\s',
        r'RT|cc',
        r'#\S+\s',
        r'@\S+',
        punctuation_class,
        r'[^\x00-\x7f]',
        r'\s+',
    )
    clean_text = txt
    for pattern in substitutions:
        clean_text = re.sub(pattern, ' ', clean_text)
    return clean_text
 
22
 
23
  # Function to extract text from PDF
24
  def extract_text_from_pdf(file):
@@ -28,6 +31,7 @@ def extract_text_from_pdf(file):
28
  text += page.extract_text()
29
  return text
30
 
 
31
  # Function to extract text from DOCX
32
  def extract_text_from_docx(file):
33
  doc = docx.Document(file)
@@ -36,6 +40,7 @@ def extract_text_from_docx(file):
36
  text += paragraph.text + '\n'
37
  return text
38
 
 
39
  # Function to extract text from TXT
40
  def extract_text_from_txt(file):
41
  try:
@@ -44,6 +49,7 @@ def extract_text_from_txt(file):
44
  text = file.read().decode('latin-1')
45
  return text
46
 
 
47
  # Function to handle file upload and extraction
48
  def handle_file_upload(uploaded_file):
49
  file_extension = uploaded_file.name.split('.')[-1].lower()
@@ -57,27 +63,71 @@ def handle_file_upload(uploaded_file):
57
  raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")
58
  return text
59
 
 
60
  # Function to predict the category of a resume
61
def predict_category(file):
    """Extract, clean and classify an uploaded resume.

    Args:
        file: Uploaded file object; it is handed to ``handle_file_upload``.

    Returns:
        ``"Predicted Category: <label>"`` on success, or
        ``"Error: <message>"`` if any step raises.
    """
    try:
        cleaned = clean_resume(handle_file_upload(file))
        features = tfidf.transform([cleaned]).toarray()
        label_ids = svc_model.predict(features)
        labels = le.inverse_transform(label_ids)
        return f"Predicted Category: {labels[0]}"
    except Exception as exc:
        # UI boundary: surface the failure as text rather than crashing.
        return f"Error: {str(exc)}"
72
-
73
# Gradio UI: a single file input wired to a single text output.
inputs = gr.File(label="Upload Resume (PDF, DOCX, TXT)")
outputs = gr.Textbox(label="Prediction")

interface = gr.Interface(
    fn=predict_category,
    inputs=inputs,
    outputs=outputs,
    title="Resume Classifier",
    description="Upload your resume to predict its job category using an AI model.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch(share=True)
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import pickle
3
  import docx
4
  import PyPDF2
5
  import re
6
+ from PIL import Image
7
 
8
  # Load pre-trained model and TF-IDF vectorizer
9
  svc_model = pickle.load(open('clf.pkl', 'rb')) # Update with your model path
10
  tfidf = pickle.load(open('tfidf.pkl', 'rb')) # Update with your vectorizer path
11
  le = pickle.load(open('encoder.pkl', 'rb')) # Update with your encoder path
12
 
13
+
14
  # Function to clean resume text
15
def cleanResume(txt):
    """Normalise raw resume text before it is vectorised.

    The substitutions run in this order, each replacing a match with a
    single space:

    1. ``http...`` tokens that are followed by a whitespace character.
    2. The literal substrings ``RT`` and ``cc`` (anywhere, even inside words).
    3. ``#...`` tokens that are followed by a whitespace character.
    4. ``@...`` tokens.
    5. ASCII punctuation characters.
    6. Non-ASCII characters.
    7. Runs of whitespace (collapsed to one space).

    NOTE(review): the patterns are kept exactly as they were, quirks
    included (e.g. "account" becomes "a ount"), because the pickled
    vectorizer was presumably fitted on text cleaned the same way -
    confirm before tightening any of them.

    Args:
        txt: Raw resume text.

    Returns:
        The cleaned text. Leading/trailing whitespace is collapsed to a
        single space but not stripped.
    """
    # Raw strings keep regex escapes such as \S, \s and \] from being parsed
    # as (invalid) Python string escapes, which warn on Python 3.12+.
    punctuation_class = '[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""")
    substitutions = (
        r'http\S+\s',
        r'RT|cc',
        r'#\S+\s',
        r'@\S+',
        punctuation_class,
        r'[^\x00-\x7f]',
        r'\s+',
    )
    cleaned = txt
    for pattern in substitutions:
        cleaned = re.sub(pattern, ' ', cleaned)
    return cleaned
24
+
25
 
26
  # Function to extract text from PDF
27
  def extract_text_from_pdf(file):
 
31
  text += page.extract_text()
32
  return text
33
 
34
+
35
  # Function to extract text from DOCX
36
  def extract_text_from_docx(file):
37
  doc = docx.Document(file)
 
40
  text += paragraph.text + '\n'
41
  return text
42
 
43
+
44
  # Function to extract text from TXT
45
  def extract_text_from_txt(file):
46
  try:
 
49
  text = file.read().decode('latin-1')
50
  return text
51
 
52
+
53
  # Function to handle file upload and extraction
54
  def handle_file_upload(uploaded_file):
55
  file_extension = uploaded_file.name.split('.')[-1].lower()
 
63
  raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")
64
  return text
65
 
66
+
67
  # Function to predict the category of a resume
68
def pred(input_resume):
    """Predict the category label for raw resume text.

    The text is cleaned with ``cleanResume``, transformed by the
    module-level ``tfidf`` object, classified by ``svc_model`` and the
    prediction is mapped back through ``le.inverse_transform``.

    Args:
        input_resume: Resume text as a single string.

    Returns:
        The first element of the inverse-transformed prediction.
    """
    features = tfidf.transform([cleanResume(input_resume)]).toarray()
    label_ids = svc_model.predict(features)
    return le.inverse_transform(label_ids)[0]
75
+
76
+
77
def main():
    """Render the Streamlit page: sidebar, uploader, prediction and footer.

    When a file is uploaded, its text is extracted with
    ``handle_file_upload``, shown in an expander, and classified with
    ``pred``. Any exception raised along the way is reported with
    ``st.error`` instead of propagating.
    """
    st.set_page_config(page_title="Resume Classifier", page_icon="📄", layout="wide")

    # Sidebar design
    # st.sidebar.image("sidebar_logo.png", use_column_width=True)  # Add your sidebar logo
    st.sidebar.title("Navigation")
    st.sidebar.write("👋 Welcome to the Resume Classifier!")
    st.sidebar.info("Use this tool to predict the category of resumes.")
    st.sidebar.markdown("---")
    st.sidebar.header("Instructions")
    st.sidebar.write("1. Upload a resume file (PDF, DOCX, or TXT).")
    st.sidebar.write("2. View the extracted resume text.")
    st.sidebar.write("3. Get the predicted job category.")

    # Main page
    st.title("📄 Resume Classifier")
    st.markdown("Upload your resume and get an AI-powered prediction of the job category.")

    # File upload
    uploaded_file = st.file_uploader("Upload a Resume", type=["pdf", "docx", "txt"])
    if uploaded_file is not None:
        try:
            resume_text = handle_file_upload(uploaded_file)
            st.success("Successfully extracted the text from the uploaded resume.")

            # Display extracted text
            with st.expander("View Extracted Text"):
                st.text_area("Extracted Resume Text", resume_text, height=300)

            # Display prediction
            st.subheader("Predicted Category")
            category = pred(resume_text)
            st.write(f"The predicted category is: **{category}**")

        except Exception as e:
            # UI boundary: show the failure to the user rather than crash.
            st.error(f"Error: {e}")

    # Footer with copyright and developer credits
    st.markdown("---")
    # Only the wider right-hand column is used; the first one is a spacer.
    _spacer, footer_col = st.columns([1, 3])

    # The HTML is assembled without leading indentation so Markdown cannot
    # interpret it as an indented code block.
    footer_html = (
        "<p style='text-align: center;'>"
        "&copy; 2025 Resume Classifier. All rights reserved.<br>"
        'Developed with ❤️ by <a href="https://github.com/" target="_blank">Aashish</a>.'
        "</p>"
    )
    with footer_col:
        st.markdown(footer_html, unsafe_allow_html=True)


if __name__ == "__main__":
    main()