File size: 4,421 Bytes
33bd636 f54f27c 33bd636 f54f27c 33bd636 f54f27c 33bd636 f54f27c 33bd636 f54f27c 33bd636 f54f27c 33bd636 f54f27c 33bd636 f54f27c 33bd636 b271f2c 33bd636 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import streamlit as st
import pickle
import docx
import PyPDF2
import re
from PIL import Image
# Load the pre-trained classifier, TF-IDF vectorizer and label encoder.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so these artifacts must only ever come from a trusted source.
def _load_pickle(path):
    """Unpickle and return the object stored at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes, including when unpickling raises.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


svc_model = _load_pickle('clf.pkl')  # Update with your model path
tfidf = _load_pickle('tfidf.pkl')  # Update with your vectorizer path
le = _load_pickle('encoder.pkl')  # Update with your encoder path
# Function to clean resume text
def cleanResume(txt):
    """Strip noise from raw resume text and collapse whitespace.

    The substitutions run in this order, each replacing its match with a
    single space: URLs followed by whitespace, the literal tokens ``RT`` and
    ``cc``, hashtags followed by whitespace, ``@mentions``, ASCII punctuation,
    and non-ASCII characters. Finally every whitespace run is collapsed to one
    space.

    Args:
        txt: Raw resume text.

    Returns:
        The cleaned text. Leading/trailing single spaces are not stripped.
    """
    # Raw strings are used for every pattern: sequences such as "\S" inside a
    # normal string literal are invalid escapes and trigger warnings.
    # NOTE(review): the patterns themselves are intentionally unchanged (e.g.
    # 'RT|cc' also hits "cc" inside words, and a URL at the very end of the
    # text is not removed) -- presumably the pickled vectorizer was fitted on
    # text cleaned exactly this way; confirm before tightening them.
    cleanText = re.sub(r'http\S+\s', ' ', txt)
    cleanText = re.sub(r'RT|cc', ' ', cleanText)
    cleanText = re.sub(r'#\S+\s', ' ', cleanText)
    cleanText = re.sub(r'@\S+', ' ', cleanText)
    punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
    cleanText = re.sub('[%s]' % re.escape(punctuation), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText
# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Return the concatenated text of every page in a PDF.

    Args:
        file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined in page order, with no separator added
        between pages.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # join() avoids repeated string concatenation; `or ''` keeps a page that
    # yields no text (falsy result) from breaking the concatenation.
    return ''.join(page.extract_text() or '' for page in pdf_reader.pages)
# Function to extract text from DOCX
def extract_text_from_docx(file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        file: A path or file-like object passed straight to ``docx.Document``.

    Returns:
        Each paragraph's text followed by a newline, concatenated in document
        order (so a non-empty document ends with a trailing newline).
    """
    document = docx.Document(file)
    return ''.join(para.text + '\n' for para in document.paragraphs)
# Function to extract text from TXT
def extract_text_from_txt(file):
    """Decode an uploaded plain-text file to ``str``.

    UTF-8 is tried first; if the bytes are not valid UTF-8 the same bytes are
    decoded as latin-1 instead.

    Args:
        file: A binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The decoded text.
    """
    # Read exactly once. Calling read() again after a failed decode would
    # start at end-of-stream and return b'', silently yielding an empty
    # resume for every non-UTF-8 file.
    raw = file.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # latin-1 maps every byte value to a character, so this cannot fail.
        return raw.decode('latin-1')
# Function to handle file upload and extraction
def handle_file_upload(uploaded_file):
    """Extract text from an uploaded resume, choosing the reader by extension.

    Args:
        uploaded_file: An object with a ``name`` attribute (the file name)
            that the matching ``extract_text_from_*`` helper can read.

    Returns:
        The extracted text.

    Raises:
        ValueError: If the part of the name after the last ``.`` (compared
            case-insensitively) is not ``pdf``, ``docx`` or ``txt``.
    """
    # Everything after the last dot; for a name without a dot this is the
    # whole name, which then falls through to the ValueError below.
    suffix = uploaded_file.name.rpartition('.')[2].lower()

    if suffix == 'pdf':
        return extract_text_from_pdf(uploaded_file)
    if suffix == 'docx':
        return extract_text_from_docx(uploaded_file)
    if suffix == 'txt':
        return extract_text_from_txt(uploaded_file)
    raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")
# Function to predict the category of a resume
def pred(input_resume):
    """Predict the category name for one resume.

    The text is cleaned with ``cleanResume``, vectorized with the module-level
    ``tfidf`` object, converted to a dense array, classified by ``svc_model``
    and mapped back to a label through ``le``.

    Args:
        input_resume: Raw resume text.

    Returns:
        The first (only) element of the inverse-transformed prediction.
    """
    # A one-element list is transformed so the model sees a single sample.
    features = tfidf.transform([cleanResume(input_resume)]).toarray()
    encoded_label = svc_model.predict(features)
    return le.inverse_transform(encoded_label)[0]
def main():
    """Render the Streamlit page: sidebar, upload widget, prediction, footer.

    When a file has been uploaded, its text is extracted and shown in an
    expander, followed by the predicted category. Any exception raised while
    extracting or predicting is reported with ``st.error`` instead of
    propagating. The footer is rendered whether or not a file is present.
    """
    st.set_page_config(page_title="Resume Classifier", page_icon="π", layout="wide")

    # --- Sidebar ---
    # Optional logo, disabled until an image is supplied:
    # st.sidebar.image("sidebar_logo.png", use_column_width=True)
    sidebar = st.sidebar
    sidebar.title("Navigation")
    sidebar.write("π Welcome to the Resume Classifier!")
    sidebar.info("Use this tool to predict the category of resumes.")
    sidebar.markdown("---")
    sidebar.header("Instructions")
    for step in (
        "1. Upload a resume file (PDF, DOCX, or TXT).",
        "2. View the extracted resume text.",
        "3. Get the predicted job category.",
    ):
        sidebar.write(step)

    # --- Main page ---
    st.title("π Resume Classifier")
    st.markdown("Upload your resume and get an AI-powered prediction of the job category.")

    # --- Upload, extraction and prediction ---
    uploaded_file = st.file_uploader("Upload a Resume", type=["pdf", "docx", "txt"])
    if uploaded_file is not None:
        try:
            resume_text = handle_file_upload(uploaded_file)
            st.success("Successfully extracted the text from the uploaded resume.")

            # Extracted text is tucked into an expander.
            with st.expander("View Extracted Text"):
                st.text_area("Extracted Resume Text", resume_text, height=300)

            st.subheader("Predicted Category")
            category = pred(resume_text)
            st.write(f"The predicted category is: **{category}**")
        except Exception as err:
            # UI boundary: show the failure to the user rather than crash.
            st.error(f"Error: {err!s}")

    # --- Footer: copyright and developer credits ---
    st.markdown("---")
    footer_html = (
        "\n"
        "<p style='text-align: center;'>\n"
        "© 2025 Resume Classifier. All rights reserved.<br>\n"
        'Developed with β€οΈ by <a href="https://github.com/" target="_blank">Aashish</a>.\n'
        "</p>\n"
    )
    # Only the second, wider column (1:3 split) receives content.
    _, footer_col = st.columns([1, 3])
    with footer_col:
        st.markdown(footer_html, unsafe_allow_html=True)
# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()