File size: 4,421 Bytes
33bd636
f54f27c
 
 
 
33bd636
f54f27c
 
 
 
 
 
33bd636
f54f27c
33bd636
 
 
 
 
 
 
 
 
 
f54f27c
 
 
 
 
 
 
 
 
33bd636
f54f27c
 
 
 
 
 
 
 
33bd636
f54f27c
 
 
 
 
 
 
 
33bd636
f54f27c
 
 
 
 
 
 
 
 
 
 
 
 
33bd636
f54f27c
33bd636
 
 
 
 
 
 
 
 
 
b271f2c
33bd636
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import pickle
import docx
import PyPDF2
import re
from PIL import Image

# Load the pre-trained model, TF-IDF vectorizer and label encoder.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file,
# so these artifacts must only ever come from a trusted source.
def _load_pickle(path):
    """Unpickle and return the object stored at *path*.

    The file is opened in a ``with`` block so its handle is closed as soon as
    loading finishes (or fails) instead of being left to the garbage collector.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


svc_model = _load_pickle('clf.pkl')  # Update with your model path
tfidf = _load_pickle('tfidf.pkl')  # Update with your vectorizer path
le = _load_pickle('encoder.pkl')  # Update with your encoder path


# Function to clean resume text
def cleanResume(txt):
    """Normalize raw resume text before it is vectorized.

    The substitutions run in this fixed order:

    1. ``http...`` runs that are followed by a whitespace character -> one space
    2. the literal substrings ``RT`` and ``cc`` (anywhere, even inside
       words) -> one space
    3. ``#...`` runs that are followed by a whitespace character -> one space
    4. ``@...`` runs -> two spaces
    5. ASCII punctuation characters -> one space each
    6. non-ASCII characters -> one space each
    7. every run of whitespace -> a single space

    What each pattern matches is intentionally left exactly as it was:
    presumably the pickled vectorizer was fitted on text cleaned this way, so
    altering the matching would skew the features (confirm against the
    training code before changing any pattern). Only the spelling of the
    patterns changed: they are raw strings now, which avoids the invalid
    string-escape warnings the non-raw versions trigger.

    Args:
        txt: Resume text as a ``str``.

    Returns:
        The cleaned text. Leading/trailing whitespace is collapsed to a single
        space, not stripped.
    """
    # (pattern, replacement) pairs, applied top to bottom.
    substitutions = (
        (r'http\S+\s', ' '),
        (r'RT|cc', ' '),
        (r'#\S+\s', ' '),
        (r'@\S+', '  '),
        ('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' '),
        (r'[^\x00-\x7f]', ' '),
        (r'\s+', ' '),
    )
    cleanText = txt
    for pattern, replacement in substitutions:
        cleanText = re.sub(pattern, replacement, cleanText)
    return cleanText


# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts; the caller in this file
            passes the uploaded file object.

    Returns:
        The text of all pages joined by newlines. A page that yields no text
        contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # `or ''` guards against a page whose extraction yields None (reported for
    # some PyPDF2 releases - TODO confirm for the pinned version); adding None
    # to a str would raise TypeError and abort the whole upload.
    # Joining with a newline keeps the last word of one page from fusing with
    # the first word of the next.
    return '\n'.join(page.extract_text() or '' for page in pdf_reader.pages)


# Function to extract text from DOCX
def extract_text_from_docx(file):
    """Return the paragraph text of a DOCX document, one paragraph per line.

    Only ``doc.paragraphs`` is read. Every paragraph's text is followed by a
    newline, including the last one.

    Args:
        file: Whatever ``docx.Document`` accepts; the caller in this file
            passes the uploaded file object.

    Returns:
        The concatenated paragraph text.
    """
    document = docx.Document(file)
    return ''.join(f'{para.text}\n' for para in document.paragraphs)


# Function to extract text from TXT
def extract_text_from_txt(file):
    """Decode an uploaded plain-text file to ``str``.

    The bytes are read exactly once and decoded as UTF-8, falling back to
    Latin-1. Reading once matters: a second ``read()`` on the already-consumed
    stream returns no data, which would make the fallback produce an empty
    string for every non-UTF-8 file.

    Args:
        file: Binary file-like object whose ``read()`` returns ``bytes``.

    Returns:
        The decoded text.
    """
    raw = file.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # Latin-1 maps every byte value to a character, so this cannot fail.
        return raw.decode('latin-1')


# Function to handle file upload and extraction
def handle_file_upload(uploaded_file):
    """Extract text from an uploaded resume according to its file extension.

    The extension is whatever follows the last ``.`` in ``uploaded_file.name``,
    compared case-insensitively.

    Args:
        uploaded_file: Object with a ``name`` attribute; it is handed
            unchanged to the matching extractor.

    Returns:
        The text returned by the PDF, DOCX or TXT extractor.

    Raises:
        ValueError: If the extension is not ``pdf``, ``docx`` or ``txt``.
    """
    suffix = uploaded_file.name.split('.')[-1].lower()

    # Reject unsupported types first so the rest is a plain dispatch.
    if suffix not in ('pdf', 'docx', 'txt'):
        raise ValueError("Unsupported file type. Please upload a PDF, DOCX, or TXT file.")

    if suffix == 'pdf':
        return extract_text_from_pdf(uploaded_file)
    if suffix == 'docx':
        return extract_text_from_docx(uploaded_file)
    return extract_text_from_txt(uploaded_file)


# Function to predict the category of a resume
def pred(input_resume):
    """Predict the category name for raw resume text.

    The text is cleaned with ``cleanResume``, transformed by the module-level
    ``tfidf`` object, converted to a dense array, passed to
    ``svc_model.predict``, and the result is mapped back through
    ``le.inverse_transform``.

    Args:
        input_resume: Raw resume text.

    Returns:
        The first (only) element of the inverse-transformed prediction.
    """
    features = tfidf.transform([cleanResume(input_resume)]).toarray()
    encoded_label = svc_model.predict(features)
    return le.inverse_transform(encoded_label)[0]


def main():
    """Render the Resume Classifier Streamlit page.

    Configures the page, draws the sidebar instructions, accepts one uploaded
    resume (PDF, DOCX or TXT), shows its extracted text and the predicted
    category, and ends with a footer. Any exception raised while extracting or
    classifying is shown to the user via ``st.error`` instead of propagating.

    The emoji literals were previously stored as mis-decoded byte sequences
    (mojibake); they are restored to the intended characters here.
    """
    st.set_page_config(page_title="Resume Classifier", page_icon="📄", layout="wide")

    # Sidebar design
    # st.sidebar.image("sidebar_logo.png", use_column_width=True)  # Add your sidebar logo
    st.sidebar.title("Navigation")
    st.sidebar.write("👋 Welcome to the Resume Classifier!")
    st.sidebar.info("Use this tool to predict the category of resumes.")
    st.sidebar.markdown("---")
    st.sidebar.header("Instructions")
    st.sidebar.write("1. Upload a resume file (PDF, DOCX, or TXT).")
    st.sidebar.write("2. View the extracted resume text.")
    st.sidebar.write("3. Get the predicted job category.")

    # Main page
    st.title("📄 Resume Classifier")
    st.markdown("Upload your resume and get an AI-powered prediction of the job category.")

    # File upload
    uploaded_file = st.file_uploader("Upload a Resume", type=["pdf", "docx", "txt"])
    if uploaded_file is not None:
        # Broad catch is deliberate: this is the UI boundary, and a parsing or
        # model failure should surface as a message rather than a stack trace.
        try:
            resume_text = handle_file_upload(uploaded_file)
            st.success("Successfully extracted the text from the uploaded resume.")

            # Display extracted text
            with st.expander("View Extracted Text"):
                st.text_area("Extracted Resume Text", resume_text, height=300)

            # Display prediction
            st.subheader("Predicted Category")
            category = pred(resume_text)
            st.write(f"The predicted category is: **{category}**")

        except Exception as e:
            st.error(f"Error: {e}")

    # Footer with copyright and developer credits
    st.markdown("---")
    # The first (narrow) column is an intentionally empty spacer.
    _, footer_col = st.columns([1, 3])

    with footer_col:
        st.markdown(
            """
            <p style='text-align: center;'>
            &copy; 2025 Resume Classifier. All rights reserved.<br>
            Developed with ❤️ by <a href="https://github.com/" target="_blank">Aashish</a>.
            </p>
            """,
            unsafe_allow_html=True
        )

# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()