File size: 4,679 Bytes
bb9a1f9
 
 
 
 
 
 
 
 
2231637
7132f90
bb9a1f9
 
 
 
6a3a4c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb9a1f9
2231637
 
bb9a1f9
2231637
bb9a1f9
2231637
 
 
 
 
 
 
bb9a1f9
 
2231637
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
 
7132f90
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2acd771
bb9a1f9
 
 
 
2231637
 
b7ec1f2
7132f90
b7ec1f2
 
 
 
bb9a1f9
 
7132f90
bb9a1f9
2231637
bb9a1f9
 
2231637
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
7132f90
 
 
 
 
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import streamlit as st
import joblib
import re
import PyPDF2
import pandas as pd
import os
import uuid
from datetime import datetime
import tempfile
from io import BytesIO

# Load the pre-trained classifier and its text vectorizer from disk.
# Paths are relative to the working directory the app is launched from.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load trusted
# artifacts shipped alongside this app, never user-supplied files.
classifier_model = joblib.load('resume_classifier')
resume_vectorizer = joblib.load('resume_vectorizer')

def transfer_tmp_logs():
    """Absorb correction logs staged in /tmp into the persistent CSV.

    Rows are de-duplicated on ``serial_id`` with the newest entry winning,
    and the staged file is removed once merged.

    NOTE(review): not invoked anywhere in this file -- presumably run
    manually or from another module; confirm before removing.
    """
    staged_path = "/tmp/corrections_log.csv"
    persistent_path = "corrections_log.csv"

    # Nothing staged -> nothing to do.
    if not os.path.exists(staged_path):
        return

    staged = pd.read_csv(staged_path)

    if os.path.exists(persistent_path):
        existing = pd.read_csv(persistent_path)
        # Later (staged) rows win over earlier ones with the same serial_id.
        merged = pd.concat([existing, staged]).drop_duplicates(
            subset=["serial_id"], keep="last"
        )
    else:
        merged = staged

    merged.to_csv(persistent_path, index=False)

    # The staged rows are now persisted; drop the file so they are not
    # merged a second time on the next call.
    os.remove(staged_path)


def read_uploaded_file(uploaded_file):
    """Extract plain text from an uploaded file.

    Supports PDF (via PyPDF2) and UTF-8 TXT. Returns the stripped text on
    success, the sentinel string "Unsupported file type." for other
    extensions, or "Error reading file: ..." when extraction raises.
    """
    extension = os.path.splitext(uploaded_file.name)[1].lower()

    try:
        if extension == ".txt":
            return uploaded_file.read().decode("utf-8").strip()

        if extension == ".pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            # Keep only pages that actually yield text (extract_text may
            # return None or "" for image-only pages).
            chunks = [
                page_text
                for page_text in (page.extract_text() for page in reader.pages)
                if page_text
            ]
            return "\n".join(chunks).strip()

        return "Unsupported file type."

    except Exception as exc:
        return f"Error reading file: {str(exc)}"


def clean_resume(text):
    """Replace every non-ASCII-letter character with a space, then lowercase.

    Matches the preprocessing applied when the vectorizer was trained.
    """
    non_letter = re.compile(r'[^a-zA-Z]')
    return non_letter.sub(' ', text).lower()


def log_or_update(serial_id, timestamp, resume_text, model_prediction, corrected_prediction):
    """Append a correction record to the /tmp log, or amend an existing one.

    When ``serial_id`` is already logged, only its ``corrected_prediction``
    column is overwritten (timestamp and text are kept from the first write);
    otherwise a full new row is appended. Resume text is truncated to 500
    characters before storage.
    """
    log_file = "/tmp/corrections_log.csv"

    record = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": resume_text[:500],  # truncate for privacy / log size
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    if not os.path.exists(log_file):
        log = pd.DataFrame([record])
    else:
        log = pd.read_csv(log_file)
        mask = log["serial_id"] == serial_id
        if mask.any():
            # Same upload session: just revise the stored correction.
            log.loc[mask, "corrected_prediction"] = corrected_prediction
        else:
            log = pd.concat([log, pd.DataFrame([record])], ignore_index=True)

    log.to_csv(log_file, index=False)


# Streamlit UI
st.title("πŸ“„ Resume Role Classifier")

uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT format)",
    type=["pdf", "txt", "doc", "docx"]
)

if uploaded_file:
    # Reset the file read pointer in case it was read earlier
    uploaded_file.seek(0)

    # Track upload session
    if (
        "uploaded_file_name" not in st.session_state
        or st.session_state.uploaded_file_name != uploaded_file.name
    ):
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None

    extracted_text = read_uploaded_file(uploaded_file)

    if "Error" in extracted_text or not extracted_text.strip():
        st.warning("⚠️ Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]

        st.write(f"**Predicted Role:** `{prediction}`")

        feedback = st.radio("Is this prediction correct?", ("Yes", "No"), key="feedback_radio")

        corrected_prediction = prediction

        if feedback == "No":
            corrected_prediction = st.text_input(
                "Please provide the correct role:",
                value=st.session_state.get("corrected_prediction", ""),
                key="correction_input"
            )
            st.session_state.corrected_prediction = corrected_prediction
        else:
            st.session_state.corrected_prediction = prediction

        if (feedback == "Yes") or (feedback == "No" and corrected_prediction):
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction
            )
            st.success(f"βœ… Final role recorded: `{corrected_prediction}`")
else:
    st.info("πŸ“€ Please upload a supported file (PDF, TXT, DOC, DOCX).")