File size: 4,679 Bytes
bb9a1f9
 
 
 
 
 
 
 
 
2231637
7132f90
bb9a1f9
 
 
 
6a3a4c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb9a1f9
2231637
 
bb9a1f9
2231637
bb9a1f9
2231637
 
 
 
 
 
 
bb9a1f9
 
2231637
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
 
7132f90
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2acd771
bb9a1f9
 
 
 
2231637
 
b7ec1f2
7132f90
b7ec1f2
 
 
 
bb9a1f9
 
7132f90
bb9a1f9
2231637
bb9a1f9
 
2231637
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
7132f90
 
 
 
 
bb9a1f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import streamlit as st
import joblib
import re
import PyPDF2
import pandas as pd
import os
import uuid
from datetime import datetime
import tempfile
from io import BytesIO

# Load the pre-trained classifier and its text vectorizer from disk.
# Paths are relative to the working directory the app is launched from.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load trusted
# artifacts shipped alongside this app, never user-supplied files.
classifier_model = joblib.load('resume_classifier')
resume_vectorizer = joblib.load('resume_vectorizer')

def transfer_tmp_logs():
    """Absorb correction logs staged in /tmp into the persistent CSV.

    Rows are de-duplicated on ``serial_id`` with the newest entry winning,
    and the staged file is removed once merged.

    NOTE(review): not invoked anywhere in this file -- presumably run
    manually or from another module; confirm before removing.
    """
    staged_path = "/tmp/corrections_log.csv"
    persistent_path = "corrections_log.csv"

    # Nothing staged -> nothing to do.
    if not os.path.exists(staged_path):
        return

    staged = pd.read_csv(staged_path)

    if os.path.exists(persistent_path):
        existing = pd.read_csv(persistent_path)
        # Later (staged) rows win over earlier ones with the same serial_id.
        merged = pd.concat([existing, staged]).drop_duplicates(
            subset=["serial_id"], keep="last"
        )
    else:
        merged = staged

    merged.to_csv(persistent_path, index=False)

    # The staged rows are now persisted; drop the file so they are not
    # merged a second time on the next call.
    os.remove(staged_path)


def read_uploaded_file(uploaded_file):
    """Extract plain text from an uploaded file.

    Supports PDF (via PyPDF2) and UTF-8 TXT. Returns the stripped text on
    success, the sentinel string "Unsupported file type." for other
    extensions, or "Error reading file: ..." when extraction raises.
    """
    extension = os.path.splitext(uploaded_file.name)[1].lower()

    try:
        if extension == ".txt":
            return uploaded_file.read().decode("utf-8").strip()

        if extension == ".pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            # Keep only pages that actually yield text (extract_text may
            # return None or "" for image-only pages).
            chunks = [
                page_text
                for page_text in (page.extract_text() for page in reader.pages)
                if page_text
            ]
            return "\n".join(chunks).strip()

        return "Unsupported file type."

    except Exception as exc:
        return f"Error reading file: {str(exc)}"


def clean_resume(text):
    """Replace every non-ASCII-letter character with a space, then lowercase.

    Matches the preprocessing applied when the vectorizer was trained.
    """
    non_letter = re.compile(r'[^a-zA-Z]')
    return non_letter.sub(' ', text).lower()


def log_or_update(serial_id, timestamp, resume_text, model_prediction, corrected_prediction):
    """Append a correction record to the /tmp log, or amend an existing one.

    When ``serial_id`` is already logged, only its ``corrected_prediction``
    column is overwritten (timestamp and text are kept from the first write);
    otherwise a full new row is appended. Resume text is truncated to 500
    characters before storage.
    """
    log_file = "/tmp/corrections_log.csv"

    record = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": resume_text[:500],  # truncate for privacy / log size
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    if not os.path.exists(log_file):
        log = pd.DataFrame([record])
    else:
        log = pd.read_csv(log_file)
        mask = log["serial_id"] == serial_id
        if mask.any():
            # Same upload session: just revise the stored correction.
            log.loc[mask, "corrected_prediction"] = corrected_prediction
        else:
            log = pd.concat([log, pd.DataFrame([record])], ignore_index=True)

    log.to_csv(log_file, index=False)


# Streamlit UI
st.title("πŸ“„ Resume Role Classifier")

uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT format)",
    type=["pdf", "txt", "doc", "docx"]
)

if uploaded_file:
    # Reset the file read pointer in case it was read earlier
    uploaded_file.seek(0)

    # Track upload session
    if (
        "uploaded_file_name" not in st.session_state
        or st.session_state.uploaded_file_name != uploaded_file.name
    ):
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None

    extracted_text = read_uploaded_file(uploaded_file)

    if "Error" in extracted_text or not extracted_text.strip():
        st.warning("⚠️ Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]

        st.write(f"**Predicted Role:** `{prediction}`")

        feedback = st.radio("Is this prediction correct?", ("Yes", "No"), key="feedback_radio")

        corrected_prediction = prediction

        if feedback == "No":
            corrected_prediction = st.text_input(
                "Please provide the correct role:",
                value=st.session_state.get("corrected_prediction", ""),
                key="correction_input"
            )
            st.session_state.corrected_prediction = corrected_prediction
        else:
            st.session_state.corrected_prediction = prediction

        if (feedback == "Yes") or (feedback == "No" and corrected_prediction):
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction
            )
            st.success(f"βœ… Final role recorded: `{corrected_prediction}`")
else:
    st.info("πŸ“€ Please upload a supported file (PDF, TXT, DOC, DOCX).")