sid22669's picture
Update app.py
144cf7f verified
import streamlit as st
import joblib
import re
import PyPDF2
import pandas as pd
import os
import uuid
from datetime import datetime
import tempfile
from io import BytesIO
# Load the persisted classifier and the text vectorizer it is used with.
# Both artifact paths are relative, so they resolve against the working
# directory the app is started from. The classifier is loaded first.
classifier_model, resume_vectorizer = (
    joblib.load('resume_classifier'),
    joblib.load('resume_vectorizer'),
)
def transfer_tmp_logs(tmp_log_path="/tmp/corrections_log.csv",
                      main_log_path="corrections_log.csv"):
    """Merge the temporary corrections log into the main log, then delete it.

    Rows are de-duplicated on ``serial_id``. Because the temporary rows are
    concatenated after the main rows and ``keep="last"`` is used, a temporary
    row replaces a main-log row with the same ``serial_id``.

    Args:
        tmp_log_path: CSV file holding the not-yet-transferred log rows.
            Defaults to the path the app has always used.
        main_log_path: CSV file that accumulates all transferred rows.
            Created if it does not exist yet.

    Returns:
        None. Does nothing when ``tmp_log_path`` does not exist.
    """
    if not os.path.exists(tmp_log_path):
        return  # No new logs to transfer

    tmp_df = pd.read_csv(tmp_log_path)

    if os.path.exists(main_log_path):
        main_df = pd.read_csv(main_log_path)
        # Merge without duplicates based on serial_id; newest (tmp) row wins.
        combined_df = pd.concat([main_df, tmp_df]).drop_duplicates(
            subset=["serial_id"], keep="last"
        )
    else:
        combined_df = tmp_df

    combined_df.to_csv(main_log_path, index=False)

    # Remove the tmp file only after the merged log has been written, so a
    # failure above never loses the pending rows.
    os.remove(tmp_log_path)
def read_uploaded_file(uploaded_file):
    """Extract plain text from an uploaded ``.pdf`` or ``.txt`` file.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute and
            ``read()``; the extension of ``name`` (case-insensitive) selects
            the parser.

    Returns:
        The extracted text with leading/trailing whitespace stripped.
        This function does not raise for unreadable content: every failure,
        including an unsupported extension, is reported as a string starting
        with ``"Error reading file:"`` so callers can detect all failures
        through that one prefix.
    """
    ext = os.path.splitext(uploaded_file.name)[1].lower()

    if ext not in (".pdf", ".txt"):
        # Carries the same prefix as other failures; the previous bare
        # "Unsupported file type." slipped past callers checking for "Error".
        return "Error reading file: Unsupported file type."

    try:
        if ext == ".pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            page_texts = (page.extract_text() for page in reader.pages)
            # Pages with no extractable text yield an empty/None result; skip them.
            return "\n".join(text for text in page_texts if text).strip()
        return uploaded_file.read().decode("utf-8").strip()
    except Exception as e:
        # Deliberately broad: any parser or decoding failure is turned into
        # an error string instead of crashing the app.
        return f"Error reading file: {str(e)}"
def clean_resume(text):
    """Return ``text`` reduced to lower-case ASCII letters and spaces.

    Each character outside ``a-z``/``A-Z`` (digits, punctuation, whitespace,
    non-ASCII letters) is replaced one-for-one by a single space; runs of
    spaces are not collapsed. The result is then lower-cased.
    """
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text)
    return letters_only.lower()
def log_or_update(serial_id, timestamp, resume_text, model_prediction,
                  corrected_prediction, log_file="/tmp/corrections_log.csv"):
    """Record a prediction in the corrections log, or update an existing row.

    If a row with ``serial_id`` already exists, only its
    ``corrected_prediction`` is overwritten; the original timestamp, text and
    model prediction are kept. Otherwise a new row is appended. The log file
    is created on first use.

    Args:
        serial_id: Identifier of the row to create or update.
        timestamp: Timestamp string stored with a newly created row.
        resume_text: Resume text; only the first 500 characters are stored.
        model_prediction: Role predicted by the model.
        corrected_prediction: Final role to record.
        log_file: CSV file to write to. Defaults to the path the app has
            always used.

    Returns:
        None.
    """
    new_row = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": resume_text[:500],  # Truncate for privacy/log size
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    if os.path.exists(log_file):
        df = pd.read_csv(log_file)
        matches = df["serial_id"] == serial_id
        if matches.any():
            # read_csv may have inferred a numeric dtype for this column;
            # cast to object so a free-text correction can be stored without
            # an incompatible-dtype assignment.
            df["corrected_prediction"] = df["corrected_prediction"].astype(object)
            df.loc[matches, "corrected_prediction"] = corrected_prediction
        else:
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    else:
        df = pd.DataFrame([new_row])

    df.to_csv(log_file, index=False)
# Streamlit UI
# Flow on every rerun: upload -> extract text -> predict -> collect feedback
# -> write/update the corrections log.
st.title("📄 Resume Role Classifier")

uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT format)",
    # Offer only the formats read_uploaded_file can parse; .doc/.docx were
    # accepted before but could never be read.
    type=["pdf", "txt"],
)

if uploaded_file:
    # Reset the file read pointer in case it was read earlier
    uploaded_file.seek(0)

    # Track upload session: a new file name starts a new log row.
    if st.session_state.get("uploaded_file_name") != uploaded_file.name:
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None

    extracted_text = read_uploaded_file(uploaded_file)

    # Match the reader's failure messages exactly instead of searching for
    # "Error" anywhere: a resume may legitimately contain that word, and the
    # bare unsupported-type message does not contain it at all.
    extraction_failed = (
        not extracted_text.strip()
        or extracted_text.startswith("Error reading file:")
        or extracted_text == "Unsupported file type."
    )

    if extraction_failed:
        st.warning("⚠️ Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]
        st.write(f"**Predicted Role:** `{prediction}`")

        feedback = st.radio(
            "Is this prediction correct?", ("Yes", "No"), key="feedback_radio"
        )

        if feedback == "No":
            # The stored value is None right after a new upload; the text
            # box needs a string, so fall back to an empty one.
            previous = st.session_state.get("corrected_prediction")
            typed = st.text_input(
                "Please provide the correct role:",
                value="" if previous is None else str(previous),
                key="correction_input",
            )
            # Whitespace-only input is not a usable correction.
            corrected_prediction = (typed or "").strip()
        else:
            corrected_prediction = prediction
        st.session_state.corrected_prediction = corrected_prediction

        # "Yes" always records the model's prediction; "No" records only
        # once a non-empty correction has been entered.
        if feedback == "Yes" or corrected_prediction:
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction,
            )
            st.success(f"✅ Final role recorded: `{corrected_prediction}`")
else:
    st.info("📤 Please upload a supported file (PDF, TXT).")