# Spaces: Sleeping  (page-capture residue, not part of the program)
import os
import re
import tempfile
import uuid
from datetime import datetime
from io import BytesIO

import joblib
import pandas as pd
import PyPDF2
import streamlit as st
# Load model and vectorizer
@st.cache_resource
def _load_artifacts():
    """Load the fitted classifier and its vectorizer from disk.

    Streamlit re-executes this script on every widget interaction, so the
    loader is cached: the two artifacts are deserialized once and then
    reused instead of being read from disk on each rerun.

    NOTE: joblib files are pickle-based; only load artifacts that come
    from a trusted source.

    Returns:
        A ``(classifier_model, resume_vectorizer)`` tuple.
    """
    return joblib.load('resume_classifier'), joblib.load('resume_vectorizer')


classifier_model, resume_vectorizer = _load_artifacts()
def transfer_tmp_logs(tmp_log_path="/tmp/corrections_log.csv",
                      main_log_path="corrections_log.csv"):
    """Merge the temporary corrections log into the main log, then delete it.

    When both files exist, rows are de-duplicated on ``serial_id``; if the
    same id appears in both, the row from the temporary log wins because it
    is concatenated last. When the main log does not exist yet, the
    temporary log is written out as-is.

    Args:
        tmp_log_path: CSV holding the new entries. Nothing happens if it
            does not exist.
        main_log_path: CSV accumulating all entries; created if missing.
    """
    if not os.path.exists(tmp_log_path):
        return  # No new logs to transfer

    try:
        tmp_df = pd.read_csv(tmp_log_path)
    except pd.errors.EmptyDataError:
        # A zero-byte temp file carries no rows; discard it instead of crashing.
        os.remove(tmp_log_path)
        return

    if os.path.exists(main_log_path):
        main_df = pd.read_csv(main_log_path)
        # Merge without duplicates based on serial_id (temp rows take precedence)
        combined_df = pd.concat([main_df, tmp_df], ignore_index=True).drop_duplicates(
            subset=["serial_id"], keep="last"
        )
    else:
        combined_df = tmp_df

    combined_df.to_csv(main_log_path, index=False)
    # The temp file is removed only after the merged log has been written.
    os.remove(tmp_log_path)
def read_uploaded_file(uploaded_file):
    """Extract plain text from an uploaded resume.

    Args:
        uploaded_file: Binary file-like object with a ``name`` attribute;
            the extension of ``name`` selects the parser (``.pdf`` or
            ``.txt``, case-insensitive).

    Returns:
        The extracted text with surrounding whitespace stripped. On any
        failure, including an unsupported extension, a message that starts
        with ``"Error reading file:"`` is returned instead of raising.
    """
    ext = os.path.splitext(uploaded_file.name)[1].lower()
    try:
        if ext == ".pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            page_texts = (page.extract_text() for page in reader.pages)
            # Pages with no extractable text are skipped.
            return "".join(f"{page_text}\n" for page_text in page_texts if page_text).strip()
        if ext == ".txt":
            # Undecodable bytes become U+FFFD rather than failing the whole file.
            return uploaded_file.read().decode("utf-8", errors="replace").strip()
        # Same prefix as every other failure so callers need a single check.
        return f"Error reading file: unsupported file type '{ext}'."
    except Exception as e:
        # Boundary handler: parser failures are reported as text, never raised.
        return f"Error reading file: {e}"
def clean_resume(text):
    """Return *text* lower-cased, with every non-ASCII-letter character replaced by a space."""
    letters_only = re.sub(r'[^a-zA-Z]', ' ', text)
    return letters_only.lower()
def log_or_update(serial_id, timestamp, resume_text, model_prediction, corrected_prediction,
                  *, log_file="/tmp/corrections_log.csv"):
    """Record a prediction and its correction in the CSV log.

    If a row with ``serial_id`` already exists, only its
    ``corrected_prediction`` is overwritten; the stored timestamp, resume
    text and model prediction are left as first written. Otherwise a new
    row is appended. The log file is created on first use.

    Args:
        serial_id: Identifier of the upload the row belongs to.
        timestamp: Time string stored with a newly created row.
        resume_text: Extracted resume text; only the first 500 characters
            are stored.
        model_prediction: Role predicted by the model.
        corrected_prediction: Role confirmed or supplied by the user.
        log_file: Path of the CSV log to write.
    """
    new_row = {
        "serial_id": serial_id,
        "timestamp": timestamp,
        "resume_text": resume_text[:500],  # Truncate for privacy/log size
        "model_prediction": model_prediction,
        "corrected_prediction": corrected_prediction,
    }

    if not os.path.exists(log_file):
        df = pd.DataFrame([new_row])
    else:
        df = pd.read_csv(log_file)
        if serial_id in df["serial_id"].values:
            df.loc[df["serial_id"] == serial_id, "corrected_prediction"] = corrected_prediction
        else:
            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)

    df.to_csv(log_file, index=False)
# Streamlit UI
st.title("📄 Resume Role Classifier")

uploaded_file = st.file_uploader(
    "Upload your resume (PDF, TXT format)",
    # Only offer the formats read_uploaded_file can actually parse.
    type=["pdf", "txt"],
)

# Prefixes read_uploaded_file may return in place of resume text when it fails.
# Matching on a prefix (not on the word "Error" anywhere) keeps resumes that
# merely mention "Error" from being rejected.
_READ_FAILURE_PREFIXES = ("Error reading file:", "Unsupported file type.")

if uploaded_file:
    # Reset the file read pointer in case it was read earlier
    uploaded_file.seek(0)

    # Track upload session: a different file name starts a new log record.
    if st.session_state.get("uploaded_file_name") != uploaded_file.name:
        st.session_state.uploaded_file_name = uploaded_file.name
        st.session_state.serial_id = str(uuid.uuid4())
        st.session_state.corrected_prediction = None

    extracted_text = read_uploaded_file(uploaded_file)
    if extracted_text.startswith(_READ_FAILURE_PREFIXES) or not extracted_text.strip():
        st.warning("⚠️ Could not extract text from the uploaded file.")
    else:
        cleaned_text = clean_resume(extracted_text)
        new_input = resume_vectorizer.transform([cleaned_text])
        prediction = classifier_model.predict(new_input)[0]
        st.write(f"**Predicted Role:** `{prediction}`")

        # NOTE(review): "Yes" is the radio's first option, so if it is
        # preselected the prediction is logged as confirmed before the user
        # responds. Consider an explicit submit button.
        feedback = st.radio("Is this prediction correct?", ("Yes", "No"), key="feedback_radio")
        corrected_prediction = prediction
        if feedback == "No":
            corrected_prediction = st.text_input(
                "Please provide the correct role:",
                # The stored value is None right after a new upload; use "" then.
                value=str(st.session_state.get("corrected_prediction") or ""),
                key="correction_input",
            ).strip()  # whitespace-only input counts as "nothing entered"
            st.session_state.corrected_prediction = corrected_prediction
        else:
            st.session_state.corrected_prediction = prediction

        # Log a confirmation, or a correction once the user has typed one.
        if (feedback == "Yes") or (feedback == "No" and corrected_prediction):
            now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            log_or_update(
                serial_id=st.session_state.serial_id,
                timestamp=now,
                resume_text=extracted_text,
                model_prediction=prediction,
                corrected_prediction=corrected_prediction,
            )
            st.success(f"✅ Final role recorded: `{corrected_prediction}`")
else:
    st.info("📤 Please upload a supported file (PDF, TXT).")