|
|
""" |
|
|
Quantum Scrutiny Platform | Groq-Powered |
|
|
Single-file Streamlit app (refactored, Groq streaming-compatible) |
|
|
""" |
|
|
|
|
|
import os |
|
|
import io |
|
|
import re |
|
|
import json |
|
|
import base64 |
|
|
import traceback |
|
|
from typing import Optional, List |
|
|
|
|
|
from dotenv import load_dotenv |
|
|
load_dotenv() |
|
|
|
|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
import fitz |
|
|
from docx import Document |
|
|
|
|
|
|
|
|
from groq import Groq |
|
|
|
|
|
|
|
|
from pydantic import BaseModel, Field, ValidationError |
|
|
|
|
|
|
|
|
# Page-wide Streamlit settings: wide layout and the browser tab title.
st.set_page_config(layout="wide", page_title="Quantum Scrutiny Platform | Groq-Powered")

# Secrets come from the process environment (load_dotenv() is called above,
# before these lookups).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# NOTE(review): falls back to the literal password "admin" when ADMIN_PASSWORD
# is unset -- confirm this default is acceptable outside local development.
ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD", "admin")
|
|
|
|
|
|
|
|
# Shared Groq client; stays None when no key is configured or construction
# fails, which downstream code treats as "model calls unavailable".
groq_client = None
if not GROQ_API_KEY:
    st.warning("GROQ_API_KEY not found. Set it as an environment variable or in .env for model calls to work.")
else:
    try:
        groq_client = Groq(api_key=GROQ_API_KEY)
    except Exception as init_error:
        st.error(f"Failed to initialize Groq client: {init_error}")
|
|
|
|
|
|
|
|
# Seed per-session state once; keys that already exist are left untouched so
# values survive script reruns.
if "is_admin_logged_in" not in st.session_state:
    st.session_state["is_admin_logged_in"] = False

if "analyzed_data" not in st.session_state:
    # Column layout of the cumulative results table shown in the admin tab.
    initial_cols = [
        "Name", "Job Role", "Resume Score (100)", "Email", "Phone", "Shortlisted",
        "Experience Summary", "Education Summary", "Communication Rating (1-10)",
        "Skills/Technologies", "Certifications", "ABA Skills (1-10)",
        "RBT/BCBA Cert", "Autism-Care Exp (1-10)",
    ]
    st.session_state["analyzed_data"] = pd.DataFrame(columns=initial_cols)

if "individual_analysis" not in st.session_state:
    st.session_state["individual_analysis"] = []

if "run_analysis" not in st.session_state:
    st.session_state["run_analysis"] = False
|
|
|
|
|
|
|
|
class ResumeAnalysis(BaseModel):
    """Structured fields extracted from one resume.

    Every field has a default, so an instance can be built from partial data.
    Ratings are kept as strings (for example "8" or "N/A"), not numbers.
    """

    # Contact details.
    name: str = "Unknown"
    email: str = ""
    phone: str = ""
    certifications: List[str] = Field(default_factory=list)
    # Free-text summaries.
    experience_summary: str = ""
    education_summary: str = ""
    communication_skills: str = "N/A"
    technical_skills: List[str] = Field(default_factory=list)
    # Therapist-specific fields; "N/A" when not applicable.
    aba_therapy_skills: Optional[str] = "N/A"
    rbt_bcba_certification: Optional[str] = "N/A"
    autism_care_experience_score: Optional[str] = "N/A"
|
|
|
|
|
|
|
|
def extract_text_from_file(uploaded_file) -> str:
    """Extract text from an uploaded PDF, DOCX or plain-text file.

    Args:
        uploaded_file: File-like object exposing ``read()`` and, ideally,
            ``name`` and ``seek()``.

    Returns:
        The extracted text (stripped for PDF/DOCX), or an empty string when
        the file cannot be read or parsed. This function never raises.
    """
    try:
        # Rewind first: a file object that was already consumed by an earlier
        # pass would otherwise yield empty content on the next read().
        rewind = getattr(uploaded_file, "seek", None)
        if callable(rewind):
            try:
                rewind(0)
            except (OSError, ValueError):
                pass  # Unseekable stream: read whatever is left.

        content = uploaded_file.read()
        filename = (getattr(uploaded_file, "name", "") or "").lower()

        # PDFs are recognised by extension or by their magic bytes.
        if filename.endswith(".pdf") or content[:5] == b"%PDF-":
            with fitz.open(stream=content, filetype="pdf") as doc:
                return "".join(page.get_text() for page in doc).strip()

        if filename.endswith(".docx"):
            document = Document(io.BytesIO(content))
            paragraphs = [p.text for p in document.paragraphs if p.text and p.text.strip()]
            return "\n".join(paragraphs).strip()

        # Anything else is treated as UTF-8 text; undecodable bytes are dropped.
        return content.decode("utf-8", errors="ignore")
    except Exception:
        # Deliberate best-effort: callers treat "" as "could not extract".
        return ""
|
|
|
|
|
|
|
|
def _chunk_text(chunk) -> Optional[str]:
    """Return the text carried by one streamed chunk, or None if it has none."""
    try:
        if isinstance(chunk, dict):
            choices = chunk.get("choices") or []
        else:
            choices = getattr(chunk, "choices", None) or []
        if not choices:
            return None
        first = choices[0]
        # Streaming chunks carry text in `delta`; `message` is checked as a
        # fallback for non-delta payloads.
        for field in ("delta", "message"):
            payload = first.get(field) if isinstance(first, dict) else getattr(first, field, None)
            if payload is None:
                continue
            content = payload.get("content") if isinstance(payload, dict) else getattr(payload, "content", None)
            if isinstance(content, str) and content:
                return content
    except (AttributeError, IndexError, KeyError, TypeError):
        # A malformed chunk contributes nothing. Its repr must never be
        # appended, because that would corrupt the JSON callers parse.
        return None
    return None


def call_groq_stream_collect(prompt: str, model_name: str = "llama-3.3-70b-versatile", temperature: float = 0.2, max_completion_tokens: int = 2048, top_p: float = 1.0) -> Optional[str]:
    """Call Groq chat completions with streaming and collect the text.

    Args:
        prompt: User message sent after the fixed system message.
        model_name: Groq model identifier.
        temperature: Sampling temperature.
        max_completion_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling parameter.

    Returns:
        The concatenated, stripped model text (possibly empty), or None when
        the client is not initialised or the API call fails. Errors are
        reported through ``st.error`` rather than raised.
    """
    if not groq_client:
        st.error("Groq client not initialized. Set GROQ_API_KEY in environment/secrets.")
        return None

    try:
        completion = groq_client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional Resume Analyzer. Return JSON only when asked."},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p,
            stream=True,
        )
        pieces = [text for text in map(_chunk_text, completion) if text]
        return "".join(pieces).strip()
    except Exception as e:
        st.error(f"Groq API call failed: {e}")
        return None
|
|
|
|
|
|
|
|
def extract_first_json(text: str) -> Optional[dict]:
    """Find and parse the first JSON object embedded in *text*.

    Each ``{`` is tried in order as the start of an object; the first one
    that decodes to a dict wins. Surrounding prose, code fences and later
    braces are ignored. If nothing decodes, one more pass is made with single
    quotes replaced by double quotes, to accept Python-style dict output.

    Args:
        text: Raw model output, possibly containing non-JSON text.

    Returns:
        The decoded object as a dict, or None if no JSON object is found.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()

    def _scan(candidate: str) -> Optional[dict]:
        start = candidate.find("{")
        while start != -1:
            try:
                # raw_decode stops at the end of the first complete value, so
                # trailing text after the object is not an error.
                value, _end = decoder.raw_decode(candidate, start)
            except ValueError:
                value = None
            if isinstance(value, dict):
                return value
            start = candidate.find("{", start + 1)
        return None

    parsed = _scan(text)
    if parsed is None and "'" in text:
        parsed = _scan(text.replace("'", '"'))
    return parsed
|
|
|
|
|
|
|
|
def _failed_resume_analysis() -> ResumeAnalysis:
    """Build the sentinel result identified by ``name == "Extraction Failed"``."""
    # All other fields keep their model defaults ("" / [] / "N/A").
    return ResumeAnalysis(name="Extraction Failed")


def _as_text(value, default: str) -> str:
    """Coerce a scalar from the model to a non-empty string, else *default*."""
    if value is None:
        return default
    text = str(value).strip()
    return text or default


def _as_text_list(value) -> List[str]:
    """Coerce a list, a delimited string, or a scalar to a list of strings."""
    if value is None:
        return []
    if isinstance(value, str):
        items = re.split(r"[,;\n]", value)
    elif isinstance(value, (list, tuple, set)):
        items = value
    else:
        items = [value]
    return [str(item).strip() for item in items if item is not None and str(item).strip()]


def _show_raw_model_output(raw: str) -> None:
    """Display the raw model text using static elements only."""
    # A static element is used instead of an input widget because this runs
    # inside a cached function.
    st.caption("Raw model output (debug)")
    st.code(raw, language="json")


@st.cache_data(show_spinner=False)
def analyze_resume_with_groq_cached(resume_text: str, job_role: str) -> ResumeAnalysis:
    """Ask the model to extract structured fields from a resume.

    Results are cached per ``(resume_text, job_role)``.
    NOTE(review): failure results are cached like any other return value, so
    a transient API error stays cached for that input until the cache is
    cleared -- confirm whether that is intended.

    Args:
        resume_text: Plain text of the resume.
        job_role: Target role; "therapist" (case-insensitive) enables the
            therapist-specific instructions in the prompt.

    Returns:
        A ResumeAnalysis. When the call, JSON parsing or schema validation
        fails, the returned instance has ``name == "Extraction Failed"``.
    """
    if job_role.lower() == "therapist":
        therapist_instructions = (
            "Because the role is 'Therapist', carefully search for ABA Therapy Skills, "
            "RBT/BCBA Certification, and Autism-Care Experience. Provide scores 1-10 as STRINGS, or 'N/A'."
        )
    else:
        therapist_instructions = "If therapist-specific fields are not relevant, set them to 'N/A'."

    system_user_prompt = (
        "Return a single JSON object with the following keys exactly: "
        "name (string), email (string), phone (string), certifications (array of strings), "
        "experience_summary (string), education_summary (string), communication_skills (STRING, e.g., '8'), "
        "technical_skills (array of strings), aba_therapy_skills (STRING or 'N/A'), "
        "rbt_bcba_certification (STRING 'Yes'/'No'/'N/A'), autism_care_experience_score (STRING or 'N/A'). "
        f"{therapist_instructions}\n\nResume Text:\n\n{resume_text}\n\nReturn only the JSON object."
    )

    raw = call_groq_stream_collect(system_user_prompt, model_name="llama-3.3-70b-versatile", temperature=0.0, max_completion_tokens=2048)
    if not raw:
        return _failed_resume_analysis()

    parsed = extract_first_json(raw)
    if not parsed or not isinstance(parsed, dict):
        st.warning("Failed to parse model JSON output. See raw output below for debugging.")
        _show_raw_model_output(raw)
        return _failed_resume_analysis()

    # Normalise every field before validation: models return null, bare
    # numbers, or comma-separated strings where strings and lists are expected.
    cleaned = {
        "name": _as_text(parsed.get("name"), "Unknown"),
        "email": _as_text(parsed.get("email"), ""),
        "phone": _as_text(parsed.get("phone"), ""),
        "certifications": _as_text_list(parsed.get("certifications")),
        "experience_summary": _as_text(parsed.get("experience_summary"), ""),
        "education_summary": _as_text(parsed.get("education_summary"), ""),
        "communication_skills": _as_text(parsed.get("communication_skills"), "N/A"),
        "technical_skills": _as_text_list(parsed.get("technical_skills")),
        "aba_therapy_skills": _as_text(parsed.get("aba_therapy_skills"), "N/A"),
        "rbt_bcba_certification": _as_text(parsed.get("rbt_bcba_certification"), "N/A"),
        "autism_care_experience_score": _as_text(parsed.get("autism_care_experience_score"), "N/A"),
    }

    try:
        return ResumeAnalysis(**cleaned)
    except ValidationError as ve:
        st.error("Model output failed schema validation.")
        _show_raw_model_output(raw)
        st.exception(ve)
        return _failed_resume_analysis()
|
|
|
|
|
|
|
|
def _rating_0_to_10(value, default: float) -> float:
    """Parse the first number in *value*, clamped to [0, 10]; *default* if none."""
    found = re.search(r"\d+(?:\.\d+)?", str(value))
    if found is None:
        return default
    return max(0.0, min(10.0, float(found.group())))


def calculate_resume_score(analysis: "ResumeAnalysis", role: str) -> float:
    """Compute a 0-100 heuristic score for an analysed resume.

    Weights:
        * experience summary length, saturating at 100 characters: 40 points
        * number of technical skills, saturating at 10: 30 points
        * communication rating out of 10 (neutral 5 if unparseable): 20 points
        * one point per certification, at most 10
        * "therapist" role only: up to 10 bonus points from the ABA and
          autism-care ratings, each clamped to 0-10 (0 if unparseable)

    Args:
        analysis: Object exposing the ResumeAnalysis fields read below.
        role: Target job role; compared case-insensitively to "therapist".

    Returns:
        The total, capped at 100 and rounded to a whole number, as a float.
    """
    experience_points = min(len(analysis.experience_summary or "") / 100.0, 1.0) * 40.0
    skills_points = min(len(analysis.technical_skills or []) / 10.0, 1.0) * 30.0
    # Ratings without a digit ("N/A", "nan", "inf") score the neutral 5.
    communication_points = (_rating_0_to_10(analysis.communication_skills, 5.0) / 10.0) * 20.0
    certification_points = min(len(analysis.certifications or []), 10) * 1.0

    total_score = experience_points + skills_points + communication_points + certification_points

    if role.lower() == "therapist":
        aba = _rating_0_to_10(analysis.aba_therapy_skills, 0.0)
        autism = _rating_0_to_10(analysis.autism_care_experience_score, 0.0)
        total_score += ((aba + autism) / 20.0) * 10.0

    return float(round(min(total_score, 100)))
|
|
|
|
|
|
|
|
def append_analysis_to_dataframe(job_role: str, analysis: ResumeAnalysis, score: float):
    """Append one analysed resume as a new row of the session results table.

    Args:
        job_role: Role the resume was analysed for.
        analysis: Extracted resume fields.
        score: Computed resume score.

    The new candidate always starts with ``Shortlisted`` set to 'No'. The
    table in ``st.session_state.analyzed_data`` is replaced with a new frame.
    """
    fields = analysis.dict()

    def text(key: str):
        return fields.get(key) or ""

    def rating(key: str) -> str:
        return str(fields.get(key) or "N/A")

    def joined(key: str) -> str:
        return ", ".join(fields.get(key) or [])

    record = {
        'Name': text("name"),
        'Job Role': job_role,
        'Resume Score (100)': score,
        'Email': text("email"),
        'Phone': text("phone"),
        'Shortlisted': 'No',
        'Experience Summary': text("experience_summary"),
        'Education Summary': text("education_summary"),
        'Communication Rating (1-10)': rating("communication_skills"),
        'Skills/Technologies': joined("technical_skills"),
        'Certifications': joined("certifications"),
        'ABA Skills (1-10)': rating("aba_therapy_skills"),
        'RBT/BCBA Cert': rating("rbt_bcba_certification"),
        'Autism-Care Exp (1-10)': rating("autism_care_experience_score"),
    }

    existing = st.session_state.analyzed_data
    st.session_state.analyzed_data = pd.concat([existing, pd.DataFrame([record])], ignore_index=True)
|
|
|
|
|
|
|
|
def df_to_excel_bytes(df: pd.DataFrame) -> bytes:
    """Serialise *df* to an in-memory .xlsx workbook and return its bytes.

    The frame is written without its index to a single sheet named
    "Resume Analysis Data", using the openpyxl engine.
    """
    buffer = io.BytesIO()
    workbook = pd.ExcelWriter(buffer, engine="openpyxl")
    # Leaving the context saves the workbook into the buffer.
    with workbook:
        df.to_excel(workbook, sheet_name="Resume Analysis Data", index=False)
    return buffer.getvalue()
|
|
|
|
|
|
|
|
# Page heading.
# NOTE(review): the leading symbol in these labels looks like a mis-decoded
# emoji -- confirm the file's encoding against the original source.
st.title("π Quantum Scrutiny Platform: AI Resume Analysis (Single-file)")

# Two top-level tabs: the public upload panel and the password-gated dashboard.
tab_user, tab_admin = st.tabs(["π€ Resume Uploader (User Panel)", "π Admin Dashboard (Password Protected)"])
|
|
|
|
|
|
|
|
with tab_user:
    # ---- User panel: choose a role, upload resumes, run the analysis. ----
    st.header("Upload Resumes for Analysis")
    st.info("Upload multiple PDF or DOCX files. The Groq AI engine will extract and score fields.")

    job_role_options = ["Software Engineer", "ML Engineer", "Therapist", "Data Analyst", "Project Manager"]
    selected_role = st.selectbox("1. Select the Target Job Role", options=job_role_options, key="selected_role")

    uploaded_files = st.file_uploader("2. Upload Resumes (PDF or DOCX)", type=["pdf", "docx"], accept_multiple_files=True)

    # The button only raises a flag and reruns; the work happens in the
    # flag-driven section below.
    if st.button("π Analyze All Uploaded Resumes"):
        if not uploaded_files:
            st.warning("Please upload one or more resume files to begin analysis.")
        else:
            st.session_state.run_analysis = True
            st.rerun()

    if st.session_state.get("run_analysis", False):
        if not uploaded_files:
            st.warning("No files found. Upload files and try again.")
            st.session_state.run_analysis = False
        else:
            total = len(uploaded_files)
            progress = st.progress(0)
            st.session_state.individual_analysis = []

            with st.spinner("Processing resumes..."):
                for idx, f in enumerate(uploaded_files, start=1):
                    try:
                        st.write(f"Analyzing **{f.name}**...")
                        resume_text = extract_text_from_file(f)
                        if not resume_text:
                            st.error(f"Could not extract text from {f.name}. Skipping.")
                            continue

                        analysis = analyze_resume_with_groq_cached(resume_text, selected_role)
                        if analysis.name == "Extraction Failed":
                            st.error(f"Extraction failed for {f.name}. See debug output.")
                            continue

                        score = calculate_resume_score(analysis, selected_role)
                        append_analysis_to_dataframe(selected_role, analysis, score)

                        st.session_state.individual_analysis.append({
                            'name': analysis.name,
                            'score': score,
                            'role': selected_role,
                            'file_name': f.name,
                        })
                    except Exception as e:
                        st.error(f"Error analyzing {f.name}: {e}")
                        # Pass the exception object itself, not a formatted
                        # traceback string.
                        st.exception(e)
                    finally:
                        # Runs on success, `continue` and error alike, so the
                        # bar advances exactly once per file.
                        progress.progress(idx / total)

            st.success(f"✅ Successfully processed {len(st.session_state.individual_analysis)} of {total} resumes.")
            st.session_state.run_analysis = False

    # Summary of the most recent run.
    if st.session_state.individual_analysis:
        st.subheader("Last Analysis Summary")
        for item in st.session_state.individual_analysis:
            st.markdown(f"**{item['name']}** (for **{item['role']}**) - **Score: {item['score']}/100**")
        st.markdown("---")
        st.caption("All analyzed data is stored in the Admin Dashboard.")
|
|
|
|
|
|
|
|
with tab_admin:
    # ---- Login gate: nothing below renders until the password matches. ----
    if not st.session_state.is_admin_logged_in:
        st.header("Admin Login")
        password = st.text_input("Enter Admin Password", type="password")
        login_clicked = st.button("π Login")
        if login_clicked:
            if password != ADMIN_PASSWORD:
                st.error("Incorrect password.")
            else:
                st.session_state.is_admin_logged_in = True
                st.rerun()
        st.stop()

    # ---- Dashboard (authenticated). ----
    st.header("π― Recruitment Dashboard")
    logout_clicked = st.button("πͺ Logout")
    if logout_clicked:
        st.session_state.is_admin_logged_in = False
        st.rerun()

    if st.session_state.analyzed_data.empty:
        st.warning("No resume data has been analyzed yet. Please upload files in the User Panel.")
    else:
        df = st.session_state.analyzed_data.copy()
        st.subheader("Candidate Data Table")
        st.success(f"**Total Candidates Analyzed: {len(df)}**")

        display_cols = ['Name', 'Job Role', 'Resume Score (100)', 'Shortlisted', 'Email', 'Skills/Technologies']

        # The Shortlisted column is configured as a required Yes/No dropdown.
        shortlist_column = st.column_config.SelectboxColumn(
            "Shortlisted",
            help="Mark the candidate as Shortlisted or Rejected.",
            options=["No", "Yes"],
            required=True,
        )
        edited_df = st.data_editor(
            df[display_cols],
            column_config={"Shortlisted": shortlist_column},
            key="dashboard_editor",
            hide_index=True,
        )

        # Copy shortlist edits back into the stored table; fall back to a
        # row-by-row write if the bulk assignment fails.
        try:
            st.session_state.analyzed_data.loc[:, 'Shortlisted'] = edited_df['Shortlisted'].values
        except Exception:
            row_count = len(st.session_state.analyzed_data)
            for i, val in enumerate(edited_df['Shortlisted'].tolist()):
                if i < row_count:
                    st.session_state.analyzed_data.at[i, 'Shortlisted'] = val

        st.markdown("---")
        st.subheader("π₯ Download Data")
        df_export = st.session_state.analyzed_data.copy()
        excel_bytes = df_to_excel_bytes(df_export)

        st.download_button(
            label="πΎ Download All Data as Excel (.xlsx)",
            data=excel_bytes,
            file_name="quantum_scrutiny_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            help="Downloads the full table including all extracted fields and shortlist status.",
        )
|
|
|
|
|
|
|
|
|