import json
import os
import re
import shutil
import tempfile
from typing import TypedDict, List

import gdown
import streamlit as st
from dotenv import load_dotenv
from langchain_mistralai import ChatMistralAI
from langgraph.graph import StateGraph, START, END
from pydantic import BaseModel, Field
from PyPDF2 import PdfReader
|
|
|
|
|
|
|
|
|
# Page configuration is the first Streamlit command issued by this script.
st.set_page_config(page_title="HR AI Agent", layout="wide", page_icon="π€")

# Load variables from a local .env file (if present) into the environment.
load_dotenv()


def _load_mistral_api_key():
    """Return the Mistral API key, or ``None`` when it is not configured.

    The ``MISTRAL_API_KEY`` environment variable is consulted first; an unset
    or empty value falls through to Streamlit secrets.
    """
    env_key = os.environ.get("MISTRAL_API_KEY")
    if env_key:
        return env_key
    try:
        return st.secrets.get("MISTRAL_API_KEY")
    except FileNotFoundError:
        # Accessing st.secrets without a secrets file raises (FileNotFoundError
        # or a subclass of it); treat that as "not configured" so the friendly
        # message below is shown instead of a traceback.
        return None


api_key = _load_mistral_api_key()

if not api_key:
    st.error("π Mistral API Key not found. Please set it in your environment variables or secrets.")
    st.stop()
|
|
|
|
|
|
|
|
|
class ScoredCandidate(BaseModel):
    """Structured result requested from the scoring LLM for one candidate.

    All three fields are required; none has a default value.
    """

    # Candidate name as returned by the model.
    name: str
    # Numeric score; the description states the intended 0-100 range.
    score: float = Field(description="Objective score 0.00-100.00.")
    # Short free-text justification of the score.
    review: str = Field(description="Exactly 2 lines of review comment.")
|
|
|
class AgentState(TypedDict):
    """Shared state dictionary passed between the workflow nodes."""

    # --- Inputs supplied when the workflow is invoked ---
    # Link to the Google Drive folder holding the resumes.
    gdrive_link: str
    # Job description text the candidates are scored against.
    job_description: str
    # Number of top-scoring candidates to shortlist.
    num_to_hire: int

    # --- Values produced by the nodes ---
    # Candidate dicts returned by extract_resumes_node.
    raw_candidates: List[dict]
    # Written by rank_candidates_node; holds the keys
    # "all_evaluated_candidates" and "top_n_hired_list".
    evaluated_results: dict
    # Markdown report text written by report_node.
    final_report: str
|
|
|
|
|
|
|
|
|
def download_from_gdrive(url):
    """Download a Google Drive folder into a fresh temporary directory.

    Args:
        url: Link to the Google Drive folder to download.

    Returns:
        Path of the directory containing the downloaded files, or ``None``
        when the download failed (an error is shown in the Streamlit UI and
        the temporary directory is removed). On success the caller is
        responsible for deleting the returned directory.
    """
    # A unique directory per call: a single fixed "temp_resumes" path would be
    # wiped by any other session that starts a download at the same time.
    temp_dir = tempfile.mkdtemp(prefix="temp_resumes_")

    try:
        downloaded = gdown.download_folder(
            url,
            output=temp_dir,
            quiet=True,
            remaining_ok=True,
            use_cookies=False,
        )
    except Exception as e:
        shutil.rmtree(temp_dir, ignore_errors=True)
        st.error(f"Error downloading from Google Drive: {e}")
        return None

    if downloaded is None:
        # gdown reports some failures by returning None instead of raising.
        shutil.rmtree(temp_dir, ignore_errors=True)
        st.error(
            "Error downloading from Google Drive: the folder could not be "
            "retrieved. Check that the link is a publicly shared folder."
        )
        return None

    return temp_dir
|
|
|
# Resumes whose extracted text is shorter than this are skipped.
_MIN_RESUME_CHARS = 50
# Only this many leading characters of a resume are sent for extraction.
_MAX_PROMPT_CHARS = 7000


def _find_pdf_files(folder_path):
    """Return paths of all ``.pdf`` files found under *folder_path*, recursively."""
    return [
        os.path.join(root, filename)
        for root, _dirs, filenames in os.walk(folder_path)
        for filename in filenames
        if filename.lower().endswith(".pdf")
    ]


def _read_pdf_text(path):
    """Return the concatenated text of every page of the PDF at *path*."""
    reader = PdfReader(path)
    # extract_text() may yield nothing for a page; substitute an empty string.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _extract_candidate_json(llm, raw_text):
    """Ask *llm* for structured resume details and return them as a dict.

    Returns ``None`` when the response contains no ``{...}`` span. The full
    resume text is attached to the result under ``"resume_text"``.
    """
    prompt = f"Extract details from this resume into JSON (name, email, phone, skills, experience_years):\n{raw_text[:_MAX_PROMPT_CHARS]}"
    response = llm.invoke(prompt)
    # Greedy match from the first "{" to the last "}" so that any text the
    # model wraps around the JSON object is ignored.
    json_match = re.search(r"\{.*\}", response.content, re.DOTALL)
    if json_match is None:
        return None
    candidate_data = json.loads(json_match.group())
    candidate_data["resume_text"] = raw_text
    return candidate_data


def process_pdfs_to_json(folder_path):
    """Convert every PDF resume under *folder_path* into a candidate dict.

    Each PDF is read, its text is sent to the Mistral model for field
    extraction, and the parsed JSON object is collected. Processing is
    best-effort per file: a file that cannot be read or parsed is skipped
    with a warning in the UI instead of aborting the whole batch.

    Args:
        folder_path: Directory that is searched recursively for ``.pdf`` files.

    Returns:
        A list of candidate dicts (possibly empty). Each dict holds the fields
        returned by the model plus the raw ``"resume_text"``.
    """
    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)

    files = _find_pdf_files(folder_path)
    if not files:
        st.warning("No PDF files found in the folder.")
        return []

    all_candidates_json = []
    progress_bar = st.progress(0)
    status_text = st.empty()

    for i, path in enumerate(files):
        filename = os.path.basename(path)
        status_text.text(f"π Analyzing: {filename}")
        try:
            raw_text = _read_pdf_text(path)
            if len(raw_text.strip()) < _MIN_RESUME_CHARS:
                st.warning(f"Skipped {filename}: too little extractable text.")
            else:
                candidate_data = _extract_candidate_json(llm, raw_text)
                if candidate_data is None:
                    st.warning(f"Skipped {filename}: the model response contained no JSON object.")
                else:
                    all_candidates_json.append(candidate_data)
        except Exception as e:
            # Per-file boundary: report the failure and keep going.
            st.warning(f"Skipped {filename}: {e}")
        # Advance for every file, including skipped ones.
        progress_bar.progress((i + 1) / len(files))

    status_text.empty()
    progress_bar.empty()
    return all_candidates_json
|
|
|
|
|
|
|
|
|
def extract_resumes_node(state: AgentState):
    """Workflow node: download the resume folder and parse it into candidates.

    Args:
        state: Workflow state; only ``gdrive_link`` is read.

    Returns:
        A state update with ``raw_candidates`` set to the parsed candidate
        dicts, or to an empty list when the download failed.
    """
    st.write("---")
    st.info("β‘ **Phase 1:** Fetching resumes from Google Drive...")

    temp_path = download_from_gdrive(state['gdrive_link'])
    if not temp_path:
        return {"raw_candidates": []}

    try:
        candidates = process_pdfs_to_json(temp_path)
    finally:
        # Remove the downloaded files even when parsing raises; a cleanup
        # problem must not discard candidates that were already parsed.
        shutil.rmtree(temp_path, ignore_errors=True)

    return {"raw_candidates": candidates}
|
|
|
|
|
|
|
# Prompt sent once per candidate; the two placeholders are filled in by
# rank_candidates_node.
_SCORING_PROMPT_TEMPLATE = """
YOU ARE AN EXPERT RECRUITER. Evaluate the candidate against the Job Description (JD).

### JOB DESCRIPTION:
{job_description}

### CANDIDATE DATA:
{candidate_json}

### SCORING RUBRIC (Strict 100-Point Scale):
1. Technical Skill Match (40 pts): Compare 'skills' in candidate data to JD requirements.
2. Experience Level (30 pts): Rate years of experience and seniority fit.
3. Industry Fit (20 pts): Does their previous experience align with this JD's industry?
4. Education/Certifications (10 pts): Does the candidate meet the degree requirements?

### RULES:
- You must be OBJECTIVE. If a skill is not explicitly mentioned, do not award points for it.
- Temperature is set to 0; provide the most logical mathematical score.
- The 'review' must explain exactly why points were deducted.
- You must not make tie between candidates.
"""


def rank_candidates_node(state: AgentState):
    """Workflow node: score every candidate against the job description.

    Each candidate is evaluated independently with a weighted rubric by a
    Mistral model configured with temperature 0 and structured output
    (``ScoredCandidate``). A candidate whose evaluation fails is kept in the
    results with a score of 0.0 and the failure noted in its review.

    Args:
        state: Workflow state; reads ``raw_candidates``, ``job_description``
            and ``num_to_hire``.

    Returns:
        A state update whose ``evaluated_results`` dict contains
        ``all_evaluated_candidates`` (in input order) and
        ``top_n_hired_list`` (the ``num_to_hire`` highest scores, descending).
    """
    print("\n" + "=" * 50)
    print("π STEP 2: DETERMINISTIC SCORING ENGINE")
    print("=" * 50)

    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
    structured_llm = llm.with_structured_output(ScoredCandidate)

    scored_list = []

    for cand in state['raw_candidates']:
        # `or` also covers an explicit null/empty name in the extracted JSON.
        name = cand.get('name') or 'Unknown Candidate'
        print(f"π§ Analyzing: {name}...")

        prompt = _SCORING_PROMPT_TEMPLATE.format(
            job_description=state['job_description'],
            candidate_json=json.dumps(cand),
        )

        try:
            result = structured_llm.invoke(prompt)

            if result:
                scored_list.append(result.model_dump())
                print(f"✅ Scored {name}: {result.score}/100")
            else:
                scored_list.append({"name": name, "score": 0.0, "review": "Parsing error in AI output."})

        except Exception as e:
            # Best-effort per candidate: record the failure and continue.
            print(f"β οΈ Error scoring {name}: {e}")
            scored_list.append({"name": name, "score": 0.0, "review": f"Processing Error: {e}"})

    # sorted() is stable, so equal scores keep their input order.
    sorted_all = sorted(scored_list, key=lambda item: item['score'], reverse=True)

    return {
        "evaluated_results": {
            "all_evaluated_candidates": scored_list,
            "top_n_hired_list": sorted_all[:state['num_to_hire']],
        }
    }
|
|
|
|
|
def report_node(state: AgentState):
    """Workflow node: format the shortlisted candidates as a Markdown report.

    Args:
        state: Workflow state; reads ``evaluated_results["top_n_hired_list"]``.

    Returns:
        A state update with ``final_report`` set to one entry per shortlisted
        candidate (name, score, review), joined by newlines.
    """
    st.info("β‘ **Phase 3:** Compiling final report...")

    shortlisted = state['evaluated_results']['top_n_hired_list']

    entries = []
    for candidate in shortlisted:
        entries.append(
            f"π **{candidate['name']}** (Score: {candidate['score']})\n{candidate['review']}\n"
        )

    return {"final_report": "\n".join(entries)}
|
|
|
|
|
|
|
|
|
def _build_workflow():
    """Assemble the linear parser -> ranker -> reporter graph (uncompiled)."""
    graph = StateGraph(AgentState)

    # Nodes, listed in execution order.
    stages = (
        ("parser", extract_resumes_node),
        ("ranker", rank_candidates_node),
        ("reporter", report_node),
    )
    for stage_name, stage_fn in stages:
        graph.add_node(stage_name, stage_fn)

    # Chain START -> parser -> ranker -> reporter -> END.
    path = (START, "parser", "ranker", "reporter", END)
    for source, target in zip(path, path[1:]):
        graph.add_edge(source, target)

    return graph


workflow = _build_workflow()
app = workflow.compile()
|
|
|
|
|
|
|
|
|
# ---- Main page: inputs, pipeline trigger and shortlist ----
st.title("π AI HR Agent: Google Drive Edition")

col1, col2 = st.columns([2, 1])

with col1:
    jd_input = st.text_area("π Job Description", placeholder="Paste the job requirements here...", height=200)

with col2:
    gdrive_link = st.text_input("π Public GDrive Folder Link")
    hire_count = st.number_input("Selection Count (Top N)", min_value=1, max_value=20, value=3)
    analyze_btn = st.button("π Run Analysis", type="primary", use_container_width=True)

if analyze_btn:
    # Whitespace-only input counts as missing.
    if not jd_input.strip() or not gdrive_link.strip():
        st.warning("Please provide both a Job Description and a Google Drive Link.")
    else:
        inputs = {
            "gdrive_link": gdrive_link,
            "job_description": jd_input,
            "num_to_hire": int(hire_count),
            "raw_candidates": [],
        }

        with st.status("AI Agent is working...", expanded=True) as status:
            final_state = app.invoke(inputs)
            status.update(label="Analysis Complete!", state="complete")

        st.session_state.result_state = final_state
        st.session_state.jd = jd_input
        # A new analysis invalidates any conversation about the previous one.
        st.session_state.messages = []

# Render from session state rather than only in the button-click run:
# Streamlit re-executes the script on every interaction, and the button
# reads False on those reruns, which would make the shortlist disappear.
if "result_state" in st.session_state:
    st.success("### π Shortlisted Candidates")
    st.markdown(st.session_state.result_state["final_report"])
|
|
|
|
|
|
|
|
|
# ---- Follow-up chat about the evaluation results ----
# Shown only once an analysis result has been stored in the session.
if "result_state" in st.session_state:
    st.divider()
    st.subheader("π¬ Deep-Dive: Ask the HR Agent")

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far; the script re-runs on every interaction.
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    if prompt := st.chat_input("Ex: Why was John selected but Sarah wasn't?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        evaluated = st.session_state.result_state['evaluated_results']
        all_evals = evaluated['all_evaluated_candidates']
        top_hired = [c['name'] for c in evaluated['top_n_hired_list']]

        # One summary record per evaluated candidate; selection status is
        # decided by whether the name appears in the shortlist.
        knowledge_base = [
            {
                "name": eval_item['name'],
                "score": eval_item['score'],
                "status": (
                    "SELECTED/TOP-TIER"
                    if eval_item['name'] in top_hired
                    else "DESELECTED/LOWER-RANKED"
                ),
                "reasoning": eval_item['review'],
            }
            for eval_item in all_evals
        ]

        chat_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key)

        context_message = f"""
You are an HR Analytics Bot. You have full access to the scoring results for ALL candidates.

JOB DESCRIPTION:
{st.session_state.jd}

CANDIDATE DATA (Scores and Status):
{json.dumps(knowledge_base, indent=2)}

INSTRUCTIONS:
1. Answer questions about specific candidates using the 'reasoning' and 'score' provided.
2. If asked why someone was deselected, compare their score/reasoning to the higher-scoring candidates.
3. Use Markdown tables if asked to compare multiple people.
"""

        # Send the whole conversation (the new question is already its last
        # entry) so follow-up questions keep their context.
        conversation = [("system", context_message)]
        conversation.extend(
            (turn["role"], turn["content"]) for turn in st.session_state.messages
        )

        with st.chat_message("assistant"):
            try:
                response = chat_llm.invoke(conversation)
            except Exception as e:
                # Drop the unanswered question so the stored history keeps
                # alternating user/assistant turns.
                st.session_state.messages.pop()
                st.error(f"The assistant could not answer: {e}")
            else:
                st.markdown(response.content)
                st.session_state.messages.append({"role": "assistant", "content": response.content})
|
|
|
|
|
|
|