Spaces:

IET-DEV
/

HR-Bot-V1

Sleeping

App Files Files Community

HR-Bot-V1 / app.py

Tarun-intellentech

Upload 3 files

2f174fb verified 2 months ago

raw

history blame contribute delete

12.1 kB

	import os
	import json
	import re
	import gdown
	import shutil
	import streamlit as st
	from dotenv import load_dotenv
	from PyPDF2 import PdfReader
	from typing import TypedDict, List
	from pydantic import BaseModel, Field

	# Mistral & LangGraph Imports
	from langchain_mistralai import ChatMistralAI
	from langgraph.graph import StateGraph, START, END

	# =================================================================
	# 1. SETUP & UI STYLING
	# =================================================================
	st.set_page_config(page_title="HR AI Agent", layout="wide", page_icon="👤")
	# Load variables from a local .env file (if present) into os.environ.
	load_dotenv()

	# Resolve the Mistral key: environment first (local runs / .env), then
	# Streamlit secrets (hosted deployments).
	api_key = os.environ.get("MISTRAL_API_KEY")
	if not api_key:
	    try:
	        api_key = st.secrets.get("MISTRAL_API_KEY")
	    except Exception:
	        # Accessing st.secrets raises when no secrets.toml exists, and the
	        # exception class differs between Streamlit releases. Treat any
	        # failure here as "no key configured" so the friendly message below
	        # is shown instead of a traceback.
	        api_key = None

	if not api_key:
	    st.error("🔑 Mistral API Key not found. Please set it in your environment variables or secrets.")
	    st.stop()

	# =================================================================
	# 2. DATA SCHEMAS
	# =================================================================
	# Structured-output schema for one scored candidate. rank_candidates_node
	# passes this model to `with_structured_output`, and stores `model_dump()`
	# of each returned instance.
	class ScoredCandidate(BaseModel):
	    # Candidate name as returned by the model.
	    name: str
	    # Numeric score. The 0-100 range is only requested through the field
	    # description; no ge/le bound is declared here.
	    score: float = Field(..., description="Objective score 0.00-100.00.")
	    # Free-text justification. The two-line length is likewise only
	    # requested through the description.
	    review: str = Field(..., description="Exactly 2 lines of review comment.")

	# Shared state passed between the LangGraph nodes defined in this file.
	class AgentState(TypedDict):
	    # Google Drive folder link; read by extract_resumes_node.
	    gdrive_link: str
	    # Job description text; interpolated into the scoring prompt.
	    job_description: str
	    # Size of the top slice taken by rank_candidates_node.
	    num_to_hire: int
	    # Written by extract_resumes_node: one dict per parsed resume (the JSON
	    # object returned by the LLM plus a "resume_text" key).
	    raw_candidates: List[dict]
	    # Written by rank_candidates_node with the keys
	    # "all_evaluated_candidates" and "top_n_hired_list".
	    evaluated_results: dict
	    # Written by report_node: the text shown as the shortlist.
	    final_report: str

	# =================================================================
	# 3. HELPER FUNCTIONS
	# =================================================================
	def download_from_gdrive(url):
	    """Download a shared Google Drive folder into a fresh local directory.

	    Args:
	        url: Link to the Google Drive folder to download.

	    Returns:
	        The path of the directory holding the download ("temp_resumes"),
	        or None when gdown raised. In the failure case the error is shown
	        in the UI and the directory is removed again.
	    """
	    temp_dir = "temp_resumes"
	    # Start from an empty directory so files from an earlier run are never
	    # picked up again.
	    if os.path.exists(temp_dir):
	        shutil.rmtree(temp_dir)
	    os.makedirs(temp_dir)

	    try:
	        # Note: GDrive folders must be "Anyone with the link"
	        gdown.download_folder(url, output=temp_dir, quiet=True, remaining_ok=True, use_cookies=False)
	    except Exception as e:
	        st.error(f"Error downloading from Google Drive: {e}")
	        # The caller only cleans up when it receives a path, so remove the
	        # (possibly partial) download here instead of leaving it on disk.
	        shutil.rmtree(temp_dir, ignore_errors=True)
	        return None
	    return temp_dir

	def _extract_candidate(llm, path):
	    """Read one PDF and ask the LLM to turn it into a candidate dict.

	    Raises:
	        ValueError: if the PDF yields fewer than 50 characters of text
	            (empty or scanned without OCR) or the model reply contains no
	            JSON object. Errors from PdfReader, the LLM call or json.loads
	            propagate unchanged.
	    """
	    reader = PdfReader(path)
	    raw_text = "".join(page.extract_text() or "" for page in reader.pages)

	    if len(raw_text.strip()) < 50:
	        raise ValueError("no extractable text (empty or scanned PDF without OCR)")

	    # Only the first 7000 characters are sent for extraction.
	    prompt = f"Extract details from this resume into JSON (name, email, phone, skills, experience_years):\n{raw_text[:7000]}"
	    response = llm.invoke(prompt)
	    # Greedy match from the first "{" to the last "}" so any text around the
	    # JSON object in the reply is ignored.
	    json_match = re.search(r"\{.*\}", response.content, re.DOTALL)
	    if not json_match:
	        raise ValueError("model reply did not contain a JSON object")

	    candidate_data = json.loads(json_match.group())
	    # Keep the full text so later steps can see more than the extracted fields.
	    candidate_data["resume_text"] = raw_text
	    return candidate_data


	def process_pdfs_to_json(folder_path):
	    """Convert every PDF under `folder_path` into a structured candidate dict.

	    Args:
	        folder_path: Directory to search recursively for ``.pdf`` files.

	    Returns:
	        A list of candidate dicts (LLM-extracted fields plus "resume_text").
	        Files that cannot be processed are skipped with a UI warning rather
	        than silently dropped; an empty list is returned when no PDF exists.
	    """
	    llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)

	    # Get all PDFs, including those in subfolders created by gdown
	    pdf_paths = [
	        os.path.join(root, fname)
	        for root, _dirs, filenames in os.walk(folder_path)
	        for fname in filenames
	        if fname.lower().endswith(".pdf")
	    ]

	    if not pdf_paths:
	        st.warning("No PDF files found in the folder.")
	        return []

	    progress_bar = st.progress(0)
	    status_text = st.empty()
	    all_candidates_json = []
	    total = len(pdf_paths)

	    for done, path in enumerate(pdf_paths, start=1):
	        filename = os.path.basename(path)
	        status_text.text(f"🔍 Analyzing: {filename}")
	        try:
	            all_candidates_json.append(_extract_candidate(llm, path))
	        except Exception as exc:
	            # One bad resume must not abort the batch, but the user should
	            # know that a candidate was left out and why.
	            st.warning(f"⚠️ Skipped {filename}: {exc}")
	        # Advance for every file, including skipped ones.
	        progress_bar.progress(done / total)

	    status_text.empty()
	    progress_bar.empty()
	    return all_candidates_json

	# =================================================================
	# 4. AGENT NODES
	# =================================================================
	def extract_resumes_node(state: AgentState):
	    """Graph node: download the resume folder and parse it into candidates.

	    Args:
	        state: Current graph state; only ``gdrive_link`` is read.

	    Returns:
	        A state update ``{"raw_candidates": [...]}``; the list is empty when
	        the download failed.
	    """
	    st.write("---")
	    st.info("⚡ Phase 1: Fetching resumes from Google Drive...")
	    temp_path = download_from_gdrive(state['gdrive_link'])
	    if not temp_path:
	        return {"raw_candidates": []}

	    try:
	        candidates = process_pdfs_to_json(temp_path)
	    finally:
	        # Cleanup runs even if parsing raises, and a failure to delete the
	        # folder must not mask the parsing result.
	        shutil.rmtree(temp_path, ignore_errors=True)
	    return {"raw_candidates": candidates}



	def rank_candidates_node(state: AgentState):
	    """Graph node: score each parsed candidate and select the top N.

	    Every entry of ``state['raw_candidates']`` is sent to a temperature-0
	    Mistral model together with the job description and a 100-point rubric.
	    The model answers through the ``ScoredCandidate`` schema. A candidate
	    whose scoring fails is kept with a score of 0.0 and the reason in
	    ``review``.

	    Returns:
	        A state update whose ``evaluated_results`` holds
	        ``all_evaluated_candidates`` (in processing order) and
	        ``top_n_hired_list`` (highest scores first, cut to ``num_to_hire``).
	    """
	    banner = "=" * 50
	    print("\n" + banner)
	    print("🚀 STEP 2: DETERMINISTIC SCORING ENGINE")
	    print(banner)

	    # Temperature 0 keeps repeated runs as consistent as the model allows.
	    base_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key, temperature=0)
	    scorer = base_llm.with_structured_output(ScoredCandidate)

	    evaluations = []

	    for candidate in state['raw_candidates']:
	        name = candidate.get('name', 'Unknown Candidate')
	        print(f"🧠 Analyzing: {name}...")

	        # Point-based rubric prompt; the text is sent to the model verbatim.
	        prompt = f"""
	YOU ARE AN EXPERT RECRUITER. Evaluate the candidate against the Job Description (JD).

	### JOB DESCRIPTION:
	{state['job_description']}

	### CANDIDATE DATA:
	{json.dumps(candidate)}

	### SCORING RUBRIC (Strict 100-Point Scale):
	1. Technical Skill Match (40 pts): Compare 'skills' in candidate data to JD requirements.
	2. Experience Level (30 pts): Rate years of experience and seniority fit.
	3. Industry Fit (20 pts): Does their previous experience align with this JD's industry?
	4. Education/Certifications (10 pts): Does the candidate meet the degree requirements?

	### RULES:
	- You must be OBJECTIVE. If a skill is not explicitly mentioned, do not award points for it.
	- Temperature is set to 0; provide the most logical mathematical score.
	- The 'review' must explain exactly why points were deducted.
	- You must not make tie between candidates.
	"""

	        try:
	            verdict = scorer.invoke(prompt)

	            if not verdict:
	                # The call succeeded but produced no structured result.
	                evaluations.append({"name": name, "score": 0.0, "review": "Parsing error in AI output."})
	            else:
	                evaluations.append(verdict.model_dump())
	                print(f"✅ Scored {name}: {verdict.score}/100")

	        except Exception as e:
	            print(f"⚠️ Error scoring {name}: {e}")
	            evaluations.append({"name": name, "score": 0.0, "review": f"Processing Error: {str(e)}"})

	    # Rank a copy so the full list keeps its processing order.
	    ranked = list(evaluations)
	    ranked.sort(key=lambda entry: entry['score'], reverse=True)
	    shortlist = ranked[:state['num_to_hire']]

	    return {
	        "evaluated_results": {
	            "all_evaluated_candidates": evaluations,
	            "top_n_hired_list": shortlist,
	        }
	    }


	def report_node(state: AgentState):
	    """Graph node: format the shortlisted candidates as the final report.

	    Reads ``state['evaluated_results']['top_n_hired_list']`` and returns
	    ``{"final_report": text}`` with one entry per candidate: a headline with
	    name and score, followed by the review.
	    """
	    st.info("⚡ Phase 3: Compiling final report...")
	    shortlisted = state['evaluated_results']['top_n_hired_list']

	    entries = []
	    for pick in shortlisted:
	        entries.append(f"🏆 {pick['name']} (Score: {pick['score']})\n{pick['review']}\n")

	    return {"final_report": "\n".join(entries)}

	# =================================================================
	# 5. GRAPH ORCHESTRATION
	# =================================================================
	workflow = StateGraph(AgentState)

	# Pipeline stages in execution order: node name -> handler.
	_pipeline_nodes = (
	    ("parser", extract_resumes_node),
	    ("ranker", rank_candidates_node),
	    ("reporter", report_node),
	)
	for _node_name, _node_fn in _pipeline_nodes:
	    workflow.add_node(_node_name, _node_fn)

	# Linear flow: START -> parser -> ranker -> reporter -> END.
	_pipeline_edges = (
	    (START, "parser"),
	    ("parser", "ranker"),
	    ("ranker", "reporter"),
	    ("reporter", END),
	)
	for _edge_from, _edge_to in _pipeline_edges:
	    workflow.add_edge(_edge_from, _edge_to)

	app = workflow.compile()

	# =================================================================
	# 6. UI LAYOUT
	# =================================================================
	st.title("🌟 AI HR Agent: Google Drive Edition")

	# Two columns at a 2:1 ratio: job description on the left, run controls on
	# the right.
	col1, col2 = st.columns([2, 1])

	jd_input = col1.text_area(
	    "📋 Job Description",
	    placeholder="Paste the job requirements here...",
	    height=200,
	)

	gdrive_link = col2.text_input("🔗 Public GDrive Folder Link")
	hire_count = col2.number_input(
	    "Selection Count (Top N)",
	    min_value=1,
	    max_value=20,
	    value=3,
	)
	analyze_btn = col2.button(
	    "🚀 Run Analysis",
	    type="primary",
	    use_container_width=True,
	)

	if analyze_btn:
	    # Treat whitespace-only input as missing.
	    if not jd_input.strip() or not gdrive_link.strip():
	        st.warning("Please provide both a Job Description and a Google Drive Link.")
	    else:
	        inputs = {
	            "gdrive_link": gdrive_link.strip(),
	            "job_description": jd_input,
	            "num_to_hire": int(hire_count),
	            "raw_candidates": []
	        }

	        with st.status("AI Agent is working...", expanded=True) as status:
	            final_state = app.invoke(inputs)
	            status.update(label="Analysis Complete!", state="complete")

	        st.session_state.result_state = final_state
	        st.session_state.jd = jd_input
	        # A new analysis invalidates any conversation about the previous one.
	        st.session_state.messages = []

	# Render from session state rather than inside the button branch: the button
	# is only True on the run triggered by the click, so the shortlist would
	# otherwise vanish on the next rerun (for example after a chat message).
	if "result_state" in st.session_state:
	    st.success("### 📋 Shortlisted Candidates")
	    stored_report = st.session_state.result_state["final_report"]
	    if stored_report:
	        st.markdown(stored_report)
	    else:
	        st.info("No candidates could be evaluated for this run.")

	# =================================================================
	# 7. CHATBOT (FIXED: ACCESS TO ALL CANDIDATES)
	# =================================================================
	# Follow-up Q&A over the stored analysis. It only appears once an analysis
	# result exists in the session.
	if "result_state" in st.session_state:
	    st.divider()
	    st.subheader("💬 Deep-Dive: Ask the HR Agent")

	    # Initialize chat history
	    if "messages" not in st.session_state:
	        st.session_state.messages = []

	    # Display chat history
	    for msg in st.session_state.messages:
	        with st.chat_message(msg["role"]):
	            st.markdown(msg["content"])

	    if prompt := st.chat_input("Ex: Why was John selected but Sarah wasn't?"):
	        st.session_state.messages.append({"role": "user", "content": prompt})
	        with st.chat_message("user"):
	            st.markdown(prompt)

	        # 1. PREPARE LEAN DATA: only name/score/status/review are sent, not
	        # the resume text.
	        evaluated = st.session_state.result_state['evaluated_results']
	        all_evals = evaluated['all_evaluated_candidates']
	        # A set makes the per-candidate membership test O(1).
	        top_hired = {c['name'] for c in evaluated['top_n_hired_list']}

	        # Build a summarized list of EVERY candidate
	        knowledge_base = [
	            {
	                "name": eval_item['name'],
	                "score": eval_item['score'],
	                "status": (
	                    "SELECTED/TOP-TIER"
	                    if eval_item['name'] in top_hired
	                    else "DESELECTED/LOWER-RANKED"
	                ),
	                "reasoning": eval_item['review'],
	            }
	            for eval_item in all_evals
	        ]

	        # 2. SYSTEM INSTRUCTIONS FOR THE AI
	        chat_llm = ChatMistralAI(model="mistral-large-latest", api_key=api_key)

	        context_message = f"""
	You are an HR Analytics Bot. You have full access to the scoring results for ALL candidates.

	JOB DESCRIPTION:
	{st.session_state.jd}

	CANDIDATE DATA (Scores and Status):
	{json.dumps(knowledge_base, indent=2)}

	INSTRUCTIONS:
	1. Answer questions about specific candidates using the 'reasoning' and 'score' provided.
	2. If asked why someone was deselected, compare their score/reasoning to the higher-scoring candidates.
	3. Use Markdown tables if asked to compare multiple people.
	"""

	        # Send the system context plus the whole conversation so follow-up
	        # questions ("and what about her?") keep their context. The current
	        # question is already the last entry of the history.
	        conversation = [("system", context_message)]
	        conversation.extend(
	            (turn["role"], turn["content"]) for turn in st.session_state.messages
	        )

	        with st.chat_message("assistant"):
	            try:
	                response = chat_llm.invoke(conversation)
	            except Exception as exc:
	                st.error(f"⚠️ The assistant could not answer: {exc}")
	                # Drop the unanswered question so the stored history keeps
	                # alternating user/assistant turns.
	                st.session_state.messages.pop()
	            else:
	                st.markdown(response.content)
	                st.session_state.messages.append({"role": "assistant", "content": response.content})