|
import gradio as gr |
|
import os |
|
import json |
|
import time |
|
import io |
|
from typing import Tuple, Optional, List |
|
from dotenv import load_dotenv |
|
from docx import Document |
|
from langchain_anthropic import ChatAnthropic |
|
import re |
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate |
|
from langchain import hub |
|
import pytesseract |
|
from pdf2image import convert_from_path |
|
import logging |
|
from langsmith import traceable, Client |
|
from langchain_core.tracers.context import collect_runs |
|
from typing import List, Dict |
|
from datetime import datetime |
|
|
|
# Configure root logging once; only errors are emitted.
# (A duplicate logging.basicConfig call was removed: the second call is a
# no-op once the root logger has handlers.)
logging.basicConfig(level=logging.ERROR)

# Load environment variables (API keys, Gradio credentials) from .env
# before anything reads them.
load_dotenv()

# LangSmith client used for run-feedback submission.
client = Client()
|
|
|
# Shared Gradio theme: larger base text size, and small buttons bumped up
# to the large text token so all controls read consistently.
theme = gr.themes.Default(
    text_size="lg",
).set(
    button_small_text_size='*text_lg'
)
|
|
|
try:
    # Claude model plus the two prompts pulled from the LangChain Hub.
    # Any failure here (bad API key, network, missing hub entry) leaves all
    # three globals as None so the app can still start; downstream calls
    # will then fail and be logged instead of crashing at import time.
    model: Optional[ChatAnthropic] = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=os.getenv("ANTHROPIC_API_KEY"))
    hub_prompt: Optional[ChatPromptTemplate] = hub.pull("talent_assistant")
    hub_prompt_questions: Optional[ChatPromptTemplate] = hub.pull("talent_interview_questions")
except Exception as e:
    logging.error(f"Error initializing ChatAnthropic or pulling hub prompt: {str(e)}")
    model = None
    hub_prompt = None
    # Bug fix: this reset was missing, so a hub failure left the name
    # undefined and process_questions() raised NameError later.
    hub_prompt_questions = None
|
|
|
def check_password(username: str, password: str) -> bool:
    """
    Check if the provided username and password match the environment variables.

    Uses a constant-time comparison so the auth check does not leak
    credential prefixes through response timing.

    Args:
        username (str): The username to check.
        password (str): The password to check.

    Returns:
        bool: True if the credentials are correct, False otherwise.
        Always False when GRADIO_USERNAME / GRADIO_PASSWORD are unset.
    """
    import hmac  # local import keeps the module's import block unchanged

    expected_user = os.getenv("GRADIO_USERNAME")
    expected_pass = os.getenv("GRADIO_PASSWORD")
    if expected_user is None or expected_pass is None:
        # Matches original behavior: a str credential never equals None.
        return False
    # Compare as bytes: compare_digest on str rejects non-ASCII input.
    return hmac.compare_digest(username.encode("utf-8"), expected_user.encode("utf-8")) and \
        hmac.compare_digest(password.encode("utf-8"), expected_pass.encode("utf-8"))
|
|
|
def extract_human_message_template(chat_prompt: ChatPromptTemplate) -> Optional[HumanMessagePromptTemplate]:
    """
    Pull the inner prompt of the first human message out of a ChatPromptTemplate.

    Args:
        chat_prompt (ChatPromptTemplate): The chat prompt to search.

    Returns:
        The `.prompt` of the first HumanMessagePromptTemplate found, or
        None when there is none or the lookup fails.
    """
    try:
        human_messages = (m for m in chat_prompt.messages if isinstance(m, HumanMessagePromptTemplate))
        first = next(human_messages, None)
        if first is not None:
            return first.prompt
    except Exception as e:
        logging.error(f"Error extracting human message template: {str(e)}")
    return None
|
|
|
def clean_bullet_points(text: str) -> str:
    """
    Normalize OCR-mangled bullet markers at line starts.

    Tesseract often reads "•" as "e" and "◦" as "eo"; "+" is a common
    plain-text bullet. Each is mapped back to a proper bullet glyph.

    Args:
        text (str): The text to clean.

    Returns:
        str: The text with standardized bullet points (unchanged on error).
    """
    # Order matters: '^e\s' is tried before '^eo\s', exactly as before
    # (they cannot both match the same line start).
    substitutions = (
        (r'(?m)^e\s', '• '),
        (r'(?m)^eo\s', ' ◦ '),
        (r'(?m)^\+\s', '• '),
    )
    try:
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)
    except Exception as e:
        logging.error(f"Error cleaning bullet points: {str(e)}")
    return text
|
|
|
def pdf_to_text_ocr(file_path: str) -> str:
    """
    Convert a PDF file to text using OCR (pdf2image + Tesseract, PSM 6).

    Args:
        file_path (str): The path to the PDF file.

    Returns:
        str: The cleaned, printable-ASCII text extracted from the PDF;
        an empty string if any step fails.
    """
    try:
        # Bug fix: the old annotation `List[Image.Image]` referenced `Image`,
        # which is never imported in this module (pdf2image returns PIL
        # images); the annotation was dropped rather than left misleading.
        images = convert_from_path(file_path)
        text: str = ""
        for image in images:
            page_text: str = pytesseract.image_to_string(image, config='--psm 6')
            # Best-effort ASCII-safe round trip. errors='ignore' cannot raise
            # UnicodeEncodeError, so the old fallback except was dead code.
            page_text = page_text.encode('utf-8', errors='ignore').decode('utf-8')
            text += page_text + "\n\n"

        text = text.replace('-\n', '')               # re-join words hyphenated at line breaks
        text = re.sub(r' +', ' ', text)              # collapse runs of spaces
        text = re.sub(r'\n{3,}', '\n\n', text)       # collapse blank-line runs
        text = re.sub(r'[^\x20-\x7E\n]', '', text)   # strip non-printable / non-ASCII chars
        text = text.strip()
        text = clean_bullet_points(text)
    except Exception as e:
        logging.error(f"Error in pdf_to_text_ocr: {str(e)}")
        text = ""
    return text
|
|
|
def process_questions(*args: str) -> Tuple[str, str]:
    """
    Generate interview questions from a CV and job description.

    Args:
        *args: args[0] is the CV text, args[1] is the job description text.

    Returns:
        Tuple[str, str]: The generated questions and the LangSmith run ID
        (error message and empty string on failure).
    """
    try:
        # NOTE: the old `global hub_prompt_questions` was a no-op (the name
        # is only read, never assigned) and has been removed.
        prompt: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt_questions)
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Questions"})

        # collect_runs captures the traced run so we can hand its ID to the
        # feedback widgets later.
        with collect_runs() as cb:
            response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
            run_id = cb.traced_runs[0].id
            logging.info("questions run_id=%s", run_id)  # was a leftover debug print
    except Exception as e:
        logging.error(f"Error in process_questions: {str(e)}")
        response = "An error occurred while generating questions."
        run_id = ""
    return response, run_id
|
|
|
|
|
|
|
def process_candidate_name(cv: str) -> str:
    """
    Extract the candidate's name from a CV via the LLM.

    Args:
        cv (str): The CV text.

    Returns:
        str: The extracted candidate name, or an error message on failure.
    """
    try:
        prompt = ChatPromptTemplate.from_messages([HumanMessagePromptTemplate.from_template(
            "Extract only the candidate name from this cv <CV>{CV}</CV>. Do not write any additional commentary"
        )])
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Candidate Name"})
        response: str = chain.invoke({"CV": cv})
    except Exception as e:
        # Bug fix: the log message previously said "process_match" (copy-paste).
        logging.error(f"Error in process_candidate_name: {str(e)}")
        response = "An error occurred while processing the match."
    return response
|
|
|
def process_job_name(jd: str) -> str:
    """
    Extract the job title from a job description via the LLM.

    Args:
        jd (str): The job description text.

    Returns:
        str: The extracted job name, or an error message on failure.
    """
    try:
        prompt = ChatPromptTemplate.from_messages([
            HumanMessagePromptTemplate.from_template(
                "Extract only the job name from this job description <JOB_DESCRIPTION>{JOB_DESCRIPTION}</JOB_DESCRIPTION>. Do not write any additional commentary"
            )
        ])

        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Job Name"})
        response: str = chain.invoke({"JOB_DESCRIPTION": jd})
    except Exception as e:
        # Bug fix: the log message previously said "process_match" (copy-paste).
        logging.error(f"Error in process_job_name: {str(e)}")
        response = "An error occurred while processing the match."
    return response
|
|
|
|
|
def process_match(*args: str) -> Tuple[str, str]:
    """
    Score a CV against a job description via the LLM.

    Args:
        *args: args[0] is the CV text, args[1] is the job description text.

    Returns:
        Tuple[str, str]: The match score text and the LangSmith run ID
        (error message and empty string on failure).
    """
    try:
        # NOTE: the old `global hub_prompt` was a no-op (the name is only
        # read, never assigned) and has been removed.
        prompt: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt)
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Match"})

        # collect_runs captures the traced run so we can hand its ID to the
        # feedback widgets later.
        with collect_runs() as cb:
            response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
            run_id = cb.traced_runs[0].id
            logging.info("match run_id=%s", run_id)  # was a leftover debug print
    except Exception as e:
        logging.error(f"Error in process_match: {str(e)}")
        response = "An error occurred while processing the match."
        run_id = ""
    return response, run_id
|
|
|
def wrapper_function(cv: str, jd: str) -> Tuple[str, str, str, str, str, str]:
    """
    Run the full pipeline: match score, interview questions, job name and
    candidate name.

    Args:
        cv (str): The CV text.
        jd (str): The job description text.

    Returns:
        Tuple[str, str, str, str, str, str]: (score, score run ID,
        questions, questions run ID, job name, candidate name).
    """
    # Defaults so every return path yields a full 6-tuple. Bug fixes:
    #  - the early returns previously yielded only 4 values while the click
    #    handler binds six outputs;
    #  - the except path left job_name/candidate_name unbound, so the final
    #    return raised UnboundLocalError.
    job_name = ""
    candidate_name = ""
    try:
        if not cv.strip():
            return "Error: CV is empty. Please provide a CV.", "", "", "", "", ""
        if not jd.strip():
            return "Error: Job Description is empty. Please provide a Job Description.", "", "", "", "", ""
        job_name = process_job_name(jd)
        candidate_name = process_candidate_name(cv)
        score, score_run_id = process_match(cv, jd)
        questions, questions_run_id = process_questions(cv, jd)
    except Exception as e:
        logging.error(f"Error in wrapper_function: {str(e)}")
        score = "An error occurred while processing the match."
        questions = "An error occurred while generating questions."
        score_run_id = ""
        questions_run_id = ""
    return score, score_run_id, questions, questions_run_id, job_name, candidate_name
|
|
|
def submit_feedback(run_id: str, score: float, comment: str) -> str:
    """
    Submit user feedback for a specific LangSmith run.

    Args:
        run_id (str): The ID of the run to submit feedback for.
        score (float): The feedback score (1-5 from the UI sliders).
        comment (str): The feedback comment.

    Returns:
        str: A message indicating the success or failure of the feedback submission.
    """
    # Was a leftover debug print; routed through logging instead.
    logging.info("submit_feedback run_id=%s score=%s comment=%s", run_id, score, comment)
    try:
        client.create_feedback(
            run_id,
            key="user-feedback",
            score=score,
            comment=comment,
        )
        return "Feedback submitted successfully."
    except Exception as e:
        logging.error(f"Error submitting feedback: {str(e)}")
        return "An error occurred while submitting feedback."
|
|
|
def file_process(file: str) -> str:
    """
    Extract text content from an uploaded file.

    PDFs go through OCR; everything else is read as plain text.

    Args:
        file (str): The path to the file to process.

    Returns:
        str: The extracted content, or an error message on failure.
    """
    try:
        if file.endswith('.pdf'):
            return pdf_to_text_ocr(file)
        # Bug fix: the handle was previously leaked via open(...).read().
        # NOTE(review): .doc (binary Word) files read as plain text will
        # yield garbage — confirm whether real Word support is needed
        # (python-docx is imported at module level but unused).
        with open(file, 'r') as f:
            return f.read()
    except Exception as e:
        logging.error(f"Error in file_process: {str(e)}")
        return "An error occurred while processing the file."
|
|
|
|
|
def save_match_results(cv: str, jd: str, score: str, questions: str, job_name: str, candidate_name: str) -> str:
    """
    Append one CV/JD match result to the candidate_data.json store.

    Args:
        cv (str): The CV text.
        jd (str): The job description text.
        score (str): The match score.
        questions (str): The generated interview questions.
        job_name (str): The extracted job title.
        candidate_name (str): The extracted candidate name.

    Returns:
        str: The timestamp-based ID of the saved entry.
    """
    filename = "candidate_data.json"
    entry_id = datetime.now().strftime("%Y%m%d%H%M%S")
    record = {
        "id": entry_id,
        "cv": cv,
        "jd": jd,
        "job_name": job_name,
        "candidate_name": candidate_name,
        "score": score,
        "questions": questions,
        "interview_notes": "",      # filled in later via update_interview_notes
        "status": "Pending",
    }

    # Load whatever is already stored (or start fresh), append, write back.
    records = []
    if os.path.exists(filename):
        with open(filename, "r") as f:
            records = json.load(f)
    records.append(record)
    with open(filename, "w") as f:
        json.dump(records, f)

    return entry_id
|
|
|
def load_candidate_list() -> List[Dict]:
    """
    Read all saved candidate entries from candidate_data.json.

    Returns:
        List[Dict]: The stored candidate entries; an empty list when the
        store file does not exist yet.
    """
    filename = "candidate_data.json"
    if not os.path.exists(filename):
        return []
    with open(filename, "r") as f:
        return json.load(f)
|
|
|
def update_interview_notes(entry_id: str, notes: str, status: str) -> str:
    """
    Update the interview notes and status for a specific candidate.

    Args:
        entry_id (str): The ID of the candidate entry.
        notes (str): The interview notes.
        status (str): The updated status of the candidate.

    Returns:
        str: A message indicating success or failure. Failure covers both a
        missing store file and an unknown entry_id.
    """
    filename = "candidate_data.json"
    if os.path.exists(filename):
        with open(filename, "r") as f:
            data = json.load(f)

        # Bug fix: previously reported success (and rewrote the file) even
        # when no entry matched entry_id.
        updated = False
        for entry in data:
            if entry["id"] == entry_id:
                entry["interview_notes"] = notes
                entry["status"] = status
                updated = True

        if updated:
            with open(filename, "w") as f:
                json.dump(data, f)
            return "Interview notes and status updated successfully."

    return "Failed to update interview notes and status."
|
|
|
def update_active_tab(tab_name: str) -> str:
    """
    Record the newly selected tab name into the active-tab state.

    Args:
        tab_name (str): The name of the tab to set as active.

    Returns:
        str: The same tab name, unchanged (identity pass-through for the
        gr.State component).
    """
    return tab_name
|
|
|
def create_app() -> gr.Blocks:
    """
    Create and configure the Gradio application.

    Two tabs: a read-only candidate list backed by candidate_data.json, and
    a CV/JD match screen that runs the LLM scoring/questions pipeline and
    lets the user send per-run feedback to LangSmith.

    Returns:
        gr.Blocks: The configured Gradio application.
    """
    with gr.Blocks(theme=theme) as app:
        gr.Markdown("# Kingmakers Talent Decision Support System")
        # Holds the active tab name; defined but not currently wired to any event.
        active_tab: gr.State = gr.State("CV/JD Match")

        with gr.Tabs() as main_tabs:
            with gr.TabItem("Candidate List") as candidate_list_tab:
                refresh_btn = gr.Button("Refresh List")
                candidate_table = gr.Dataframe(
                    headers=["ID", "Job Name", "Candidate Name", "Status"],
                    datatype=["str", "str", "str", "str"],
                    label="Candidate List"
                )

                def load_candidate_table():
                    # Project the stored entries down to the four table columns.
                    data = load_candidate_list()
                    return [[entry["id"], entry["job_name"], entry["candidate_name"], entry["status"]] for entry in data]

                refresh_btn.click(
                    fn=load_candidate_table,
                    outputs=candidate_table
                )

            with gr.TabItem("CV/JD Match") as cv_jd_match_tab:
                with gr.Row():
                    with gr.Column(scale=1):
                        # Left column: inputs. Each textbox can be filled by
                        # hand or from an uploaded file (file_process extracts
                        # the text and writes it into the textbox).
                        jd = gr.Textbox(label="Job Description")
                        jd_file = gr.File(label=".pdf, .doc or .txt", file_types=[".pdf", ".doc", ".txt"])
                        jd_file.change(fn=file_process, inputs=jd_file, outputs=jd)

                        cv = gr.Textbox(label="CV")
                        cv_file = gr.File(label=".pdf, .doc or .txt", file_types=[".pdf", ".doc", ".txt"])
                        cv_file.change(fn=file_process, inputs=cv_file, outputs=cv)

                        generate_btn = gr.Button("Generate")

                    with gr.Column(scale=1):
                        # Right column: generated outputs. The run-ID boxes are
                        # hidden carriers for LangSmith feedback; the feedback
                        # widgets start disabled until a generation completes.
                        job_name = gr.Textbox(label="Job Name")
                        candidate_name = gr.Textbox(label="Candidate Name")
                        score = gr.Textbox(label="Score")
                        score_run_id = gr.Textbox(label="Score Run ID", visible=False)
                        score_feedback_score = gr.Slider(minimum=1, maximum=5, step=1, label="Score Feedback", interactive=False)
                        score_feedback_comment = gr.Textbox(label="Score Feedback Comment", interactive=False)
                        score_feedback_btn = gr.Button("Submit Score Feedback", interactive=False)

                        questions = gr.Textbox(label="Questions")
                        questions_run_id = gr.Textbox(label="Questions Run ID", visible=False)
                        questions_feedback_score = gr.Slider(minimum=1, maximum=5, step=1, label="Questions Feedback", interactive=False)
                        questions_feedback_comment = gr.Textbox(label="Questions Feedback Comment", interactive=False)
                        questions_feedback_btn = gr.Button("Submit Questions Feedback", interactive=False)

                        save_status = gr.Textbox(label="Save Status")

                # Persist the current results; save_match_results returns the
                # new entry ID, which lands in the Save Status box.
                save_btn = gr.Button("Save Results")
                save_btn.click(
                    fn=save_match_results,
                    inputs=[cv, jd, score, questions, job_name, candidate_name],
                    outputs=save_status
                )

                # Main pipeline: wrapper_function returns six values bound to
                # these six components.
                generate_btn.click(
                    fn=wrapper_function,
                    inputs=[cv, jd],
                    outputs=[score, score_run_id, questions, questions_run_id, job_name, candidate_name]
                )

                def enable_feedback(score, questions):
                    # Unconditionally enable all six feedback widgets once a
                    # generation has run (the inputs are unused, but wiring
                    # them makes this fire after generate).
                    return [
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True)
                    ]

                # Second listener on the same button: unlocks feedback controls.
                generate_btn.click(
                    fn=enable_feedback,
                    inputs=[score, questions],
                    outputs=[
                        score_feedback_score,
                        score_feedback_comment,
                        score_feedback_btn,
                        questions_feedback_score,
                        questions_feedback_comment,
                        questions_feedback_btn
                    ]
                )

                # Feedback submission: the hidden run-ID textbox identifies
                # the LangSmith run being rated.
                score_feedback_btn.click(
                    fn=submit_feedback,
                    inputs=[score_run_id, score_feedback_score, score_feedback_comment],
                    outputs=gr.Textbox(label="Score Feedback Status")
                )

                questions_feedback_btn.click(
                    fn=submit_feedback,
                    inputs=[questions_run_id, questions_feedback_score, questions_feedback_comment],
                    outputs=gr.Textbox(label="Questions Feedback Status")
                )

    return app
|
if __name__ == "__main__":
    try:
        # Build the UI and serve it behind Gradio's basic auth, with
        # credentials checked against env vars by check_password.
        app: gr.Blocks = create_app()
        app.launch(debug=True, auth=check_password)
    except Exception as e:
        logging.error(f"Error launching the app: {str(e)}")