# talent / app.py
# Author: Alastair Jepps
# Persistent storage for candidate data
# 9880209
import gradio as gr
import os
import json
import time
import io
from typing import Tuple, Optional, List
from dotenv import load_dotenv
from docx import Document
from langchain_anthropic import ChatAnthropic
import re
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain import hub
import pytesseract
from pdf2image import convert_from_path
import logging
from langsmith import traceable, Client
from langchain_core.tracers.context import collect_runs
from typing import List, Dict
from datetime import datetime
# Configure root logging once; only errors are emitted.
# (The original called logging.basicConfig twice — the second call was a no-op.)
logging.basicConfig(level=logging.ERROR)

# Load environment variables (API keys, Gradio credentials) from the .env file.
load_dotenv()

# LangSmith client used for submitting user feedback on traced runs.
client = Client()

# Shared Gradio theme with enlarged text sizes.
theme = gr.themes.Default(
    text_size="lg",
).set(
    button_small_text_size='*text_lg'
)
try:
    # LLM plus the two LangSmith Hub prompts the app depends on.
    model: Optional[ChatAnthropic] = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=os.getenv("ANTHROPIC_API_KEY"))
    hub_prompt: Optional[ChatPromptTemplate] = hub.pull("talent_assistant")
    hub_prompt_questions: Optional[ChatPromptTemplate] = hub.pull("talent_interview_questions")
except Exception as e:
    logging.error(f"Error initializing ChatAnthropic or pulling hub prompt: {str(e)}")
    model = None
    hub_prompt = None
    # Fix: the original left hub_prompt_questions unbound on failure, which
    # caused a NameError in process_questions instead of a handled error.
    hub_prompt_questions = None
def check_password(username: str, password: str) -> bool:
    """
    Validate login credentials against the configured environment variables.

    Args:
        username (str): Username entered at the Gradio login prompt.
        password (str): Password entered at the Gradio login prompt.
    Returns:
        bool: True when both values match GRADIO_USERNAME / GRADIO_PASSWORD.
    """
    expected_user = os.getenv("GRADIO_USERNAME")
    expected_pass = os.getenv("GRADIO_PASSWORD")
    return (username, password) == (expected_user, expected_pass)
def extract_human_message_template(chat_prompt: ChatPromptTemplate) -> Optional[HumanMessagePromptTemplate]:
    """
    Extract the HumanMessagePromptTemplate from a ChatPromptTemplate.

    Args:
        chat_prompt (ChatPromptTemplate): The ChatPromptTemplate to extract from.
    Returns:
        Optional[HumanMessagePromptTemplate]: The extracted HumanMessagePromptTemplate, or None if not found.
    """
    # NOTE(review): this actually returns ``message.prompt`` (the inner
    # template), not the HumanMessagePromptTemplate wrapper the annotation
    # suggests — confirm downstream ``prompt | model`` piping expects that.
    try:
        # Scan messages in order; the first human message wins.
        for message in chat_prompt.messages:
            if isinstance(message, HumanMessagePromptTemplate):
                return message.prompt
    except Exception as e:
        logging.error(f"Error extracting human message template: {str(e)}")
    # Fall through: no human message found, or an error occurred.
    return None
def clean_bullet_points(text: str) -> str:
    """
    Normalise OCR artefacts that stand in for bullet characters.

    OCR output frequently renders "•" as a lone "e" (and "◦" as "eo") at the
    start of a line; "+" bullets are likewise mapped to "•".

    Args:
        text (str): Text whose line-leading bullet markers should be fixed.
    Returns:
        str: The text with standardized bullet points.
    """
    # (pattern, replacement) pairs applied line-anchored, in order.
    substitutions = (
        (r'(?m)^e\s', '• '),
        (r'(?m)^eo\s', ' ◦ '),
        (r'(?m)^\+\s', '• '),
    )
    try:
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)
    except Exception as e:
        logging.error(f"Error cleaning bullet points: {str(e)}")
    return text
def pdf_to_text_ocr(file_path: str) -> str:
    """
    Convert a PDF file to text using OCR (Tesseract over rasterised pages).

    Args:
        file_path (str): The path to the PDF file.
    Returns:
        str: The extracted, cleaned-up text, or "" when extraction fails.
    """
    text: str = ""
    try:
        # Rasterise every page, then OCR each one. '--psm 6' tells Tesseract
        # to assume a single uniform block of text per page.
        images = convert_from_path(file_path)
        for image in images:
            page_text: str = pytesseract.image_to_string(image, config='--psm 6')
            # Drop characters that cannot round-trip through UTF-8.
            # (encode with errors='ignore' never raises, so the original
            # UnicodeEncodeError fallback branch was unreachable dead code.)
            page_text = page_text.encode('utf-8', errors='ignore').decode('utf-8')
            text += page_text + "\n\n"
        # Re-join words hyphenated across line breaks.
        text = text.replace('-\n', '')
        # Collapse runs of spaces and excessive blank lines.
        text = re.sub(r' +', ' ', text)
        text = re.sub(r'\n{3,}', '\n\n', text)
        # Strip any remaining non-printable / non-ASCII characters.
        text = re.sub(r'[^\x20-\x7E\n]', '', text)
        text = text.strip()
        text = clean_bullet_points(text)
    except Exception as e:
        logging.error(f"Error in pdf_to_text_ocr: {str(e)}")
        text = ""
    return text
def process_questions(*args: str) -> Tuple[str, str]:
    """
    Generate interview questions from a CV and job description.

    Args:
        *args: args[0] is the CV text, args[1] is the job description text.
    Returns:
        Tuple[str, str]: The generated questions and the LangSmith run ID
        (error message and "" on failure).
    """
    try:
        human_template: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt_questions)
        chain = (human_template | model | StrOutputParser()).with_config({"run_name": "Talent Questions"})
        # collect_runs captures the traced run so its ID can later be used
        # to attach user feedback.
        with collect_runs() as cb:
            response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
            run_id = cb.traced_runs[0].id
            print(["questions", run_id])
    except Exception as e:
        logging.error(f"Error in process_questions: {str(e)}")
        response = "An error occurred while generating questions."
        run_id = ""
    return response, run_id
def process_candidate_name(cv: str) -> str:
    """
    Extract only the candidate's name from a CV using the LLM.

    Args:
        cv (str): The full CV text.
    Returns:
        str: The candidate name, or an error message on failure.
    """
    try:
        prompt = ChatPromptTemplate.from_messages([HumanMessagePromptTemplate.from_template(
            "Extract only the candidate name from this cv <CV>{CV}</CV>. Do not write any additional commentary"
        )])
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Candidate Name"})
        response: str = chain.invoke({"CV": cv})
    except Exception as e:
        # Fix: the log label and error text previously said "process_match"
        # (copy-paste from that function), which made errors misleading.
        logging.error(f"Error in process_candidate_name: {str(e)}")
        response = "An error occurred while extracting the candidate name."
    return response
def process_job_name(jd: str) -> str:
    """
    Extract only the job title from a job description using the LLM.

    Args:
        jd (str): The full job description text.
    Returns:
        str: The job name, or an error message on failure.
    """
    try:
        prompt = ChatPromptTemplate.from_messages([
            HumanMessagePromptTemplate.from_template(
                "Extract only the job name from this job description <JOB_DESCRIPTION>{JOB_DESCRIPTION}</JOB_DESCRIPTION>. Do not write any additional commentary"
            )
        ])
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Job Name"})
        response: str = chain.invoke({"JOB_DESCRIPTION": jd})
    except Exception as e:
        # Fix: the log label and error text previously said "process_match"
        # (copy-paste from that function), which made errors misleading.
        logging.error(f"Error in process_job_name: {str(e)}")
        response = "An error occurred while extracting the job name."
    return response
def process_match(*args: str) -> Tuple[str, str]:
    """
    Score how well a CV matches a job description.

    Args:
        *args: args[0] is the CV text, args[1] is the job description text.
    Returns:
        Tuple[str, str]: The match score text and the LangSmith run ID
        (error message and "" on failure).
    """
    try:
        human_template: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt)
        chain = (human_template | model | StrOutputParser()).with_config({"run_name": "Talent Match"})
        # collect_runs captures the traced run so its ID can later be used
        # to attach user feedback.
        with collect_runs() as cb:
            response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
            run_id = cb.traced_runs[0].id
            print(["match", run_id])
    except Exception as e:
        logging.error(f"Error in process_match: {str(e)}")
        response = "An error occurred while processing the match."
        run_id = ""
    return response, run_id
def wrapper_function(cv: str, jd: str) -> Tuple[str, str, str, str, str, str]:
    """
    Run the full pipeline: job/candidate names, match score, interview questions.

    Args:
        cv (str): The CV text.
        jd (str): The job description text.
    Returns:
        Tuple[str, str, str, str, str, str]: (match score, score run ID,
        questions, questions run ID, job name, candidate name). On a
        validation error the first slot carries the message and the rest
        are empty strings.
    """
    # Defaults so every return path yields the same 6-tuple shape.
    # Fixes two defects in the original: the validation paths returned
    # 4-tuples while the Gradio handler is wired to 6 outputs, and an
    # exception before process_job_name/process_candidate_name left
    # job_name/candidate_name unbound, raising NameError at the return.
    job_name = ""
    candidate_name = ""
    try:
        # Input validation.
        if not cv.strip():
            return "Error: CV is empty. Please provide a CV.", "", "", "", "", ""
        if not jd.strip():
            return "Error: Job Description is empty. Please provide a Job Description.", "", "", "", "", ""
        job_name = process_job_name(jd)
        candidate_name = process_candidate_name(cv)
        score, score_run_id = process_match(cv, jd)
        questions, questions_run_id = process_questions(cv, jd)
    except Exception as e:
        logging.error(f"Error in wrapper_function: {str(e)}")
        score = "An error occurred while processing the match."
        questions = "An error occurred while generating questions."
        score_run_id = ""
        questions_run_id = ""
    return score, score_run_id, questions, questions_run_id, job_name, candidate_name
def submit_feedback(run_id: str, score: float, comment: str) -> str:
    """
    Record user feedback against a LangSmith run.

    Args:
        run_id (str): The ID of the run being rated.
        score (float): The feedback score.
        comment (str): The feedback comment.
    Returns:
        str: A message indicating success or failure of the submission.
    """
    print(run_id, score, comment)
    try:
        client.create_feedback(
            run_id,
            key="user-feedback",
            score=score,
            comment=comment,
        )
    except Exception as e:
        logging.error(f"Error submitting feedback: {str(e)}")
        return "An error occurred while submitting feedback."
    return "Feedback submitted successfully."
def file_process(file: str) -> str:
    """
    Extract the text content of an uploaded file.

    PDFs are routed through OCR; any other file is read as plain text.

    Args:
        file (str): The path to the file to process.
    Returns:
        str: The extracted content, or an error message on failure.
    """
    try:
        if file.endswith('.pdf'):
            return pdf_to_text_ocr(file)
        # Context manager guarantees the handle is closed — the original
        # leaked it via open(file).read().
        with open(file, 'r') as fh:
            return fh.read()
    except Exception as e:
        logging.error(f"Error in file_process: {str(e)}")
        return "An error occurred while processing the file."
def save_match_results(cv: str, jd: str, score: str, questions: str, job_name: str, candidate_name: str) -> str:
    """
    Append one CV/JD match result to the candidate_data.json store.

    Args:
        cv (str): The CV text.
        jd (str): The job description text.
        score (str): The match score.
        questions (str): The generated interview questions.
        job_name (str): The extracted job title.
        candidate_name (str): The extracted candidate name.
    Returns:
        str: The timestamp-derived ID of the saved entry.
    """
    filename = "candidate_data.json"
    # Second-resolution timestamp doubles as the entry ID.
    entry_id = datetime.now().strftime("%Y%m%d%H%M%S")
    record = {
        "id": entry_id,
        "cv": cv,
        "jd": jd,
        "job_name": job_name,
        "candidate_name": candidate_name,
        "score": score,
        "questions": questions,
        "interview_notes": "",
        "status": "Pending",
    }
    # Read-modify-write the whole store; start fresh when it doesn't exist.
    existing_data = []
    if os.path.exists(filename):
        with open(filename, "r") as f:
            existing_data = json.load(f)
    existing_data.append(record)
    with open(filename, "w") as f:
        json.dump(existing_data, f)
    return entry_id
def load_candidate_list() -> List[Dict]:
    """
    Read all stored candidate entries from candidate_data.json.

    Returns:
        List[Dict]: Candidate records, or an empty list when no store exists.
    """
    filename = "candidate_data.json"
    if not os.path.exists(filename):
        return []
    with open(filename, "r") as f:
        return json.load(f)
def update_interview_notes(entry_id: str, notes: str, status: str) -> str:
    """
    Update the interview notes and status for a specific candidate.

    Args:
        entry_id (str): The ID of the candidate entry.
        notes (str): The interview notes.
        status (str): The updated status of the candidate.
    Returns:
        str: A message indicating success or failure.
    """
    filename = "candidate_data.json"
    if os.path.exists(filename):
        with open(filename, "r") as f:
            data = json.load(f)
        # Fix: the original reported success (and rewrote the file) whenever
        # the store existed, even when no entry matched entry_id.
        updated = False
        for entry in data:
            if entry["id"] == entry_id:
                entry["interview_notes"] = notes
                entry["status"] = status
                updated = True
        if updated:
            with open(filename, "w") as f:
                json.dump(data, f)
            return "Interview notes and status updated successfully."
    return "Failed to update interview notes and status."
def update_active_tab(tab_name: str) -> str:
    """
    Identity handler used by Gradio to record which tab is active.

    Args:
        tab_name (str): Name of the newly selected tab.
    Returns:
        str: The same tab name, to be stored as the active-tab state.
    """
    return tab_name
def create_app() -> gr.Blocks:
    """
    Create and configure the Gradio application.

    Builds two tabs: a candidate list backed by candidate_data.json, and a
    CV/JD match workspace that produces a score, interview questions, and
    LangSmith feedback controls.

    Returns:
        gr.Blocks: The configured Gradio application.
    """
    with gr.Blocks(theme=theme) as app:
        gr.Markdown("# Kingmakers Talent Decision Support System")
        # NOTE(review): active_tab state is created but never read or updated
        # anywhere in this view — confirm whether it is still needed.
        active_tab: gr.State = gr.State("CV/JD Match")
        with gr.Tabs() as main_tabs:
            with gr.TabItem("Candidate List") as candidate_list_tab:
                refresh_btn = gr.Button("Refresh List")
                candidate_table = gr.Dataframe(
                    headers=["ID", "Job Name", "Candidate Name", "Status"],
                    datatype=["str", "str", "str", "str"],
                    label="Candidate List"
                )
                def load_candidate_table():
                    # Project stored records into the four displayed columns.
                    data = load_candidate_list()
                    return [[entry["id"], entry["job_name"], entry["candidate_name"], entry["status"]] for entry in data]
                refresh_btn.click(
                    fn=load_candidate_table,
                    outputs=candidate_table
                )
            with gr.TabItem("CV/JD Match") as cv_jd_match_tab:
                with gr.Row():
                    with gr.Column(scale=1):
                        # Inputs: free-text boxes, each fillable from an uploaded file.
                        jd = gr.Textbox(label="Job Description")
                        jd_file = gr.File(label=".pdf, .doc or .txt", file_types=[".pdf", ".doc", ".txt"])
                        jd_file.change(fn=file_process, inputs=jd_file, outputs=jd)
                        cv = gr.Textbox(label="CV")
                        cv_file = gr.File(label=".pdf, .doc or .txt", file_types=[".pdf", ".doc", ".txt"])
                        cv_file.change(fn=file_process, inputs=cv_file, outputs=cv)
                        generate_btn = gr.Button("Generate")
                    with gr.Column(scale=1):
                        # Outputs plus feedback widgets; feedback controls start
                        # disabled and are enabled once a generation completes.
                        job_name = gr.Textbox(label="Job Name")
                        candidate_name = gr.Textbox(label="Candidate Name")
                        score = gr.Textbox(label="Score")
                        # Hidden run-ID fields carry LangSmith IDs to submit_feedback.
                        score_run_id = gr.Textbox(label="Score Run ID", visible=False)
                        score_feedback_score = gr.Slider(minimum=1, maximum=5, step=1, label="Score Feedback", interactive=False)
                        score_feedback_comment = gr.Textbox(label="Score Feedback Comment", interactive=False)
                        score_feedback_btn = gr.Button("Submit Score Feedback", interactive=False)
                        questions = gr.Textbox(label="Questions")
                        questions_run_id = gr.Textbox(label="Questions Run ID", visible=False)
                        questions_feedback_score = gr.Slider(minimum=1, maximum=5, step=1, label="Questions Feedback", interactive=False)
                        questions_feedback_comment = gr.Textbox(label="Questions Feedback Comment", interactive=False)
                        questions_feedback_btn = gr.Button("Submit Questions Feedback", interactive=False)
                        # NOTE(review): save_match_results returns the entry ID,
                        # so the "Save Status" box displays that ID, not a
                        # success/failure message — confirm this is intended.
                        save_status = gr.Textbox(label="Save Status")
                        save_btn = gr.Button("Save Results")
                        save_btn.click(
                            fn=save_match_results,
                            inputs=[cv, jd, score, questions, job_name, candidate_name],
                            outputs=save_status
                        )
                # First click handler: run the pipeline and fill the outputs.
                generate_btn.click(
                    fn=wrapper_function,
                    inputs=[cv, jd],
                    outputs=[score, score_run_id, questions, questions_run_id, job_name, candidate_name]
                )
                def enable_feedback(score, questions):
                    # Unconditionally re-enable all six feedback widgets; the
                    # score/questions inputs are accepted but not inspected.
                    return [
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True)
                    ]
                # Second click handler on the same button: enable feedback widgets.
                generate_btn.click(
                    fn=enable_feedback,
                    inputs=[score, questions],
                    outputs=[
                        score_feedback_score,
                        score_feedback_comment,
                        score_feedback_btn,
                        questions_feedback_score,
                        questions_feedback_comment,
                        questions_feedback_btn
                    ]
                )
                # Feedback submissions route to LangSmith via submit_feedback.
                score_feedback_btn.click(
                    fn=submit_feedback,
                    inputs=[score_run_id, score_feedback_score, score_feedback_comment],
                    outputs=gr.Textbox(label="Score Feedback Status")
                )
                questions_feedback_btn.click(
                    fn=submit_feedback,
                    inputs=[questions_run_id, questions_feedback_score, questions_feedback_comment],
                    outputs=gr.Textbox(label="Questions Feedback Status")
                )
    return app
if __name__ == "__main__":
    try:
        # Launch with HTTP basic auth backed by check_password; debug=True
        # surfaces errors in the console during development.
        app: gr.Blocks = create_app()
        app.launch(debug=True, auth=check_password)  # NOTE: add share=True here to expose a public link
    except Exception as e:
        logging.error(f"Error launching the app: {str(e)}")