|
import gradio as gr |
|
import os |
|
import json |
|
import time |
|
import io |
|
from typing import Tuple, Optional, List |
|
from dotenv import load_dotenv |
|
from docx import Document |
|
from langchain_anthropic import ChatAnthropic |
|
import re |
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate |
|
from langchain import hub |
|
import pytesseract |
|
from pdf2image import convert_from_path |
|
import logging |
|
from langsmith import traceable, Client |
|
from langchain_core.tracers.context import collect_runs |
|
from typing import List, Dict |
|
from datetime import datetime |
|
|
|
# Configure root logging once; only errors are emitted.
# (A duplicate logging.basicConfig call was removed: the second call is a
# no-op once the root logger has handlers.)
logging.basicConfig(level=logging.ERROR)

# Load environment variables (API keys, Gradio credentials) from .env
# before anything reads them.
load_dotenv()

# LangSmith client used for run-feedback submission.
client = Client()
|
|
|
# Shared Gradio theme: larger base text size, and small buttons bumped up
# to the large text token so all controls read consistently.
theme = gr.themes.Default(
    text_size="lg",
).set(
    button_small_text_size='*text_lg'
)
|
|
|
try:
    # Claude model plus the two prompts pulled from the LangChain Hub.
    # Any failure here (bad API key, network, missing hub entry) leaves all
    # three globals as None so the app can still start; downstream calls
    # will then fail and be logged instead of crashing at import time.
    model: Optional[ChatAnthropic] = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=os.getenv("ANTHROPIC_API_KEY"))
    hub_prompt: Optional[ChatPromptTemplate] = hub.pull("talent_assistant")
    hub_prompt_questions: Optional[ChatPromptTemplate] = hub.pull("talent_interview_questions")
except Exception as e:
    logging.error(f"Error initializing ChatAnthropic or pulling hub prompt: {str(e)}")
    model = None
    hub_prompt = None
    # Bug fix: this reset was missing, so a hub failure left the name
    # undefined and process_questions() raised NameError later.
    hub_prompt_questions = None
|
|
|
def check_password(username: str, password: str) -> bool:
    """
    Check if the provided username and password match the environment variables.

    Uses a constant-time comparison so the auth check does not leak
    credential prefixes through response timing.

    Args:
        username (str): The username to check.
        password (str): The password to check.

    Returns:
        bool: True if the credentials are correct, False otherwise.
        Always False when GRADIO_USERNAME / GRADIO_PASSWORD are unset.
    """
    import hmac  # local import keeps the module's import block unchanged

    expected_user = os.getenv("GRADIO_USERNAME")
    expected_pass = os.getenv("GRADIO_PASSWORD")
    if expected_user is None or expected_pass is None:
        # Matches original behavior: a str credential never equals None.
        return False
    # Compare as bytes: compare_digest on str rejects non-ASCII input.
    return hmac.compare_digest(username.encode("utf-8"), expected_user.encode("utf-8")) and \
        hmac.compare_digest(password.encode("utf-8"), expected_pass.encode("utf-8"))
|
|
|
def extract_human_message_template(chat_prompt: ChatPromptTemplate) -> Optional[HumanMessagePromptTemplate]:
    """
    Pull the inner prompt of the first human message out of a ChatPromptTemplate.

    Args:
        chat_prompt (ChatPromptTemplate): The chat prompt to search.

    Returns:
        The `.prompt` of the first HumanMessagePromptTemplate found, or
        None when there is none or the lookup fails.
    """
    try:
        human_messages = (m for m in chat_prompt.messages if isinstance(m, HumanMessagePromptTemplate))
        first = next(human_messages, None)
        if first is not None:
            return first.prompt
    except Exception as e:
        logging.error(f"Error extracting human message template: {str(e)}")
    return None
|
|
|
def clean_bullet_points(text: str) -> str:
    """
    Normalize OCR-mangled bullet markers at line starts.

    Tesseract often reads "•" as "e" and "◦" as "eo"; "+" is a common
    plain-text bullet. Each is mapped back to a proper bullet glyph.

    Args:
        text (str): The text to clean.

    Returns:
        str: The text with standardized bullet points (unchanged on error).
    """
    # Order matters: '^e\s' is tried before '^eo\s', exactly as before
    # (they cannot both match the same line start).
    substitutions = (
        (r'(?m)^e\s', '• '),
        (r'(?m)^eo\s', ' ◦ '),
        (r'(?m)^\+\s', '• '),
    )
    try:
        for pattern, replacement in substitutions:
            text = re.sub(pattern, replacement, text)
    except Exception as e:
        logging.error(f"Error cleaning bullet points: {str(e)}")
    return text
|
|
|
def pdf_to_text_ocr(file_path: str) -> str:
    """
    Convert a PDF file to text using OCR (pdf2image + Tesseract, PSM 6).

    Args:
        file_path (str): The path to the PDF file.

    Returns:
        str: The cleaned, printable-ASCII text extracted from the PDF;
        an empty string if any step fails.
    """
    try:
        # Bug fix: the old annotation `List[Image.Image]` referenced `Image`,
        # which is never imported in this module (pdf2image returns PIL
        # images); the annotation was dropped rather than left misleading.
        images = convert_from_path(file_path)
        text: str = ""
        for image in images:
            page_text: str = pytesseract.image_to_string(image, config='--psm 6')
            # Best-effort ASCII-safe round trip. errors='ignore' cannot raise
            # UnicodeEncodeError, so the old fallback except was dead code.
            page_text = page_text.encode('utf-8', errors='ignore').decode('utf-8')
            text += page_text + "\n\n"

        text = text.replace('-\n', '')               # re-join words hyphenated at line breaks
        text = re.sub(r' +', ' ', text)              # collapse runs of spaces
        text = re.sub(r'\n{3,}', '\n\n', text)       # collapse blank-line runs
        text = re.sub(r'[^\x20-\x7E\n]', '', text)   # strip non-printable / non-ASCII chars
        text = text.strip()
        text = clean_bullet_points(text)
    except Exception as e:
        logging.error(f"Error in pdf_to_text_ocr: {str(e)}")
        text = ""
    return text
|
|
|
def process_questions(*args: str) -> Tuple[str, str]:
    """
    Generate interview questions from a CV and job description.

    Args:
        *args: args[0] is the CV text, args[1] is the job description text.

    Returns:
        Tuple[str, str]: The generated questions and the LangSmith run ID
        (error message and empty string on failure).
    """
    try:
        # NOTE: the old `global hub_prompt_questions` was a no-op (the name
        # is only read, never assigned) and has been removed.
        prompt: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt_questions)
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Questions"})

        # collect_runs captures the traced run so we can hand its ID to the
        # feedback widgets later.
        with collect_runs() as cb:
            response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
            run_id = cb.traced_runs[0].id
            logging.info("questions run_id=%s", run_id)  # was a leftover debug print
    except Exception as e:
        logging.error(f"Error in process_questions: {str(e)}")
        response = "An error occurred while generating questions."
        run_id = ""
    return response, run_id
|
|
|
|
|
|
|
def process_candidate_name(cv: str) -> str:
    """
    Extract the candidate's name from a CV via the LLM.

    Args:
        cv (str): The CV text.

    Returns:
        str: The extracted candidate name, or an error message on failure.
    """
    try:
        prompt = ChatPromptTemplate.from_messages([HumanMessagePromptTemplate.from_template(
            "Extract only the candidate name from this cv <CV>{CV}</CV>. Do not write any additional commentary"
        )])
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Candidate Name"})
        response: str = chain.invoke({"CV": cv})
    except Exception as e:
        # Bug fix: the log message previously said "process_match" (copy-paste).
        logging.error(f"Error in process_candidate_name: {str(e)}")
        response = "An error occurred while processing the match."
    return response
|
|
|
def process_job_name(jd: str) -> str:
    """
    Extract the job title from a job description via the LLM.

    Args:
        jd (str): The job description text.

    Returns:
        str: The extracted job name, or an error message on failure.
    """
    try:
        prompt = ChatPromptTemplate.from_messages([
            HumanMessagePromptTemplate.from_template(
                "Extract only the job name from this job description <JOB_DESCRIPTION>{JOB_DESCRIPTION}</JOB_DESCRIPTION>. Do not write any additional commentary"
            )
        ])

        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Job Name"})
        response: str = chain.invoke({"JOB_DESCRIPTION": jd})
    except Exception as e:
        # Bug fix: the log message previously said "process_match" (copy-paste).
        logging.error(f"Error in process_job_name: {str(e)}")
        response = "An error occurred while processing the match."
    return response
|
|
|
|
|
def process_match(*args: str) -> Tuple[str, str]:
    """
    Score a CV against a job description via the LLM.

    Args:
        *args: args[0] is the CV text, args[1] is the job description text.

    Returns:
        Tuple[str, str]: The match score text and the LangSmith run ID
        (error message and empty string on failure).
    """
    try:
        # NOTE: the old `global hub_prompt` was a no-op (the name is only
        # read, never assigned) and has been removed.
        prompt: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt)
        chain = (prompt | model | StrOutputParser()).with_config({"run_name": "Talent Match"})

        # collect_runs captures the traced run so we can hand its ID to the
        # feedback widgets later.
        with collect_runs() as cb:
            response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
            run_id = cb.traced_runs[0].id
            logging.info("match run_id=%s", run_id)  # was a leftover debug print
    except Exception as e:
        logging.error(f"Error in process_match: {str(e)}")
        response = "An error occurred while processing the match."
        run_id = ""
    return response, run_id
|
|
|
def wrapper_function(cv: str, jd: str) -> Tuple[str, str, str, str, str, str]:
    """
    Run the full pipeline: match score, interview questions, job name and
    candidate name.

    Args:
        cv (str): The CV text.
        jd (str): The job description text.

    Returns:
        Tuple[str, str, str, str, str, str]: (score, score run ID,
        questions, questions run ID, job name, candidate name).
    """
    # Defaults so every return path yields a full 6-tuple. Bug fixes:
    #  - the early returns previously yielded only 4 values while the click
    #    handler binds six outputs;
    #  - the except path left job_name/candidate_name unbound, so the final
    #    return raised UnboundLocalError.
    job_name = ""
    candidate_name = ""
    try:
        if not cv.strip():
            return "Error: CV is empty. Please provide a CV.", "", "", "", "", ""
        if not jd.strip():
            return "Error: Job Description is empty. Please provide a Job Description.", "", "", "", "", ""
        job_name = process_job_name(jd)
        candidate_name = process_candidate_name(cv)
        score, score_run_id = process_match(cv, jd)
        questions, questions_run_id = process_questions(cv, jd)
    except Exception as e:
        logging.error(f"Error in wrapper_function: {str(e)}")
        score = "An error occurred while processing the match."
        questions = "An error occurred while generating questions."
        score_run_id = ""
        questions_run_id = ""
    return score, score_run_id, questions, questions_run_id, job_name, candidate_name
|
|
|
def submit_feedback(run_id: str, score: float, comment: str) -> str:
    """
    Submit user feedback for a specific LangSmith run.

    Args:
        run_id (str): The ID of the run to submit feedback for.
        score (float): The feedback score (1-5 from the UI sliders).
        comment (str): The feedback comment.

    Returns:
        str: A message indicating the success or failure of the feedback submission.
    """
    # Was a leftover debug print; routed through logging instead.
    logging.info("submit_feedback run_id=%s score=%s comment=%s", run_id, score, comment)
    try:
        client.create_feedback(
            run_id,
            key="user-feedback",
            score=score,
            comment=comment,
        )
        return "Feedback submitted successfully."
    except Exception as e:
        logging.error(f"Error submitting feedback: {str(e)}")
        return "An error occurred while submitting feedback."
|
|
|
def file_process(file: str) -> str:
    """
    Extract text content from an uploaded file.

    PDFs go through OCR; everything else is read as plain text.

    Args:
        file (str): The path to the file to process.

    Returns:
        str: The extracted content, or an error message on failure.
    """
    try:
        if file.endswith('.pdf'):
            return pdf_to_text_ocr(file)
        # Bug fix: the handle was previously leaked via open(...).read().
        # NOTE(review): .doc (binary Word) files read as plain text will
        # yield garbage — confirm whether real Word support is needed
        # (python-docx is imported at module level but unused).
        with open(file, 'r') as f:
            return f.read()
    except Exception as e:
        logging.error(f"Error in file_process: {str(e)}")
        return "An error occurred while processing the file."
|
|
|
|
|
def save_match_results(cv: str, jd: str, score: str, questions: str, job_name: str, candidate_name: str) -> str:
    """
    Append one CV/JD match result to the candidate_data.json store.

    Args:
        cv (str): The CV text.
        jd (str): The job description text.
        score (str): The match score.
        questions (str): The generated interview questions.
        job_name (str): The extracted job title.
        candidate_name (str): The extracted candidate name.

    Returns:
        str: The timestamp-based ID of the saved entry.
    """
    filename = "candidate_data.json"
    entry_id = datetime.now().strftime("%Y%m%d%H%M%S")
    record = {
        "id": entry_id,
        "cv": cv,
        "jd": jd,
        "job_name": job_name,
        "candidate_name": candidate_name,
        "score": score,
        "questions": questions,
        "interview_notes": "",      # filled in later via update_interview_notes
        "status": "Pending",
    }

    # Load whatever is already stored (or start fresh), append, write back.
    records = []
    if os.path.exists(filename):
        with open(filename, "r") as f:
            records = json.load(f)
    records.append(record)
    with open(filename, "w") as f:
        json.dump(records, f)

    return entry_id
|
|
|
def load_candidate_list() -> List[Dict]:
    """
    Read all saved candidate entries from candidate_data.json.

    Returns:
        List[Dict]: The stored candidate entries; an empty list when the
        store file does not exist yet.
    """
    filename = "candidate_data.json"
    if not os.path.exists(filename):
        return []
    with open(filename, "r") as f:
        return json.load(f)
|
|
|
def update_interview_notes(entry_id: str, notes: str, status: str) -> str:
    """
    Update the interview notes and status for a specific candidate.

    Args:
        entry_id (str): The ID of the candidate entry.
        notes (str): The interview notes.
        status (str): The updated status of the candidate.

    Returns:
        str: A message indicating success or failure. Failure covers both a
        missing store file and an unknown entry_id.
    """
    filename = "candidate_data.json"
    if os.path.exists(filename):
        with open(filename, "r") as f:
            data = json.load(f)

        # Bug fix: previously reported success (and rewrote the file) even
        # when no entry matched entry_id.
        updated = False
        for entry in data:
            if entry["id"] == entry_id:
                entry["interview_notes"] = notes
                entry["status"] = status
                updated = True

        if updated:
            with open(filename, "w") as f:
                json.dump(data, f)
            return "Interview notes and status updated successfully."

    return "Failed to update interview notes and status."
|
|
|
def update_active_tab(tab_name: str) -> str:
    """
    Record the newly selected tab name into the active-tab state.

    Args:
        tab_name (str): The name of the tab to set as active.

    Returns:
        str: The same tab name, unchanged (identity pass-through for the
        gr.State component).
    """
    return tab_name
|
|
|
def create_app() -> gr.Blocks:
    """
    Create and configure the Gradio application.

    Two tabs: a read-only candidate list backed by candidate_data.json, and
    a CV/JD match screen that runs the LLM scoring/questions pipeline and
    lets the user send per-run feedback to LangSmith.

    Returns:
        gr.Blocks: The configured Gradio application.
    """
    with gr.Blocks(theme=theme) as app:
        gr.Markdown("# Kingmakers Talent Decision Support System")
        # Holds the active tab name; defined but not currently wired to any event.
        active_tab: gr.State = gr.State("CV/JD Match")

        with gr.Tabs() as main_tabs:
            with gr.TabItem("Candidate List") as candidate_list_tab:
                refresh_btn = gr.Button("Refresh List")
                candidate_table = gr.Dataframe(
                    headers=["ID", "Job Name", "Candidate Name", "Status"],
                    datatype=["str", "str", "str", "str"],
                    label="Candidate List"
                )

                def load_candidate_table():
                    # Project the stored entries down to the four table columns.
                    data = load_candidate_list()
                    return [[entry["id"], entry["job_name"], entry["candidate_name"], entry["status"]] for entry in data]

                refresh_btn.click(
                    fn=load_candidate_table,
                    outputs=candidate_table
                )

            with gr.TabItem("CV/JD Match") as cv_jd_match_tab:
                with gr.Row():
                    with gr.Column(scale=1):
                        # Left column: inputs. Each textbox can be filled by
                        # hand or from an uploaded file (file_process extracts
                        # the text and writes it into the textbox).
                        jd = gr.Textbox(label="Job Description")
                        jd_file = gr.File(label=".pdf, .doc or .txt", file_types=[".pdf", ".doc", ".txt"])
                        jd_file.change(fn=file_process, inputs=jd_file, outputs=jd)

                        cv = gr.Textbox(label="CV")
                        cv_file = gr.File(label=".pdf, .doc or .txt", file_types=[".pdf", ".doc", ".txt"])
                        cv_file.change(fn=file_process, inputs=cv_file, outputs=cv)

                        generate_btn = gr.Button("Generate")

                    with gr.Column(scale=1):
                        # Right column: generated outputs. The run-ID boxes are
                        # hidden carriers for LangSmith feedback; the feedback
                        # widgets start disabled until a generation completes.
                        job_name = gr.Textbox(label="Job Name")
                        candidate_name = gr.Textbox(label="Candidate Name")
                        score = gr.Textbox(label="Score")
                        score_run_id = gr.Textbox(label="Score Run ID", visible=False)
                        score_feedback_score = gr.Slider(minimum=1, maximum=5, step=1, label="Score Feedback", interactive=False)
                        score_feedback_comment = gr.Textbox(label="Score Feedback Comment", interactive=False)
                        score_feedback_btn = gr.Button("Submit Score Feedback", interactive=False)

                        questions = gr.Textbox(label="Questions")
                        questions_run_id = gr.Textbox(label="Questions Run ID", visible=False)
                        questions_feedback_score = gr.Slider(minimum=1, maximum=5, step=1, label="Questions Feedback", interactive=False)
                        questions_feedback_comment = gr.Textbox(label="Questions Feedback Comment", interactive=False)
                        questions_feedback_btn = gr.Button("Submit Questions Feedback", interactive=False)

                        save_status = gr.Textbox(label="Save Status")

                # Persist the current results; save_match_results returns the
                # new entry ID, which lands in the Save Status box.
                save_btn = gr.Button("Save Results")
                save_btn.click(
                    fn=save_match_results,
                    inputs=[cv, jd, score, questions, job_name, candidate_name],
                    outputs=save_status
                )

                # Main pipeline: wrapper_function returns six values bound to
                # these six components.
                generate_btn.click(
                    fn=wrapper_function,
                    inputs=[cv, jd],
                    outputs=[score, score_run_id, questions, questions_run_id, job_name, candidate_name]
                )

                def enable_feedback(score, questions):
                    # Unconditionally enable all six feedback widgets once a
                    # generation has run (the inputs are unused, but wiring
                    # them makes this fire after generate).
                    return [
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True),
                        gr.update(interactive=True)
                    ]

                # Second listener on the same button: unlocks feedback controls.
                generate_btn.click(
                    fn=enable_feedback,
                    inputs=[score, questions],
                    outputs=[
                        score_feedback_score,
                        score_feedback_comment,
                        score_feedback_btn,
                        questions_feedback_score,
                        questions_feedback_comment,
                        questions_feedback_btn
                    ]
                )

                # Feedback submission: the hidden run-ID textbox identifies
                # the LangSmith run being rated.
                score_feedback_btn.click(
                    fn=submit_feedback,
                    inputs=[score_run_id, score_feedback_score, score_feedback_comment],
                    outputs=gr.Textbox(label="Score Feedback Status")
                )

                questions_feedback_btn.click(
                    fn=submit_feedback,
                    inputs=[questions_run_id, questions_feedback_score, questions_feedback_comment],
                    outputs=gr.Textbox(label="Questions Feedback Status")
                )

    return app
|
if __name__ == "__main__":
    try:
        # Build the UI and serve it behind Gradio's basic auth, with
        # credentials checked against env vars by check_password.
        app: gr.Blocks = create_app()
        app.launch(debug=True, auth=check_password)
    except Exception as e:
        logging.error(f"Error launching the app: {str(e)}")