from pathlib import Path
from typing import List

import chromadb
import numpy as np
import pandas as pd
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from langchain.prompts import FewShotPromptTemplate, PromptTemplate

from chatbot_functionalities.llms import llm_inference


def evaluate_answer(
    question: str,
    answer: str,
    position: str,
    questions_collection: chromadb.Collection,
):
    """Call a HuggingFace/OpenAI model for inference.

    Given a question, an answer, and a position, this function calls the
    relevant API to fetch LLM inference results.

    Args:
        question: The generated question from our database.
        answer: The answer given by the candidate.
        position: The job position that the candidate is applying for.

    Returns:
        Rating: A categorical rating for the candidate's answer.
        qualitative_feedback: Feedback based on the candidate's answer and the given rating.

    HuggingFace repo_id example:
        - mistralai/Mistral-7B-Instruct-v0.1
    """
    # Read the collected question/answer data from the Excel file.
    excel_file_path = str(Path.cwd() / "data" / "processed" / "combined_dataset.xlsx")
    collected_q_a_df = pd.read_excel(excel_file_path, sheet_name="combined")
    collected_q_a_df.columns = [
        x.replace(" ", "_").lower().replace("/", "_or_") for x in collected_q_a_df.columns
    ]

    # Fetch the questions most similar to the given question for this position;
    # their good/average/poor answers are passed to the LLM as few-shot examples.
    matching_questions = questions_collection.query(
        query_texts=[question],
        where={"position": {"$eq": position}},
        n_results=3,
    )

    # Fetch examples from the collected data: one answer per rating level.
    # matching_questions["documents"][0] holds the text of the retrieved questions.
    examples = []
    ratings_scope = ["Good", "Average", "Poor"]
    for rating in ratings_scope:
        matching_rows = (
            collected_q_a_df
            .query(f"position_or_role == '{position}'")
            .query(f"question.isin({matching_questions['documents'][0]})")
            .query(f"answer_quality == '{rating}'")
            [["question", "answer"]]
        )
        if matching_rows.shape[0] > 0:
            examples.append(
                {
                    "position": position,
                    "question": question,
                    "answer": matching_rows.answer.iloc[0],
                    "Rating": rating,
                }
            )

    # Set up the template used to render each few-shot example.
    example_template = """
    position: {position}.\
    question: {question}\
    answer: {answer}.\
    Rating: {Rating}.\
    """

    # Set up the example prompt.
    example_prompt = PromptTemplate(
        input_variables=["position", "question", "answer", "Rating"],
        template=example_template,
    )

    # Set up the prefix and suffix of the few-shot prompt.
    prefix = """
    ### instruction: You are an experienced interviewer.\
    You are interviewing a candidate for the position of {position}.\
    You are tasked to rate an answer provided by the candidate. You should provide a categorical Rating and qualitative feedback.\
    The categorical Rating should be one of the following values: Good, Average, or Poor.\
    The qualitative feedback should provide sufficient details to justify the categorical Rating.\
    The position, the question asked to the candidate, and the answer given by the candidate are given below,\
    along with some examples.\
    """

    suffix = """
    position: {position}.\
    question: {question}\
    answer: {answer}.\
    qualitative_feedback:
    """

    few_shot_prompt_template = FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=prefix,
        suffix=suffix,
        input_variables=["position", "question", "answer"],
        example_separator="\n\n",
    )

    # Send the prompt to the LLM using the common inference function.
    response = llm_inference(
        model_type="huggingface",
        input_variables_list=[position, question, answer],
        prompt_template=few_shot_prompt_template,
        hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        inference_type="evaluation",
        temperature=0.1,
        max_length=32000,
    )

    # The categorical rating is not parsed out of the raw response here, so a
    # placeholder is returned for it alongside the full LLM output.
    return "None", response
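

# Hypothetical usage sketch for evaluate_answer (not part of the module's API):
# it assumes a persistent ChromaDB store at "./chroma_db" that already contains
# a "questions" collection whose entries carry a "position" metadata field, and
# that data/processed/combined_dataset.xlsx exists relative to the working
# directory. The question/answer text below is made up for illustration.
def _example_evaluate_answer():
    client = chromadb.PersistentClient(path="./chroma_db")
    questions_collection = client.get_collection("questions")
    rating, feedback = evaluate_answer(
        question="Tell me about a time you resolved a conflict in your team.",
        answer="In my last role I mediated between two developers by ...",
        position="Project Manager",
        questions_collection=questions_collection,
    )
    print(rating, feedback)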


def evaluate_answer_obsolete(
    question: str,
    answer: str,
    position: str,
):
    """Call a HuggingFace/OpenAI model for inference (obsolete version).

    Given a question, an answer, and a position, this function calls the
    relevant API to fetch LLM inference results.

    Args:
        question: The generated question from our database.
        answer: The answer given by the candidate.
        position: The job position that the candidate is applying for.

    Returns:
        Rating: A categorical rating for the candidate's answer.
        qualitative_feedback: Feedback based on the candidate's answer and the given rating.

    HuggingFace repo_id example:
        - mistralai/Mistral-7B-Instruct-v0.1
    """
    # Set up the prompt.
    prompt = (
        """### instruction: You are an experienced interviewer.\
    You are interviewing a candidate for the position of {position}.\
    You are tasked to rate an answer provided by the candidate. You should provide a categorical Rating and qualitative_feedback.\
    The categorical Rating should be one of the following values: Good, Average, or Poor.\
    The qualitative_feedback should provide sufficient details to justify the categorical Rating.\
    The format instructions of the output, the question asked to the candidate, and the answer given by the candidate are given below.\
    ### format instruction: {format_instructions}.\
    ### question: {question}.\
    ### answer: {answer}.\
    ### Rating:
    """
    )

    # Define the Rating schema.
    Rating_schema = ResponseSchema(
        name="Rating",
        description="The categorical value for the answer given by the candidate; "
        "it can be Poor, Average, or Good. It is the rating you give, as an "
        "experienced interviewer, after asking the candidate a question related "
        "to the position they are applying for.",
    )

    # Define the qualitative feedback schema.
    qualitative_feedback_schema = ResponseSchema(
        name="qualitative_feedback",
        description="The qualitative feedback you give, as an experienced "
        "interviewer, after asking the candidate a question related to the "
        "position they are applying for and receiving their answer. It should "
        "provide sufficient details to justify the categorical rating.",
    )

    # Stack the two schemas.
    response_schemas = [Rating_schema, qualitative_feedback_schema]

    # Build the output parser and extract its format instructions.
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    format_instructions = output_parser.get_format_instructions()

    # Run the evaluation using the HuggingFace inference API.
    response = llm_inference(
        model_type="huggingface",
        input_variables_list=[position, format_instructions, question, answer],
        prompt_template=prompt,
        hf_repo_id="mistralai/Mistral-7B-Instruct-v0.1",
        inference_type="evaluation",
        temperature=0.1,
        max_length=2024,
    )

    # Parse the response into a dictionary with the keys "Rating" and
    # "qualitative_feedback".
    output_dict = output_parser.parse(response)
    return output_dict["Rating"], output_dict["qualitative_feedback"]
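

# Hypothetical sketch of the structured-output round trip used above:
# StructuredOutputParser.get_format_instructions() asks the model to reply with
# a fenced JSON snippet containing the two schema keys, and parse() turns such
# a snippet back into a Python dict. The sample response string below is made
# up for illustration; the exact wording of the format instructions depends on
# the installed langchain version.
def _example_output_parser_round_trip():
    schemas = [
        ResponseSchema(name="Rating", description="Categorical rating: Good, Average, or Poor."),
        ResponseSchema(name="qualitative_feedback", description="Feedback justifying the rating."),
    ]
    parser = StructuredOutputParser.from_response_schemas(schemas)
    print(parser.get_format_instructions())  # injected into the prompt via {format_instructions}
    sample_response = '```json\n{"Rating": "Good", "qualitative_feedback": "Clear, specific answer."}\n```'
    print(parser.parse(sample_response))  # -> {'Rating': 'Good', 'qualitative_feedback': '...'}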


def evaluate_all_answers(
    interview_history: pd.DataFrame,
    questions_collection: chromadb.Collection,
):
    """Evaluate all answers from the interview history, obtaining a categorical
    rating as well as qualitative feedback for each one.
    """
    # The interview history contains every question asked in the mock interview
    # and the answer provided by the candidate. Each question/answer pair is
    # evaluated in turn. Expected columns:
    # ["question", "interview_phase", "position", "answer", "ratings", "feedback"]
    for index, row in interview_history.iterrows():
        # Get the rating and qualitative feedback for a single question/answer pair.
        rating, feedback = evaluate_answer(
            question=row.question,
            answer=row.answer,
            position=row.position,
            questions_collection=questions_collection,
        )
        # Write the rating and feedback obtained from the LLM back into the data frame.
        interview_history.loc[index, ["ratings", "feedback"]] = [rating, feedback]
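

# Hypothetical usage sketch for evaluate_all_answers: builds a minimal
# interview-history frame with the expected columns and lets the function fill
# in "ratings" and "feedback" in place. The ChromaDB setup mirrors the sketch
# above and is an assumption, not something this module provides.
def _example_evaluate_all_answers():
    client = chromadb.PersistentClient(path="./chroma_db")
    questions_collection = client.get_collection("questions")
    interview_history = pd.DataFrame(
        {
            "question": ["Why do you want this role?"],
            "interview_phase": ["introduction"],
            "position": ["Data Analyst"],
            "answer": ["I enjoy turning raw data into decisions ..."],
            "ratings": [None],
            "feedback": [None],
        }
    )
    evaluate_all_answers(interview_history, questions_collection)
    print(interview_history[["question", "ratings", "feedback"]])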


def get_ratings_for_answers(df: pd.DataFrame):
    """Placeholder: fill the 'ratings' column with random values in [0, 1)."""
    arr_random = np.random.default_rng().uniform(low=0, high=1, size=[df.shape[0], 1])
    df.loc[:, "ratings"] = arr_random


def get_feedback_for_answers(df: pd.DataFrame):
    """Placeholder: fill the 'feedback' column with dummy text."""
    df.loc[:, "feedback"] = "Some Random Feedback"


def get_overall_feedback():
    """Placeholder: return dummy overall feedback text."""
    return "Some Overall Feedback"