IliaLarchenko committed
Commit 324d83a
1 Parent(s): fb03edc

Added grader
tests/candidate.py CHANGED
@@ -13,13 +13,13 @@ from api.llm import LLMManager
 from config import config
 from resources.data import fixed_messages, topic_lists
 from resources.prompts import prompts
-from tests.tessting_prompts import candidate_prompt
+from tests.testing_prompts import candidate_prompt
 
 load_dotenv()
 
 
 def complete_interview(interview_type, exp_name, requirements="", difficulty="", topic="", model="gpt-3.5-turbo"):
-    client = OpenAI()
+    client = OpenAI(url="https://api.openai.com/v1")
     llm = LLMManager(config, prompts)
     llm_name = config.llm.name
 
tests/grader.py ADDED
@@ -0,0 +1,43 @@
+import json
+
+from openai import OpenAI
+
+from tests.testing_prompts import grader_prompt
+
+
+def grade(json_file_path, model="gpt-4-turbo"):
+    client = OpenAI(url="https://api.openai.com/v1")
+
+    with open(json_file_path) as file:
+        interview_data = json.load(file)
+
+    messages = [
+        {"role": "system", "content": grader_prompt},
+        {"role": "user", "content": f"Interview data: {interview_data}"},
+        {"role": "user", "content": "Please evaluate the interview."},
+    ]
+
+    response = client.chat.completions.create(model=model, messages=messages, temperature=1, response_format={"type": "json_object"})
+    feedback = json.loads(response.choices[0].message.content)
+
+    feedback["file_name"] = json_file_path
+    feedback["agent_llm"] = interview_data["interviewer_llm"]
+    feedback["candidate_llm"] = interview_data["candidate_llm"]
+    feedback["type"] = interview_data["inputs"]["interview_type"]
+    feedback["difficulty"] = interview_data["inputs"]["difficulty"]
+    feedback["topic"] = interview_data["inputs"]["topic"]
+    feedback["average_response_time_seconds"] = interview_data["average_response_time_seconds"]
+    feedback["number_of_messages"] = len(interview_data["transcript"])
+
+    scores = [
+        feedback[x]
+        for x in feedback
+        if x.startswith("interviewer_") or x.startswith("feedback_") or x.startswith("problem_") and feedback[x] is not None
+    ]
+    feedback["overall_score"] = sum(scores) / len(scores)
+
+    # save results to json file in the same folder as the interview data
+    with open(json_file_path.replace(".json", "_feedback.json"), "w") as file:
+        json.dump(feedback, file, indent=4)
+
+    return feedback
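A minimal usage sketch of the new grader, assuming an interview record already produced by tests.candidate.complete_interview and an OPENAI_API_KEY available in the environment; the file path below is hypothetical:

    from tests.grader import grade

    # Grade a previously recorded interview; grade() also writes
    # <name>_feedback.json next to the input file.
    feedback = grade("records/coding_test_interview.json", model="gpt-4-turbo")
    print(feedback["overall_score"])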
tests/test_e2e.py CHANGED
@@ -1,6 +1,12 @@
 from tests.candidate import complete_interview
+from tests.grader import grade
 
 
 def test_complete_interview():
-    file_path, interview_data = complete_interview("coding", "test", model="gpt-3.5-turbo")
-    assert True
+    for _ in range(3):
+        file_path, _ = complete_interview("coding", "test", model="gpt-3.5-turbo")
+        feedback = grade(file_path, model="gpt-4-turbo")
+        assert feedback["overall_score"] > 0.5
+        if feedback["overall_score"] > 0.8:
+            return
+    assert False
tests/{tessting_prompts.py → testing_prompts.py} RENAMED
File without changes