Spaces:
Sleeping
Sleeping
IliaLarchenko
committed on
Commit
•
324d83a
1
Parent(s):
fb03edc
Added grader
Browse files- tests/candidate.py +2 -2
- tests/grader.py +43 -0
- tests/test_e2e.py +8 -2
- tests/{tessting_prompts.py → testing_prompts.py} +0 -0
tests/candidate.py
CHANGED
@@ -13,13 +13,13 @@ from api.llm import LLMManager
|
|
13 |
from config import config
|
14 |
from resources.data import fixed_messages, topic_lists
|
15 |
from resources.prompts import prompts
|
16 |
-
from tests.
|
17 |
|
18 |
load_dotenv()
|
19 |
|
20 |
|
21 |
def complete_interview(interview_type, exp_name, requirements="", difficulty="", topic="", model="gpt-3.5-turbo"):
|
22 |
-
client = OpenAI()
|
23 |
llm = LLMManager(config, prompts)
|
24 |
llm_name = config.llm.name
|
25 |
|
|
|
13 |
from config import config
|
14 |
from resources.data import fixed_messages, topic_lists
|
15 |
from resources.prompts import prompts
|
16 |
+
from tests.testing_prompts import candidate_prompt
|
17 |
|
18 |
load_dotenv()
|
19 |
|
20 |
|
21 |
def complete_interview(interview_type, exp_name, requirements="", difficulty="", topic="", model="gpt-3.5-turbo"):
|
22 |
+
client = OpenAI(url="https://api.openai.com/v1")
|
23 |
llm = LLMManager(config, prompts)
|
24 |
llm_name = config.llm.name
|
25 |
|
tests/grader.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
from openai import OpenAI
|
4 |
+
|
5 |
+
from tests.testing_prompts import grader_prompt
|
6 |
+
|
7 |
+
|
8 |
+
def grade(json_file_path, model="gpt-4-turbo"):
    """Grade a recorded interview with an LLM and save the feedback to disk.

    The interview JSON at ``json_file_path`` is loaded and sent to ``model``
    together with ``grader_prompt``. The model's JSON reply is enriched with
    metadata copied from the interview record and an ``overall_score`` (the
    mean of all non-null numeric values whose key starts with
    ``interviewer_``, ``feedback_`` or ``problem_``). The result is written
    next to the input file with a ``_feedback.json`` suffix.

    Args:
        json_file_path: Path (str) to the interview JSON file.
        model: Name of the chat model used for grading.

    Returns:
        The feedback dict, including metadata and ``overall_score``.

    Raises:
        ValueError: If the model reply contains no usable scores.
    """
    # The OpenAI client takes the endpoint as ``base_url``; ``url`` is not an
    # accepted keyword argument and raises TypeError.
    client = OpenAI(base_url="https://api.openai.com/v1")

    with open(json_file_path) as file:
        interview_data = json.load(file)

    messages = [
        {"role": "system", "content": grader_prompt},
        {"role": "user", "content": f"Interview data: {interview_data}"},
        {"role": "user", "content": "Please evaluate the interview."},
    ]

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=1,
        response_format={"type": "json_object"},
    )
    feedback = json.loads(response.choices[0].message.content)

    inputs = interview_data["inputs"]
    feedback["file_name"] = json_file_path
    feedback["agent_llm"] = interview_data["interviewer_llm"]
    feedback["candidate_llm"] = interview_data["candidate_llm"]
    feedback["type"] = inputs["interview_type"]
    feedback["difficulty"] = inputs["difficulty"]
    feedback["topic"] = inputs["topic"]
    feedback["average_response_time_seconds"] = interview_data["average_response_time_seconds"]
    feedback["number_of_messages"] = len(interview_data["transcript"])

    # ``and`` binds tighter than ``or``, so the prefixes are tested with a
    # single tuple-based startswith and the None check applies to all of them.
    score_prefixes = ("interviewer_", "feedback_", "problem_")
    scores = [
        value
        for key, value in feedback.items()
        if key.startswith(score_prefixes) and value is not None
    ]
    if not scores:
        raise ValueError(f"No scores found in grader response for {json_file_path}")
    feedback["overall_score"] = sum(scores) / len(scores)

    # Save results in the same folder as the interview data. Only a trailing
    # ".json" is stripped, so the input file can never be overwritten and
    # ".json" occurring elsewhere in the path is left untouched.
    suffix = ".json"
    if json_file_path.endswith(suffix):
        base_path = json_file_path[: -len(suffix)]
    else:
        base_path = json_file_path
    with open(f"{base_path}_feedback.json", "w") as file:
        json.dump(feedback, file, indent=4)

    return feedback
|
tests/test_e2e.py
CHANGED
@@ -1,6 +1,12 @@
|
|
1 |
from tests.candidate import complete_interview
|
|
|
2 |
|
3 |
|
4 |
def test_complete_interview():
|
5 |
-
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from tests.candidate import complete_interview
|
2 |
+
from tests.grader import grade
|
3 |
|
4 |
|
5 |
def test_complete_interview():
    """End-to-end check: a simulated interview must receive a good grade.

    Runs up to three interviews and grades each one. Every run must score
    above 0.5, and the test passes as soon as one run scores above 0.8.
    If no run reaches 0.8 the test fails.
    """
    max_attempts = 3
    seen_scores = []
    for _attempt in range(max_attempts):
        # complete_interview returns a pair; only the saved file path is needed.
        file_path, _unused = complete_interview("coding", "test", model="gpt-3.5-turbo")
        feedback = grade(file_path, model="gpt-4-turbo")
        score = feedback["overall_score"]
        seen_scores.append(score)

        assert score > 0.5, f"Interview {file_path} scored {score}, expected > 0.5"
        if score > 0.8:
            return

    # Raised explicitly: a bare ``assert False`` is removed under ``python -O``.
    raise AssertionError(f"No interview scored above 0.8 in {max_attempts} attempts: {seen_scores}")
|
tests/{tessting_prompts.py → testing_prompts.py}
RENAMED
File without changes
|