# AI4PE-STATIC / evaluator.py
from openai import OpenAI

from config import openai_api

# Single client instance reused for all evaluation calls
client = OpenAI(api_key=openai_api)


def eval_answer(ANSWER_REFERENCE, ANSWER_TO_SCORE):
    """Score ANSWER_TO_SCORE against ANSWER_REFERENCE with GPT-4o.

    Returns the model's raw JSON string of the form
    {"score": X, "rationale_based_on_scoring_rules": "..."}.
    """
    system_prompt = f"""Your task is to evaluate how well a given answer matches the following expected output (all sources and references should back up the given answer):
====================
EXPECTED OUTPUT
{ANSWER_REFERENCE}
====================
You only output a float score between 1 and 5 on the following scale (citing the sources the information came from is a critical expected element):
1: off topic, the answer does not make sense
2: misleading or false answer
3: the answer makes sense, but some expected parts or sources are missing
4: very good answer backed by all valid sources, all key elements present, but could be clearer
5: perfect answer, nothing else was expected
You output the score in the following JSON format:
{{"score": X, "rationale_based_on_scoring_rules": "XXX"}}
"""
    user_prompt = f"""
Given answer:
{ANSWER_TO_SCORE}
"""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0,
        max_tokens=256,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        response_format={"type": "json_object"},
        stream=False,
    )
    # The model is constrained to JSON output; return the raw JSON string.
    return response.choices[0].message.content
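

# Minimal usage sketch. Assumptions: this module is run directly with a valid
# config.openai_api key available; the reference/candidate strings below are
# illustrative placeholders, not data from the repo.
if __name__ == "__main__":
    import json

    reference = "The Eiffel Tower is 330 m tall (source: official Eiffel Tower website)."
    candidate = "The Eiffel Tower is about 330 meters tall."

    raw = eval_answer(reference, candidate)
    # eval_answer returns a JSON string; parse it to get the numeric score.
    result = json.loads(raw)
    print(result["score"], result["rationale_based_on_scoring_rules"])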