File size: 1,431 Bytes
b1e25b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


import logging
import os

def judge_answer_with_api(question, target, answer):
    from openai import OpenAI

    client = OpenAI(
    base_url=os.environ.get("OPENAI_BASE_URL"),
    api_key=os.environ.get("OPENAI_API_KEY")
    )

    prompt = (
        "You will be given a question, a target answer (maybe a list of all possible answers), "
        "and a generated answer. Please judge whether the generated answer is correct. "
        "If it is correct, return 'True'. If it is incorrect, return 'False'.\n"
        f"Question: {question}\n"
        f"Target Answer: {target}\n"
        f"Generated Answer: {answer}\n"
        "Please return only 'True' or 'False', without any other text."
    )

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini-2024-07-18",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=1
        )
    except Exception as e:
        logging.error("API call failed: %s", str(e))
        return 0  # Return default value

    try:
        result = response.choices[0].message.content.strip()
    except (AttributeError, IndexError) as e:
        logging.error("Error parsing API response: %s", str(e))
        return 0

    if result == "True":
        return 1
    elif result == "False":
        return 0
    else:
        logging.warning("Abnormal response format: %s", result)
        return 0