Spaces:
No application file
No application file
File size: 4,661 Bytes
f3305db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
"""Generate answers with GPT-4
Usage:
python3 gen_api_answer.py --model gpt-3.5-turbo
"""
import argparse
import json
import os
import time
import concurrent.futures
import openai
import shortuuid
import tqdm
from fastchat.llm_judge.common import (
load_questions,
temperature_config,
chat_completion_openai,
chat_completion_anthropic,
chat_completion_palm,
)
from fastchat.llm_judge.gen_model_answer import reorg_answer_file
from fastchat.model.model_adapter import get_conversation_template, ANTHROPIC_MODEL_LIST
def get_answer(
    question: dict, model: str, num_choices: int, max_tokens: int, answer_file: str
):
    """Query an API model for one question and append the result to a JSONL file.

    The sampling temperature is picked in this order of priority:
    the ``--force-temperature`` command-line value, the question's own
    ``required_temperature``, the per-category entry in ``temperature_config``,
    and finally a default of 0.7.

    NOTE: this function reads the module-level ``args`` namespace, which is
    only created when the file is executed as a script.

    Args:
        question: Question record. Reads ``"question_id"``, ``"category"``,
            ``"turns"`` and, optionally, ``"required_temperature"``.
        model: Model name. Selects the Anthropic, PaLM or OpenAI completion
            helper and is stored as ``"model_id"`` in the output record.
        num_choices: Number of completions to generate for the question.
        max_tokens: Token limit forwarded to the completion helper.
        answer_file: Path of the JSONL file the answer record is appended to.

    Raises:
        AssertionError: If a temperature is forced on the command line while
            the question also carries a ``required_temperature``.
    """
    # A forced temperature and a question-mandated one are mutually exclusive.
    assert not (
        args.force_temperature is not None and "required_temperature" in question
    ), "--force-temperature conflicts with the question's required_temperature"

    if args.force_temperature is not None:
        temperature = args.force_temperature
    elif "required_temperature" in question:
        temperature = question["required_temperature"]
    elif question["category"] in temperature_config:
        temperature = temperature_config[question["category"]]
    else:
        temperature = 0.7

    choices = []
    # Only used by the palm-2 branch; it is carried over between calls
    # (including across choices, exactly as before).
    chat_state = None
    for i in range(num_choices):
        # Every choice starts from a fresh conversation template.
        conv = get_conversation_template(model)

        turns = []
        for user_turn in question["turns"]:
            conv.append_message(conv.roles[0], user_turn)
            # Placeholder for the reply; filled in by update_last_message below.
            conv.append_message(conv.roles[1], None)

            if model in ANTHROPIC_MODEL_LIST:
                output = chat_completion_anthropic(model, conv, temperature, max_tokens)
            elif model == "palm-2-chat-bison-001":
                chat_state, output = chat_completion_palm(
                    chat_state, model, conv, temperature, max_tokens
                )
            else:
                output = chat_completion_openai(model, conv, temperature, max_tokens)

            conv.update_last_message(output)
            turns.append(output)

        choices.append({"index": i, "turns": turns})

    # Dump answers
    ans = {
        "question_id": question["question_id"],
        "answer_id": shortuuid.uuid(),
        "model_id": model,
        "choices": choices,
        "tstamp": time.time(),
    }

    # os.path.dirname() returns "" for a bare file name, and os.makedirs("")
    # raises FileNotFoundError, so only create the parent when there is one.
    answer_dir = os.path.dirname(answer_file)
    if answer_dir:
        os.makedirs(answer_dir, exist_ok=True)
    with open(answer_file, "a") as fout:
        fout.write(json.dumps(ans) + "\n")
if __name__ == "__main__":
    # Command-line interface. ``args`` is deliberately left as a module-level
    # name because get_answer() reads ``args.force_temperature`` from it.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--bench-name",
        type=str,
        default="mt_bench",
        help="The name of the benchmark question set.",
    )
    parser.add_argument(
        "--answer-file",
        type=str,
        help="The output answer file.",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="gpt-3.5-turbo",
    )
    parser.add_argument(
        "--num-choices",
        type=int,
        default=1,
        help="How many completion choices to generate.",
    )
    parser.add_argument(
        "--force-temperature",
        type=float,
        help="Forcibly set a sampling temperature.",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=1024,
        help="The maximum number of new generated tokens.",
    )
    parser.add_argument(
        "--question-begin",
        type=int,
        help="A debug option. The begin index of questions.",
    )
    parser.add_argument(
        "--question-end",
        type=int,
        help="A debug option. The end index of questions.",
    )
    parser.add_argument(
        "--parallel",
        type=int,
        default=1,
        help="The number of concurrent API calls.",
    )
    parser.add_argument(
        "--openai-api-base",
        type=str,
        default=None,
    )
    args = parser.parse_args()

    # Optionally point the OpenAI client at a different endpoint.
    if args.openai_api_base is not None:
        openai.api_base = args.openai_api_base

    # Load the (optionally sliced) question set for the chosen benchmark.
    question_file = f"data/{args.bench_name}/question.jsonl"
    questions = load_questions(question_file, args.question_begin, args.question_end)

    # Fall back to the per-model default location when no output path is given.
    answer_file = (
        args.answer_file
        or f"data/{args.bench_name}/model_answer/{args.model}.jsonl"
    )
    print(f"Output to {answer_file}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=args.parallel) as executor:
        # One task per question; each task appends its own line to answer_file.
        futures = [
            executor.submit(
                get_answer,
                item,
                args.model,
                args.num_choices,
                args.max_tokens,
                answer_file,
            )
            for item in questions
        ]

        # Drain the tasks as they finish; result() re-raises any worker error.
        completed = concurrent.futures.as_completed(futures)
        for finished in tqdm.tqdm(completed, total=len(futures)):
            finished.result()

    # Post-process the answer file once every task has completed.
    reorg_answer_file(answer_file)
|