Spaces:

tianleliphoebe
/

visual-arena

Runtime error

App Files Files Community

visual-arena / fastchat /llm_judge /gen_api_answer.py

tianleliphoebe

Upload folder using huggingface_hub

ec0c335 verified 6 months ago

raw history blame contribute delete

No virus

4.71 kB

	"""Generate answers with GPT-4

	Usage:
	python3 gen_api_answer.py --model gpt-3.5-turbo
	"""
	import argparse
	import json
	import os
	import time
	import concurrent.futures

	import openai
	import shortuuid
	import tqdm

	from fastchat.llm_judge.common import (
	load_questions,
	temperature_config,
	chat_compeletion_openai,
	chat_compeletion_anthropic,
	chat_compeletion_palm,
	)
	from fastchat.llm_judge.gen_model_answer import reorg_answer_file
	from fastchat.model.model_adapter import get_conversation_template, ANTHROPIC_MODEL_LIST


	def get_answer(
	question: dict, model: str, num_choices: int, max_tokens: int, answer_file: str
	):
	assert (
	args.force_temperature is not None and "required_temperature" in question.keys()
	) == False
	if args.force_temperature is not None:
	temperature = args.force_temperature
	elif "required_temperature" in question.keys():
	temperature = question["required_temperature"]
	elif question["category"] in temperature_config:
	temperature = temperature_config[question["category"]]
	else:
	temperature = 0.7

	choices = []
	chat_state = None # for palm-2 model
	for i in range(num_choices):
	conv = get_conversation_template(model)

	turns = []
	for j in range(len(question["turns"])):
	conv.append_message(conv.roles[0], question["turns"][j])
	conv.append_message(conv.roles[1], None)

	if model in ANTHROPIC_MODEL_LIST:
	output = chat_compeletion_anthropic(
	model, conv, temperature, max_tokens
	)
	elif model == "palm-2-chat-bison-001":
	chat_state, output = chat_compeletion_palm(
	chat_state, model, conv, temperature, max_tokens
	)
	else:
	output = chat_compeletion_openai(model, conv, temperature, max_tokens)

	conv.update_last_message(output)
	turns.append(output)

	choices.append({"index": i, "turns": turns})

	# Dump answers
	ans = {
	"question_id": question["question_id"],
	"answer_id": shortuuid.uuid(),
	"model_id": model,
	"choices": choices,
	"tstamp": time.time(),
	}

	os.makedirs(os.path.dirname(answer_file), exist_ok=True)
	with open(answer_file, "a") as fout:
	fout.write(json.dumps(ans) + "\n")


	if __name__ == "__main__":
	parser = argparse.ArgumentParser()
	parser.add_argument(
	"--bench-name",
	type=str,
	default="mt_bench",
	help="The name of the benchmark question set.",
	)
	parser.add_argument("--answer-file", type=str, help="The output answer file.")
	parser.add_argument("--model", type=str, default="gpt-3.5-turbo")
	parser.add_argument(
	"--num-choices",
	type=int,
	default=1,
	help="How many completion choices to generate.",
	)
	parser.add_argument(
	"--force-temperature", type=float, help="Forcibly set a sampling temperature."
	)
	parser.add_argument(
	"--max-tokens",
	type=int,
	default=1024,
	help="The maximum number of new generated tokens.",
	)
	parser.add_argument(
	"--question-begin",
	type=int,
	help="A debug option. The begin index of questions.",
	)
	parser.add_argument(
	"--question-end", type=int, help="A debug option. The end index of questions."
	)
	parser.add_argument(
	"--parallel", type=int, default=1, help="The number of concurrent API calls."
	)
	parser.add_argument("--openai-api-base", type=str, default=None)
	args = parser.parse_args()

	if args.openai_api_base is not None:
	openai.api_base = args.openai_api_base

	question_file = f"data/{args.bench_name}/question.jsonl"
	questions = load_questions(question_file, args.question_begin, args.question_end)

	if args.answer_file:
	answer_file = args.answer_file
	else:
	answer_file = f"data/{args.bench_name}/model_answer/{args.model}.jsonl"
	print(f"Output to {answer_file}")

	with concurrent.futures.ThreadPoolExecutor(max_workers=args.parallel) as executor:
	futures = []
	for question in questions:
	future = executor.submit(
	get_answer,
	question,
	args.model,
	args.num_choices,
	args.max_tokens,
	answer_file,
	)
	futures.append(future)

	for future in tqdm.tqdm(
	concurrent.futures.as_completed(futures), total=len(futures)
	):
	future.result()

	reorg_answer_file(answer_file)