Spaces:

yanolja
/

arena

Running

arena / app.py

Kang Suhyun

[#8] Update the leaderboard every 10 minutes (#38)

076f69b unverified 8 months ago

4.34 kB

	"""
	It provides a platform for comparing the responses of two LLMs.
	"""
	import enum
	from uuid import uuid4

	from firebase_admin import firestore
	import gradio as gr

	from leaderboard import build_leaderboard
	from leaderboard import db
	import response
	from response import get_responses

	# Languages offered as choices by the source/target language dropdowns.
	SUPPORTED_TRANSLATION_LANGUAGES = [
	    "Korean",
	    "English",
	    "Chinese",
	    "Japanese",
	    "Spanish",
	    "French",
	]


	class VoteOptions(enum.Enum):
	    """Possible outcomes of a vote.

	    Each value is the label shown on the matching vote button, so a clicked
	    button's label can be converted back with ``VoteOptions(label)``.
	    """

	    MODEL_A = "Model A is better"
	    MODEL_B = "Model B is better"
	    TIE = "Tie"


	def vote(vote_button, response_a, response_b, model_a_name, model_b_name,
	         user_prompt, instruction, category, source_lang, target_lang):
	    """Records one vote in `db` and returns the resulting UI updates.

	    Args:
	      vote_button: Label of the clicked button; must be a VoteOptions value.
	      response_a: Response text attributed to model A.
	      response_b: Response text attributed to model B.
	      model_a_name: Name of model A.
	      model_b_name: Name of model B.
	      user_prompt: Prompt the responses were generated for.
	      instruction: Instruction stored alongside the prompt.
	      category: Selected category; compared against response.Category values.
	      source_lang: Source language, required for the translate category.
	      target_lang: Target language, required for the translate category.

	    Returns:
	      A list of four component updates: three non-interactive buttons
	      followed by a visible row.

	    Raises:
	      ValueError: If vote_button is not the value of a VoteOptions member.
	      gr.Error: If the category is translate and a language is missing, or
	        if the category is neither summarize nor translate.
	    """
	    record_id = uuid4().hex
	    # The button label is the enum value; the stored winner is the lowercased
	    # member name (e.g. "model_a").
	    chosen = VoteOptions(vote_button).name.lower()

	    # Disable three buttons and make the row bound to the last output visible.
	    ui_updates = [gr.Button(interactive=False) for _ in range(3)]
	    ui_updates.append(gr.Row(visible=True))

	    record = {
	        "id": record_id,
	        "prompt": user_prompt,
	        "instruction": instruction,
	        "model_a": model_a_name,
	        "model_b": model_b_name,
	        "model_a_response": response_a,
	        "model_b_response": response_b,
	        "winner": chosen,
	        "timestamp": firestore.SERVER_TIMESTAMP
	    }

	    # Pick the destination collection; translation votes also carry the
	    # lowercased language pair.
	    if category == response.Category.SUMMARIZE.value:
	        collection_name = "arena-summarizations"
	    elif category == response.Category.TRANSLATE.value:
	        if not source_lang or not target_lang:
	            raise gr.Error("Please select source and target languages.")
	        record["source_language"] = source_lang.lower()
	        record["target_language"] = target_lang.lower()
	        collection_name = "arena-translations"
	    else:
	        raise gr.Error("Please select a response type.")

	    db.collection(collection_name).document(record_id).set(record)
	    return ui_updates


	# Builds the whole UI: category/language selection, prompt input, the two
	# response boxes, the vote buttons and the leaderboard.
	# NOTE(review): the nesting below was reconstructed from a copy of the file
	# that had lost its indentation -- confirm it against the repository.
	with gr.Blocks(title="Arena") as app:
	    with gr.Row():
	        category_radio = gr.Radio(
	            [category.value for category in response.Category],
	            label="Category",
	            info="The chosen category determines the instruction sent to the LLMs.")

	        # Both dropdowns start hidden; update_language_visibility shows them
	        # when the translate category is selected.
	        source_language = gr.Dropdown(
	            choices=SUPPORTED_TRANSLATION_LANGUAGES,
	            label="Source language",
	            info="Choose the source language for translation.",
	            interactive=True,
	            visible=False)
	        target_language = gr.Dropdown(
	            choices=SUPPORTED_TRANSLATION_LANGUAGES,
	            label="Target language",
	            info="Choose the target language for translation.",
	            interactive=True,
	            visible=False)

	        def update_language_visibility(category):
	            """Shows the language dropdowns only for the translate category.

	            Args:
	              category: Current value of the category radio.

	            Returns:
	              A dict mapping each language dropdown to an update whose
	              visibility is True only when category is the translate value.
	            """
	            visible = category == response.Category.TRANSLATE.value
	            return {
	                source_language: gr.Dropdown(visible=visible),
	                target_language: gr.Dropdown(visible=visible)
	            }

	        category_radio.change(update_language_visibility, category_radio,
	                              [source_language, target_language])

	    # Two-element lists (index 0 = model A, index 1 = model B). The State
	    # entries are placeholders; both are overwritten with Textboxes below.
	    model_names = [gr.State(None), gr.State(None)]
	    response_boxes = [gr.State(None), gr.State(None)]

	    prompt = gr.TextArea(label="Prompt", lines=4)
	    submit = gr.Button()

	    with gr.Group():
	        with gr.Row():
	            response_boxes[0] = gr.Textbox(label="Model A", interactive=False)
	            response_boxes[1] = gr.Textbox(label="Model B", interactive=False)

	        # Hidden at start; this row is the last output of the vote handlers,
	        # which return a visible Row for it.
	        with gr.Row(visible=False) as model_name_row:
	            model_names[0] = gr.Textbox(show_label=False)
	            model_names[1] = gr.Textbox(show_label=False)

	    # Hidden at start; it is one of the outputs of the submit handler.
	    with gr.Row(visible=False) as vote_row:
	        option_a = gr.Button(VoteOptions.MODEL_A.value)
	        option_b = gr.Button(VoteOptions.MODEL_B.value)
	        tie = gr.Button(VoteOptions.TIE.value)

	    vote_buttons = [option_a, option_b, tie]
	    instruction_state = gr.State("")

	    # get_responses receives the prompt, category and language pair; its
	    # outputs are wired to the response boxes, model names, vote buttons,
	    # instruction state and the two hidden rows, in that order.
	    submit.click(
	        get_responses, [prompt, category_radio, source_language, target_language],
	        response_boxes + model_names + vote_buttons +
	        [instruction_state, model_name_row, vote_row])

	    # Inputs after the clicked button follow vote's parameter order:
	    # responses, model names, prompt, instruction, category, languages.
	    common_inputs = response_boxes + model_names + [
	        prompt, instruction_state, category_radio, source_language,
	        target_language
	    ]
	    # vote returns three button updates followed by one row update.
	    common_outputs = vote_buttons + [model_name_row]
	    # The clicked button is passed as the first input so that vote can map
	    # it to a VoteOptions member.
	    option_a.click(vote, [option_a] + common_inputs, common_outputs)
	    option_b.click(vote, [option_b] + common_inputs, common_outputs)
	    tie.click(vote, [tie] + common_inputs, common_outputs)

	    # NOTE(review): presumably adds the leaderboard UI to this Blocks
	    # context -- confirm against leaderboard.build_leaderboard.
	    build_leaderboard()

	if __name__ == "__main__":
	    # Generator-based handlers need the queue, so enable it before launching.
	    app.queue().launch(debug=True)