# Gradio app for blind, pairwise human-likeness voting: each question is shown with
# one response from a human-like model and one from the official instruct model it was
# trained from, in random order. Votes are batched and pushed to a Hugging Face dataset.
import gradio as gr
import pandas as pd
import random
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import HfApi, login
import os
from datetime import datetime
import markdown

hf_api = HfApi()
HF_TOKEN = os.getenv('HF_TOKEN')
login(token=HF_TOKEN)

log_dataset = "HumanLLMs/log"

selected_indices = set()

dataset_1 = load_dataset("HumanLLMs/LlamaPair")["train"]

df_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])

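# Despite the name, this strips every non-ASCII character (emojis included).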
def remove_emojis(text):
    return text.encode('ascii', 'ignore').decode('ascii')

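# Pick an unused question from the dataset and return it together with both model
# responses in randomized order, so the answer position does not reveal the model.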
def get_random_row():
    global selected_indices

    pair_name = "LlamaPair"
    if len(selected_indices) >= len(dataset_1):
        raise ValueError("All rows in the dataset have been used.")

    idx = random.randint(0, len(dataset_1) - 1)
    while idx in selected_indices:
        idx = random.randint(0, len(dataset_1) - 1)

    selected_indices.add(idx)
    row = dataset_1[idx]
    instruction = row["instruction"]
    response_human = row["response_human_like_model"]
    # The column name keeps the dataset's original spelling ("offical").
    response_official = row["response_offical_instruct_model"]

    responses = [("Human-like Model", response_human),
                 ("Official Model", response_official)]

    random.shuffle(responses)

    return (instruction, remove_emojis(responses[0][1]), remove_emojis(responses[1][1]),
            responses[0][0], responses[1][0], pair_name)

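# Helpers that wrap a model response in a simple dark-themed HTML card,
# rendering the response text as Markdown.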
def format_response_1_html(response):
    return f'''
    <div style="border: 1px solid white; background-color: black;
                padding: 10px; margin: 5px;">
        <strong style="color: white;">Answer 1:</strong>
        <div style="color: white;">{markdown.markdown(response)}</div>
    </div>
    '''


def format_response_2_html(response):
    return f'''
    <div style="border: 1px solid white; background-color: black;
                padding: 10px; margin: 5px;">
        <strong style="color: white;">Answer 2:</strong>
        <div style="color: white;">{markdown.markdown(response)}</div>
    </div>
    '''

counter = 0
accumulated_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])

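# Record the current vote, accumulate it locally, push the full log to the Hub every
# 10 submissions, and return the next question and responses for the UI.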
def submit_choice(selected_response, instruction, label_1, label_2, pair_name):
    global counter, accumulated_log

    # Ignore submissions made before an answer was selected.
    if selected_response not in ("Answer 1", "Answer 2"):
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            instruction,
            label_1,
            label_2,
            pair_name,
            "Please select an answer before submitting."
        )

    try:
        df_log = load_dataset(log_dataset)["train"].to_pandas()
    except Exception:
        df_log = pd.DataFrame(columns=["instruction", "selected_model",
                                       "pair", "submission_time"])

    selected_model = label_1 if selected_response == "Answer 1" else label_2
    submission_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    new_instruction, new_response_1, new_response_2, new_label_1, new_label_2, new_pair_name = get_random_row()

    # Log the question that was actually answered, not the newly loaded one.
    new_entry = pd.DataFrame({
        "instruction": [instruction],
        "selected_model": [selected_model],
        "pair": [pair_name],
        "submission_time": [submission_time]
    })
    accumulated_log = pd.concat([accumulated_log, new_entry], ignore_index=True)

    counter += 1

    # Every 10 submissions, merge the accumulated votes into the log and push it to the Hub.
    if counter % 10 == 0:
        df_log = pd.concat([df_log, accumulated_log], ignore_index=True)
        df_log.to_csv("annotations_log.csv", index=False)
        accumulated_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])
        log = Dataset.from_pandas(df_log)
        log.push_to_hub(log_dataset)

    question = f"""
    <div style="text-align: center; font-size: 24px; font-weight: bold; margin-top: 20px;">
        Question:
    </div>
    <div style="text-align: center; font-size: 20px; margin-top: 10px;">
        {new_instruction}
    </div>
    """

    return (
        question,
        format_response_1_html(new_response_1),
        format_response_2_html(new_response_2),
        new_instruction,
        new_label_1,
        new_label_2,
        new_pair_name,
        "Your choice has been recorded. A new question is loaded!"
    )

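# Build the Gradio Blocks UI: the current question at the top, the two anonymized
# answers side by side, and a radio selector with a submit button for voting.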
def create_interface():
    instruction, response_1, response_2, label_1, label_2, pair_name = get_random_row()

    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.HTML("""
        <div style="text-align: center;">
            <h1>Human-Likeness Voting System</h1>
        </div>
        """)
        gr.Markdown("This interface compares the performance of the human-like LLMs developed by our team with the official instruct models they were trained from. The results of this study will be presented in a paper. Please vote fairly and carefully when selecting an answer. We thank you for your contributions on behalf of the research team.")
        gr.Markdown("## Instructions")
        gr.Markdown(
            """
            1. First, read the provided question carefully.
            2. Second, read both responses carefully.
            3. Finally, select the answer that most resembles a human response."""
        )

        current_instruction = gr.State(instruction)
        label_1_state = gr.State(label_1)
        label_2_state = gr.State(label_2)
        pair_name_state = gr.State(pair_name)
        question_display = gr.HTML(
            value=f"""
            <div style="text-align: center; font-size: 24px; font-weight: bold; margin-top: 20px;">
                Question:
            </div>
            <div style="text-align: center; font-size: 20px; margin-top: 10px;">
                {instruction}
            </div>
            """
        )

        with gr.Row():
            with gr.Column():
                response_1_display = gr.HTML(format_response_1_html(response_1))
            with gr.Column():
                response_2_display = gr.HTML(format_response_2_html(response_2))
        with gr.Row():
            selected_response = gr.Radio(
                ["Answer 1", "Answer 2"],
                label="Which answer is better?",
                interactive=True,
            )
            submit_btn = gr.Button("Submit Choice")

        status_output = gr.Textbox(
            interactive=False,
            label="Status",
            value="Select an answer and click Submit"
        )
        submit_btn.click(
            fn=submit_choice,
            inputs=[
                selected_response,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state
            ],
            # current_instruction is included in the outputs so the logged question
            # stays in sync with the one currently shown on screen.
            outputs=[
                question_display,
                response_1_display,
                response_2_display,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state,
                status_output
            ]
        )

    return demo

if __name__ == "__main__":
    interface = create_interface()
    interface.launch(share=True)