Spaces:

yangzhang33
/

human_evaluation_free

Sleeping

Yang Zhang

init

e5c8c32 8 months ago

9.07 kB

	import gradio as gr
	import pandas as pd
	import os

	# Path of the shared input CSV that every rater starts from. Its "text_id"
	# column is read at import time to build the dropdown choices, and the file
	# is used to seed each user's own ratings CSV the first time they start.
	input_csv = "data.csv"

	def get_output_csv(username):
	    """Return the user-specific output CSV file name.

	    The name is typed by the user, so path separators (and NUL) are replaced
	    with underscores; a value such as ``"../x"`` can therefore no longer point
	    outside the working directory. Names without those characters map to
	    exactly the same file as before.

	    Args:
	        username: Rater name as entered in the UI.

	    Returns:
	        The relative file name ``"<sanitized name>_ratings.csv"``.
	    """
	    safe_name = str(username)
	    # Without a separator the result is always a single path component, so
	    # even ".." degrades to the harmless file name ".._ratings.csv".
	    for forbidden in ("/", "\\", "\0"):
	        safe_name = safe_name.replace(forbidden, "_")
	    return f"{safe_name}_ratings.csv"

	def format_text_with_id(text, text_id):
	    """Label *text* with its ID unless it is a placeholder message.

	    The known status/placeholder strings, and any text paired with the
	    sentinel ID ``-1``, are returned untouched. Every other text is returned
	    with a "Text ID <id>:" header line in front of it.
	    """
	    placeholders = (
	        "This is the last text.",
	        "All texts have been rated!",
	        "This is the first text.",
	        "Error: text id not found.",
	        "Text not found.",
	    )
	    needs_label = text not in placeholders and text_id != -1
	    if needs_label:
	        return f"Text ID {text_id}:\n{text}"
	    return text

	def get_score_radio_update(text):
	    """Build the update that enables or disables the score input.

	    The input is made non-interactive when *text* is one of the two
	    end-of-list messages and interactive for anything else.
	    """
	    end_messages = ("This is the last text.", "All texts have been rated!")
	    can_rate = text not in end_messages
	    return gr.update(interactive=can_rate)

	def get_next_text(username):
	    """Find the first text in the user's CSV that has no score yet.

	    Returns:
	        A ``(display_text, text_id)`` pair for the first row whose "scores"
	        cell is empty, or ``("All texts have been rated!", -1)`` when no
	        such row remains.
	    """
	    ratings = pd.read_csv(get_output_csv(username))
	    pending = ratings.loc[ratings["scores"].isna()]
	    if not pending.empty:
	        first_pending = pending.iloc[0]
	        tid = int(first_pending["text_id"])
	        return format_text_with_id(first_pending["text"], tid), tid
	    return "All texts have been rated!", -1

	def save_and_next(score, text_id, username):
	    """Store a score for the current text and move on to the next one.

	    The score is written to the "scores" cell of the row whose ``text_id``
	    matches, in the user's ratings CSV. The next text is the one with the
	    next-larger ``text_id``.

	    Args:
	        score: Value chosen in the score input, or ``None`` if nothing is
	            selected.
	        text_id: ID of the text currently shown.
	        username: Rater name used to locate the ratings CSV.

	    Returns:
	        A 5-tuple ``(status message, next text, next text ID, progress
	        table, score-input update)``. When no score is selected nothing is
	        written and every element except the status message is a no-op
	        ``gr.update()``, so the screen stays as it is.
	    """
	    if score is None:
	        # Previously an empty score was written and reported as
	        # "Saved score None"; refuse instead and leave the UI untouched.
	        return (
	            "Please select a score before submitting.",
	            gr.update(),
	            gr.update(),
	            gr.update(),
	            gr.update(),
	        )
	    output_csv = get_output_csv(username)
	    df = pd.read_csv(output_csv)
	    text_id = int(text_id)
	    matches = df["text_id"] == text_id
	    found = bool(matches.any())
	    if found:
	        df.loc[matches, "scores"] = score
	        df.to_csv(output_csv, index=False)
	        status = f"Saved score {score} for text ID {text_id}."
	    else:
	        # E.g. the -1 sentinel used once everything is rated: no row was
	        # changed, so do not claim that a score was saved.
	        status = f"Could not save: text ID {text_id} was not found."
	    # The in-memory frame already holds what was written, so it is reused
	    # rather than re-reading the file.
	    sorted_ids = sorted(df["text_id"].tolist())
	    if not found:
	        upcoming_text = "Error: text id not found."
	        next_id = text_id
	    else:
	        idx = sorted_ids.index(text_id)
	        if idx < len(sorted_ids) - 1:
	            next_id = sorted_ids[idx + 1]
	            raw_text = df.loc[df["text_id"] == next_id, "text"].iloc[0]
	            upcoming_text = format_text_with_id(raw_text, next_id)
	        else:
	            # Stay on the last ID; previous_text() uses it to restore the
	            # real last text from behind this placeholder.
	            upcoming_text = "This is the last text."
	            next_id = text_id
	    progress_df = df[df["scores"].notna()][["text_id", "scores"]]
	    score_update = get_score_radio_update(upcoming_text)
	    return (status, upcoming_text, next_id, progress_df, score_update)

	def next_text(current_id, username):
	    """Advance to the text whose ID follows ``current_id`` in sorted order.

	    Returns:
	        ``(text, text_id, score-input update)``. At the end of the list the
	        text is the "This is the last text." placeholder, the ID is left
	        unchanged and the score input is disabled. If ``current_id`` is not
	        among the IDs in the user's CSV, "Text not found." is returned.
	    """
	    ratings = pd.read_csv(get_output_csv(username))
	    ordered_ids = sorted(ratings["text_id"].tolist())
	    try:
	        position = ordered_ids.index(int(current_id))
	        if position != len(ordered_ids) - 1:
	            following_id = ordered_ids[position + 1]
	            body = ratings.loc[ratings["text_id"] == following_id, "text"].iloc[0]
	            shown = format_text_with_id(body, following_id)
	            return shown, following_id, get_score_radio_update(shown)
	        return "This is the last text.", current_id, gr.update(interactive=False)
	    except ValueError:
	        return "Text not found.", current_id, gr.update(interactive=True)

	def previous_text(current_id, current_text, username):
	    """Step back to the text whose ID precedes ``current_id`` in sorted order.

	    When the screen currently shows the "This is the last text." placeholder,
	    the text belonging to ``current_id`` itself is loaded instead of stepping
	    back.

	    Returns:
	        ``(text, text_id, score-input update)``. At the start of the list the
	        text is the "This is the first text." placeholder and the ID is left
	        unchanged. If ``current_id`` is not among the IDs in the user's CSV,
	        "Text not found." is returned.
	    """
	    ratings = pd.read_csv(get_output_csv(username))
	    ordered_ids = sorted(ratings["text_id"].tolist())
	    if current_text == "This is the last text.":
	        shown_id = int(current_id)
	        body = ratings.loc[ratings["text_id"] == shown_id, "text"].iloc[0]
	        return format_text_with_id(body, shown_id), current_id, get_score_radio_update(body)
	    try:
	        position = ordered_ids.index(int(current_id))
	        if position > 0:
	            earlier_id = ordered_ids[position - 1]
	            body = ratings.loc[ratings["text_id"] == earlier_id, "text"].iloc[0]
	            shown = format_text_with_id(body, earlier_id)
	            return shown, earlier_id, get_score_radio_update(shown)
	        return "This is the first text.", current_id, gr.update(interactive=True)
	    except ValueError:
	        return "Text not found.", current_id, gr.update(interactive=True)

	def load_text_by_id(text_id, username):
	    """Load the text with the given ID from the user's CSV for direct selection.

	    Args:
	        text_id: ID chosen in the dropdown (a string), or ``None`` when
	            nothing is selected.
	        username: Rater name used to locate the ratings CSV.

	    Returns:
	        ``(text, text_id, score-input update)``. If the selection is missing
	        or is not an integer, three no-op ``gr.update()`` values are returned
	        so the screen stays as it is. If no row has that ID, the text is
	        "Text not found!".
	    """
	    try:
	        wanted_id = int(text_id)
	    except (TypeError, ValueError):
	        # Clicking "load" with nothing (or something non-numeric) selected
	        # used to raise inside the handler; ignore the click instead.
	        return gr.update(), gr.update(), gr.update()
	    output_csv = get_output_csv(username)
	    df = pd.read_csv(output_csv)
	    row = df[df["text_id"] == wanted_id]
	    if row.empty:
	        return "Text not found!", text_id, gr.update(interactive=True)
	    raw_text = row.iloc[0]["text"]
	    return format_text_with_id(raw_text, wanted_id), wanted_id, get_score_radio_update(raw_text)

	def download_csv(username):
	    """Return the path of the user's ratings CSV so it can be offered for download."""
	    ratings_path = get_output_csv(username)
	    return ratings_path

	def initialize_user(username):
	    """Prepare the ratings file for *username* and produce the initial UI state.

	    If the user has no ratings CSV yet, one is created from ``input_csv``
	    with an empty "scores" column; an existing file is left as it is.

	    Returns:
	        ``(welcome message, first unrated text, its ID, progress table,
	        username)``, where the progress table holds the "text_id" and
	        "scores" columns of the rows already scored.
	    """
	    ratings_path = get_output_csv(username)
	    already_started = os.path.exists(ratings_path)
	    if not already_started:
	        template = pd.read_csv(input_csv)
	        template["scores"] = None
	        template.to_csv(ratings_path, index=False)
	    first_text, first_id = get_next_text(username)
	    ratings = pd.read_csv(ratings_path)
	    rated = ratings[ratings["scores"].notna()]
	    progress = rated[["text_id", "scores"]]
	    return f"Welcome {username}!", first_text, first_id, progress, username

	# Dropdown choices: every text ID from the shared input CSV, as strings.
	df_all = pd.read_csv(input_csv)
	text_ids = df_all["text_id"].tolist()
	text_id_options = list(map(str, text_ids))

	# Build the UI: a start screen that asks for the rater's name, and an
	# evaluation screen that starts hidden and is revealed by the start button's
	# follow-up step below.
	with gr.Blocks() as demo:
	gr.Markdown("## 🧠 Human Evaluation Tool")

	# START CONTAINER: shown first
	with gr.Column(visible=True) as start_container:
	with gr.Row():
	username_input = gr.Textbox(label="Enter your name")
	start_btn = gr.Button("Start Evaluation")
	start_status = gr.Textbox(label="Status", interactive=False)
	# Invisible textbox filled with the last value returned by initialize_user;
	# every other handler receives it as its `username` argument.
	hidden_username = gr.Textbox(visible=False) # This will store the user's name

	# EVALUATION CONTAINER: initially hidden
	with gr.Column(visible=False) as eval_container:
	with gr.Row():
	with gr.Column():
	# Invisible holder for the ID of the text currently displayed; the
	# navigation and submit handlers both read and write it.
	text_id_box = gr.Number(visible=False)
	text_display = gr.Textbox(label="Text to Evaluate", lines=4, interactive=False)
	score_radio = gr.Radio(choices=[1, 2, 3, 4, 5],
	label="Select a score (1 = bad, 5 = excellent)")
	submit_btn = gr.Button("Submit Rating")
	result_text = gr.Textbox(label="Status", interactive=False)
	with gr.Row():
	prev_btn = gr.Button("Previous")
	next_btn = gr.Button("Next")
	progress_table = gr.Dataframe(label="Evaluation Progress", interactive=False)
	download_btn = gr.Button("Download Ratings CSV")
	download_file = gr.File(label="Download File")
	with gr.Column():
	gr.Markdown("### Select Text by ID")
	text_dropdown = gr.Dropdown(choices=text_id_options, label="Select Text ID")
	load_btn = gr.Button("Load Selected Text")

	# Bind the start button: initialize_user fills the status box, the first
	# text, its ID, the progress table and the hidden username; the chained step
	# then hides the start container and shows the evaluation container.
	start_btn.click(
	fn=initialize_user,
	inputs=[username_input],
	outputs=[start_status, text_display, text_id_box, progress_table, hidden_username]
	).then(
	lambda *args: (gr.update(visible=False), gr.update(visible=True)),
	outputs=[start_container, eval_container]
	)

	# Submit: save the selected score for the current text ID, then update the
	# status, displayed text, current ID, progress table and score input.
	submit_btn.click(
	fn=save_and_next,
	inputs=[score_radio, text_id_box, hidden_username],
	outputs=[result_text, text_display, text_id_box, progress_table, score_radio]
	)
	# Next / Previous / Load: each handler returns the text to display, the ID
	# to store in text_id_box, and an update for the score input.
	next_btn.click(
	fn=next_text,
	inputs=[text_id_box, hidden_username],
	outputs=[text_display, text_id_box, score_radio]
	)
	# previous_text also receives the displayed text so it can detect the
	# "This is the last text." placeholder.
	prev_btn.click(
	fn=previous_text,
	inputs=[text_id_box, text_display, hidden_username],
	outputs=[text_display, text_id_box, score_radio]
	)
	load_btn.click(
	fn=load_text_by_id,
	inputs=[text_dropdown, hidden_username],
	outputs=[text_display, text_id_box, score_radio]
	)
	# Download: hand the path of the user's ratings CSV to the file component.
	download_btn.click(
	fn=download_csv,
	inputs=[hidden_username],
	outputs=download_file
	)

	# Start the Gradio app.
	demo.launch()