# Gradio app for blind, pairwise human-likeness voting: each question is shown with
# one response from a human-like model and one from the official instruct model it was
# trained from, in random order. Votes are batched and pushed to a Hugging Face dataset.
import gradio as gr
import pandas as pd
import random
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import HfApi, login
import os
from datetime import datetime
import markdown

hf_api = HfApi()
HF_TOKEN = os.getenv('HF_TOKEN')
login(token=HF_TOKEN)

log_dataset = "HumanLLMs/log"

selected_indices = set()

dataset_1 = load_dataset("HumanLLMs/LlamaPair")["train"]

df_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])

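# Despite the name, this strips every non-ASCII character (emojis included).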
def remove_emojis(text):
    return text.encode('ascii', 'ignore').decode('ascii')

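# Pick an unused question from the dataset and return it together with both model
# responses in randomized order, so the answer position does not reveal the model.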
def get_random_row():
    global selected_indices

    pair_name = "LlamaPair"
    if len(selected_indices) >= len(dataset_1):
        raise ValueError("All rows in the dataset have been used.")

    idx = random.randint(0, len(dataset_1) - 1)
    while idx in selected_indices:
        idx = random.randint(0, len(dataset_1) - 1)

    selected_indices.add(idx)
    row = dataset_1[idx]
    instruction = row["instruction"]
    response_human = row["response_human_like_model"]
    # The column name keeps the dataset's original spelling ("offical").
    response_official = row["response_offical_instruct_model"]

    responses = [("Human-like Model", response_human),
                 ("Official Model", response_official)]

    random.shuffle(responses)

    return (instruction, remove_emojis(responses[0][1]), remove_emojis(responses[1][1]),
            responses[0][0], responses[1][0], pair_name)

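# Helpers that wrap a model response in a simple dark-themed HTML card,
# rendering the response text as Markdown.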
def format_response_1_html(response):
    return f'''
    <div style="border: 1px solid white; background-color: black;
                padding: 10px; margin: 5px;">
        <strong style="color: white;">Answer 1:</strong>
        <div style="color: white;">{markdown.markdown(response)}</div>
    </div>
    '''


def format_response_2_html(response):
    return f'''
    <div style="border: 1px solid white; background-color: black;
                padding: 10px; margin: 5px;">
        <strong style="color: white;">Answer 2:</strong>
        <div style="color: white;">{markdown.markdown(response)}</div>
    </div>
    '''

counter = 0
accumulated_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])

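# Record the current vote, accumulate it locally, push the full log to the Hub every
# 10 submissions, and return the next question and responses for the UI.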
def submit_choice(selected_response, instruction, label_1, label_2, pair_name):
    global counter, accumulated_log

    # Ignore submissions made before an answer was selected.
    if selected_response not in ("Answer 1", "Answer 2"):
        return (
            gr.update(),
            gr.update(),
            gr.update(),
            instruction,
            label_1,
            label_2,
            pair_name,
            "Please select an answer before submitting."
        )

    try:
        df_log = load_dataset(log_dataset)["train"].to_pandas()
    except Exception:
        df_log = pd.DataFrame(columns=["instruction", "selected_model",
                                       "pair", "submission_time"])

    selected_model = label_1 if selected_response == "Answer 1" else label_2
    submission_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    new_instruction, new_response_1, new_response_2, new_label_1, new_label_2, new_pair_name = get_random_row()

    # Log the question that was actually answered, not the newly loaded one.
    new_entry = pd.DataFrame({
        "instruction": [instruction],
        "selected_model": [selected_model],
        "pair": [pair_name],
        "submission_time": [submission_time]
    })
    accumulated_log = pd.concat([accumulated_log, new_entry], ignore_index=True)

    counter += 1

    # Every 10 submissions, merge the accumulated votes into the log and push it to the Hub.
    if counter % 10 == 0:
        df_log = pd.concat([df_log, accumulated_log], ignore_index=True)
        df_log.to_csv("annotations_log.csv", index=False)
        accumulated_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])
        log = Dataset.from_pandas(df_log)
        log.push_to_hub(log_dataset)

    question = f"""
    <div style="text-align: center; font-size: 24px; font-weight: bold; margin-top: 20px;">
        Question:
    </div>
    <div style="text-align: center; font-size: 20px; margin-top: 10px;">
        {new_instruction}
    </div>
    """

    return (
        question,
        format_response_1_html(new_response_1),
        format_response_2_html(new_response_2),
        new_instruction,
        new_label_1,
        new_label_2,
        new_pair_name,
        "Your choice has been recorded. A new question is loaded!"
    )

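# Build the Gradio Blocks UI: the current question at the top, the two anonymized
# answers side by side, and a radio selector with a submit button for voting.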
def create_interface():
    instruction, response_1, response_2, label_1, label_2, pair_name = get_random_row()

    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.HTML("""
        <div style="text-align: center;">
            <h1>Human-Likeness Voting System</h1>
        </div>
        """)
        gr.Markdown("This interface compares the performance of the human-like LLMs developed by our team with the official instruct models they were trained from. The results of this study will be presented in a paper. Please vote fairly and carefully when selecting an answer. We thank you for your contributions on behalf of the research team.")
        gr.Markdown("## Instructions")
        gr.Markdown(
            """
            1. First, read the provided question carefully.
            2. Second, read both responses carefully.
            3. Finally, select the answer that most resembles a human response."""
        )

        current_instruction = gr.State(instruction)
        label_1_state = gr.State(label_1)
        label_2_state = gr.State(label_2)
        pair_name_state = gr.State(pair_name)
        question_display = gr.HTML(
            value=f"""
            <div style="text-align: center; font-size: 24px; font-weight: bold; margin-top: 20px;">
                Question:
            </div>
            <div style="text-align: center; font-size: 20px; margin-top: 10px;">
                {instruction}
            </div>
            """
        )

        with gr.Row():
            with gr.Column():
                response_1_display = gr.HTML(format_response_1_html(response_1))
            with gr.Column():
                response_2_display = gr.HTML(format_response_2_html(response_2))
        with gr.Row():
            selected_response = gr.Radio(
                ["Answer 1", "Answer 2"],
                label="Which answer is better?",
                interactive=True,
            )
            submit_btn = gr.Button("Submit Choice")

        status_output = gr.Textbox(
            interactive=False,
            label="Status",
            value="Select an answer and click Submit"
        )
        submit_btn.click(
            fn=submit_choice,
            inputs=[
                selected_response,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state
            ],
            # current_instruction is included in the outputs so the logged question
            # stays in sync with the one currently shown on screen.
            outputs=[
                question_display,
                response_1_display,
                response_2_display,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state,
                status_output
            ]
        )

    return demo

if __name__ == "__main__":
    interface = create_interface()
    interface.launch(share=True)