Q-bert committed
Commit 61261dd
1 parent: 182df4c

Create app.py

Files changed (1): app.py (+142, -0)
app.py ADDED
import gradio as gr
import pandas as pd
import random
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import HfApi, login
import os
from datetime import datetime

# Authenticate with the Hugging Face Hub using the Space's HF_TOKEN secret
hf_api = HfApi()
HF_TOKEN = os.getenv('HF_TOKEN')
login(token=HF_TOKEN)


# Response-pair datasets: each row holds an instruction together with a response
# from the human-like model and one from the official instruct model
dataset_1 = load_dataset("Q-bert/LlamaPair")["train"]
dataset_2 = load_dataset("Q-bert/QwenPair")["train"]
dataset_3 = load_dataset("Q-bert/MistralPair")["train"]

df_log = pd.DataFrame(columns=["instruction", "selected_model", "pair", "submission_time"])

def get_random_row():
    # Pick one of the three pair datasets at random, then a random row from it,
    # and shuffle the two responses so their order carries no hint of the source.
    selected_dataset = random.choice([dataset_1, dataset_2, dataset_3])
    pair_name = ("LlamaPair" if selected_dataset is dataset_1
                 else "QwenPair" if selected_dataset is dataset_2
                 else "MistralPair")

    row = selected_dataset[random.randint(0, len(selected_dataset) - 1)]
    instruction = row["instruction"]
    response_human = row["response_human_like_model"]
    response_official = row["response_offical_instruct_model"]

    responses = [("Human-like Model", response_human),
                 ("Official Model", response_official)]
    random.shuffle(responses)

    # (instruction, answer 1 text, answer 2 text, answer 1 label, answer 2 label, dataset name)
    return (instruction, responses[0][1], responses[1][1],
            responses[0][0], responses[1][0], pair_name)

def format_response_html(response):
    return f'''
    <div style="border: 1px solid white; background-color: black;
                padding: 10px; margin: 5px;">
        <strong style="color: white;">Answer:</strong>
        <div style="color: white;">{response}</div>
    </div>
    '''

def submit_choice(selected_response, instruction, label_1, label_2, pair_name):
    # Load the existing vote log from the Hub; fall back to an empty frame if it
    # does not exist yet or cannot be fetched.
    try:
        df_log = load_dataset("Q-bert/log")["train"].to_pandas()
    except Exception:
        df_log = pd.DataFrame(columns=["instruction", "selected_model",
                                       "pair", "submission_time"])

    selected_model = label_1 if selected_response == "Answer 1" else label_2
    submission_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    new_entry = pd.DataFrame({
        "instruction": [instruction],
        "selected_model": [selected_model],
        "pair": [pair_name],
        "submission_time": [submission_time]
    })

    # Append the vote, keep a local CSV copy, and push the updated log back to the Hub
    df_log = pd.concat([df_log, new_entry], ignore_index=True)
    df_log.to_csv("annotations_log.csv", index=False)
    log = Dataset.from_pandas(df_log)
    log.push_to_hub("Q-bert/log")

    # Load a fresh question for the next round
    new_instruction, new_response_1, new_response_2, new_label_1, new_label_2, new_pair_name = get_random_row()

    # Outputs map 1:1 onto the components and states wired up in create_interface()
    return (
        f"### Question:\n{new_instruction}",
        format_response_html(new_response_1),
        format_response_html(new_response_2),
        new_instruction,  # keep the instruction state in sync with the displayed question
        new_label_1,
        new_label_2,
        new_pair_name,
        "Your choice has been recorded. A new question is loaded!"
    )

def create_interface():
    instruction, response_1, response_2, label_1, label_2, pair_name = get_random_row()

    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.Markdown("# Human-Likeness Voting System")
        gr.Markdown("![image/png](https://cdn-uploads.huggingface.co/production/uploads/63da3d7ae697e5898cb86854/6vL52mOW6IqZu8DFlAZ4C.png)")
        gr.Markdown("This interface was created to compare the performance of the human-like LLMs developed by our team with the official models on which they were trained. The results of this study will be presented in a paper. Please be fair and accurate when casting your vote and selecting an answer. We thank you for your contribution on behalf of the research team.")
        gr.Markdown("## Instructions")
        gr.Markdown(
            """
1. First, read the provided question carefully.
2. Second, read both responses carefully.
3. Finally, select the response that best resembles a human in terms of response quality."""
        )

        # Hidden state tracking the current question and which model produced which answer
        current_instruction = gr.State(instruction)
        label_1_state = gr.State(label_1)
        label_2_state = gr.State(label_2)
        pair_name_state = gr.State(pair_name)

        question_display = gr.Markdown(value=f"### Question:\n{instruction}")
        with gr.Row():
            with gr.Column():
                response_1_display = gr.HTML(format_response_html(response_1))
            with gr.Column():
                response_2_display = gr.HTML(format_response_html(response_2))
        with gr.Row():
            selected_response = gr.Radio(
                ["Answer 1", "Answer 2"],
                label="Which answer is better?",
                interactive=True
            )
        submit_btn = gr.Button("Submit Choice")

        status_output = gr.Textbox(
            interactive=False,
            label="Status",
            value="Select an answer and click Submit"
        )

        submit_btn.click(
            fn=submit_choice,
            inputs=[
                selected_response,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state
            ],
            outputs=[
                question_display,
                response_1_display,
                response_2_display,
                current_instruction,
                label_1_state,
                label_2_state,
                pair_name_state,
                status_output
            ]
        )

    return demo

if __name__ == "__main__":
    interface = create_interface()
    interface.launch(share=True)