oliver-aizip kai-aizip committed on
Commit 2d7d23d · verified · 1 Parent(s): ee2fb63

Added Context preload and inference interruption (#2)

- Added Context preload and inference interruption (189d8573833ade641554ffcb21e5f5e30412ea75)


Co-authored-by: Kai <kai-aizip@users.noreply.huggingface.co>
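
For readers skimming the diff: the interruption half of this change hangs off a single module-level `threading.Event` named `generation_interrupt`. The slow model call runs in a daemon thread while the caller polls for completion, a timeout, or the interrupt flag; clicking a "new question" button simply sets the flag so a stale generation can no longer reach the UI. Below is a minimal, self-contained sketch of that pattern, with `generate_fn` as a hypothetical stand-in for the app's `generate_summaries` call:

```python
import threading
import time

# Module-level flag: set() cancels an in-flight generation, clear() re-arms it
# before the next request (mirrors generation_interrupt in the diff below).
generation_interrupt = threading.Event()

def generate_with_timeout(generate_fn, timeout=30):
    """Run generate_fn in a daemon thread and poll until it finishes,
    the timeout expires, or generation_interrupt is set."""
    result = {"output": None, "completed": False}

    if generation_interrupt.is_set():
        return result  # interrupted before the call even started

    def worker():
        output = generate_fn()  # slow model inference (hypothetical stand-in)
        if not generation_interrupt.is_set():
            result["output"] = output
            result["completed"] = True

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()

    start = time.time()
    while time.time() - start < timeout:
        if generation_interrupt.is_set() or not thread.is_alive() or result["completed"]:
            break
        time.sleep(0.1)
    return result

if __name__ == "__main__":
    generation_interrupt.clear()
    print(generate_with_timeout(lambda: "summary", timeout=5))  # completes
    generation_interrupt.set()  # e.g. the user clicked "Try Another Question"
    print(generate_with_timeout(lambda: "summary", timeout=5))  # discarded
```

Note that the worker thread is never forcibly killed (Python cannot do that safely); an interrupted or timed-out result is simply discarded, which is how the diff below handles it as well.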

Files changed (1)
  1. app.py +195 -153
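
The context-preload half of the change splits page setup into a fast step (pick a question and render its context) and a slow step (model inference), chained with Gradio's `.then()` so the user sees the context while summaries are still generating. A self-contained sketch of that chaining, using illustrative component and helper names rather than the app's own:

```python
import time
import gradio as gr

def load_context_fast():
    # Fast: choose a question/context so the page fills in immediately.
    return "What changed in app.py?"

def generate_summary_slow(question):
    # Slow: stands in for the model call that .then() defers until after
    # the context is already on screen.
    time.sleep(2)
    return f"Summary for: {question!r}"

with gr.Blocks() as demo:
    question_box = gr.Textbox(label="Question")
    summary_box = gr.Textbox(label="Summary")

    # Preload the context on page load, then chain the slow generation step.
    demo.load(fn=load_context_fast, inputs=[], outputs=[question_box]).then(
        fn=generate_summary_slow, inputs=[question_box], outputs=[summary_box]
    )

if __name__ == "__main__":
    demo.launch()
```

The same two-step chain is reused for the "Get Random Question" and "Try Another Question" buttons in the diff, each preceded by setting the interrupt flag.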
app.py CHANGED
@@ -2,17 +2,17 @@ import gradio as gr
2
  import random
3
  import pandas as pd
4
  import os
5
  from utils.data_loader import get_random_example
6
  from utils.models import generate_summaries, model_names
7
  from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html
8
  from utils.leaderboard import load_leaderboard_data, save_leaderboard_data
9
 
10
- # Read CSS from file
11
- css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
12
- with open(css_path, 'r') as f:
13
- css_content = f.read()
14
 
15
- # Feedback options
16
  feedback_options = {
17
  "left": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)"],
18
  "right": ["Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
@@ -20,15 +20,11 @@ feedback_options = {
20
  "neither": ["Both incomplete", "Both hallucinate", "Both irrelevant", "Both incorrectly refuse (if applicable)", "A is bad", "B is bad"]
21
  }
22
 
23
- def load_new_question_improved(agg_results=None, show_full=False):
24
- """Loads a new random question, contexts, and model summaries."""
25
- if agg_results is None:
26
- agg_results = load_leaderboard_data()
27
-
28
  example = get_random_example()
29
- m_a_name, m_b_name = random.sample(model_names, 2)
30
- s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
31
-
32
  context_desc = example.get('processed_context_desc', '')
33
  if context_desc:
34
  context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
@@ -37,36 +33,98 @@ def load_new_question_improved(agg_results=None, show_full=False):
37
  context_html = get_context_html(example, show_full=show_full)
38
 
39
  return [
40
- example, # current_example
41
- m_a_name, # model_a_name
42
- m_b_name, # model_b_name
43
- s_a, # summary_a_text
44
- s_b, # summary_b_text
45
- None, # selected_winner
46
- [], # feedback_list
47
- False, # show_results_state
48
- agg_results, # results_agg
49
- show_full, # show_full_context
50
- gr.update(value=example['question']), # query_display
51
- gr.update(value=context_desc, visible=bool(context_desc)), # context_description
52
- gr.update(value=context_html), # context_display
53
- gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]), # context_toggle_btn
54
- gr.update(value=s_a), # summary_a_display
55
- gr.update(value=s_b), # summary_b_display
56
- gr.update(interactive=True, elem_classes=["vote-button"]), # vote_button_a
57
- gr.update(interactive=True, elem_classes=["vote-button"]), # vote_button_b
58
- gr.update(interactive=True, elem_classes=["vote-button"]), # vote_button_tie
59
- gr.update(interactive=True, elem_classes=["vote-button", "vote-button-neither"]), # vote_button_neither
60
- gr.update(choices=[], value=[], interactive=False, visible=False), # feedback_checkboxes
61
- gr.update(visible=False), # feedback_section
62
- gr.update(interactive=False, visible=True), # submit_button
63
- gr.update(visible=False), # results_reveal_area
64
- gr.update(interactive=True), # random_question_btn
65
- gr.update(elem_classes=[]) # main_interface_area
66
  ]
67
 
68
  def select_vote_improved(winner_choice):
69
- """Handles vote button selections."""
70
  feedback_choices = feedback_options.get(winner_choice, [])
71
 
72
  btn_a_classes = ["vote-button"]
@@ -84,18 +142,18 @@ def select_vote_improved(winner_choice):
84
  btn_neither_classes.append("selected")
85
 
86
  return [
87
- winner_choice, # selected_winner
88
- gr.update(choices=feedback_choices, value=[], interactive=True, visible=True), # feedback_checkboxes
89
- gr.update(visible=True), # feedback_section
90
- gr.update(interactive=True), # submit_button
91
- gr.update(elem_classes=btn_a_classes), # vote_button_a
92
- gr.update(elem_classes=btn_b_classes), # vote_button_b
93
- gr.update(elem_classes=btn_tie_classes), # vote_button_tie
94
- gr.update(elem_classes=btn_neither_classes) # vote_button_neither
95
  ]
96
 
97
  def submit_vote_fixed(m_a, m_b, winner, feedback, current_results):
98
- """Processes vote submission and updates results."""
99
  if winner is None:
100
  print("Warning: Submit called without a winner selected.")
101
  return {}
@@ -123,7 +181,9 @@ def submit_vote_fixed(m_a, m_b, winner, feedback, current_results):
123
 
124
  # Prepare Results Table
125
  results_list = []
126
- all_models = list(set(list(updated_results["wins"].keys()) + list(updated_results["losses"].keys()) + list(updated_results["ties"].keys())))
127
 
128
  for model in sorted(all_models):
129
  wins = updated_results["wins"].get(model, 0)
@@ -146,34 +206,26 @@ def submit_vote_fixed(m_a, m_b, winner, feedback, current_results):
146
  results_df = results_df.sort_values(by='Win Rate Value', ascending=False).drop(columns=['Win Rate Value'])
147
 
148
  return [
149
- True, # show_results_state
150
- updated_results, # results_agg
151
- gr.update(interactive=False), # vote_button_a
152
- gr.update(interactive=False), # vote_button_b
153
- gr.update(interactive=False), # vote_button_tie
154
- gr.update(interactive=False), # vote_button_neither
155
- gr.update(interactive=False), # feedback_checkboxes
156
- gr.update(visible=True), # feedback_section
157
- gr.update(visible=False), # submit_button
158
- gr.update(visible=True), # results_reveal_area
159
- gr.update(interactive=False), # random_question_btn
160
- gr.update(value=results_df, visible=True), # results_table_display
161
- gr.update(elem_classes=["results-revealed"]), # main_interface_area
162
- gr.update(interactive=True), # context_toggle_btn
163
- gr.update(value=m_a), # model_a_reveal
164
- gr.update(value=m_b) # model_b_reveal
165
  ]
166
 
167
- # Create embedded CSS
168
- css_html = f"<style>{css_content}</style>"
169
-
170
  # Create Gradio interface
171
  with gr.Blocks(theme=gr.themes.Default(
172
  primary_hue=gr.themes.colors.orange,
173
  secondary_hue=gr.themes.colors.slate
174
  )) as demo:
175
- # Embed CSS directly in HTML
176
- gr.HTML(css_html)
177
 
178
  # State Variables
179
  current_example = gr.State({})
@@ -191,7 +243,6 @@ with gr.Blocks(theme=gr.themes.Default(
191
  with gr.Tabs() as tabs:
192
  # Main Arena Tab
193
  with gr.TabItem("Arena", id="arena-tab"):
194
- # Main title and description
195
  gr.Markdown("# RAG Summarizer Arena")
196
  gr.Markdown("Compare summaries generated by different models based on the provided context and query. Select the better summary, or choose 'Tie' or 'Neither'. Your feedback helps evaluate model performance.")
197
 
@@ -206,10 +257,9 @@ with gr.Blocks(theme=gr.themes.Default(
206
  query_display = gr.Markdown(value="Loading question...", elem_classes="query-text")
207
  random_question_btn = gr.Button("🔄 Get Random Question", elem_classes="query-button")
208
 
209
- # Context description
210
  context_description = gr.Markdown("", elem_classes="context-description")
211
-
212
- # Context section
213
  with gr.Row(elem_id="context-header-row"):
214
  gr.Markdown("### Context Provided", elem_classes="context-title")
215
  context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
@@ -236,11 +286,9 @@ with gr.Blocks(theme=gr.themes.Default(
236
  vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
237
  vote_button_neither = gr.Button("❌ Neither is Adequate", elem_classes=["vote-button", "vote-button-neither"])
238
 
239
- # Feedback section
240
  with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
241
  feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)
242
-
243
- # Submit button
244
  submit_button = gr.Button("Submit Vote", variant="primary", interactive=False, elem_id="submit-button")
245
 
246
  # Results area
@@ -258,7 +306,7 @@ with gr.Blocks(theme=gr.themes.Default(
258
  model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
259
 
260
  gr.HTML("<div style='height: 10px;'></div>")
261
-
262
  # Try another button
263
  with gr.Row(elem_classes=["control-buttons"]):
264
  try_another_btn = gr.Button("🔄 Try Another Question", elem_id="try-another-btn")
@@ -269,99 +317,93 @@ with gr.Blocks(theme=gr.themes.Default(
269
  gr.Markdown("View aggregate performance statistics for all models. The table below shows win rates, wins, losses, and ties for each model based on all evaluations.")
270
  results_table_display = gr.DataFrame(label="Model Performance", interactive=False, wrap=True)
271
 
272
- # Event Listeners
273
  context_toggle_btn.click(
274
  fn=toggle_context_display,
275
  inputs=[current_example, show_full_context],
276
  outputs=[show_full_context, context_display, context_toggle_btn]
277
  )
278
 
 
279
  demo.load(
280
- fn=load_new_question_improved,
281
  inputs=[],
282
- outputs=[
283
- current_example, model_a_name, model_b_name, summary_a_text, summary_b_text,
284
- selected_winner, feedback_list, show_results_state, results_agg, show_full_context,
285
- query_display, context_description, context_display, context_toggle_btn,
286
- summary_a_display, summary_b_display,
287
- vote_button_a, vote_button_b, vote_button_tie, vote_button_neither,
288
- feedback_checkboxes, feedback_section, submit_button, results_reveal_area, random_question_btn,
289
- main_interface_area
290
- ]
291
- )
292
-
293
- random_question_btn.click(
294
- fn=load_new_question_improved,
295
- inputs=[],
296
- outputs=[
297
- current_example, model_a_name, model_b_name, summary_a_text, summary_b_text,
298
- selected_winner, feedback_list, show_results_state, results_agg, show_full_context,
299
- query_display, context_description, context_display, context_toggle_btn,
300
- summary_a_display, summary_b_display,
301
- vote_button_a, vote_button_b, vote_button_tie, vote_button_neither,
302
- feedback_checkboxes, feedback_section, submit_button, results_reveal_area, random_question_btn,
303
- main_interface_area
304
- ]
305
- )
306
-
307
- vote_button_a.click(
308
- fn=lambda: select_vote_improved('left'),
309
- inputs=None,
310
- outputs=[selected_winner, feedback_checkboxes, feedback_section, submit_button, vote_button_a, vote_button_b, vote_button_tie, vote_button_neither]
311
- )
312
- vote_button_b.click(
313
- fn=lambda: select_vote_improved('right'),
314
- inputs=None,
315
- outputs=[selected_winner, feedback_checkboxes, feedback_section, submit_button, vote_button_a, vote_button_b, vote_button_tie, vote_button_neither]
316
- )
317
- vote_button_tie.click(
318
- fn=lambda: select_vote_improved('tie'),
319
- inputs=None,
320
- outputs=[selected_winner, feedback_checkboxes, feedback_section, submit_button, vote_button_a, vote_button_b, vote_button_tie, vote_button_neither]
321
- )
322
- vote_button_neither.click(
323
- fn=lambda: select_vote_improved('neither'),
324
- inputs=None,
325
- outputs=[selected_winner, feedback_checkboxes, feedback_section, submit_button, vote_button_a, vote_button_b, vote_button_tie, vote_button_neither]
326
  )
327
 
328
  feedback_checkboxes.change(
329
  fn=update_feedback,
330
  inputs=[feedback_checkboxes],
331
  outputs=[feedback_list]
332
  )
333
 
 
334
  submit_button.click(
335
  fn=submit_vote_fixed,
336
  inputs=[model_a_name, model_b_name, selected_winner, feedback_list, results_agg],
337
- outputs=[
338
- show_results_state, results_agg,
339
- vote_button_a, vote_button_b, vote_button_tie, vote_button_neither,
340
- feedback_checkboxes,
341
- feedback_section,
342
- submit_button,
343
- results_reveal_area,
344
- random_question_btn,
345
- results_table_display,
346
- main_interface_area,
347
- context_toggle_btn,
348
- model_a_reveal,
349
- model_b_reveal
350
- ]
351
- )
352
-
353
- try_another_btn.click(
354
- fn=load_new_question_improved,
355
- inputs=[],
356
- outputs=[
357
- current_example, model_a_name, model_b_name, summary_a_text, summary_b_text,
358
- selected_winner, feedback_list, show_results_state, results_agg, show_full_context,
359
- query_display, context_description, context_display, context_toggle_btn,
360
- summary_a_display, summary_b_display,
361
- vote_button_a, vote_button_b, vote_button_tie, vote_button_neither,
362
- feedback_checkboxes, feedback_section, submit_button, results_reveal_area, random_question_btn,
363
- main_interface_area
364
- ]
365
  )
366
 
367
  if __name__ == "__main__":
 
2
  import random
3
  import pandas as pd
4
  import os
5
+ import threading
6
+ from threading import Event
7
  from utils.data_loader import get_random_example
8
  from utils.models import generate_summaries, model_names
9
  from utils.ui_helpers import toggle_context_display, update_feedback, get_context_html
10
  from utils.leaderboard import load_leaderboard_data, save_leaderboard_data
11
 
12
+ # Global interrupt mechanism for model generation
13
+ generation_interrupt = Event()
14
 
15
+ # Feedback options for different voting outcomes
16
  feedback_options = {
17
  "left": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)"],
18
  "right": ["Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
 
20
  "neither": ["Both incomplete", "Both hallucinate", "Both irrelevant", "Both incorrectly refuse (if applicable)", "A is bad", "B is bad"]
21
  }
22
 
23
+ def load_context():
24
+ """Load a new question and context (fast operation)"""
25
+ generation_interrupt.clear()
26
  example = get_random_example()
27
+
28
  context_desc = example.get('processed_context_desc', '')
29
  if context_desc:
30
  context_desc = f"<div class='context-topic'><span class='topic-label'>The question and context are about:</span> {context_desc}</div>"
 
33
  context_html = get_context_html(example, show_full=show_full)
34
 
35
  return [
36
+ example,
37
+ gr.update(value=example['question']),
38
+ gr.update(value=context_desc, visible=bool(context_desc)),
39
+ gr.update(value=context_html),
40
+ gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
41
+ show_full
42
+ ]
43
+
44
+ def generate_model_summaries_with_timeout(example, timeout=30):
45
+ """Run model inference in a separate thread with timeout for interruptibility"""
46
+ import threading
47
+ import time
48
+
49
+ result = {
50
+ "model_a": "",
51
+ "model_b": "",
52
+ "summary_a": "",
53
+ "summary_b": "",
54
+ "completed": False
55
+ }
56
+
57
+ if generation_interrupt.is_set():
58
+ return result
59
+
60
+ def run_generation():
61
+ try:
62
+ m_a_name, m_b_name = random.sample(model_names, 2)
63
+ s_a, s_b = generate_summaries(example, m_a_name, m_b_name)
64
+
65
+ if not generation_interrupt.is_set():
66
+ result["model_a"] = m_a_name
67
+ result["model_b"] = m_b_name
68
+ result["summary_a"] = s_a
69
+ result["summary_b"] = s_b
70
+ result["completed"] = True
71
+ except Exception as e:
72
+ print(f"Error in generation thread: {e}")
73
+
74
+ generation_thread = threading.Thread(target=run_generation)
75
+ generation_thread.daemon = True
76
+ generation_thread.start()
77
+
78
+ start_time = time.time()
79
+ while time.time() - start_time < timeout:
80
+ if generation_interrupt.is_set() or not generation_thread.is_alive() or result["completed"]:
81
+ break
82
+ time.sleep(0.1)
83
+
84
+ return result
85
+
86
+ def process_generation_result(result):
87
+ """Process the results from the threaded generation function"""
88
+ if not result["completed"]:
89
+ # Generation was interrupted or failed
90
+ return [
91
+ "", "", "", "", None, [], False, load_leaderboard_data(),
92
+ gr.update(value="Generation was interrupted or timed out. Please try again."),
93
+ gr.update(value="Generation was interrupted or timed out. Please try again."),
94
+ gr.update(interactive=True, elem_classes=["vote-button"]),
95
+ gr.update(interactive=True, elem_classes=["vote-button"]),
96
+ gr.update(interactive=True, elem_classes=["vote-button"]),
97
+ gr.update(interactive=True, elem_classes=["vote-button", "vote-button-neither"]),
98
+ gr.update(choices=[], value=[], interactive=False, visible=False),
99
+ gr.update(visible=False),
100
+ gr.update(interactive=False, visible=True),
101
+ gr.update(visible=False),
102
+ gr.update(interactive=True),
103
+ gr.update(elem_classes=[])
104
+ ]
105
+
106
+ # Generation completed successfully
107
+ agg_results = load_leaderboard_data()
108
+ return [
109
+ result["model_a"], result["model_b"],
110
+ result["summary_a"], result["summary_b"],
111
+ None, [], False, agg_results,
112
+ gr.update(value=result["summary_a"]),
113
+ gr.update(value=result["summary_b"]),
114
+ gr.update(interactive=True, elem_classes=["vote-button"]),
115
+ gr.update(interactive=True, elem_classes=["vote-button"]),
116
+ gr.update(interactive=True, elem_classes=["vote-button"]),
117
+ gr.update(interactive=True, elem_classes=["vote-button", "vote-button-neither"]),
118
+ gr.update(choices=[], value=[], interactive=False, visible=False),
119
+ gr.update(visible=False),
120
+ gr.update(interactive=False, visible=True),
121
+ gr.update(visible=False),
122
+ gr.update(interactive=True),
123
+ gr.update(elem_classes=[])
124
  ]
125
 
126
  def select_vote_improved(winner_choice):
127
+ """Updates UI based on vote selection"""
128
  feedback_choices = feedback_options.get(winner_choice, [])
129
 
130
  btn_a_classes = ["vote-button"]
 
142
  btn_neither_classes.append("selected")
143
 
144
  return [
145
+ winner_choice,
146
+ gr.update(choices=feedback_choices, value=[], interactive=True, visible=True),
147
+ gr.update(visible=True),
148
+ gr.update(interactive=True),
149
+ gr.update(elem_classes=btn_a_classes),
150
+ gr.update(elem_classes=btn_b_classes),
151
+ gr.update(elem_classes=btn_tie_classes),
152
+ gr.update(elem_classes=btn_neither_classes)
153
  ]
154
 
155
  def submit_vote_fixed(m_a, m_b, winner, feedback, current_results):
156
+ """Processes vote and updates leaderboard"""
157
  if winner is None:
158
  print("Warning: Submit called without a winner selected.")
159
  return {}
 
181
 
182
  # Prepare Results Table
183
  results_list = []
184
+ all_models = list(set(list(updated_results["wins"].keys()) +
185
+ list(updated_results["losses"].keys()) +
186
+ list(updated_results["ties"].keys())))
187
 
188
  for model in sorted(all_models):
189
  wins = updated_results["wins"].get(model, 0)
 
206
  results_df = results_df.sort_values(by='Win Rate Value', ascending=False).drop(columns=['Win Rate Value'])
207
 
208
  return [
209
+ True, updated_results,
210
+ gr.update(interactive=False), gr.update(interactive=False),
211
+ gr.update(interactive=False), gr.update(interactive=False),
212
+ gr.update(interactive=False), gr.update(visible=True),
213
+ gr.update(visible=False), gr.update(visible=True),
214
+ gr.update(interactive=False), gr.update(value=results_df, visible=True),
215
+ gr.update(elem_classes=["results-revealed"]),
216
+ gr.update(interactive=True), gr.update(value=m_a), gr.update(value=m_b)
217
  ]
218
 
 
219
  # Create Gradio interface
220
  with gr.Blocks(theme=gr.themes.Default(
221
  primary_hue=gr.themes.colors.orange,
222
  secondary_hue=gr.themes.colors.slate
223
  )) as demo:
224
+ # Load CSS
225
+ css_path = os.path.join(os.getcwd(), 'static', 'styles.css')
226
+ with open(css_path, 'r') as f:
227
+ css_content = f.read()
228
+ gr.HTML(f"<style>{css_content}</style>")
229
 
230
  # State Variables
231
  current_example = gr.State({})
 
243
  with gr.Tabs() as tabs:
244
  # Main Arena Tab
245
  with gr.TabItem("Arena", id="arena-tab"):
 
246
  gr.Markdown("# RAG Summarizer Arena")
247
  gr.Markdown("Compare summaries generated by different models based on the provided context and query. Select the better summary, or choose 'Tie' or 'Neither'. Your feedback helps evaluate model performance.")
248
 
 
257
  query_display = gr.Markdown(value="Loading question...", elem_classes="query-text")
258
  random_question_btn = gr.Button("🔄 Get Random Question", elem_classes="query-button")
259
 
260
+ # Context description and display
261
  context_description = gr.Markdown("", elem_classes="context-description")
262
+
 
263
  with gr.Row(elem_id="context-header-row"):
264
  gr.Markdown("### Context Provided", elem_classes="context-title")
265
  context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
 
286
  vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
287
  vote_button_neither = gr.Button("❌ Neither is Adequate", elem_classes=["vote-button", "vote-button-neither"])
288
 
289
+ # Feedback and Submit sections
290
  with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
291
  feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)
292
  submit_button = gr.Button("Submit Vote", variant="primary", interactive=False, elem_id="submit-button")
293
 
294
  # Results area
 
306
  model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
307
 
308
  gr.HTML("<div style='height: 10px;'></div>")
309
+
310
  # Try another button
311
  with gr.Row(elem_classes=["control-buttons"]):
312
  try_another_btn = gr.Button("🔄 Try Another Question", elem_id="try-another-btn")
 
317
  gr.Markdown("View aggregate performance statistics for all models. The table below shows win rates, wins, losses, and ties for each model based on all evaluations.")
318
  results_table_display = gr.DataFrame(label="Model Performance", interactive=False, wrap=True)
319
 
320
+ # Generic function to handle starting a new example
321
+ def handle_new_example_click():
322
+ generation_interrupt.set() # Interrupt any ongoing generation
323
+ return load_context()[0]
324
+
325
+ def update_ui_for_new_context(example):
326
+ return [
327
+ gr.update(value=example['question']),
328
+ gr.update(value=example.get('processed_context_desc', ''), visible=bool(example.get('processed_context_desc', ''))),
329
+ gr.update(value=get_context_html(example, False)),
330
+ gr.update(value="Show Full Context", elem_classes=["context-toggle-button"]),
331
+ False
332
+ ]
333
+
334
+ # Event handling
335
+ # Toggle context display
336
  context_toggle_btn.click(
337
  fn=toggle_context_display,
338
  inputs=[current_example, show_full_context],
339
  outputs=[show_full_context, context_display, context_toggle_btn]
340
  )
341
 
342
+ # Initial loading - context first, then summaries
343
  demo.load(
344
+ fn=load_context,
345
  inputs=[],
346
+ outputs=[current_example, query_display, context_description, context_display,
347
+ context_toggle_btn, show_full_context]
348
+ ).then(
349
+ fn=lambda example: process_generation_result(generate_model_summaries_with_timeout(example)),
350
+ inputs=[current_example],
351
+ outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
352
+ selected_winner, feedback_list, show_results_state, results_agg,
353
+ summary_a_display, summary_b_display, vote_button_a, vote_button_b,
354
+ vote_button_tie, vote_button_neither, feedback_checkboxes, feedback_section,
355
+ submit_button, results_reveal_area, random_question_btn, main_interface_area]
356
  )
357
 
358
+ # Random Question and Try Another buttons with interruption
359
+ for btn in [random_question_btn, try_another_btn]:
360
+ btn.click(
361
+ fn=handle_new_example_click,
362
+ inputs=[],
363
+ outputs=[current_example]
364
+ ).then(
365
+ fn=update_ui_for_new_context,
366
+ inputs=[current_example],
367
+ outputs=[query_display, context_description, context_display,
368
+ context_toggle_btn, show_full_context]
369
+ ).then(
370
+ fn=lambda example: process_generation_result(generate_model_summaries_with_timeout(example)),
371
+ inputs=[current_example],
372
+ outputs=[model_a_name, model_b_name, summary_a_text, summary_b_text,
373
+ selected_winner, feedback_list, show_results_state, results_agg,
374
+ summary_a_display, summary_b_display, vote_button_a, vote_button_b,
375
+ vote_button_tie, vote_button_neither, feedback_checkboxes, feedback_section,
376
+ submit_button, results_reveal_area, random_question_btn, main_interface_area]
377
+ )
378
+
379
+ # Vote button handlers
380
+ for btn, choice in zip(
381
+ [vote_button_a, vote_button_b, vote_button_tie, vote_button_neither],
382
+ ['left', 'right', 'tie', 'neither']
383
+ ):
384
+ btn.click(
385
+ fn=lambda choice=choice: select_vote_improved(choice),
386
+ inputs=None,
387
+ outputs=[selected_winner, feedback_checkboxes, feedback_section, submit_button,
388
+ vote_button_a, vote_button_b, vote_button_tie, vote_button_neither]
389
+ )
390
+
391
+ # Update feedback when checkboxes change
392
  feedback_checkboxes.change(
393
  fn=update_feedback,
394
  inputs=[feedback_checkboxes],
395
  outputs=[feedback_list]
396
  )
397
 
398
+ # Process vote submission and reveal results
399
  submit_button.click(
400
  fn=submit_vote_fixed,
401
  inputs=[model_a_name, model_b_name, selected_winner, feedback_list, results_agg],
402
+ outputs=[show_results_state, results_agg, vote_button_a, vote_button_b,
403
+ vote_button_tie, vote_button_neither, feedback_checkboxes,
404
+ feedback_section, submit_button, results_reveal_area,
405
+ random_question_btn, results_table_display, main_interface_area,
406
+ context_toggle_btn, model_a_reveal, model_b_reveal]
407
  )
408
 
409
  if __name__ == "__main__":