kaikaidai commited on
Commit
47e4bdb
·
verified ·
1 Parent(s): 4bc1049

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -33
app.py CHANGED
@@ -31,6 +31,8 @@ from common import (
31
  BATTLE_RULES,
32
  EVAL_DESCRIPTION,
33
  VOTING_HEADER,
 
 
34
  )
35
  from leaderboard import (
36
  get_leaderboard,
@@ -153,8 +155,10 @@ def get_ip(request: gr.Request) -> str:
153
  return hashlib.sha256(ip.encode()).hexdigest()[:16]
154
 
155
 
156
- def get_vote_message(choice: str, model_a: str, model_b: str) -> str:
157
- """Generate appropriate message based on vote and model rankings."""
 
 
158
  voting_data = get_current_votes()
159
  leaderboard = get_leaderboard(model_data, voting_data, show_preliminary=True)
160
  rankings = get_model_rankings(leaderboard)
@@ -162,19 +166,13 @@ def get_vote_message(choice: str, model_a: str, model_b: str) -> str:
162
  pos_b = rankings.get(model_b, 0)
163
 
164
  if choice == "Tie":
165
- return f"It's a tie! Currently, {model_a} ranks #{pos_a} and {model_b} ranks #{pos_b}. \n"
166
-
167
- # Get chosen and rejected models based on vote
168
- model_chosen = model_a if choice == "A" else model_b
169
- model_rejected = model_b if choice == "A" else model_a
170
- pos_chosen = pos_a if choice == "A" else pos_b
171
- pos_rejected = pos_b if choice == "A" else pos_a
172
 
173
  # Check if vote aligns with leaderboard
174
  if (choice == "A" and pos_a < pos_b) or (choice == "B" and pos_b < pos_a):
175
- return f"You're in-line with the community! {model_chosen} ranks #{pos_chosen} ahead of {model_rejected} in #{pos_rejected}. \n"
176
  else:
177
- return f"You don't think like everyone else ;) {model_chosen} ranks #{pos_chosen} which is behind {model_rejected} in #{pos_rejected}. \n"
178
 
179
 
180
  def vote(
@@ -227,19 +225,38 @@ def vote(
227
  final_prompt, response_a, response_b, model_a, model_b, choice, judge_id
228
  )
229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  # Generate vote message
231
- message = get_vote_message(choice, model_a, model_b)
232
 
233
- # Return updates for UI components
234
  return [
235
  gr.update(interactive=False, variant="primary" if choice == "A" else "secondary"), # vote_a
236
  gr.update(interactive=False, variant="primary" if choice == "B" else "secondary"), # vote_b
237
  gr.update(interactive=False, variant="primary" if choice == "Tie" else "secondary"), # vote_tie
238
- gr.update(value=f"*Model: {model_a}*"), # model_name_a
239
- gr.update(value=f"*Model: {model_b}*"), # model_name_b
240
  gr.update(interactive=True, value="Regenerate judges", variant="secondary"), # send_btn
241
  gr.update(value="🎲 New round", variant="primary"), # random_btn
242
- gr.Info(message, title = "🥳 Thanks for voting responsibly!"), # success message
243
  ]
244
 
245
 
@@ -311,7 +328,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
311
  with gr.Column(scale=1):
312
  with gr.Group():
313
  human_input = gr.TextArea(
314
- label="👩 Human Input",
315
  lines=10,
316
  placeholder="Enter the human message here..."
317
  )
@@ -368,12 +385,18 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
368
 
369
  gr.Markdown("<br>")
370
 
371
- # Add Evaluator Prompt Accordion
372
with gr.Accordion("📝 Evaluator Prompt", open=False):
373
- gr.Markdown(f"```\n{DEFAULT_EVAL_PROMPT}\n```")
374
-
375
- # Add spacing and acknowledgements at the bottom
376
- gr.Markdown(ACKNOWLEDGEMENTS)
 
 
 
 
 
 
377
 
378
  with gr.TabItem("Leaderboard"):
379
  with gr.Row():
@@ -406,11 +429,14 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
406
 
407
  with gr.TabItem("Policy"):
408
  gr.Markdown(POLICY_CONTENT)
 
409
 
410
  # Define state variables for model tracking
411
  model_a_state = gr.State()
412
  model_b_state = gr.State()
413
  final_prompt_state = gr.State()
 
 
414
 
415
  # Update variable inputs based on the eval prompt
416
  #def update_variables(eval_prompt):
@@ -550,12 +576,50 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
550
  ],
551
  )
552
 
553
- # Update the send button handler to store the submitted inputs
554
- def submit_and_store(prompt, *variables):
555
- # Create a copy of the current submission
556
- current_submission = {"prompt": prompt, "variables": variables}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
 
558
- # Get the responses
 
 
 
 
 
559
  (
560
  response_a,
561
  response_b,
@@ -564,18 +628,19 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
564
  model_a,
565
  model_b,
566
  final_prompt,
567
- ) = submit_prompt(prompt, *variables)
568
 
569
  # Parse the responses
570
  score_a, critique_a = parse_model_response(response_a)
571
  score_b, critique_b = parse_model_response(response_b)
572
 
573
- # Format scores with "/ 5"
574
- score_a = f"{score_a} / 5"
575
- score_b = f"{score_b} / 5"
 
576
 
577
  # Update the last_submission state with the current values
578
- last_submission.value = current_submission
579
 
580
  return (
581
  score_a,
@@ -598,9 +663,10 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
598
  gr.update(value="🎲"), # random_btn
599
  )
600
 
 
601
  send_btn.click(
602
  fn=submit_and_store,
603
- inputs=[eval_prompt, human_input, ai_response],
604
  outputs=[
605
  score_a,
606
  critique_a,
 
31
  BATTLE_RULES,
32
  EVAL_DESCRIPTION,
33
  VOTING_HEADER,
34
+ DEFAULT_EVAL_PROMPT_EDITABLE,
35
+ FIXED_EVAL_SUFFIX,
36
  )
37
  from leaderboard import (
38
  get_leaderboard,
 
155
  return hashlib.sha256(ip.encode()).hexdigest()[:16]
156
 
157
 
158
+ def get_vote_message(choice: str, model_a: str, model_b: str) -> tuple[str, str]:
159
+ """Generate appropriate message based on vote and model rankings.
160
+ Returns (title, message) tuple."""
161
+ # Get current rankings
162
  voting_data = get_current_votes()
163
  leaderboard = get_leaderboard(model_data, voting_data, show_preliminary=True)
164
  rankings = get_model_rankings(leaderboard)
 
166
  pos_b = rankings.get(model_b, 0)
167
 
168
  if choice == "Tie":
169
+ return "It's a tie!", "Keep voting responsibly 🤗"
 
 
 
 
 
 
170
 
171
  # Check if vote aligns with leaderboard
172
  if (choice == "A" and pos_a < pos_b) or (choice == "B" and pos_b < pos_a):
173
+ return "The favourite wins!", "Keep voting responsibly 🤗"
174
  else:
175
+ return "The underdog wins!", "Keep voting responsibly 🤗"
176
 
177
 
178
  def vote(
 
225
  final_prompt, response_a, response_b, model_a, model_b, choice, judge_id
226
  )
227
 
228
+ # Get model positions for display
229
+ voting_data = get_current_votes()
230
+ leaderboard = get_leaderboard(model_data, voting_data, show_preliminary=True)
231
+ rankings = get_model_rankings(leaderboard)
232
+ pos_a = rankings.get(model_a, 0)
233
+ pos_b = rankings.get(model_b, 0)
234
+
235
+ # Format model names with positions and win/loss indicators
236
+ if choice == "Tie":
237
+ model_a_display = f"*Model: {model_a} (Position #{pos_a})*"
238
+ model_b_display = f"*Model: {model_b} (Position #{pos_b})*"
239
+ else:
240
+ winner = model_a if choice == "A" else model_b
241
+ loser = model_b if choice == "A" else model_a
242
+ winner_pos = pos_a if choice == "A" else pos_b
243
+ loser_pos = pos_b if choice == "A" else pos_a
244
+
245
+ model_a_display = f"*Model: {model_a} {'✅' if choice == 'A' else '❌'} (Position #{pos_a})*"
246
+ model_b_display = f"*Model: {model_b} {'✅' if choice == 'B' else '❌'} (Position #{pos_b})*"
247
+
248
  # Generate vote message
249
+ title, message = get_vote_message(choice, model_a, model_b)
250
 
 
251
  return [
252
  gr.update(interactive=False, variant="primary" if choice == "A" else "secondary"), # vote_a
253
  gr.update(interactive=False, variant="primary" if choice == "B" else "secondary"), # vote_b
254
  gr.update(interactive=False, variant="primary" if choice == "Tie" else "secondary"), # vote_tie
255
+ gr.update(value=model_a_display), # model_name_a
256
+ gr.update(value=model_b_display), # model_name_b
257
  gr.update(interactive=True, value="Regenerate judges", variant="secondary"), # send_btn
258
  gr.update(value="🎲 New round", variant="primary"), # random_btn
259
+ gr.Info(message, title=title), # success message
260
  ]
261
 
262
 
 
328
  with gr.Column(scale=1):
329
  with gr.Group():
330
  human_input = gr.TextArea(
331
+ label="👩 User Input",
332
  lines=10,
333
  placeholder="Enter the human message here..."
334
  )
 
385
 
386
  gr.Markdown("<br>")
387
 
388
+ # Update Evaluator Prompt Accordion
389
with gr.Accordion("📝 Evaluator Prompt", open=False):
390
+ eval_prompt_editable = gr.TextArea(
391
+ value=DEFAULT_EVAL_PROMPT_EDITABLE,
392
+ label="Evaluation Criteria",
393
+ lines=12
394
+ )
395
+ with gr.Row(visible=False) as edit_buttons_row: # Make buttons row initially hidden
396
+ cancel_prompt_btn = gr.Button("Cancel")
397
+ save_prompt_btn = gr.Button("Save", variant="primary")
398
+ gr.Markdown("*The sample being evaluated is always appended as:*")
399
+ gr.Markdown(f"```{FIXED_EVAL_SUFFIX}")
400
 
401
  with gr.TabItem("Leaderboard"):
402
  with gr.Row():
 
429
 
430
  with gr.TabItem("Policy"):
431
  gr.Markdown(POLICY_CONTENT)
432
+ gr.Markdown(ACKNOWLEDGEMENTS)
433
 
434
  # Define state variables for model tracking
435
  model_a_state = gr.State()
436
  model_b_state = gr.State()
437
  final_prompt_state = gr.State()
438
+ eval_prompt_previous = gr.State(value=DEFAULT_EVAL_PROMPT_EDITABLE) # Initialize with default value
439
+ is_editing = gr.State(False) # Track editing state
440
 
441
  # Update variable inputs based on the eval prompt
442
  #def update_variables(eval_prompt):
 
576
  ],
577
  )
578
 
579
+ # Add handlers for save/cancel buttons
580
+ def save_prompt(new_prompt, previous_prompt):
581
+ return [
582
+ gr.update(value=new_prompt), # Update the prompt
583
+ new_prompt, # Update the previous prompt state
584
+ gr.update(visible=False) # Hide the buttons
585
+ ]
586
+
587
+ def cancel_prompt(previous_prompt):
588
+ return [
589
+ gr.update(value=previous_prompt), # Revert to previous prompt
590
+ previous_prompt, # Keep the previous prompt state
591
+ gr.update(visible=False) # Hide the buttons
592
+ ]
593
+
594
+ def show_edit_buttons(current_value, previous_value):
595
+ # Show buttons only if the current value differs from the previous value
596
+ return gr.update(visible=current_value != previous_value)
597
+
598
+ # Add handlers for save/cancel buttons and prompt changes
599
+ save_prompt_btn.click(
600
+ fn=save_prompt,
601
+ inputs=[eval_prompt_editable, eval_prompt_previous],
602
+ outputs=[eval_prompt_editable, eval_prompt_previous, edit_buttons_row]
603
+ )
604
+
605
+ cancel_prompt_btn.click(
606
+ fn=cancel_prompt,
607
+ inputs=[eval_prompt_previous],
608
+ outputs=[eval_prompt_editable, eval_prompt_previous, edit_buttons_row]
609
+ )
610
+
611
+ eval_prompt_editable.change(
612
+ fn=show_edit_buttons,
613
+ inputs=[eval_prompt_editable, eval_prompt_previous],
614
+ outputs=edit_buttons_row
615
+ )
616
 
617
+ # Update the submit function to combine editable and fixed parts
618
+ def submit_and_store(editable_prompt, *variables):
619
+ # Combine the editable prompt with fixed suffix
620
+ full_prompt = editable_prompt + FIXED_EVAL_SUFFIX
621
+
622
+ # Get the responses using the full prompt
623
  (
624
  response_a,
625
  response_b,
 
628
  model_a,
629
  model_b,
630
  final_prompt,
631
+ ) = submit_prompt(full_prompt, *variables)
632
 
633
  # Parse the responses
634
  score_a, critique_a = parse_model_response(response_a)
635
  score_b, critique_b = parse_model_response(response_b)
636
 
637
+ # Only append "/ 5" if using the default prompt
638
+ if editable_prompt.strip() == DEFAULT_EVAL_PROMPT_EDITABLE.strip():
639
+ score_a = f"{score_a} / 5"
640
+ score_b = f"{score_b} / 5"
641
 
642
  # Update the last_submission state with the current values
643
+ last_submission.value = {"prompt": full_prompt, "variables": variables}
644
 
645
  return (
646
  score_a,
 
663
  gr.update(value="🎲"), # random_btn
664
  )
665
 
666
+ # Update the click handler to use the editable prompt
667
  send_btn.click(
668
  fn=submit_and_store,
669
+ inputs=[eval_prompt_editable, human_input, ai_response],
670
  outputs=[
671
  score_a,
672
  critique_a,