Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -201,6 +201,7 @@ def vote(
|
|
201 |
gr.update(value=f"*Model: {model_a}*"), # model_name_a
|
202 |
gr.update(value=f"*Model: {model_b}*"), # model_name_b
|
203 |
gr.update(interactive=True, value="Run the evaluators", variant="primary"), # send_btn
|
|
|
204 |
]
|
205 |
|
206 |
|
@@ -434,61 +435,59 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
434 |
|
435 |
with gr.Tabs():
|
436 |
with gr.TabItem("Judge Arena"):
|
|
|
437 |
with gr.Row():
|
438 |
# Left side - Input section
|
439 |
with gr.Column(scale=1):
|
440 |
-
random_btn = gr.Button("🎲", scale=0)
|
441 |
with gr.Group():
|
442 |
human_input = gr.TextArea(
|
443 |
label="👩 Human Input",
|
444 |
-
lines=
|
445 |
placeholder="Enter the human message here..."
|
446 |
)
|
447 |
|
448 |
ai_response = gr.TextArea(
|
449 |
label="🤖 AI Response",
|
450 |
-
lines=
|
451 |
placeholder="Enter the AI response here..."
|
452 |
)
|
453 |
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
|
459 |
-
)
|
460 |
|
461 |
# Right side - Model outputs
|
462 |
with gr.Column(scale=1):
|
463 |
-
gr.Markdown("
|
464 |
-
gr.Markdown("\n### 👩‍⚖️ Judge A")
|
465 |
with gr.Group():
|
|
|
466 |
with gr.Row():
|
467 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
468 |
-
score_a = gr.Textbox(label="Score", interactive=False)
|
469 |
vote_a = gr.Button("Vote A", variant="primary", visible=False)
|
470 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
471 |
-
critique_a = gr.TextArea(label="Critique", lines=
|
472 |
-
model_name_a = gr.Markdown("*Model: Hidden*")
|
473 |
|
474 |
-
#
|
475 |
-
gr.
|
476 |
|
477 |
-
#
|
478 |
with gr.Row(visible=False) as tie_button_row:
|
479 |
with gr.Column():
|
480 |
vote_tie = gr.Button("Tie", variant="secondary")
|
481 |
-
|
482 |
|
483 |
-
gr.Markdown("###
|
484 |
with gr.Group():
|
|
|
485 |
with gr.Row():
|
486 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
487 |
-
score_b = gr.Textbox(label="Score", interactive=False)
|
488 |
vote_b = gr.Button("Vote B", variant="primary", visible=False)
|
489 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
490 |
-
critique_b = gr.TextArea(label="Critique", lines=
|
491 |
-
model_name_b = gr.Markdown("*Model: Hidden*")
|
492 |
# Place Vote B button directly under Judge B
|
493 |
|
494 |
gr.Markdown("<br>")
|
@@ -636,6 +635,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
636 |
model_name_a,
|
637 |
model_name_b,
|
638 |
send_btn,
|
|
|
639 |
],
|
640 |
)
|
641 |
|
@@ -658,6 +658,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
658 |
model_name_a,
|
659 |
model_name_b,
|
660 |
send_btn,
|
|
|
661 |
],
|
662 |
)
|
663 |
|
@@ -680,6 +681,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
680 |
model_name_a,
|
681 |
model_name_b,
|
682 |
send_btn,
|
|
|
683 |
],
|
684 |
)
|
685 |
|
@@ -703,6 +705,10 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
703 |
score_a, critique_a = parse_model_response(response_a)
|
704 |
score_b, critique_b = parse_model_response(response_b)
|
705 |
|
|
|
|
|
|
|
|
|
706 |
# Update the last_submission state with the current values
|
707 |
last_submission.value = current_submission
|
708 |
|
@@ -725,6 +731,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
725 |
variant="secondary",
|
726 |
interactive=True
|
727 |
),
|
|
|
728 |
)
|
729 |
|
730 |
send_btn.click(
|
@@ -744,6 +751,7 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
744 |
model_name_a,
|
745 |
model_name_b,
|
746 |
send_btn,
|
|
|
747 |
],
|
748 |
)
|
749 |
|
@@ -825,5 +833,12 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
825 |
outputs=[send_btn]
|
826 |
)
|
827 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
828 |
if __name__ == "__main__":
|
829 |
demo.launch()
|
|
|
201 |
gr.update(value=f"*Model: {model_a}*"), # model_name_a
|
202 |
gr.update(value=f"*Model: {model_b}*"), # model_name_b
|
203 |
gr.update(interactive=True, value="Run the evaluators", variant="primary"), # send_btn
|
204 |
+
gr.update(visible=True), # spacing_div
|
205 |
]
|
206 |
|
207 |
|
|
|
435 |
|
436 |
with gr.Tabs():
|
437 |
with gr.TabItem("Judge Arena"):
|
438 |
+
random_btn = gr.Button("🎲", scale=0)
|
439 |
with gr.Row():
|
440 |
# Left side - Input section
|
441 |
with gr.Column(scale=1):
|
|
|
442 |
with gr.Group():
|
443 |
human_input = gr.TextArea(
|
444 |
label="👩 Human Input",
|
445 |
+
lines=12,
|
446 |
placeholder="Enter the human message here..."
|
447 |
)
|
448 |
|
449 |
ai_response = gr.TextArea(
|
450 |
label="🤖 AI Response",
|
451 |
+
lines=12,
|
452 |
placeholder="Enter the AI response here..."
|
453 |
)
|
454 |
|
455 |
+
send_btn = gr.Button(
|
456 |
+
value="Run the evaluators",
|
457 |
+
variant="primary",
|
458 |
+
size="lg"
|
459 |
+
)
|
|
|
460 |
|
461 |
# Right side - Model outputs
|
462 |
with gr.Column(scale=1):
|
463 |
+
gr.Markdown("### 👩‍⚖️ Judge A")
|
|
|
464 |
with gr.Group():
|
465 |
+
model_name_a = gr.Markdown("*Model: Hidden*")
|
466 |
with gr.Row():
|
467 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
468 |
+
score_a = gr.Textbox(label="Score", lines=5, interactive=False)
|
469 |
vote_a = gr.Button("Vote A", variant="primary", visible=False)
|
470 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
471 |
+
critique_a = gr.TextArea(label="Critique", lines=7, interactive=False)
|
|
|
472 |
|
473 |
+
# Spacing div that's visible only when tie button is hidden
|
474 |
+
spacing_div = gr.HTML('<div style="height: 42px;"></div>', visible=True, elem_id="spacing-div")
|
475 |
|
476 |
+
# Tie button row
|
477 |
with gr.Row(visible=False) as tie_button_row:
|
478 |
with gr.Column():
|
479 |
vote_tie = gr.Button("Tie", variant="secondary")
|
480 |
+
|
481 |
|
482 |
+
gr.Markdown("### 🧑‍⚖️ Judge B")
|
483 |
with gr.Group():
|
484 |
+
model_name_b = gr.Markdown("*Model: Hidden*")
|
485 |
with gr.Row():
|
486 |
with gr.Column(scale=1, min_width=100): # Fixed narrow width for score
|
487 |
+
score_b = gr.Textbox(label="Score", lines=5, interactive=False)
|
488 |
vote_b = gr.Button("Vote B", variant="primary", visible=False)
|
489 |
with gr.Column(scale=9, min_width=400): # Wider width for critique
|
490 |
+
critique_b = gr.TextArea(label="Critique", lines=7, interactive=False)
|
|
|
491 |
# Place Vote B button directly under Judge B
|
492 |
|
493 |
gr.Markdown("<br>")
|
|
|
635 |
model_name_a,
|
636 |
model_name_b,
|
637 |
send_btn,
|
638 |
+
spacing_div,
|
639 |
],
|
640 |
)
|
641 |
|
|
|
658 |
model_name_a,
|
659 |
model_name_b,
|
660 |
send_btn,
|
661 |
+
spacing_div,
|
662 |
],
|
663 |
)
|
664 |
|
|
|
681 |
model_name_a,
|
682 |
model_name_b,
|
683 |
send_btn,
|
684 |
+
spacing_div,
|
685 |
],
|
686 |
)
|
687 |
|
|
|
705 |
score_a, critique_a = parse_model_response(response_a)
|
706 |
score_b, critique_b = parse_model_response(response_b)
|
707 |
|
708 |
+
# Format scores with "/ 5"
|
709 |
+
score_a = f"{score_a} / 5"
|
710 |
+
score_b = f"{score_b} / 5"
|
711 |
+
|
712 |
# Update the last_submission state with the current values
|
713 |
last_submission.value = current_submission
|
714 |
|
|
|
731 |
variant="secondary",
|
732 |
interactive=True
|
733 |
),
|
734 |
+
gr.update(visible=False), # spacing_div
|
735 |
)
|
736 |
|
737 |
send_btn.click(
|
|
|
751 |
model_name_a,
|
752 |
model_name_b,
|
753 |
send_btn,
|
754 |
+
spacing_div,
|
755 |
],
|
756 |
)
|
757 |
|
|
|
833 |
outputs=[send_btn]
|
834 |
)
|
835 |
|
836 |
+
# Update the demo.load to include the random example population
|
837 |
+
demo.load(
|
838 |
+
fn=populate_random_example,
|
839 |
+
inputs=[],
|
840 |
+
outputs=[human_input, ai_response]
|
841 |
+
)
|
842 |
+
|
843 |
if __name__ == "__main__":
|
844 |
demo.launch()
|