SLM-RAG-Arena

Running on Zero

App Files Community

oliver-aizip

kai-aizip committed 5 days ago

Commit

d958e2a

verified ·

1 Parent(s): 6b26b26

Refined writing and layout (#15)

Browse files

- Refined writing and layout (ca89290c23c802513b65d8ad50d4161204f7f72c)

Co-authored-by: Kai <kai-aizip@users.noreply.huggingface.co>

Files changed (1) hide show

app.py +26 -14

app.py CHANGED Viewed

@@ -14,8 +14,10 @@ from utils.shared import generation_interrupt  # Import from shared module
 feedback_options = {
     "left": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)"],
     "right": ["Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
-    "tie": ["Both complete", "Both accurate", "Both well written", "Both handle refusal well (if applicable)"],
-    "neither": ["Both incomplete", "Both hallucinate", "Both irrelevant", "Both incorrectly refuse (if applicable)", "A is bad", "B is bad"]
 }
 def load_context():
@@ -186,31 +188,39 @@ with gr.Blocks(theme=gr.themes.Default(
     with gr.Tabs() as tabs:
         # Main Arena Tab
         with gr.TabItem("Arena", id="arena-tab"):
-            gr.Markdown("# RAG Summarizer Arena")
-            gr.Markdown("Compare summaries generated by different models based on the provided context and query. Select the better summary, or choose 'Tie' or 'Neither'. Your feedback helps evaluate model performance.")
             # Main container
             with gr.Column(elem_id="main-interface-area") as main_interface_area:
                 # Query section
                 with gr.Row(elem_id="query-title-row"):
-                    gr.Markdown("### Query", elem_classes="section-heading")
                 with gr.Row(elem_id="query-container"):
                     with gr.Row(elem_classes="query-box-row"):
                         query_display = gr.Markdown(value="Loading question...", elem_classes="query-text")
-                    random_question_btn = gr.Button("🔄 Get Random Question", elem_classes="query-button")
                 # Context description and display
                 context_description = gr.Markdown("", elem_classes="context-description")
                 with gr.Row(elem_id="context-header-row"):
-                    gr.Markdown("### Context Provided", elem_classes="context-title")
                     context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
                 context_display = gr.HTML(value="Loading context...", label="Context Chunks")
                 gr.Markdown("---")
-                gr.Markdown("### Compare Summaries", elem_classes="section-heading")
                 # Model summaries
                 with gr.Row():
@@ -221,18 +231,20 @@ with gr.Blocks(theme=gr.themes.Default(
                         with gr.Group(elem_classes=["summary-card", "summary-card-b"]):
                             summary_b_display = gr.Textbox(label="Model B", lines=10, interactive=False, show_copy_button=True)
                 # Voting section
-                gr.Markdown("### Cast Your Vote", elem_classes="section-heading")
                 with gr.Row():
                     vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"])
                     vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"])
                     vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
-                    vote_button_neither = gr.Button("❌ Neither is Adequate", elem_classes=["vote-button", "vote-button-neither"])
                 # Feedback and Submit sections
                 with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
                     feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)
-                submit_button = gr.Button("Submit Vote", variant="primary", interactive=False, elem_id="submit-button")
                 # Results area
                 with gr.Column(visible=False) as results_reveal_area:
@@ -242,10 +254,10 @@ with gr.Blocks(theme=gr.themes.Default(
                     # Model reveal section
                     with gr.Row():
                         with gr.Column(scale=1):
-                            gr.Markdown("### Model A was actually:", elem_classes="section-heading")
                             model_a_reveal = gr.Markdown("", elem_classes="model-reveal model-a-reveal")
                         with gr.Column(scale=1):
-                            gr.Markdown("### Model B was actually:", elem_classes="section-heading")
                             model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
                     gr.HTML("<hr>")
@@ -256,7 +268,7 @@ with gr.Blocks(theme=gr.themes.Default(
         # Leaderboard Tab
         with gr.TabItem("Leaderboard", id="leaderboard-tab"):
-            gr.Markdown("# Model Performance Leaderboard", elem_classes="orange-title")
             gr.Markdown("View performance statistics for all models ranked by Elo rating.")
             with gr.Group(elem_id="leaderboard-info"):

 feedback_options = {
     "left": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)"],
     "right": ["Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
+    "tie": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)",
+           "Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
+    "neither": ["Model A: Incomplete", "Model A: Hallucinate", "Model A: Irrelevant", "Model A: Incorrect refusal (if applicable)",
+               "Model B: Incomplete", "Model B: Hallucinate", "Model B: Irrelevant", "Model B: Incorrect refusal (if applicable)"]
 }
 def load_context():
     with gr.Tabs() as tabs:
         # Main Arena Tab
         with gr.TabItem("Arena", id="arena-tab"):
+            gr.Markdown("# RAG SLM Summarizer/Generator Arena")
+            gr.Markdown("""
+1️⃣ Review the query and examine the highlighted context (✨ highlights contain key information! )\n
+2️⃣ Compare answers generated by two different models side-by-side\n
+3️⃣ Vote for the better response or select 'Tie/Neither' if appropriate""")
+            gr.HTML("<hr>")
             # Main container
             with gr.Column(elem_id="main-interface-area") as main_interface_area:
                 # Query section
                 with gr.Row(elem_id="query-title-row"):
+                    gr.Markdown("### 💬 Query (What Users Want to Ask About the Doc)", elem_classes="section-heading")
                 with gr.Row(elem_id="query-container"):
                     with gr.Row(elem_classes="query-box-row"):
                         query_display = gr.Markdown(value="Loading question...", elem_classes="query-text")
+                    random_question_btn = gr.Button("🔄 Try a New Question", elem_classes="query-button")
                 # Context description and display
                 context_description = gr.Markdown("", elem_classes="context-description")
+                gr.HTML("<hr>")
                 with gr.Row(elem_id="context-header-row"):
+                    gr.Markdown("### 📋 Context (Relevant Information We Got from the Database)", elem_classes="context-title")
                     context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
                 context_display = gr.HTML(value="Loading context...", label="Context Chunks")
                 gr.Markdown("---")
+                gr.Markdown("### 🔍 Compare Answers from Models", elem_classes="section-heading")
                 # Model summaries
                 with gr.Row():
                         with gr.Group(elem_classes=["summary-card", "summary-card-b"]):
                             summary_b_display = gr.Textbox(label="Model B", lines=10, interactive=False, show_copy_button=True)
+                gr.HTML("<hr>")
                 # Voting section
+                gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
                 with gr.Row():
                     vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"])
                     vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"])
                     vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
+                    vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"])
                 # Feedback and Submit sections
                 with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
                     feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)
+                submit_button = gr.Button("Submit Your Vote", variant="primary", interactive=False, elem_id="submit-button")
                 # Results area
                 with gr.Column(visible=False) as results_reveal_area:
                     # Model reveal section
                     with gr.Row():
                         with gr.Column(scale=1):
+                            gr.Markdown("### Model A was:", elem_classes="section-heading")
                             model_a_reveal = gr.Markdown("", elem_classes="model-reveal model-a-reveal")
                         with gr.Column(scale=1):
+                            gr.Markdown("### Model B was:", elem_classes="section-heading")
                             model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
                     gr.HTML("<hr>")
         # Leaderboard Tab
         with gr.TabItem("Leaderboard", id="leaderboard-tab"):
+            gr.Markdown("# RAG SLM Summarizer/Generator Leaderboard", elem_classes="orange-title")
             gr.Markdown("View performance statistics for all models ranked by Elo rating.")
             with gr.Group(elem_id="leaderboard-info"):