oliver-aizip kai-aizip committed on
Commit
d958e2a
·
verified ·
1 Parent(s): 6b26b26

Refined writing and layout (#15)

Browse files

- Refined writing and layout (ca89290c23c802513b65d8ad50d4161204f7f72c)


Co-authored-by: Kai <kai-aizip@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +26 -14
app.py CHANGED
@@ -14,8 +14,10 @@ from utils.shared import generation_interrupt # Import from shared module
14
  feedback_options = {
15
  "left": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)"],
16
  "right": ["Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
17
- "tie": ["Both complete", "Both accurate", "Both well written", "Both handle refusal well (if applicable)"],
18
- "neither": ["Both incomplete", "Both hallucinate", "Both irrelevant", "Both incorrectly refuse (if applicable)", "A is bad", "B is bad"]
 
 
19
  }
20
 
21
  def load_context():
@@ -186,31 +188,39 @@ with gr.Blocks(theme=gr.themes.Default(
186
  with gr.Tabs() as tabs:
187
  # Main Arena Tab
188
  with gr.TabItem("Arena", id="arena-tab"):
189
- gr.Markdown("# RAG Summarizer Arena")
190
- gr.Markdown("Compare summaries generated by different models based on the provided context and query. Select the better summary, or choose 'Tie' or 'Neither'. Your feedback helps evaluate model performance.")
 
 
 
 
 
191
 
192
  # Main container
193
  with gr.Column(elem_id="main-interface-area") as main_interface_area:
194
  # Query section
195
  with gr.Row(elem_id="query-title-row"):
196
- gr.Markdown("### Query", elem_classes="section-heading")
197
 
198
  with gr.Row(elem_id="query-container"):
199
  with gr.Row(elem_classes="query-box-row"):
200
  query_display = gr.Markdown(value="Loading question...", elem_classes="query-text")
201
- random_question_btn = gr.Button("🔄 Get Random Question", elem_classes="query-button")
202
 
 
203
  # Context description and display
204
  context_description = gr.Markdown("", elem_classes="context-description")
205
 
 
 
206
  with gr.Row(elem_id="context-header-row"):
207
- gr.Markdown("### Context Provided", elem_classes="context-title")
208
  context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
209
 
210
  context_display = gr.HTML(value="Loading context...", label="Context Chunks")
211
 
212
  gr.Markdown("---")
213
- gr.Markdown("### Compare Summaries", elem_classes="section-heading")
214
 
215
  # Model summaries
216
  with gr.Row():
@@ -221,18 +231,20 @@ with gr.Blocks(theme=gr.themes.Default(
221
  with gr.Group(elem_classes=["summary-card", "summary-card-b"]):
222
  summary_b_display = gr.Textbox(label="Model B", lines=10, interactive=False, show_copy_button=True)
223
 
 
 
224
  # Voting section
225
- gr.Markdown("### Cast Your Vote", elem_classes="section-heading")
226
  with gr.Row():
227
  vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"])
228
  vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"])
229
  vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
230
- vote_button_neither = gr.Button("❌ Neither is Adequate", elem_classes=["vote-button", "vote-button-neither"])
231
 
232
  # Feedback and Submit sections
233
  with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
234
  feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)
235
- submit_button = gr.Button("Submit Vote", variant="primary", interactive=False, elem_id="submit-button")
236
 
237
  # Results area
238
  with gr.Column(visible=False) as results_reveal_area:
@@ -242,10 +254,10 @@ with gr.Blocks(theme=gr.themes.Default(
242
  # Model reveal section
243
  with gr.Row():
244
  with gr.Column(scale=1):
245
- gr.Markdown("### Model A was actually:", elem_classes="section-heading")
246
  model_a_reveal = gr.Markdown("", elem_classes="model-reveal model-a-reveal")
247
  with gr.Column(scale=1):
248
- gr.Markdown("### Model B was actually:", elem_classes="section-heading")
249
  model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
250
 
251
  gr.HTML("<hr>")
@@ -256,7 +268,7 @@ with gr.Blocks(theme=gr.themes.Default(
256
 
257
  # Leaderboard Tab
258
  with gr.TabItem("Leaderboard", id="leaderboard-tab"):
259
- gr.Markdown("# Model Performance Leaderboard", elem_classes="orange-title")
260
  gr.Markdown("View performance statistics for all models ranked by Elo rating.")
261
 
262
  with gr.Group(elem_id="leaderboard-info"):
 
14
  feedback_options = {
15
  "left": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)"],
16
  "right": ["Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
17
+ "tie": ["Model A: More complete", "Model A: More accurate", "Model A: More relevant", "Model A: Better written", "Model A: Better refusal (if applicable)",
18
+ "Model B: More complete", "Model B: More accurate", "Model B: More relevant", "Model B: Better written", "Model B: Better refusal (if applicable)"],
19
+ "neither": ["Model A: Incomplete", "Model A: Hallucinate", "Model A: Irrelevant", "Model A: Incorrect refusal (if applicable)",
20
+ "Model B: Incomplete", "Model B: Hallucinate", "Model B: Irrelevant", "Model B: Incorrect refusal (if applicable)"]
21
  }
22
 
23
  def load_context():
 
188
  with gr.Tabs() as tabs:
189
  # Main Arena Tab
190
  with gr.TabItem("Arena", id="arena-tab"):
191
+ gr.Markdown("# RAG SLM Summarizer/Generator Arena")
192
+ gr.Markdown("""
193
+ 1️⃣ Review the query and examine the highlighted context (✨ highlights contain key information! )\n
194
+ 2️⃣ Compare answers generated by two different models side-by-side\n
195
+ 3️⃣ Vote for the better response or select 'Tie/Neither' if appropriate""")
196
+
197
+ gr.HTML("<hr>")
198
 
199
  # Main container
200
  with gr.Column(elem_id="main-interface-area") as main_interface_area:
201
  # Query section
202
  with gr.Row(elem_id="query-title-row"):
203
+ gr.Markdown("### 💬 Query (What Users Want to Ask About the Doc)", elem_classes="section-heading")
204
 
205
  with gr.Row(elem_id="query-container"):
206
  with gr.Row(elem_classes="query-box-row"):
207
  query_display = gr.Markdown(value="Loading question...", elem_classes="query-text")
208
+ random_question_btn = gr.Button("🔄 Try a New Question", elem_classes="query-button")
209
 
210
+
211
  # Context description and display
212
  context_description = gr.Markdown("", elem_classes="context-description")
213
 
214
+ gr.HTML("<hr>")
215
+
216
  with gr.Row(elem_id="context-header-row"):
217
+ gr.Markdown("### 📋 Context (Relevant Information We Got from the Database)", elem_classes="context-title")
218
  context_toggle_btn = gr.Button("Show Full Context", elem_classes=["context-toggle-button"])
219
 
220
  context_display = gr.HTML(value="Loading context...", label="Context Chunks")
221
 
222
  gr.Markdown("---")
223
+ gr.Markdown("### 🔍 Compare Answers from Models", elem_classes="section-heading")
224
 
225
  # Model summaries
226
  with gr.Row():
 
231
  with gr.Group(elem_classes=["summary-card", "summary-card-b"]):
232
  summary_b_display = gr.Textbox(label="Model B", lines=10, interactive=False, show_copy_button=True)
233
 
234
+ gr.HTML("<hr>")
235
+
236
  # Voting section
237
+ gr.Markdown("### 🏅 Cast Your Vote", elem_classes="section-heading")
238
  with gr.Row():
239
  vote_button_a = gr.Button("⬅️ Summary A is Better", elem_classes=["vote-button"])
240
  vote_button_tie = gr.Button("🤝 Tie / Equally Good", elem_classes=["vote-button"])
241
  vote_button_b = gr.Button("➡️ Summary B is Better", elem_classes=["vote-button"])
242
+ vote_button_neither = gr.Button("❌ Neither is Good", elem_classes=["vote-button", "vote-button-neither"])
243
 
244
  # Feedback and Submit sections
245
  with gr.Group(elem_classes=["feedback-section"], visible=False) as feedback_section:
246
  feedback_checkboxes = gr.CheckboxGroup(label="Feedback (optional)", choices=[], interactive=False)
247
+ submit_button = gr.Button("Submit Your Vote", variant="primary", interactive=False, elem_id="submit-button")
248
 
249
  # Results area
250
  with gr.Column(visible=False) as results_reveal_area:
 
254
  # Model reveal section
255
  with gr.Row():
256
  with gr.Column(scale=1):
257
+ gr.Markdown("### Model A was:", elem_classes="section-heading")
258
  model_a_reveal = gr.Markdown("", elem_classes="model-reveal model-a-reveal")
259
  with gr.Column(scale=1):
260
+ gr.Markdown("### Model B was:", elem_classes="section-heading")
261
  model_b_reveal = gr.Markdown("", elem_classes="model-reveal model-b-reveal")
262
 
263
  gr.HTML("<hr>")
 
268
 
269
  # Leaderboard Tab
270
  with gr.TabItem("Leaderboard", id="leaderboard-tab"):
271
+ gr.Markdown("# RAG SLM Summarizer/Generator Leaderboard", elem_classes="orange-title")
272
  gr.Markdown("View performance statistics for all models ranked by Elo rating.")
273
 
274
  with gr.Group(elem_id="leaderboard-info"):