kaikaidai committed
Commit 00e2ba1 · verified · 1 Parent(s): ace4c98

Added examples in
Files changed (1): app.py (+96, -0)
app.py CHANGED
@@ -318,6 +318,55 @@ if __name__ == "__main__":
 
     # ... rest of your Gradio app setup ...
 
+# Example evaluation metrics data
+EXAMPLE_METRICS = {
+    "Hallucination": {
+        "prompt": DEFAULT_EVAL_PROMPT,  # We'll replace these with actual examples
+        "input": DEFAULT_INPUT,
+        "response": DEFAULT_RESPONSE
+    },
+    "Precision": {
+        "prompt": DEFAULT_EVAL_PROMPT,
+        "input": DEFAULT_INPUT,
+        "response": DEFAULT_RESPONSE
+    },
+    "Recall": {
+        "prompt": DEFAULT_EVAL_PROMPT,
+        "input": DEFAULT_INPUT,
+        "response": DEFAULT_RESPONSE
+    },
+    "Logical coherence": {
+        "prompt": DEFAULT_EVAL_PROMPT,
+        "input": DEFAULT_INPUT,
+        "response": DEFAULT_RESPONSE
+    },
+    "Faithfulness": {
+        "prompt": DEFAULT_EVAL_PROMPT,
+        "input": DEFAULT_INPUT,
+        "response": DEFAULT_RESPONSE
+    }
+}
+
+def set_example_metric(metric_name):
+    if metric_name == "Custom":
+        return [
+            DEFAULT_EVAL_PROMPT,
+            DEFAULT_INPUT,
+            DEFAULT_RESPONSE
+        ]
+
+    metric_data = EXAMPLE_METRICS[metric_name]
+    return [
+        metric_data["prompt"],
+        metric_data["input"],
+        metric_data["response"]
+    ]
+
+# Select random metric at startup
+def get_random_metric():
+    metrics = list(EXAMPLE_METRICS.keys())
+    return set_example_metric(random.choice(metrics))
+
 with gr.Blocks(theme='default', css=CSS_STYLES) as demo:
     judge_id = gr.State(get_new_session_id())
     gr.Markdown(MAIN_TITLE)
@@ -331,6 +380,16 @@ with gr.Blocks(theme='default', css=CSS_STYLES) as demo:
     gr.Markdown(BATTLE_RULES)
     gr.Markdown(EVAL_DESCRIPTION)
 
+    # Add Example Metrics Section
+    with gr.Accordion("Example evaluation metrics", open=True):
+        with gr.Row():
+            custom_btn = gr.Button("Custom", variant="secondary")
+            hallucination_btn = gr.Button("Hallucination")
+            precision_btn = gr.Button("Precision")
+            recall_btn = gr.Button("Recall")
+            coherence_btn = gr.Button("Logical coherence")
+            faithfulness_btn = gr.Button("Faithfulness")
+
     # Eval Prompt and Variables side by side
     with gr.Row():
         # Left column - Eval Prompt
@@ -582,4 +641,41 @@ with gr.Blocks(theme='default', css=CSS_STYLES) as demo:
         outputs=[leaderboard_table, stats_display]
     )
 
+    # Add click handlers for metric buttons
+    custom_btn.click(
+        fn=lambda: set_example_metric("Custom"),
+        outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
+    )
+
+    hallucination_btn.click(
+        fn=lambda: set_example_metric("Hallucination"),
+        outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
+    )
+
+    precision_btn.click(
+        fn=lambda: set_example_metric("Precision"),
+        outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
+    )
+
+    recall_btn.click(
+        fn=lambda: set_example_metric("Recall"),
+        outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
+    )
+
+    coherence_btn.click(
+        fn=lambda: set_example_metric("Logical coherence"),
+        outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
+    )
+
+    faithfulness_btn.click(
+        fn=lambda: set_example_metric("Faithfulness"),
+        outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
+    )
+
+    # Set random metric at startup
+    demo.load(
+        fn=get_random_metric,
+        outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
+    )
+
     demo.launch()
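The diff above hooks the example-metric buttons into the existing Judge Arena UI, so its handlers reference components (eval_prompt, variable_rows) and constants (DEFAULT_EVAL_PROMPT, DEFAULT_INPUT, DEFAULT_RESPONSE) defined elsewhere in app.py. The following is a minimal, self-contained sketch of the same pattern; the PLACEHOLDER_* texts and the input_box / response_box component names are illustrative stand-ins, not part of the repo.

import random

import gradio as gr

# Stand-ins for the repo's DEFAULT_EVAL_PROMPT / DEFAULT_INPUT / DEFAULT_RESPONSE.
PLACEHOLDER_PROMPT = "Rate the response on the chosen metric. Answer Good or Bad."
PLACEHOLDER_INPUT = "Example input text."
PLACEHOLDER_RESPONSE = "Example model response."

METRIC_NAMES = ["Hallucination", "Precision", "Recall", "Logical coherence", "Faithfulness"]

# One canned example per metric; the real app would swap in curated prompts.
EXAMPLE_METRICS = {
    name: {"prompt": PLACEHOLDER_PROMPT, "input": PLACEHOLDER_INPUT, "response": PLACEHOLDER_RESPONSE}
    for name in METRIC_NAMES
}

def set_example_metric(metric_name):
    # "Custom" resets the fields to defaults; any other name loads that metric's example.
    if metric_name == "Custom":
        return [PLACEHOLDER_PROMPT, PLACEHOLDER_INPUT, PLACEHOLDER_RESPONSE]
    data = EXAMPLE_METRICS[metric_name]
    return [data["prompt"], data["input"], data["response"]]

def get_random_metric():
    # Called once on page load so the app opens with a pre-filled example.
    return set_example_metric(random.choice(METRIC_NAMES))

with gr.Blocks() as demo:
    with gr.Accordion("Example evaluation metrics", open=True):
        with gr.Row():
            buttons = {name: gr.Button(name) for name in ["Custom"] + METRIC_NAMES}

    eval_prompt = gr.Textbox(label="Eval prompt", lines=4)
    input_box = gr.Textbox(label="Input")
    response_box = gr.Textbox(label="Response")
    fields = [eval_prompt, input_box, response_box]

    # Each button fills the three fields with its metric's example.
    for name, btn in buttons.items():
        btn.click(fn=lambda n=name: set_example_metric(n), outputs=fields)

    # Pick a random example when the page first loads.
    demo.load(fn=get_random_metric, outputs=fields)

if __name__ == "__main__":
    demo.launch()

The sketch registers the handlers in a loop over a name-to-button dict; the diff spells out one .click per metric, which is more verbose but functionally equivalent.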