potsawee committed on
Commit
ddfe4fe
Β·
1 Parent(s): 4199682

add introduction text

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. src/about.py +9 -1
app.py CHANGED
@@ -42,7 +42,7 @@ with demo:
42
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
43
 
44
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
45
- show_result_page(root_path='VH', title='πŸŽ† MHaluBench', index=0)
46
  show_result_page(root_path='AVH-visual', title='πŸ“Ί AVHalluBench (Visual)', index=1)
47
  show_result_page(root_path='AVH-audio', title='πŸ”ˆ AVHalluBench (Audio)', index=2)
48
  show_about_page(index=3)
 
42
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
43
 
44
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
45
+ show_result_page(root_path='VH', title='🏞️ MHaluBench', index=0)
46
  show_result_page(root_path='AVH-visual', title='πŸ“Ί AVHalluBench (Visual)', index=1)
47
  show_result_page(root_path='AVH-audio', title='πŸ”ˆ AVHalluBench (Audio)', index=2)
48
  show_about_page(index=3)
src/about.py CHANGED
@@ -6,9 +6,17 @@ from enum import Enum
6
  NUM_FEWSHOT = 0 # Change with your few shot
7
  # ---------------------------------------------------
8
 
9
- TITLE = """<h1 align="center" id="space-title">Multimodal Hallucination Leaderboard</h1>"""
 
 
 
10
 
11
  INTRODUCTION_TEXT = """
 
 
 
 
 
12
  """
13
 
14
  LLM_BENCHMARKS_TEXT = f"""
 
6
  NUM_FEWSHOT = 0 # Change with your few shot
7
  # ---------------------------------------------------
8
 
9
+ TITLE = """<h1 align="center" id="space-title">🎭 Multimodal Hallucination Leaderboard</h1>"""
10
+
11
+
12
+ # <a href="url"></a>
13
 
14
  INTRODUCTION_TEXT = """
15
+ <p>The Multimodal Hallucination Leaderboard ranks multimodal large language models based on hallucination levels in various tasks. System rankings for three different input modalities are displayed, covering the audio, image, and video domains. For each task, hallucination levels are measured using various existing hallucination ranking metrics. The leaderboard currently consists of two main datasets (and three tasks):</p>
16
+ <ul>
17
+ <li><b><a href="https://huggingface.co/datasets/openkg/MHaluBench">MHaluBench</a></b>: An image captioning dataset where the task is to generate text given an input image. System Hallucination scores are measured using existing visual hallucination metrics, including <a href="https://arxiv.org/abs/1809.02156">CHAIR</a>, <a href="https://arxiv.org/abs/2305.10355">POPE</a>, <a href="https://arxiv.org/abs/2402.03190">UniHD</a>, <a href="https://arxiv.org/abs/2303.08896">SelfCheckGPT</a> and <a href="https://arxiv.org/abs/2405.13684">CrossCheckGPT</a>.</li>
18
+ <li><b><a href="https://huggingface.co/datasets/potsawee/avhallubench">AVHalluBench</a></b> (Visual and Audio): A video-captioning dataset where the task is to generate text descriptions given an input video. This dataset can be used for two different tasks; either generating <b>visual descriptions</b> or <b>audio descriptions</b>. Existing audio-visual hallucination metrics include <a href="https://arxiv.org/abs/2303.08896">SelfCheckGPT</a>, <a href="https://arxiv.org/abs/2405.13684">CrossCheckGPT</a>, and <a href="https://arxiv.org/abs/2405.13684">RefCheck</a>.</li>
19
+ </ul>
20
  """
21
 
22
  LLM_BENCHMARKS_TEXT = f"""