glitchbench committed
Commit 046367c · Parent(s): 038ab83
Create app.py
app.py
ADDED
@@ -0,0 +1,80 @@
import random

import gradio as gr
from datasets import load_dataset

# Load the validation split of the GlitchBench dataset.
mydataset_private = load_dataset("glitchbench/GlitchBench")["validation"]
dataset_size = len(mydataset_private)

GRID_SIZE = (2, 2)


def get_item_data(image_index):
    item = mydataset_private[image_index]
    return item


def show_random_samples():
    # Draw one random example per grid cell.
    total = GRID_SIZE[0] * GRID_SIZE[1]
    random_indexes = random.sample(range(dataset_size), total)

    all_examples = [get_item_data(index) for index in random_indexes]

    # Flatten each example into the component order produced by make_grid():
    # image, glitch source, glitch type, Reddit ID, description.
    all_inputs_left_right = []
    for example in all_examples:
        all_inputs_left_right.append(example["image"])
        all_inputs_left_right.append(example["source"])
        all_inputs_left_right.append(example["glitch-type"])
        all_inputs_left_right.append(example["reddit"])
        # The description field is not revealed in the public browser.
        all_inputs_left_right.append("Secret")

    return all_inputs_left_right


def make_grid(grid_size):
    list_of_components = []

    with gr.Row():
        for row_counter in range(grid_size[0]):
            with gr.Column():
                for col_counter in range(grid_size[1]):
                    item_image = gr.Image()
                    with gr.Accordion("Click for details", open=False):
                        item_glitch_source = gr.Textbox(label="Glitch Source")
                        item_glitch_type = gr.Textbox(label="Glitch Type")
                        item_reddit_id = gr.Textbox(label="Reddit ID")
                        item_description = gr.Textbox(label="Description")

                    # The order here must match show_random_samples().
                    list_of_components.append(item_image)
                    list_of_components.append(item_glitch_source)
                    list_of_components.append(item_glitch_type)
                    list_of_components.append(item_reddit_id)
                    list_of_components.append(item_description)

    return list_of_components


with gr.Blocks(title="GlitchBench") as browser:
    gr.HTML(
        "<h1 style='font-size: 30px; text-align: center;'>GlitchBench: Can large multimodal models detect video game glitches?</h1>"
    )

    gr.HTML("<h2 style='font-size: 30px; text-align: center;'>Abstract</h2>")
    gr.Markdown(
        """Large multimodal models (LMMs) have evolved from large language models (LLMs) to integrate multiple input modalities, such as visual inputs. This integration augments the capacity of LLMs in tasks requiring visual comprehension and reasoning. However, the extent and limitations of their enhanced abilities are not fully understood. To address this gap, we introduce GlitchBench, a novel benchmark designed to test and evaluate the common-sense reasoning and visual recognition capabilities of large multimodal models. Our dataset is curated from a variety of unusual, infrequent, and glitched scenarios from video game content and aims to challenge both the visual and linguistic reasoning powers of LMMs in detecting and interpreting out-of-the-ordinary events and scene composition.
        """
    )

    gr.HTML("<br/>")
    gr.Markdown("# Browse the dataset")

    with gr.Column():
        random_btn = gr.Button("Random Sample")
        with gr.Row():
            grid = make_grid(GRID_SIZE)
        random_btn.click(show_random_samples, inputs=[], outputs=[*grid])


browser.launch()
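
For reference, a quick way to sanity-check the schema the app indexes into. This is a minimal sketch, assuming you have access to the glitchbench/GlitchBench dataset; the field names image, source, glitch-type, and reddit are taken from the code above:

from datasets import load_dataset

# Load the same split the app uses (requires access to the dataset).
ds = load_dataset("glitchbench/GlitchBench")["validation"]
print(len(ds))      # the dataset_size the app samples random indices from
print(ds.features)  # should include: image, source, glitch-type, reddit
sample = ds[0]
print(sample["source"], sample["glitch-type"], sample["reddit"])

If any of these columns were renamed upstream, show_random_samples() would raise a KeyError, so this check is worth running before launching the Space.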