huangshiyu committed on
Commit
8f1e7e2
·
1 Parent(s): cfc3f09
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +7 -8
  3. app.py +247 -0
  4. constants.py +39 -0
  5. requirements.txt +16 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
README.md CHANGED
@@ -1,14 +1,13 @@
1
  ---
2
- title: MotionBench
3
- emoji: 🐠
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 5.6.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
- short_description: Leaderboard for MotionBench
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: MotionBench Leaderboard
3
+ emoji: 🐨
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
 
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Public API of this Space module: only the gradio Blocks app is exported.
# (The original __all__ also listed make_clickable_model / make_clickable_user /
# get_submissions, which are not defined anywhere in this file and therefore
# made `from app import *` raise AttributeError.)
__all__ = ['block']

import json
import os  # previously reached this module only via constants' star import

import gradio as gr
import pandas as pd
from huggingface_hub import Repository

from constants import *

# Write token for the submission dataset repo; None when unset (local dev).
HF_TOKEN = os.environ.get("HF_TOKEN")

# NOTE(review): the original had `global data_component, filter_component`
# here, which is a no-op at module scope and has been dropped.
13
+
14
+
15
def download_csv():
    """Sync the submission dataset repo and expose the leaderboard CSV.

    Returns the CSV path together with a gradio update that makes the
    (initially hidden) download widget visible.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()  # serve the latest results file, not a stale clone
    return CSV_DIR, gr.update(visible=True)
21
+
22
+
23
def upload_file(files):
    """Map uploaded gradio file objects to their on-disk paths."""
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths
26
+
27
+
28
def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
    model_date: str,
    LLM_type: str,
    LLM_name_textbox: str,
):
    """Validate an uploaded result JSON and append/overwrite a leaderboard row.

    Args:
        input_file: raw bytes of the uploaded json (gradio File, type='binary');
            None when the user pressed Submit without choosing a file.
        model_name_textbox: display name for a brand-new entry.
        revision_name_textbox: if non-empty, name of an existing entry to update.
        model_link: URL used to render the model name as a markdown link.
        model_date: free-form date string shown in the table.
        LLM_type: dropdown choice; 'Other' defers to LLM_name_textbox.
        LLM_name_textbox: LLM name used when LLM_type == 'Other'.

    Returns:
        An error string on empty input, 0 on success. (The click handler
        registers no outputs, so the value is informational only.)
    """
    if input_file is None:
        return "Error! Empty file!"

    # json.loads accepts bytes directly, so no explicit decode is needed.
    upload_data = json.loads(input_file)
    # Clone/refresh the submission dataset so we edit the latest CSV.
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN,
                                 repo_type="dataset", git_user="auto-uploader", git_email="uploader@163.com")
    submission_repo.git_pull()
    csv_data = pd.read_csv(CSV_DIR)

    LLM_name = LLM_name_textbox if LLM_type == 'Other' else LLM_type

    if revision_name_textbox == '':
        # New submission: append a fresh row at the bottom.
        col = csv_data.shape[0]
        model_name = model_name_textbox
    else:
        # Revision: overwrite the row whose markdown link text matches, if any.
        model_name = revision_name_textbox
        model_name_list = csv_data['Model']
        # Existing cells look like '[name](url)'; strip down to the bare name.
        name_list = [name.split(']')[0][1:] for name in model_name_list]
        if revision_name_textbox not in name_list:
            col = csv_data.shape[0]
        else:
            col = name_list.index(revision_name_textbox)

    # Wrap the name in a markdown link unless no link was given or the name
    # is already formatted as one.
    if model_link != '' and "](" not in model_name:
        model_name = '[' + model_name + '](' + model_link + ')'

    # NOTE(review): four leading fields are written while MODEL_INFO lists only
    # three display columns — presumably the CSV carries extra hidden columns
    # (e.g. the link and 'Overall'); verify against the dataset schema.
    new_data = [
        model_name,
        LLM_name,
        model_date,
        model_link,
    ]
    for key in TASK_INFO:
        # Scores arrive as fractions; store as percentages with one decimal.
        # Metrics missing from the upload default to 0.
        new_data.append(round(100 * upload_data[key], 1) if key in upload_data else 0)
    csv_data.loc[col] = new_data
    # Persist and publish. (Original rebound csv_data to the None returned by
    # to_csv; keep the DataFrame intact instead.)
    csv_data.to_csv(CSV_DIR, index=False)
    submission_repo.push_to_hub()
    return 0
86
+
87
+
88
def get_baseline_df():
    """Pull the latest results and return the default leaderboard view.

    Rows are ranked by the (hidden) 'Overall' column; only the model-info
    columns plus the currently checked metric columns are included.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()
    table = pd.read_csv(CSV_DIR).sort_values(by="Overall", ascending=False)
    visible = MODEL_INFO + checkbox_group.value
    return table[visible]
97
+
98
+
99
def get_all_df():
    """Pull the latest results and return the full leaderboard, ranked by 'Overall'."""
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()
    return pd.read_csv(CSV_DIR).sort_values(by="Overall", ascending=False)
106
+
107
+
108
def on_filter_model_size_method_change(selected_columns):
    """Rebuild the leaderboard table when the metric checkboxes change.

    Args:
        selected_columns: metric names currently checked in the UI.

    Returns:
        A new gr.components.Dataframe showing MODEL_INFO plus the selected
        metrics, sorted descending by the first selected metric.
    """
    updated_data = get_all_df()

    # Normalize to TASK_INFO order so the column layout is stable regardless
    # of the order in which the user clicked the checkboxes.
    selected_columns = [item for item in TASK_INFO if item in selected_columns]
    present_columns = MODEL_INFO + selected_columns
    updated_data = updated_data[present_columns]
    # Guard: with every metric unchecked the original crashed on
    # selected_columns[0]; keep get_all_df's 'Overall' ordering instead.
    if selected_columns:
        updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)
    updated_headers = present_columns
    # Reuse the per-column datatype declared in constants.py.
    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )

    return filter_component
130
+
131
+
132
# ---------------------------------------------------------------------------
# Gradio UI: three tabs — the leaderboard table, an About page, and a
# submission form wired to add_new_eval(). Runs block.launch() on import.
# ---------------------------------------------------------------------------
block = gr.Blocks()
with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: the leaderboard table itself.
        with gr.TabItem("πŸ… MotionBench", elem_id="lvbench-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )

            gr.Markdown(
                TABLE_INTRODUCTION
            )

            # Checkbox group controlling which metric columns are shown.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )

            # Main table; value is a callable so data is fetched from the hub
            # when the UI loads.
            data_component = gr.components.Dataframe(
                value=get_baseline_df,
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITILE_TYPE,
                interactive=False,
                visible=True,
            )

            # Re-render the table whenever the metric selection changes.
            checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group],
                                  outputs=data_component)

        # table 2: static About text.
        with gr.TabItem("πŸ“ About", elem_id="lvbench-tab-table", id=2):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        # table 3: submission form feeding add_new_eval().
        with gr.TabItem("πŸš€ Submit here! ", elem_id="lvbench-tab-table", id=3):
            gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# βœ‰οΈβœ¨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="CogVLM2-Video"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="CogVLM2-Video"
                    )

                with gr.Column():
                    LLM_type = gr.Dropdown(
                        choices=["LLaMA-3-8B", "Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "InternLM-7B", "Other"],
                        label="LLM type",
                        multiselect=False,
                        value="LLaMA-3-8B",
                        interactive=True,
                    )
                    LLM_name_textbox = gr.Textbox(
                        label="LLM model (for Other)",
                        placeholder="LLaMA-3-8B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://cogvlm2-video.github.io/"
                    )
                    model_date = gr.Textbox(
                        label="Model Date", placeholder="2024/8/22"
                    )

                with gr.Column():
                    # type='binary' hands add_new_eval the raw file bytes.
                    input_file = gr.components.File(label="Click to Upload a json File", file_count="single", type='binary')
                    submit_button = gr.Button("Submit Eval")

                    submission_result = gr.Markdown()
                    # NOTE(review): no outputs are registered, so the return
                    # value of add_new_eval (including its error message) is
                    # never shown; submission_result stays empty.
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            input_file,
                            model_name_textbox,
                            revision_name_textbox,
                            model_link,
                            model_date,
                            LLM_type,
                            LLM_name_textbox,
                        ],
                    )

    # NOTE(review): defined but never wired to any event; the Refresh button
    # below uses on_filter_model_size_method_change instead.
    def refresh_data():
        value1 = get_baseline_df()
        return value1

    with gr.Row():
        data_run = gr.Button("Refresh")
    with gr.Row():
        result_download = gr.Button("Download Leaderboard")
        file_download = gr.File(label="download the csv of leaderborad.", visible=False)
    # Refresh re-applies the current checkbox selection to fresh hub data.
    data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
    # download_csv returns (path, visibility-update) for the same File widget.
    result_download.click(download_csv, inputs=None, outputs=[file_download, file_download])

block.launch()
constants.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Constants shared by app.py (via `from constants import *`).

# Columns describing the model itself; rendered before the metric columns.
MODEL_INFO = ["Model", "Language Model", "Date"]
# Metric columns selectable in the UI: dev/test averages plus six
# motion-oriented question categories — presumably MR/LM/CM/MO/AO/RC map to
# the benchmark's question types; confirm against the MotionBench paper.
TASK_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
# Metric columns checked by default when the leaderboard first loads.
AVG_INFO = ["Dev Avg"]

# Per-column gradio Dataframe datatypes, aligned with COLUMN_NAMES below
# (3 markdown model-info columns + 8 numeric metric columns = 11 entries).
# NOTE(review): the name keeps the historical "TITILE" typo because app.py
# references it; renaming would break star-import consumers.
DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number',
                    'number', 'number']

# Local clone directory and hub URL of the dataset repo storing results.
SUBMISSION_NAME = "MotionBench_submission"
# NOTE(review): os.path.join on a URL yields "/"-joined paths on POSIX but
# would use "\\" on Windows; fine for the Spaces runtime, fragile elsewhere.
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/THUDM/", SUBMISSION_NAME)
# Path of the leaderboard CSV inside the local clone.
CSV_DIR = "./MotionBench_submission/result.csv"

# Full display-column list, in table order.
COLUMN_NAMES = MODEL_INFO + TASK_INFO

# NOTE(review): the "LEADERBORAD" typo is kept in these names for the same
# star-import compatibility reason as above.
LEADERBORAD_INTRODUCTION = """# MotionBench Leaderboard
    Welcome to the leaderboard of the MotionBench! πŸ† You can prepare your submission by following the [instructions](https://github.com/THUDM/MotionBench?tab=readme-ov-file#get-evaluation-results-and-submit-to-leaderboard).
"""

SUBMIT_INTRODUCTION = """# Submit on MotionBench Benchmark Introduction
"""

TABLE_INTRODUCTION = """
"""

LEADERBORAD_INFO = """
    In recent years, vision language models (VLMs) have made significant advancements in video understanding. However, a crucial capability β€” fine-grained motion comprehension β€” remains under-explored in current benchmarks. To address this gap, we propose MotionBench, a comprehensive evaluation benchmark designed to assess the fine-grained motion comprehension of video understanding models. MotionBench evaluates models' motion-level perception through six primary categories of motion-oriented question types and includes data collected from diverse sources, ensuring a broad representation of real-world video content. Experimental results reveal that existing VLMs perform poorly in understanding fine-grained motions. To enhance VLM's ability to perceive fine-grained motion within a limited sequence length of LLM, we conduct extensive experiments reviewing VLM architectures optimized for video feature compression and propose a novel and efficient Through-Encoder (TE) Fusion method. Experiments show that higher frame rate inputs and TE Fusion yield improvements in motion understanding, yet there is still substantial room for enhancement. Our benchmark aims to guide and motivate the development of more capable video understanding models, emphasizing the importance of fine-grained motion comprehension.
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Placeholder BibTeX entry (authors / eprint id still "xxx" upstream).
CITATION_BUTTON_TEXT = r"""@misc{xxx,
    title={MotionBench: Benchmarking and Improving Fine-grained Video Motion Understanding for Vision Language Models},
    author={xxx},
    year={2024},
    eprint={xx.xx},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}"""
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ APScheduler
2
+ black
3
+ datasets
4
+ gradio
5
+ gradio[oauth]
6
+ gradio_leaderboard==0.0.9
7
+ gradio_client
8
+ huggingface-hub>=0.18.0
9
+ matplotlib
10
+ numpy
11
+ pandas
12
+ python-dateutil
13
+ tqdm
14
+ transformers
15
+ tokenizers>=0.15.0
16
+ sentencepiece