lyx97 committed on
Commit
5aa60a6
1 Parent(s): 19c32d4
Files changed (4) hide show
  1. app.py +6 -6
  2. constants.py +6 -4
  3. file/result.csv +12 -12
  4. src/compute.py +1 -1
app.py CHANGED
@@ -97,9 +97,9 @@ def add_new_eval(
97
  input_data[24],
98
  notes,
99
  ]
100
- print(len(new_data), col)
101
- print(csv_data.loc[col-1])
102
- print(model_name, model_type, model_size)
103
  csv_data.loc[col] = new_data
104
  # with open(f'./file/{model_name}.json','w' ,encoding='utf-8') as f:
105
  # json.dump(new_data, f)
@@ -183,7 +183,7 @@ with block:
183
  '''
184
  # table 3
185
  with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
186
- gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
187
 
188
  with gr.Row():
189
  gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
@@ -194,10 +194,10 @@ with block:
194
  with gr.Row():
195
  with gr.Column():
196
  model_name_textbox = gr.Textbox(
197
- label="Model name", placeholder="Video-LLaVA"
198
  )
199
  revision_name_textbox = gr.Textbox(
200
- label="Revision Model Name", placeholder="Video-LLaVA"
201
  )
202
  model_link = gr.Textbox(
203
  label="Model Link", placeholder="https://huggingface.co/LanguageBind/Video-LLaVA-7B"
 
97
  input_data[24],
98
  notes,
99
  ]
100
+ # print(len(new_data), col)
101
+ # print(csv_data.loc[col])
102
+ # print(model_name, model_type, model_size)
103
  csv_data.loc[col] = new_data
104
  # with open(f'./file/{model_name}.json','w' ,encoding='utf-8') as f:
105
  # json.dump(new_data, f)
 
183
  '''
184
  # table 3
185
  with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
186
+ # gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
187
 
188
  with gr.Row():
189
  gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
 
194
  with gr.Row():
195
  with gr.Column():
196
  model_name_textbox = gr.Textbox(
197
+ label="Model name", placeholder="Video-LLaVA-7B"
198
  )
199
  revision_name_textbox = gr.Textbox(
200
+ label="Revision Model Name", placeholder="Video-LLaVA-7B"
201
  )
202
  model_link = gr.Textbox(
203
  label="Model Link", placeholder="https://huggingface.co/LanguageBind/Video-LLaVA-7B"
constants.py CHANGED
@@ -42,15 +42,17 @@ Welcome to the leaderboard of the TempCompass! 🏆
42
 
43
 
44
  ### Submit Example
45
- For example, if you want to submit Video-LLaVA's result in the leaderboard, you need to:
46
- 1. Fill in ‘Video-LLaVA’ in ‘Model Name’ if it is your first time to submit your result (You can leave ‘Revision Model Name’ blank).
47
- 2. Fill in ‘Video-LLaVA’ in ‘Revision Model Name’ if you want to update your result (You can leave ‘Model Name’ blank).
48
  3. Select ‘VideoLLM’ in ‘Model Type’.
49
- 4. Fill in ‘https://github.com/x/x’ in ‘Model Link’.
50
  5. Fill in ‘7B’ in ‘Model size’.
51
  6. Upload `merged_result.json`.
52
  7. Click the ‘Submit Eval’ button.
53
  8. Click ‘Refresh’ to obtain the uploaded leaderboard.
 
 
54
  """
55
 
56
  # """
 
42
 
43
 
44
  ### Submit Example
45
+ For example, if you want to submit Video-LLaVA-7B's result in the leaderboard, you need to:
46
+ 1. Fill in ‘Video-LLaVA-7B’ in ‘Model Name’ if it is your first time to submit your result (You can leave ‘Revision Model Name’ blank).
47
+ 2. Fill in ‘Video-LLaVA-7B’ in ‘Revision Model Name’ if you want to update your result (You can leave ‘Model Name’ blank).
48
  3. Select ‘VideoLLM’ in ‘Model Type’.
49
+ 4. Fill in ‘https://huggingface.co/LanguageBind/Video-LLaVA-7B’ in ‘Model Link’.
50
  5. Fill in ‘7B’ in ‘Model size’.
51
  6. Upload `merged_result.json`.
52
  7. Click the ‘Submit Eval’ button.
53
  8. Click ‘Refresh’ to obtain the uploaded leaderboard.
54
+
55
+ ## If you have any questions, please contact [liuyuanxin@stu.pku.edu.cn](mailto:liuyuanxin@stu.pku.edu.cn).
56
  """
57
 
58
  # """
file/result.csv CHANGED
@@ -1,13 +1,13 @@
1
  Model,Model Type,Model Size,Avg. All,Avg. Multi-Choice,Avg. Yes/No,Avg. Caption Matching,Avg. Caption Generation,Action. Multi-Choice,Action. Yes/No,Action. Caption Matching,Action. Caption Generation,Direction. Multi-Choice,Direction. Yes/No,Direction. Caption Matching,Direction. Caption Generation,Speed. Multi-Choice,Speed. Yes/No,Speed. Caption Matching,Speed. Caption Generation,Event Order. Multi-Choice,Event Order. Yes/No,Event Order. Caption Matching,Event Order. Caption Generation,Attribute Change. Multi-Choice,Attribute Change. Yes/No,Attribute Change. Caption Matching,Attribute Change. Caption Generation,Notes
2
- [Video-LLaVA-7B](https://huggingface.co/LanguageBind/Video-LLaVA-7B),VideoLLM,7B,49.77,45.57,56.38,63.34,34.83,76.04,74.32,87.88,50.76,35.65,50.28,58.42,23.2,35.22,51.82,53.82,28.67,37.75,49.21,59.0,38.25,40.97,51.12,58.33,33.59,"answer prompt: ""Please directly give the best option:"""
3
- [VideoChat2-vicuna-stage3](https://github.com/OpenGVLab/Ask-Anything/blob/main/video_chat2/scripts/videochat_vicuna/config_7b_stage3.py),VideoLLM,7B,48.81,42.91,58.01,53.69,38.52,76.92,72.8,73.4,54.04,33.44,53.82,49.48,32.73,29.55,53.85,51.68,30.96,35.43,51.31,48.0,34.25,36.81,53.79,45.83,41.41,"answer prompt: ""Please directly give the best option:"""
4
- [LLaMA-VID-7B-short-video](https://huggingface.co/YanweiLi/llama-vid-7b-full-224-video-fps-1),VideoLLM,7B,45.61,38.04,52.96,56.02,34.78,61.24,63.01,73.4,53.03,29.65,49.16,51.2,21.91,29.85,48.79,50.46,27.98,33.77,48.43,53.67,35.5,34.03,52.68,51.74,35.94,"answer prompt: ""Please directly give the best option:"""
5
- [mPLUG-Owl-video](https://huggingface.co/MAGAer13/mplug-owl-llama-7b-video),VideoLLM,7B,44.15,36.39,54.42,48.5,34.43,55.92,64.36,57.24,46.46,31.55,51.21,43.64,30.41,32.24,50.61,41.9,28.21,27.48,51.31,48.67,31.25,32.99,52.01,51.74,36.46,"answer prompt: ""Please directly give the best option:"""
6
- [PandaGPT-13B-maxlen-400](https://huggingface.co/openllmplayground/pandagpt_13b_max_len_400),VideoLLM,13B,41.64,34.37,51.81,51.56,27.5,36.69,53.04,49.16,23.74,33.75,50.84,49.48,26.03,34.03,49.6,55.96,25.69,32.12,53.66,54.33,29.75,35.07,52.23,48.26,32.55,"answer prompt: ""Please directly give the best option:"""
7
- [Valley2-7B](https://huggingface.co/luoruipu1/Valley2-7b),VideoLLM,7B,37.49,29.56,53.49,34.6,26.35,43.49,58.11,36.7,24.75,24.92,52.51,36.77,21.91,29.55,52.02,33.64,20.41,18.54,50.26,36.0,35.75,29.86,52.9,29.86,29.43,"answer prompt: ""Please directly give the best option:"""
8
- [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),VideoLLM,7B,43.54,37.66,50.67,53.69,31.84,61.54,52.53,69.02,40.91,29.02,49.53,47.42,24.48,28.96,50.0,49.24,28.44,36.09,51.05,52.67,31.75,30.9,50.0,50.35,33.85,"answer prompt: ""Please directly give the best option:”"
9
- [Video-LLaMA-2-13B](https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned),VideoLLM,13B,43.4,31.27,53.73,54.16,32.24,52.07,68.07,70.71,54.29,28.08,48.79,56.01,13.92,25.07,45.95,45.26,21.33,26.82,51.83,52.0,38.5,22.22,50.89,47.57,33.85,"answer prompt: ""Please directly give the best option:”"
10
- Gemini-1.5-pro,VideoLLM,-,67.1,63.92,70.32,77.45,57.88,96.75,88.18,97.98,87.88,55.21,63.87,65.64,38.14,43.28,54.45,63.0,36.7,55.63,68.85,81.0,55.25,67.71,73.21,80.9,73.7,"answer prompt: ""Please directly give the best option:”"
11
- [LLaVA-1.5-13B](https://huggingface.co/liuhaotian/llava-v1.5-13b),ImageLLM,13B,51.3,47.41,56.38,64.27,38.42,85.5,74.66,92.59,67.42,40.06,48.98,58.76,24.74,31.04,48.79,57.49,31.88,35.43,49.48,59.0,33.0,42.36,55.36,53.82,35.42,"answer prompt: ""Please directly give the best option:”"
12
- [SPHINX-v2](https://huggingface.co/Alpha-VLLM/LLaMA2-Accessory/tree/main/finetune/mm/SPHINX/SPHINX-v2-1k),ImageLLM,13B,51.92,50.63,59.07,64.34,34.88,89.94,79.05,93.27,67.93,43.53,54.75,54.3,20.36,36.72,51.21,55.96,19.04,37.42,54.45,59.67,37.25,42.36,50.45,59.03,30.99,"answer prompt: ""Please directly give the best option:”"
13
- [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),ImageLLM,7B,52.9,51.08,60.01,64.07,37.28,88.46,81.42,91.58,62.63,41.32,59.78,54.98,29.64,37.01,51.62,54.13,27.75,41.39,50.79,63.67,34.75,44.44,49.11,56.6,32.29,"answer prompt: ""Please directly give the best option:”"
 
1
  Model,Model Type,Model Size,Avg. All,Avg. Multi-Choice,Avg. Yes/No,Avg. Caption Matching,Avg. Caption Generation,Action. Multi-Choice,Action. Yes/No,Action. Caption Matching,Action. Caption Generation,Direction. Multi-Choice,Direction. Yes/No,Direction. Caption Matching,Direction. Caption Generation,Speed. Multi-Choice,Speed. Yes/No,Speed. Caption Matching,Speed. Caption Generation,Event Order. Multi-Choice,Event Order. Yes/No,Event Order. Caption Matching,Event Order. Caption Generation,Attribute Change. Multi-Choice,Attribute Change. Yes/No,Attribute Change. Caption Matching,Attribute Change. Caption Generation,Notes
2
+ [Video-LLaVA-7B](https://huggingface.co/LanguageBind/Video-LLaVA-7B),VideoLLM,7B,49.77,45.57,56.38,63.34,34.83,76.04,74.32,87.88,50.76,35.22,51.82,53.82,28.67,35.65,50.28,58.42,23.2,37.75,49.21,59.0,38.25,40.97,51.12,58.33,33.59,"answer prompt: ""Please directly give the best option:”"
3
+ [VideoChat2-vicuna-stage3](https://github.com/OpenGVLab/Ask-Anything/blob/main/video_chat2/scripts/videochat_vicuna/config_7b_stage3.py),VideoLLM,7B,48.81,42.91,58.01,53.69,38.52,76.92,72.8,73.4,54.04,29.55,53.85,51.68,30.96,33.44,53.82,49.48,32.73,35.43,51.31,48.0,34.25,36.81,53.79,45.83,41.41,"answer prompt: ""Please directly give the best option:”"
4
+ [LLaMA-VID-7B-short-video](https://huggingface.co/YanweiLi/llama-vid-7b-full-224-video-fps-1),VideoLLM,7B,45.61,38.04,52.96,56.02,34.78,61.24,63.01,73.4,53.03,29.85,48.79,50.46,27.98,29.65,49.16,51.2,21.91,33.77,48.43,53.67,35.5,34.03,52.68,51.74,35.94,"answer prompt: ""Please directly give the best option:”"
5
+ [mPLUG-Owl-video](https://huggingface.co/MAGAer13/mplug-owl-llama-7b-video),VideoLLM,7B,44.15,36.39,54.42,48.5,34.43,55.92,64.36,57.24,46.46,32.24,50.61,41.9,28.21,31.55,51.21,43.64,30.41,27.48,51.31,48.67,31.25,32.99,52.01,51.74,36.46,"answer prompt: ""Please directly give the best option:”"
6
+ [PandaGPT-13B-maxlen-400](https://huggingface.co/openllmplayground/pandagpt_13b_max_len_400),VideoLLM,13B,41.64,34.37,51.81,51.56,27.5,36.69,53.04,49.16,23.74,34.03,49.6,55.96,25.69,33.75,50.84,49.48,26.03,32.12,53.66,54.33,29.75,35.07,52.23,48.26,32.55,"answer prompt: ""Please directly give the best option:”"
7
+ [Valley2-7B](https://huggingface.co/luoruipu1/Valley2-7b),VideoLLM,7B,37.49,29.56,53.49,34.6,26.35,43.49,58.11,36.7,24.75,29.55,52.02,33.64,20.41,24.92,52.51,36.77,21.91,18.54,50.26,36.0,35.75,29.86,52.9,29.86,29.43,"answer prompt: ""Please directly give the best option:”"
8
+ [Video-ChatGPT](https://github.com/mbzuai-oryx/Video-ChatGPT),VideoLLM,7B,43.54,37.66,50.67,53.69,31.84,61.54,52.53,69.02,40.91,28.96,50.0,49.24,28.44,29.02,49.53,47.42,24.48,36.09,51.05,52.67,31.75,30.9,50.0,50.35,33.85,"answer prompt: ""Please directly give the best option:”"
9
+ [Video-LLaMA-2-13B](https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-2-13B-Finetuned),VideoLLM,13B,43.4,31.27,53.73,54.16,32.24,52.07,68.07,70.71,54.29,25.07,45.95,45.26,21.33,28.08,48.79,56.01,13.92,26.82,51.83,52.0,38.5,22.22,50.89,47.57,33.85,"answer prompt: ""Please directly give the best option:”"
10
+ Gemini-1.5-pro,VideoLLM,-,67.1,63.92,70.32,77.45,57.88,96.75,88.18,97.98,87.88,43.28,54.45,63.0,36.7,55.21,63.87,65.64,38.14,55.63,68.85,81.0,55.25,67.71,73.21,80.9,73.7,"answer prompt: ""Please directly give the best option:”"
11
+ [LLaVA-1.5-13B](https://huggingface.co/liuhaotian/llava-v1.5-13b),ImageLLM,13B,51.3,47.41,56.38,64.27,38.42,85.5,74.66,92.59,67.42,31.04,48.79,57.49,31.88,40.06,48.98,58.76,24.74,35.43,49.48,59.0,33.0,42.36,55.36,53.82,35.42,"answer prompt: ""Please directly give the best option:”"
12
+ [SPHINX-v2](https://huggingface.co/Alpha-VLLM/LLaMA2-Accessory/tree/main/finetune/mm/SPHINX/SPHINX-v2-1k),ImageLLM,13B,51.92,50.63,59.07,64.34,34.88,89.94,79.05,93.27,67.93,36.72,51.21,55.96,19.04,43.53,54.75,54.3,20.36,37.42,54.45,59.67,37.25,42.36,50.45,59.03,30.99,"answer prompt: ""Please directly give the best option:”"
13
+ [Qwen-VL-Chat](https://huggingface.co/Qwen/Qwen-VL-Chat),ImageLLM,7B,52.9,51.08,60.01,64.07,37.28,88.46,81.42,91.58,62.63,37.01,51.62,54.13,27.75,41.32,59.78,54.98,29.64,41.39,50.79,63.67,34.75,44.44,49.11,56.6,32.29,"answer prompt: ""Please directly give the best option:”"
src/compute.py CHANGED
@@ -72,8 +72,8 @@ def compute_scores(merge_file):
72
 
73
  [dataset_scores_dict["avg_all"], dataset_scores_dict["avg_multi-choice"], dataset_scores_dict["avg_yes_no"], dataset_scores_dict["avg_caption_matching"], dataset_scores_dict["avg_captioning"],
74
  dataset_scores_dict['action_multi-choice'], dataset_scores_dict['action_yes_no'], dataset_scores_dict['action_caption_matching'], dataset_scores_dict['action_captioning'],
75
- dataset_scores_dict['speed_multi-choice'], dataset_scores_dict['speed_yes_no'], dataset_scores_dict['speed_caption_matching'], dataset_scores_dict['speed_captioning'],
76
  dataset_scores_dict['direction_multi-choice'], dataset_scores_dict['direction_yes_no'], dataset_scores_dict['direction_caption_matching'], dataset_scores_dict['direction_captioning'],
 
77
  dataset_scores_dict['order_multi-choice'], dataset_scores_dict['order_yes_no'], dataset_scores_dict['order_caption_matching'], dataset_scores_dict['order_captioning'],
78
  dataset_scores_dict['attribute_change_multi-choice'], dataset_scores_dict['attribute_change_yes_no'], dataset_scores_dict['attribute_change_caption_matching'], dataset_scores_dict['attribute_change_captioning'],
79
  ],
 
72
 
73
  [dataset_scores_dict["avg_all"], dataset_scores_dict["avg_multi-choice"], dataset_scores_dict["avg_yes_no"], dataset_scores_dict["avg_caption_matching"], dataset_scores_dict["avg_captioning"],
74
  dataset_scores_dict['action_multi-choice'], dataset_scores_dict['action_yes_no'], dataset_scores_dict['action_caption_matching'], dataset_scores_dict['action_captioning'],
 
75
  dataset_scores_dict['direction_multi-choice'], dataset_scores_dict['direction_yes_no'], dataset_scores_dict['direction_caption_matching'], dataset_scores_dict['direction_captioning'],
76
+ dataset_scores_dict['speed_multi-choice'], dataset_scores_dict['speed_yes_no'], dataset_scores_dict['speed_caption_matching'], dataset_scores_dict['speed_captioning'],
77
  dataset_scores_dict['order_multi-choice'], dataset_scores_dict['order_yes_no'], dataset_scores_dict['order_caption_matching'], dataset_scores_dict['order_captioning'],
78
  dataset_scores_dict['attribute_change_multi-choice'], dataset_scores_dict['attribute_change_yes_no'], dataset_scores_dict['attribute_change_caption_matching'], dataset_scores_dict['attribute_change_captioning'],
79
  ],