Spaces:
Running
Running
File size: 6,008 Bytes
2016488 ba1590f 2016488 ebd5cd9 2016488 010981f ebd5cd9 2016488 ba1590f 2016488 ba1590f 0e59c2c 2016488 ebd5cd9 2016488 f8121ba 2016488 ebd5cd9 010981f ebd5cd9 2016488 b4a00d5 8698e36 b4a00d5 d6833df 010981f ebd5cd9 b4a00d5 2016488 010981f 0e59c2c b4a00d5 2016488 4b8c00a bd24c06 2b86120 4b8c00a 2b86120 4b8c00a 010981f c304fa4 2b86120 010981f 2016488 010981f 491e896 75cdf4b 2016488 ba1590f 2016488 ba1590f 2016488 ba1590f 2016488 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
import pandas as pd
import gradio as gr
import csv
import json
import os
import requests
import io
import shutil
from huggingface_hub import Repository
# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN")

# Non-score columns that lead every leaderboard row.
BASE_COLS = ["Rank", "Models", "Model Size(B)", "Data Source"]

# Score columns, grouped by benchmark version (overall score first).
TASKS_V1 = ["V1-Overall", "I-CLS", "I-QA", "I-RET", "I-VG"]
TASKS_V2 = ["V2-Overall", "V-CLS", "V-QA", "V-RET", "V-MRET", "VisDoc"]

# Full column order of the leaderboard table.
COLUMN_NAMES = [*BASE_COLS, *TASKS_V1, *TASKS_V2]

# One display type per entry of COLUMN_NAMES: the four base columns first,
# then "number" for every score column.
# NOTE(review): presumably consumed as the Gradio Dataframe `datatype`
# list -- confirm against the UI code.
DATA_TITLE_TYPE = (
    ["number", "markdown", "str", "markdown"]
    + ["number"] * len(TASKS_V1 + TASKS_V2)
)
# Markdown text introducing the MMEB benchmark, with links to the paper,
# the GitHub repository and the project overview page.
LEADERBOARD_INTRODUCTION = """
# MMEB Leaderboard
## Introduction
We introduce a novel benchmark, MMEB (Massive Multimodal Embedding Benchmark),
which includes 36 datasets spanning four meta-task categories: classification, visual question answering, retrieval, and visual grounding. MMEB provides a comprehensive framework for training
and evaluating embedding models across various combinations of text and image modalities.
All tasks are reformulated as ranking tasks, where the model follows instructions, processes a query, and selects the correct target from a set of candidates. The query and target can be an image, text,
or a combination of both. MMEB is divided into 20 in-distribution datasets, which can be used for
training, and 16 out-of-distribution datasets, reserved for evaluation.
The detailed explanation of the benchmark and datasets can be found in our paper: https://doi.org/10.48550/arXiv.2410.05160. \n
Github link: https://github.com/TIGER-AI-Lab/VLM2Vec. \n
Overview: https://tiger-ai-lab.github.io/VLM2Vec/. \n
"""
# Intentionally empty text (an empty triple-quoted string).
TABLE_INTRODUCTION = """"""
# Markdown/HTML snippet that embeds the dataset overview image.
LEADERBOARD_INFO = """
## Dataset Summary
<img width="900" alt="abs" src="overview.png">
"""
# Label and BibTeX entry for citing the results; the entry is a raw string
# so its contents are kept verbatim.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@article{jiang2024vlm2vec,
title={VLM2Vec: Training Vision-Language Models for Massive Multimodal Embedding Tasks},
author={Jiang, Ziyan and Meng, Rui and Yang, Xinyi and Yavuz, Semih and Zhou, Yingbo and Chen, Wenhu},
journal={arXiv preprint arXiv:2410.05160},
year={2024}
}"""
# Markdown instructions describing the JSON format of a submission.
# The score keys in the examples mirror the leaderboard's task columns
# (TASKS_V1 / TASKS_V2), and string values are quoted so the samples are
# valid JSON apart from the `<Optional>` placeholder marker.
SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
## ⚠ Please note that you need to submit the JSON file with the following format:
### **TO SUBMIT V1 ONLY**
```json
[
{
"Model": "<Model Name>",
<Optional>"URL": "<Model URL>",
"Model Size(B)": 1000,
"Data Source": "Self-Reported",
"V1-Overall": 50.0,
"I-CLS": 50.0,
"I-QA": 50.0,
"I-RET": 50.0,
"I-VG": 50.0
},
]
```
### **TO SUBMIT V2 ONLY**
```json
[
{
"Model": "<Model Name>",
<Optional>"URL": "<Model URL>",
"Model Size(B)": 1000,
"Data Source": "Self-Reported",
"V2-Overall": 50.0,
"V-CLS": 50.0,
"V-QA": 50.0,
"V-RET": 50.0,
"V-MRET": 50.0,
"VisDoc": 50.0
},
]
```
You are also welcome to submit both versions by including all the fields above! :) \n
You may refer to the Github page for instructions about evaluating your model. \n
Github link: https://github.com/TIGER-AI-Lab/VLM2Vec. \n
Please send us an email at m7su@uwaterloo.ca, attaching the JSON file. We will review your submission and update the leaderboard accordingly.
"""
def create_hyperlinked_names(df):
    """Return a copy of ``df`` whose 'Models' entries link to their 'URL'.

    Each model name is wrapped in an HTML anchor pointing at the row's URL.
    Rows without a usable URL (``None``, NaN, or a blank string) keep the
    plain model name, so missing values never become a link such as
    ``<a href="nan">``.

    Args:
        df: DataFrame with a 'Models' column and, optionally, a 'URL' column.

    Returns:
        A new DataFrame; the input is left unmodified. If there is no 'URL'
        column, the copy is returned unchanged.
    """
    def convert_url(url, model_name):
        # pd.isna covers both None and NaN (how pandas marks missing cells).
        if pd.isna(url) or not str(url).strip():
            return model_name
        return f'<a href="{url}">{model_name}</a>'

    linked = df.copy()
    if 'URL' not in linked.columns:
        return linked
    linked['Models'] = [
        convert_url(url, name)
        for url, name in zip(linked['URL'], linked['Models'])
    ]
    return linked
# def fetch_data(file: str) -> pd.DataFrame:
# # fetch the leaderboard data from remote
# if file is None:
# raise ValueError("URL Not Provided")
# url = f"https://huggingface.co/spaces/TIGER-Lab/MMEB/resolve/main/{file}"
# print(f"Fetching data from {url}")
# response = requests.get(url)
# if response.status_code != 200:
# raise requests.HTTPError(f"Failed to fetch data: HTTP status code {response.status_code}")
# return pd.read_json(io.StringIO(response.text), orient='records', lines=True)
def get_df(file="results.jsonl"):
    """Load the leaderboard records and prepare them for display.

    Reads ``file`` as JSON Lines, normalises the 'Model Size(B)' column with
    ``process_model_size``, orders rows by 'V1-Overall' from highest to
    lowest, turns model names into hyperlinks, and appends a 1-based 'Rank'
    column that follows the sorted order.

    Args:
        file: Path to the JSON Lines results file.

    Returns:
        The prepared DataFrame.
    """
    records = pd.read_json(file, orient='records', lines=True)
    records['Model Size(B)'] = records['Model Size(B)'].apply(process_model_size)

    ranked = create_hyperlinked_names(
        records.sort_values(by=['V1-Overall'], ascending=False)
    )
    # Rank is positional: the first row after sorting gets rank 1.
    ranked['Rank'] = range(1, len(ranked) + 1)
    return ranked
def refresh_data():
    """Reload the leaderboard via ``get_df`` and keep only COLUMN_NAMES, in that order."""
    return get_df()[COLUMN_NAMES]
def search_and_filter_models(df, query, min_size, max_size):
    """Filter the leaderboard by model name and model size.

    Args:
        df: Leaderboard DataFrame containing 'Models', 'Model Size(B)' and
            every column listed in COLUMN_NAMES.
        query: Case-insensitive search text matched against 'Models'. It is
            treated as a regular expression when it compiles as one, and as
            a literal substring otherwise, so input such as ``(`` no longer
            raises. A falsy query disables the name filter.
        min_size: Inclusive lower bound for 'Model Size(B)'.
        max_size: Inclusive upper bound for 'Model Size(B)'.

    Returns:
        The matching rows restricted to COLUMN_NAMES.
    """
    import re  # local import: only needed to validate the search pattern

    filtered_df = df.copy()

    if query:
        try:
            re.compile(query)
            is_regex = True
        except re.error:
            # Not a valid pattern: fall back to plain substring matching.
            is_regex = False
        name_mask = filtered_df['Models'].str.contains(
            query, case=False, na=False, regex=is_regex
        )
        filtered_df = filtered_df[name_mask]

    def size_in_range(size):
        # Models of unknown size are treated as 1000.0 for range filtering.
        effective = 1000.0 if size == 'unknown' else size
        return min_size <= effective <= max_size

    # Build the mask with an explicit bool dtype so it is always interpreted
    # as a row mask, including when no rows remain after the name filter.
    size_mask = pd.Series(
        [size_in_range(size) for size in filtered_df['Model Size(B)']],
        index=filtered_df.index,
        dtype=bool,
    )
    filtered_df = filtered_df[size_mask]
    return filtered_df[COLUMN_NAMES]
def search_models(df, query):
    """Return the rows of ``df`` whose 'Models' value matches ``query``.

    Matching is case-insensitive and missing names never match. The query is
    used as a regular expression when it compiles as one; otherwise it is
    matched as a literal substring, so user input such as ``(`` does not
    raise.

    Args:
        df: DataFrame with a 'Models' column.
        query: Search text; a falsy value returns ``df`` unchanged.

    Returns:
        The filtered DataFrame, or ``df`` itself when ``query`` is falsy.
    """
    import re  # local import: only needed to validate the search pattern

    if not query:
        return df

    try:
        re.compile(query)
        is_regex = True
    except re.error:
        # Not a valid pattern: fall back to plain substring matching.
        is_regex = False

    name_mask = df['Models'].str.contains(
        query, case=False, na=False, regex=is_regex
    )
    return df[name_mask]
def get_size_range(df):
    """Return ``(min, max)`` of the 'Model Size(B)' column as floats.

    Entries equal to 'unknown' count as 0.0. When every entry ends up as
    0.0, the fallback range ``(0.0, 1000.0)`` is returned.
    """
    def as_number(value):
        if value == 'unknown':
            return 0.0
        return value

    numeric_sizes = df['Model Size(B)'].apply(as_number)

    has_nonzero = (numeric_sizes != 0.0).any()
    if not has_nonzero:
        return 0.0, 1000.0

    smallest = float(numeric_sizes.min())
    largest = float(numeric_sizes.max())
    return smallest, largest
def process_model_size(size):
    """Normalise a raw model-size value.

    Returns the size as a ``float`` when it can be converted, and the string
    'unknown' when it is missing, equal to 'unk', or not convertible.
    """
    if pd.isna(size):
        return 'unknown'
    if size == 'unk':
        return 'unknown'

    try:
        numeric_size = float(size)
    except (ValueError, TypeError):
        return 'unknown'
    return numeric_size
def filter_columns_by_tasks(df, selected_tasks=None):
    """Restrict ``df`` to the identifying columns plus the selected tasks.

    Args:
        df: Leaderboard DataFrame.
        selected_tasks: List of task column names to keep. ``None`` or an
            empty list returns the full COLUMN_NAMES view instead.

    Returns:
        A DataFrame holding the requested columns that exist in ``df``;
        requested names that are not present are silently skipped.
    """
    nothing_selected = selected_tasks is None or len(selected_tasks) == 0
    if nothing_selected:
        return df[COLUMN_NAMES]

    # NOTE(review): 'Overall' is not one of COLUMN_NAMES (those use
    # 'V1-Overall' / 'V2-Overall'), so it is normally dropped by the
    # presence check below -- confirm whether that is intended.
    requested = ['Models', 'Model Size(B)', 'Data Source', 'Overall'] + selected_tasks
    present = [name for name in requested if name in df.columns]
    return df[present]
|