import gradio as gr

from app.utils import add_rank_and_format, filter_models, get_refresh_function
from data.model_handler import ModelHandler

METRICS = ["ndcg_at_5", "recall_at_1"]

def main():
    model_handler = ModelHandler()
    initial_metric = "ndcg_at_5"

    data = model_handler.get_vidore_data(initial_metric)
    data = add_rank_and_format(data)

    NUM_DATASETS = len(data.columns) - 3
    NUM_SCORES = len(data) * NUM_DATASETS
    NUM_MODELS = len(data)
css = """ | |
table > thead { | |
white-space: normal | |
} | |
table { | |
--cell-width-1: 250px | |
} | |
table > tbody > tr > td:nth-child(2) > div { | |
overflow-x: auto | |
} | |
.filter-checkbox-group { | |
max-width: max-content; | |
} | |
#markdown size | |
.markdown { | |
font-size: 1rem; | |
} | |
""" | |
    with gr.Blocks(css=css) as block:
        with gr.Tabs():
            with gr.TabItem("Leaderboard"):
                gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark")
                gr.Markdown("### From the paper - ColPali: Efficient Document Retrieval with Vision Language Models")
                gr.Markdown(
                    """
                    Visual Document Retrieval Benchmark leaderboard. To submit results, refer to the corresponding tab.
                    Refer to the [ColPali paper](https://arxiv.org/abs/2407.01449) for details on metrics, tasks and models.
                    """
                )

                datasets_columns = list(data.columns[3:])
                anchor_columns = list(data.columns[:3])
                default_columns = anchor_columns + datasets_columns

                with gr.Row():
                    metric_dropdown = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
                    research_textbox = gr.Textbox(placeholder="Search Models... [press enter]", label="Filter Models by Name")
                    column_checkboxes = gr.CheckboxGroup(choices=datasets_columns, value=default_columns, label="Select Columns to Display")
                with gr.Row():
                    # One datatype per dataframe column: the model column renders as markdown, all others as numbers.
                    datatype = ["number", "markdown"] + ["number"] * (NUM_DATASETS + 1)
                    dataframe = gr.Dataframe(data, datatype=datatype, type="pandas")
                def update_data(metric, search_term, selected_columns):
                    data = model_handler.get_vidore_data(metric)
                    data = add_rank_and_format(data)
                    data = filter_models(data, search_term)
                    if selected_columns:
                        data = data[selected_columns]
                    return data
                with gr.Row():
                    refresh_button = gr.Button("Refresh")
                    refresh_button.click(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe, concurrency_limit=20)

                # Automatically refresh the dataframe when the dropdown value changes
                metric_dropdown.change(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe)

                research_textbox.submit(
                    update_data,
                    inputs=[metric_dropdown, research_textbox, column_checkboxes],
                    outputs=dataframe,
                )
                column_checkboxes.change(
                    update_data,
                    inputs=[metric_dropdown, research_textbox, column_checkboxes],
                    outputs=dataframe,
                )
                # column_checkboxes.change(get_refresh_function(), inputs=[metric_dropdown, column_checkboxes], outputs=dataframe)
                gr.Markdown(
                    f"""
                    - **Total Datasets**: {NUM_DATASETS}
                    - **Total Scores**: {NUM_SCORES}
                    - **Total Models**: {NUM_MODELS}
                    """
                    + r"""
                    Please consider citing:

                    ```bibtex
                    @misc{faysse2024colpaliefficientdocumentretrieval,
                        title={ColPali: Efficient Document Retrieval with Vision Language Models},
                        author={Manuel Faysse and Hugues Sibille and Tony Wu and Bilel Omrani and Gautier Viaud and Céline Hudelot and Pierre Colombo},
                        year={2024},
                        eprint={2407.01449},
                        archivePrefix={arXiv},
                        primaryClass={cs.IR},
                        url={https://arxiv.org/abs/2407.01449},
                    }
                    ```
                    """
                )
            with gr.TabItem("Submit your model"):
                gr.Markdown("# How to Submit a New Model to the Leaderboard")
                gr.Markdown(
                    """
                    To submit a new model to the ViDoRe leaderboard, follow these steps:

                    1. **Evaluate your model**:
                        - Follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/illuin-tech/vidore-benchmark/).
                    2. **Format your submission file**:
                        - The submission file should be generated automatically and named `results.json`, with the following structure:
                        ```json
                        {
                            "dataset_name_1": {
                                "metric_1": score_1,
                                "metric_2": score_2,
                                ...
                            },
                            "dataset_name_2": {
                                "metric_1": score_1,
                                "metric_2": score_2,
                                ...
                            },
                        }
                        ```
                        - The dataset names should match the ViDoRe dataset names listed in the following collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).
                    3. **Submit your model**:
                        - Create a public Hugging Face model repository containing your model.
                        - Add the `vidore` tag to the metadata of your model card and place the `results.json` file at the root of the repository (see the sketch below).
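
                    For reference, here is a minimal sketch of the upload step using the `huggingface_hub` library; the repository id and local path are placeholders to replace with your own, and the sketch assumes you are already logged in to the Hugging Face Hub:

                    ```python
                    from huggingface_hub import HfApi

                    api = HfApi()
                    # Upload the locally generated results.json to the root of your public model repo.
                    api.upload_file(
                        path_or_fileobj="results.json",      # local path to the generated file (placeholder)
                        path_in_repo="results.json",         # must sit at the repository root
                        repo_id="your-username/your-model",  # placeholder repository id
                        repo_type="model",
                    )
                    ```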

                    And you're done! Your model will appear on the leaderboard when you click refresh. Once the Space is rebooted, it will appear on startup.
                    """
                )
    block.queue(max_size=10).launch(debug=True)


if __name__ == "__main__":
    main()