import gradio as gr
import pandas as pd
from huggingface_hub import HfApi

DATASETS = [
    "mMARCO-fr",
    "BSARD",
]
DENSE_SINGLE_BIENCODERS = [
    "antoinelouis/biencoder-camemberta-base-mmarcoFR",
    "antoinelouis/biencoder-camembert-base-mmarcoFR",
    "antoinelouis/biencoder-distilcamembert-mmarcoFR",
    "antoinelouis/biencoder-camembert-L10-mmarcoFR",
    "antoinelouis/biencoder-camembert-L8-mmarcoFR",
    "antoinelouis/biencoder-camembert-L6-mmarcoFR",
    "antoinelouis/biencoder-camembert-L4-mmarcoFR",
    "antoinelouis/biencoder-camembert-L2-mmarcoFR",
    "antoinelouis/biencoder-electra-base-mmarcoFR",
    "antoinelouis/biencoder-mMiniLMv2-L12-mmarcoFR",
    "antoinelouis/biencoder-mMiniLMv2-L6-mmarcoFR",
    "OrdalieTech/Solon-embeddings-large-0.1",
    "OrdalieTech/Solon-embeddings-base-0.1",
]
DENSE_MULTI_BIENCODERS = [
    "antoinelouis/colbertv1-camembert-base-mmarcoFR",
    "antoinelouis/colbertv2-camembert-L4-mmarcoFR",
    "antoinelouis/colbert-xm",
]
SPARSE_SINGLE_BIENCODERS = [
    "antoinelouis/spladev2-camembert-base-mmarcoFR",
]
CROSS_ENCODERS = [
    "antoinelouis/crossencoder-camemberta-L2-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L4-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L6-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L8-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-L10-mmarcoFR",
    "antoinelouis/crossencoder-camemberta-base-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L2-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L4-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L6-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L8-mmarcoFR",
    "antoinelouis/crossencoder-camembert-L10-mmarcoFR",
    "antoinelouis/crossencoder-camembert-base-mmarcoFR",
    "antoinelouis/crossencoder-camembert-large-mmarcoFR",
    "antoinelouis/crossencoder-distilcamembert-mmarcoFR",
    "antoinelouis/crossencoder-electra-base-mmarcoFR",
    "antoinelouis/crossencoder-me5-base-mmarcoFR",
    "antoinelouis/crossencoder-me5-small-mmarcoFR",
    "antoinelouis/crossencoder-t5-base-mmarcoFR",
    "antoinelouis/crossencoder-t5-small-mmarcoFR",
    "antoinelouis/crossencoder-mt5-base-mmarcoFR",
    "antoinelouis/crossencoder-mt5-small-mmarcoFR",
    "antoinelouis/crossencoder-xlm-roberta-base-mmarcoFR",
    "antoinelouis/crossencoder-mdebertav3-base-mmarcoFR",
    "antoinelouis/crossencoder-mMiniLMv2-L12-mmarcoFR",
    "antoinelouis/crossencoder-mMiniLMv2-L6-mmarcoFR",
]
LLMS = []

COLUMNS = {
    "Model": "html",
    "#Params (M)": "number",
    "Type": "str",
    "Dataset": "str",
    "Recall@1000": "number",
    "Recall@500": "number",
    "Recall@100": "number",
    "Recall@10": "number",
    "MRR@10": "number",
    "nDCG@10": "number",
    "MAP@10": "number",
}


def get_model_info(model_id: str, model_type: str) -> pd.DataFrame:
    """Collect the evaluation results declared in a model's card on the Hugging Face Hub."""
    data = {}
    api = HfApi()
    model_info = api.model_info(model_id)
    for result in model_info.card_data.eval_results:
        if result.dataset_name in DATASETS and result.dataset_name not in data:
            data[result.dataset_name] = {key: None for key in COLUMNS.keys()}
            # The model name is rendered as a link to its Hub page (the "Model" column uses the "html" datatype).
            data[result.dataset_name]["Model"] = f'<a href="https://huggingface.co/{model_id}" target="_blank">{model_id}</a>'
            data[result.dataset_name]["#Params (M)"] = round(model_info.safetensors.total / 1e6) if model_info.safetensors else None
            data[result.dataset_name]["Type"] = model_type
            data[result.dataset_name]["Dataset"] = result.dataset_name
        if result.dataset_name in DATASETS and result.metric_name in data[result.dataset_name]:
            data[result.dataset_name][result.metric_name] = result.metric_value
    return pd.DataFrame(list(data.values()))


def load_all_results() -> pd.DataFrame:
    """Build the full results table by fetching the card metadata of every registered model."""
    df = pd.DataFrame()
    for model_id in DENSE_SINGLE_BIENCODERS:
        df = pd.concat([df, get_model_info(model_id, model_type="DSVBE")])
    for model_id in DENSE_MULTI_BIENCODERS:
        df = pd.concat([df, get_model_info(model_id, model_type="DMVBE")])
    for model_id in SPARSE_SINGLE_BIENCODERS:
        df = pd.concat([df, get_model_info(model_id, model_type="SSVBE")])
    for model_id in CROSS_ENCODERS:
        df = pd.concat([df, get_model_info(model_id, model_type="CE")])
    for model_id in LLMS:
        df = pd.concat([df, get_model_info(model_id, model_type="LLM")])
    return df


def filter_dataf_by_dataset(dataf: pd.DataFrame, dataset_name: str, sort_by: str) -> pd.DataFrame:
    """Keep the rows of a single dataset and sort them by the given metric (best first)."""
    return (
        dataf
        .loc[dataf["Dataset"] == dataset_name]
        .drop(columns=["Dataset"])
        .sort_values(by=sort_by, ascending=False)
    )


def update_table(dataf: pd.DataFrame, query: str, selected_types: list, selected_sizes: list) -> pd.DataFrame:
    """Filter a leaderboard table by search query, model type, and model size."""
    filtered_df = dataf.copy()
    conditions = []
    for val in selected_types:
        if val == 'Dense single-vector bi-encoder (DSVBE)':
            conditions.append((filtered_df['Type'] == 'DSVBE'))
        elif val == 'Dense multi-vector bi-encoder (DMVBE)':
            conditions.append((filtered_df['Type'] == 'DMVBE'))
        elif val == 'Sparse single-vector bi-encoder (SSVBE)':
            conditions.append((filtered_df['Type'] == 'SSVBE'))
        elif val == 'Cross-encoder (CE)':
            conditions.append((filtered_df['Type'] == 'CE'))
        elif val == 'LLM':
            conditions.append((filtered_df['Type'] == 'LLM'))
    for val in selected_sizes:
        if val == 'Small (< 100M)':
            conditions.append((filtered_df['#Params (M)'] < 100))
        elif val == 'Base (100M-300M)':
            conditions.append((filtered_df['#Params (M)'] >= 100) & (filtered_df['#Params (M)'] <= 300))
        elif val == 'Large (300M-500M)':
            conditions.append((filtered_df['#Params (M)'] >= 300) & (filtered_df['#Params (M)'] <= 500))
        elif val == 'Extra-large (500M+)':
            conditions.append((filtered_df['#Params (M)'] > 500))
    if conditions:
        filtered_df = filtered_df[pd.concat(conditions, axis=1).any(axis=1)]
    if query:
        filtered_df = filtered_df[filtered_df['Model'].str.contains(query, case=False)]
    return filtered_df


with gr.Blocks() as demo:
    gr.HTML("""
        <div style="text-align: right;">Contact/Submissions</div>
        <h1 style="text-align: center;">🥇 DécouvrIR</h1>
        <h3 style="text-align: center;">A Benchmark for Evaluating the Robustness of Information Retrieval Models in French</h3>
""") # Create the Pandas dataframes (one per dataset) all_df = load_all_results() mmarco_df = filter_dataf_by_dataset(all_df, dataset_name="mMARCO-fr", sort_by="Recall@500") bsard_df = filter_dataf_by_dataset(all_df, dataset_name="BSARD", sort_by="Recall@500") # Search and filter widgets with gr.Column(): with gr.Row(): search_bar = gr.Textbox(placeholder=" 🔍 Search for a model...", show_label=False, elem_id="search-bar") with gr.Row(): filter_type = gr.CheckboxGroup( label="Model type", choices=[ 'Dense single-vector bi-encoder (DSVBE)', 'Dense multi-vector bi-encoder (DMVBE)', 'Sparse single-vector bi-encoder (SSVBE)', 'Cross-encoder (CE)', 'LLM', ], value=[], interactive=True, elem_id="filter-type", ) with gr.Row(): filter_size = gr.CheckboxGroup( label="Model size", choices=['Small (< 100M)', 'Base (100M-300M)', 'Large (300M-500M)', 'Extra-large (500M+)'], value=[], interactive=True, elem_id="filter-size", ) # Leaderboard tables with gr.Tabs(): with gr.TabItem("🌐 mMARCO-fr"): gr.HTML("""

                <p>The mMARCO dataset is a machine-translated version of the widely popular MS MARCO dataset in 13 languages (including French) for studying domain-general passage retrieval.</p>
                <p>The evaluation is performed on 6,980 dev questions labeled with relevant passages to be retrieved from a corpus of 8,841,823 candidates.</p>
""") mmarco_table = gr.Dataframe( value=mmarco_df, datatype=[COLUMNS[col] for col in mmarco_df.columns], interactive=False, elem_classes="text-sm", ) with gr.TabItem("⚖️ BSARD"): gr.HTML("""

                <p>The Belgian Statutory Article Retrieval Dataset (BSARD) is a natively French dataset for studying legal document retrieval.</p>
                <p>The evaluation is performed on 222 test questions labeled by experienced jurists with relevant Belgian law articles to be retrieved from a corpus of 22,633 candidates.</p>
                <p>[Coming soon...]</p>
            """)
            # bsard_table = gr.Dataframe(
            #     value=bsard_df,
            #     datatype=[COLUMNS[col] for col in bsard_df.columns],
            #     interactive=False,
            #     elem_classes="text-sm",
            # )

    # All three widgets are declared as inputs of each listener so that the current search
    # query, type filter, and size filter are always combined when the table is updated.
    # Update tables on search.
    search_bar.change(
        fn=lambda query, types, sizes: update_table(mmarco_df, query, types, sizes),
        inputs=[search_bar, filter_type, filter_size],
        outputs=mmarco_table,
    )
    # search_bar.change(
    #     fn=lambda query, types, sizes: update_table(bsard_df, query, types, sizes),
    #     inputs=[search_bar, filter_type, filter_size],
    #     outputs=bsard_table,
    # )
    # Update tables on model type filter.
    filter_type.change(
        fn=lambda query, types, sizes: update_table(mmarco_df, query, types, sizes),
        inputs=[search_bar, filter_type, filter_size],
        outputs=mmarco_table,
    )
    # filter_type.change(
    #     fn=lambda query, types, sizes: update_table(bsard_df, query, types, sizes),
    #     inputs=[search_bar, filter_type, filter_size],
    #     outputs=bsard_table,
    # )
    # Update tables on model size filter.
    filter_size.change(
        fn=lambda query, types, sizes: update_table(mmarco_df, query, types, sizes),
        inputs=[search_bar, filter_type, filter_size],
        outputs=mmarco_table,
    )
    # filter_size.change(
    #     fn=lambda query, types, sizes: update_table(bsard_df, query, types, sizes),
    #     inputs=[search_bar, filter_type, filter_size],
    #     outputs=bsard_table,
    # )

    # Citation
    with gr.Column():
        with gr.Row():
            gr.HTML("""

                <h3>Citation</h3>
                <p>For attribution in academic contexts, please cite this benchmark and any of the models released by <a href="https://huggingface.co/antoinelouis" target="_blank">@antoinelouis</a> as follows:</p>
""") with gr.Row(): citation_block = ( "@online{louis2024decouvrir,\n" "\tauthor = 'Antoine Louis',\n" "\ttitle = 'DécouvrIR: A Benchmark for Evaluating the Robustness of Information Retrieval Models in French',\n" "\tpublisher = 'Hugging Face',\n" "\tmonth = 'mar',\n" "\tyear = '2024',\n" "\turl = 'https://huggingface.co/spaces/antoinelouis/decouvrir',\n" "}\n" ) gr.Code(citation_block, language=None, show_label=False) demo.launch()