rodrigomasini commited on
Commit
e1e11ec
1 Parent(s): 2ad8c60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -863
app.py CHANGED
@@ -929,21 +929,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
929
  meta = metadata_load(readme_path)
930
  if "model-index" not in meta:
931
  continue
932
- # meta['model-index'][0]["results"] is list of elements like:
933
- # {
934
- # "task": {"type": "Classification"},
935
- # "dataset": {
936
- # "type": "mteb/amazon_massive_intent",
937
- # "name": "MTEB MassiveIntentClassification (nb)",
938
- # "config": "nb",
939
- # "split": "test",
940
- # },
941
- # "metrics": [
942
- # {"type": "accuracy", "value": 39.81506388702084},
943
- # {"type": "f1", "value": 38.809586587791664},
944
- # ],
945
- # },
946
- # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
947
  if len(datasets) > 0:
948
  task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])]
949
  elif langs:
@@ -977,7 +963,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
977
  return df
978
 
979
  def get_mteb_average():
980
- global DATA_OVERALL, DATA_CLASSIFICATION_EN, DATA_CLUSTERING, DATA_PAIR_CLASSIFICATION, DATA_RERANKING, DATA_RETRIEVAL, DATA_STS_EN, DATA_SUMMARIZATION
981
  DATA_OVERALL = get_mteb_data(
982
  tasks=[
983
  "Classification",
@@ -1010,28 +996,6 @@ def get_mteb_average():
1010
 
1011
  DATA_OVERALL = DATA_OVERALL.round(2)
1012
 
1013
- DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION])
1014
- # Only keep rows with at least one score in addition to the "Model" & rank column
1015
- DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 2:].ne("").any(axis=1)]
1016
-
1017
- DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING])
1018
- DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 2:].ne("").any(axis=1)]
1019
-
1020
- DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION])
1021
- DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 2:].ne("").any(axis=1)]
1022
-
1023
- DATA_RERANKING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RERANKING])
1024
- DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 2:].ne("").any(axis=1)]
1025
-
1026
- DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL])
1027
- DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 2:].ne("").any(axis=1)]
1028
-
1029
- DATA_STS_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_STS])
1030
- DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 2:].ne("").any(axis=1)]
1031
-
1032
- DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION])
1033
- DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]
1034
-
1035
  # Fill NaN after averaging
1036
  DATA_OVERALL.fillna("", inplace=True)
1037
 
@@ -1040,838 +1004,96 @@ def get_mteb_average():
1040
 
1041
  return DATA_OVERALL
1042
 
1043
- def get_mteb_average_zh():
1044
- global DATA_OVERALL_ZH, DATA_CLASSIFICATION_ZH, DATA_CLUSTERING_ZH, DATA_PAIR_CLASSIFICATION_ZH, DATA_RERANKING_ZH, DATA_RETRIEVAL_ZH, DATA_STS_ZH
1045
- DATA_OVERALL_ZH = get_mteb_data(
1046
- tasks=[
1047
- "Classification",
1048
- "Clustering",
1049
- "PairClassification",
1050
- "Reranking",
1051
- "Retrieval",
1052
- "STS",
1053
- ],
1054
- datasets=TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH,
1055
- fillna=False,
1056
- add_emb_dim=True,
1057
- rank=False,
1058
- )
1059
- # Debugging:
1060
- # DATA_OVERALL_ZH.to_csv("overall.csv")
1061
-
1062
- DATA_OVERALL_ZH.insert(1, f"Average ({len(TASK_LIST_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_ZH].mean(axis=1, skipna=False))
1063
- DATA_OVERALL_ZH.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_CLASSIFICATION_ZH].mean(axis=1, skipna=False))
1064
- DATA_OVERALL_ZH.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_CLUSTERING_ZH].mean(axis=1, skipna=False))
1065
- DATA_OVERALL_ZH.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_PAIR_CLASSIFICATION_ZH].mean(axis=1, skipna=False))
1066
- DATA_OVERALL_ZH.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_RERANKING_ZH].mean(axis=1, skipna=False))
1067
- DATA_OVERALL_ZH.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_RETRIEVAL_ZH].mean(axis=1, skipna=False))
1068
- DATA_OVERALL_ZH.insert(7, f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)", DATA_OVERALL_ZH[TASK_LIST_STS_ZH].mean(axis=1, skipna=False))
1069
- DATA_OVERALL_ZH.sort_values(f"Average ({len(TASK_LIST_ZH)} datasets)", ascending=False, inplace=True)
1070
- # Start ranking from 1
1071
- DATA_OVERALL_ZH.insert(0, "Rank", list(range(1, len(DATA_OVERALL_ZH) + 1)))
1072
-
1073
- DATA_OVERALL_ZH = DATA_OVERALL_ZH.round(2)
1074
-
1075
- DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLASSIFICATION_ZH])
1076
- # Only keep rows with at least one score in addition to the "Model" & rank column
1077
- DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
1078
-
1079
- DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLUSTERING_ZH])
1080
- DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 2:].ne("").any(axis=1)]
1081
-
1082
- DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
1083
- DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
1084
-
1085
- DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RERANKING_ZH])
1086
- DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 2:].ne("").any(axis=1)]
1087
-
1088
- DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RETRIEVAL_ZH])
1089
- DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 2:].ne("").any(axis=1)]
1090
-
1091
- DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_STS_ZH])
1092
- DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 2:].ne("").any(axis=1)]
1093
-
1094
- # Fill NaN after averaging
1095
- DATA_OVERALL_ZH.fillna("", inplace=True)
1096
-
1097
- DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
1098
- DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
1099
-
1100
- return DATA_OVERALL_ZH
1101
-
1102
- def get_mteb_average_pl():
1103
- global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL
1104
- DATA_OVERALL_PL = get_mteb_data(
1105
- tasks=[
1106
- "Classification",
1107
- "Clustering",
1108
- "PairClassification",
1109
- "Retrieval",
1110
- "STS",
1111
- ],
1112
- datasets=TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL,
1113
- fillna=False,
1114
- add_emb_dim=True,
1115
- rank=False,
1116
- )
1117
- # Debugging:
1118
- # DATA_OVERALL_PL.to_csv("overall.csv")
1119
-
1120
- DATA_OVERALL_PL.insert(1, f"Average ({len(TASK_LIST_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PL].mean(axis=1, skipna=False))
1121
- DATA_OVERALL_PL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLASSIFICATION_PL].mean(axis=1, skipna=False))
1122
- DATA_OVERALL_PL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLUSTERING_PL].mean(axis=1, skipna=False))
1123
- DATA_OVERALL_PL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PAIR_CLASSIFICATION_PL].mean(axis=1, skipna=False))
1124
- DATA_OVERALL_PL.insert(5, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_RETRIEVAL_PL].mean(axis=1, skipna=False))
1125
- DATA_OVERALL_PL.insert(6, f"STS Average ({len(TASK_LIST_STS_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_STS_PL].mean(axis=1, skipna=False))
1126
- DATA_OVERALL_PL.sort_values(f"Average ({len(TASK_LIST_PL)} datasets)", ascending=False, inplace=True)
1127
- # Start ranking from 1
1128
- DATA_OVERALL_PL.insert(0, "Rank", list(range(1, len(DATA_OVERALL_PL) + 1)))
1129
-
1130
- DATA_OVERALL_PL = DATA_OVERALL_PL.round(2)
1131
-
1132
- DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLASSIFICATION_PL])
1133
- # Only keep rows with at least one score in addition to the "Model" & rank column
1134
- DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
1135
-
1136
- DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLUSTERING_PL])
1137
- DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 2:].ne("").any(axis=1)]
1138
-
1139
- DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
1140
- DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
1141
-
1142
- DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_RETRIEVAL_PL])
1143
- DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 2:].ne("").any(axis=1)]
1144
-
1145
- DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_STS_PL])
1146
- DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 2:].ne("").any(axis=1)]
1147
-
1148
- # Fill NaN after averaging
1149
- DATA_OVERALL_PL.fillna("", inplace=True)
1150
-
1151
- DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
1152
- DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
1153
-
1154
- return DATA_OVERALL_PL
1155
-
1156
  get_mteb_average()
1157
- get_mteb_average_pl()
1158
- get_mteb_average_zh()
1159
- DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
1160
- DATA_BITEXT_MINING_OTHER = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_OTHER)
1161
- DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
1162
- DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)
1163
- DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
1164
- DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
1165
- DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
1166
- DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
1167
-
1168
- # Exact, add all non-nan integer values for every dataset
1169
- NUM_SCORES = 0
1170
- DATASETS = []
1171
- MODELS = []
1172
- # LANGUAGES = []
1173
- for d in [
1174
- DATA_BITEXT_MINING,
1175
- DATA_BITEXT_MINING_OTHER,
1176
- DATA_CLASSIFICATION_EN,
1177
- DATA_CLASSIFICATION_DA,
1178
- DATA_CLASSIFICATION_NB,
1179
- DATA_CLASSIFICATION_PL,
1180
- DATA_CLASSIFICATION_SV,
1181
- DATA_CLASSIFICATION_ZH,
1182
- DATA_CLASSIFICATION_OTHER,
1183
- DATA_CLUSTERING,
1184
- DATA_CLUSTERING_DE,
1185
- DATA_CLUSTERING_PL,
1186
- DATA_CLUSTERING_ZH,
1187
- DATA_PAIR_CLASSIFICATION,
1188
- DATA_PAIR_CLASSIFICATION_PL,
1189
- DATA_PAIR_CLASSIFICATION_ZH,
1190
- DATA_RERANKING,
1191
- DATA_RERANKING_ZH,
1192
- DATA_RETRIEVAL,
1193
- DATA_RETRIEVAL_PL,
1194
- DATA_RETRIEVAL_ZH,
1195
- DATA_STS_EN,
1196
- DATA_STS_PL,
1197
- DATA_STS_ZH,
1198
- DATA_STS_OTHER,
1199
- DATA_SUMMARIZATION,
1200
- ]:
1201
- # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
1202
- cols_to_ignore = 3 if "Average" in d.columns else 2
1203
- # Count number of scores including only non-nan floats & excluding the rank column
1204
- NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum()
1205
- # Exclude rank & model name column (first two); Do not count different language versions as different datasets
1206
- DATASETS += [i.split(" ")[0] for i in d.columns[cols_to_ignore:]]
1207
- # LANGUAGES += [i.split(" ")[-1] for i in d.columns[cols_to_ignore:]]
1208
- MODELS += d["Model"].tolist()
1209
 
1210
  NUM_DATASETS = len(set(DATASETS))
1211
  # NUM_LANGUAGES = len(set(LANGUAGES))
1212
  NUM_MODELS = len(set(MODELS))
1213
 
1214
- # 1. Force headers to wrap
1215
- # 2. Force model column (maximum) width
1216
- # 3. Prevent model column from overflowing, scroll instead
1217
- css = """
1218
- table > thead {
1219
- white-space: normal
1220
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1221
 
1222
- table {
1223
- --cell-width-1: 210px
1224
- }
1225
 
1226
- table > tbody > tr > td:nth-child(2) > div {
1227
- overflow-x: auto
1228
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1229
  """
1230
 
1231
- block = gr.Blocks(css=css)
1232
- with block:
1233
- gr.Markdown(f"""
1234
- Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
1235
- """)
1236
- with gr.Tabs():
1237
- with gr.TabItem("Overall"):
1238
- with gr.TabItem("English"):
1239
- with gr.Row():
1240
- gr.Markdown("""
1241
- **Overall MTEB English leaderboard** 🔮
1242
-
1243
- - **Metric:** Various, refer to task tabs
1244
- - **Languages:** English
1245
- """)
1246
- with gr.Row():
1247
- data_overall = gr.components.Dataframe(
1248
- DATA_OVERALL,
1249
- datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
1250
- type="pandas",
1251
- height=600,
1252
- )
1253
- with gr.Row():
1254
- data_run_overall = gr.Button("Refresh")
1255
- data_run_overall.click(get_mteb_average, inputs=None, outputs=data_overall)
1256
- with gr.TabItem("Chinese"):
1257
- with gr.Row():
1258
- gr.Markdown("""
1259
- **Overall MTEB Chinese leaderboard (C-MTEB)** 🔮🇨🇳
1260
-
1261
- - **Metric:** Various, refer to task tabs
1262
- - **Languages:** Chinese
1263
- - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1264
- """)
1265
- with gr.Row():
1266
- data_overall_zh = gr.components.Dataframe(
1267
- DATA_OVERALL_ZH,
1268
- datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_ZH.columns),
1269
- type="pandas",
1270
- height=600,
1271
- )
1272
- with gr.Row():
1273
- data_run_overall_zh = gr.Button("Refresh")
1274
- data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
1275
- with gr.TabItem("Polish"):
1276
- with gr.Row():
1277
- gr.Markdown("""
1278
- **Overall MTEB Polish leaderboard (PL-MTEB)** 🔮🇵🇱
1279
-
1280
- - **Metric:** Various, refer to task tabs
1281
- - **Languages:** Polish
1282
- - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata), [Konrad Wojtasik](https://github.com/kwojtasi) & [BEIR-PL](https://arxiv.org/abs/2305.19840)
1283
- """)
1284
- with gr.Row():
1285
- data_overall_pl = gr.components.Dataframe(
1286
- DATA_OVERALL_PL,
1287
- datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_PL.columns),
1288
- type="pandas",
1289
- height=600,
1290
- )
1291
- with gr.Row():
1292
- data_run_overall_pl = gr.Button("Refresh")
1293
- data_run_overall_pl.click(get_mteb_average_pl, inputs=None, outputs=data_overall_pl)
1294
- with gr.TabItem("Bitext Mining"):
1295
- with gr.TabItem("English-X"):
1296
- with gr.Row():
1297
- gr.Markdown("""
1298
- **Bitext Mining English-X Leaderboard** 🎌
1299
-
1300
- - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
1301
- - **Languages:** 117 (Pairs of: English & other language)
1302
- """)
1303
- with gr.Row():
1304
- data_bitext_mining = gr.components.Dataframe(
1305
- DATA_BITEXT_MINING,
1306
- datatype=["number", "markdown"] + ["number"] * len(DATA_BITEXT_MINING.columns),
1307
- type="pandas",
1308
- )
1309
- with gr.Row():
1310
- data_run_bitext_mining = gr.Button("Refresh")
1311
- data_run_bitext_mining.click(
1312
- partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING),
1313
- outputs=data_bitext_mining,
1314
- )
1315
- with gr.TabItem("Danish"):
1316
- with gr.Row():
1317
- gr.Markdown("""
1318
- **Bitext Mining Danish Leaderboard** 🎌🇩🇰
1319
-
1320
- - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
1321
- - **Languages:** Danish & Bornholmsk (Danish Dialect)
1322
- - **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)
1323
- """)
1324
- with gr.Row():
1325
- data_bitext_mining_da = gr.components.Dataframe(
1326
- DATA_BITEXT_MINING_OTHER,
1327
- datatype=["number", "markdown"] + ["number"] * len(DATA_BITEXT_MINING_OTHER.columns),
1328
- type="pandas",
1329
- )
1330
- with gr.Row():
1331
- data_run_bitext_mining_da = gr.Button("Refresh")
1332
- data_run_bitext_mining_da.click(
1333
- partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING_OTHER),
1334
- outputs=data_bitext_mining_da,
1335
- )
1336
- with gr.TabItem("Classification"):
1337
- with gr.TabItem("English"):
1338
- with gr.Row():
1339
- gr.Markdown("""
1340
- **Classification English Leaderboard** ❤️
1341
-
1342
- - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
1343
- - **Languages:** English
1344
- """)
1345
- with gr.Row():
1346
- data_classification_en = gr.components.Dataframe(
1347
- DATA_CLASSIFICATION_EN,
1348
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_EN.columns),
1349
- type="pandas",
1350
- )
1351
- with gr.Row():
1352
- data_run_classification_en = gr.Button("Refresh")
1353
- data_run_classification_en.click(
1354
- partial(get_mteb_data, tasks=["Classification"], langs=["en"]),
1355
- outputs=data_classification_en,
1356
- )
1357
- with gr.TabItem("Chinese"):
1358
- with gr.Row():
1359
- gr.Markdown("""
1360
- **Classification Chinese Leaderboard** 🧡🇨🇳
1361
-
1362
- - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
1363
- - **Languages:** Chinese
1364
- - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1365
- """)
1366
- with gr.Row():
1367
- data_classification_zh = gr.components.Dataframe(
1368
- DATA_CLASSIFICATION_ZH,
1369
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_ZH.columns),
1370
- type="pandas",
1371
- )
1372
- with gr.Row():
1373
- data_run_classification_zh = gr.Button("Refresh")
1374
- data_run_classification_zh.click(
1375
- partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_ZH),
1376
- outputs=data_classification_zh,
1377
- )
1378
- with gr.TabItem("Danish"):
1379
- with gr.Row():
1380
- gr.Markdown("""
1381
- **Classification Danish Leaderboard** 🤍🇩🇰
1382
-
1383
- - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
1384
- - **Languages:** Danish
1385
- - **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)
1386
- """)
1387
- with gr.Row():
1388
- data_classification_da = gr.components.Dataframe(
1389
- DATA_CLASSIFICATION_DA,
1390
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_DA.columns),
1391
- type="pandas",
1392
- )
1393
- with gr.Row():
1394
- data_run_classification_da = gr.Button("Refresh")
1395
- data_run_classification_da.click(
1396
- partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA),
1397
- outputs=data_run_classification_da,
1398
- )
1399
- with gr.TabItem("Norwegian"):
1400
- with gr.Row():
1401
- gr.Markdown("""
1402
- **Classification Norwegian Leaderboard** 💙🇳🇴
1403
-
1404
- - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
1405
- - **Languages:** Norwegian Bokmål
1406
- - **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)
1407
- """)
1408
- with gr.Row():
1409
- data_classification_nb = gr.components.Dataframe(
1410
- DATA_CLASSIFICATION_NB,
1411
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_NB.columns),
1412
- type="pandas",
1413
- )
1414
- with gr.Row():
1415
- data_run_classification_nb = gr.Button("Refresh")
1416
- data_run_classification_nb.click(
1417
- partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_NB),
1418
- outputs=data_classification_nb,
1419
- )
1420
- with gr.TabItem("Polish"):
1421
- with gr.Row():
1422
- gr.Markdown("""
1423
- **Classification Polish Leaderboard** 🤍🇵🇱
1424
-
1425
- - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
1426
- - **Languages:** Polish
1427
- - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
1428
- """)
1429
- with gr.Row():
1430
- data_classification_pl = gr.components.Dataframe(
1431
- DATA_CLASSIFICATION_PL,
1432
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_PL.columns),
1433
- type="pandas",
1434
- )
1435
- with gr.Row():
1436
- data_run_classification_pl = gr.Button("Refresh")
1437
- data_run_classification_pl.click(
1438
- partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_PL),
1439
- outputs=data_classification_pl,
1440
- )
1441
- with gr.TabItem("Swedish"):
1442
- with gr.Row():
1443
- gr.Markdown("""
1444
- **Classification Swedish Leaderboard** 💛🇸🇪
1445
-
1446
- - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
1447
- - **Languages:** Swedish
1448
- - **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)
1449
- """)
1450
- with gr.Row():
1451
- data_classification_sv = gr.components.Dataframe(
1452
- DATA_CLASSIFICATION_SV,
1453
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_SV.columns),
1454
- type="pandas",
1455
- )
1456
- with gr.Row():
1457
- data_run_classification_sv = gr.Button("Refresh")
1458
- data_run_classification_sv.click(
1459
- partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_SV),
1460
- outputs=data_classification_sv,
1461
- )
1462
- with gr.TabItem("Other"):
1463
- with gr.Row():
1464
- gr.Markdown("""
1465
- **Classification Other Languages Leaderboard** 💜💚💙
1466
-
1467
- - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
1468
- - **Languages:** 47 (Only languages not included in the other tabs)
1469
- """)
1470
- with gr.Row():
1471
- data_classification = gr.components.Dataframe(
1472
- DATA_CLASSIFICATION_OTHER,
1473
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_OTHER) * 10,
1474
- type="pandas",
1475
- )
1476
- with gr.Row():
1477
- data_run_classification = gr.Button("Refresh")
1478
- data_run_classification.click(
1479
- partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_OTHER),
1480
- outputs=data_classification,
1481
- )
1482
- with gr.TabItem("Clustering"):
1483
- with gr.TabItem("English"):
1484
- with gr.Row():
1485
- gr.Markdown("""
1486
- **Clustering Leaderboard** ✨
1487
-
1488
- - **Metric:** Validity Measure (v_measure)
1489
- - **Languages:** English
1490
- """)
1491
- with gr.Row():
1492
- data_clustering = gr.components.Dataframe(
1493
- DATA_CLUSTERING,
1494
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLUSTERING.columns),
1495
- type="pandas",
1496
- )
1497
- with gr.Row():
1498
- data_run_clustering_en = gr.Button("Refresh")
1499
- data_run_clustering_en.click(
1500
- partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING),
1501
- outputs=data_clustering,
1502
- )
1503
- with gr.TabItem("Chinese"):
1504
- with gr.Row():
1505
- gr.Markdown("""
1506
- **Clustering Chinese Leaderboard** ✨🇨🇳
1507
-
1508
- - **Metric:** Validity Measure (v_measure)
1509
- - **Languages:** Chinese
1510
- - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1511
- """)
1512
- with gr.Row():
1513
- data_clustering_zh = gr.components.Dataframe(
1514
- DATA_CLUSTERING_ZH,
1515
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLUSTERING_ZH.columns),
1516
- type="pandas",
1517
- )
1518
- with gr.Row():
1519
- data_run_clustering_zh = gr.Button("Refresh")
1520
- data_run_clustering_zh.click(
1521
- partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH),
1522
- outputs=data_clustering_zh,
1523
- )
1524
- with gr.TabItem("German"):
1525
- with gr.Row():
1526
- gr.Markdown("""
1527
- **Clustering German Leaderboard** ✨🇩🇪
1528
-
1529
- - **Metric:** Validity Measure (v_measure)
1530
- - **Languages:** German
1531
- - **Credits:** [Silvan](https://github.com/slvnwhrl)
1532
- """)
1533
- with gr.Row():
1534
- data_clustering_de = gr.components.Dataframe(
1535
- DATA_CLUSTERING_DE,
1536
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLUSTERING_DE.columns) * 2,
1537
- type="pandas",
1538
- )
1539
- with gr.Row():
1540
- data_run_clustering_de = gr.Button("Refresh")
1541
- data_run_clustering_de.click(
1542
- partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_DE),
1543
- outputs=data_clustering_de,
1544
- )
1545
- with gr.TabItem("Polish"):
1546
- with gr.Row():
1547
- gr.Markdown("""
1548
- **Clustering Polish Leaderboard** ✨🇵🇱
1549
-
1550
- - **Metric:** Validity Measure (v_measure)
1551
- - **Languages:** Polish
1552
- - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
1553
- """)
1554
- with gr.Row():
1555
- data_clustering_pl = gr.components.Dataframe(
1556
- DATA_CLUSTERING_PL,
1557
- datatype=["number", "markdown"] + ["number"] * len(DATA_CLUSTERING_PL.columns) * 2,
1558
- type="pandas",
1559
- )
1560
- with gr.Row():
1561
- data_run_clustering_pl = gr.Button("Refresh")
1562
- data_run_clustering_pl.click(
1563
- partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_PL),
1564
- outputs=data_clustering_pl,
1565
- )
1566
- with gr.TabItem("Pair Classification"):
1567
- with gr.TabItem("English"):
1568
- with gr.Row():
1569
- gr.Markdown("""
1570
- **Pair Classification English Leaderboard** 🎭
1571
-
1572
- - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
1573
- - **Languages:** English
1574
- """)
1575
- with gr.Row():
1576
- data_pair_classification = gr.components.Dataframe(
1577
- DATA_PAIR_CLASSIFICATION,
1578
- datatype=["number", "markdown"] + ["number"] * len(DATA_PAIR_CLASSIFICATION.columns),
1579
- type="pandas",
1580
- )
1581
- with gr.Row():
1582
- data_run_pair_classification = gr.Button("Refresh")
1583
- data_run_pair_classification.click(
1584
- partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION),
1585
- outputs=data_pair_classification,
1586
- )
1587
- with gr.TabItem("Chinese"):
1588
- with gr.Row():
1589
- gr.Markdown("""
1590
- **Pair Classification Chinese Leaderboard** 🎭🇨🇳
1591
-
1592
- - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
1593
- - **Languages:** Chinese
1594
- - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1595
- """)
1596
- with gr.Row():
1597
- data_pair_classification_zh = gr.components.Dataframe(
1598
- DATA_PAIR_CLASSIFICATION_ZH,
1599
- datatype=["number", "markdown"] + ["number"] * len(DATA_PAIR_CLASSIFICATION_ZH.columns),
1600
- type="pandas",
1601
- )
1602
- with gr.Row():
1603
- data_run_pair_classification_zh = gr.Button("Refresh")
1604
- data_run_pair_classification_zh.click(
1605
- partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH),
1606
- outputs=data_pair_classification_zh,
1607
- )
1608
- with gr.TabItem("Polish"):
1609
- with gr.Row():
1610
- gr.Markdown("""
1611
- **Pair Classification Polish Leaderboard** 🎭🇵🇱
1612
-
1613
- - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
1614
- - **Languages:** Polish
1615
- - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
1616
- """)
1617
- with gr.Row():
1618
- data_pair_classification_pl = gr.components.Dataframe(
1619
- DATA_PAIR_CLASSIFICATION_PL,
1620
- datatype=["number", "markdown"] + ["number"] * len(DATA_PAIR_CLASSIFICATION_PL.columns),
1621
- type="pandas",
1622
- )
1623
- with gr.Row():
1624
- data_run_pair_classification_pl = gr.Button("Refresh")
1625
- data_run_pair_classification_pl.click(
1626
- partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_PL),
1627
- outputs=data_pair_classification_pl,
1628
- )
1629
- with gr.TabItem("Reranking"):
1630
- with gr.TabItem("English"):
1631
- with gr.Row():
1632
- gr.Markdown("""
1633
- **Reranking English Leaderboard** 🥈
1634
-
1635
- - **Metric:** Mean Average Precision (MAP)
1636
- - **Languages:** English
1637
- """)
1638
- with gr.Row():
1639
- data_reranking = gr.components.Dataframe(
1640
- DATA_RERANKING,
1641
- datatype=["number", "markdown"] + ["number"] * len(DATA_RERANKING.columns),
1642
- type="pandas",
1643
- )
1644
- with gr.Row():
1645
- data_run_reranking = gr.Button("Refresh")
1646
- data_run_reranking.click(
1647
- partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING),
1648
- outputs=data_reranking,
1649
- )
1650
- with gr.TabItem("Chinese"):
1651
- with gr.Row():
1652
- gr.Markdown("""
1653
- **Reranking Chinese Leaderboard** 🥈🇨🇳
1654
-
1655
- - **Metric:** Mean Average Precision (MAP)
1656
- - **Languages:** Chinese
1657
- - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1658
- """)
1659
- with gr.Row():
1660
- data_reranking_zh = gr.components.Dataframe(
1661
- DATA_RERANKING_ZH,
1662
- datatype=["number", "markdown"] + ["number"] * len(DATA_RERANKING_ZH.columns),
1663
- type="pandas",
1664
- )
1665
- with gr.Row():
1666
- data_run_reranking_zh = gr.Button("Refresh")
1667
- data_run_reranking_zh.click(
1668
- partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH),
1669
- outputs=data_reranking_zh,
1670
- )
1671
- with gr.TabItem("Retrieval"):
1672
- with gr.TabItem("English"):
1673
- with gr.Row():
1674
- gr.Markdown("""
1675
- **Retrieval English Leaderboard** 🔎
1676
-
1677
- - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1678
- - **Languages:** English
1679
- """)
1680
- with gr.Row():
1681
- data_retrieval = gr.components.Dataframe(
1682
- DATA_RETRIEVAL,
1683
- # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1684
- datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL.columns) * 2,
1685
- type="pandas",
1686
- )
1687
- with gr.Row():
1688
- data_run_retrieval = gr.Button("Refresh")
1689
- data_run_retrieval.click(
1690
- partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL),
1691
- outputs=data_retrieval,
1692
- )
1693
- with gr.TabItem("Chinese"):
1694
- with gr.Row():
1695
- gr.Markdown("""
1696
- **Retrieval Chinese Leaderboard** 🔎🇨🇳
1697
-
1698
- - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1699
- - **Languages:** Chinese
1700
- - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1701
- """)
1702
- with gr.Row():
1703
- data_retrieval_zh = gr.components.Dataframe(
1704
- DATA_RETRIEVAL_ZH,
1705
- # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1706
- datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_ZH.columns) * 2,
1707
- type="pandas",
1708
- )
1709
- with gr.Row():
1710
- data_run_retrieval_zh = gr.Button("Refresh")
1711
- data_run_retrieval_zh.click(
1712
- partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH),
1713
- outputs=data_retrieval_zh,
1714
- )
1715
- with gr.TabItem("Polish"):
1716
- with gr.Row():
1717
- gr.Markdown("""
1718
- **Retrieval Polish Leaderboard** 🔎🇵🇱
1719
-
1720
- - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
1721
- - **Languages:** Polish
1722
- - **Credits:** [Konrad Wojtasik](https://github.com/kwojtasi) & [BEIR-PL](https://arxiv.org/abs/2305.19840)
1723
- """)
1724
- with gr.Row():
1725
- data_retrieval_pl = gr.components.Dataframe(
1726
- DATA_RETRIEVAL_PL,
1727
- # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1728
- datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_PL.columns) * 2,
1729
- type="pandas",
1730
- )
1731
- with gr.Row():
1732
- data_run_retrieval_pl = gr.Button("Refresh")
1733
- data_run_retrieval_pl.click(
1734
- partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_PL),
1735
- outputs=data_retrieval_pl,
1736
- )
1737
- with gr.TabItem("STS"):
1738
- with gr.TabItem("English"):
1739
- with gr.Row():
1740
- gr.Markdown("""
1741
- **STS English Leaderboard** 🤖
1742
-
1743
- - **Metric:** Spearman correlation based on cosine similarity
1744
- - **Languages:** English
1745
- """)
1746
- with gr.Row():
1747
- data_sts_en = gr.components.Dataframe(
1748
- DATA_STS_EN,
1749
- datatype=["number", "markdown"] + ["number"] * len(DATA_STS_EN.columns),
1750
- type="pandas",
1751
- )
1752
- with gr.Row():
1753
- data_run_sts_en = gr.Button("Refresh")
1754
- data_run_sts_en.click(
1755
- partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS),
1756
- outputs=data_sts_en,
1757
- )
1758
- with gr.TabItem("Chinese"):
1759
- with gr.Row():
1760
- gr.Markdown("""
1761
- **STS Chinese Leaderboard** 🤖🇨🇳
1762
-
1763
- - **Metric:** Spearman correlation based on cosine similarity
1764
- - **Languages:** Chinese
1765
- - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1766
- """)
1767
- with gr.Row():
1768
- data_sts_zh = gr.components.Dataframe(
1769
- DATA_STS_ZH,
1770
- datatype=["number", "markdown"] + ["number"] * len(DATA_STS_ZH.columns),
1771
- type="pandas",
1772
- )
1773
- with gr.Row():
1774
- data_run_sts_zh = gr.Button("Refresh")
1775
- data_run_sts_zh.click(
1776
- partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH),
1777
- outputs=data_sts_zh,
1778
- )
1779
- with gr.TabItem("Polish"):
1780
- with gr.Row():
1781
- gr.Markdown("""
1782
- **STS Polish Leaderboard** 🤖🇵🇱
1783
-
1784
- - **Metric:** Spearman correlation based on cosine similarity
1785
- - **Languages:** Polish
1786
- - **Credits:** [Rafał Poświata](https://github.com/rafalposwiata)
1787
- """)
1788
- with gr.Row():
1789
- data_sts_pl = gr.components.Dataframe(
1790
- DATA_STS_PL,
1791
- datatype=["number", "markdown"] + ["number"] * len(DATA_STS_PL.columns),
1792
- type="pandas",
1793
- )
1794
- with gr.Row():
1795
- data_run_sts_pl = gr.Button("Refresh")
1796
- data_run_sts_pl.click(
1797
- partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_PL),
1798
- outputs=data_sts_pl,
1799
- )
1800
- with gr.TabItem("Other"):
1801
- with gr.Row():
1802
- gr.Markdown("""
1803
- **STS Other Leaderboard** 👽
1804
-
1805
- - **Metric:** Spearman correlation based on cosine similarity
1806
- - **Languages:** Arabic, Chinese, Dutch, English, French, German, Italian, Korean, Polish, Russian, Spanish (Only language combos not included in the other tabs)
1807
- """)
1808
- with gr.Row():
1809
- data_sts_other = gr.components.Dataframe(
1810
- DATA_STS_OTHER,
1811
- datatype=["number", "markdown"] + ["number"] * len(DATA_STS_OTHER.columns) * 2,
1812
- type="pandas",
1813
- )
1814
- with gr.Row():
1815
- data_run_sts_other = gr.Button("Refresh")
1816
- data_run_sts_other.click(
1817
- partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_OTHER),
1818
- outputs=data_sts_other,
1819
- )
1820
- with gr.TabItem("Summarization"):
1821
- with gr.Row():
1822
- gr.Markdown("""
1823
- **Summarization Leaderboard** 📜
1824
-
1825
- - **Metric:** Spearman correlation based on cosine similarity
1826
- - **Languages:** English
1827
- """)
1828
- with gr.Row():
1829
- data_summarization = gr.components.Dataframe(
1830
- DATA_SUMMARIZATION,
1831
- datatype=["number", "markdown"] + ["number"] * 2,
1832
- type="pandas",
1833
- )
1834
- with gr.Row():
1835
- data_run = gr.Button("Refresh")
1836
- data_run.click(
1837
- partial(get_mteb_data, tasks=["Summarization"]),
1838
- outputs=data_summarization,
1839
- )
1840
- gr.Markdown(f"""
1841
- - **Total Datasets**: {NUM_DATASETS}
1842
- - **Total Languages**: 113
1843
- - **Total Scores**: {NUM_SCORES}
1844
- - **Total Models**: {NUM_MODELS}
1845
- """ + r"""
1846
- Made with ❤️ for NLP. If this work is useful to you, please consider citing:
1847
-
1848
- ```bibtex
1849
- @article{muennighoff2022mteb,
1850
- doi = {10.48550/ARXIV.2210.07316},
1851
- url = {https://arxiv.org/abs/2210.07316},
1852
- author = {Muennighoff, Niklas and Tazi, Nouamane and Magne, Lo{\"\i}c and Reimers, Nils},
1853
- title = {MTEB: Massive Text Embedding Benchmark},
1854
- publisher = {arXiv},
1855
- journal={arXiv preprint arXiv:2210.07316},
1856
- year = {2022}
1857
- }
1858
- ```
1859
- """)
1860
- # Running the functions on page load in addition to when the button is clicked
1861
- # This is optional - If deactivated the data loaded at "Build time" is shown like for Overall tab
1862
- """
1863
- block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
1864
- """
1865
-
1866
- block.queue(max_size=10)
1867
- block.launch()
1868
-
1869
-
1870
- # Possible changes:
1871
- # Could add graphs / other visual content
1872
- # Could add verification marks
1873
-
1874
- # Sources:
1875
- # https://huggingface.co/spaces/gradio/leaderboard
1876
- # https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard
1877
- # https://getemoji.com/
 
929
  meta = metadata_load(readme_path)
930
  if "model-index" not in meta:
931
  continue
932
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
933
  if len(datasets) > 0:
934
  task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])]
935
  elif langs:
 
963
  return df
964
 
965
  def get_mteb_average():
966
+ global DATA_OVERALL
967
  DATA_OVERALL = get_mteb_data(
968
  tasks=[
969
  "Classification",
 
996
 
997
  DATA_OVERALL = DATA_OVERALL.round(2)
998
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
999
  # Fill NaN after averaging
1000
  DATA_OVERALL.fillna("", inplace=True)
1001
 
 
1004
 
1005
  return DATA_OVERALL
1006
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1007
# Build DATA_OVERALL (and the module-level per-task frames) once at startup.
get_mteb_average()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1008
 
1009
# Footer statistics: number of distinct datasets / models gathered while
# loading the leaderboard results.
NUM_DATASETS = len(frozenset(DATASETS))
# NUM_LANGUAGES = len(frozenset(LANGUAGES))
NUM_MODELS = len(frozenset(MODELS))
1012
 
1013
# Hidden Dataframe component holding the overall leaderboard. It is never
# displayed (visible=False); it only defines the output schema of the
# /predict API endpoint served by the Interface below.
data_overall = gr.components.Dataframe(
    DATA_OVERALL,
    # Fixed: the missing trailing comma here was a SyntaxError in the original.
    headers=list(DATA_OVERALL.columns),
    # Fixed: `datatype` expects per-column type names, but the original passed
    # list(DATA_OVERALL.values), i.e. the data rows themselves. Use the same
    # convention as the other leaderboard tables in this file
    # (rank + markdown model link, then numeric score columns).
    datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
    visible=False,
    line_breaks=False,
    interactive=False,
)
1021
+
1022
import unicodedata


def is_valid_unicode(char):
    """Return True when *char* is a character with an assigned Unicode name."""
    try:
        unicodedata.name(char)
    except ValueError:
        # Unassigned code points, most control characters, and lone
        # surrogates have no Unicode name and are treated as invalid.
        return False
    return True


def remove_invalid_unicode(input_string):
    """Drop characters without an assigned Unicode name from *input_string*.

    Non-string values are returned unchanged so this can be applied blindly
    over mixed-type DataFrame columns.
    """
    if not isinstance(input_string, str):
        return input_string
    return "".join(ch for ch in input_string if is_valid_unicode(ch))
1037
 
1038
def display(x, y):
    """Gradio endpoint: return the sanitized overall leaderboard table.

    Both parameters only satisfy the Interface input signature (the Markdown
    intro component and a hidden textbox) and are ignored.
    """
    global data_overall  # module-level table defined above

    # NOTE(review): `data_overall` is the gr.components.Dataframe component
    # built above, not a pandas DataFrame, so this guard normally fails and
    # the fallback branch below is taken — confirm which object was intended.
    if isinstance(data_overall, pd.DataFrame):
        # Scrub string (object-dtype) columns of characters that have no
        # Unicode name; they can break serialization of the API response.
        for column in data_overall.columns:
            if data_overall[column].dtype == "object":
                data_overall[column] = data_overall[column].apply(remove_invalid_unicode)
        # NOTE(review): COLS is not defined anywhere in this chunk — verify it
        # exists at module level, otherwise this line raises NameError.
        return data_overall[COLS]

    # Fixed: the original printed a message and implicitly returned None,
    # leaving the /predict API with an empty response. Fall back to the
    # module-level DataFrame so callers always get the table.
    print("leaderboard_table is not a DataFrame.")
    return DATA_OVERALL
1052
+
1053
# Hidden placeholder input required by the two-input Interface signature.
dummy1 = gr.Textbox(visible=False)

# User-facing usage notes rendered as the first "input" of the Interface.
# (Typos in the displayed text fixed: "provides" -> "provide",
# "response it's" -> "response is", "commenst" -> "comments".)
INTRODUCTION_TEXT = """
This is a copied space from LLM Trustworthy Leaderboard. Instead of displaying
the results as a table, this space was modified to simply provide a gradio API interface.
Using the following python script below, users can access the full leaderboard data easily.
Python on how to access the data:
```python
# Import dependencies
from gradio_client import Client
# Initialize the Gradio client with the API URL
client = Client("https://rodrigomasini-data-only-llm-perf-leaderboard.hf.space/")
try:
    # Perform the API call
    response = client.predict("","", api_name='/predict')
    # Check if response is directly accessible
    if len(response) > 0:
        print("Response received!")
        headers = response.get('headers', [])
        data = response.get('data', [])
        print(headers)
        # Remove comments if you want to download the dataset and save in csv format
        # Specify the path to your CSV file
        #csv_file_path = 'llm-perf-benchmark.csv'
        # Open the CSV file for writing
        #with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        #    writer = csv.writer(file)
        #    # Write the headers
        #    writer.writerow(headers)
        #    # Write the data
        #    for row in data:
        #        writer.writerow(row)
        #print(f"Results saved to {csv_file_path}")
    # If the above line prints a string that looks like JSON, you can parse it with json.loads(response)
    # Otherwise, you might need to adjust based on the actual structure of `response`
except Exception as e:
    print(f"An error occurred: {e}")
```
"""

# Minimal API-only app: the Markdown intro and hidden textbox feed `display`,
# which returns the leaderboard DataFrame through the hidden table component.
interface = gr.Interface(
    fn=display,
    inputs=[gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text"), dummy1],
    outputs=[data_overall],
)

interface.launch()