Spaces:

SeaEval
/

SeaEval_Leaderboard

Running

App Files Files Community

binwang committed on Jan 19

Commit

1d3a534

•

1 Parent(s): 7d66eb7

new format

Browse files

Files changed (1) hide show

app.py +1001 -1044

app.py CHANGED Viewed

@@ -38,19 +38,12 @@ def make_clickable_model(model_name, link=None):
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # Load the leaderboard results from 'all_results.json'. The path is relative,
 # so it is resolved against the current working directory of the process.
 with open('all_results.json', 'r') as f:
     # Parsed JSON document; the statements below call .keys() on it and index it
     # by key, so it is expected to be a JSON object (dict) at the top level.
     ALL_RESULTS = json.load(f)
 # Top-level keys of the results file (presumably model names -- confirm against the JSON).
 MODEL_LIST = list(ALL_RESULTS.keys())
 # Number of distinct keys. Dict keys are already unique, so the set() does not
 # change the count here.
 NUM_MODELS = len(set(MODEL_LIST))
 # Maps each key to the "model_size" field of its entry; raises KeyError if an
 # entry lacks that field.
 MODEL_TO_SIZE = {model: ALL_RESULTS[model]["model_size"] for model in MODEL_LIST}
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
@@ -1966,8 +1959,68 @@ MRPC_FIVE_SHOT = get_data_mrpc(eval_mode="five_shot")
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
-block = gr.Blocks()
 with block:
     gr.Markdown(f"""
@@ -1979,1054 +2032,954 @@ with block:
     - **Mode of Evaluation**: Zero-Shot, Five-Shot
     The following table shows the performance of the models on the SeaEval benchmark.
     """)
-    with gr.Tabs():
-        # dataset 1: cross-mmlu
-        with gr.TabItem("Cross-MMLU"):
-            with gr.Row():
-                gr.Markdown("""
-                **Cross-MMLU Leaderboard** 🔮
-                - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
-                - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        cross_mmlu_zero_shot_overall = gr.components.Dataframe(
-                            CROSS_MMLU_ZERO_SHOT_OVERALL,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_OVERALL.columns),
-                            type="pandas",
-                        )
-                with gr.TabItem("Language Performance"):
-                    with gr.Row():
-                        cross_mmlu_zero_shot_overall = gr.components.Dataframe(
-                            CROSS_MMLU_ZERO_SHOT_LANGUAGE,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_LANGUAGE.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        cross_mmlu_zero_shot_overall = gr.components.Dataframe(
-                            CROSS_MMLU_FIVE_SHOT_OVERALL,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_OVERALL.columns),
-                            type="pandas",
-                        )
-                with gr.TabItem("Language Performance"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CROSS_MMLU_FIVE_SHOT_LANGUAGE,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_LANGUAGE.columns),
-                            type="pandas",
-                        )
-        # dataset 2: cross-logiqa
-        with gr.TabItem("Cross-LogiQA"):
-            with gr.Row():
-                gr.Markdown("""
-                **Cross-LogiQA Leaderboard** 🔮
-                - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
-                - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CROSS_LOGIQA_ZERO_SHOT_OVERALL,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_ZERO_SHOT_OVERALL.columns),
-                            type="pandas",
-                        )
-                with gr.TabItem("Language Performance"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CROSS_LOGIQA_ZERO_SHOT_LANGUAGE,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_ZERO_SHOT_LANGUAGE.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CROSS_LOGIQA_FIVE_SHOT_OVERALL,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_FIVE_SHOT_OVERALL.columns),
-                            type="pandas",
-                        )
-                with gr.TabItem("Language Performance"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CROSS_LOGIQA_FIVE_SHOT_LANGUAGE,
-                            datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_FIVE_SHOT_LANGUAGE.columns),
-                            type="pandas",
-                        )
-        # dataset 3: SG_EVAL
-        with gr.TabItem("SG_EVAL"):
-            with gr.Row():
-                gr.Markdown("""
-                **SG_EVAL Leaderboard** 🔮
-                - **Metric:** Accuracy
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SG_EVAL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SG_EVAL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SG_EVAL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SG_EVAL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 4:
-        with gr.TabItem("US_EVAL"):
-            with gr.Row():
-                gr.Markdown("""
-                **US_EVAL Leaderboard** 🔮
-                - **Metric:** Accuracy
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            US_EVAL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(US_EVAL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            US_EVAL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(US_EVAL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 5:
-        with gr.TabItem("CN_EVAL"):
-            with gr.Row():
-                gr.Markdown("""
-                **CN_EVAL Leaderboard** 🔮
-                - **Metric:** Accuracy
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CN_EVAL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(CN_EVAL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CN_EVAL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(CN_EVAL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 6:
-        with gr.TabItem("PH_EVAL"):
-            with gr.Row():
-                gr.Markdown("""
-                **PH_EVAL Leaderboard** 🔮
-                - **Metric:** Accuracy
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            PH_EVAL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(PH_EVAL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            PH_EVAL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(PH_EVAL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 7:
-        with gr.TabItem("Singlish to English Translation"):
-            with gr.Row():
-                gr.Markdown("""
-                **SING2ENG Leaderboard** 🔮
-                - **Metric:** BLEU Avg.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SING2ENG_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SING2ENG_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SING2ENG_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SING2ENG_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-    gr.Markdown(f"""
-                The following are datasets that are not originally collected by SeaEval, but are included in the leaderboard for completeness.
-    """)
-    with gr.Tabs():
-        # dataset 8:
-        with gr.TabItem("FLORES Indonesian to English Translation"):
-            with gr.Row():
-                gr.Markdown("""
-                **flores_ind2eng Leaderboard** 🔮
-                - **Metric:** BLEU Avg.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_IND2ENG_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_IND2ENG_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_IND2ENG_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_IND2ENG_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 9:
-        with gr.TabItem("FLORES Vitenamese to English Translation"):
-            with gr.Row():
-                gr.Markdown("""
-                **flores_vie2eng Leaderboard** 🔮
-                - **Metric:** BLEU Avg.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_VIE2ENG_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_VIE2ENG_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_VIE2ENG_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_VIE2ENG_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 10:
-        with gr.TabItem("FLORES Chinese to English Translation"):
-            with gr.Row():
-                gr.Markdown("""
-                **flores_zho2eng Leaderboard** 🔮
-                - **Metric:** BLEU Avg.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_ZHO2ENG_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZHO2ENG_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_ZHO2ENG_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZHO2ENG_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 11:
-        with gr.TabItem("FLORES Malay to English Translation"):
-            with gr.Row():
-                gr.Markdown("""
-                **flores_zsm2eng Leaderboard** 🔮
-                - **Metric:** BLEU Avg.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_ZSM2ENG_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZSM2ENG_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            FLORES_ZSM2ENG_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(FLORES_ZSM2ENG_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 12:
-        with gr.TabItem("MMLU"):
-            with gr.Row():
-                gr.Markdown("""
-                **MMLU Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MMLU_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MMLU_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MMLU_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MMLU_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 13:
-        with gr.TabItem("MMLU Full"):
-            with gr.Row():
-                gr.Markdown("""
-                **MMLU Full Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MMLU_FULL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MMLU_FULL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MMLU_FULL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MMLU_FULL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 14:
-        with gr.TabItem("C_EVAL"):
-            with gr.Row():
-                gr.Markdown("""
-                **C_EVAL Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            C_EVAL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(C_EVAL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            C_EVAL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(C_EVAL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 15:
-        with gr.TabItem("C_EVAL Full"):
-            with gr.Row():
-                gr.Markdown("""
-                **C_EVAL Full Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            C_EVAL_FULL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(C_EVAL_FULL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            C_EVAL_FULL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(C_EVAL_FULL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 16:
-        with gr.TabItem("CMMLU"):
-            with gr.Row():
-                gr.Markdown("""
-                **CMMLU Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CMMLU_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(CMMLU_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CMMLU_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(CMMLU_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 17:
-        with gr.TabItem("CMMLU Full"):
-            with gr.Row():
-                gr.Markdown("""
-                **CMMLU Full Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CMMLU_FULL_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(CMMLU_FULL_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            CMMLU_FULL_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(CMMLU_FULL_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 18:
-        with gr.TabItem("ZBench"):
-            with gr.Row():
-                gr.Markdown("""
-                **ZBench Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            ZBENCH_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(ZBENCH_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            ZBENCH_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(ZBENCH_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset 18:
-        with gr.TabItem("ind_emotion"):
-            with gr.Row():
-                gr.Markdown("""
-                **ind_emotion Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Indonesian
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            IND_EMOTION_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(IND_EMOTION_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            IND_EMOTION_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(IND_EMOTION_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("OCNLI"):
-            with gr.Row():
-                gr.Markdown("""
-                **OCNLI Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            OCNLI_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(OCNLI_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            OCNLI_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(OCNLI_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("C3"):
-            with gr.Row():
-                gr.Markdown("""
-                **C3 Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** Chinese
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            C3_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(C3_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            C3_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(C3_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("DREAM"):
-            with gr.Row():
-                gr.Markdown("""
-                **DREAM Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            DREAM_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(DREAM_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            DREAM_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(DREAM_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("SAMSum"):
-            with gr.Row():
-                gr.Markdown("""
-                **SAMSum Leaderboard** 🔮
-                - **Metric:** ROUGE.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SAMSUM_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SAMSUM_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SAMSUM_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SAMSUM_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("DialogSum"):
-            with gr.Row():
-                gr.Markdown("""
-                **DialogSum Leaderboard** 🔮
-                - **Metric:** ROUGE.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            DIALOGSUM_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(DIALOGSUM_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            DIALOGSUM_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(DIALOGSUM_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("SST2"):
-            with gr.Row():
-                gr.Markdown("""
-                **SST2 Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SST2_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SST2_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            SST2_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(SST2_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("COLA"):
-            with gr.Row():
-                gr.Markdown("""
-                **COLA Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            COLA_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(COLA_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            COLA_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(COLA_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("QQP"):
-            with gr.Row():
-                gr.Markdown("""
-                **QQP Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            QQP_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(QQP_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            QQP_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(QQP_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("MNLI"):
-            with gr.Row():
-                gr.Markdown("""
-                **MNLI Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MNLI_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MNLI_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MNLI_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MNLI_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("QNLI"):
-            with gr.Row():
-                gr.Markdown("""
-                **QNLI Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            QNLI_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(QNLI_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            QNLI_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(QNLI_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("WNLI"):
-            with gr.Row():
-                gr.Markdown("""
-                **WNLI Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            WNLI_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(WNLI_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            WNLI_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(WNLI_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("RTE"):
-            with gr.Row():
-                gr.Markdown("""
-                **RTE Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            RTE_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(RTE_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            RTE_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(RTE_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
-        # dataset
-        with gr.TabItem("MRPC"):
-            with gr.Row():
-                gr.Markdown("""
-                **MRPC Leaderboard** 🔮
-                - **Metric:** Accuracy.
-                - **Languages:** English
-                """)
-            with gr.TabItem("zero_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MRPC_ZERO_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MRPC_ZERO_SHOT.columns),
-                            type="pandas",
-                        )
-            with gr.TabItem("five_shot"):
-                with gr.TabItem("Overall"):
-                    with gr.Row():
-                        gr.components.Dataframe(
-                            MRPC_FIVE_SHOT,
-                            datatype=["number", "markdown"] + ["number"] * len(MRPC_FIVE_SHOT.columns),
-                            type="pandas",
-                        )
     gr.Markdown(r"""
-    If this work is useful to you, please citing our work:
     ```bibtex
         @article{SeaEval2023,
         title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
         author={Wang, Bin and Liu, Zhengyuan and Huang, Xin and Jiao, Fangkai and Ding, Yang and Aw, Ai Ti and Chen, Nancy F.},
         journal={arXiv preprint arXiv:2309.04766},
-        year={2023}
-        }
     ```
     """)
     # Running the functions on page load in addition to when the button is clicked
@@ -3035,8 +2988,12 @@ with block:
     block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
     """
 block.queue(max_size=10)
-block.launch(server_name="0.0.0.0", share=True)
 # Possible changes:

 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # Load the evaluation results; the top-level keys of the JSON object are the model names.
 # JSON text is UTF-8, so decode it explicitly instead of relying on the platform default encoding.
 with open('all_results.json', 'r', encoding='utf-8') as f:
     ALL_RESULTS = json.load(f)
 MODEL_LIST = list(ALL_RESULTS.keys())
 # Keys of a loaded JSON object are already unique; set() is kept so NUM_MODELS is computed exactly as before.
 NUM_MODELS = len(set(MODEL_LIST))
 # Each model entry is indexed with "model_size"; a missing field raises KeyError at import time.
 MODEL_TO_SIZE = {model: ALL_RESULTS[model]["model_size"] for model in MODEL_LIST}
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
 # =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =  =
+# Earlier variant kept for reference: block = gr.Blocks(theme=gr.themes.Soft())
+theme = gr.themes.Soft().set(  # Soft theme with the primary background fill overridden
+    background_fill_primary='*secondary_50'
+)  # NOTE(review): `theme` is not passed to gr.Blocks on the next line - confirm whether it is used elsewhere
+block = gr.Blocks(theme='finlaymacklon/smooth_slate')  # app container; the theme is given as a string identifier
 with block:
     gr.Markdown(f"""
     - **Mode of Evaluation**: Zero-Shot, Five-Shot
     The following table shows the performance of the models on the SeaEval benchmark.
+    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     """)
+    with gr.Tabs():
+        with gr.TabItem("Cross-Lingual Consistency"):
+            # dataset 1: cross-mmlu
+            with gr.TabItem("Cross-MMLU"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_MMLU_ZERO_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_OVERALL.columns),
+                                type="pandas",
+                            )
+                    with gr.TabItem("Language Performance"):
+                        with gr.Row():
+                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_MMLU_ZERO_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_ZERO_SHOT_LANGUAGE.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            cross_mmlu_zero_shot_overall = gr.components.Dataframe(
+                                CROSS_MMLU_FIVE_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_OVERALL.columns),
+                                type="pandas",
+                            )
+                    with gr.TabItem("Language Performance"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                CROSS_MMLU_FIVE_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_MMLU_FIVE_SHOT_LANGUAGE.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **Cross-MMLU Leaderboard** 🔮
+                    - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
+                    - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
+                    """)
+            # dataset 2: Cross-LogiQA - "Zero Shot" / "Five Shot" tabs, each with "Overall" and "Language Performance" tables, plus a Markdown note
+            with gr.TabItem("Cross-LogiQA"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot overall table
+                                CROSS_LOGIQA_ZERO_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_ZERO_SHOT_OVERALL.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                    with gr.TabItem("Language Performance"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot per-language table
+                                CROSS_LOGIQA_ZERO_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_ZERO_SHOT_LANGUAGE.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot overall table
+                                CROSS_LOGIQA_FIVE_SHOT_OVERALL,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_FIVE_SHOT_OVERALL.columns),
+                                type="pandas",
+                            )
+                    with gr.TabItem("Language Performance"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot per-language table
+                                CROSS_LOGIQA_FIVE_SHOT_LANGUAGE,
+                                datatype=["number", "markdown"] + ["number"] * len(CROSS_LOGIQA_FIVE_SHOT_LANGUAGE.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **Cross-LogiQA Leaderboard** 🔮
+                    - **Metric:** Cross-Lingual Consistency, Accuracy, AC3
+                    - **Languages:** English, Chinese, Malay, Indonesian, Spanish, Vietnamese, Filipino
+                    """)
+        with gr.TabItem("Cultural Reasoning and Understanding"):
+            # dataset 3: SG_EVAL - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("SG_EVAL"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                SG_EVAL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SG_EVAL_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                SG_EVAL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SG_EVAL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **SG_EVAL Leaderboard** 🔮
+                    - **Metric:** Accuracy
+                    - **Languages:** English
+                    """)
+            # dataset 4: US_EVAL - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("US_EVAL"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                US_EVAL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(US_EVAL_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                US_EVAL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(US_EVAL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **US_EVAL Leaderboard** 🔮
+                    - **Metric:** Accuracy
+                    - **Languages:** English
+                    """)
+            # dataset 5: CN_EVAL - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("CN_EVAL"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                CN_EVAL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(CN_EVAL_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                CN_EVAL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(CN_EVAL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **CN_EVAL Leaderboard** 🔮
+                    - **Metric:** Accuracy
+                    - **Languages:** Chinese
+                    """)
+            # dataset 6: PH_EVAL - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("PH_EVAL"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                PH_EVAL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(PH_EVAL_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                PH_EVAL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(PH_EVAL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **PH_EVAL Leaderboard** 🔮
+                    - **Metric:** Accuracy
+                    - **Languages:** English
+                    """)
+            # dataset 7: Singlish to English translation (SING2ENG) - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("Singlish to English Translation"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                SING2ENG_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SING2ENG_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                SING2ENG_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SING2ENG_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **SING2ENG Leaderboard** 🔮
+                    - **Metric:** BLEU Avg.
+                    - **Languages:** English
+                    """)
+        with gr.TabItem("Reasoning"):
+            # dataset 12: MMLU - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("MMLU"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                MMLU_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MMLU_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                MMLU_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MMLU_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **MMLU Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset 13: MMLU Full - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("MMLU Full"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                MMLU_FULL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MMLU_FULL_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                MMLU_FULL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MMLU_FULL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **MMLU Full Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset 14: C_EVAL - zero-shot and five-shot tables plus a Markdown note
+            with gr.TabItem("C_EVAL"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # zero-shot table
+                                C_EVAL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(C_EVAL_ZERO_SHOT.columns),  # NOTE(review): 2 + len(columns) entries, two more than the frame has columns - confirm intended
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(  # five-shot table
+                                C_EVAL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(C_EVAL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():  # description of the benchmark, created after the result tabs
+                    gr.Markdown("""
+                    **C_EVAL Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Chinese
+                    """)
+            # dataset 15:
+            with gr.TabItem("C_EVAL Full"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                C_EVAL_FULL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(C_EVAL_FULL_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                C_EVAL_FULL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(C_EVAL_FULL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **C_EVAL Full Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Chinese
+                    """)
+            # dataset 16:
+            with gr.TabItem("CMMLU"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                CMMLU_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(CMMLU_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                CMMLU_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(CMMLU_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **CMMLU Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Chinese
+                    """)
+            # dataset 17:
+            with gr.TabItem("CMMLU Full"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                CMMLU_FULL_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(CMMLU_FULL_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                CMMLU_FULL_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(CMMLU_FULL_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **CMMLU Full Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Chinese
+                    """)
+            # dataset 18:
+            with gr.TabItem("ZBench"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                ZBENCH_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(ZBENCH_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                ZBENCH_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(ZBENCH_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **ZBench Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Chinese
+                    """)
+        with gr.TabItem("FLORES Translation"):
+            # dataset 8:
+            with gr.TabItem("FLORES Indonesian to English Translation"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_IND2ENG_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_IND2ENG_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_IND2ENG_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_IND2ENG_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **flores_ind2eng Leaderboard** 🔮
+                    - **Metric:** BLEU Avg.
+                    - **Languages:** English
+                    """)
+            # dataset 9:
+            with gr.TabItem("FLORES Vitenamese to English Translation"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_VIE2ENG_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_VIE2ENG_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_VIE2ENG_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_VIE2ENG_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **flores_vie2eng Leaderboard** 🔮
+                    - **Metric:** BLEU Avg.
+                    - **Languages:** English
+                    """)
+            # dataset 10:
+            with gr.TabItem("FLORES Chinese to English Translation"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_ZHO2ENG_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_ZHO2ENG_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_ZHO2ENG_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_ZHO2ENG_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **flores_zho2eng Leaderboard** 🔮
+                    - **Metric:** BLEU Avg.
+                    - **Languages:** English
+                    """)
+            # dataset 11:
+            with gr.TabItem("FLORES Malay to English Translation"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_ZSM2ENG_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_ZSM2ENG_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                FLORES_ZSM2ENG_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(FLORES_ZSM2ENG_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **flores_zsm2eng Leaderboard** 🔮
+                    - **Metric:** BLEU Avg.
+                    - **Languages:** English
+                    """)
+        with gr.TabItem("Emotion Recognition"):
+            # dataset 18:
+            with gr.TabItem("ind_emotion"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                IND_EMOTION_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(IND_EMOTION_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                IND_EMOTION_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(IND_EMOTION_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **ind_emotion Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Indonesian
+                    """)
+            # dataset
+            with gr.TabItem("SST2"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                SST2_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SST2_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                SST2_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SST2_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **SST2 Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+        with gr.TabItem("Dialogue"):
+            # dataset
+            with gr.TabItem("DREAM"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                DREAM_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(DREAM_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                DREAM_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(DREAM_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **DREAM Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("SAMSum"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                SAMSUM_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SAMSUM_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                SAMSUM_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(SAMSUM_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **SAMSum Leaderboard** 🔮
+                    - **Metric:** ROUGE.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("DialogSum"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                DIALOGSUM_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(DIALOGSUM_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                DIALOGSUM_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(DIALOGSUM_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **DialogSum Leaderboard** 🔮
+                    - **Metric:** ROUGE.
+                    - **Languages:** English
+                    """)
+        with gr.TabItem("Foundamental NLP"):
+            # dataset
+            with gr.TabItem("OCNLI"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                OCNLI_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(OCNLI_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                OCNLI_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(OCNLI_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **OCNLI Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Chinese
+                    """)
+            # dataset
+            with gr.TabItem("C3"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                C3_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(C3_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                C3_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(C3_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **C3 Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** Chinese
+                    """)
+            # dataset
+            with gr.TabItem("COLA"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                COLA_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(COLA_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                COLA_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(COLA_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **COLA Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("QQP"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                QQP_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(QQP_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                QQP_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(QQP_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **QQP Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("MNLI"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                MNLI_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MNLI_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                MNLI_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MNLI_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **MNLI Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("QNLI"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                QNLI_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(QNLI_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                QNLI_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(QNLI_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **QNLI Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("WNLI"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                WNLI_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(WNLI_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                WNLI_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(WNLI_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **WNLI Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("RTE"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                RTE_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(RTE_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                RTE_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(RTE_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **RTE Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
+            # dataset
+            with gr.TabItem("MRPC"):
+                with gr.TabItem("Zero Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                MRPC_ZERO_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MRPC_ZERO_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.TabItem("Five Shot"):
+                    with gr.TabItem("Overall"):
+                        with gr.Row():
+                            gr.components.Dataframe(
+                                MRPC_FIVE_SHOT,
+                                datatype=["number", "markdown"] + ["number"] * len(MRPC_FIVE_SHOT.columns),
+                                type="pandas",
+                            )
+                with gr.Row():
+                    gr.Markdown("""
+                    **MRPC Leaderboard** 🔮
+                    - **Metric:** Accuracy.
+                    - **Languages:** English
+                    """)
     gr.Markdown(r"""
+    If our datasets and leaderboard are useful, please consider cite:
     ```bibtex
         @article{SeaEval2023,
         title={SeaEval for Multilingual Foundation Models: From Cross-Lingual Alignment to Cultural Reasoning},
         author={Wang, Bin and Liu, Zhengyuan and Huang, Xin and Jiao, Fangkai and Ding, Yang and Aw, Ai Ti and Chen, Nancy F.},
         journal={arXiv preprint arXiv:2309.04766},
+        year={2023}}
     ```
     """)
     # Running the functions on page load in addition to when the button is clicked
     block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
     """
 block.queue(max_size=10)  # enable Gradio's request queue, capped at 10 pending requests
+block.launch(server_name="0.0.0.0", share=False)  # serve on all network interfaces; no public share link
 # Possible changes: