Spaces:

panuthept
/

thai_sentence_embedding_benchmark

Runtime error

App Files Community

panuthept committed on Aug 8

Commit

c99ced4

•

1 Parent(s): 7fd4e12

test update code

Browse files

Files changed (4) hide show

.gitignore +2 -0
app.py +286 -191
app_demo.py +204 -0
src/about.py +1 -1

.gitignore CHANGED Viewed

@@ -11,3 +11,5 @@ eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/

 eval-queue-bk/
 eval-results-bk/
 logs/
+.DS_Store

app.py CHANGED Viewed

@@ -1,204 +1,299 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
-from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
-from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
-    INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
-    TITLE,
-)
-from src.display.css_html_js import custom_css
-from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    fields,
-    WeightType,
-    Precision
-)
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
-### Space initialisation
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
-        interactive=False,
-    )
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()

 import gradio as gr
 import pandas as pd
+from css_html_js import custom_css
+TITLE = """<h1 align="center" id="space-title">🇲🇾 Malay LLM Leaderboard</h1>"""
+INTRODUCTION_TEXT = """
+📐 The 🇲🇾 Malay LLM Leaderboard aims to track, rank and evaluate open LLMs on Malay tasks. All notebooks at https://github.com/mesolitica/llm-benchmarks, feel free to submit your own score at https://huggingface.co/spaces/mesolitica/malay-llm-leaderboard/discussions with link to the notebook.
+## Dataset
+📈 We evaluate models based on 3 datasets,
+1. BM-PT3 Paper 1, contains 54 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/BM-pt3
+- This test is for 15 years old Malaysia student, it is about reading comprehension and general knowledge for malay language.
+2. Tatabahasa, contains 349 questions, https://github.com/mesolitica/malaysian-dataset/tree/master/llm-benchmark/tatabahasabm.tripod.com
+- This test is general test for malay grammar.
+3. General high school science questions, contains 323 questions, https://huggingface.co/datasets/mesolitica/mysoalan.com-qa
+- This test is general test for science.
+4. Translated MMLU, https://huggingface.co/datasets/mesolitica/translated-MMLU
+- This test is to test general knowledge, originally from MMLU.
+## Contributions
+1. Claude 1.3 and 2.0 Tatabahasa contributed by https://www.linkedin.com/in/fahim-surani
+2. Claude 3.0 contributed by https://github.com/theblackcat102, https://huggingface.co/theblackcat102
+## Tagging
+🟢 pretrained ⭕ instruction-tuned 📦 close sourced
+"""
+close_source = [
+    {
+        'T': '📦',
+        'model': 'claude-3-opus-20240229',
+        'BM-PT3 0-shot': 57.41,
+        'BM-PT3 1-shot': 53.70,
+        'BM-PT3 3-shots': 62.96,
+        'Tatabahasa 0-shot': 77.08,
+        'Tatabahasa 1-shot': 73.93,
+        'Tatabahasa 3-shots': 75.64,
+    },
+    {
+        'T': '📦',
+        'model': 'claude-3-sonnet-20240229',
+        'BM-PT3 0-shot': 48.15,
+        'BM-PT3 1-shot': 50.00,
+        'BM-PT3 3-shots': 37.04,
+        'Tatabahasa 0-shot': 65.90,
+        'Tatabahasa 1-shot': 38.40,
+        'Tatabahasa 3-shots': 40.97,
+    },
+    {
+        'T': '📦',
+        'model': 'claude-3-haiku-20240307',
+        'BM-PT3 0-shot': 48.15,
+        'BM-PT3 1-shot': 50.00,
+        'BM-PT3 3-shots': 50.00,
+        'Tatabahasa 0-shot': 62.75,
+        'Tatabahasa 1-shot': 49.86,
+        'Tatabahasa 3-shots': 24.07,
+    },
+    {
+        'T': '📦',
+        'model': 'AWS Bedrock Claude 1.3',
+        'Tatabahasa 0-shot': 60.650887573964496,
+        'Tatabahasa 1-shot': 62.46418338108882,
+        'Tatabahasa 3-shots': 67.34104046242774,
+    },
+    {
+        'T': '📦',
+        'model': 'AWS Bedrock Claude 2',
+        'Tatabahasa 0-shot': 61.702127659574465,
+        'Tatabahasa 1-shot': 60.17191977077364,
+        'Tatabahasa 3-shots': 59.598853868194844,
+    },
+    {
+        'T': '📦',
+        'model': 'gpt-4-1106-preview',
+        'BM-PT3 0-shot': 51.85185185185185,
+        'BM-PT3 1-shot': 66.66666666666666,
+        'BM-PT3 3-shots': 55.55555555555556,
+        'Tatabahasa 0-shot': 75.64469914040114,
+        'Tatabahasa 1-shot': 73.63896848137536,
+        'Tatabahasa 3-shots': 75.64469914040114,
+    },
+    {
+        'T': '📦',
+        'model': 'gpt-3.5-turbo-0613',
+        'BM-PT3 0-shot': 36.53846153846153,
+        'BM-PT3 1-shot': 28.846153846153843,
+        'BM-PT3 3-shots': 24.528301886792452,
+        'Tatabahasa 0-shot': 59.530791788856305,
+        'Tatabahasa 1-shot': 60.80691642651297,
+        'Tatabahasa 3-shots': 63.03724928366762,
+    },
+]
+open_source = [
+    {
+        'T': '🟢',
+        'model': '[meta-llama/llama2-7b](https://huggingface.co/meta-llama/Llama-2-7b-hf)',
+        'Tatabahasa 0-shot': 24.355300859598856,
+        'Tatabahasa 1-shot': 28.08022922636103,
+        'Tatabahasa 3-shots': 24.641833810888254,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/tinyllama-1.1b-4096-fpf](https://huggingface.co/mesolitica/tinyllama-1.1b-4096-fpf)',
+        'Tatabahasa 0-shot': 23.248407643312103,
+        'Tatabahasa 1-shot': 27.22063037249284,
+        'Tatabahasa 3-shots': 24.355300859598856,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/malaysian-llama2-7b-32k](https://huggingface.co/mesolitica/llama-7b-hf-32768-fpf)',
+        'BM-PT3 0-shot': 20.37037037037037,
+        'BM-PT3 1-shot': 20.37037037037037,
+        'BM-PT3 3-shots': 29.629629629629626,
+        'Tatabahasa 0-shot': 17.765042979942695,
+        'Tatabahasa 1-shot': 24.068767908309454,
+        'Tatabahasa 3-shots': 27.507163323782237,
+    },
+    {
+        'T': '⭕',
+        'model': '[mesolitica/malaysian-llama2-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-7b-32k-instructions-v2)',
+        'BM-PT3 0-shot': 33.33333333333333,
+        'BM-PT3 1-shot': 37.03703703703704,
+        'BM-PT3 3-shots': 35.18518518518518,
+        'Tatabahasa 0-shot': 59.31232091690545,
+        'Tatabahasa 1-shot': 53.86819484240688,
+        'Tatabahasa 3-shots': 45.55873925501432,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/malaysian-llama2-13b-32k](https://huggingface.co/mesolitica/llama-13b-hf-32768-fpf)',
+        'BM-PT3 0-shot': 33.33333333333333,
+        'BM-PT3 1-shot': 20.37037037037037,
+        'BM-PT3 3-shots': 31.48148148148148,
+        'Tatabahasa 0-shot': 26.07449856733524,
+        'Tatabahasa 1-shot': 25.214899713467048,
+        'Tatabahasa 3-shots': 24.355300859598856,
+    },
+    {
+        'T': '⭕',
+        'model': '[mistralai/malaysian-llama2-13b-32k-instructions](https://huggingface.co/mesolitica/malaysian-llama2-13b-32k-instructions)',
+        'BM-PT3 0-shot': 28.57142857142857,
+        'BM-PT3 1-shot': 12.244897959183673,
+        'BM-PT3 3-shots': 17.307692307692307,
+    },
+    {
+        'T': '🟢',
+        'model': '[mistralai/mistral-7b](https://huggingface.co/mistralai/Mistral-7B-v0.1)',
+        'Tatabahasa 0-shot': 28.939828080229223,
+        'Tatabahasa 1-shot': 34.38395415472779,
+        'Tatabahasa 3-shots': 32.95128939828081,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/malaysian-mistral-7b-4k](https://huggingface.co/mesolitica/mistral-7b-4096-fpf)',
+        'BM-PT3 0-shot': 20.37037037037037,
+        'BM-PT3 1-shot': 22.22222222222222,
+        'BM-PT3 3-shots': 33.33333333333333,
+        'Tatabahasa 0-shot': 21.48997134670487,
+        'Tatabahasa 1-shot': 28.939828080229223,
+        'Tatabahasa 3-shots': 24.641833810888254,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/malaysian-mistral-7b-32k](https://huggingface.co/mesolitica/mistral-7b-32768-fpf)',
+        'BM-PT3 0-shot': 16.666666666666664,
+        'BM-PT3 1-shot': 16.666666666666664,
+        'BM-PT3 3-shots': 25.925925925925924,
+        'Tatabahasa 0-shot': 18.624641833810887,
+        'Tatabahasa 1-shot': 24.355300859598856,
+        'Tatabahasa 3-shots': 28.653295128939828,
+    },
+    {
+        'T': '⭕',
+        'model': '[mesolitica/malaysian-mistral-7b-32k-instructions](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
+        'BM-PT3 0-shot': 40.74074074074074,
+        'BM-PT3 1-shot': 33.33333333333333,
+        'BM-PT3 3-shots': 37.03703703703704,
+        'Tatabahasa 0-shot': 65.32951289398281,
+        'Tatabahasa 1-shot': 57.306590257879655,
+        'Tatabahasa 3-shots': 56.446991404011456,
+    },
+    {
+        'T': '⭕',
+        'model': '[mesolitica/malaysian-mistral-7b-32k-instructions-v4](https://huggingface.co/mesolitica/malaysian-mistral-7b-32k-instructions)',
+        'BM-PT3 0-shot': 35.18518518518518,
+        'BM-PT3 1-shot': 31.48148148148148,
+        'BM-PT3 3-shots': 33.33333333333333,
+        'Tatabahasa 0-shot': 66.4756446991404,
+        'Tatabahasa 1-shot': 54.15472779369628,
+        'Tatabahasa 3-shots': 49.8567335243553,
+    },
+    {
+        'T': '🟢',
+        'model': '[aisingapore/sealion3b](https://huggingface.co/aisingapore/sealion3b)',
+        'BM-PT3 0-shot': 20.37037037037037,
+        'BM-PT3 1-shot': 25.925925925925924,
+        'BM-PT3 3-shots': 31.48148148148148,
+        'Tatabahasa 0-shot': 21.776504297994272,
+        'Tatabahasa 1-shot': 21.776504297994272,
+        'Tatabahasa 3-shots': 24.641833810888254,
+    },
+    {
+        'T': '🟢',
+        'model': '[aisingapore/sealion7b](https://huggingface.co/aisingapore/sealion7b)',
+        'BM-PT3 0-shot': 20.37037037037037,
+        'BM-PT3 1-shot': 24.074074074074073,
+        'BM-PT3 3-shots': 33.33333333333333,
+        'Tatabahasa 0-shot': 25.787965616045845,
+        'Tatabahasa 1-shot': 27.507163323782237,
+        'Tatabahasa 3-shots': 26.07449856733524,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/mallam-1.1B-4096](https://huggingface.co/mesolitica/mallam-1.1B-4096)',
+        'Tatabahasa 0-shot': 25.757575757575758,
+        'Tatabahasa 1-shot': 25.787965616045845,
+        'Tatabahasa 3-shots': 28.08022922636103,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/mallam-3B-4096](https://huggingface.co/mesolitica/mallam-3B-4096)',
+        'Tatabahasa 0-shot': 24.567474048442904,
+        'Tatabahasa 1-shot': 24.641833810888254,
+        'Tatabahasa 3-shots': 28.653295128939828,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/mallam-5B-4096](https://huggingface.co/mesolitica/mallam-5B-4096)',
+        'Tatabahasa 0-shot': 24.074074074074073,
+        'Tatabahasa 1-shot': 27.793696275071632,
+        'Tatabahasa 3-shots': 28.653295128939828,
+    },
+    {
+        'T': '🟢',
+        'model': '[sail/Sailor-0.5B](https://huggingface.co/sail/Sailor-0.5B)',
+        'Tatabahasa 0-shot': 17.191977077363894,
+        'Tatabahasa 1-shot': 23.78223495702006,
+        'Tatabahasa 3-shots': 25.501432664756447,
+    },
+    {
+        'T': '🟢',
+        'model': '[sail/Sailor-1.8B](https://huggingface.co/sail/Sailor-1.8B)',
+        'Tatabahasa 0-shot': 29.512893982808023,
+        'Tatabahasa 1-shot': 27.507163323782237,
+        'Tatabahasa 3-shots': 24.92836676217765,
+    },
+    {
+        'T': '🟢',
+        'model': '[sail/Sailor-4B](https://huggingface.co/sail/Sailor-4B)',
+        'Tatabahasa 0-shot': 31.51862464183381,
+        'Tatabahasa 1-shot': 36.10315186246418,
+        'Tatabahasa 3-shots': 27.507163323782237,
+    },
+    {
+        'T': '🟢',
+        'model': '[sail/Sailor-7B](https://huggingface.co/sail/Sailor-7B)',
+        'Tatabahasa 0-shot': 55.30085959885387,
+        'Tatabahasa 1-shot': 54.72779369627507,
+        'Tatabahasa 3-shots': 59.02578796561605,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/mallam-5B-4096](https://huggingface.co/mesolitica/mallam-5B-4096)',
+        'Tatabahasa 0-shot': 24.074074074074073,
+        'Tatabahasa 1-shot': 27.793696275071632,
+        'Tatabahasa 3-shots': 28.653295128939828,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/gemma-2B-8192-fpf](https://huggingface.co/mesolitica/gemma-2B-8192-fpf)',
+        'Tatabahasa 0-shot': 14.613180515759314,
+        'Tatabahasa 1-shot': 25.501432664756447,
+        'Tatabahasa 3-shots': 23.49570200573066,
+    },
+    {
+        'T': '🟢',
+        'model': '[mesolitica/Qwen1.5-0.5B-4096-fpf](https://huggingface.co/mesolitica/Qwen1.5-0.5B-4096-fpf)',
+        'Tatabahasa 0-shot': 13.753581661891118,
+        'Tatabahasa 1-shot': 21.20343839541547,
+        'Tatabahasa 3-shots': 22.636103151862464,
+    },
+    {
+        'T': '⭕',
+        'model': '[mesolitica/mallam-1.1b-20k-instructions](https://huggingface.co/mesolitica/mallam-1.1b-20k-instructions)',
+        'Tatabahasa 0-shot': 26.923076923076923,
+        'Tatabahasa 1-shot': 28.939828080229223,
+        'Tatabahasa 3-shots': 21.776504297994272,
+    },
+]
+data = pd.DataFrame(close_source + open_source)
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    gr.DataFrame(data, datatype = 'markdown')
+demo.launch()

app_demo.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
+from src.about import (
+    CITATION_BUTTON_LABEL,
+    CITATION_BUTTON_TEXT,
+    EVALUATION_QUEUE_TEXT,
+    INTRODUCTION_TEXT,
+    LLM_BENCHMARKS_TEXT,
+    TITLE,
+)
+from src.display.css_html_js import custom_css
+from src.display.utils import (
+    BENCHMARK_COLS,
+    COLS,
+    EVAL_COLS,
+    EVAL_TYPES,
+    AutoEvalColumn,
+    ModelType,
+    fields,
+    WeightType,
+    Precision
+)
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+def restart_space():
+    API.restart_space(repo_id=REPO_ID)
+### Space initialisation
+try:
+    print(EVAL_REQUESTS_PATH)
+    snapshot_download(
+        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+    )
+except Exception:
+    restart_space()
+try:
+    print(EVAL_RESULTS_PATH)
+    snapshot_download(
+        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+    )
+except Exception:
+    restart_space()
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+(
+    finished_eval_queue_df,
+    running_eval_queue_df,
+    pending_eval_queue_df,
+) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+def init_leaderboard(dataframe):
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+    return Leaderboard(
+        value=dataframe,
+        datatype=[c.type for c in fields(AutoEvalColumn)],
+        select_columns=SelectColumns(
+            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter(
+                AutoEvalColumn.params.name,
+                type="slider",
+                min=0.01,
+                max=150,
+                label="Select the number of parameters (B)",
+            ),
+            ColumnFilter(
+                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+            ),
+        ],
+        bool_checkboxgroup_label="Hide models",
+        interactive=False,
+    )
+demo = gr.Blocks(css=custom_css)
+with demo:
+    gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+            leaderboard = init_leaderboard(LEADERBOARD_DF)
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+                with gr.Column():
+                    with gr.Accordion(
+                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            finished_eval_table = gr.components.Dataframe(
+                                value=finished_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            running_eval_table = gr.components.Dataframe(
+                                value=running_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            pending_eval_table = gr.components.Dataframe(
+                                value=pending_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    weight_type,
+                    model_type,
+                ],
+                submission_result,
+            )
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
+                value=CITATION_BUTTON_TEXT,
+                label=CITATION_BUTTON_LABEL,
+                lines=20,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=1800)
+scheduler.start()
+demo.queue(default_concurrency_limit=40).launch()

src/about.py CHANGED Viewed

@@ -21,7 +21,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """

 # Your leaderboard name
+TITLE = """<h1 align="center" id="space-title">Thai Sentence Embedding Leaderboard</h1>"""
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """