File size: 3,041 Bytes
2c07158
 
 
 
 
 
 
 
 
 
 
86468e9
2c07158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86468e9
 
 
 
 
 
 
 
 
 
 
2c07158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edfe6ac
2c15f13
50e1bd2
2c07158
 
 
 
 
 
 
 
 
 
 
 
 
bb29f12
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from src.display.utils import EVAL_COLS, EVAL_TYPES
from src.envs import EVAL_REQUESTS_PATH
from src.populate import get_evaluation_queue_df
from src.submission.submit import add_new_eval
import gradio as gr

def _render_queue_accordion(title: str, queue_df) -> None:
    """Render one collapsed accordion containing a table of ``queue_df``.

    Must be called while a Gradio layout context is active, because the
    components attach themselves to whatever container is currently open.

    Args:
        title: Label shown on the accordion header.
        queue_df: Table data handed to ``gr.components.Dataframe`` as its
            ``value`` (presumably a pandas DataFrame -- confirm against
            ``get_evaluation_queue_df``).
    """
    with gr.Accordion(title, open=False):
        with gr.Row():
            gr.components.Dataframe(
                value=queue_df,
                headers=EVAL_COLS,
                datatype=EVAL_TYPES,
                row_count=5,
            )


def show_submit_page(index: int) -> None:
    """Build the "Submit" tab: evaluation-queue tables plus the submission form.

    Must be called inside an enclosing Gradio tabs context; the tab and all
    of its children are registered as a side effect and nothing is returned.

    Args:
        index: Value passed as the ``id`` of the created ``gr.TabItem``.
    """
    (
        finished_eval_queue_df,
        _running_eval_queue_df,  # returned by the loader but not displayed on this page
        pending_eval_queue_df,
        failed_eval_queue_df,
    ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)

    with gr.TabItem("🚀 Submit! ", elem_id="llm-benchmark-tab-table", id=index):

        # Queue overview: one collapsed accordion per status, in display order.
        with gr.Column():
            _render_queue_accordion(
                f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
                finished_eval_queue_df,
            )
            _render_queue_accordion(
                f"🔴 Failed Evaluations ({len(failed_eval_queue_df)})",
                failed_eval_queue_df,
            )
            _render_queue_accordion(
                f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
                pending_eval_queue_df,
            )

        with gr.Row():
            gr.Markdown("# ✉️✨ Submit your model!", elem_classes="markdown-text")

        # Submission form inputs and the notes shown beneath them.
        with gr.Row():
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Huggingface Model")
                link_to_model_blog = gr.Textbox(label="Link to model release blog / technical report")
                gr.Markdown("* The evaluation will be run manually in batches. Please allow up to one week for processing.")
                gr.Markdown("* By default, the model is running using Flash-Attn2. If the model doesn't support this, please contact us via the <a href=\"https://discord.gg/CqyBscMFpg\">OpenTyphoon Discord</a>.")

        submit_button = gr.Button("Submit Model")
        submission_result = gr.Markdown()
        # add_new_eval receives the two textbox values; whatever it returns is
        # rendered into the Markdown component below the button.
        submit_button.click(
            add_new_eval,
            [
                model_name_textbox,
                link_to_model_blog,
            ],
            submission_result,
        )

        with gr.Row():
            gr.Markdown('# ✉️✨ Submit your task <a href="https://github.com/scb-10x/seacrowd-eval">here!</a>', elem_classes="markdown-text")