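"""Gradio app for an LLM leaderboard Space.

Downloads evaluation results from the results dataset, renders the leaderboard
table, and exposes a submission tab for uploading new result files.
"""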
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

from src.display.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    NUMERIC_INTERVALS,
    TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, TOKEN, QUEUE_REPO, REPO_ID, RESULTS_REPO
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval, upload_file


def restart_space():
    API.restart_space(repo_id=REPO_ID, token=TOKEN)

# try:
#     print(EVAL_REQUESTS_PATH)
#     snapshot_download(
#         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
#     )
# except Exception:
#     restart_space()
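# Sync the latest evaluation results from the results dataset repo; if the
# download fails, restart the Space so it retries with a clean clone.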
try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
    )
except Exception:
    restart_space()


raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

# Columns shown in the leaderboard by default (always-visible columns are added separately).
shown_columns = [
    c.name
    for c in fields(AutoEvalColumn)
    if c.displayed_by_default and not c.hidden and not c.never_hidden
]

leaderboard_df = original_df.copy()


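# Build the Gradio UI: a leaderboard tab, an About tab, a submission tab,
# and a citation accordion at the bottom of the page.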
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
           
            leaderboard_table = gr.components.Dataframe(
                value=leaderboard_df[
                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                    + shown_columns
                    + [AutoEvalColumn.dummy.name]
                ],
                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
                column_widths=["2%", "33%"],
            )

            # Hidden copy of the leaderboard, kept for handling the case when the user uses the backspace key.
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=original_df[COLS],
                headers=COLS,
                datatype=TYPES,
                visible=False,
            )
            

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

               
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your files here!", elem_classes="markdown-text")

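            # Refresh the leaderboard after an upload: forward the file to upload_file,
            # reload the results from disk, and return the updated table contents.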
            def update_leaderboard(file_obj):
                upload_file(file_obj)
                raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
                shown_columns = [
                    c.name
                    for c in fields(AutoEvalColumn)
                    if c.displayed_by_default and not c.hidden and not c.never_hidden
                ]
                leaderboard_df = original_df.copy()
                return leaderboard_df[
                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                    + shown_columns
                    + [AutoEvalColumn.dummy.name]
                ]

            with gr.Row():
                upload = gr.Interface(fn=update_leaderboard, inputs="file", outputs=leaderboard_table)
               
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

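# Background job that would restart the Space every 30 seconds; scheduler.start()
# is left commented out below, so the job is defined but never runs.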
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=30)
# scheduler.start()

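# Enable request queuing (up to 40 concurrent workers per event by default) and launch the app.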
demo.queue(default_concurrency_limit=40).launch()