import os

import pandas as pd
import requests
import gradio as gr

# Workaround for an HF Spaces bug: force-reinstall a pinned Gradio version at runtime.
# Left disabled while the pinned version matches the Space's requirements.
#if gr.__version__ != '4.16.0':
#    os.system("pip uninstall -y gradio")
#    os.system("pip install gradio==4.16.0")

from info.train_a_model import (
    LLM_BENCHMARKS_TEXT)
from info.submit import (
    SUBMIT_TEXT)
from info.deployment import (
    DEPLOY_TEXT)
from info.programs import (
    PROGRAMS_TEXT)
from info.citation import (
    CITATION_TEXT)
from info.validated_chat_models import (
    VALIDATED_CHAT_MODELS)
from info.about import (
    ABOUT)
from src.processing import filter_benchmarks_table

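# Required configuration, supplied through the Space's environment variables / secrets;
# os.environ[...] raises a KeyError at startup if any of these is missing.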
inference_endpoint_url = os.environ['inference_endpoint_url']
submission_form_endpoint_url = os.environ['submission_form_endpoint_url']
inference_concurrency_limit = os.environ['inference_concurrency_limit']

demo = gr.Blocks()

with demo:
    
    gr.HTML("""<h1 align="center" id="space-title">πŸ€—Powered-by-Intel LLM Leaderboard πŸ’»</h1>""")
    gr.Markdown("""This leaderboard is designed to evaluate, score, and rank open-source LLMs
                that have been pre-trained or fine-tuned on Intel Hardware 🦾. To submit your model for evaluation,
        follow the instructions and complete the form in the 🏎️ Submit tab. Models submitted to the leaderboard are evaluated 
        on the Intel Developer Cloud ☁️. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from
        the  [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness).""")
    gr.Markdown("""A special shout-out to the πŸ€— [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) 
                team for generously sharing their code and best 
                practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal.""")

    def submit_to_endpoint(model_name, revision_name, model_type, hw_type, terms, precision, weight_type, training_infra, affiliation, base_model):
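        """Validate the form fields and POST them as JSON to the submission endpoint."""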
        # Construct the data payload to send
        data = {
            "model_name": model_name,
            "revision_name": revision_name,
            "model_type": model_type,
            "hw_type": hw_type,
            "terms": terms,
            "precision": precision,
            "weight_type": weight_type,
            "training_infrastructure": training_infra,
            "affiliation": affiliation,
            "base_model": base_model
        }
        
        # URL of the endpoint expecting the HTTP request
        url = submission_form_endpoint_url
        
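        # Reject the submission if any field is empty or the terms box is unchecked.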
        for key, value in data.items():
            if value == "" or (key == "terms" and value is False):
                return f"❌ Submission failed: the '{key}' field is incomplete. Ensure all fields are filled in and that you have agreed to the evaluation terms."
        
        try:
            response = requests.post(url, json=data)
            if response.status_code == 200:
                return "βœ… Submission successful! Please allow for 5 - 10 days for model evaluation to be completed. We will contact you \
                through your model's discussion forum if we encounter any issues with your submission."
            else:
                return f"Submission failed with status code {response.status_code}"
        except Exception as e:
            return f"❌Failed to submit due to an error: {str(e)}"
    
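    # Inline chat demo: streams responses from validated leaderboard models via the inference endpoint.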
    with gr.Accordion("Chat with Top Models on the Leaderboard Here πŸ’¬", open=False):
        
        chat_model_dropdown = gr.Dropdown(
                        choices=VALIDATED_CHAT_MODELS,
                        label="Select a leaderboard model to chat with. ",
                        multiselect=False,
                        value=VALIDATED_CHAT_MODELS[0],
                        interactive=True,
                    )
        
        # The selected model is passed to the event handlers below through the dropdown
        # component, so changing the dropdown changes which model answers the chat.
        
        def call_api_and_stream_response(query, chat_model):
            """
            Call the API endpoint and yield characters as they are received.
            This function simulates streaming by yielding characters one by one.
            """
            url = inference_endpoint_url
            params = {"query": query, "selected_model": chat_model}
            with requests.get(url, json=params, stream=True) as r:  # the endpoint expects a JSON body and streams back plain text
                for chunk in r.iter_content(chunk_size=1):
                    if chunk:
                        yield chunk.decode()

        def get_response(query, history, chat_model):
            """
            Wrapper function to call the streaming API and compile the response.
            """
            response = ''
            for char in call_api_and_stream_response(query, chat_model=chat_model):
                if char == '<':  # stopping condition; adjust to match the endpoint's end-of-stream marker
                    break
                response += char
                yield [(f"πŸ€– Response from LLM: {chat_model}", response)]  # list of (user, bot) tuples, the format gr.Chatbot expects

        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        submit = gr.Button("Submit")
        clear = gr.Button("Clear")
        def clear_chat(*args):
            return []  # an empty history clears the Chatbot component

        submit.click(
            fn=get_response,
            inputs=[msg, chatbot, chat_model_dropdown],
            outputs=chatbot
        )
        clear.click(
            fn=clear_chat,
            inputs=None,
            outputs=chatbot
        )
        

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("πŸ† LLM Leaderboard", elem_id="llm-benchmark-table", id=0):
            with gr.Row():
                with gr.Column():
                    filter_hw = gr.CheckboxGroup(choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
                                     label="Select Training Platform*",
                                     elem_id="compute_platforms",
                                     value=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"])
                    filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
                                     label="Training Infrastructure*",
                                     elem_id="training_infra",
                                     value=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"])
                    filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
                                     label="Intel Program Affiliation",
                                     elem_id="program_affiliation",
                                     value=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"])
                    
                with gr.Column():
                    filter_size = gr.CheckboxGroup(choices=[1,2,3,5,7,13,35,60,70,100],
                                     label="Model Sizes (Billion of Parameters)",
                                     elem_id="parameter_size",
                                     value=[1,2,3,5,7,13,35,60,70,100])
                    filter_precision = gr.CheckboxGroup(choices=["fp32","fp16","bf16","int8","fp8", "int4"],
                                     label="Model Precision",
                                     elem_id="precision",
                                     value=["fp32","fp16","bf16","int8","fp8", "int4"])
                    filter_type = gr.CheckboxGroup(choices=["pretrained","fine-tuned","chat-models","merges/moerges"],
                                     label="Model Types",
                                     elem_id="model_types",
                                     value=["pretrained","fine-tuned","chat-models","merges/moerges"])
                    # A choice-less CheckboxGroup is used here purely to display the legend text as a label.
                    inbox_text = gr.CheckboxGroup(label="""Inference Tested Column Legend: 🟨 = Gaudi, 🟦 = Xeon, πŸŸ₯ = GPU Max, 🟠 = Core Ultra, 🟒 = Arc GPU     (Please see "❓About" tab for more info)""")

            # formatting model name and adding links
            color = '#2f82d4'
            def make_clickable(row):
                return f'<a href="https://huggingface.co/{row["Model"]}" target="_blank" style="color: {color}; text-decoration: underline;">{row["Model"]}</a>'

            
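            # Leaderboard results are loaded from a static CSV snapshot (the date is encoded in the filename).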
            initial_df = pd.read_csv("./status/leaderboard_status_060524.csv")
            initial_df["Model"] = initial_df.apply(make_clickable, axis=1)
            initial_df = initial_df.sort_values(by='Average', ascending=False)
            
            
            def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
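                """Return the leaderboard rows matching every currently selected filter value."""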
                filtered_df = filter_benchmarks_table(df=initial_df, hw_selected=hw_selected, platform_selected=platform_selected, 
                                                      affiliation_selected=affiliation_selected, size_selected=size_selected, 
                                                      precision_selected=precision_selected, type_selected=type_selected)
                return filtered_df
            
            
            # Populate the table once with every filter at its default value (all options selected).
            initial_filtered_df = update_df(filter_hw.value, filter_platform.value, filter_affiliation.value,
                                            filter_size.value, filter_precision.value, filter_type.value)
            
            
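            # "html" datatypes are needed so the clickable model links render as links rather than raw markup.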
            gradio_df_display = gr.Dataframe(value=initial_filtered_df, headers=["Inference Tested","Model","Average","ARC","HellaSwag","MMLU",
                                                                                 "TruthfulQA","Winogrande","Training Hardware","Model Type","Precision",
                                                                                 "Size","Infrastructure","Affiliation"],
                                             datatype=["html","html","str","str","str","str","str","str","str","str","str","str","str","str"])
            
            # Re-filter the table whenever any checkbox group changes.
            filter_inputs = [filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type]
            for filter_component in filter_inputs:
                filter_component.change(fn=update_df,
                                        inputs=filter_inputs,
                                        outputs=[gradio_df_display])
        
            
        with gr.TabItem("🧰 Train a Model", elem_id="getting-started", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("πŸš€ Deployment Tips", elem_id="deployment-tips", id=2):
            gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text")
        with gr.TabItem("πŸ‘©β€πŸ’» Developer Programs", elem_id="hardward-program", id=3):
            gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("❓ About ", elem_id="about", id=5):
            gr.Markdown(ABOUT, elem_classes="markdown-text")
        with gr.TabItem("🏎️ Submit", elem_id="submit", id=4):
            gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name",
                                                    info="""Name of the model on the Hugging Face Hub, e.g. 'Intel/neural-chat-7b-v1-1'""",)
                    revision_name_textbox = gr.Textbox(label="Revision commit (Branch)", placeholder="main")
                    model_type = gr.Dropdown(
                        choices=["pretrained","fine-tuned","chat models","merges/moerges"],
                        label="Model type",
                        multiselect=False,
                        value="pretrained",
                        interactive=True,
                    )
                    
                    hw_type = gr.Dropdown(
                        choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
                        label="Training Hardware",
                        multiselect=False,
                        value="Gaudi",
                        interactive=True,
                    )
                    terms = gr.Checkbox(
                        label="Check if you agree to having your model evaluated and published to the leaderboard by our team.",
                        value=False,
                        interactive=True,
                    )
                    submit_button = gr.Button("πŸ€— Submit Eval πŸ’»")
                    submission_result = gr.Markdown()

                with gr.Column():
                    precision = gr.Dropdown(
                        choices=["fp32","fp16","bf16","int8","fp8", "int4"],
                        label="Precision",
                        multiselect=False,
                        value="fp16",
                        interactive=True,
                    )
                    weight_type = gr.Dropdown(
                        choices=["Original", "Adapter", "Delta"],
                        label="Weights type",
                        multiselect=False,
                        value="Original",
                        interactive=True,
                        info = """ Select the appropriate weights. If you have fine-tuned or adapted a model with PEFT or Delta-Tuning you likely have
                        LoRA Adapters or Delta Weights.""",
                    )
                    training_infra = gr.Dropdown(
                        choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
                        label="Training Infrastructure",
                        multiselect=False,
                        value="Intel Developer Cloud",
                        interactive=True,
                        info = """ Select the infrastructure that the model was developed on. 
                        Local is the ideal choice for Core Ultra, ARC GPUs, and local data center infrastructure.""",
                    )
                    affiliation = gr.Dropdown(
                        choices=["No Affiliation","Intel Innovator","Student Ambassador","Intel Liftoff", "Intel Engineering", "Other"],
                        label="Affiliation with Intel",
                        multiselect=False,
                        value="No Affiliation",
                        interactive=True,
                        info = """ Select "No Affiliation" if not part of any Intel programs.""",
                    )
                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")

                    submit_button.click(
                        fn=submit_to_endpoint,
                        inputs=[model_name_textbox, revision_name_textbox, model_type, hw_type, terms, precision, weight_type, training_infra, affiliation, base_model_name_textbox],
                        outputs=submission_result)
                
           
            
    with gr.Accordion("πŸ“™ Citation", open=False):
            citation =gr.Textbox(value = CITATION_TEXT,
                                 lines=6,
                                 label="Use the following to cite this content")
            
    gr.Markdown("""<div style="display: flex; justify-content: center;"> <p> Intel, the Intel logo and Gaudi are trademarks of Intel Corporation or its subsidiaries.
*Other names and brands may be claimed as the property of others.
</p> </div>""")
# Assumption: the otherwise-unused inference_concurrency_limit env var is meant to cap
# concurrent event processing, so it is wired into Gradio 4's queue here.
demo.queue(default_concurrency_limit=int(inference_concurrency_limit))
demo.launch(share=False)