eduardo-alvarez committed on
Commit
278fc7f
1 Parent(s): e56faab

correcting df load bug, adding chat functionality, and improving docs

.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  .ipynb_checkpoints
2
  *.pkl
3
- dask-worker-space/
 
 
1
  .ipynb_checkpoints
2
  *.pkl
3
+ dask-worker-space/
4
+ app_test.py
__pycache__/app.cpython-38.pyc ADDED
Binary file (7.81 kB). View file
 
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import random
4
- import time
5
 
6
  from info.train_a_model import (
7
  LLM_BENCHMARKS_TEXT)
@@ -13,46 +13,84 @@ from info.programs import (
13
  PROGRAMS_TEXT)
14
  from info.citation import(
15
  CITATION_TEXT)
16
- from src.processing import filter_benchmarks_table, make_clickable
 
 
 
 
 
17
 
18
  demo = gr.Blocks()
19
 
20
  with demo:
21
 
22
  gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
23
- gr.Markdown("This leaderboard is designed to evaluate, score, and rank open-source large language \
24
- models that have been pre-trained or fine-tuned on Intel Hardware 🦾")
25
- gr.Markdown("Models submitted to the leaderboard are evaluated \
26
- on the Intel Developer Cloud ☁️")
 
 
 
 
 
 
27
 
28
- # TODO: Coming soon comparison tool
29
- #with gr.Accordion("🥊Large Language Model Boxing Ring 🥊", open=False):
30
- # with gr.Row():
31
- # chat_a = gr.Chatbot()
32
- # chat_b = gr.Chatbot()
33
- # msg = gr.Textbox()
34
- # gr.ClearButton([msg, chat_a])
35
- #
36
- # def respond(message, chat_history):
37
- # bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"])
38
- # chat_history.append((message, bot_message))
39
- # time.sleep(2)
40
- # return "", chat_history
41
- #
42
- # msg.submit(respond, inputs = [msg, chat_a],outputs = [msg, chat_a])
 
43
 
 
44
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
45
- with gr.TabItem("🏆 LLM Benchmark", elem_id="llm-benchmark-table", id=0):
46
  with gr.Row():
47
  with gr.Column():
48
  filter_hw = gr.CheckboxGroup(choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
49
  label="Select Training Platform*",
50
  elem_id="compute_platforms",
51
  value=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"])
52
- filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud","AWS","Azure","GCP","Local"],
53
  label="Training Infrastructure*",
54
  elem_id="training_infra",
55
- value=["Intel Developer Cloud","AWS","Azure","GCP","Local"])
56
  filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
57
  label="Intel Program Affiliation",
58
  elem_id="program_affiliation",
@@ -63,10 +101,10 @@ with demo:
63
  label="Model Sizes (Billion of Parameters)",
64
  elem_id="parameter_size",
65
  value=[1,3,5,7,13,35,60,70,100])
66
- filter_precision = gr.CheckboxGroup(choices=["fp8","fp16","bf16","int8","4bit"],
67
  label="Model Precision",
68
  elem_id="precision",
69
- value=["fp8","fp16","bf16","int8","4bit"])
70
  filter_type = gr.CheckboxGroup(choices=["pretrained","fine-tuned","chat-models","merges/moerges"],
71
  label="Model Types",
72
  elem_id="model_types",
@@ -74,14 +112,21 @@ with demo:
74
 
75
  initial_df = pd.read_csv("./status/leaderboard_status_030424.csv")
76
 
77
- gradio_df_display = gr.Dataframe()
78
-
79
  def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
80
  filtered_df = filter_benchmarks_table(df=initial_df, hw_selected=hw_selected, platform_selected=platform_selected,
81
  affiliation_selected=affiliation_selected, size_selected=size_selected,
82
  precision_selected=precision_selected, type_selected=type_selected)
83
  return filtered_df
84
 
 
 
 
 
 
 
 
 
 
85
  filter_hw.change(fn=update_df,
86
  inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
87
  outputs=[gradio_df_display])
@@ -114,8 +159,9 @@ with demo:
114
  gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
115
  with gr.Row():
116
  with gr.Column():
117
- model_name_textbox = gr.Textbox(label="Model name")
118
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
 
119
  model_type = gr.Dropdown(
120
  choices=["pretrained","fine-tuned","chat models","merges/moerges"],
121
  label="Model type",
@@ -125,10 +171,10 @@ with demo:
125
  )
126
 
127
  hw_type = gr.Dropdown(
128
- choices=["Gaudi","Xeon","GPU Max","Arc GPU"],
129
  label="Training Hardware",
130
  multiselect=False,
131
- value="Gaudi2",
132
  interactive=True,
133
  )
134
  terms = gr.Checkbox(
@@ -137,9 +183,11 @@ with demo:
137
  value=False,
138
  interactive=True,
139
  )
 
 
140
  with gr.Column():
141
  precision = gr.Dropdown(
142
- choices=["fp8","fp16","bf16","int8","4bit"],
143
  label="Precision",
144
  multiselect=False,
145
  value="fp16",
@@ -151,31 +199,37 @@ with demo:
151
  multiselect=False,
152
  value="Original",
153
  interactive=True,
 
 
154
  )
155
  training_infra = gr.Dropdown(
156
- choices=["IDC","AWS","Azure","GCP","Local"],
157
  label="Training Infrastructure",
158
  multiselect=False,
159
- value="IDC",
160
  interactive=True,
 
 
161
  )
162
  affiliation = gr.Dropdown(
163
  choices=["No Affiliation","Innovator","Student Ambassador","Intel Liftoff", "Intel Labs", "Other"],
164
  label="Affiliation with Intel",
165
  multiselect=False,
166
- value="Independent",
167
  interactive=True,
 
168
  )
169
  base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
170
 
171
- #submit_button = gr.Button("Submit Eval")
172
- #submission_result = gr.Markdown()
173
- gr.Markdown("Community Submissions Coming soon!")
174
 
175
  with gr.Accordion("📙 Citation", open=False):
176
  citation =gr.Textbox(value = CITATION_TEXT,
177
  lines=6,
178
  label="Use the following to cite this content")
179
 
 
 
 
180
 
181
- demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import requests
4
+
5
 
6
  from info.train_a_model import (
7
  LLM_BENCHMARKS_TEXT)
 
13
  PROGRAMS_TEXT)
14
  from info.citation import(
15
  CITATION_TEXT)
16
+ from info.validated_chat_models import(
17
+ VALIDATED_CHAT_MODELS)
18
+ from src.processing import filter_benchmarks_table
19
+
20
+ #inference_endpoint_url = os.environ['inference_endpoint_url']
21
+ #inference_concurrency_limit = os.environ['inference_concurrency_limit']
22
 
23
  demo = gr.Blocks()
24
 
25
  with demo:
26
 
27
  gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
28
+ gr.Markdown("""This leaderboard is designed to evaluate, score, and rank open-source LLMs
29
+ that have been pre-trained or fine-tuned on Intel hardware 🦾. To submit your model for evaluation,
30
+ follow the instructions and complete the form in the "🏎️ Submit" tab. Models submitted to the leaderboard are evaluated
31
+ on the Intel Developer Cloud ☁️. The evaluation platform consists of Gaudi Accelerators and Xeon CPUs running benchmarks from
32
+ the [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness).""")
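For reference, the same harness can be run locally before submitting. The snippet below is only a rough sketch: the benchmark set mirrors the leaderboard columns (ARC, MMLU, TruthfulQA, Winogrande, GSM8K), but the exact task names, arguments, and configuration used by the evaluation service are not part of this commit and depend on the harness version.

```python
# Rough local-scoring sketch with lm-evaluation-harness (pip install lm-eval).
# Task names and simple_evaluate arguments vary between harness versions.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=Intel/neural-chat-7b-v3-1,dtype=bfloat16",
    tasks=["arc_challenge", "mmlu", "truthfulqa_mc2", "winogrande", "gsm8k"],
    batch_size=8,
)
print(results["results"])
```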
33
+ gr.Markdown("""Join 5000+ developers on the [Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission and
34
+ talk about everything from GenAI and HPC to Quantum Computing.""")
35
+ gr.Markdown("""A special shout-out to the 🤗 [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
36
+ team for generously sharing their code and best
37
+ practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal.""")
38
 
39
+ with gr.Accordion("Chat with Top Models on the Leaderboard Here 💬 ", open=False):
40
+ # import pdb
41
+
42
+ chat_model_dropdown = gr.Dropdown(
43
+ choices=VALIDATED_CHAT_MODELS,
44
+ label="Select a leaderboard model to chat with. ",
45
+ multiselect=False,
46
+ value=VALIDATED_CHAT_MODELS[0],
47
+ interactive=True,
48
+ )
49
+
50
+ #chat_model_selection = chat_model_dropdown.value
51
+ chat_model_selection = 'Intel/neural-chat-7b-v1-1'
52
+
53
+ def call_api_and_stream_response(query, chat_model):
54
+ """
55
+ Call the API endpoint and yield characters as they are received.
56
+ Characters are read from the streaming response one at a time.
57
+ """
58
+ url = "http://localhost:5004/query-stream/"
59
+ params = {"query": query,"selected_model":chat_model}
60
+ with requests.get(url, json=params, stream=True) as r:
61
+ for chunk in r.iter_content(chunk_size=1):
62
+ if chunk:
63
+ yield chunk.decode()
64
+
65
+ def get_response(query, history):
66
+ """
67
+ Wrapper function to call the streaming API and compile the response.
68
+ """
69
+ response = ''
70
+
71
+ global chat_model_selection
72
+
73
+ for char in call_api_and_stream_response(query, chat_model=chat_model_selection):
74
+ if char == '<':
75
+ break
76
+ response += char
77
+ yield response
78
+
79
+ gr.ChatInterface(get_response, retry_btn = None, undo_btn=None, theme=gr.themes.Soft(), concurrency_limit=5).launch()
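The client above assumes a small streaming service listening on port 5004; that service is not part of this repository. The sketch below only illustrates the shape of endpoint `call_api_and_stream_response` expects; FastAPI and the `generate_reply` placeholder are assumptions, not the actual backend.

```python
# Hypothetical sketch of the /query-stream/ service the Gradio client polls.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class QueryParams(BaseModel):
    query: str
    selected_model: str

def generate_reply(query: str, model_name: str):
    # Placeholder: a real implementation would stream tokens from the selected model
    # and finish with an end-of-sequence tag, which the client stops at ('<').
    for word in f"(echo from {model_name}) {query}".split():
        yield word + " "

@app.get("/query-stream/")
def query_stream(params: QueryParams):
    # The Gradio client sends a JSON body via requests.get(..., json=..., stream=True)
    # and reads the plain-text response one character at a time.
    return StreamingResponse(
        generate_reply(params.query, params.selected_model),
        media_type="text/plain",
    )
```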
80
 
81
+
82
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
83
+ with gr.TabItem("🏆 LLM Leadeboard", elem_id="llm-benchmark-table", id=0):
84
  with gr.Row():
85
  with gr.Column():
86
  filter_hw = gr.CheckboxGroup(choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
87
  label="Select Training Platform*",
88
  elem_id="compute_platforms",
89
  value=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"])
90
+ filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
91
  label="Training Infrastructure*",
92
  elem_id="training_infra",
93
+ value=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"])
94
  filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
95
  label="Intel Program Affiliation",
96
  elem_id="program_affiliation",
 
101
  label="Model Sizes (Billion of Parameters)",
102
  elem_id="parameter_size",
103
  value=[1,3,5,7,13,35,60,70,100])
104
+ filter_precision = gr.CheckboxGroup(choices=["fp32","fp16","bf16","int8","fp8", "int4"],
105
  label="Model Precision",
106
  elem_id="precision",
107
+ value=["fp32","fp16","bf16","int8","fp8", "int4"])
108
  filter_type = gr.CheckboxGroup(choices=["pretrained","fine-tuned","chat-models","merges/moerges"],
109
  label="Model Types",
110
  elem_id="model_types",
 
112
 
113
  initial_df = pd.read_csv("./status/leaderboard_status_030424.csv")
114
 
 
 
115
  def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
116
  filtered_df = filter_benchmarks_table(df=initial_df, hw_selected=hw_selected, platform_selected=platform_selected,
117
  affiliation_selected=affiliation_selected, size_selected=size_selected,
118
  precision_selected=precision_selected, type_selected=type_selected)
119
  return filtered_df
120
 
121
+ initial_filtered_df = update_df(["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
122
+ ["Intel Developer Cloud","AWS","Azure","GCP","Local"],
123
+ ["No Affiliation","Intel Innovator","Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
124
+ [1,3,5,7,13,35,60,70,100],
125
+ ["fp8","fp16","bf16","int8","4bit"],
126
+ ["pretrained","fine-tuned","chat-models","merges/moerges"])
127
+
128
+ gradio_df_display = gr.Dataframe(value=initial_filtered_df)
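`filter_benchmarks_table` lives in `src/processing.py` and is not shown in this diff. The sketch below is only a guess at the kind of filtering it performs; the column names are taken from the deleted `src/leaderboard_filtered.csv` further down in this commit.

```python
# Illustrative only; not the actual src/processing.py implementation.
import pandas as pd

def filter_benchmarks_table(df: pd.DataFrame, hw_selected, platform_selected,
                            affiliation_selected, size_selected,
                            precision_selected, type_selected) -> pd.DataFrame:
    # Keep rows whose metadata matches every active checkbox group.
    mask = (
        df["Hardware"].isin(hw_selected)
        & df["Infrastructure"].isin(platform_selected)
        & df["Affiliation"].isin(affiliation_selected)
        & df["Size"].isin(size_selected)
        & df["Precision"].isin(precision_selected)
        & df["Model Type"].isin(type_selected)
    )
    return df[mask]
```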
129
+
130
  filter_hw.change(fn=update_df,
131
  inputs=[filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type],
132
  outputs=[gradio_df_display])
 
159
  gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
160
  with gr.Row():
161
  with gr.Column():
162
+ model_name_textbox = gr.Textbox(label="Model name",
163
+ info = """ Name of Model in the Hub. For example: 'Intel/neural-chat-7b-v1-1'""",)
164
+ revision_name_textbox = gr.Textbox(label="Revision commit (Branch)", placeholder="main")
165
  model_type = gr.Dropdown(
166
  choices=["pretrained","fine-tuned","chat models","merges/moerges"],
167
  label="Model type",
 
171
  )
172
 
173
  hw_type = gr.Dropdown(
174
+ choices=["Gaudi","Xeon","GPU Max","Arc GPU","Core Ultra"],
175
  label="Training Hardware",
176
  multiselect=False,
177
+ value="Gaudi",
178
  interactive=True,
179
  )
180
  terms = gr.Checkbox(
 
183
  value=False,
184
  interactive=True,
185
  )
186
+ submit_button = gr.Button("🤗 Submit Eval 💻")
187
+ submission_result = gr.Markdown()
188
  with gr.Column():
189
  precision = gr.Dropdown(
190
+ choices=["fp32","fp16","bf16","int8","fp8", "int4"],
191
  label="Precision",
192
  multiselect=False,
193
  value="fp16",
 
199
  multiselect=False,
200
  value="Original",
201
  interactive=True,
202
+ info = """ Select the appropriate weights. If you have fine-tuned or adapted a model with PEFT or Delta-Tuning you likely have
203
+ LoRA Adapters or Delta Weights.""",
204
  )
205
  training_infra = gr.Dropdown(
206
+ choices=["Intel Developer Cloud","AWS","Azure","Google Cloud Platform","Local"],
207
  label="Training Infrastructure",
208
  multiselect=False,
209
+ value="Intel Developer Cloud",
210
  interactive=True,
211
+ info = """ Select the infrastructure that the model was developed on.
212
+ Local is the ideal choice for Core Ultra, ARC GPUs, and local data center infrastructure.""",
213
  )
214
  affiliation = gr.Dropdown(
215
  choices=["No Affiliation","Innovator","Student Ambassador","Intel Liftoff", "Intel Labs", "Other"],
216
  label="Affiliation with Intel",
217
  multiselect=False,
218
+ value="No Affiliation",
219
  interactive=True,
220
+ info = """ Select "No Affiliation" if not part of any Intel programs.""",
221
  )
222
  base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
223
 
224
+ # gr.Markdown("Community Submissions Coming soon!")
 
 
225
 
226
  with gr.Accordion("📙 Citation", open=False):
227
  citation =gr.Textbox(value = CITATION_TEXT,
228
  lines=6,
229
  label="Use the following to cite this content")
230
 
231
+ gr.Markdown("""<div style="display: flex; justify-content: center;"> <p> Intel, the Intel logo and Gaudi are trademarks of Intel Corporation or its subsidiaries.
232
+ *Other names and brands may be claimed as the property of others.
233
+ </p> </div>""")
234
 
235
+ demo.launch(share=False)
info/__pycache__/citation.cpython-38.pyc ADDED
Binary file (513 Bytes). View file
 
info/__pycache__/deployment.cpython-38.pyc ADDED
Binary file (6.67 kB). View file
 
info/__pycache__/programs.cpython-38.pyc ADDED
Binary file (2.55 kB). View file
 
info/__pycache__/submit.cpython-38.pyc ADDED
Binary file (2.91 kB). View file
 
info/__pycache__/train_a_model.cpython-38.pyc ADDED
Binary file (3.57 kB). View file
 
info/__pycache__/validated_chat_models.cpython-38.pyc ADDED
Binary file (287 Bytes). View file
 
info/citation.py CHANGED
@@ -1,8 +1,8 @@
1
  CITATION_TEXT = r"""@misc{powered-by-intel-llm-leaderboard,
2
- author = {Eduardo Alvarez},
3
  title = {Powered By Intel LLM Leaderboard},
4
  year = {2024},
5
  publisher = {Intel},
6
- howpublished = "\url{https://huggingface.co/spaces/Intel/powered_by_intel_leaderboard}"
7
  }
8
  """
 
1
  CITATION_TEXT = r"""@misc{powered-by-intel-llm-leaderboard,
2
+ author = {Eduardo Alvarez and Jack Erickson and Benjamin Consolvo},
3
  title = {Powered By Intel LLM Leaderboard},
4
  year = {2024},
5
  publisher = {Intel},
6
+ howpublished = "\url{https://huggingface.co/spaces/Intel/powered_by_intel_llm_leaderboard}"
7
  }
8
  """
info/deployment.py CHANGED
@@ -1,10 +1,97 @@
1
  DEPLOY_TEXT = f"""
2
 
3
- Having table full of powerful models is nice and call but at the end of the day, you have to be able to use
4
- them for something. Below you will find sample code to help you load models and perform inference.
5
-
6
-
7
- ## Inference with Gaudi 2
 
8
  Habana's SDK, Intel Gaudi Software, supports PyTorch and DeepSpeed for accelerating LLM training and inference.
9
  The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
10
  (e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
@@ -26,26 +113,10 @@ python run_generation.py \
26
  --prompt "Hello world" "How are you?"
27
 
28
  ```
 
29
 
30
- # Inference Intel Extension for Transformers
31
- Intel® Extension for Transformers is an innovative toolkit designed to accelerate GenAI/LLM
32
- everywhere with the optimal performance of Transformer-based models on various Intel platforms,
33
- including Intel Gaudi2, Intel CPU, and Intel GPU.
34
-
35
- ### INT4 Inference (CPU)
36
- ```python
37
- from transformers import AutoTokenizer
38
- from intel_extension_for_transformers.transformers import AutoModelForCausalLM
39
- model_name = "Intel/neural-chat-7b-v3-1"
40
- prompt = "When winter becomes spring, the flowers..."
41
-
42
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
43
- inputs = tokenizer(prompt, return_tensors="pt").input_ids
44
-
45
- model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
46
- outputs = model.generate(inputs)
47
 
48
- ```
49
  ### INT4 Inference (GPU)
50
  ```python
51
  import intel_extension_for_pytorch as ipex
@@ -65,19 +136,11 @@ model = ipex.optimize_transformers(model, inplace=True, dtype=torch.float16, woq
65
 
66
  output = model.generate(inputs)
67
  ```
 
68
 
69
- # Intel Extension for PyTorch
70
- Intel® Extension for PyTorch extends PyTorch with up-to-date features optimizations for an
71
- extra performance boost on Intel hardware. Optimizations take advantage of Intel® Advanced
72
- Vector Extensions 512 (Intel® AVX-512) Vector Neural Network Instructions (VNNI) and Intel®
73
- Advanced Matrix Extensions (Intel® AMX) on Intel CPUs as well as Intel Xe Matrix Extensions
74
- (XMX) AI engines on Intel discrete GPUs. Moreover, Intel® Extension for PyTorch* provides easy
75
- GPU acceleration for Intel discrete GPUs through the PyTorch* xpu device.
76
 
77
- There are a few flavors of PyTorch that can be leveraged for inference. For detailed documentation,
78
- the visit https://intel.github.io/intel-extension-for-pytorch/#introduction
79
-
80
- ### IPEX with Optimum Intel (no quantization)
81
  Requires installing/updating optimum `pip install --upgrade-strategy eager optimum[ipex]
82
  `
83
  ```python
@@ -90,7 +153,7 @@ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
90
  results = pipe("A fisherman at sea...")
91
  ```
92
 
93
- ### IPEX with Stock PyTorch with Mixed Precision
94
  ```python
95
  import torch
96
  import intel_extension_for_pytorch as ipex
@@ -106,7 +169,60 @@ with torch.inference_mode():
106
  model.generate()
107
  ```
108
 
109
- # OpenVINO Toolkit
 
110
 
111
  ```python
112
  from optimum.intel import OVModelForCausalLM
@@ -122,5 +238,11 @@ pipe("In the spring, beautiful flowers bloom...")
122
 
123
  ```
124
 
 
 
 
 
 
 
125
 
126
  """
 
1
  DEPLOY_TEXT = f"""
2
 
3
+ # 🚀 Deployment Tips
4
+
5
+ A collection of powerful models is valuable, but ultimately, you need to be able to use them effectively.
6
+ This tab is dedicated to providing guidance and code snippets for performing inference with leaderboard models on Intel platforms.
7
+
8
+ Below, you'll find a table of open-source software options for inference, along with the supported Intel Hardware Platforms.
9
+ A 🚀 indicates that inference with the associated software package is supported on the hardware. We hope this information
10
+ helps you choose the best option for your specific use case. Happy building!
11
+
12
+ <div style="display: flex; justify-content: center;">
13
+ <table border="1">
14
+ <tr>
15
+ <th>Inference Software</th>
16
+ <th>Gaudi</th>
17
+ <th>Xeon</th>
18
+ <th>GPU Max</th>
19
+ <th>Arc GPU</th>
20
+ <th>Core Ultra</th>
21
+ </tr>
22
+ <tr>
23
+ <td>Optimum Habana</td>
24
+ <td>🚀</td>
25
+ <td></td>
26
+ <td></td>
27
+ <td></td>
28
+ <td></td>
29
+ </tr>
30
+ <tr>
31
+ <td>Intel Extension for PyTorch</td>
32
+ <td></td>
33
+ <td>🚀</td>
34
+ <td>🚀</td>
35
+ <td>🚀</td>
36
+ <td></td>
37
+ </tr>
38
+ <tr>
39
+ <td>Intel Extension for Transformers</td>
40
+ <td></td>
41
+ <td>🚀</td>
42
+ <td>🚀</td>
43
+ <td>🚀</td>
44
+ <td></td>
45
+ </tr>
46
+ <tr>
47
+ <td>OpenVINO</td>
48
+ <td></td>
49
+ <td>🚀</td>
50
+ <td>🚀</td>
51
+ <td>🚀</td>
52
+ <td>🚀</td>
53
+ </tr>
54
+ <tr>
55
+ <td>BigDL</td>
56
+ <td></td>
57
+ <td>🚀</td>
58
+ <td>🚀</td>
59
+ <td>🚀</td>
60
+ <td>🚀</td>
61
+ </tr>
62
+ <tr>
63
+ <td>NPU Acceleration Library</td>
64
+ <td></td>
65
+ <td></td>
66
+ <td></td>
67
+ <td></td>
68
+ <td>🚀</td>
69
+ </tr>
71
+ <tr>
72
+ <td>PyTorch</td>
73
+ <td>🚀</td>
74
+ <td>🚀</td>
75
+ <td>🚀</td>
76
+ <td>🚀</td>
77
+ <td>🚀</td>
78
+ </tr>
80
+ <tr>
81
+ <td>TensorFlow</td>
82
+ <td>🚀</td>
83
+ <td>🚀</td>
84
+ <td>🚀</td>
85
+ <td>🚀</td>
86
+ <td>🚀</td>
87
+ </tr>
88
+ </table>
89
+ </div>
90
+
91
+
92
+ <hr>
93
+
94
+ # Intel® Gaudi Accelerators
95
  Habana's SDK, Intel Gaudi Software, supports PyTorch and DeepSpeed for accelerating LLM training and inference.
96
  The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
97
  (e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
 
113
  --prompt "Hello world" "How are you?"
114
 
115
  ```
116
+ <hr>
117
 
118
+ # Intel® Max Series GPU
 
119
 
 
120
  ### INT4 Inference (GPU)
121
  ```python
122
  import intel_extension_for_pytorch as ipex
 
136
 
137
  output = model.generate(inputs)
138
  ```
139
+ <hr>
140
 
141
+ # Intel® Xeon CPUs
 
 
 
 
 
 
142
 
143
+ ### Intel Extension for PyTorch - Optimum Intel (no quantization)
 
 
 
144
  Requires installing/updating optimum: `pip install --upgrade-strategy eager optimum[ipex]`
146
  ```python
 
153
  results = pipe("A fisherman at sea...")
154
  ```
155
 
156
+ ### Intel® Extension for PyTorch - Mixed Precision (fp32 and bf16)
157
  ```python
158
  import torch
159
  import intel_extension_for_pytorch as ipex
 
169
  model.generate()
170
  ```
171
 
172
+ ### Intel® Extension for Transformers - INT4 Inference (CPU)
173
+ ```python
174
+ from transformers import AutoTokenizer
175
+ from intel_extension_for_transformers.transformers import AutoModelForCausalLM
176
+ model_name = "Intel/neural-chat-7b-v3-1"
177
+ prompt = "When winter becomes spring, the flowers..."
178
+
179
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
180
+ inputs = tokenizer(prompt, return_tensors="pt").input_ids
181
+
182
+ model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
183
+ outputs = model.generate(inputs)
184
+
185
+
186
+ ```
187
+
188
+ <hr>
189
+
190
+ # Intel® Core Ultra (NPUs and iGPUs)
191
+
192
+
193
+ ### Intel® NPU Acceleration Library
194
+ ```python
195
+ from transformers import AutoTokenizer, TextStreamer, AutoModelForCausalLM
196
+ import intel_npu_acceleration_library
197
+ import torch
198
+
199
+ model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
200
+
201
+ model = AutoModelForCausalLM.from_pretrained(model_id, use_cache=True).eval()
202
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_default_system_prompt=True)
203
+ tokenizer.pad_token_id = tokenizer.eos_token_id
204
+ streamer = TextStreamer(tokenizer, skip_special_tokens=True)
205
+
206
+ print("Compile model for the NPU")
207
+ model = intel_npu_acceleration_library.compile(model, dtype=torch.int8)
208
+
209
+ query = input("Ask something: ")
210
+ prefix = tokenizer(query, return_tensors="pt")["input_ids"]
211
+
212
+ generation_kwargs = dict(
213
+ input_ids=prefix,
214
+ streamer=streamer,
215
+ do_sample=True,
216
+ top_k=50,
217
+ top_p=0.9,
218
+ max_new_tokens=512,
219
+ )
220
+
221
+ print("Run inference")
222
+ _ = model.generate(**generation_kwargs)
223
+ ```
224
+
225
+ ### OpenVINO Toolkit with Optimum Intel
226
 
227
  ```python
228
  from optimum.intel import OVModelForCausalLM
 
238
 
239
  ```
240
 
241
+ <hr>
242
+
243
+ # Intel® Arc GPUs
244
+
245
+ Coming Soon!
246
+
247
 
248
  """
info/programs.py CHANGED
@@ -1,7 +1,11 @@
1
  PROGRAMS_TEXT= """
 
 
2
  Intel offers a range of programs to grant early, short, and long-term access to developers. A great way to build
3
  and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs. Learn more about
4
- these opportunities below:
 
 
5
 
6
  ## Intel Liftoff Program
7
  Intel® Liftoff for startups is open to early-stage AI and machine learning startups. This free virtual program
@@ -12,6 +16,8 @@ like Gaudi, Max Series GPUs, and Xeon Processors.
12
 
13
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html
14
 
 
 
15
  ## Intel Student Ambassador Program
16
  This program is focused on undergraduate and graduate students who are passionate about technology and
17
  working with developer communities to promote learning, sharing, and collaboration. It provides opportunities
@@ -23,6 +29,7 @@ a nondisclosure agreement (NDA) and extended access to Intel® Developer Cloud.
23
 
24
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/training/academic-program/student-ambassador.html#gs.5f5oi3
25
 
 
26
 
27
  ## Intel Innovator Program
28
  This program is for developers using oneAPI on Intel® architecture who provide technical leadership and inspiration
 
1
  PROGRAMS_TEXT= """
2
+ # 👩‍💻 Developer Programs
3
+
4
  Intel offers a range of programs to grant early, short, and long-term access to developers. A great way to build
5
  and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs. Learn more about
6
+ these opportunities below:
7
+
8
+ <hr>
9
 
10
  ## Intel Liftoff Program
11
  Intel® Liftoff for startups is open to early-stage AI and machine learning startups. This free virtual program
 
16
 
17
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html
18
 
19
+ <hr>
20
+
21
  ## Intel Student Ambassador Program
22
  This program is focused on undergraduate and graduate students who are passionate about technology and
23
  working with developer communities to promote learning, sharing, and collaboration. It provides opportunities
 
29
 
30
  Learn more and apply through the program at https://www.intel.com/content/www/us/en/developer/tools/oneapi/training/academic-program/student-ambassador.html#gs.5f5oi3
31
 
32
+ <hr>
33
 
34
  ## Intel Innovator Program
35
  This program is for developers using oneAPI on Intel® architecture who provide technical leadership and inspiration
info/submit.py CHANGED
@@ -1,7 +1,9 @@
1
 
2
  SUBMIT_TEXT = f"""
3
- # Evaluation Queue for the 🤗"Powered by Intel" LLM Leaderboard 💻
4
- Models added here will be queued for evaluation on the Intel Developer Cloud ☁️
 
 
5
  ## First steps before submitting a model
6
 
7
  ### 1) Make sure you can load your model and tokenizer using AutoClasses:
@@ -19,19 +21,19 @@ Note: if your model needs `use_remote_code=True`, we do not support this option
19
  It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
20
 
21
  ### 3) Make sure your model has an open license!
22
- This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
 
23
 
24
  ### 4) Fill up your model card
25
  We use your model card to better understand the properties of your model and make them more easily discoverable for other users.
26
  Model cards are required to have mentions of the hardware, software, and infrastructure used for training - without this information
27
- we cannot accept your model as a valid submission.
 
28
 
29
  ### 5) Select the correct precision
30
  Not all models are converted properly from `float16` to `bfloat16`, and selecting the wrong precision can sometimes cause evaluation error (as loading a `bf16` model in `fp16` can sometimes generate NaNs, depending on the weight range).
31
 
32
  ## In case of model failure
33
- If your model is displayed in the `FAILED` category, its execution stopped.
34
- Make sure you have followed the above steps first.
35
- If everything is done, check you can launch the EleutherAIHarness on your model locally, using the command in the About tab under "Reproducibility" with all arguments specified (you can add `--limit` to limit the number of examples per task).
36
 
37
  """
 
1
 
2
  SUBMIT_TEXT = f"""
3
+ # 🏎️ Submit
4
+ Models added here will be queued for evaluation on the Intel Developer Cloud ☁️. Depending on the queue, your model may take up to 10 days to appear on the leaderboard.
5
+ We will work to provide greater transparency as our leaderboard community grows!
6
+
7
  ## First steps before submitting a model
8
 
9
  ### 1) Make sure you can load your model and tokenizer using AutoClasses:
 
21
  It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
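If your checkpoint still ships pickle weights, re-saving it is usually enough. A minimal sketch (paths are placeholders; recent transformers releases already default to safetensors):

```python
# Re-save an existing checkpoint with safetensors weights.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("./my-finetuned-model")
model.save_pretrained("./my-finetuned-model-safetensors", safe_serialization=True)
```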
22
 
23
  ### 3) Make sure your model has an open license!
24
+ This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗. A good example of an open-source license is apache-2.0.
25
+ Model licenses that allow both commercial and research use tend to be the most attractive to other developers in the ecosystem!
26
 
27
  ### 4) Fill up your model card
28
  We use your model card to better understand the properties of your model and make them more easily discoverable for other users.
29
  Model cards are required to have mentions of the hardware, software, and infrastructure used for training - without this information
30
+ we cannot accept your model as a valid submission. Remember, only models trained on these processors are eligible for evaluation:
31
+ Intel® Gaudi Accelerators, Intel® Xeon® Processors, Intel® Data Center GPU Max Series, Intel® Arc GPUs, and Intel® Core Ultra.
32
 
33
  ### 5) Select the correct precision
34
  Not all models are converted properly from `float16` to `bfloat16`, and selecting the wrong precision can sometimes cause evaluation error (as loading a `bf16` model in `fp16` can sometimes generate NaNs, depending on the weight range).
35
 
36
  ## In case of model failure
37
+ If your model fails evaluation 😔, we will contact you by opening a new discussion in your model repository. Let's work together to get your model the love it deserves ❤️!
 
 
38
 
39
  """
info/train_a_model.py CHANGED
@@ -1,9 +1,9 @@
1
 
2
  LLM_BENCHMARKS_TEXT = f"""
3
- # Use the Resources Below to Start Training a Model Today
4
 
5
- Intel offers a variety of platforms that can be used to train LLMs including datacenter and consumer grade cpus, gpus, and ASICs.
6
- Below, you'll find documentation on how to access free and paid resources to train a model and submit it to the "Intel Inside Leaderboard".
7
 
8
  ## Intel Developer Cloud - Quick Start
9
  The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
@@ -11,19 +11,19 @@ The Intel Developer Cloud is one of the best places to access free and paid comp
11
  1. Visit [cloud.intel.com](https://cloud.intel.com) and create a free account.
12
  2. Navigate to the "Training" module under the "Software" section in the left panel
13
  3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch"
14
- 4. Follow the instructions in the notebook to train your model using Intel Max Series 1100 GPUs
15
  5. Upload your model to the Hugging Face Model Hub
16
  6. Go to the "Submit" tab follow instructions to create a leaderboard evaluation request
17
 
18
  ## Additional Training Code Samples
19
 
20
  Below, you will find a list of additional resources for training models on different Intel hardware platforms:
21
- - Gaudi Processors
22
  - [Parameter Efficient Fine-Tuning of Llama-2 70B](https://github.com/HabanaAI/Gaudi-tutorials/blob/main/PyTorch/llama2_fine_tuning_inference/llama2_fine_tuning_inference.ipynb)
23
- - Xeon Processors
24
  - [Distributed Training of GPT2 LLMs on AWS](https://github.com/intel/intel-cloud-optimizations-aws/tree/main/distributed-training)
25
  - [Fine-tuning Falcon 7B on Xeon Processors](https://medium.com/@eduand-alvarez/fine-tune-falcon-7-billion-on-xeon-cpus-with-hugging-face-and-oneapi-a25e10803a53)
26
- - Max Series GPUs
27
  - [LLM Fine-tuning with QLoRA on Max Series GPUs](https://console.idcservice.net/training/detail/159c24e4-5598-3155-a790-2qv973tlm172)
28
  ## Submitting your Model to the Hub
29
  Once you have trained your model, it is a straightforward process to upload and open-source it on the Hugging Face Hub.
 
1
 
2
  LLM_BENCHMARKS_TEXT = f"""
3
+ # 🧰 Train a Model
4
 
5
+ Intel offers a variety of platforms that can be used to train LLMs, including datacenter- and consumer-grade CPUs, GPUs, and ASICs.
6
+ Below, you'll find documentation on how to access free and paid resources to train a model and submit it to the Powered-by-Intel LLM Leaderboard.
7
 
8
  ## Intel Developer Cloud - Quick Start
9
  The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
 
11
  1. Visit [cloud.intel.com](https://cloud.intel.com) and create a free account.
12
  2. Navigate to the "Training" module under the "Software" section in the left panel
13
  3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch"
14
+ 4. Follow the instructions in the notebook to train your model using Intel® Data Center GPU Max 1100
15
  5. Upload your model to the Hugging Face Model Hub
16
  6. Go to the "Submit" tab follow instructions to create a leaderboard evaluation request
17
 
18
  ## Additional Training Code Samples
19
 
20
  Below, you will find a list of additional resources for training models on different Intel hardware platforms:
21
+ - Intel® Gaudi® Accelerators
22
  - [Parameter Efficient Fine-Tuning of Llama-2 70B](https://github.com/HabanaAI/Gaudi-tutorials/blob/main/PyTorch/llama2_fine_tuning_inference/llama2_fine_tuning_inference.ipynb)
23
+ - Intel® Xeon® Processors
24
  - [Distributed Training of GPT2 LLMs on AWS](https://github.com/intel/intel-cloud-optimizations-aws/tree/main/distributed-training)
25
  - [Fine-tuning Falcon 7B on Xeon Processors](https://medium.com/@eduand-alvarez/fine-tune-falcon-7-billion-on-xeon-cpus-with-hugging-face-and-oneapi-a25e10803a53)
26
+ - Intel® Data Center GPU Max Series
27
  - [LLM Fine-tuning with QLoRA on Max Series GPUs](https://console.idcservice.net/training/detail/159c24e4-5598-3155-a790-2qv973tlm172)
28
  ## Submitting your Model to the Hub
29
  Once you have trained your model, it is a straightforward process to upload and open-source it on the Hugging Face Hub.
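A minimal sketch of that upload step, assuming a locally saved checkpoint and that `huggingface-cli login` has already been run (paths and the repository id are placeholders):

```python
# Push a locally fine-tuned model and its tokenizer to the Hugging Face Hub.
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("./my-finetuned-model")
tokenizer = AutoTokenizer.from_pretrained("./my-finetuned-model")

model.push_to_hub("your-username/your-model-name")
tokenizer.push_to_hub("your-username/your-model-name")
```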
info/validated_chat_models.py ADDED
@@ -0,0 +1 @@
 
 
1
+ VALIDATED_CHAT_MODELS = ['Intel/neural-chat-7b-v1-1','More Coming Soon!']
src/__pycache__/processing.cpython-38.pyc ADDED
Binary file (779 Bytes). View file
 
src/leaderboard_filtered.csv DELETED
@@ -1,7 +0,0 @@
1
- ,Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
2
- 1,BetaWave,83.21,Arc GPU,fine-tuned,fp16,7,Local,70.44,92.32,78.67,85.55,90.0,Innovator
3
- 4,EpsilonWave,58.44,Xeon,fine-tuned,int8,3,AWS,91.22,82.1,60.55,80.11,77.89,Partner
4
- 6,EtaMatrix,69.78,Xeon,fine-tuned,int8,3,GCP,85.55,79.33,70.89,72.18,79.44,Liftoff
5
- 7,ThetaCore,88.12,Arc GPU,fine-tuned,int8,3,Local,67.33,85.78,88.55,86.9,83.11,Liftoff
6
- 14,BetaNeural,79.67,Gaudi 1,fine-tuned,4bit,7,AWS,85.44,77.22,83.1,75.45,71.33,Partner
7
- 15,TrackSpeed,88.12,Arc GPU,fine-tuned,4bit,7,Local,67.33,85.78,88.55,86.9,83.11,Student Ambassador
 
 
 
 
 
 
 
 
src/submit.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ # eval submission logic