eduardo-alvarez committed
Commit
cd2355c
1 Parent(s): 15635cd

create leaderboard
__pycache__/app.cpython-38.pyc ADDED
Binary file (4.74 kB).
 
app.py ADDED
@@ -0,0 +1,181 @@
+ import gradio as gr
+ import pandas as pd
+ import random
+ import time
+
+ from info.train_a_model import LLM_BENCHMARKS_TEXT
+ from info.submit import SUBMIT_TEXT
+ from info.deployment import DEPLOY_TEXT
+ from info.programs import PROGRAMS_TEXT
+ from info.citation import CITATION_TEXT
+ from src.processing import filter_benchmarks_table, make_clickable
+
+ demo = gr.Blocks()
+
+ with demo:
+
+     gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
+     gr.Markdown("This leaderboard is designed to evaluate, score, and rank open-source large language \
+                 models that have been pre-trained or fine-tuned on Intel Hardware 🦾")
+     gr.Markdown("Models submitted to the leaderboard are evaluated \
+                 on the Intel Developer Cloud ☁️")
+
+     # TODO: model comparison tool coming soon
+     #with gr.Accordion("🥊Large Language Model Boxing Ring 🥊", open=False):
+     #    with gr.Row():
+     #        chat_a = gr.Chatbot()
+     #        chat_b = gr.Chatbot()
+     #    msg = gr.Textbox()
+     #    gr.ClearButton([msg, chat_a])
+     #
+     #    def respond(message, chat_history):
+     #        bot_message = random.choice(["How are you?", "I love you", "I'm very hungry"])
+     #        chat_history.append((message, bot_message))
+     #        time.sleep(2)
+     #        return "", chat_history
+     #
+     #    msg.submit(respond, inputs=[msg, chat_a], outputs=[msg, chat_a])
+
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🏆 LLM Benchmark", elem_id="llm-benchmark-table", id=0):
+             with gr.Row():
+                 with gr.Column():
+                     filter_hw = gr.CheckboxGroup(choices=["Gaudi", "Xeon", "GPU Max", "Arc GPU", "Core Ultra"],
+                                                  label="Select Training Platform*",
+                                                  elem_id="compute_platforms",
+                                                  value=["Gaudi", "Xeon", "GPU Max", "Arc GPU", "Core Ultra"])
+                     filter_platform = gr.CheckboxGroup(choices=["Intel Developer Cloud", "AWS", "Azure", "GCP", "Local"],
+                                                        label="Training Infrastructure*",
+                                                        elem_id="training_infra",
+                                                        value=["Intel Developer Cloud", "AWS", "Azure", "GCP", "Local"])
+                     filter_affiliation = gr.CheckboxGroup(choices=["No Affiliation", "Intel Innovator", "Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"],
+                                                           label="Intel Program Affiliation",
+                                                           elem_id="program_affiliation",
+                                                           value=["No Affiliation", "Intel Innovator", "Intel Student Ambassador", "Intel Software Liftoff", "Intel Labs", "Other"])
+
+                 with gr.Column():
+                     filter_size = gr.CheckboxGroup(choices=[1, 3, 5, 7, 13, 35, 60, 70, 100],
+                                                    label="Model Sizes (Billions of Parameters)",
+                                                    elem_id="parameter_size",
+                                                    value=[1, 3, 5, 7, 13, 35, 60, 70, 100])
+                     filter_precision = gr.CheckboxGroup(choices=["fp8", "fp16", "bf16", "int8", "4bit"],
+                                                         label="Model Precision",
+                                                         elem_id="precision",
+                                                         value=["fp8", "fp16", "bf16", "int8", "4bit"])
+                     filter_type = gr.CheckboxGroup(choices=["pretrained", "fine-tuned", "chat-models", "merges/moerges"],
+                                                    label="Model Types",
+                                                    elem_id="model_types",
+                                                    value=["pretrained", "fine-tuned", "chat-models", "merges/moerges"])
+
+             initial_df = pd.read_csv("leaderboard_status_030424.csv")
+
+             # Show the full, unfiltered table on load instead of an empty frame
+             gradio_df_display = gr.Dataframe(value=initial_df)
+
+             def update_df(hw_selected, platform_selected, affiliation_selected, size_selected, precision_selected, type_selected):
+                 filtered_df = filter_benchmarks_table(df=initial_df, hw_selected=hw_selected, platform_selected=platform_selected,
+                                                       affiliation_selected=affiliation_selected, size_selected=size_selected,
+                                                       precision_selected=precision_selected, type_selected=type_selected)
+                 return filtered_df
+
+             # Re-filter the table whenever any checkbox group changes
+             filter_inputs = [filter_hw, filter_platform, filter_affiliation, filter_size, filter_precision, filter_type]
+             for component in filter_inputs:
+                 component.change(fn=update_df, inputs=filter_inputs, outputs=[gradio_df_display])
+
+         with gr.TabItem("🧰 Train a Model", elem_id="getting-started", id=1):
+             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+         with gr.TabItem("🚀 Deployment Tips", elem_id="deployment-tips", id=2):
+             gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text")
+         with gr.TabItem("👩‍💻 Developer Programs", elem_id="hardware-program", id=3):
+             gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text")
+         with gr.TabItem("🏎️ Submit", elem_id="submit", id=4):
+             gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text")
+             with gr.Row():
+                 gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
+             with gr.Row():
+                 with gr.Column():
+                     model_name_textbox = gr.Textbox(label="Model name")
+                     revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                     model_type = gr.Dropdown(
+                         choices=["pretrained", "fine-tuned", "chat models", "merges/moerges"],
+                         label="Model type",
+                         multiselect=False,
+                         value="pretrained",
+                         interactive=True,
+                     )
+                     hw_type = gr.Dropdown(
+                         choices=["Gaudi", "Xeon", "GPU Max", "Arc GPU"],
+                         label="Training Hardware",
+                         multiselect=False,
+                         value="Gaudi",  # must be one of the choices above ("Gaudi2" was not)
+                         interactive=True,
+                     )
+                     terms = gr.Checkbox(
+                         label="Check if you have read and agreed to the terms and conditions associated with submitting \
+                               a model to the leaderboard.",
+                         value=False,
+                         interactive=True,
+                     )
+                 with gr.Column():
+                     precision = gr.Dropdown(
+                         choices=["fp8", "fp16", "bf16", "int8", "4bit"],
+                         label="Precision",
+                         multiselect=False,
+                         value="fp16",
+                         interactive=True,
+                     )
+                     weight_type = gr.Dropdown(
+                         choices=["Original", "Adapter", "Delta"],
+                         label="Weights type",
+                         multiselect=False,
+                         value="Original",
+                         interactive=True,
+                     )
+                     training_infra = gr.Dropdown(
+                         choices=["IDC", "AWS", "Azure", "GCP", "Local"],
+                         label="Training Infrastructure",
+                         multiselect=False,
+                         value="IDC",
+                         interactive=True,
+                     )
+                     affiliation = gr.Dropdown(
+                         choices=["No Affiliation", "Innovator", "Student Ambassador", "Intel Liftoff", "Intel Labs", "Other"],
+                         label="Affiliation with Intel",
+                         multiselect=False,
+                         value="No Affiliation",  # must be one of the choices above ("Independent" was not)
+                         interactive=True,
+                     )
+                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+             #submit_button = gr.Button("Submit Eval")
+             #submission_result = gr.Markdown()
+             gr.Markdown("Community Submissions Coming Soon!")
+
+     with gr.Accordion("📙 Citation", open=False):
+         citation = gr.Textbox(value=CITATION_TEXT,
+                               lines=6,
+                               label="Use the following to cite this content")
+
+
+ demo.launch()
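
To try the Space locally, a minimal sketch (assuming the CSV ships alongside `app.py` and only `gradio` and `pandas` are needed):

```bash
pip install gradio pandas
python app.py  # Gradio serves on http://127.0.0.1:7860 by default
```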
info/__pycache__/citation.cpython-38.pyc ADDED
Binary file (466 Bytes).
 
info/__pycache__/deployment.cpython-38.pyc ADDED
Binary file (4.89 kB).
 
info/__pycache__/programs.cpython-38.pyc ADDED
Binary file (2.48 kB).
 
info/__pycache__/submit.cpython-38.pyc ADDED
Binary file (2.56 kB).
 
info/__pycache__/train_a_model.cpython-38.pyc ADDED
Binary file (3.54 kB).
 
info/citation.py ADDED
@@ -0,0 +1,8 @@
+ CITATION_TEXT = r"""@misc{powered-by-intel-llm-leaderboard,
+   author = {Eduardo Alvarez},
+   title = {Powered By Intel LLM Leaderboard},
+   year = {2024},
+   publisher = {Intel},
+   howpublished = "\url{https://huggingface.co/spaces/Intel/powered_by_intel_leaderboard}"
+ }
+ """
info/deployment.py ADDED
@@ -0,0 +1,126 @@
+ DEPLOY_TEXT = """
+
+ Having a table full of powerful models is nice and all, but at the end of the day, you have to be able to use
+ them for something. Below you will find sample code to help you load models and perform inference.
+
+
+ ## Inference with Gaudi 2
+ Habana's SDK, Intel Gaudi Software, supports PyTorch and DeepSpeed for accelerating LLM training and inference.
+ The Intel Gaudi Software graph compiler will optimize the execution of the operations accumulated in the graph
+ (e.g. operator fusion, data layout management, parallelization, pipelining and memory management,
+ and graph-level optimizations).
+
+ Optimum Habana provides convenient functionality for various tasks. Below you'll find the command line
+ snippet you would run to perform inference on Gaudi with meta-llama/Llama-2-7b-hf.
+
+ The `run_generation.py` script below can be found [here](https://github.com/huggingface/optimum-habana/tree/main/examples/text-generation).
+
+ ```bash
+ python run_generation.py \
+ --model_name_or_path meta-llama/Llama-2-7b-hf \
+ --use_hpu_graphs \
+ --use_kv_cache \
+ --max_new_tokens 100 \
+ --do_sample \
+ --batch_size 2 \
+ --prompt "Hello world" "How are you?"
+ ```
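+
+ Running the script assumes Optimum Habana is already set up on the Gaudi node; a hedged setup step (check the
+ example's own requirements for exact versions):
+ ```bash
+ pip install optimum[habana]
+ ```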
+
+ ## Inference with Intel Extension for Transformers
+ Intel® Extension for Transformers is an innovative toolkit designed to accelerate GenAI/LLM
+ everywhere with the optimal performance of Transformer-based models on various Intel platforms,
+ including Intel Gaudi2, Intel CPU, and Intel GPU.
+
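+ The examples below assume the toolkit itself is installed; one hedged way to get it:
+ ```bash
+ pip install intel-extension-for-transformers
+ ```
+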
+ ### INT4 Inference (CPU)
+ ```python
+ from transformers import AutoTokenizer
+ from intel_extension_for_transformers.transformers import AutoModelForCausalLM
+
+ model_name = "Intel/neural-chat-7b-v3-1"
+ prompt = "When winter becomes spring, the flowers..."
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ inputs = tokenizer(prompt, return_tensors="pt").input_ids
+
+ # load_in_4bit triggers weight-only INT4 quantization at load time
+ model = AutoModelForCausalLM.from_pretrained(model_name, load_in_4bit=True)
+ outputs = model.generate(inputs)
+ ```
+
+ ### INT4 Inference (GPU)
+ ```python
+ import torch  # was missing from the original imports; needed for torch.float16 below
+ import intel_extension_for_pytorch as ipex
+ from intel_extension_for_transformers.transformers.modeling import AutoModelForCausalLM
+ from transformers import AutoTokenizer
+
+ device_map = "xpu"
+ model_name = "Qwen/Qwen-7B"
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+ prompt = "When winter becomes spring, the flowers..."
+ inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(device_map)
+
+ model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
+                                              device_map=device_map, load_in_4bit=True)
+
+ model = ipex.optimize_transformers(model, inplace=True, dtype=torch.float16, woq=True, device=device_map)
+
+ output = model.generate(inputs)
+ ```
+
+ ## Intel Extension for PyTorch
+ Intel® Extension for PyTorch extends PyTorch with up-to-date feature optimizations for an
+ extra performance boost on Intel hardware. Optimizations take advantage of Intel® Advanced
+ Vector Extensions 512 (Intel® AVX-512), Vector Neural Network Instructions (VNNI), and Intel®
+ Advanced Matrix Extensions (Intel® AMX) on Intel CPUs, as well as Intel Xe Matrix Extensions
+ (XMX) AI engines on Intel discrete GPUs. Moreover, Intel® Extension for PyTorch* provides easy
+ GPU acceleration for Intel discrete GPUs through the PyTorch* xpu device.
+
+ There are a few flavors of PyTorch that can be leveraged for inference. For detailed documentation,
+ visit https://intel.github.io/intel-extension-for-pytorch/#introduction
+
+ ### IPEX with Optimum Intel (no quantization)
+ Requires installing/updating optimum: `pip install --upgrade-strategy eager optimum[ipex]`
+ ```python
+ from optimum.intel import IPEXModelForCausalLM
+ from transformers import AutoTokenizer, pipeline
+
+ model_id = "Intel/neural-chat-7b-v3-1"  # model_id was undefined in the original; any causal LM on the Hub works
+ model = IPEXModelForCausalLM.from_pretrained(model_id)
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+ results = pipe("A fisherman at sea...")
+ ```
+
+ ### IPEX with Stock PyTorch and Mixed Precision
+ ```python
+ import torch
+ import intel_extension_for_pytorch as ipex
+ import transformers
+
+ model_name_or_path = "Intel/neural-chat-7b-v3-1"  # placeholder; was undefined in the original
+ model = transformers.AutoModelForCausalLM.from_pretrained(model_name_or_path).eval()
+ tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path)
+
+ dtype = torch.float  # or torch.bfloat16
+ model = ipex.llm.optimize(model, dtype=dtype)
+
+ # generation inference loop
+ inputs = tokenizer("A fisherman at sea...", return_tensors="pt").input_ids
+ with torch.inference_mode():
+     model.generate(inputs, max_new_tokens=50)
+ ```
+
+ ## OpenVINO Toolkit
+
+ ```python
+ from optimum.intel import OVModelForCausalLM
+ from transformers import AutoTokenizer, pipeline
+
+ model_id = "helenai/gpt2-ov"
+ model = OVModelForCausalLM.from_pretrained(model_id)
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+
+ pipe("In the spring, beautiful flowers bloom...")
+ ```
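+
+ This path assumes the OpenVINO flavor of Optimum Intel is installed:
+ ```bash
+ pip install optimum[openvino]
+ ```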
+
+ """
info/programs.py ADDED
@@ -0,0 +1,37 @@
+ PROGRAMS_TEXT = """
+ Intel offers a range of programs to grant early, short, and long-term access to developers. A great way to build
+ and share models on the "Powered by Intel" LLM Leaderboard is to join one of these programs. Learn more about
+ these opportunities below:
+
+ ## Intel Liftoff Program
+ Intel® Liftoff for Startups is open to early-stage AI and machine learning startups. This free virtual program
+ helps you innovate and scale, no matter where you are in your entrepreneurial journey.
+
+ Through Intel Liftoff, startups can access the computational power they need to build powerful LLMs on platforms
+ like Gaudi, Max Series GPUs, and Xeon processors.
+
+ Learn more and apply at https://www.intel.com/content/www/us/en/developer/tools/oneapi/liftoff.html
+
+ ## Intel Student Ambassador Program
+ This program is focused on undergraduate and graduate students who are passionate about technology and
+ working with developer communities to promote learning, sharing, and collaboration. It provides opportunities
+ for students to enhance their AI and oneAPI skills, expand their network, and learn about cutting-edge Intel®
+ hardware and software products.
+
+ Through the Student Ambassador Program you can get early access to the latest technology developments that are under
+ a nondisclosure agreement (NDA) and extended access to the Intel® Developer Cloud.
+
+ Learn more and apply at https://www.intel.com/content/www/us/en/developer/tools/oneapi/training/academic-program/student-ambassador.html#gs.5f5oi3
+
+
+ ## Intel Innovator Program
+ This program is for developers using oneAPI on Intel® architecture who provide technical leadership and inspiration
+ to the global developer community through their projects, expertise, and advocacy. It provides recognition as a
+ oneAPI expert, opportunities for event support, a spotlight showcase for projects, and more.
+
+ Through this program you will have free access to paid features on the Intel® Developer Cloud, a development
+ environment for projects on the latest Intel technology, and interactions with
+ others in the community and within Intel.
+
+ Learn more and apply at https://www.intel.com/content/www/us/en/developer/community/innovators/oneapi-innovator.html
+ """
info/submit.py ADDED
@@ -0,0 +1,37 @@
+
+ SUBMIT_TEXT = """
+ # Evaluation Queue for the 🤗"Powered by Intel" LLM Leaderboard 💻
+ Models added here will be queued for evaluation on the Intel Developer Cloud ☁️
+ ## First steps before submitting a model
+
+ ### 1) Make sure you can load your model and tokenizer using AutoClasses:
+ ```python
+ from transformers import AutoConfig, AutoModel, AutoTokenizer
+ config = AutoConfig.from_pretrained("your model name", revision=revision)
+ model = AutoModel.from_pretrained("your model name", revision=revision)
+ tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
+ ```
+ If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
+ Note: make sure your model is public!
+ Note: if your model needs `trust_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
+
+ ### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
+ It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
+
21
+ ### 3) Make sure your model has an open license!
22
+ This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
23
+
24
+ ### 4) Fill up your model card
25
+ We use your model card to better understand the properties of your model and make them more easily discoverable for other users.
26
+ Model cards are required to have mentions of the hardware, software, and infrastructure used for training - without this information
27
+ we cannot accept your model as a valid submission.
28
+
29
+ ### 5) Select the correct precision
30
+ Not all models are converted properly from `float16` to `bfloat16`, and selecting the wrong precision can sometimes cause evaluation error (as loading a `bf16` model in `fp16` can sometimes generate NaNs, depending on the weight range).
31
+
32
+ ## In case of model failure
33
+ If your model is displayed in the `FAILED` category, its execution stopped.
34
+ Make sure you have followed the above steps first.
35
+ If everything is done, check you can launch the EleutherAIHarness on your model locally, using the command in the About tab under "Reproducibility" with all arguments specified (you can add `--limit` to limit the number of examples per task).
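+
+ A hypothetical local run with a recent `lm-evaluation-harness` install (task names and flags vary by version):
+ ```bash
+ pip install lm-eval
+ lm_eval --model hf \
+ --model_args pretrained=your-username/your-model \
+ --tasks arc_challenge,hellaswag \
+ --limit 10
+ ```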
+
+ """
info/train_a_model.py ADDED
@@ -0,0 +1,73 @@
+
+ LLM_BENCHMARKS_TEXT = """
+ # Use the Resources Below to Start Training a Model Today
+
+ Intel offers a variety of platforms that can be used to train LLMs, including datacenter- and consumer-grade CPUs, GPUs, and ASICs.
+ Below, you'll find documentation on how to access free and paid resources to train a model and submit it to the "Powered by Intel" LLM Leaderboard.
+
+ ## Intel Developer Cloud - Quick Start
+ The Intel Developer Cloud is one of the best places to access free and paid compute instances for model training. Intel offers Jupyter Notebook instances supported by
+ 224-core 4th Generation Xeon bare-metal nodes with 4x Max Series GPU 1100s. To access these resources, follow the instructions below (a rough fine-tuning sketch follows the list):
+ 1. Visit [cloud.intel.com](https://cloud.intel.com) and create a free account.
+ 2. Navigate to the "Training" module under the "Software" section in the left panel.
+ 3. Under the GenAI Essentials section, select the LLM Fine-Tuning with QLoRA notebook and click "Launch".
+ 4. Follow the instructions in the notebook to train your model using Intel Max Series 1100 GPUs.
+ 5. Upload your model to the Hugging Face Model Hub.
+ 6. Go to the "Submit" tab and follow the instructions to create a leaderboard evaluation request.
+
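+ The notebook's exact code isn't reproduced here, but as a rough illustration, parameter-efficient fine-tuning
+ with LoRA via Hugging Face `peft` looks something like this (model name and hyperparameters are placeholders):
+ ```python
+ from transformers import AutoModelForCausalLM
+ from peft import LoraConfig, get_peft_model
+
+ base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")  # placeholder model
+ lora = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05,
+                   target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM")
+ model = get_peft_model(base, lora)
+ model.print_trainable_parameters()  # only the LoRA adapter weights are trainable
+ # ...train with transformers.Trainer or a custom loop, then push_to_hub(...)
+ ```
+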
+ ## Additional Training Code Samples
+
+ Below you will find a list of additional resources for training models on different Intel hardware platforms:
+ - Gaudi Processors
+   - [Parameter Efficient Fine-Tuning of Llama-2 70B](https://github.com/HabanaAI/Gaudi-tutorials/blob/main/PyTorch/llama2_fine_tuning_inference/llama2_fine_tuning_inference.ipynb)
+ - Xeon Processors
+   - [Distributed Training of GPT2 LLMs on AWS](https://github.com/intel/intel-cloud-optimizations-aws/tree/main/distributed-training)
+   - [Fine-tuning Falcon 7B on Xeon Processors](https://medium.com/@eduand-alvarez/fine-tune-falcon-7-billion-on-xeon-cpus-with-hugging-face-and-oneapi-a25e10803a53)
+ - Max Series GPUs
+   - [LLM Fine-tuning with QLoRA on Max Series GPUs](https://console.idcservice.net/training/detail/159c24e4-5598-3155-a790-2qv973tlm172)
+
+ ## Submitting your Model to the Hub
+ Once you have trained your model, it is a straightforward process to upload and open-source it on the Hugging Face Hub.
+
+ ```python
+ # Logging in to Hugging Face
+ from huggingface_hub import notebook_login
+
+ # Login to Hugging Face (prompts for a token in notebook environments)
+ notebook_login()
+
+ # Model and tokenizer loading
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+ # Define the path to the checkpoint
+ checkpoint_path = ""  # Replace with your checkpoint folder
+
+ # Load the model
+ model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)
+
+ # Load the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("")  # add the name of your model's tokenizer on Hugging Face OR a custom tokenizer
+
+ # Saving and uploading the model and tokenizer
+ model_name_on_hub = "desired-model-name"
+ model.save_pretrained(model_name_on_hub)
+ tokenizer.save_pretrained(model_name_on_hub)
+
+ # Push to the hub
+ model.push_to_hub(model_name_on_hub)
+ tokenizer.push_to_hub(model_name_on_hub)
+
+ # Congratulations! Your fine-tuned model is now uploaded to the Hugging Face Model Hub.
+ # You can view and share your model using its URL: https://huggingface.co/your-username/your-model-name
+ ```
+ """
+
+ SUBMIT_TEXT = """
+ # Use the Resources Below to Start Training a Model Today
+ """
leaderboard_status_030424.csv ADDED
@@ -0,0 +1,5 @@
+ Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,HellaSwag,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
+ Intel/neural-chat-7b-v3-3,69.83,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.89,85.26,63.07,63.01,79.64,61.11,Intel Labs
+ Intel/neural-chat-7b-v3-2,68.29,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.49,83.92,63.55,59.68,79.65,55.12,Intel Labs
+ Intel/neural-chat-7b-v3-1,61.59,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,66.21,83.64,62.37,59.65,78.14,19.56,Intel Labs
+ Intel/neural-chat-7b-v3,58.46,Gaudi,fine-tuned,fp16,7,Intel Developer Cloud,67.15,83.29,62.26,58.77,78.06,1.21,Intel Labs
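
The `Average` column tracks the arithmetic mean of the six benchmark columns (exact for most rows, within rounding for the rest); a quick pandas recomputation, assuming the CSV above is on disk:

```python
import pandas as pd

df = pd.read_csv("leaderboard_status_030424.csv")
benchmarks = ["ARC", "HellaSwag", "MMLU", "TruthfulQA", "Winogrande", "GSM8K"]
# e.g. neural-chat-7b-v3-3: (66.89 + 85.26 + 63.07 + 63.01 + 79.64 + 61.11) / 6 = 69.83
print(df.assign(Recomputed=df[benchmarks].mean(axis=1).round(2))[["Model", "Average", "Recomputed"]])
```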
src/__pycache__/processing.cpython-38.pyc ADDED
Binary file (779 Bytes).
 
src/leaderboard_filtered.csv ADDED
@@ -0,0 +1,7 @@
+ ,Model,Average,Hardware,Model Type,Precision,Size,Infrastructure,ARC,MMLU,TruthfulQA,Winogrande,GSM8K,Affiliation
+ 1,BetaWave,83.21,Arc GPU,fine-tuned,fp16,7,Local,70.44,92.32,78.67,85.55,90.0,Innovator
+ 4,EpsilonWave,58.44,Xeon,fine-tuned,int8,3,AWS,91.22,82.1,60.55,80.11,77.89,Partner
+ 6,EtaMatrix,69.78,Xeon,fine-tuned,int8,3,GCP,85.55,79.33,70.89,72.18,79.44,Liftoff
+ 7,ThetaCore,88.12,Arc GPU,fine-tuned,int8,3,Local,67.33,85.78,88.55,86.9,83.11,Liftoff
+ 14,BetaNeural,79.67,Gaudi 1,fine-tuned,4bit,7,AWS,85.44,77.22,83.1,75.45,71.33,Partner
+ 15,TrackSpeed,88.12,Arc GPU,fine-tuned,4bit,7,Local,67.33,85.78,88.55,86.9,83.11,Student Ambassador
src/processing.py ADDED
@@ -0,0 +1,20 @@
+
+ def filter_benchmarks_table(df, hw_selected, platform_selected,
+                             size_selected, precision_selected,
+                             type_selected, affiliation_selected):
+     """Return only the rows whose columns match every selected filter."""
+     filtered_df = df[
+         df['Hardware'].isin(hw_selected) &
+         df['Infrastructure'].isin(platform_selected) &
+         df['Size'].isin(size_selected) &
+         df['Precision'].isin(precision_selected) &
+         df['Model Type'].isin(type_selected) &
+         df['Affiliation'].isin(affiliation_selected)]
+
+     return filtered_df
+
+ def make_clickable(val):
+     """Wrap a URL in an HTML anchor so it renders as a link."""
+     return f'<a target="_blank" href="{val}">{val}</a>'
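
`make_clickable` is imported in `app.py` but not yet applied; a hypothetical wiring that turns each model cell into a clickable Hub link (the `datatype` argument is an assumption about how the table would render HTML):

```python
import pandas as pd
from src.processing import make_clickable

df = pd.read_csv("leaderboard_status_030424.csv")
# Each Model cell becomes an anchor whose text is the model's Hub URL
df["Model"] = df["Model"].apply(lambda m: make_clickable(f"https://huggingface.co/{m}"))
# In app.py, gr.Dataframe(value=df, datatype="html") would then render the anchors
```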
src/submit.py ADDED
File without changes