Sai Vinay G commited on
Commit
0f32a96
1 Parent(s): 212e5f2
README.md CHANGED
File without changes
app.py CHANGED
@@ -2,6 +2,8 @@ import json
2
  import os
3
  from datetime import datetime, timezone
4
 
 
 
5
 
6
  import gradio as gr
7
  import numpy as np
 
2
  import os
3
  from datetime import datetime, timezone
4
 
5
+ os.environ["H4_TOKEN"]="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"
6
+ os.environ["IS_PUBLIC"]="True"
7
 
8
  import gradio as gr
9
  import numpy as np
requirements.txt CHANGED
File without changes
src/assets/css_html_js.py CHANGED
File without changes
src/assets/hardcoded_evals.py CHANGED
@@ -10,6 +10,7 @@ gpt4_values = {
10
  AutoEvalColumn.mmlu.name: 86.4,
11
  AutoEvalColumn.truthfulqa.name: 59.0,
12
  AutoEvalColumn.dummy.name: "GPT-4",
 
13
  }
14
 
15
  gpt35_values = {
@@ -22,6 +23,7 @@ gpt35_values = {
22
  AutoEvalColumn.mmlu.name: 70.0,
23
  AutoEvalColumn.truthfulqa.name: 47.0,
24
  AutoEvalColumn.dummy.name: "GPT-3.5",
 
25
  }
26
 
27
  baseline = {
@@ -34,5 +36,5 @@ baseline = {
34
  AutoEvalColumn.mmlu.name: 25.0,
35
  AutoEvalColumn.truthfulqa.name: 25.0,
36
  AutoEvalColumn.dummy.name: "baseline",
 
37
  }
38
-
 
10
  AutoEvalColumn.mmlu.name: 86.4,
11
  AutoEvalColumn.truthfulqa.name: 59.0,
12
  AutoEvalColumn.dummy.name: "GPT-4",
13
+ AutoEvalColumn.model_type.name: "",
14
  }
15
 
16
  gpt35_values = {
 
23
  AutoEvalColumn.mmlu.name: 70.0,
24
  AutoEvalColumn.truthfulqa.name: 47.0,
25
  AutoEvalColumn.dummy.name: "GPT-3.5",
26
+ AutoEvalColumn.model_type.name: "",
27
  }
28
 
29
  baseline = {
 
36
  AutoEvalColumn.mmlu.name: 25.0,
37
  AutoEvalColumn.truthfulqa.name: 25.0,
38
  AutoEvalColumn.dummy.name: "baseline",
39
+ AutoEvalColumn.model_type.name: "",
40
  }
 
src/assets/scale-hf-logo.png CHANGED

Git LFS Details

  • SHA256: 11a263a1abe4c7c9cf022cbe052dc567dcea164bdfbc111299aae3270e992934
  • Pointer size: 132 Bytes
  • Size of remote file: 1.88 MB

Git LFS Details

  • SHA256: 11a263a1abe4c7c9cf022cbe052dc567dcea164bdfbc111299aae3270e992934
  • Pointer size: 132 Bytes
  • Size of remote file: 1.88 MB
src/assets/text_content.py CHANGED
@@ -62,6 +62,8 @@ INTRODUCTION_TEXT = f"""
62
  🤗 Anyone from the community can submit a model for automated evaluation on the 🤗 GPU cluster, as long as it is a 🤗 Transformers model with weights on the Hub. We also support evaluation of models with delta-weights for non-commercial licensed models, such as the original LLaMa release.
63
 
64
  Other cool benchmarks for LLMs are developped at HuggingFace, go check them out: 🙋🤖 [human and GPT4 evals](https://huggingface.co/spaces/HuggingFaceH4/human_eval_llm_leaderboard), 🖥️ [performance benchmarks](https://huggingface.co/spaces/optimum/llm-perf-leaderboard)
 
 
65
  """
66
 
67
  LLM_BENCHMARKS_TEXT = f"""
@@ -75,6 +77,7 @@ With the plethora of large language models (LLMs) and chatbots being released we
75
  - <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU </a> (5-shot) - a test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more.
76
  - <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA </a> (0-shot) - a test to measure a model’s propensity to reproduce falsehoods commonly found online. Note: TruthfulQA in the Harness is actually a minima a 6-shots task, as it is prepended by 6 examples systematically, even when launched using 0 for the number of few-shot examples.
77
 
 
78
  We chose these benchmarks as they test a variety of reasoning and general knowledge across a wide variety of fields in 0-shot and few-shot settings.
79
 
80
  # Some good practices before submitting a model
@@ -127,6 +130,13 @@ To get more information about quantization, see:
127
  - 8 bits: [blog post](https://huggingface.co/blog/hf-bitsandbytes-integration), [paper](https://arxiv.org/abs/2208.07339)
128
  - 4 bits: [blog post](https://huggingface.co/blog/4bit-transformers-bitsandbytes), [paper](https://arxiv.org/abs/2305.14314)
129
 
 
 
 
 
 
 
 
130
  # In case of model failure
131
  If your model is displayed in the `FAILED` category, its execution stopped.
132
  Make sure you have followed the above steps first.
@@ -140,13 +150,13 @@ These models will be automatically evaluated on the 🤗 cluster.
140
  """
141
 
142
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
143
- CITATION_BUTTON_TEXT = r"""@misc{open-llm-leaderboard,
 
144
  author = {Edward Beeching, Clémentine Fourrier, Nathan Habib, Sheon Han, Nathan Lambert, Nazneen Rajani, Omar Sanseviero, Lewis Tunstall, Thomas Wolf},
145
  title = {Open LLM Leaderboard},
146
  year = {2023},
147
  publisher = {Hugging Face},
148
  howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
149
-
150
  }
151
  @software{eval-harness,
152
  author = {Gao, Leo and
 
62
  🤗 Anyone from the community can submit a model for automated evaluation on the 🤗 GPU cluster, as long as it is a 🤗 Transformers model with weights on the Hub. We also support evaluation of models with delta-weights for non-commercial licensed models, such as the original LLaMa release.
63
 
64
  Other cool benchmarks for LLMs are developped at HuggingFace, go check them out: 🙋🤖 [human and GPT4 evals](https://huggingface.co/spaces/HuggingFaceH4/human_eval_llm_leaderboard), 🖥️ [performance benchmarks](https://huggingface.co/spaces/optimum/llm-perf-leaderboard)
65
+
66
+ 🟢: Base pretrained model – 🔶: Instruction finetuned model – 🟦: Model finetuned with RL (read more details in "About" tab)
67
  """
68
 
69
  LLM_BENCHMARKS_TEXT = f"""
 
77
  - <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU </a> (5-shot) - a test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more.
78
  - <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA </a> (0-shot) - a test to measure a model’s propensity to reproduce falsehoods commonly found online. Note: TruthfulQA in the Harness is actually a minima a 6-shots task, as it is prepended by 6 examples systematically, even when launched using 0 for the number of few-shot examples.
79
 
80
+ For all these evaluations, a higher score is a better score.
81
  We chose these benchmarks as they test a variety of reasoning and general knowledge across a wide variety of fields in 0-shot and few-shot settings.
82
 
83
  # Some good practices before submitting a model
 
130
  - 8 bits: [blog post](https://huggingface.co/blog/hf-bitsandbytes-integration), [paper](https://arxiv.org/abs/2208.07339)
131
  - 4 bits: [blog post](https://huggingface.co/blog/4bit-transformers-bitsandbytes), [paper](https://arxiv.org/abs/2305.14314)
132
 
133
+ ### Icons
134
+ 🟢 means that the model is pretrained
135
+ 🔶 that it is finetuned
136
+ 🟦 that is was trained with RL.
137
+ If there is no icon, we have not uploaded the information on the model yet, feel free to open an issue with the model information!
138
+
139
+
140
  # In case of model failure
141
  If your model is displayed in the `FAILED` category, its execution stopped.
142
  Make sure you have followed the above steps first.
 
150
  """
151
 
152
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
153
+ CITATION_BUTTON_TEXT = r"""
154
+ @misc{open-llm-leaderboard,
155
  author = {Edward Beeching, Clémentine Fourrier, Nathan Habib, Sheon Han, Nathan Lambert, Nazneen Rajani, Omar Sanseviero, Lewis Tunstall, Thomas Wolf},
156
  title = {Open LLM Leaderboard},
157
  year = {2023},
158
  publisher = {Hugging Face},
159
  howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
 
160
  }
161
  @software{eval-harness,
162
  author = {Gao, Leo and
src/auto_leaderboard/get_model_metadata.py CHANGED
@@ -53,4 +53,4 @@ def get_model_size(model_name, model_info):
53
 
54
  def apply_metadata(leaderboard_data: List[dict]):
55
  get_model_type(leaderboard_data)
56
- get_model_infos_from_hub(leaderboard_data)
 
53
 
54
  def apply_metadata(leaderboard_data: List[dict]):
55
  get_model_type(leaderboard_data)
56
+ get_model_infos_from_hub(leaderboard_data)
src/auto_leaderboard/load_results.py CHANGED
@@ -4,6 +4,7 @@ import glob
4
  import json
5
  import os
6
  from typing import Dict, List, Tuple
 
7
 
8
  from src.utils_display import AutoEvalColumn, make_clickable_model
9
  import numpy as np
@@ -26,6 +27,8 @@ class EvalResult:
26
  revision: str
27
  results: dict
28
  precision: str = "16bit"
 
 
29
 
30
  def to_dict(self):
31
  if self.org is not None:
@@ -35,7 +38,9 @@ class EvalResult:
35
  data_dict = {}
36
 
37
  data_dict["eval_name"] = self.eval_name # not a column, just a save name
 
38
  data_dict[AutoEvalColumn.precision.name] = self.precision
 
39
  data_dict[AutoEvalColumn.model.name] = make_clickable_model(base_model)
40
  data_dict[AutoEvalColumn.dummy.name] = base_model
41
  data_dict[AutoEvalColumn.revision.name] = self.revision
@@ -92,23 +97,37 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
92
  continue
93
  mean_acc = round(np.mean(accs) * 100.0, 1)
94
  eval_results.append(EvalResult(
95
- result_key, org, model, model_sha, {benchmark: mean_acc}
96
  ))
97
 
98
  return result_key, eval_results
99
 
100
 
101
  def get_eval_results(is_public) -> List[EvalResult]:
102
- json_filepaths = glob.glob(
103
- "eval-results/**/results*.json", recursive=True
104
- )
105
- if not is_public:
106
- json_filepaths += glob.glob(
107
- "private-eval-results/**/results*.json", recursive=True
108
- )
109
 
110
- eval_results = {}
 
 
 
 
 
 
 
 
 
111
 
 
 
 
 
 
 
 
 
 
 
 
112
  for json_filepath in json_filepaths:
113
  result_key, results = parse_eval_result(json_filepath)
114
  for eval_result in results:
@@ -125,4 +144,4 @@ def get_eval_results(is_public) -> List[EvalResult]:
125
  def get_eval_results_dicts(is_public=True) -> List[Dict]:
126
  eval_results = get_eval_results(is_public)
127
 
128
- return [e.to_dict() for e in eval_results]
 
4
  import json
5
  import os
6
  from typing import Dict, List, Tuple
7
+ import dateutil
8
 
9
  from src.utils_display import AutoEvalColumn, make_clickable_model
10
  import numpy as np
 
27
  revision: str
28
  results: dict
29
  precision: str = "16bit"
30
+ model_type: str = ""
31
+ weight_type: str = ""
32
 
33
  def to_dict(self):
34
  if self.org is not None:
 
38
  data_dict = {}
39
 
40
  data_dict["eval_name"] = self.eval_name # not a column, just a save name
41
+ data_dict["weight_type"] = self.weight_type # not a column, just a save name
42
  data_dict[AutoEvalColumn.precision.name] = self.precision
43
+ data_dict[AutoEvalColumn.model_type.name] = self.model_type
44
  data_dict[AutoEvalColumn.model.name] = make_clickable_model(base_model)
45
  data_dict[AutoEvalColumn.dummy.name] = base_model
46
  data_dict[AutoEvalColumn.revision.name] = self.revision
 
97
  continue
98
  mean_acc = round(np.mean(accs) * 100.0, 1)
99
  eval_results.append(EvalResult(
100
+ eval_name=result_key, org=org, model=model, revision=model_sha, results={benchmark: mean_acc}, #todo model_type=, weight_type=
101
  ))
102
 
103
  return result_key, eval_results
104
 
105
 
106
  def get_eval_results(is_public) -> List[EvalResult]:
107
+ json_filepaths = []
 
 
 
 
 
 
108
 
109
+ for root, dir, files in os.walk("eval-results"):
110
+ # We should only have json files in model results
111
+ if len(files) == 0 or any([not f.endswith(".json") for f in files]):
112
+ continue
113
+
114
+ # Sort the files by date
115
+ try:
116
+ files.sort(key=lambda x: dateutil.parser.parse(x.split("_", 1)[-1][:-5]))
117
+ except dateutil.parser._parser.ParserError:
118
+ up_to_date = files[-1]
119
 
120
+ up_to_date = files[-1]
121
+
122
+ if len(files) > 1:
123
+ print(root)
124
+ print(files)
125
+ print(up_to_date)
126
+ print("===")
127
+
128
+ json_filepaths.append(os.path.join(root, up_to_date))
129
+
130
+ eval_results = {}
131
  for json_filepath in json_filepaths:
132
  result_key, results = parse_eval_result(json_filepath)
133
  for eval_result in results:
 
144
  def get_eval_results_dicts(is_public=True) -> List[Dict]:
145
  eval_results = get_eval_results(is_public)
146
 
147
+ return [e.to_dict() for e in eval_results]
src/auto_leaderboard/model_metadata_type.py CHANGED
@@ -1,172 +1,487 @@
 
1
  from enum import Enum
2
  from typing import Dict, List
3
 
 
 
 
 
 
 
 
 
4
  class ModelType(Enum):
5
- PT = "pretrained"
6
- SFT = "finetuned"
7
- RL = "with RL"
8
 
9
 
10
  TYPE_METADATA: Dict[str, ModelType] = {
 
11
  "aisquared/dlite-v1-355m": ModelType.SFT,
 
 
 
 
 
12
  "aisquared/dlite-v2-774m": ModelType.SFT,
13
  "aisquared/dlite-v2-1_5b": ModelType.SFT,
 
 
 
 
 
14
  "TheBloke/wizardLM-7B-HF": ModelType.SFT,
 
 
 
15
  "TheBloke/dromedary-65b-lora-HF": ModelType.SFT,
 
 
 
 
 
16
  "TheBloke/vicuna-13B-1.1-HF": ModelType.SFT,
 
 
 
 
17
  "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": ModelType.SFT,
18
- "wordcab/llama-natural-instructions-13b": ModelType.SFT,
19
- "JosephusCheung/Guanaco": ModelType.SFT,
20
- "AlekseyKorshuk/vicuna-7b": ModelType.SFT,
 
 
 
 
21
  "AlekseyKorshuk/chatml-pyg-v1": ModelType.SFT,
 
22
  "concedo/OPT-19M-ChatSalad": ModelType.SFT,
 
 
 
 
 
 
 
23
  "digitous/Javalion-R": ModelType.SFT,
 
24
  "digitous/Alpacino30b": ModelType.SFT,
 
 
25
  "digitous/Javelin-GPTJ": ModelType.SFT,
 
26
  "anton-l/gpt-j-tiny-random": ModelType.SFT,
 
 
 
27
  "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1": ModelType.SFT,
 
 
28
  "gpt2-medium": ModelType.PT,
 
 
29
  "PygmalionAI/pygmalion-6b": ModelType.SFT,
 
 
 
 
30
  "medalpaca/medalpaca-7b": ModelType.SFT,
31
- "medalpaca/medalpaca-13b": ModelType.SFT,
32
- "chavinlo/alpaca-13b": ModelType.SFT,
33
- "chavinlo/alpaca-native": ModelType.SFT,
34
- "chavinlo/gpt4-x-alpaca": ModelType.SFT,
 
 
 
35
  "hakurei/lotus-12B": ModelType.SFT,
36
- "amazon/LightGPT": ModelType.SFT,
37
  "shibing624/chinese-llama-plus-13b-hf": ModelType.SFT,
38
- "mosaicml/mpt-7b": ModelType.PT,
39
- "PSanni/Deer-3b": ModelType.SFT,
40
- "bigscience/bloom-1b1": ModelType.PT,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  "MetaIX/GPT4-X-Alpasta-30b": ModelType.SFT,
 
 
 
 
 
 
 
 
 
42
  "EleutherAI/gpt-neox-20b": ModelType.PT,
 
 
 
 
43
  "EleutherAI/gpt-j-6b": ModelType.PT,
44
- "roneneldan/TinyStories-28M": ModelType.SFT,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  "lmsys/vicuna-13b-delta-v1.1": ModelType.SFT,
46
  "lmsys/vicuna-7b-delta-v1.1": ModelType.SFT,
47
  "abhiramtirumala/DialoGPT-sarcastic-medium": ModelType.SFT,
 
 
 
48
  "pillowtalks-ai/delta13b": ModelType.SFT,
 
 
49
  "bigcode/starcoderplus": ModelType.SFT,
 
 
 
50
  "microsoft/DialoGPT-large": ModelType.SFT,
 
 
51
  "microsoft/CodeGPT-small-py": ModelType.SFT,
 
52
  "Pirr/pythia-13b-deduped-green_devil": ModelType.SFT,
53
  "Aeala/GPT4-x-AlpacaDente2-30b": ModelType.SFT,
 
 
54
  "Aeala/VicUnlocked-alpaca-30b": ModelType.SFT,
55
- "dvruette/llama-13b-pretrained-sft-epoch-2": ModelType.SFT,
 
 
56
  "dvruette/oasst-gpt-neox-20b-1000-steps": ModelType.SFT,
57
- "openlm-research/open_llama_3b_350bt_preview": ModelType.PT,
 
 
 
 
 
 
 
 
58
  "openlm-research/open_llama_7b_700bt_preview": ModelType.PT,
59
  "openlm-research/open_llama_7b": ModelType.PT,
 
60
  "openlm-research/open_llama_3b": ModelType.PT,
61
- "openlm-research/open_llama_7b_400bt_preview": ModelType.PT,
 
62
  "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged": ModelType.SFT,
63
- "GeorgiaTechResearchInstitute/galactica-6.7b-evol-instruct-70k": ModelType.SFT,
 
64
  "databricks/dolly-v2-7b": ModelType.SFT,
65
  "databricks/dolly-v2-3b": ModelType.SFT,
66
  "databricks/dolly-v2-12b": ModelType.SFT,
67
- "pinkmanlove/llama-65b-hf": ModelType.SFT,
68
  "Rachneet/gpt2-xl-alpaca": ModelType.SFT,
69
  "Locutusque/gpt2-conversational-or-qa": ModelType.SFT,
 
70
  "NbAiLab/nb-gpt-j-6B-alpaca": ModelType.SFT,
 
 
 
 
71
  "Fredithefish/ScarletPajama-3B-HF": ModelType.SFT,
 
 
72
  "eachadea/vicuna-7b-1.1": ModelType.SFT,
73
  "eachadea/vicuna-13b": ModelType.SFT,
74
  "openaccess-ai-collective/wizard-mega-13b": ModelType.SFT,
75
  "openaccess-ai-collective/manticore-13b": ModelType.SFT,
76
  "openaccess-ai-collective/manticore-30b-chat-pyg-alpha": ModelType.SFT,
77
  "openaccess-ai-collective/minotaur-13b": ModelType.SFT,
78
- "lamini/instruct-tuned-3b": ModelType.SFT,
 
 
79
  "pythainlp/wangchanglm-7.5B-sft-enth": ModelType.SFT,
80
  "pythainlp/wangchanglm-7.5B-sft-en-sharded": ModelType.SFT,
 
 
81
  "stabilityai/stablelm-tuned-alpha-7b": ModelType.SFT,
 
 
 
 
 
82
  "CalderaAI/30B-Lazarus": ModelType.SFT,
 
 
 
 
 
 
 
 
 
 
 
83
  "KoboldAI/OPT-13B-Nerybus-Mix": ModelType.SFT,
84
- "distilgpt2": ModelType.PT,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  "wahaha1987/llama_7b_sharegpt94k_fastchat": ModelType.SFT,
 
 
 
86
  "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5": ModelType.SFT,
 
 
 
87
  "junelee/wizard-vicuna-13b": ModelType.SFT,
 
 
 
 
 
 
88
  "BreadAi/StoryPy": ModelType.SFT,
 
 
 
 
 
 
 
 
89
  "togethercomputer/RedPajama-INCITE-Base-3B-v1": ModelType.PT,
 
90
  "togethercomputer/RedPajama-INCITE-Base-7B-v0.1": ModelType.PT,
 
 
 
 
 
91
  "Writer/camel-5b-hf": ModelType.SFT,
92
  "Writer/palmyra-base": ModelType.PT,
 
 
 
 
 
 
93
  "MBZUAI/lamini-neo-125m": ModelType.SFT,
94
- "TehVenom/DiffMerge_Pygmalion_Main-onto-V8P4": ModelType.SFT,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  "vicgalle/gpt2-alpaca-gpt4": ModelType.SFT,
96
  "facebook/opt-350m": ModelType.PT,
97
  "facebook/opt-125m": ModelType.PT,
 
 
 
 
98
  "facebook/opt-13b": ModelType.PT,
99
- "facebook/opt-1.3b": ModelType.PT,
100
  "facebook/opt-66b": ModelType.PT,
101
- "facebook/galactica-120b": ModelType.PT,
102
- "Abe13/jgpt2-v1": ModelType.SFT,
 
 
 
 
 
 
103
  "gpt2-xl": ModelType.PT,
104
- "HuggingFaceH4/stable-vicuna-13b-2904": ModelType.RL,
105
- "HuggingFaceH4/llama-7b-ift-alpaca": ModelType.SFT,
106
- "HuggingFaceH4/starchat-alpha": ModelType.SFT,
 
 
 
107
  "HuggingFaceH4/starchat-beta": ModelType.SFT,
108
- "ausboss/Llama30B-SuperHOT": ModelType.SFT,
 
 
 
 
109
  "ausboss/llama-13b-supercot": ModelType.SFT,
110
  "ausboss/llama-30b-supercot": ModelType.SFT,
111
  "Neko-Institute-of-Science/metharme-7b": ModelType.SFT,
 
112
  "SebastianSchramm/Cerebras-GPT-111M-instruction": ModelType.SFT,
113
  "victor123/WizardLM-13B-1.0": ModelType.SFT,
114
- "AlpinDale/pygmalion-instruct": ModelType.SFT,
115
- "tiiuae/falcon-7b-instruct": ModelType.SFT,
116
  "tiiuae/falcon-40b-instruct": ModelType.SFT,
117
  "tiiuae/falcon-40b": ModelType.PT,
118
  "tiiuae/falcon-7b": ModelType.PT,
119
- "cyl/awsome-llama": ModelType.SFT,
120
- "xzuyn/Alpacino-SuperCOT-13B": ModelType.SFT,
 
 
 
121
  "xzuyn/MedicWizard-7B": ModelType.SFT,
122
  "beomi/KoAlpaca-Polyglot-5.8B": ModelType.SFT,
123
- "chainyo/alpaca-lora-7b": ModelType.SFT,
 
124
  "Salesforce/codegen-16B-nl": ModelType.PT,
125
- "Salesforce/codegen-16B-multi": ModelType.SFT,
126
  "ai-forever/rugpt3large_based_on_gpt2": ModelType.SFT,
127
  "gpt2-large": ModelType.PT,
 
 
128
  "huggingface/llama-13b": ModelType.PT,
129
  "huggingface/llama-7b": ModelType.PT,
130
  "huggingface/llama-65b": ModelType.PT,
 
131
  "huggingface/llama-30b": ModelType.PT,
132
- "jondurbin/airoboros-7b": ModelType.SFT,
133
- "jondurbin/airoboros-13b": ModelType.SFT,
 
 
 
 
 
 
134
  "cerebras/Cerebras-GPT-1.3B": ModelType.PT,
 
 
135
  "cerebras/Cerebras-GPT-111M": ModelType.PT,
 
 
 
 
 
136
  "NousResearch/Nous-Hermes-13b": ModelType.SFT,
137
  "project-baize/baize-v2-7b": ModelType.SFT,
138
  "project-baize/baize-v2-13b": ModelType.SFT,
 
139
  "LLMs/AlpacaGPT4-7B-elina": ModelType.SFT,
140
- "LLMs/Vicuna-EvolInstruct-13B": ModelType.SFT,
 
 
141
  "huggingtweets/jerma985": ModelType.SFT,
 
 
 
142
  "huggyllama/llama-65b": ModelType.PT,
 
 
 
143
  "WizardLM/WizardLM-13B-1.0": ModelType.SFT,
 
 
144
  "gpt2": ModelType.PT,
145
- "alessandropalla/instruct_gpt2": ModelType.SFT,
 
 
 
 
146
  "MayaPH/FinOPT-Lincoln": ModelType.SFT,
147
  "MayaPH/FinOPT-Franklin": ModelType.SFT,
 
 
 
 
 
148
  "timdettmers/guanaco-33b-merged": ModelType.SFT,
149
- "timdettmers/guanaco-65b-merged": ModelType.SFT,
150
- "elinas/llama-30b-hf-transformers-4.29": ModelType.SFT,
151
  "elinas/chronos-33b": ModelType.SFT,
152
- "nmitchko/medguanaco-65b-GPTQ": ModelType.SFT,
 
 
 
 
 
 
 
 
153
  "xhyi/PT_GPTNEO350_ATG": ModelType.SFT,
 
 
 
 
 
 
154
  "h2oai/h2ogpt-oasst1-512-20b": ModelType.SFT,
 
155
  "h2oai/h2ogpt-gm-oasst1-en-1024-12b": ModelType.SFT,
156
- "nomic-ai/gpt4all-13b-snoozy": ModelType.SFT,
157
- "nomic-ai/gpt4all-j": ModelType.SFT,
 
 
 
158
  }
159
 
160
 
161
  def get_model_type(leaderboard_data: List[dict]):
162
  for model_data in leaderboard_data:
163
- model_data["Type"] = TYPE_METADATA.get(model_data["model_name_for_query"], "N/A")
164
- if model_data["Type"] == "N/A":
165
- if any([i in model_data["model_name_for_query"] for i in ["finetuned", "-ft-"]]):
166
- model_data["Type"] = ModelType.SFT
167
- elif any([i in model_data["model_name_for_query"] for i in ["pretrained"]]):
168
- model_data["Type"] = ModelType.PT
169
- elif any([i in model_data["model_name_for_query"] for i in ["-rl-", "-rlhf-"]]):
170
- model_data["Type"] = ModelType.RL
 
 
 
 
 
 
 
 
 
 
 
171
 
172
 
 
1
+ from dataclasses import dataclass
2
  from enum import Enum
3
  from typing import Dict, List
4
 
5
+ from ..utils_display import AutoEvalColumn
6
+
7
+ @dataclass
8
+ class ModelInfo:
9
+ name: str
10
+ symbol: str # emoji
11
+
12
+
13
  class ModelType(Enum):
14
+ PT = ModelInfo(name="pretrained", symbol="🟢")
15
+ SFT = ModelInfo(name="finetuned", symbol="🔶")
16
+ RL = ModelInfo(name="with RL", symbol="🟦")
17
 
18
 
19
  TYPE_METADATA: Dict[str, ModelType] = {
20
+ "notstoic/PygmalionCoT-7b": ModelType.SFT,
21
  "aisquared/dlite-v1-355m": ModelType.SFT,
22
+ "aisquared/dlite-v1-1_5b": ModelType.SFT,
23
+ "aisquared/dlite-v1-774m": ModelType.SFT,
24
+ "aisquared/dlite-v1-124m": ModelType.SFT,
25
+ "aisquared/chopt-2_7b": ModelType.SFT,
26
+ "aisquared/dlite-v2-124m": ModelType.SFT,
27
  "aisquared/dlite-v2-774m": ModelType.SFT,
28
  "aisquared/dlite-v2-1_5b": ModelType.SFT,
29
+ "aisquared/chopt-1_3b": ModelType.SFT,
30
+ "aisquared/dlite-v2-355m": ModelType.SFT,
31
+ "TheBloke/tulu-7B-fp16": ModelType.SFT,
32
+ "TheBloke/guanaco-7B-HF": ModelType.SFT,
33
+ "TheBloke/koala-7B-HF": ModelType.SFT,
34
  "TheBloke/wizardLM-7B-HF": ModelType.SFT,
35
+ "TheBloke/airoboros-13B-HF": ModelType.SFT,
36
+ "TheBloke/koala-13B-HF": ModelType.SFT,
37
+ "TheBloke/Wizard-Vicuna-7B-Uncensored-HF": ModelType.SFT,
38
  "TheBloke/dromedary-65b-lora-HF": ModelType.SFT,
39
+ "TheBloke/wizardLM-13B-1.0-fp16": ModelType.SFT,
40
+ "TheBloke/Wizard-Vicuna-30B-Uncensored-fp16": ModelType.SFT,
41
+ "TheBloke/wizard-vicuna-13B-HF": ModelType.SFT,
42
+ "TheBloke/UltraLM-13B-fp16": ModelType.SFT,
43
+ "TheBloke/OpenAssistant-SFT-7-Llama-30B-HF": ModelType.SFT,
44
  "TheBloke/vicuna-13B-1.1-HF": ModelType.SFT,
45
+ "TheBloke/guanaco-13B-HF": ModelType.SFT,
46
+ "TheBloke/airoboros-7b-gpt4-fp16": ModelType.SFT,
47
+ "TheBloke/Llama-2-13B-fp16": ModelType.PT,
48
+ "TheBloke/Planner-7B-fp16": ModelType.SFT,
49
  "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": ModelType.SFT,
50
+ "TheBloke/gpt4-alpaca-lora-13B-HF": ModelType.SFT,
51
+ "TheBloke/gpt4-x-vicuna-13B-HF": ModelType.SFT,
52
+ "TheBloke/tulu-13B-fp16": ModelType.SFT,
53
+ "jphme/orca_mini_v2_ger_7b": ModelType.SFT,
54
+ "Ejafa/vicuna_7B_vanilla_1.1": ModelType.SFT,
55
+ "kevinpro/Vicuna-13B-CoT": ModelType.SFT,
56
+ "AlekseyKorshuk/pygmalion-6b-vicuna-chatml": ModelType.SFT,
57
  "AlekseyKorshuk/chatml-pyg-v1": ModelType.SFT,
58
+ "concedo/Vicuzard-30B-Uncensored": ModelType.SFT,
59
  "concedo/OPT-19M-ChatSalad": ModelType.SFT,
60
+ "concedo/Pythia-70M-ChatSalad": ModelType.SFT,
61
+ "digitous/13B-HyperMantis": ModelType.SFT,
62
+ "digitous/Adventien-GPTJ": ModelType.SFT,
63
+ "digitous/Alpacino13b": ModelType.SFT,
64
+ "digitous/GPT-R": ModelType.SFT,
65
+ "digitous/Javelin-R": ModelType.SFT,
66
+ "digitous/Javalion-GPTJ": ModelType.SFT,
67
  "digitous/Javalion-R": ModelType.SFT,
68
+ "digitous/Skegma-GPTJ": ModelType.SFT,
69
  "digitous/Alpacino30b": ModelType.SFT,
70
+ "digitous/Janin-GPTJ": ModelType.SFT,
71
+ "digitous/Janin-R": ModelType.SFT,
72
  "digitous/Javelin-GPTJ": ModelType.SFT,
73
+ "SaylorTwift/gpt2_test": ModelType.PT,
74
  "anton-l/gpt-j-tiny-random": ModelType.SFT,
75
+ "Andron00e/YetAnother_Open-Llama-3B-LoRA-OpenOrca": ModelType.SFT,
76
+ "Lazycuber/pyg-instruct-wizardlm": ModelType.SFT,
77
+ "Lazycuber/Janemalion-6B": ModelType.SFT,
78
  "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1": ModelType.SFT,
79
+ "IDEA-CCNL/Ziya-LLaMA-13B-v1": ModelType.SFT,
80
+ "dsvv-cair/alpaca-cleaned-llama-30b-bf16": ModelType.SFT,
81
  "gpt2-medium": ModelType.PT,
82
+ "camel-ai/CAMEL-13B-Combined-Data": ModelType.SFT,
83
+ "camel-ai/CAMEL-13B-Role-Playing-Data": ModelType.SFT,
84
  "PygmalionAI/pygmalion-6b": ModelType.SFT,
85
+ "PygmalionAI/metharme-1.3b": ModelType.SFT,
86
+ "PygmalionAI/pygmalion-1.3b": ModelType.SFT,
87
+ "PygmalionAI/pygmalion-350m": ModelType.SFT,
88
+ "PygmalionAI/pygmalion-2.7b": ModelType.SFT,
89
  "medalpaca/medalpaca-7b": ModelType.SFT,
90
+ "lilloukas/Platypus-30B": ModelType.SFT,
91
+ "lilloukas/GPlatty-30B": ModelType.SFT,
92
+ "mncai/chatdoctor": ModelType.SFT,
93
+ "chaoyi-wu/MedLLaMA_13B": ModelType.SFT,
94
+ "LoupGarou/WizardCoder-Guanaco-15B-V1.0": ModelType.SFT,
95
+ "LoupGarou/WizardCoder-Guanaco-15B-V1.1": ModelType.SFT,
96
+ "hakurei/instruct-12b": ModelType.SFT,
97
  "hakurei/lotus-12B": ModelType.SFT,
 
98
  "shibing624/chinese-llama-plus-13b-hf": ModelType.SFT,
99
+ "shibing624/chinese-alpaca-plus-7b-hf": ModelType.SFT,
100
+ "shibing624/chinese-alpaca-plus-13b-hf": ModelType.SFT,
101
+ "mosaicml/mpt-7b-instruct": ModelType.SFT,
102
+ "mosaicml/mpt-30b-chat": ModelType.SFT,
103
+ "mosaicml/mpt-7b-storywriter": ModelType.SFT,
104
+ "mosaicml/mpt-30b-instruct": ModelType.SFT,
105
+ "mosaicml/mpt-7b-chat": ModelType.SFT,
106
+ "mosaicml/mpt-30b": ModelType.PT,
107
+ "Corianas/111m": ModelType.SFT,
108
+ "Corianas/Quokka_1.3b": ModelType.SFT,
109
+ "Corianas/256_5epoch": ModelType.SFT,
110
+ "Corianas/Quokka_256m": ModelType.SFT,
111
+ "Corianas/Quokka_590m": ModelType.SFT,
112
+ "Corianas/gpt-j-6B-Dolly": ModelType.SFT,
113
+ "Corianas/Quokka_2.7b": ModelType.SFT,
114
+ "cyberagent/open-calm-7b": ModelType.SFT,
115
+ "Aspik101/Nous-Hermes-13b-pl-lora_unload": ModelType.SFT,
116
+ "THUDM/chatglm2-6b": ModelType.SFT,
117
  "MetaIX/GPT4-X-Alpasta-30b": ModelType.SFT,
118
+ "NYTK/PULI-GPTrio": ModelType.PT,
119
+ "EleutherAI/pythia-1.3b": ModelType.PT,
120
+ "EleutherAI/pythia-2.8b-deduped": ModelType.PT,
121
+ "EleutherAI/gpt-neo-125m": ModelType.PT,
122
+ "EleutherAI/pythia-160m": ModelType.PT,
123
+ "EleutherAI/gpt-neo-2.7B": ModelType.PT,
124
+ "EleutherAI/pythia-1b-deduped": ModelType.PT,
125
+ "EleutherAI/pythia-6.7b": ModelType.PT,
126
+ "EleutherAI/pythia-70m-deduped": ModelType.PT,
127
  "EleutherAI/gpt-neox-20b": ModelType.PT,
128
+ "EleutherAI/pythia-1.4b-deduped": ModelType.PT,
129
+ "EleutherAI/pythia-2.7b": ModelType.PT,
130
+ "EleutherAI/pythia-6.9b-deduped": ModelType.PT,
131
+ "EleutherAI/pythia-70m": ModelType.PT,
132
  "EleutherAI/gpt-j-6b": ModelType.PT,
133
+ "EleutherAI/pythia-12b-deduped": ModelType.PT,
134
+ "EleutherAI/gpt-neo-1.3B": ModelType.PT,
135
+ "EleutherAI/pythia-410m-deduped": ModelType.PT,
136
+ "EleutherAI/pythia-160m-deduped": ModelType.PT,
137
+ "EleutherAI/polyglot-ko-12.8b": ModelType.PT,
138
+ "EleutherAI/pythia-12b": ModelType.PT,
139
+ "roneneldan/TinyStories-33M": ModelType.PT,
140
+ "roneneldan/TinyStories-28M": ModelType.PT,
141
+ "roneneldan/TinyStories-1M": ModelType.PT,
142
+ "roneneldan/TinyStories-8M": ModelType.PT,
143
+ "roneneldan/TinyStories-3M": ModelType.PT,
144
+ "jerryjalapeno/nart-100k-7b": ModelType.SFT,
145
+ "lmsys/vicuna-13b-v1.3": ModelType.SFT,
146
+ "lmsys/vicuna-7b-v1.3": ModelType.SFT,
147
+ "lmsys/vicuna-13b-v1.1": ModelType.SFT,
148
  "lmsys/vicuna-13b-delta-v1.1": ModelType.SFT,
149
  "lmsys/vicuna-7b-delta-v1.1": ModelType.SFT,
150
  "abhiramtirumala/DialoGPT-sarcastic-medium": ModelType.SFT,
151
+ "haonan-li/bactrian-x-llama-13b-merged": ModelType.SFT,
152
+ "Gryphe/MythoLogic-13b": ModelType.SFT,
153
+ "Gryphe/MythoBoros-13b": ModelType.SFT,
154
  "pillowtalks-ai/delta13b": ModelType.SFT,
155
+ "wannaphong/openthaigpt-0.1.0-beta-full-model_for_open_llm_leaderboard": ModelType.SFT,
156
+ "bigcode/tiny_starcoder_py": ModelType.PT,
157
  "bigcode/starcoderplus": ModelType.SFT,
158
+ "bigcode/gpt_bigcode-santacoder": ModelType.PT,
159
+ "bigcode/starcoder": ModelType.PT,
160
+ "Open-Orca/OpenOrca-Preview1-13B": ModelType.SFT,
161
  "microsoft/DialoGPT-large": ModelType.SFT,
162
+ "microsoft/DialoGPT-small": ModelType.SFT,
163
+ "microsoft/DialoGPT-medium": ModelType.SFT,
164
  "microsoft/CodeGPT-small-py": ModelType.SFT,
165
+ "Tincando/fiction_story_generator": ModelType.SFT,
166
  "Pirr/pythia-13b-deduped-green_devil": ModelType.SFT,
167
  "Aeala/GPT4-x-AlpacaDente2-30b": ModelType.SFT,
168
+ "Aeala/GPT4-x-AlpacaDente-30b": ModelType.SFT,
169
+ "Aeala/GPT4-x-Alpasta-13b": ModelType.SFT,
170
  "Aeala/VicUnlocked-alpaca-30b": ModelType.SFT,
171
+ "Tap-M/Luna-AI-Llama2-Uncensored": ModelType.SFT,
172
+ "illuin/test-custom-llama": ModelType.SFT,
173
+ "dvruette/oasst-llama-13b-2-epochs": ModelType.SFT,
174
  "dvruette/oasst-gpt-neox-20b-1000-steps": ModelType.SFT,
175
+ "dvruette/llama-13b-pretrained-dropout": ModelType.PT,
176
+ "dvruette/llama-13b-pretrained": ModelType.PT,
177
+ "dvruette/llama-13b-pretrained-sft-epoch-1": ModelType.PT,
178
+ "dvruette/llama-13b-pretrained-sft-do2": ModelType.PT,
179
+ "dvruette/oasst-gpt-neox-20b-3000-steps": ModelType.SFT,
180
+ "dvruette/oasst-pythia-12b-pretrained-sft": ModelType.PT,
181
+ "dvruette/oasst-pythia-6.9b-4000-steps": ModelType.SFT,
182
+ "dvruette/gpt-neox-20b-full-precision": ModelType.SFT,
183
+ "dvruette/oasst-llama-13b-1000-steps": ModelType.SFT,
184
  "openlm-research/open_llama_7b_700bt_preview": ModelType.PT,
185
  "openlm-research/open_llama_7b": ModelType.PT,
186
+ "openlm-research/open_llama_7b_v2": ModelType.PT,
187
  "openlm-research/open_llama_3b": ModelType.PT,
188
+ "openlm-research/open_llama_13b": ModelType.PT,
189
+ "openlm-research/open_llama_3b_v2": ModelType.PT,
190
  "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged": ModelType.SFT,
191
+ "GeorgiaTechResearchInstitute/galpaca-30b": ModelType.SFT,
192
+ "GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct": ModelType.SFT,
193
  "databricks/dolly-v2-7b": ModelType.SFT,
194
  "databricks/dolly-v2-3b": ModelType.SFT,
195
  "databricks/dolly-v2-12b": ModelType.SFT,
 
196
  "Rachneet/gpt2-xl-alpaca": ModelType.SFT,
197
  "Locutusque/gpt2-conversational-or-qa": ModelType.SFT,
198
+ "psyche/kogpt": ModelType.SFT,
199
  "NbAiLab/nb-gpt-j-6B-alpaca": ModelType.SFT,
200
+ "Mikael110/llama-2-7b-guanaco-fp16": ModelType.SFT,
201
+ "Mikael110/llama-2-13b-guanaco-fp16": ModelType.SFT,
202
+ "Fredithefish/CrimsonPajama": ModelType.SFT,
203
+ "Fredithefish/RedPajama-INCITE-Chat-3B-ShareGPT-11K": ModelType.SFT,
204
  "Fredithefish/ScarletPajama-3B-HF": ModelType.SFT,
205
+ "Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4": ModelType.SFT,
206
+ "eachadea/vicuna-13b-1.1": ModelType.SFT,
207
  "eachadea/vicuna-7b-1.1": ModelType.SFT,
208
  "eachadea/vicuna-13b": ModelType.SFT,
209
  "openaccess-ai-collective/wizard-mega-13b": ModelType.SFT,
210
  "openaccess-ai-collective/manticore-13b": ModelType.SFT,
211
  "openaccess-ai-collective/manticore-30b-chat-pyg-alpha": ModelType.SFT,
212
  "openaccess-ai-collective/minotaur-13b": ModelType.SFT,
213
+ "openaccess-ai-collective/minotaur-13b-fixed": ModelType.SFT,
214
+ "openaccess-ai-collective/hippogriff-30b-chat": ModelType.SFT,
215
+ "openaccess-ai-collective/manticore-13b-chat-pyg": ModelType.SFT,
216
  "pythainlp/wangchanglm-7.5B-sft-enth": ModelType.SFT,
217
  "pythainlp/wangchanglm-7.5B-sft-en-sharded": ModelType.SFT,
218
+ "euclaise/gpt-neox-122m-minipile-digits": ModelType.SFT,
219
+ "stabilityai/FreeWilly1-Delta-SafeTensor": ModelType.SFT,
220
  "stabilityai/stablelm-tuned-alpha-7b": ModelType.SFT,
221
+ "stabilityai/FreeWilly2": ModelType.SFT,
222
+ "stabilityai/stablelm-base-alpha-7b": ModelType.PT,
223
+ "stabilityai/stablelm-base-alpha-3b": ModelType.PT,
224
+ "stabilityai/stablelm-tuned-alpha-3b": ModelType.SFT,
225
+ "alibidaran/medical_transcription_generator": ModelType.SFT,
226
  "CalderaAI/30B-Lazarus": ModelType.SFT,
227
+ "CalderaAI/13B-BlueMethod": ModelType.SFT,
228
+ "CalderaAI/13B-Ouroboros": ModelType.SFT,
229
+ "KoboldAI/OPT-13B-Erebus": ModelType.SFT,
230
+ "KoboldAI/GPT-J-6B-Janeway": ModelType.SFT,
231
+ "KoboldAI/GPT-J-6B-Shinen": ModelType.SFT,
232
+ "KoboldAI/fairseq-dense-2.7B": ModelType.PT,
233
+ "KoboldAI/OPT-6B-nerys-v2": ModelType.SFT,
234
+ "KoboldAI/GPT-NeoX-20B-Skein": ModelType.SFT,
235
+ "KoboldAI/PPO_Pygway-6b-Mix": ModelType.SFT,
236
+ "KoboldAI/fairseq-dense-6.7B": ModelType.PT,
237
+ "KoboldAI/fairseq-dense-125M": ModelType.PT,
238
  "KoboldAI/OPT-13B-Nerybus-Mix": ModelType.SFT,
239
+ "KoboldAI/OPT-2.7B-Erebus": ModelType.SFT,
240
+ "KoboldAI/OPT-350M-Nerys-v2": ModelType.SFT,
241
+ "KoboldAI/OPT-2.7B-Nerys-v2": ModelType.SFT,
242
+ "KoboldAI/OPT-2.7B-Nerybus-Mix": ModelType.SFT,
243
+ "KoboldAI/OPT-13B-Nerys-v2": ModelType.SFT,
244
+ "KoboldAI/GPT-NeoX-20B-Erebus": ModelType.SFT,
245
+ "KoboldAI/OPT-6.7B-Erebus": ModelType.SFT,
246
+ "KoboldAI/fairseq-dense-355M": ModelType.PT,
247
+ "KoboldAI/OPT-6.7B-Nerybus-Mix": ModelType.SFT,
248
+ "KoboldAI/GPT-J-6B-Adventure": ModelType.SFT,
249
+ "KoboldAI/OPT-350M-Erebus": ModelType.SFT,
250
+ "KoboldAI/GPT-J-6B-Skein": ModelType.SFT,
251
+ "KoboldAI/OPT-30B-Erebus": ModelType.SFT,
252
+ "klosax/pythia-160m-deduped-step92k-193bt": ModelType.PT,
253
+ "klosax/open_llama_3b_350bt_preview": ModelType.PT,
254
+ "klosax/openllama-3b-350bt": ModelType.PT,
255
+ "klosax/pythia-70m-deduped-step44k-92bt": ModelType.PT,
256
+ "klosax/open_llama_13b_600bt_preview": ModelType.PT,
257
+ "klosax/open_llama_7b_400bt_preview": ModelType.PT,
258
+ "WeOpenML/Alpaca-7B-v1": ModelType.SFT,
259
+ "WeOpenML/PandaLM-Alpaca-7B-v1": ModelType.SFT,
260
+ "TFLai/gpt2-turkish-uncased": ModelType.SFT,
261
+ "ehartford/WizardLM-13B-Uncensored": ModelType.SFT,
262
+ "ehartford/dolphin-llama-13b": ModelType.SFT,
263
+ "ehartford/Wizard-Vicuna-30B-Uncensored": ModelType.SFT,
264
+ "ehartford/WizardLM-30B-Uncensored": ModelType.SFT,
265
+ "ehartford/Wizard-Vicuna-13B-Uncensored": ModelType.SFT,
266
+ "ehartford/WizardLM-7B-Uncensored": ModelType.SFT,
267
+ "ehartford/based-30b": ModelType.SFT,
268
+ "ehartford/Wizard-Vicuna-7B-Uncensored": ModelType.SFT,
269
  "wahaha1987/llama_7b_sharegpt94k_fastchat": ModelType.SFT,
270
+ "wahaha1987/llama_13b_sharegpt94k_fastchat": ModelType.SFT,
271
+ "OpenAssistant/oasst-sft-1-pythia-12b": ModelType.SFT,
272
+ "OpenAssistant/stablelm-7b-sft-v7-epoch-3": ModelType.SFT,
273
  "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5": ModelType.SFT,
274
+ "OpenAssistant/pythia-12b-sft-v8-2.5k-steps": ModelType.SFT,
275
+ "OpenAssistant/pythia-12b-sft-v8-7k-steps": ModelType.SFT,
276
+ "OpenAssistant/pythia-12b-pre-v8-12.5k-steps": ModelType.SFT,
277
  "junelee/wizard-vicuna-13b": ModelType.SFT,
278
+ "BreadAi/gpt-YA-1-1_160M": ModelType.PT,
279
+ "BreadAi/MuseCan": ModelType.PT,
280
+ "BreadAi/MusePy-1-2": ModelType.PT,
281
+ "BreadAi/DiscordPy": ModelType.PT,
282
+ "BreadAi/PM_modelV2": ModelType.PT,
283
+ "BreadAi/gpt-Youtube": ModelType.PT,
284
  "BreadAi/StoryPy": ModelType.SFT,
285
+ "julianweng/Llama-2-7b-chat-orcah": ModelType.SFT,
286
+ "AGI-inc/lora_moe_7b_baseline": ModelType.SFT,
287
+ "AGI-inc/lora_moe_7b": ModelType.SFT,
288
+ "togethercomputer/GPT-NeoXT-Chat-Base-20B": ModelType.SFT,
289
+ "togethercomputer/RedPajama-INCITE-Chat-7B-v0.1": ModelType.SFT,
290
+ "togethercomputer/RedPajama-INCITE-Instruct-7B-v0.1": ModelType.SFT,
291
+ "togethercomputer/RedPajama-INCITE-7B-Base": ModelType.PT,
292
+ "togethercomputer/RedPajama-INCITE-7B-Instruct": ModelType.SFT,
293
  "togethercomputer/RedPajama-INCITE-Base-3B-v1": ModelType.PT,
294
+ "togethercomputer/Pythia-Chat-Base-7B": ModelType.SFT,
295
  "togethercomputer/RedPajama-INCITE-Base-7B-v0.1": ModelType.PT,
296
+ "togethercomputer/GPT-JT-6B-v1": ModelType.SFT,
297
+ "togethercomputer/GPT-JT-6B-v0": ModelType.SFT,
298
+ "togethercomputer/RedPajama-INCITE-Chat-3B-v1": ModelType.SFT,
299
+ "togethercomputer/RedPajama-INCITE-7B-Chat": ModelType.SFT,
300
+ "togethercomputer/RedPajama-INCITE-Instruct-3B-v1": ModelType.SFT,
301
  "Writer/camel-5b-hf": ModelType.SFT,
302
  "Writer/palmyra-base": ModelType.PT,
303
+ "MBZUAI/LaMini-GPT-1.5B": ModelType.SFT,
304
+ "MBZUAI/lamini-cerebras-111m": ModelType.SFT,
305
+ "MBZUAI/lamini-neo-1.3b": ModelType.SFT,
306
+ "MBZUAI/lamini-cerebras-1.3b": ModelType.SFT,
307
+ "MBZUAI/lamini-cerebras-256m": ModelType.SFT,
308
+ "MBZUAI/LaMini-GPT-124M": ModelType.SFT,
309
  "MBZUAI/lamini-neo-125m": ModelType.SFT,
310
+ "TehVenom/DiffMerge-DollyGPT-Pygmalion": ModelType.SFT,
311
+ "TehVenom/PPO_Shygmalion-6b": ModelType.SFT,
312
+ "TehVenom/Dolly_Shygmalion-6b-Dev_V8P2": ModelType.SFT,
313
+ "TehVenom/Pygmalion_AlpacaLora-7b": ModelType.SFT,
314
+ "TehVenom/PPO_Pygway-V8p4_Dev-6b": ModelType.SFT,
315
+ "TehVenom/Dolly_Malion-6b": ModelType.SFT,
316
+ "TehVenom/PPO_Shygmalion-V8p4_Dev-6b": ModelType.SFT,
317
+ "TehVenom/ChanMalion": ModelType.SFT,
318
+ "TehVenom/GPT-J-Pyg_PPO-6B": ModelType.SFT,
319
+ "TehVenom/Pygmalion-13b-Merged": ModelType.SFT,
320
+ "TehVenom/Metharme-13b-Merged": ModelType.SFT,
321
+ "TehVenom/Dolly_Shygmalion-6b": ModelType.SFT,
322
+ "TehVenom/GPT-J-Pyg_PPO-6B-Dev-V8p4": ModelType.SFT,
323
+ "georgesung/llama2_7b_chat_uncensored": ModelType.SFT,
324
+ "vicgalle/gpt2-alpaca": ModelType.SFT,
325
+ "vicgalle/alpaca-7b": ModelType.SFT,
326
  "vicgalle/gpt2-alpaca-gpt4": ModelType.SFT,
327
  "facebook/opt-350m": ModelType.PT,
328
  "facebook/opt-125m": ModelType.PT,
329
+ "facebook/xglm-4.5B": ModelType.PT,
330
+ "facebook/opt-2.7b": ModelType.PT,
331
+ "facebook/opt-6.7b": ModelType.PT,
332
+ "facebook/galactica-30b": ModelType.PT,
333
  "facebook/opt-13b": ModelType.PT,
 
334
  "facebook/opt-66b": ModelType.PT,
335
+ "facebook/xglm-7.5B": ModelType.PT,
336
+ "facebook/xglm-564M": ModelType.PT,
337
+ "facebook/opt-30b": ModelType.PT,
338
+ "golaxy/gogpt-7b": ModelType.SFT,
339
+ "psmathur/orca_mini_v2_7b": ModelType.SFT,
340
+ "psmathur/orca_mini_7b": ModelType.SFT,
341
+ "psmathur/orca_mini_3b": ModelType.SFT,
342
+ "psmathur/orca_mini_v2_13b": ModelType.SFT,
343
  "gpt2-xl": ModelType.PT,
344
+ "lxe/Cerebras-GPT-2.7B-Alpaca-SP": ModelType.SFT,
345
+ "Monero/Manticore-13b-Chat-Pyg-Guanaco": ModelType.SFT,
346
+ "Monero/WizardLM-Uncensored-SuperCOT-StoryTelling-30b": ModelType.SFT,
347
+ "Monero/WizardLM-13b-OpenAssistant-Uncensored": ModelType.SFT,
348
+ "Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b": ModelType.SFT,
349
+ "jzjiao/opt-1.3b-rlhf": ModelType.SFT,
350
  "HuggingFaceH4/starchat-beta": ModelType.SFT,
351
+ "KnutJaegersberg/gpt-2-xl-EvolInstruct": ModelType.SFT,
352
+ "KnutJaegersberg/megatron-GPT-2-345m-EvolInstruct": ModelType.SFT,
353
+ "openchat/openchat_8192": ModelType.SFT,
354
+ "openchat/openchat_v2": ModelType.SFT,
355
+ "openchat/openchat_v2_w": ModelType.SFT,
356
  "ausboss/llama-13b-supercot": ModelType.SFT,
357
  "ausboss/llama-30b-supercot": ModelType.SFT,
358
  "Neko-Institute-of-Science/metharme-7b": ModelType.SFT,
359
+ "Neko-Institute-of-Science/pygmalion-7b": ModelType.SFT,
360
  "SebastianSchramm/Cerebras-GPT-111M-instruction": ModelType.SFT,
361
  "victor123/WizardLM-13B-1.0": ModelType.SFT,
362
+ "OpenBuddy/openbuddy-openllama-13b-v7-fp16": ModelType.SFT,
363
+ "baichuan-inc/Baichuan-7B": ModelType.PT,
364
  "tiiuae/falcon-40b-instruct": ModelType.SFT,
365
  "tiiuae/falcon-40b": ModelType.PT,
366
  "tiiuae/falcon-7b": ModelType.PT,
367
+ "YeungNLP/firefly-llama-13b": ModelType.SFT,
368
+ "YeungNLP/firefly-llama-13b-v1.2": ModelType.SFT,
369
+ "YeungNLP/firefly-ziya-13b": ModelType.SFT,
370
+ "shaohang/Sparse0.5_OPT-1.3": ModelType.SFT,
371
+ "xzuyn/Alpacino-SuperCOT-13B": ModelType.SFT,
372
  "xzuyn/MedicWizard-7B": ModelType.SFT,
373
  "beomi/KoAlpaca-Polyglot-5.8B": ModelType.SFT,
374
+ "beomi/llama-2-ko-7b": ModelType.SFT,
375
+ "Salesforce/codegen-6B-multi": ModelType.PT,
376
  "Salesforce/codegen-16B-nl": ModelType.PT,
377
+ "Salesforce/codegen-6B-nl": ModelType.PT,
378
  "ai-forever/rugpt3large_based_on_gpt2": ModelType.SFT,
379
  "gpt2-large": ModelType.PT,
380
+ "frank098/orca_mini_3b_juniper": ModelType.SFT,
381
+ "frank098/WizardLM_13B_juniper": ModelType.SFT,
382
  "huggingface/llama-13b": ModelType.PT,
383
  "huggingface/llama-7b": ModelType.PT,
384
  "huggingface/llama-65b": ModelType.PT,
385
+ "huggingface/llama-65b": ModelType.PT,
386
  "huggingface/llama-30b": ModelType.PT,
387
+ "jondurbin/airoboros-13b-gpt4-1.4": ModelType.SFT,
388
+ "jondurbin/airoboros-7b": ModelType.SFT,
389
+ "jondurbin/airoboros-7b-gpt4-1.4": ModelType.SFT,
390
+ "jondurbin/airoboros-l2-13b-gpt4-1.4.1": ModelType.SFT,
391
+ "jondurbin/airoboros-13b": ModelType.SFT,
392
+ "ariellee/SuperPlatty-30B": ModelType.SFT,
393
+ "danielhanchen/open_llama_3b_600bt_preview": ModelType.SFT,
394
+ "cerebras/Cerebras-GPT-256M": ModelType.PT,
395
  "cerebras/Cerebras-GPT-1.3B": ModelType.PT,
396
+ "cerebras/Cerebras-GPT-13B": ModelType.PT,
397
+ "cerebras/Cerebras-GPT-2.7B": ModelType.PT,
398
  "cerebras/Cerebras-GPT-111M": ModelType.PT,
399
+ "cerebras/Cerebras-GPT-6.7B": ModelType.PT,
400
+ "Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf": ModelType.RL,
401
+ "Yhyu13/llama-30B-hf-openassitant": ModelType.SFT,
402
+ "NousResearch/Nous-Hermes-Llama2-13b": ModelType.SFT,
403
+ "NousResearch/Redmond-Puffin-13B": ModelType.SFT,
404
  "NousResearch/Nous-Hermes-13b": ModelType.SFT,
405
  "project-baize/baize-v2-7b": ModelType.SFT,
406
  "project-baize/baize-v2-13b": ModelType.SFT,
407
+ "LLMs/WizardLM-13B-V1.0": ModelType.SFT,
408
  "LLMs/AlpacaGPT4-7B-elina": ModelType.SFT,
409
+ "wenge-research/yayi-7b-llama2": ModelType.SFT,
410
+ "yhyhy3/open_llama_7b_v2_med_instruct": ModelType.SFT,
411
+ "llama-anon/instruct-13b": ModelType.SFT,
412
  "huggingtweets/jerma985": ModelType.SFT,
413
+ "huggingtweets/gladosystem": ModelType.SFT,
414
+ "huggingtweets/bladeecity-jerma985": ModelType.SFT,
415
+ "huggyllama/llama-13b": ModelType.PT,
416
  "huggyllama/llama-65b": ModelType.PT,
417
+ "FabbriSimo01/Facebook_opt_1.3b_Quantized": ModelType.PT,
418
+ "upstage/llama-30b-instruct-2048": ModelType.SFT,
419
+ "upstage/llama-30b-instruct": ModelType.SFT,
420
  "WizardLM/WizardLM-13B-1.0": ModelType.SFT,
421
+ "WizardLM/WizardLM-30B-V1.0": ModelType.SFT,
422
+ "WizardLM/WizardCoder-15B-V1.0": ModelType.SFT,
423
  "gpt2": ModelType.PT,
424
+ "keyfan/vicuna-chinese-replication-v1.1": ModelType.SFT,
425
+ "nthngdy/pythia-owt2-70m-100k": ModelType.SFT,
426
+ "nthngdy/pythia-owt2-70m-50k": ModelType.SFT,
427
+ "quantumaikr/KoreanLM-hf": ModelType.SFT,
428
+ "quantumaikr/open_llama_7b_hf": ModelType.SFT,
429
  "MayaPH/FinOPT-Lincoln": ModelType.SFT,
430
  "MayaPH/FinOPT-Franklin": ModelType.SFT,
431
+ "MayaPH/GodziLLa-30B": ModelType.SFT,
432
+ "MayaPH/FinOPT-Washington": ModelType.SFT,
433
+ "ogimgio/gpt-neo-125m-neurallinguisticpioneers": ModelType.SFT,
434
+ "layoric/llama-2-13b-code-alpaca": ModelType.SFT,
435
+ "CobraMamba/mamba-gpt-3b": ModelType.SFT,
436
  "timdettmers/guanaco-33b-merged": ModelType.SFT,
 
 
437
  "elinas/chronos-33b": ModelType.SFT,
438
+ "heegyu/RedTulu-Uncensored-3B-0719": ModelType.SFT,
439
+ "heegyu/WizardVicuna-Uncensored-3B-0719": ModelType.SFT,
440
+ "heegyu/WizardVicuna-3B-0719": ModelType.SFT,
441
+ "meta-llama/Llama-2-7b-chat-hf": ModelType.RL,
442
+ "meta-llama/Llama-2-7b-hf": ModelType.PT,
443
+ "meta-llama/Llama-2-13b-chat-hf": ModelType.RL,
444
+ "meta-llama/Llama-2-13b-hf": ModelType.PT,
445
+ "meta-llama/Llama-2-70b-chat-hf": ModelType.RL,
446
+ "meta-llama/Llama-2-70b-hf": ModelType.PT,
447
  "xhyi/PT_GPTNEO350_ATG": ModelType.SFT,
448
+ "h2oai/h2ogpt-gm-oasst1-en-1024-20b": ModelType.SFT,
449
+ "h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt": ModelType.SFT,
450
+ "h2oai/h2ogpt-oig-oasst1-512-6_9b": ModelType.SFT,
451
+ "h2oai/h2ogpt-oasst1-512-12b": ModelType.SFT,
452
+ "h2oai/h2ogpt-oig-oasst1-256-6_9b": ModelType.SFT,
453
+ "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt": ModelType.SFT,
454
  "h2oai/h2ogpt-oasst1-512-20b": ModelType.SFT,
455
+ "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2": ModelType.SFT,
456
  "h2oai/h2ogpt-gm-oasst1-en-1024-12b": ModelType.SFT,
457
+ "h2oai/h2ogpt-gm-oasst1-multilang-1024-20b": ModelType.SFT,
458
+ "bofenghuang/vigogne-13b-instruct": ModelType.SFT,
459
+ "VMware/open-llama-7b-v2-open-instruct": ModelType.SFT,
460
+ "VMware/open-llama-0.7T-7B-open-instruct-v1.1": ModelType.SFT,
461
+ "ewof/koishi-instruct-3b": ModelType.SFT,
462
  }
463
 
464
 
465
def get_model_type(leaderboard_data: List[dict]):
    """Attach model-type metadata (type name + symbol) to each leaderboard row.

    Mutates every dict in ``leaderboard_data`` in place, filling the
    ``AutoEvalColumn.model_type`` and ``AutoEvalColumn.model_type_symbol``
    columns. The type is resolved with the following precedence:
      1. the hand-curated TYPE_METADATA mapping,
      2. the type submitted with the request, or hints in the model name.
    Models that match neither fall back to "N/A".
    """
    for model_data in leaderboard_data:
        # Todo @clefourrier once requests are connected with results
        is_delta = False  # (model_data["weight_type"] != "Original")
        # Hoisted out of the branches: the delta marker and the (repeated)
        # name lookup were previously recomputed in every arm.
        delta_symbol = "🔺" if is_delta else ""
        model_name = model_data["model_name_for_query"]

        def _apply(model_type):
            # Write both display columns from one ModelType value.
            model_data[AutoEvalColumn.model_type.name] = model_type.name
            model_data[AutoEvalColumn.model_type_symbol.name] = model_type.symbol + delta_symbol

        # Stored information takes precedence over anything inferred.
        if model_name in TYPE_METADATA:
            _apply(TYPE_METADATA[model_name].value)
        # Inferred from the submitted type or from hints in the name.
        elif model_data[AutoEvalColumn.model_type.name] == "pretrained" or "pretrained" in model_name:
            _apply(ModelType.PT.value)
        elif model_data[AutoEvalColumn.model_type.name] == "finetuned" or any(
            hint in model_name for hint in ("finetuned", "-ft-")
        ):
            _apply(ModelType.SFT.value)
        elif model_data[AutoEvalColumn.model_type.name] == "with RL" or any(
            hint in model_name for hint in ("-rl-", "-rlhf-")
        ):
            _apply(ModelType.RL.value)
        else:
            model_data[AutoEvalColumn.model_type.name] = "N/A"
            model_data[AutoEvalColumn.model_type_symbol.name] = delta_symbol
486
 
487
 
src/init.py CHANGED
File without changes
src/utils_display.py CHANGED
@@ -14,12 +14,13 @@ def fields(raw_class):
14
 
15
  @dataclass(frozen=True)
16
  class AutoEvalColumn: # Auto evals column
 
17
  model = ColumnContent("Model", "markdown", True)
18
  average = ColumnContent("Average ⬆️", "number", True)
19
- arc = ColumnContent("ARC ⬆️", "number", True)
20
- hellaswag = ColumnContent("HellaSwag ⬆️", "number", True)
21
- mmlu = ColumnContent("MMLU ⬆️", "number", True)
22
- truthfulqa = ColumnContent("TruthfulQA (MC) ⬆️", "number", True)
23
  model_type = ColumnContent("Type", "str", False)
24
  precision = ColumnContent("Precision", "str", False, True)
25
  license = ColumnContent("Hub License", "str", False)
 
14
 
15
  @dataclass(frozen=True)
16
  class AutoEvalColumn: # Auto evals column
17
+ model_type_symbol = ColumnContent("T", "str", True)
18
  model = ColumnContent("Model", "markdown", True)
19
  average = ColumnContent("Average ⬆️", "number", True)
20
+ arc = ColumnContent("ARC", "number", True)
21
+ hellaswag = ColumnContent("HellaSwag", "number", True)
22
+ mmlu = ColumnContent("MMLU", "number", True)
23
+ truthfulqa = ColumnContent("TruthfulQA", "number", True)
24
  model_type = ColumnContent("Type", "str", False)
25
  precision = ColumnContent("Precision", "str", False, True)
26
  license = ColumnContent("Hub License", "str", False)