Clémentine committed on
Commit
d52179b
1 Parent(s): 460d762

column fix

Browse files
app.py CHANGED
@@ -27,6 +27,8 @@ GPT_4_EVAL_REPO = "HuggingFaceH4/open_llm_leaderboard_oai_evals"
27
  IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
28
  ADD_PLOTS = False
29
 
 
 
30
  api = HfApi()
31
 
32
 
@@ -37,10 +39,10 @@ def restart_space():
37
 
38
  auto_eval_repo, human_eval_repo, gpt_4_eval_repo, requested_models = load_all_info_from_hub(LMEH_REPO, HUMAN_EVAL_REPO, GPT_4_EVAL_REPO)
39
 
40
- COLS = [c.name for c in fields(AutoEvalColumn)]
41
- TYPES = [c.type for c in fields(AutoEvalColumn)]
42
- COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default]
43
- TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default]
44
 
45
  if not IS_PUBLIC:
46
  COLS.insert(2, AutoEvalColumn.is_8bit.name)
@@ -95,14 +97,14 @@ def get_evaluation_queue_df():
95
 
96
  entries = [
97
  entry
98
- for entry in os.listdir("auto_evals/eval_requests")
99
  if not entry.startswith(".")
100
  ]
101
  all_evals = []
102
 
103
  for entry in entries:
104
  if ".json" in entry:
105
- file_path = os.path.join("auto_evals/eval_requests", entry)
106
  with open(file_path) as fp:
107
  data = json.load(fp)
108
 
@@ -115,11 +117,11 @@ def get_evaluation_queue_df():
115
  # this is a folder
116
  sub_entries = [
117
  e
118
- for e in os.listdir(f"auto_evals/eval_requests/{entry}")
119
  if not e.startswith(".")
120
  ]
121
  for sub_entry in sub_entries:
122
- file_path = os.path.join("auto_evals/eval_requests", entry, sub_entry)
123
  with open(file_path) as fp:
124
  data = json.load(fp)
125
 
@@ -244,7 +246,7 @@ def add_new_eval(
244
  user_name = model.split("/")[0]
245
  model_path = model.split("/")[1]
246
 
247
- OUT_DIR = f"auto_evals/eval_requests/{user_name}"
248
  os.makedirs(OUT_DIR, exist_ok=True)
249
  out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{is_8_bit_eval}_{is_delta_weight}.json"
250
 
 
27
  IS_PUBLIC = bool(os.environ.get("IS_PUBLIC", True))
28
  ADD_PLOTS = False
29
 
30
+ EVAL_REQUESTS_PATH = "auto_evals/eval_requests"
31
+
32
  api = HfApi()
33
 
34
 
 
39
 
40
  auto_eval_repo, human_eval_repo, gpt_4_eval_repo, requested_models = load_all_info_from_hub(LMEH_REPO, HUMAN_EVAL_REPO, GPT_4_EVAL_REPO)
41
 
42
+ COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
43
+ TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
44
+ COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
45
+ TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
46
 
47
  if not IS_PUBLIC:
48
  COLS.insert(2, AutoEvalColumn.is_8bit.name)
 
97
 
98
  entries = [
99
  entry
100
+ for entry in os.listdir(EVAL_REQUESTS_PATH)
101
  if not entry.startswith(".")
102
  ]
103
  all_evals = []
104
 
105
  for entry in entries:
106
  if ".json" in entry:
107
+ file_path = os.path.join(EVAL_REQUESTS_PATH, entry)
108
  with open(file_path) as fp:
109
  data = json.load(fp)
110
 
 
117
  # this is a folder
118
  sub_entries = [
119
  e
120
+ for e in os.listdir(f"{EVAL_REQUESTS_PATH}/{entry}")
121
  if not e.startswith(".")
122
  ]
123
  for sub_entry in sub_entries:
124
+ file_path = os.path.join(EVAL_REQUESTS_PATH, entry, sub_entry)
125
  with open(file_path) as fp:
126
  data = json.load(fp)
127
 
 
246
  user_name = model.split("/")[0]
247
  model_path = model.split("/")[1]
248
 
249
+ OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
250
  os.makedirs(OUT_DIR, exist_ok=True)
251
  out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{is_8_bit_eval}_{is_delta_weight}.json"
252
 
src/assets/text_content.py CHANGED
@@ -1,4 +1,8 @@
1
  CHANGELOG_TEXT = f"""
 
 
 
 
2
  ## [2023-06-16]
3
  - Refactored code base
4
  - Added new columns: number of parameters, hub likes, license
 
1
  CHANGELOG_TEXT = f"""
2
+ ## [2023-06-19]
3
+ - Added model type column
4
+ - Hid revision and 8bit columns since all models are the same atm
5
+
6
  ## [2023-06-16]
7
  - Refactored code base
8
  - Added new columns: number of parameters, hub likes, license
src/auto_leaderboard/get_model_metadata.py CHANGED
@@ -2,6 +2,7 @@ import re
2
  from typing import List
3
 
4
  from src.utils_display import AutoEvalColumn
 
5
 
6
  from huggingface_hub import HfApi
7
  import huggingface_hub
@@ -38,17 +39,18 @@ size_pattern = re.compile(r"\d+(b|m)")
38
  def get_model_size(model_name, model_info):
39
  # In billions
40
  try:
41
- return model_info.safetensors["total"] / 1e9
42
  except AttributeError:
43
  #print(f"Repository {model_id} does not have safetensors weights")
44
  pass
45
  try:
46
  size_match = re.search(size_pattern, model_name.lower())
47
  size = size_match.group(0)
48
- return int(size[:-1]) if size[-1] == "b" else int(size[:-1]) / 1e3
49
  except AttributeError:
50
  return None
51
 
52
 
53
  def apply_metadata(leaderboard_data: List[dict]):
 
54
  get_model_infos_from_hub(leaderboard_data)
 
2
  from typing import List
3
 
4
  from src.utils_display import AutoEvalColumn
5
+ from src.auto_leaderboard.model_metadata_type import get_model_type
6
 
7
  from huggingface_hub import HfApi
8
  import huggingface_hub
 
39
def get_model_size(model_name, model_info):
    """Return the model's parameter count in billions, or None if unknown.

    Prefers the exact count exposed by the hub's safetensors metadata;
    otherwise falls back to parsing a size token (e.g. "7b", "350m") out of
    the model name. Results are rounded to 3 decimal places.
    """
    # Exact count, when the repo publishes safetensors metadata.
    try:
        return round(model_info.safetensors["total"] / 1e9, 3)
    except AttributeError:
        # No safetensors attribute on this model_info — fall through.
        pass
    # Heuristic: pull a "<digits>b" / "<digits>m" token from the name.
    # A failed search returns None, whose .group access raises AttributeError.
    try:
        token = re.search(r"\d+(b|m)", model_name.lower()).group(0)
        billions = int(token[:-1]) if token.endswith("b") else int(token[:-1]) / 1e3
        return round(billions, 3)
    except AttributeError:
        return None
52
 
53
 
54
  def apply_metadata(leaderboard_data: List[dict]):
55
+ get_model_type(leaderboard_data)
56
  get_model_infos_from_hub(leaderboard_data)
src/auto_leaderboard/model_metadata_type.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from enum import Enum
from typing import Dict, List


class ModelType(Enum):
    """Broad training regime of a leaderboard model."""
    PT = "pretrained"
    SFT = "finetuned"
    RL = "with RL"


# Manually curated mapping from hub model id to its training regime.
TYPE_METADATA: Dict[str, ModelType] = {
    "aisquared/dlite-v1-355m": ModelType.SFT,
    "aisquared/dlite-v2-774m": ModelType.SFT,
    "aisquared/dlite-v2-1_5b": ModelType.SFT,
    "TheBloke/wizardLM-7B-HF": ModelType.SFT,
    "TheBloke/dromedary-65b-lora-HF": ModelType.SFT,
    "TheBloke/vicuna-13B-1.1-HF": ModelType.SFT,
    "TheBloke/Wizard-Vicuna-13B-Uncensored-HF": ModelType.SFT,
    "wordcab/llama-natural-instructions-13b": ModelType.SFT,
    "JosephusCheung/Guanaco": ModelType.SFT,
    "AlekseyKorshuk/vicuna-7b": ModelType.SFT,
    "AlekseyKorshuk/chatml-pyg-v1": ModelType.SFT,
    "concedo/OPT-19M-ChatSalad": ModelType.SFT,
    "digitous/Javalion-R": ModelType.SFT,
    "digitous/Alpacino30b": ModelType.SFT,
    "digitous/Javelin-GPTJ": ModelType.SFT,
    "anton-l/gpt-j-tiny-random": ModelType.SFT,
    "IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1": ModelType.SFT,
    "gpt2-medium": ModelType.PT,
    "PygmalionAI/pygmalion-6b": ModelType.SFT,
    "medalpaca/medalpaca-7b": ModelType.SFT,
    "medalpaca/medalpaca-13b": ModelType.SFT,
    "chavinlo/alpaca-13b": ModelType.SFT,
    "chavinlo/alpaca-native": ModelType.SFT,
    "chavinlo/gpt4-x-alpaca": ModelType.SFT,
    "hakurei/lotus-12B": ModelType.SFT,
    "amazon/LightGPT": ModelType.SFT,
    "shibing624/chinese-llama-plus-13b-hf": ModelType.SFT,
    "mosaicml/mpt-7b": ModelType.PT,
    "PSanni/Deer-3b": ModelType.SFT,
    "bigscience/bloom-1b1": ModelType.PT,
    "MetaIX/GPT4-X-Alpasta-30b": ModelType.SFT,
    "EleutherAI/gpt-neox-20b": ModelType.PT,
    "EleutherAI/gpt-j-6b": ModelType.PT,
    "roneneldan/TinyStories-28M": ModelType.SFT,
    "lmsys/vicuna-13b-delta-v1.1": ModelType.SFT,
    "lmsys/vicuna-7b-delta-v1.1": ModelType.SFT,
    "abhiramtirumala/DialoGPT-sarcastic-medium": ModelType.SFT,
    "pillowtalks-ai/delta13b": ModelType.SFT,
    "bigcode/starcoderplus": ModelType.SFT,
    "microsoft/DialoGPT-large": ModelType.SFT,
    "microsoft/CodeGPT-small-py": ModelType.SFT,
    "Pirr/pythia-13b-deduped-green_devil": ModelType.SFT,
    "Aeala/GPT4-x-AlpacaDente2-30b": ModelType.SFT,
    "Aeala/VicUnlocked-alpaca-30b": ModelType.SFT,
    "dvruette/llama-13b-pretrained-sft-epoch-2": ModelType.SFT,
    "dvruette/oasst-gpt-neox-20b-1000-steps": ModelType.SFT,
    "openlm-research/open_llama_3b_350bt_preview": ModelType.PT,
    "openlm-research/open_llama_7b_700bt_preview": ModelType.PT,
    "openlm-research/open_llama_7b": ModelType.PT,
    "openlm-research/open_llama_3b": ModelType.PT,
    "openlm-research/open_llama_7b_400bt_preview": ModelType.PT,
    "PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged": ModelType.SFT,
    "GeorgiaTechResearchInstitute/galactica-6.7b-evol-instruct-70k": ModelType.SFT,
    "databricks/dolly-v2-7b": ModelType.SFT,
    "databricks/dolly-v2-3b": ModelType.SFT,
    "databricks/dolly-v2-12b": ModelType.SFT,
    "pinkmanlove/llama-65b-hf": ModelType.SFT,
    "Rachneet/gpt2-xl-alpaca": ModelType.SFT,
    "Locutusque/gpt2-conversational-or-qa": ModelType.SFT,
    "NbAiLab/nb-gpt-j-6B-alpaca": ModelType.SFT,
    "Fredithefish/ScarletPajama-3B-HF": ModelType.SFT,
    "eachadea/vicuna-7b-1.1": ModelType.SFT,
    "eachadea/vicuna-13b": ModelType.SFT,
    "openaccess-ai-collective/wizard-mega-13b": ModelType.SFT,
    "openaccess-ai-collective/manticore-13b": ModelType.SFT,
    "openaccess-ai-collective/manticore-30b-chat-pyg-alpha": ModelType.SFT,
    "openaccess-ai-collective/minotaur-13b": ModelType.SFT,
    "lamini/instruct-tuned-3b": ModelType.SFT,
    "pythainlp/wangchanglm-7.5B-sft-enth": ModelType.SFT,
    "pythainlp/wangchanglm-7.5B-sft-en-sharded": ModelType.SFT,
    "stabilityai/stablelm-tuned-alpha-7b": ModelType.SFT,
    "CalderaAI/30B-Lazarus": ModelType.SFT,
    "KoboldAI/OPT-13B-Nerybus-Mix": ModelType.SFT,
    "distilgpt2": ModelType.PT,
    "wahaha1987/llama_7b_sharegpt94k_fastchat": ModelType.SFT,
    "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5": ModelType.SFT,
    "junelee/wizard-vicuna-13b": ModelType.SFT,
    "BreadAi/StoryPy": ModelType.SFT,
    "togethercomputer/RedPajama-INCITE-Base-3B-v1": ModelType.PT,
    "togethercomputer/RedPajama-INCITE-Base-7B-v0.1": ModelType.PT,
    "Writer/camel-5b-hf": ModelType.SFT,
    "Writer/palmyra-base": ModelType.PT,
    "MBZUAI/lamini-neo-125m": ModelType.SFT,
    "TehVenom/DiffMerge_Pygmalion_Main-onto-V8P4": ModelType.SFT,
    "vicgalle/gpt2-alpaca-gpt4": ModelType.SFT,
    "facebook/opt-350m": ModelType.PT,
    "facebook/opt-125m": ModelType.PT,
    "facebook/opt-13b": ModelType.PT,
    "facebook/opt-1.3b": ModelType.PT,
    "facebook/opt-66b": ModelType.PT,
    "facebook/galactica-120b": ModelType.PT,
    "Abe13/jgpt2-v1": ModelType.SFT,
    "gpt2-xl": ModelType.PT,
    "HuggingFaceH4/stable-vicuna-13b-2904": ModelType.RL,
    "HuggingFaceH4/llama-7b-ift-alpaca": ModelType.SFT,
    "HuggingFaceH4/starchat-alpha": ModelType.SFT,
    "HuggingFaceH4/starchat-beta": ModelType.SFT,
    "ausboss/Llama30B-SuperHOT": ModelType.SFT,
    "ausboss/llama-13b-supercot": ModelType.SFT,
    "ausboss/llama-30b-supercot": ModelType.SFT,
    "Neko-Institute-of-Science/metharme-7b": ModelType.SFT,
    "SebastianSchramm/Cerebras-GPT-111M-instruction": ModelType.SFT,
    "victor123/WizardLM-13B-1.0": ModelType.SFT,
    "AlpinDale/pygmalion-instruct": ModelType.SFT,
    "tiiuae/falcon-7b-instruct": ModelType.SFT,
    "tiiuae/falcon-40b-instruct": ModelType.SFT,
    "tiiuae/falcon-40b": ModelType.PT,
    "tiiuae/falcon-7b": ModelType.PT,
    "cyl/awsome-llama": ModelType.SFT,
    "xzuyn/Alpacino-SuperCOT-13B": ModelType.SFT,
    "xzuyn/MedicWizard-7B": ModelType.SFT,
    "beomi/KoAlpaca-Polyglot-5.8B": ModelType.SFT,
    "chainyo/alpaca-lora-7b": ModelType.SFT,
    "Salesforce/codegen-16B-nl": ModelType.PT,
    "Salesforce/codegen-16B-multi": ModelType.SFT,
    "ai-forever/rugpt3large_based_on_gpt2": ModelType.SFT,
    "gpt2-large": ModelType.PT,
    "huggingface/llama-13b": ModelType.PT,
    "huggingface/llama-7b": ModelType.PT,
    "huggingface/llama-65b": ModelType.PT,
    "huggingface/llama-30b": ModelType.PT,
    "jondurbin/airoboros-7b": ModelType.SFT,
    "jondurbin/airoboros-13b": ModelType.SFT,
    "cerebras/Cerebras-GPT-1.3B": ModelType.PT,
    "cerebras/Cerebras-GPT-111M": ModelType.PT,
    "NousResearch/Nous-Hermes-13b": ModelType.SFT,
    "project-baize/baize-v2-7b": ModelType.SFT,
    "project-baize/baize-v2-13b": ModelType.SFT,
    "LLMs/AlpacaGPT4-7B-elina": ModelType.SFT,
    "LLMs/Vicuna-EvolInstruct-13B": ModelType.SFT,
    "huggingtweets/jerma985": ModelType.SFT,
    "huggyllama/llama-65b": ModelType.PT,
    "WizardLM/WizardLM-13B-1.0": ModelType.SFT,
    "gpt2": ModelType.PT,
    "alessandropalla/instruct_gpt2": ModelType.SFT,
    "MayaPH/FinOPT-Lincoln": ModelType.SFT,
    "MayaPH/FinOPT-Franklin": ModelType.SFT,
    "timdettmers/guanaco-33b-merged": ModelType.SFT,
    "timdettmers/guanaco-65b-merged": ModelType.SFT,
    "elinas/llama-30b-hf-transformers-4.29": ModelType.SFT,
    "elinas/chronos-33b": ModelType.SFT,
    "nmitchko/medguanaco-65b-GPTQ": ModelType.SFT,
    "xhyi/PT_GPTNEO350_ATG": ModelType.SFT,
    "h2oai/h2ogpt-oasst1-512-20b": ModelType.SFT,
    "h2oai/h2ogpt-gm-oasst1-en-1024-12b": ModelType.SFT,
    "nomic-ai/gpt4all-13b-snoozy": ModelType.SFT,
    "nomic-ai/gpt4all-j": ModelType.SFT,
}


def get_model_type(leaderboard_data: List[dict]):
    """Tag every leaderboard row in place with its training regime.

    Each row's "Type" key is set from TYPE_METADATA, looked up by the row's
    "model_name_for_query"; unknown models get the string "N/A".
    """
    for row in leaderboard_data:
        row["Type"] = TYPE_METADATA.get(row["model_name_for_query"], "N/A")
src/utils_display.py CHANGED
@@ -7,6 +7,7 @@ class ColumnContent:
7
  name: str
8
  type: str
9
  displayed_by_default: bool
 
10
 
11
  def fields(raw_class):
12
  return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
@@ -14,8 +15,9 @@ def fields(raw_class):
14
  @dataclass(frozen=True)
15
  class AutoEvalColumn: # Auto evals column
16
  model = ColumnContent("Model", "markdown", True)
17
- revision = ColumnContent("Revision", "str", True)
18
- is_8bit = ColumnContent("8bit", "bool", False)
 
19
  license = ColumnContent("Hub License", "str", False)
20
  params = ColumnContent("#Params (B)", "number", False)
21
  likes = ColumnContent("Hub ❤️", "number", False)
 
7
  name: str
8
  type: str
9
  displayed_by_default: bool
10
+ hidden: bool = False
11
 
12
  def fields(raw_class):
13
  return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
15
  @dataclass(frozen=True)
16
  class AutoEvalColumn: # Auto evals column
17
  model = ColumnContent("Model", "markdown", True)
18
+ revision = ColumnContent("Revision", "str", True, True)
19
+ model_type = ColumnContent("Type", "bool", False)
20
+ is_8bit = ColumnContent("8bit", "bool", False, True)
21
  license = ColumnContent("Hub License", "str", False)
22
  params = ColumnContent("#Params (B)", "number", False)
23
  likes = ColumnContent("Hub ❤️", "number", False)