ClΓ©mentine commited on
Commit
8c49cb6
β€’
1 Parent(s): ba25d90

Cleaned and refactored the code, improved filtering, added selection of deleted models

Browse files
.pre-commit-config.yaml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ default_language_version:
16
+ python: python3
17
+
18
+ ci:
19
+ autofix_prs: true
20
+ autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
21
+ autoupdate_schedule: quarterly
22
+
23
+ repos:
24
+ - repo: https://github.com/pre-commit/pre-commit-hooks
25
+ rev: v4.3.0
26
+ hooks:
27
+ - id: check-yaml
28
+ - id: check-case-conflict
29
+ - id: detect-private-key
30
+ - id: check-added-large-files
31
+ args: ['--maxkb=1000']
32
+ - id: requirements-txt-fixer
33
+ - id: end-of-file-fixer
34
+ - id: trailing-whitespace
35
+
36
+ - repo: https://github.com/PyCQA/isort
37
+ rev: 5.12.0
38
+ hooks:
39
+ - id: isort
40
+ name: Format imports
41
+
42
+ - repo: https://github.com/psf/black
43
+ rev: 22.12.0
44
+ hooks:
45
+ - id: black
46
+ name: Format code
47
+ additional_dependencies: ['click==8.0.2']
48
+
49
+ - repo: https://github.com/charliermarsh/ruff-pre-commit
50
+ # Ruff version.
51
+ rev: 'v0.0.267'
52
+ hooks:
53
+ - id: ruff
Makefile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: style format
2
+
3
+
4
+ style:
5
+ python -m black --line-length 119 .
6
+ python -m isort .
7
+ ruff check --fix .
8
+
9
+
10
+ quality:
11
+ python -m black --check --line-length 119 .
12
+ python -m isort --check-only .
13
+ ruff check .
app.py CHANGED
@@ -2,23 +2,32 @@ import json
2
  import os
3
  from datetime import datetime, timezone
4
 
5
-
6
  import gradio as gr
7
- import numpy as np
8
  import pandas as pd
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
  from huggingface_hub import HfApi
11
- from transformers import AutoConfig
12
 
13
- from src.auto_leaderboard.get_model_metadata import apply_metadata, DO_NOT_SUBMIT_MODELS
14
- from src.assets.text_content import *
15
- from src.auto_leaderboard.load_results import get_eval_results_dicts, make_clickable_model
16
- from src.assets.hardcoded_evals import gpt4_values, gpt35_values, baseline
17
  from src.assets.css_html_js import custom_css, get_window_url_params
18
- from src.utils_display import AutoEvalColumn, EvalQueueColumn, fields, styled_error, styled_warning, styled_message
19
- from src.init import get_all_requested_models, load_all_info_from_hub
20
-
21
- pd.set_option('display.precision', 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # clone / pull the lmeh eval data
24
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
@@ -37,20 +46,14 @@ EVAL_RESULTS_PATH = "eval-results"
37
  EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
38
  EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
39
 
40
- api = HfApi()
 
41
 
42
  def restart_space():
43
- api.restart_space(
44
- repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN
45
- )
46
 
47
- eval_queue, requested_models, eval_results = load_all_info_from_hub(QUEUE_REPO, RESULTS_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH)
48
-
49
- if not IS_PUBLIC:
50
- eval_queue_private, requested_models_private, eval_results_private = load_all_info_from_hub(PRIVATE_QUEUE_REPO, PRIVATE_RESULTS_REPO, EVAL_REQUESTS_PATH_PRIVATE, EVAL_RESULTS_PATH_PRIVATE)
51
- else:
52
- eval_queue_private, eval_results_private = None, None
53
 
 
54
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
55
  TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
56
  COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
@@ -63,116 +66,41 @@ if not IS_PUBLIC:
63
  EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
64
  EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
65
 
66
- BENCHMARK_COLS = [c.name for c in [AutoEvalColumn.arc, AutoEvalColumn.hellaswag, AutoEvalColumn.mmlu, AutoEvalColumn.truthfulqa]]
67
-
68
-
69
- def has_no_nan_values(df, columns):
70
- return df[columns].notna().all(axis=1)
71
-
72
-
73
- def has_nan_values(df, columns):
74
- return df[columns].isna().any(axis=1)
75
-
76
-
77
- def get_leaderboard_df():
78
- if eval_results:
79
- print("Pulling evaluation results for the leaderboard.")
80
- eval_results.git_pull()
81
- if eval_results_private:
82
- print("Pulling evaluation results for the leaderboard.")
83
- eval_results_private.git_pull()
84
-
85
- all_data = get_eval_results_dicts()
86
-
87
- if not IS_PUBLIC:
88
- all_data.append(gpt4_values)
89
- all_data.append(gpt35_values)
90
-
91
- all_data.append(baseline)
92
- apply_metadata(all_data) # Populate model type based on known hardcoded values in `metadata.py`
93
-
94
- df = pd.DataFrame.from_records(all_data)
95
- df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
96
- df = df[COLS].round(decimals=2)
97
-
98
- # filter out if any of the benchmarks have not been produced
99
- df = df[has_no_nan_values(df, BENCHMARK_COLS)]
100
- return df
101
 
 
 
 
 
102
 
103
- def get_evaluation_queue_df():
104
- if eval_queue:
105
- print("Pulling changes for the evaluation queue.")
106
- eval_queue.git_pull()
107
- if eval_queue_private:
108
- print("Pulling changes for the evaluation queue.")
109
- eval_queue_private.git_pull()
 
 
110
 
111
- entries = [
112
- entry
113
- for entry in os.listdir(EVAL_REQUESTS_PATH)
114
- if not entry.startswith(".")
115
- ]
116
- all_evals = []
117
-
118
- for entry in entries:
119
- if ".json" in entry:
120
- file_path = os.path.join(EVAL_REQUESTS_PATH, entry)
121
- with open(file_path) as fp:
122
- data = json.load(fp)
123
-
124
- data["# params"] = "unknown"
125
- data["model"] = make_clickable_model(data["model"])
126
- data["revision"] = data.get("revision", "main")
127
-
128
- all_evals.append(data)
129
- elif ".md" not in entry:
130
- # this is a folder
131
- sub_entries = [
132
- e
133
- for e in os.listdir(f"{EVAL_REQUESTS_PATH}/{entry}")
134
- if not e.startswith(".")
135
- ]
136
- for sub_entry in sub_entries:
137
- file_path = os.path.join(EVAL_REQUESTS_PATH, entry, sub_entry)
138
- with open(file_path) as fp:
139
- data = json.load(fp)
140
-
141
- # data["# params"] = get_n_params(data["model"])
142
- data["model"] = make_clickable_model(data["model"])
143
- all_evals.append(data)
144
-
145
- pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
146
- running_list = [e for e in all_evals if e["status"] == "RUNNING"]
147
- finished_list = [e for e in all_evals if e["status"].startswith("FINISHED")]
148
- df_pending = pd.DataFrame.from_records(pending_list, columns=EVAL_COLS)
149
- df_running = pd.DataFrame.from_records(running_list, columns=EVAL_COLS)
150
- df_finished = pd.DataFrame.from_records(finished_list, columns=EVAL_COLS)
151
- return df_finished[EVAL_COLS], df_running[EVAL_COLS], df_pending[EVAL_COLS]
152
-
153
-
154
-
155
- original_df = get_leaderboard_df()
156
  leaderboard_df = original_df.copy()
157
  (
158
  finished_eval_queue_df,
159
  running_eval_queue_df,
160
  pending_eval_queue_df,
161
- ) = get_evaluation_queue_df()
162
-
163
- def is_model_on_hub(model_name, revision) -> bool:
164
- try:
165
- AutoConfig.from_pretrained(model_name, revision=revision)
166
- return True, None
167
-
168
- except ValueError as e:
169
- return False, "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard."
170
-
171
- except Exception as e:
172
- print(f"Could not get the model config from the hub.: {e}")
173
- return False, "was not found on hub!"
174
 
175
 
 
176
  def add_new_eval(
177
  model: str,
178
  base_model: str,
@@ -196,13 +124,12 @@ def add_new_eval(
196
  base_model_on_hub, error = is_model_on_hub(base_model, revision)
197
  if not base_model_on_hub:
198
  return styled_error(f'Base model "{base_model}" {error}')
199
-
200
 
201
  if not weight_type == "Adapter":
202
  model_on_hub, error = is_model_on_hub(model, revision)
203
  if not model_on_hub:
204
  return styled_error(f'Model "{model}" {error}')
205
-
206
  print("adding new eval")
207
 
208
  eval_entry = {
@@ -233,7 +160,7 @@ def add_new_eval(
233
 
234
  # Check for duplicate submission
235
  if out_path.split("eval-queue/")[1].lower() in requested_models:
236
- return styled_warning("This model has been already submitted.")
237
 
238
  with open(out_path, "w") as f:
239
  f.write(json.dumps(eval_entry))
@@ -242,7 +169,6 @@ def add_new_eval(
242
  path_or_fileobj=out_path,
243
  path_in_repo=out_path.split("eval-queue/")[1],
244
  repo_id=QUEUE_REPO,
245
- token=H4_TOKEN,
246
  repo_type="dataset",
247
  commit_message=f"Add {model} to eval queue",
248
  )
@@ -250,16 +176,19 @@ def add_new_eval(
250
  # remove the local file
251
  os.remove(out_path)
252
 
253
- return styled_message("Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list.")
 
 
254
 
255
 
256
- def refresh():
257
- leaderboard_df = get_leaderboard_df()
 
258
  (
259
  finished_eval_queue_df,
260
  running_eval_queue_df,
261
  pending_eval_queue_df,
262
- ) = get_evaluation_queue_df()
263
  return (
264
  leaderboard_df,
265
  finished_eval_queue_df,
@@ -268,74 +197,72 @@ def refresh():
268
  )
269
 
270
 
271
- def search_table(df, leaderboard_table, query):
272
- if AutoEvalColumn.model_type.name in leaderboard_table.columns:
 
 
 
 
 
 
 
 
 
 
 
 
273
  filtered_df = df[
274
  (df[AutoEvalColumn.dummy.name].str.contains(query, case=False))
275
  | (df[AutoEvalColumn.model_type.name].str.contains(query, case=False))
276
- ]
277
  else:
278
  filtered_df = df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
279
- return filtered_df[leaderboard_table.columns]
280
 
281
 
282
- def select_columns(df, columns):
283
- always_here_cols = [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name]
284
- # We use COLS to maintain sorting
285
- filtered_df = df[always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.dummy.name]]
 
 
 
 
 
286
  return filtered_df
287
 
288
- #TODO allow this to filter by values of any columns
289
- def filter_items(df, leaderboard_table, query):
290
- if query == "all":
291
- return df[leaderboard_table.columns]
292
- else:
293
- query = query[0] #take only the emoji character
294
- if AutoEvalColumn.model_type_symbol.name in leaderboard_table.columns:
295
- filtered_df = df[(df[AutoEvalColumn.model_type_symbol.name] == query)]
296
- else:
297
- return filtered_df[leaderboard_table.columns]
298
- return filtered_df[leaderboard_table.columns]
299
-
300
- def filter_items_size(df, leaderboard_table, query):
301
- numeric_intervals = {
302
- "all": None,
303
- "< 1B": (0, 1),
304
- "~3B": (1, 5),
305
- "~7B": (6, 11),
306
- "~13B": (12, 15),
307
- "~35B": (16, 55),
308
- "60B+": (55, 1000)
309
- }
310
 
311
- if query == "all":
312
- return df[leaderboard_table.columns]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
 
314
- numeric_interval = numeric_intervals[query]
315
-
316
- if AutoEvalColumn.params.name in leaderboard_table.columns:
317
- params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors='coerce')
318
- filtered_df = df[params_column.between(*numeric_interval)]
319
- else:
320
- return filtered_df[leaderboard_table.columns]
321
- return filtered_df[leaderboard_table.columns]
322
-
323
- def change_tab(query_param):
324
- query_param = query_param.replace("'", '"')
325
- query_param = json.loads(query_param)
326
-
327
- if (
328
- isinstance(query_param, dict)
329
- and "tab" in query_param
330
- and query_param["tab"] == "evaluation"
331
- ):
332
- return gr.Tabs.update(selected=1)
333
- else:
334
- return gr.Tabs.update(selected=0)
335
-
336
- def update_filter_type(input_type, shown_columns):
337
- shown_columns.append(AutoEvalColumn.params.name)
338
- return gr.update(visible=(input_type == 'types')), gr.update(visible=(input_type == 'sizes')), shown_columns
339
 
340
 
341
  demo = gr.Blocks(css=custom_css)
@@ -346,13 +273,39 @@ with demo:
346
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
347
  with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
348
  with gr.Row():
349
- shown_columns = gr.CheckboxGroup(
350
- choices = [c for c in COLS if c not in [AutoEvalColumn.dummy.name, AutoEvalColumn.model.name, AutoEvalColumn.model_type_symbol.name]],
351
- value = [c for c in COLS_LITE if c not in [AutoEvalColumn.dummy.name, AutoEvalColumn.model.name, AutoEvalColumn.model_type_symbol.name]],
352
- label="Select columns to show",
353
- elem_id="column-select",
354
- interactive=True,
355
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  with gr.Column(min_width=320):
357
  search_bar = gr.Textbox(
358
  placeholder="πŸ” Search for your model and press ENTER...",
@@ -360,46 +313,47 @@ with demo:
360
  elem_id="search-bar",
361
  )
362
  with gr.Box(elem_id="box-filter"):
363
- filter_type = gr.Dropdown(
364
- label="⏚ Filter model",
365
- choices=["types", "sizes"], value="types",
366
- interactive=True,
367
- elem_id="filter_type"
368
- )
369
- filter_columns = gr.Radio(
370
  label="⏚ Filter model types",
371
- show_label=False,
372
- choices = [
373
- "all",
374
  ModelType.PT.to_str(),
375
  ModelType.FT.to_str(),
376
  ModelType.IFT.to_str(),
377
- ModelType.RL.to_str(),
378
  ],
379
  value="all",
380
- elem_id="filter-columns"
 
381
  )
382
  filter_columns_size = gr.Radio(
383
  label="⏚ Filter model sizes",
384
- show_label=False,
385
- choices = [
386
  "all",
387
  "< 1B",
388
  "~3B",
389
  "~7B",
390
  "~13B",
391
  "~35B",
392
- "60B+"
393
  ],
394
  value="all",
395
- visible=False,
396
  interactive=True,
397
- elem_id="filter-columns-size"
398
  )
399
-
400
  leaderboard_table = gr.components.Dataframe(
401
- value=leaderboard_df[[AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name] + shown_columns.value + [AutoEvalColumn.dummy.name]],
402
- headers=[AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name] + shown_columns.value + [AutoEvalColumn.dummy.name],
 
 
 
 
 
 
 
 
 
403
  datatype=TYPES,
404
  max_rows=None,
405
  elem_id="leaderboard-table",
@@ -417,14 +371,55 @@ with demo:
417
  )
418
  search_bar.submit(
419
  search_table,
420
- [hidden_leaderboard_table_for_search, leaderboard_table, search_bar],
 
 
 
 
 
 
 
 
 
421
  leaderboard_table,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  )
423
-
424
- filter_type.change(update_filter_type,inputs=[filter_type, shown_columns],outputs=[filter_columns, filter_columns_size, shown_columns],queue=False).then(select_columns, [hidden_leaderboard_table_for_search, shown_columns], leaderboard_table, queue=False)
425
- shown_columns.change(select_columns, [hidden_leaderboard_table_for_search, shown_columns], leaderboard_table, queue=False)
426
- filter_columns.change(filter_items, [hidden_leaderboard_table_for_search, leaderboard_table, filter_columns], leaderboard_table, queue=False)
427
- filter_columns_size.change(filter_items_size, [hidden_leaderboard_table_for_search, leaderboard_table, filter_columns_size], leaderboard_table, queue=False)
428
  with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=2):
429
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
430
 
@@ -434,7 +429,10 @@ with demo:
434
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
435
 
436
  with gr.Column():
437
- with gr.Accordion(f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
 
 
 
438
  with gr.Row():
439
  finished_eval_table = gr.components.Dataframe(
440
  value=finished_eval_queue_df,
@@ -442,7 +440,10 @@ with demo:
442
  datatype=EVAL_TYPES,
443
  max_rows=5,
444
  )
445
- with gr.Accordion(f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
 
 
 
446
  with gr.Row():
447
  running_eval_table = gr.components.Dataframe(
448
  value=running_eval_queue_df,
@@ -451,7 +452,10 @@ with demo:
451
  max_rows=5,
452
  )
453
 
454
- with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
 
 
 
455
  with gr.Row():
456
  pending_eval_table = gr.components.Dataframe(
457
  value=pending_eval_queue_df,
@@ -465,20 +469,16 @@ with demo:
465
  with gr.Row():
466
  with gr.Column():
467
  model_name_textbox = gr.Textbox(label="Model name")
468
- revision_name_textbox = gr.Textbox(
469
- label="revision", placeholder="main"
470
- )
471
- private = gr.Checkbox(
472
- False, label="Private", visible=not IS_PUBLIC
473
- )
474
  model_type = gr.Dropdown(
475
- choices=[
476
  ModelType.PT.to_str(" : "),
477
  ModelType.FT.to_str(" : "),
478
  ModelType.IFT.to_str(" : "),
479
- ModelType.RL.to_str(" : "),
480
- ],
481
- label="Model type",
482
  multiselect=False,
483
  value=None,
484
  interactive=True,
@@ -486,22 +486,25 @@ with demo:
486
 
487
  with gr.Column():
488
  precision = gr.Dropdown(
489
- choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)"],
490
- label="Precision",
 
 
 
 
 
491
  multiselect=False,
492
  value="float16",
493
  interactive=True,
494
  )
495
  weight_type = gr.Dropdown(
496
  choices=["Original", "Delta", "Adapter"],
497
- label="Weights type",
498
  multiselect=False,
499
  value="Original",
500
  interactive=True,
501
  )
502
- base_model_name_textbox = gr.Textbox(
503
- label="Base model (for delta or adapter weights)"
504
- )
505
 
506
  submit_button = gr.Button("Submit Eval")
507
  submission_result = gr.Markdown()
@@ -514,7 +517,7 @@ with demo:
514
  precision,
515
  private,
516
  weight_type,
517
- model_type
518
  ],
519
  submission_result,
520
  )
@@ -551,4 +554,4 @@ with demo:
551
  scheduler = BackgroundScheduler()
552
  scheduler.add_job(restart_space, "interval", seconds=3600)
553
  scheduler.start()
554
- demo.queue(concurrency_count=40).launch()
 
2
  import os
3
  from datetime import datetime, timezone
4
 
 
5
  import gradio as gr
 
6
  import pandas as pd
7
  from apscheduler.schedulers.background import BackgroundScheduler
8
  from huggingface_hub import HfApi
 
9
 
 
 
 
 
10
  from src.assets.css_html_js import custom_css, get_window_url_params
11
+ from src.assets.text_content import (
12
+ CITATION_BUTTON_LABEL,
13
+ CITATION_BUTTON_TEXT,
14
+ EVALUATION_QUEUE_TEXT,
15
+ INTRODUCTION_TEXT,
16
+ LLM_BENCHMARKS_TEXT,
17
+ TITLE,
18
+ )
19
+ from src.display_models.get_model_metadata import DO_NOT_SUBMIT_MODELS, ModelType
20
+ from src.display_models.utils import (
21
+ AutoEvalColumn,
22
+ EvalQueueColumn,
23
+ fields,
24
+ styled_error,
25
+ styled_message,
26
+ styled_warning,
27
+ )
28
+ from src.load_from_hub import get_evaluation_queue_df, get_leaderboard_df, is_model_on_hub, load_all_info_from_hub
29
+
30
+ pd.set_option("display.precision", 1)
31
 
32
  # clone / pull the lmeh eval data
33
  H4_TOKEN = os.environ.get("H4_TOKEN", None)
 
46
  EVAL_REQUESTS_PATH_PRIVATE = "eval-queue-private"
47
  EVAL_RESULTS_PATH_PRIVATE = "eval-results-private"
48
 
49
+ api = HfApi(token=H4_TOKEN)
50
+
51
 
52
  def restart_space():
53
+ api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
 
 
54
 
 
 
 
 
 
 
55
 
56
+ # Column selection
57
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
58
  TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
59
  COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 
66
  EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
67
  EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
68
 
69
+ BENCHMARK_COLS = [
70
+ c.name
71
+ for c in [
72
+ AutoEvalColumn.arc,
73
+ AutoEvalColumn.hellaswag,
74
+ AutoEvalColumn.mmlu,
75
+ AutoEvalColumn.truthfulqa,
76
+ ]
77
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ ## LOAD INFO FROM HUB
80
+ eval_queue, requested_models, eval_results = load_all_info_from_hub(
81
+ QUEUE_REPO, RESULTS_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
82
+ )
83
 
84
+ if not IS_PUBLIC:
85
+ (eval_queue_private, requested_models_private, eval_results_private,) = load_all_info_from_hub(
86
+ PRIVATE_QUEUE_REPO,
87
+ PRIVATE_RESULTS_REPO,
88
+ EVAL_REQUESTS_PATH_PRIVATE,
89
+ EVAL_RESULTS_PATH_PRIVATE,
90
+ )
91
+ else:
92
+ eval_queue_private, eval_results_private = None, None
93
 
94
+ original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  leaderboard_df = original_df.copy()
96
  (
97
  finished_eval_queue_df,
98
  running_eval_queue_df,
99
  pending_eval_queue_df,
100
+ ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
 
103
+ ## INTERACTION FUNCTIONS
104
  def add_new_eval(
105
  model: str,
106
  base_model: str,
 
124
  base_model_on_hub, error = is_model_on_hub(base_model, revision)
125
  if not base_model_on_hub:
126
  return styled_error(f'Base model "{base_model}" {error}')
 
127
 
128
  if not weight_type == "Adapter":
129
  model_on_hub, error = is_model_on_hub(model, revision)
130
  if not model_on_hub:
131
  return styled_error(f'Model "{model}" {error}')
132
+
133
  print("adding new eval")
134
 
135
  eval_entry = {
 
160
 
161
  # Check for duplicate submission
162
  if out_path.split("eval-queue/")[1].lower() in requested_models:
163
+ return styled_warning("This model has been already submitted.")
164
 
165
  with open(out_path, "w") as f:
166
  f.write(json.dumps(eval_entry))
 
169
  path_or_fileobj=out_path,
170
  path_in_repo=out_path.split("eval-queue/")[1],
171
  repo_id=QUEUE_REPO,
 
172
  repo_type="dataset",
173
  commit_message=f"Add {model} to eval queue",
174
  )
 
176
  # remove the local file
177
  os.remove(out_path)
178
 
179
+ return styled_message(
180
+ "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
181
+ )
182
 
183
 
184
+ # Basics
185
+ def refresh() -> list[pd.DataFrame]:
186
+ leaderboard_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
187
  (
188
  finished_eval_queue_df,
189
  running_eval_queue_df,
190
  pending_eval_queue_df,
191
+ ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, COLS)
192
  return (
193
  leaderboard_df,
194
  finished_eval_queue_df,
 
197
  )
198
 
199
 
200
+ def change_tab(query_param: str):
201
+ query_param = query_param.replace("'", '"')
202
+ query_param = json.loads(query_param)
203
+
204
+ if isinstance(query_param, dict) and "tab" in query_param and query_param["tab"] == "evaluation":
205
+ return gr.Tabs.update(selected=1)
206
+ else:
207
+ return gr.Tabs.update(selected=0)
208
+
209
+
210
+ # Searching and filtering
211
+ def search_table(df: pd.DataFrame, current_columns_df: pd.DataFrame, query: str) -> pd.DataFrame:
212
+ current_columns = current_columns_df.columns
213
+ if AutoEvalColumn.model_type.name in current_columns:
214
  filtered_df = df[
215
  (df[AutoEvalColumn.dummy.name].str.contains(query, case=False))
216
  | (df[AutoEvalColumn.model_type.name].str.contains(query, case=False))
217
+ ]
218
  else:
219
  filtered_df = df[(df[AutoEvalColumn.dummy.name].str.contains(query, case=False))]
220
+ return filtered_df[current_columns]
221
 
222
 
223
+ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
224
+ always_here_cols = [
225
+ AutoEvalColumn.model_type_symbol.name,
226
+ AutoEvalColumn.model.name,
227
+ ]
228
+ # We use COLS to maintain sorting
229
+ filtered_df = df[
230
+ always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.dummy.name]
231
+ ]
232
  return filtered_df
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
+ def filter_models(
236
+ df: pd.DataFrame, current_columns_df: pd.DataFrame, type_query: str, size_query: str, show_deleted: bool
237
+ ) -> pd.DataFrame:
238
+ current_columns = current_columns_df.columns
239
+
240
+ # Show all models
241
+ if show_deleted:
242
+ filtered_df = df[current_columns]
243
+ else: # Show only still on the hub models
244
+ filtered_df = df[df[AutoEvalColumn.still_on_hub.name] is True][current_columns]
245
+
246
+ if type_query != "all":
247
+ type_emoji = type_query[0]
248
+ filtered_df = filtered_df[df[AutoEvalColumn.model_type_symbol.name] == type_emoji]
249
+
250
+ if size_query != "all":
251
+ numeric_intervals = {
252
+ "all": (0, 10000),
253
+ "< 1B": (0, 1),
254
+ "~3B": (1, 5),
255
+ "~7B": (6, 11),
256
+ "~13B": (12, 15),
257
+ "~35B": (16, 55),
258
+ "60B+": (55, 10000),
259
+ }
260
+ numeric_interval = numeric_intervals[size_query]
261
+ params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
262
+
263
+ filtered_df = filtered_df[params_column.between(*numeric_interval)]
264
 
265
+ return filtered_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
 
268
  demo = gr.Blocks(css=custom_css)
 
273
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
274
  with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
275
  with gr.Row():
276
+ with gr.Column():
277
+ with gr.Row():
278
+ shown_columns = gr.CheckboxGroup(
279
+ choices=[
280
+ c
281
+ for c in COLS
282
+ if c
283
+ not in [
284
+ AutoEvalColumn.dummy.name,
285
+ AutoEvalColumn.model.name,
286
+ AutoEvalColumn.model_type_symbol.name,
287
+ AutoEvalColumn.still_on_hub.name,
288
+ ]
289
+ ],
290
+ value=[
291
+ c
292
+ for c in COLS_LITE
293
+ if c
294
+ not in [
295
+ AutoEvalColumn.dummy.name,
296
+ AutoEvalColumn.model.name,
297
+ AutoEvalColumn.model_type_symbol.name,
298
+ AutoEvalColumn.still_on_hub.name,
299
+ ]
300
+ ],
301
+ label="Select columns to show",
302
+ elem_id="column-select",
303
+ interactive=True,
304
+ )
305
+ with gr.Row():
306
+ deleted_models_visibility = gr.Checkbox(
307
+ value=True, label="Show models removed from the hub", interactive=True
308
+ )
309
  with gr.Column(min_width=320):
310
  search_bar = gr.Textbox(
311
  placeholder="πŸ” Search for your model and press ENTER...",
 
313
  elem_id="search-bar",
314
  )
315
  with gr.Box(elem_id="box-filter"):
316
+ filter_columns_type = gr.Radio(
 
 
 
 
 
 
317
  label="⏚ Filter model types",
318
+ choices=[
319
+ "all",
 
320
  ModelType.PT.to_str(),
321
  ModelType.FT.to_str(),
322
  ModelType.IFT.to_str(),
323
+ ModelType.RL.to_str(),
324
  ],
325
  value="all",
326
+ interactive=True,
327
+ elem_id="filter-columns-type",
328
  )
329
  filter_columns_size = gr.Radio(
330
  label="⏚ Filter model sizes",
331
+ choices=[
 
332
  "all",
333
  "< 1B",
334
  "~3B",
335
  "~7B",
336
  "~13B",
337
  "~35B",
338
+ "60B+",
339
  ],
340
  value="all",
 
341
  interactive=True,
342
+ elem_id="filter-columns-size",
343
  )
344
+
345
  leaderboard_table = gr.components.Dataframe(
346
+ value=leaderboard_df[
347
+ [AutoEvalColumn.model_type_symbol.name, AutoEvalColumn.model.name]
348
+ + shown_columns.value
349
+ + [AutoEvalColumn.dummy.name]
350
+ ],
351
+ headers=[
352
+ AutoEvalColumn.model_type_symbol.name,
353
+ AutoEvalColumn.model.name,
354
+ ]
355
+ + shown_columns.value
356
+ + [AutoEvalColumn.dummy.name],
357
  datatype=TYPES,
358
  max_rows=None,
359
  elem_id="leaderboard-table",
 
371
  )
372
  search_bar.submit(
373
  search_table,
374
+ [
375
+ hidden_leaderboard_table_for_search,
376
+ leaderboard_table,
377
+ search_bar,
378
+ ],
379
+ leaderboard_table,
380
+ )
381
+ shown_columns.change(
382
+ select_columns,
383
+ [hidden_leaderboard_table_for_search, shown_columns],
384
  leaderboard_table,
385
+ queue=False,
386
+ )
387
+ filter_columns_type.change(
388
+ filter_models,
389
+ [
390
+ hidden_leaderboard_table_for_search,
391
+ leaderboard_table,
392
+ filter_columns_type,
393
+ filter_columns_size,
394
+ deleted_models_visibility,
395
+ ],
396
+ leaderboard_table,
397
+ queue=False,
398
+ )
399
+ filter_columns_size.change(
400
+ filter_models,
401
+ [
402
+ hidden_leaderboard_table_for_search,
403
+ leaderboard_table,
404
+ filter_columns_type,
405
+ filter_columns_size,
406
+ deleted_models_visibility,
407
+ ],
408
+ leaderboard_table,
409
+ queue=False,
410
+ )
411
+ deleted_models_visibility.change(
412
+ filter_models,
413
+ [
414
+ hidden_leaderboard_table_for_search,
415
+ leaderboard_table,
416
+ filter_columns_type,
417
+ filter_columns_size,
418
+ deleted_models_visibility,
419
+ ],
420
+ leaderboard_table,
421
+ queue=False,
422
  )
 
 
 
 
 
423
  with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=2):
424
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
425
 
 
429
  gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
430
 
431
  with gr.Column():
432
+ with gr.Accordion(
433
+ f"βœ… Finished Evaluations ({len(finished_eval_queue_df)})",
434
+ open=False,
435
+ ):
436
  with gr.Row():
437
  finished_eval_table = gr.components.Dataframe(
438
  value=finished_eval_queue_df,
 
440
  datatype=EVAL_TYPES,
441
  max_rows=5,
442
  )
443
+ with gr.Accordion(
444
+ f"πŸ”„ Running Evaluation Queue ({len(running_eval_queue_df)})",
445
+ open=False,
446
+ ):
447
  with gr.Row():
448
  running_eval_table = gr.components.Dataframe(
449
  value=running_eval_queue_df,
 
452
  max_rows=5,
453
  )
454
 
455
+ with gr.Accordion(
456
+ f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
457
+ open=False,
458
+ ):
459
  with gr.Row():
460
  pending_eval_table = gr.components.Dataframe(
461
  value=pending_eval_queue_df,
 
469
  with gr.Row():
470
  with gr.Column():
471
  model_name_textbox = gr.Textbox(label="Model name")
472
+ revision_name_textbox = gr.Textbox(label="revision", placeholder="main")
473
+ private = gr.Checkbox(False, label="Private", visible=not IS_PUBLIC)
 
 
 
 
474
  model_type = gr.Dropdown(
475
+ choices=[
476
  ModelType.PT.to_str(" : "),
477
  ModelType.FT.to_str(" : "),
478
  ModelType.IFT.to_str(" : "),
479
+ ModelType.RL.to_str(" : "),
480
+ ],
481
+ label="Model type",
482
  multiselect=False,
483
  value=None,
484
  interactive=True,
 
486
 
487
  with gr.Column():
488
  precision = gr.Dropdown(
489
+ choices=[
490
+ "float16",
491
+ "bfloat16",
492
+ "8bit (LLM.int8)",
493
+ "4bit (QLoRA / FP4)",
494
+ ],
495
+ label="Precision",
496
  multiselect=False,
497
  value="float16",
498
  interactive=True,
499
  )
500
  weight_type = gr.Dropdown(
501
  choices=["Original", "Delta", "Adapter"],
502
+ label="Weights type",
503
  multiselect=False,
504
  value="Original",
505
  interactive=True,
506
  )
507
+ base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
 
 
508
 
509
  submit_button = gr.Button("Submit Eval")
510
  submission_result = gr.Markdown()
 
517
  precision,
518
  private,
519
  weight_type,
520
+ model_type,
521
  ],
522
  submission_result,
523
  )
 
554
  scheduler = BackgroundScheduler()
555
  scheduler.add_job(restart_space, "interval", seconds=3600)
556
  scheduler.start()
557
+ demo.queue(concurrency_count=40).launch()
pyproject.toml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.ruff]
2
+ # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
3
+ select = ["E", "F"]
4
+ ignore = ["E501"] # line too long (black is taking care of this)
5
+ line-length = 119
6
+ fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
7
+
8
+ [tool.isort]
9
+ profile = "black"
10
+ line_length = 119
11
+
12
+ [tool.black]
13
+ line-length = 119
src/assets/css_html_js.py CHANGED
@@ -89,13 +89,13 @@ table th:first-child {
89
  #filter_type label > .wrap .wrap-inner input{
90
  width: 1px
91
  }
92
- #filter-columns{
93
  border:0;
94
- padding:0;
95
  }
96
  #filter-columns-size{
97
  border:0;
98
- padding:0;
99
  }
100
  #box-filter > .form{
101
  border: 0
@@ -108,4 +108,4 @@ get_window_url_params = """
108
  url_params = Object.fromEntries(params);
109
  return url_params;
110
  }
111
- """
 
89
  #filter_type label > .wrap .wrap-inner input{
90
  width: 1px
91
  }
92
+ #filter-columns-type{
93
  border:0;
94
+ padding:0.5;
95
  }
96
  #filter-columns-size{
97
  border:0;
98
+ padding:0.5;
99
  }
100
  #box-filter > .form{
101
  border: 0
 
108
  url_params = Object.fromEntries(params);
109
  return url_params;
110
  }
111
+ """
src/assets/hardcoded_evals.py CHANGED
@@ -1,4 +1,4 @@
1
- from src.utils_display import AutoEvalColumn, model_hyperlink
2
 
3
  gpt4_values = {
4
  AutoEvalColumn.model.name: model_hyperlink("https://arxiv.org/abs/2303.08774", "gpt4"),
@@ -6,9 +6,9 @@ gpt4_values = {
6
  AutoEvalColumn.precision.name: None,
7
  AutoEvalColumn.average.name: 84.3,
8
  AutoEvalColumn.arc.name: 96.3,
9
- AutoEvalColumn.hellaswag.name: 95.3,
10
- AutoEvalColumn.mmlu.name: 86.4,
11
- AutoEvalColumn.truthfulqa.name: 59.0,
12
  AutoEvalColumn.dummy.name: "GPT-4",
13
  AutoEvalColumn.model_type.name: "",
14
  }
@@ -19,9 +19,9 @@ gpt35_values = {
19
  AutoEvalColumn.precision.name: None,
20
  AutoEvalColumn.average.name: 71.9,
21
  AutoEvalColumn.arc.name: 85.2,
22
- AutoEvalColumn.hellaswag.name: 85.5,
23
- AutoEvalColumn.mmlu.name: 70.0,
24
- AutoEvalColumn.truthfulqa.name: 47.0,
25
  AutoEvalColumn.dummy.name: "GPT-3.5",
26
  AutoEvalColumn.model_type.name: "",
27
  }
@@ -32,10 +32,9 @@ baseline = {
32
  AutoEvalColumn.precision.name: None,
33
  AutoEvalColumn.average.name: 25.0,
34
  AutoEvalColumn.arc.name: 25.0,
35
- AutoEvalColumn.hellaswag.name: 25.0,
36
- AutoEvalColumn.mmlu.name: 25.0,
37
- AutoEvalColumn.truthfulqa.name: 25.0,
38
  AutoEvalColumn.dummy.name: "baseline",
39
  AutoEvalColumn.model_type.name: "",
40
  }
41
-
 
1
+ from src.display_models.utils import AutoEvalColumn, model_hyperlink
2
 
3
  gpt4_values = {
4
  AutoEvalColumn.model.name: model_hyperlink("https://arxiv.org/abs/2303.08774", "gpt4"),
 
6
  AutoEvalColumn.precision.name: None,
7
  AutoEvalColumn.average.name: 84.3,
8
  AutoEvalColumn.arc.name: 96.3,
9
+ AutoEvalColumn.hellaswag.name: 95.3,
10
+ AutoEvalColumn.mmlu.name: 86.4,
11
+ AutoEvalColumn.truthfulqa.name: 59.0,
12
  AutoEvalColumn.dummy.name: "GPT-4",
13
  AutoEvalColumn.model_type.name: "",
14
  }
 
19
  AutoEvalColumn.precision.name: None,
20
  AutoEvalColumn.average.name: 71.9,
21
  AutoEvalColumn.arc.name: 85.2,
22
+ AutoEvalColumn.hellaswag.name: 85.5,
23
+ AutoEvalColumn.mmlu.name: 70.0,
24
+ AutoEvalColumn.truthfulqa.name: 47.0,
25
  AutoEvalColumn.dummy.name: "GPT-3.5",
26
  AutoEvalColumn.model_type.name: "",
27
  }
 
32
  AutoEvalColumn.precision.name: None,
33
  AutoEvalColumn.average.name: 25.0,
34
  AutoEvalColumn.arc.name: 25.0,
35
+ AutoEvalColumn.hellaswag.name: 25.0,
36
+ AutoEvalColumn.mmlu.name: 25.0,
37
+ AutoEvalColumn.truthfulqa.name: 25.0,
38
  AutoEvalColumn.dummy.name: "baseline",
39
  AutoEvalColumn.model_type.name: "",
40
  }
 
src/assets/text_content.py CHANGED
@@ -1,17 +1,17 @@
1
- from ..auto_leaderboard.model_metadata_type import ModelType
2
 
3
  TITLE = """<h1 align="center" id="space-title">πŸ€— Open LLM Leaderboard</h1>"""
4
 
5
- INTRODUCTION_TEXT = f"""
6
  πŸ“ The πŸ€— Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.
7
 
8
- πŸ€— Submit a model for automated evaluation on the πŸ€— GPU cluster on the "Submit" page!
9
  The leaderboard's backend runs the great [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) - read more details in the "About" page!
10
  """
11
 
12
  LLM_BENCHMARKS_TEXT = f"""
13
  # Context
14
- With the plethora of large language models (LLMs) and chatbots being released week upon week, often with grandiose claims of their performance, it can be hard to filter out the genuine progress that is being made by the open-source community and which model is the current state of the art.
15
 
16
  ## Icons
17
  {ModelType.PT.to_str(" : ")} model
@@ -25,14 +25,14 @@ If there is no icon, we have not uploaded the information on the model yet, feel
25
 
26
  ## How it works
27
 
28
- πŸ“ˆ We evaluate models on 4 key benchmarks using the <a href="https://github.com/EleutherAI/lm-evaluation-harness" target="_blank"> Eleuther AI Language Model Evaluation Harness </a>, a unified framework to test generative language models on a large number of different evaluation tasks.
29
 
30
  - <a href="https://arxiv.org/abs/1803.05457" target="_blank"> AI2 Reasoning Challenge </a> (25-shot) - a set of grade-school science questions.
31
  - <a href="https://arxiv.org/abs/1905.07830" target="_blank"> HellaSwag </a> (10-shot) - a test of commonsense inference, which is easy for humans (~95%) but challenging for SOTA models.
32
  - <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU </a> (5-shot) - a test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more.
33
  - <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA </a> (0-shot) - a test to measure a model’s propensity to reproduce falsehoods commonly found online. Note: TruthfulQA in the Harness is actually a minima a 6-shots task, as it is prepended by 6 examples systematically, even when launched using 0 for the number of few-shot examples.
34
 
35
- For all these evaluations, a higher score is a better score.
36
  We chose these benchmarks as they test a variety of reasoning and general knowledge across a wide variety of fields in 0-shot and few-shot settings.
37
 
38
  ## Details and logs
@@ -46,7 +46,7 @@ To reproduce our results, here is the commands you can run, using [this version]
46
  `python main.py --model=hf-causal --model_args="pretrained=<your_model>,use_accelerate=True,revision=<your_model_revision>"`
47
  ` --tasks=<task_list> --num_fewshot=<n_few_shot> --batch_size=2 --output_path=<output_path>`
48
 
49
- The total batch size we get for models which fit on one A100 node is 16 (8 GPUs * 2). If you don't use parallelism, adapt your batch size to fit.
50
  *You can expect results to vary slightly for different batch sizes because of padding.*
51
 
52
  The tasks and few shots parameters are:
@@ -65,7 +65,7 @@ If you still have questions, you can check our FAQ [here](https://huggingface.co
65
  We also gather cool resources from the community, other teams, and other labs [here](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/174)!
66
  """
67
 
68
- EVALUATION_QUEUE_TEXT = f"""
69
  # Evaluation Queue for the πŸ€— Open LLM Leaderboard
70
 
71
  Models added here will be automatically evaluated on the πŸ€— cluster.
@@ -79,7 +79,7 @@ config = AutoConfig.from_pretrained("your model name", revision=revision)
79
  model = AutoModel.from_pretrained("your model name", revision=revision)
80
  tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
81
  ```
82
- If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
83
 
84
  Note: make sure your model is public!
85
  Note: if your model needs `use_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
@@ -94,8 +94,8 @@ This is a leaderboard for Open LLMs, and we'd love for as many people as possibl
94
  When we add extra information about models to the leaderboard, it will be automatically taken from the model card
95
 
96
  ## In case of model failure
97
- If your model is displayed in the `FAILED` category, its execution stopped.
98
- Make sure you have followed the above steps first.
99
  If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
100
  """
101
 
@@ -135,7 +135,7 @@ CITATION_BUTTON_TEXT = r"""
135
  url = {https://doi.org/10.5281/zenodo.5371628}
136
  }
137
  @misc{clark2018think,
138
- title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
139
  author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
140
  year={2018},
141
  eprint={1803.05457},
@@ -143,7 +143,7 @@ CITATION_BUTTON_TEXT = r"""
143
  primaryClass={cs.AI}
144
  }
145
  @misc{zellers2019hellaswag,
146
- title={HellaSwag: Can a Machine Really Finish Your Sentence?},
147
  author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
148
  year={2019},
149
  eprint={1905.07830},
@@ -151,7 +151,7 @@ CITATION_BUTTON_TEXT = r"""
151
  primaryClass={cs.CL}
152
  }
153
  @misc{hendrycks2021measuring,
154
- title={Measuring Massive Multitask Language Understanding},
155
  author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
156
  year={2021},
157
  eprint={2009.03300},
@@ -159,7 +159,7 @@ CITATION_BUTTON_TEXT = r"""
159
  primaryClass={cs.CY}
160
  }
161
  @misc{lin2022truthfulqa,
162
- title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
163
  author={Stephanie Lin and Jacob Hilton and Owain Evans},
164
  year={2022},
165
  eprint={2109.07958},
 
1
+ from src.display_models.model_metadata_type import ModelType
2
 
3
  TITLE = """<h1 align="center" id="space-title">πŸ€— Open LLM Leaderboard</h1>"""
4
 
5
+ INTRODUCTION_TEXT = """
6
  πŸ“ The πŸ€— Open LLM Leaderboard aims to track, rank and evaluate open LLMs and chatbots.
7
 
8
+ πŸ€— Submit a model for automated evaluation on the πŸ€— GPU cluster on the "Submit" page!
9
  The leaderboard's backend runs the great [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) - read more details in the "About" page!
10
  """
11
 
12
  LLM_BENCHMARKS_TEXT = f"""
13
  # Context
14
+ With the plethora of large language models (LLMs) and chatbots being released week upon week, often with grandiose claims of their performance, it can be hard to filter out the genuine progress that is being made by the open-source community and which model is the current state of the art.
15
 
16
  ## Icons
17
  {ModelType.PT.to_str(" : ")} model
 
25
 
26
  ## How it works
27
 
28
+ πŸ“ˆ We evaluate models on 4 key benchmarks using the <a href="https://github.com/EleutherAI/lm-evaluation-harness" target="_blank"> Eleuther AI Language Model Evaluation Harness </a>, a unified framework to test generative language models on a large number of different evaluation tasks.
29
 
30
  - <a href="https://arxiv.org/abs/1803.05457" target="_blank"> AI2 Reasoning Challenge </a> (25-shot) - a set of grade-school science questions.
31
  - <a href="https://arxiv.org/abs/1905.07830" target="_blank"> HellaSwag </a> (10-shot) - a test of commonsense inference, which is easy for humans (~95%) but challenging for SOTA models.
32
  - <a href="https://arxiv.org/abs/2009.03300" target="_blank"> MMLU </a> (5-shot) - a test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more.
33
  - <a href="https://arxiv.org/abs/2109.07958" target="_blank"> TruthfulQA </a> (0-shot) - a test to measure a model’s propensity to reproduce falsehoods commonly found online. Note: TruthfulQA in the Harness is actually a minima a 6-shots task, as it is prepended by 6 examples systematically, even when launched using 0 for the number of few-shot examples.
34
 
35
+ For all these evaluations, a higher score is a better score.
36
  We chose these benchmarks as they test a variety of reasoning and general knowledge across a wide variety of fields in 0-shot and few-shot settings.
37
 
38
  ## Details and logs
 
46
  `python main.py --model=hf-causal --model_args="pretrained=<your_model>,use_accelerate=True,revision=<your_model_revision>"`
47
  ` --tasks=<task_list> --num_fewshot=<n_few_shot> --batch_size=2 --output_path=<output_path>`
48
 
49
+ The total batch size we get for models which fit on one A100 node is 16 (8 GPUs * 2). If you don't use parallelism, adapt your batch size to fit.
50
  *You can expect results to vary slightly for different batch sizes because of padding.*
51
 
52
  The tasks and few shots parameters are:
 
65
  We also gather cool resources from the community, other teams, and other labs [here](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/174)!
66
  """
67
 
68
+ EVALUATION_QUEUE_TEXT = """
69
  # Evaluation Queue for the πŸ€— Open LLM Leaderboard
70
 
71
  Models added here will be automatically evaluated on the πŸ€— cluster.
 
79
  model = AutoModel.from_pretrained("your model name", revision=revision)
80
  tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
81
  ```
82
+ If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
83
 
84
  Note: make sure your model is public!
85
  Note: if your model needs `use_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
 
94
  When we add extra information about models to the leaderboard, it will be automatically taken from the model card
95
 
96
  ## In case of model failure
97
+ If your model is displayed in the `FAILED` category, its execution stopped.
98
+ Make sure you have followed the above steps first.
99
  If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
100
  """
101
 
 
135
  url = {https://doi.org/10.5281/zenodo.5371628}
136
  }
137
  @misc{clark2018think,
138
+ title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
139
  author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
140
  year={2018},
141
  eprint={1803.05457},
 
143
  primaryClass={cs.AI}
144
  }
145
  @misc{zellers2019hellaswag,
146
+ title={HellaSwag: Can a Machine Really Finish Your Sentence?},
147
  author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
148
  year={2019},
149
  eprint={1905.07830},
 
151
  primaryClass={cs.CL}
152
  }
153
  @misc{hendrycks2021measuring,
154
+ title={Measuring Massive Multitask Language Understanding},
155
  author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
156
  year={2021},
157
  eprint={2009.03300},
 
159
  primaryClass={cs.CY}
160
  }
161
  @misc{lin2022truthfulqa,
162
+ title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
163
  author={Stephanie Lin and Jacob Hilton and Owain Evans},
164
  year={2022},
165
  eprint={2109.07958},
src/auto_leaderboard/model_metadata_type.py DELETED
@@ -1,551 +0,0 @@
1
- from dataclasses import dataclass
2
- from enum import Enum
3
- from typing import Dict
4
-
5
-
6
- @dataclass
7
- class ModelInfo:
8
- name: str
9
- symbol: str # emoji
10
-
11
-
12
- class ModelType(Enum):
13
- PT = ModelInfo(name="pretrained", symbol="🟒")
14
- FT = ModelInfo(name="fine-tuned", symbol="πŸ”Ά")
15
- IFT = ModelInfo(name="instruction-tuned", symbol="β­•")
16
- RL = ModelInfo(name="RL-tuned", symbol="🟦")
17
- Unknown = ModelInfo(name="Unknown, add type to request file!", symbol="?")
18
-
19
- def to_str(self, separator = " "):
20
- return f"{self.value.symbol}{separator}{self.value.name}"
21
-
22
-
23
- MODEL_TYPE_METADATA: Dict[str, ModelType] = {
24
- 'notstoic/PygmalionCoT-7b': ModelType.IFT,
25
- 'aisquared/dlite-v1-355m': ModelType.IFT,
26
- 'aisquared/dlite-v1-1_5b': ModelType.IFT,
27
- 'aisquared/dlite-v1-774m': ModelType.IFT,
28
- 'aisquared/dlite-v1-124m': ModelType.IFT,
29
- 'aisquared/chopt-2_7b': ModelType.IFT,
30
- 'aisquared/dlite-v2-124m': ModelType.IFT,
31
- 'aisquared/dlite-v2-774m': ModelType.IFT,
32
- 'aisquared/dlite-v2-1_5b': ModelType.IFT,
33
- 'aisquared/chopt-1_3b': ModelType.IFT,
34
- 'aisquared/dlite-v2-355m': ModelType.IFT,
35
- 'augtoma/qCammel-13': ModelType.IFT,
36
- 'Aspik101/Llama-2-7b-hf-instruct-pl-lora_unload': ModelType.IFT,
37
- 'Aspik101/vicuna-7b-v1.3-instruct-pl-lora_unload': ModelType.IFT,
38
- 'TheBloke/alpaca-lora-65B-HF': ModelType.FT,
39
- 'TheBloke/tulu-7B-fp16': ModelType.IFT,
40
- 'TheBloke/guanaco-7B-HF': ModelType.FT,
41
- 'TheBloke/koala-7B-HF': ModelType.FT,
42
- 'TheBloke/wizardLM-7B-HF': ModelType.IFT,
43
- 'TheBloke/airoboros-13B-HF': ModelType.IFT,
44
- 'TheBloke/koala-13B-HF': ModelType.FT,
45
- 'TheBloke/Wizard-Vicuna-7B-Uncensored-HF': ModelType.FT,
46
- 'TheBloke/dromedary-65b-lora-HF': ModelType.IFT,
47
- 'TheBloke/wizardLM-13B-1.0-fp16': ModelType.IFT,
48
- 'TheBloke/WizardLM-13B-V1-1-SuperHOT-8K-fp16': ModelType.FT,
49
- 'TheBloke/Wizard-Vicuna-30B-Uncensored-fp16': ModelType.FT,
50
- 'TheBloke/wizard-vicuna-13B-HF': ModelType.IFT,
51
- 'TheBloke/UltraLM-13B-fp16': ModelType.IFT,
52
- 'TheBloke/OpenAssistant-FT-7-Llama-30B-HF': ModelType.FT,
53
- 'TheBloke/vicuna-13B-1.1-HF': ModelType.IFT,
54
- 'TheBloke/guanaco-13B-HF': ModelType.FT,
55
- 'TheBloke/guanaco-65B-HF': ModelType.FT,
56
- 'TheBloke/airoboros-7b-gpt4-fp16': ModelType.IFT,
57
- 'TheBloke/llama-30b-supercot-SuperHOT-8K-fp16': ModelType.IFT,
58
- 'TheBloke/Llama-2-13B-fp16': ModelType.PT,
59
- 'TheBloke/llama-2-70b-Guanaco-QLoRA-fp16': ModelType.FT,
60
- 'TheBloke/landmark-attention-llama7b-fp16': ModelType.IFT,
61
- 'TheBloke/Planner-7B-fp16': ModelType.IFT,
62
- 'TheBloke/Wizard-Vicuna-13B-Uncensored-HF': ModelType.FT,
63
- 'TheBloke/gpt4-alpaca-lora-13B-HF': ModelType.IFT,
64
- 'TheBloke/gpt4-x-vicuna-13B-HF': ModelType.IFT,
65
- 'TheBloke/gpt4-alpaca-lora_mlp-65B-HF': ModelType.IFT,
66
- 'TheBloke/tulu-13B-fp16': ModelType.IFT,
67
- 'TheBloke/VicUnlocked-alpaca-65B-QLoRA-fp16': ModelType.IFT,
68
- 'TheBloke/Llama-2-70B-fp16': ModelType.IFT,
69
- 'TheBloke/WizardLM-30B-fp16': ModelType.IFT,
70
- 'TheBloke/robin-13B-v2-fp16': ModelType.FT,
71
- 'TheBloke/robin-33B-v2-fp16': ModelType.FT,
72
- 'TheBloke/Vicuna-13B-CoT-fp16': ModelType.IFT,
73
- 'TheBloke/Vicuna-33B-1-3-SuperHOT-8K-fp16': ModelType.IFT,
74
- 'TheBloke/Wizard-Vicuna-30B-Superhot-8K-fp16': ModelType.FT,
75
- 'TheBloke/Nous-Hermes-13B-SuperHOT-8K-fp16': ModelType.IFT,
76
- 'TheBloke/GPlatty-30B-SuperHOT-8K-fp16': ModelType.FT,
77
- 'TheBloke/CAMEL-33B-Combined-Data-SuperHOT-8K-fp16': ModelType.IFT,
78
- 'TheBloke/Chinese-Alpaca-33B-SuperHOT-8K-fp16': ModelType.IFT,
79
- 'jphme/orca_mini_v2_ger_7b': ModelType.IFT,
80
- 'Ejafa/vicuna_7B_vanilla_1.1': ModelType.FT,
81
- 'kevinpro/Vicuna-13B-CoT': ModelType.IFT,
82
- 'AlekseyKorshuk/pygmalion-6b-vicuna-chatml': ModelType.FT,
83
- 'AlekseyKorshuk/chatml-pyg-v1': ModelType.FT,
84
- 'concedo/Vicuzard-30B-Uncensored': ModelType.FT,
85
- 'concedo/OPT-19M-ChatSalad': ModelType.FT,
86
- 'concedo/Pythia-70M-ChatSalad': ModelType.FT,
87
- 'digitous/13B-HyperMantis': ModelType.IFT,
88
- 'digitous/Adventien-GPTJ': ModelType.FT,
89
- 'digitous/Alpacino13b': ModelType.IFT,
90
- 'digitous/GPT-R': ModelType.IFT,
91
- 'digitous/Javelin-R': ModelType.IFT,
92
- 'digitous/Javalion-GPTJ': ModelType.IFT,
93
- 'digitous/Javalion-R': ModelType.IFT,
94
- 'digitous/Skegma-GPTJ': ModelType.FT,
95
- 'digitous/Alpacino30b': ModelType.IFT,
96
- 'digitous/Janin-GPTJ': ModelType.FT,
97
- 'digitous/Janin-R': ModelType.FT,
98
- 'digitous/Javelin-GPTJ': ModelType.FT,
99
- 'SaylorTwift/gpt2_test': ModelType.PT,
100
- 'anton-l/gpt-j-tiny-random': ModelType.FT,
101
- 'Andron00e/YetAnother_Open-Llama-3B-LoRA-OpenOrca': ModelType.FT,
102
- 'Lazycuber/pyg-instruct-wizardlm': ModelType.FT,
103
- 'Lazycuber/Janemalion-6B': ModelType.FT,
104
- 'IDEA-CCNL/Ziya-LLaMA-13B-Pretrain-v1': ModelType.FT,
105
- 'IDEA-CCNL/Ziya-LLaMA-13B-v1': ModelType.IFT,
106
- 'dsvv-cair/alpaca-cleaned-llama-30b-bf16': ModelType.FT,
107
- 'gpt2-medium': ModelType.PT,
108
- 'camel-ai/CAMEL-13B-Combined-Data': ModelType.IFT,
109
- 'camel-ai/CAMEL-13B-Role-Playing-Data': ModelType.FT,
110
- 'camel-ai/CAMEL-33B-Combined-Data': ModelType.IFT,
111
- 'PygmalionAI/pygmalion-6b': ModelType.FT,
112
- 'PygmalionAI/metharme-1.3b': ModelType.IFT,
113
- 'PygmalionAI/pygmalion-1.3b': ModelType.FT,
114
- 'PygmalionAI/pygmalion-350m': ModelType.FT,
115
- 'PygmalionAI/pygmalion-2.7b': ModelType.FT,
116
- 'medalpaca/medalpaca-7b': ModelType.FT,
117
- 'lilloukas/Platypus-30B': ModelType.IFT,
118
- 'lilloukas/GPlatty-30B': ModelType.FT,
119
- 'mncai/chatdoctor': ModelType.FT,
120
- 'chaoyi-wu/MedLLaMA_13B': ModelType.FT,
121
- 'LoupGarou/WizardCoder-Guanaco-15B-V1.0': ModelType.IFT,
122
- 'LoupGarou/WizardCoder-Guanaco-15B-V1.1': ModelType.FT,
123
- 'hakurei/instruct-12b': ModelType.IFT,
124
- 'hakurei/lotus-12B': ModelType.FT,
125
- 'shibing624/chinese-llama-plus-13b-hf': ModelType.IFT,
126
- 'shibing624/chinese-alpaca-plus-7b-hf': ModelType.IFT,
127
- 'shibing624/chinese-alpaca-plus-13b-hf': ModelType.IFT,
128
- 'mosaicml/mpt-7b-instruct': ModelType.IFT,
129
- 'mosaicml/mpt-30b-chat': ModelType.IFT,
130
- 'mosaicml/mpt-7b-storywriter': ModelType.FT,
131
- 'mosaicml/mpt-30b-instruct': ModelType.IFT,
132
- 'mosaicml/mpt-7b-chat': ModelType.IFT,
133
- 'mosaicml/mpt-30b': ModelType.PT,
134
- 'Corianas/111m': ModelType.IFT,
135
- 'Corianas/Quokka_1.3b': ModelType.IFT,
136
- 'Corianas/256_5epoch': ModelType.FT,
137
- 'Corianas/Quokka_256m': ModelType.IFT,
138
- 'Corianas/Quokka_590m': ModelType.IFT,
139
- 'Corianas/gpt-j-6B-Dolly': ModelType.FT,
140
- 'Corianas/Quokka_2.7b': ModelType.IFT,
141
- 'cyberagent/open-calm-7b': ModelType.FT,
142
- 'Aspik101/Nous-Hermes-13b-pl-lora_unload': ModelType.IFT,
143
- 'THUDM/chatglm2-6b': ModelType.IFT,
144
- 'MetaIX/GPT4-X-Alpasta-30b': ModelType.IFT,
145
- 'NYTK/PULI-GPTrio': ModelType.PT,
146
- 'EleutherAI/pythia-1.3b': ModelType.PT,
147
- 'EleutherAI/pythia-2.8b-deduped': ModelType.PT,
148
- 'EleutherAI/gpt-neo-125m': ModelType.PT,
149
- 'EleutherAI/pythia-160m': ModelType.PT,
150
- 'EleutherAI/gpt-neo-2.7B': ModelType.PT,
151
- 'EleutherAI/pythia-1b-deduped': ModelType.PT,
152
- 'EleutherAI/pythia-6.7b': ModelType.PT,
153
- 'EleutherAI/pythia-70m-deduped': ModelType.PT,
154
- 'EleutherAI/gpt-neox-20b': ModelType.PT,
155
- 'EleutherAI/pythia-1.4b-deduped': ModelType.PT,
156
- 'EleutherAI/pythia-2.7b': ModelType.PT,
157
- 'EleutherAI/pythia-6.9b-deduped': ModelType.PT,
158
- 'EleutherAI/pythia-70m': ModelType.PT,
159
- 'EleutherAI/gpt-j-6b': ModelType.PT,
160
- 'EleutherAI/pythia-12b-deduped': ModelType.PT,
161
- 'EleutherAI/gpt-neo-1.3B': ModelType.PT,
162
- 'EleutherAI/pythia-410m-deduped': ModelType.PT,
163
- 'EleutherAI/pythia-160m-deduped': ModelType.PT,
164
- 'EleutherAI/polyglot-ko-12.8b': ModelType.PT,
165
- 'EleutherAI/pythia-12b': ModelType.PT,
166
- 'roneneldan/TinyStories-33M': ModelType.PT,
167
- 'roneneldan/TinyStories-28M': ModelType.PT,
168
- 'roneneldan/TinyStories-1M': ModelType.PT,
169
- 'roneneldan/TinyStories-8M': ModelType.PT,
170
- 'roneneldan/TinyStories-3M': ModelType.PT,
171
- 'jerryjalapeno/nart-100k-7b': ModelType.FT,
172
- 'lmsys/vicuna-13b-v1.3': ModelType.IFT,
173
- 'lmsys/vicuna-7b-v1.3': ModelType.IFT,
174
- 'lmsys/vicuna-13b-v1.1': ModelType.IFT,
175
- 'lmsys/vicuna-13b-delta-v1.1': ModelType.IFT,
176
- 'lmsys/vicuna-7b-delta-v1.1': ModelType.IFT,
177
- 'abhiramtirumala/DialoGPT-sarcastic-medium': ModelType.FT,
178
- 'haonan-li/bactrian-x-llama-13b-merged': ModelType.IFT,
179
- 'Gryphe/MythoLogic-13b': ModelType.IFT,
180
- 'Gryphe/MythoBoros-13b': ModelType.IFT,
181
- 'pillowtalks-ai/delta13b': ModelType.FT,
182
- 'wannaphong/openthaigpt-0.1.0-beta-full-model_for_open_llm_leaderboard': ModelType.FT,
183
- 'bigscience/bloom-7b1': ModelType.PT,
184
- 'bigcode/tiny_starcoder_py': ModelType.PT,
185
- 'bigcode/starcoderplus': ModelType.FT,
186
- 'bigcode/gpt_bigcode-santacoder': ModelType.PT,
187
- 'bigcode/starcoder': ModelType.PT,
188
- 'Open-Orca/OpenOrca-Preview1-13B': ModelType.IFT,
189
- 'microsoft/DialoGPT-large': ModelType.FT,
190
- 'microsoft/DialoGPT-small': ModelType.FT,
191
- 'microsoft/DialoGPT-medium': ModelType.FT,
192
- 'microsoft/CodeGPT-small-py': ModelType.FT,
193
- 'Tincando/fiction_story_generator': ModelType.FT,
194
- 'Pirr/pythia-13b-deduped-green_devil': ModelType.FT,
195
- 'Aeala/GPT4-x-AlpacaDente2-30b': ModelType.FT,
196
- 'Aeala/GPT4-x-AlpacaDente-30b': ModelType.FT,
197
- 'Aeala/GPT4-x-Alpasta-13b': ModelType.FT,
198
- 'Aeala/VicUnlocked-alpaca-30b': ModelType.IFT,
199
- 'Tap-M/Luna-AI-Llama2-Uncensored': ModelType.FT,
200
- 'illuin/test-custom-llama': ModelType.FT,
201
- 'dvruette/oasst-llama-13b-2-epochs': ModelType.FT,
202
- 'dvruette/oasst-gpt-neox-20b-1000-steps': ModelType.FT,
203
- 'dvruette/llama-13b-pretrained-dropout': ModelType.PT,
204
- 'dvruette/llama-13b-pretrained': ModelType.PT,
205
- 'dvruette/llama-13b-pretrained-sft-epoch-1': ModelType.FT,
206
- 'dvruette/llama-13b-pretrained-sft-do2': ModelType.FT,
207
- 'dvruette/oasst-gpt-neox-20b-3000-steps': ModelType.FT,
208
- 'dvruette/oasst-pythia-12b-pretrained-sft': ModelType.FT,
209
- 'dvruette/oasst-pythia-6.9b-4000-steps': ModelType.FT,
210
- 'dvruette/gpt-neox-20b-full-precision': ModelType.FT,
211
- 'dvruette/oasst-llama-13b-1000-steps': ModelType.FT,
212
- 'openlm-research/open_llama_7b_700bt_preview': ModelType.PT,
213
- 'openlm-research/open_llama_7b': ModelType.PT,
214
- 'openlm-research/open_llama_7b_v2': ModelType.PT,
215
- 'openlm-research/open_llama_3b': ModelType.PT,
216
- 'openlm-research/open_llama_13b': ModelType.PT,
217
- 'openlm-research/open_llama_3b_v2': ModelType.PT,
218
- 'PocketDoc/Dans-PileOfSets-Mk1-llama-13b-merged': ModelType.IFT,
219
- 'GeorgiaTechResearchInstitute/galpaca-30b': ModelType.IFT,
220
- 'GeorgiaTechResearchInstitute/starcoder-gpteacher-code-instruct': ModelType.IFT,
221
- 'databricks/dolly-v2-7b': ModelType.IFT,
222
- 'databricks/dolly-v2-3b': ModelType.IFT,
223
- 'databricks/dolly-v2-12b': ModelType.IFT,
224
- 'Rachneet/gpt2-xl-alpaca': ModelType.FT,
225
- 'Locutusque/gpt2-conversational-or-qa': ModelType.FT,
226
- 'psyche/kogpt': ModelType.FT,
227
- 'NbAiLab/nb-gpt-j-6B-alpaca': ModelType.IFT,
228
- 'Mikael110/llama-2-7b-guanaco-fp16': ModelType.FT,
229
- 'Mikael110/llama-2-13b-guanaco-fp16': ModelType.FT,
230
- 'Fredithefish/CrimsonPajama': ModelType.IFT,
231
- 'Fredithefish/RedPajama-INCITE-Chat-3B-ShareGPT-11K': ModelType.FT,
232
- 'Fredithefish/ScarletPajama-3B-HF': ModelType.FT,
233
- 'Fredithefish/RedPajama-INCITE-Chat-3B-Instruction-Tuning-with-GPT-4': ModelType.IFT,
234
- 'acrastt/RedPajama-INCITE-Chat-Instruct-3B-V1': ModelType.IFT,
235
- 'eachadea/vicuna-13b-1.1': ModelType.FT,
236
- 'eachadea/vicuna-7b-1.1': ModelType.FT,
237
- 'eachadea/vicuna-13b': ModelType.FT,
238
- 'openaccess-ai-collective/wizard-mega-13b': ModelType.IFT,
239
- 'openaccess-ai-collective/manticore-13b': ModelType.IFT,
240
- 'openaccess-ai-collective/manticore-30b-chat-pyg-alpha': ModelType.IFT,
241
- 'openaccess-ai-collective/minotaur-13b': ModelType.IFT,
242
- 'openaccess-ai-collective/minotaur-13b-fixed': ModelType.IFT,
243
- 'openaccess-ai-collective/hippogriff-30b-chat': ModelType.IFT,
244
- 'openaccess-ai-collective/manticore-13b-chat-pyg': ModelType.IFT,
245
- 'pythainlp/wangchanglm-7.5B-sft-enth': ModelType.IFT,
246
- 'pythainlp/wangchanglm-7.5B-sft-en-sharded': ModelType.IFT,
247
- 'euclaise/gpt-neox-122m-minipile-digits': ModelType.FT,
248
- 'stabilityai/StableBeluga1-Delta': ModelType.IFT,
249
- 'stabilityai/stablelm-tuned-alpha-7b': ModelType.IFT,
250
- 'stabilityai/StableBeluga2': ModelType.IFT,
251
- 'stabilityai/StableBeluga-13B': ModelType.IFT,
252
- 'stabilityai/StableBeluga-7B': ModelType.IFT,
253
- 'stabilityai/stablelm-base-alpha-7b': ModelType.PT,
254
- 'stabilityai/stablelm-base-alpha-3b': ModelType.PT,
255
- 'stabilityai/stablelm-tuned-alpha-3b': ModelType.IFT,
256
- 'alibidaran/medical_transcription_generator': ModelType.FT,
257
- 'CalderaAI/30B-Lazarus': ModelType.IFT,
258
- 'CalderaAI/13B-BlueMethod': ModelType.IFT,
259
- 'CalderaAI/13B-Ouroboros': ModelType.IFT,
260
- 'KoboldAI/OPT-13B-Erebus': ModelType.FT,
261
- 'KoboldAI/GPT-J-6B-Janeway': ModelType.FT,
262
- 'KoboldAI/GPT-J-6B-Shinen': ModelType.FT,
263
- 'KoboldAI/fairseq-dense-2.7B': ModelType.PT,
264
- 'KoboldAI/OPT-6B-nerys-v2': ModelType.FT,
265
- 'KoboldAI/GPT-NeoX-20B-Skein': ModelType.FT,
266
- 'KoboldAI/PPO_Pygway-6b-Mix': ModelType.FT,
267
- 'KoboldAI/fairseq-dense-6.7B': ModelType.PT,
268
- 'KoboldAI/fairseq-dense-125M': ModelType.PT,
269
- 'KoboldAI/OPT-13B-Nerybus-Mix': ModelType.FT,
270
- 'KoboldAI/OPT-2.7B-Erebus': ModelType.FT,
271
- 'KoboldAI/OPT-350M-Nerys-v2': ModelType.FT,
272
- 'KoboldAI/OPT-2.7B-Nerys-v2': ModelType.FT,
273
- 'KoboldAI/OPT-2.7B-Nerybus-Mix': ModelType.FT,
274
- 'KoboldAI/OPT-13B-Nerys-v2': ModelType.FT,
275
- 'KoboldAI/GPT-NeoX-20B-Erebus': ModelType.FT,
276
- 'KoboldAI/OPT-6.7B-Erebus': ModelType.FT,
277
- 'KoboldAI/fairseq-dense-355M': ModelType.PT,
278
- 'KoboldAI/OPT-6.7B-Nerybus-Mix': ModelType.FT,
279
- 'KoboldAI/GPT-J-6B-Adventure': ModelType.FT,
280
- 'KoboldAI/OPT-350M-Erebus': ModelType.FT,
281
- 'KoboldAI/GPT-J-6B-Skein': ModelType.FT,
282
- 'KoboldAI/OPT-30B-Erebus': ModelType.FT,
283
- 'klosax/pythia-160m-deduped-step92k-193bt': ModelType.PT,
284
- 'klosax/open_llama_3b_350bt_preview': ModelType.PT,
285
- 'klosax/openllama-3b-350bt': ModelType.PT,
286
- 'klosax/pythia-70m-deduped-step44k-92bt': ModelType.PT,
287
- 'klosax/open_llama_13b_600bt_preview': ModelType.PT,
288
- 'klosax/open_llama_7b_400bt_preview': ModelType.PT,
289
- 'kfkas/Llama-2-ko-7b-Chat': ModelType.IFT,
290
- 'WeOpenML/Alpaca-7B-v1': ModelType.IFT,
291
- 'WeOpenML/PandaLM-Alpaca-7B-v1': ModelType.IFT,
292
- 'TFLai/gpt2-turkish-uncased': ModelType.FT,
293
- 'ehartford/WizardLM-13B-Uncensored': ModelType.IFT,
294
- 'ehartford/dolphin-llama-13b': ModelType.IFT,
295
- 'ehartford/Wizard-Vicuna-30B-Uncensored': ModelType.FT,
296
- 'ehartford/WizardLM-30B-Uncensored': ModelType.IFT,
297
- 'ehartford/Wizard-Vicuna-13B-Uncensored': ModelType.FT,
298
- 'ehartford/WizardLM-7B-Uncensored': ModelType.IFT,
299
- 'ehartford/based-30b': ModelType.FT,
300
- 'ehartford/Wizard-Vicuna-7B-Uncensored': ModelType.FT,
301
- 'wahaha1987/llama_7b_sharegpt94k_fastchat': ModelType.FT,
302
- 'wahaha1987/llama_13b_sharegpt94k_fastchat': ModelType.FT,
303
- 'OpenAssistant/oasst-sft-1-pythia-12b': ModelType.FT,
304
- 'OpenAssistant/stablelm-7b-sft-v7-epoch-3': ModelType.IFT,
305
- 'OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5': ModelType.FT,
306
- 'OpenAssistant/pythia-12b-sft-v8-2.5k-steps': ModelType.IFT,
307
- 'OpenAssistant/pythia-12b-sft-v8-7k-steps': ModelType.IFT,
308
- 'OpenAssistant/pythia-12b-pre-v8-12.5k-steps': ModelType.IFT,
309
- 'OpenAssistant/llama2-13b-orca-8k-3319': ModelType.IFT,
310
- 'junelee/wizard-vicuna-13b': ModelType.FT,
311
- 'BreadAi/gpt-YA-1-1_160M': ModelType.PT,
312
- 'BreadAi/MuseCan': ModelType.PT,
313
- 'BreadAi/MusePy-1-2': ModelType.PT,
314
- 'BreadAi/DiscordPy': ModelType.PT,
315
- 'BreadAi/PM_modelV2': ModelType.PT,
316
- 'BreadAi/gpt-Youtube': ModelType.PT,
317
- 'BreadAi/StoryPy': ModelType.FT,
318
- 'julianweng/Llama-2-7b-chat-orcah': ModelType.FT,
319
- 'AGI-inc/lora_moe_7b_baseline': ModelType.FT,
320
- 'AGI-inc/lora_moe_7b': ModelType.FT,
321
- 'togethercomputer/GPT-NeoXT-Chat-Base-20B': ModelType.IFT,
322
- 'togethercomputer/RedPajama-INCITE-Chat-7B-v0.1': ModelType.IFT,
323
- 'togethercomputer/RedPajama-INCITE-Instruct-7B-v0.1': ModelType.IFT,
324
- 'togethercomputer/RedPajama-INCITE-7B-Base': ModelType.PT,
325
- 'togethercomputer/RedPajama-INCITE-7B-Instruct': ModelType.IFT,
326
- 'togethercomputer/RedPajama-INCITE-Base-3B-v1': ModelType.PT,
327
- 'togethercomputer/Pythia-Chat-Base-7B': ModelType.IFT,
328
- 'togethercomputer/RedPajama-INCITE-Base-7B-v0.1': ModelType.PT,
329
- 'togethercomputer/GPT-JT-6B-v1': ModelType.IFT,
330
- 'togethercomputer/GPT-JT-6B-v0': ModelType.IFT,
331
- 'togethercomputer/RedPajama-INCITE-Chat-3B-v1': ModelType.IFT,
332
- 'togethercomputer/RedPajama-INCITE-7B-Chat': ModelType.IFT,
333
- 'togethercomputer/RedPajama-INCITE-Instruct-3B-v1': ModelType.IFT,
334
- 'Writer/camel-5b-hf': ModelType.IFT,
335
- 'Writer/palmyra-base': ModelType.PT,
336
- 'MBZUAI/LaMini-GPT-1.5B': ModelType.IFT,
337
- 'MBZUAI/lamini-cerebras-111m': ModelType.IFT,
338
- 'MBZUAI/lamini-neo-1.3b': ModelType.IFT,
339
- 'MBZUAI/lamini-cerebras-1.3b': ModelType.IFT,
340
- 'MBZUAI/lamini-cerebras-256m': ModelType.IFT,
341
- 'MBZUAI/LaMini-GPT-124M': ModelType.IFT,
342
- 'MBZUAI/lamini-neo-125m': ModelType.IFT,
343
- 'TehVenom/DiffMerge-DollyGPT-Pygmalion': ModelType.FT,
344
- 'TehVenom/PPO_Shygmalion-6b': ModelType.FT,
345
- 'TehVenom/Dolly_Shygmalion-6b-Dev_V8P2': ModelType.FT,
346
- 'TehVenom/Pygmalion_AlpacaLora-7b': ModelType.FT,
347
- 'TehVenom/PPO_Pygway-V8p4_Dev-6b': ModelType.FT,
348
- 'TehVenom/Dolly_Malion-6b': ModelType.FT,
349
- 'TehVenom/PPO_Shygmalion-V8p4_Dev-6b': ModelType.FT,
350
- 'TehVenom/ChanMalion': ModelType.FT,
351
- 'TehVenom/GPT-J-Pyg_PPO-6B': ModelType.IFT,
352
- 'TehVenom/Pygmalion-13b-Merged': ModelType.FT,
353
- 'TehVenom/Metharme-13b-Merged': ModelType.IFT,
354
- 'TehVenom/Dolly_Shygmalion-6b': ModelType.FT,
355
- 'TehVenom/GPT-J-Pyg_PPO-6B-Dev-V8p4': ModelType.IFT,
356
- 'georgesung/llama2_7b_chat_uncensored': ModelType.FT,
357
- 'vicgalle/gpt2-alpaca': ModelType.IFT,
358
- 'vicgalle/alpaca-7b': ModelType.FT,
359
- 'vicgalle/gpt2-alpaca-gpt4': ModelType.IFT,
360
- 'facebook/opt-350m': ModelType.PT,
361
- 'facebook/opt-125m': ModelType.PT,
362
- 'facebook/xglm-4.5B': ModelType.PT,
363
- 'facebook/opt-2.7b': ModelType.PT,
364
- 'facebook/opt-6.7b': ModelType.PT,
365
- 'facebook/galactica-30b': ModelType.PT,
366
- 'facebook/opt-13b': ModelType.PT,
367
- 'facebook/opt-66b': ModelType.PT,
368
- 'facebook/xglm-7.5B': ModelType.PT,
369
- 'facebook/xglm-564M': ModelType.PT,
370
- 'facebook/opt-30b': ModelType.PT,
371
- 'golaxy/gogpt-7b': ModelType.FT,
372
- 'golaxy/gogpt2-7b': ModelType.FT,
373
- 'golaxy/gogpt-7b-bloom': ModelType.FT,
374
- 'golaxy/gogpt-3b-bloom': ModelType.FT,
375
- 'psmathur/orca_mini_v2_7b': ModelType.IFT,
376
- 'psmathur/orca_mini_7b': ModelType.IFT,
377
- 'psmathur/orca_mini_3b': ModelType.IFT,
378
- 'psmathur/orca_mini_v2_13b': ModelType.IFT,
379
- 'gpt2-xl': ModelType.PT,
380
- 'lxe/Cerebras-GPT-2.7B-Alpaca-SP': ModelType.FT,
381
- 'Monero/Manticore-13b-Chat-Pyg-Guanaco': ModelType.FT,
382
- 'Monero/WizardLM-Uncensored-SuperCOT-StoryTelling-30b': ModelType.IFT,
383
- 'Monero/WizardLM-13b-OpenAssistant-Uncensored': ModelType.IFT,
384
- 'Monero/WizardLM-30B-Uncensored-Guanaco-SuperCOT-30b': ModelType.IFT,
385
- 'jzjiao/opt-1.3b-rlhf': ModelType.FT,
386
- 'HuggingFaceH4/starchat-beta': ModelType.IFT,
387
- 'KnutJaegersberg/gpt-2-xl-EvolInstruct': ModelType.IFT,
388
- 'KnutJaegersberg/megatron-GPT-2-345m-EvolInstruct': ModelType.IFT,
389
- 'KnutJaegersberg/galactica-orca-wizardlm-1.3b': ModelType.IFT,
390
- 'openchat/openchat_8192': ModelType.IFT,
391
- 'openchat/openchat_v2': ModelType.IFT,
392
- 'openchat/openchat_v2_w': ModelType.IFT,
393
- 'ausboss/llama-13b-supercot': ModelType.IFT,
394
- 'ausboss/llama-30b-supercot': ModelType.IFT,
395
- 'Neko-Institute-of-Science/metharme-7b': ModelType.IFT,
396
- 'Neko-Institute-of-Science/pygmalion-7b': ModelType.FT,
397
- 'SebastianSchramm/Cerebras-GPT-111M-instruction': ModelType.IFT,
398
- 'victor123/WizardLM-13B-1.0': ModelType.IFT,
399
- 'OpenBuddy/openbuddy-openllama-13b-v7-fp16': ModelType.FT,
400
- 'OpenBuddy/openbuddy-llama2-13b-v8.1-fp16': ModelType.FT,
401
- 'OpenBuddyEA/openbuddy-llama-30b-v7.1-bf16': ModelType.FT,
402
- 'baichuan-inc/Baichuan-7B': ModelType.PT,
403
- 'tiiuae/falcon-40b-instruct': ModelType.IFT,
404
- 'tiiuae/falcon-40b': ModelType.PT,
405
- 'tiiuae/falcon-7b': ModelType.PT,
406
- 'YeungNLP/firefly-llama-13b': ModelType.FT,
407
- 'YeungNLP/firefly-llama-13b-v1.2': ModelType.FT,
408
- 'YeungNLP/firefly-llama2-13b': ModelType.FT,
409
- 'YeungNLP/firefly-ziya-13b': ModelType.FT,
410
- 'shaohang/Sparse0.5_OPT-1.3': ModelType.FT,
411
- 'xzuyn/Alpacino-SuperCOT-13B': ModelType.IFT,
412
- 'xzuyn/MedicWizard-7B': ModelType.FT,
413
- 'xDAN-AI/xDAN_13b_l2_lora': ModelType.FT,
414
- 'beomi/KoAlpaca-Polyglot-5.8B': ModelType.FT,
415
- 'beomi/llama-2-ko-7b': ModelType.IFT,
416
- 'Salesforce/codegen-6B-multi': ModelType.PT,
417
- 'Salesforce/codegen-16B-nl': ModelType.PT,
418
- 'Salesforce/codegen-6B-nl': ModelType.PT,
419
- 'ai-forever/rugpt3large_based_on_gpt2': ModelType.FT,
420
- 'gpt2-large': ModelType.PT,
421
- 'frank098/orca_mini_3b_juniper': ModelType.FT,
422
- 'frank098/WizardLM_13B_juniper': ModelType.FT,
423
- 'FPHam/Free_Sydney_13b_HF': ModelType.FT,
424
- 'huggingface/llama-13b': ModelType.PT,
425
- 'huggingface/llama-7b': ModelType.PT,
426
- 'huggingface/llama-65b': ModelType.PT,
427
- 'huggingface/llama-30b': ModelType.PT,
428
- 'Henk717/chronoboros-33B': ModelType.IFT,
429
- 'jondurbin/airoboros-13b-gpt4-1.4': ModelType.IFT,
430
- 'jondurbin/airoboros-7b': ModelType.IFT,
431
- 'jondurbin/airoboros-7b-gpt4': ModelType.IFT,
432
- 'jondurbin/airoboros-7b-gpt4-1.1': ModelType.IFT,
433
- 'jondurbin/airoboros-7b-gpt4-1.2': ModelType.IFT,
434
- 'jondurbin/airoboros-7b-gpt4-1.3': ModelType.IFT,
435
- 'jondurbin/airoboros-7b-gpt4-1.4': ModelType.IFT,
436
- 'jondurbin/airoboros-l2-7b-gpt4-1.4.1': ModelType.IFT,
437
- 'jondurbin/airoboros-l2-13b-gpt4-1.4.1': ModelType.IFT,
438
- 'jondurbin/airoboros-l2-70b-gpt4-1.4.1': ModelType.IFT,
439
- 'jondurbin/airoboros-13b': ModelType.IFT,
440
- 'jondurbin/airoboros-33b-gpt4-1.4': ModelType.IFT,
441
- 'jondurbin/airoboros-33b-gpt4-1.2': ModelType.IFT,
442
- 'jondurbin/airoboros-65b-gpt4-1.2': ModelType.IFT,
443
- 'ariellee/SuperPlatty-30B': ModelType.IFT,
444
- 'danielhanchen/open_llama_3b_600bt_preview': ModelType.FT,
445
- 'cerebras/Cerebras-GPT-256M': ModelType.PT,
446
- 'cerebras/Cerebras-GPT-1.3B': ModelType.PT,
447
- 'cerebras/Cerebras-GPT-13B': ModelType.PT,
448
- 'cerebras/Cerebras-GPT-2.7B': ModelType.PT,
449
- 'cerebras/Cerebras-GPT-111M': ModelType.PT,
450
- 'cerebras/Cerebras-GPT-6.7B': ModelType.PT,
451
- 'Yhyu13/oasst-rlhf-2-llama-30b-7k-steps-hf': ModelType.RL,
452
- 'Yhyu13/llama-30B-hf-openassitant': ModelType.FT,
453
- 'NousResearch/Nous-Hermes-Llama2-13b': ModelType.IFT,
454
- 'NousResearch/Nous-Hermes-llama-2-7b': ModelType.IFT,
455
- 'NousResearch/Redmond-Puffin-13B': ModelType.IFT,
456
- 'NousResearch/Nous-Hermes-13b': ModelType.IFT,
457
- 'project-baize/baize-v2-7b': ModelType.IFT,
458
- 'project-baize/baize-v2-13b': ModelType.IFT,
459
- 'LLMs/WizardLM-13B-V1.0': ModelType.FT,
460
- 'LLMs/AlpacaGPT4-7B-elina': ModelType.FT,
461
- 'wenge-research/yayi-7b': ModelType.FT,
462
- 'wenge-research/yayi-7b-llama2': ModelType.FT,
463
- 'wenge-research/yayi-13b-llama2': ModelType.FT,
464
- 'yhyhy3/open_llama_7b_v2_med_instruct': ModelType.IFT,
465
- 'llama-anon/instruct-13b': ModelType.IFT,
466
- 'huggingtweets/jerma985': ModelType.FT,
467
- 'huggingtweets/gladosystem': ModelType.FT,
468
- 'huggingtweets/bladeecity-jerma985': ModelType.FT,
469
- 'huggyllama/llama-13b': ModelType.PT,
470
- 'huggyllama/llama-65b': ModelType.PT,
471
- 'FabbriSimo01/Facebook_opt_1.3b_Quantized': ModelType.PT,
472
- 'upstage/Llama-2-70b-instruct': ModelType.IFT,
473
- 'upstage/Llama-2-70b-instruct-1024': ModelType.IFT,
474
- 'upstage/llama-65b-instruct': ModelType.IFT,
475
- 'upstage/llama-30b-instruct-2048': ModelType.IFT,
476
- 'upstage/llama-30b-instruct': ModelType.IFT,
477
- 'WizardLM/WizardLM-13B-1.0': ModelType.IFT,
478
- 'WizardLM/WizardLM-13B-V1.1': ModelType.IFT,
479
- 'WizardLM/WizardLM-13B-V1.2': ModelType.IFT,
480
- 'WizardLM/WizardLM-30B-V1.0': ModelType.IFT,
481
- 'WizardLM/WizardCoder-15B-V1.0': ModelType.IFT,
482
- 'gpt2': ModelType.PT,
483
- 'keyfan/vicuna-chinese-replication-v1.1': ModelType.IFT,
484
- 'nthngdy/pythia-owt2-70m-100k': ModelType.FT,
485
- 'nthngdy/pythia-owt2-70m-50k': ModelType.FT,
486
- 'quantumaikr/KoreanLM-hf': ModelType.FT,
487
- 'quantumaikr/open_llama_7b_hf': ModelType.FT,
488
- 'quantumaikr/QuantumLM-70B-hf': ModelType.IFT,
489
- 'MayaPH/FinOPT-Lincoln': ModelType.FT,
490
- 'MayaPH/FinOPT-Franklin': ModelType.FT,
491
- 'MayaPH/GodziLLa-30B': ModelType.IFT,
492
- 'MayaPH/GodziLLa-30B-plus': ModelType.IFT,
493
- 'MayaPH/FinOPT-Washington': ModelType.FT,
494
- 'ogimgio/gpt-neo-125m-neurallinguisticpioneers': ModelType.FT,
495
- 'layoric/llama-2-13b-code-alpaca': ModelType.FT,
496
- 'CobraMamba/mamba-gpt-3b': ModelType.FT,
497
- 'CobraMamba/mamba-gpt-3b-v2': ModelType.FT,
498
- 'CobraMamba/mamba-gpt-3b-v3': ModelType.FT,
499
- 'timdettmers/guanaco-33b-merged': ModelType.FT,
500
- 'elinas/chronos-33b': ModelType.IFT,
501
- 'heegyu/RedTulu-Uncensored-3B-0719': ModelType.IFT,
502
- 'heegyu/WizardVicuna-Uncensored-3B-0719': ModelType.IFT,
503
- 'heegyu/WizardVicuna-3B-0719': ModelType.IFT,
504
- 'meta-llama/Llama-2-7b-chat-hf': ModelType.RL,
505
- 'meta-llama/Llama-2-7b-hf': ModelType.PT,
506
- 'meta-llama/Llama-2-13b-chat-hf': ModelType.RL,
507
- 'meta-llama/Llama-2-13b-hf': ModelType.PT,
508
- 'meta-llama/Llama-2-70b-chat-hf': ModelType.RL,
509
- 'meta-llama/Llama-2-70b-hf': ModelType.PT,
510
- 'xhyi/PT_GPTNEO350_ATG': ModelType.FT,
511
- 'h2oai/h2ogpt-gm-oasst1-en-1024-20b': ModelType.FT,
512
- 'h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt': ModelType.FT,
513
- 'h2oai/h2ogpt-oig-oasst1-512-6_9b': ModelType.IFT,
514
- 'h2oai/h2ogpt-oasst1-512-12b': ModelType.IFT,
515
- 'h2oai/h2ogpt-oig-oasst1-256-6_9b': ModelType.IFT,
516
- 'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt': ModelType.FT,
517
- 'h2oai/h2ogpt-oasst1-512-20b': ModelType.IFT,
518
- 'h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2': ModelType.FT,
519
- 'h2oai/h2ogpt-gm-oasst1-en-1024-12b': ModelType.FT,
520
- 'h2oai/h2ogpt-gm-oasst1-multilang-1024-20b': ModelType.FT,
521
- 'bofenghuang/vigogne-13b-instruct': ModelType.IFT,
522
- 'bofenghuang/vigogne-13b-chat': ModelType.FT,
523
- 'bofenghuang/vigogne-2-7b-instruct': ModelType.IFT,
524
- 'bofenghuang/vigogne-7b-instruct': ModelType.IFT,
525
- 'bofenghuang/vigogne-7b-chat': ModelType.FT,
526
- 'Vmware/open-llama-7b-v2-open-instruct': ModelType.IFT,
527
- 'VMware/open-llama-0.7T-7B-open-instruct-v1.1': ModelType.IFT,
528
- 'ewof/koishi-instruct-3b': ModelType.IFT,
529
- 'gywy/llama2-13b-chinese-v1': ModelType.FT,
530
- 'GOAT-AI/GOAT-7B-Community': ModelType.FT,
531
- 'psyche/kollama2-7b': ModelType.FT,
532
- 'TheTravellingEngineer/llama2-7b-hf-guanaco': ModelType.FT,
533
- 'beaugogh/pythia-1.4b-deduped-sharegpt': ModelType.FT,
534
- 'augtoma/qCammel-70-x': ModelType.IFT,
535
- 'Lajonbot/Llama-2-7b-chat-hf-instruct-pl-lora_unload': ModelType.IFT,
536
- 'anhnv125/pygmalion-6b-roleplay': ModelType.FT,
537
- '64bits/LexPodLM-13B': ModelType.FT,
538
- }
539
-
540
-
541
def model_type_from_str(type):
    """Map a free-form model-type label to its ``ModelType`` member.

    Accepts either the textual tag (e.g. ``"fine-tuned"``) or the emoji
    marker used in the leaderboard UI, checked by substring so composite
    labels such as ``"πŸ”Ά fine-tuned"`` also match.

    NOTE(review): the parameter shadows the builtin ``type``; the name is
    kept unchanged for backward compatibility with existing callers.

    Returns ``ModelType.Unknown`` when no known tag is found.
    """
    # Emoji literals restored from mojibake ("πŸ”Ά" etc.) to the intended
    # UTF-8 symbols — confirm against the leaderboard's display strings.
    if "fine-tuned" in type or "πŸ”Ά" in type:
        return ModelType.FT
    if "pretrained" in type or "🟢" in type:
        return ModelType.PT
    if "RL-tuned" in type or "🟦" in type:
        return ModelType.RL
    if "instruction-tuned" in type or "⭕" in type:
        return ModelType.IFT
    return ModelType.Unknown
551
-