brunneis committed on
Commit 7592671 • 1 Parent(s): 6c58865

refactor: Update model handling and utility functions


- Removed BrainDAO from model type dropdown in app.py
- Commented out citation accordion in app.py
- Adjusted job scheduler interval from 1800 to 3600 seconds in app.py
- Updated utility functions in src/utils.py for model name extraction from file paths
- Refactored model name retrieval in src/leaderboard/read_evals.py, src/populate.py, and src/submission/submit.py
- Removed model likes from submission data in src/submission/submit.py
- Adjusted model type storage in submission data to exclude emoji

app.py CHANGED
@@ -170,7 +170,7 @@ with demo:
     model_name_textbox = gr.Textbox(label="Model name")
     revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
     model_type = gr.Dropdown(
-        choices=[t.to_str(" ") for t in ModelType if t != ModelType.Unknown],
+        choices=[t.to_str(" ") for t in ModelType if t not in [ModelType.Unknown, ModelType.BrainDAO]],
         label="Model type",
         multiselect=False,
         value=None,
@@ -207,18 +207,18 @@ with demo:
         submission_result,
     )
 
-    with gr.Row():
-        with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
-                value=CITATION_BUTTON_TEXT,
-                label=CITATION_BUTTON_LABEL,
-                lines=20,
-                elem_id="citation-button",
-                show_copy_button=True,
-            )
+    # with gr.Row():
+    #     with gr.Accordion("📙 Citation", open=False):
+    #         citation_button = gr.Textbox(
+    #             value=CITATION_BUTTON_TEXT,
+    #             label=CITATION_BUTTON_LABEL,
+    #             lines=20,
+    #             elem_id="citation-button",
+    #             show_copy_button=True,
+    #         )
 
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
+scheduler.add_job(restart_space, "interval", seconds=3600)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch(
     server_name="0.0.0.0",
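A note on the scheduler hunk: the job that periodically restarts the Space now fires hourly instead of every 30 minutes. In isolation the wiring looks roughly like the sketch below; `restart_space` is assumed to wrap `HfApi.restart_space` (as in the standard leaderboard template), and `REPO_ID` is a hypothetical Space id.

# Minimal sketch of the scheduler wiring after this commit.
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

API = HfApi()                      # uses the ambient HF token
REPO_ID = "org/leaderboard-space"  # hypothetical Space id

def restart_space():
    # Assumed implementation: the Space restarts itself on a timer.
    API.restart_space(repo_id=REPO_ID)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)  # hourly; was 1800 (30 min)
scheduler.start()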
src/leaderboard/read_evals.py CHANGED
@@ -13,7 +13,7 @@ import numpy as np
 from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
 from src.submission.check_validity import is_model_on_hub
-from src.utils import get_model_name, get_org_and_model_names, get_request_hash
+from src.utils import get_model_name_from_filepath, get_org_and_model_names_from_filepath, get_request_hash
 
 
 @dataclass
@@ -42,14 +42,14 @@ class EvalResult:
         with open(json_filepath) as fp:
             data = json.load(fp)
 
-        org, model = get_org_and_model_names(json_filepath)
+        org, model = get_org_and_model_names_from_filepath(json_filepath)
         config = data.get("config")
 
         # Precision
         precision = Precision.from_str(config.get("model_dtype"))
 
         result_key = f"{org}_{model}_{precision.value.name}"
-        model_name = get_model_name(json_filepath)
+        model_name = get_model_name_from_filepath(json_filepath)
 
         still_on_hub, _, model_config = is_model_on_hub(
             model_name,
src/populate.py CHANGED
@@ -10,7 +10,7 @@ import pandas as pd
 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
-from src.utils import get_model_name
+from src.utils import get_model_name_from_filepath
 
 
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
@@ -43,7 +43,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     # continue
 
     # # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-    # model_name = get_model_name(file_path)
+    # model_name = get_model_name_from_filepath(file_path)
     # data[EvalQueueColumn.model.name] = make_clickable_model(model_name)
 
     # data[EvalQueueColumn.revision.name] = data.get("revision", "main")
@@ -66,7 +66,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     # continue
 
     # # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-    # model_name = get_model_name(file_path)
+    # model_name = get_model_name_from_filepath(file_path)
     # data[EvalQueueColumn.model.name] = make_clickable_model(model_name)
 
     # data[EvalQueueColumn.revision.name] = data.get("revision", "main")
@@ -92,7 +92,7 @@ def get_evaluation_requests_df(save_path: str, cols: list) -> list[pd.DataFrame]
         print(f"Error reading or decoding {file_path}: {e}")
         return None
 
-    model_name = get_model_name(file_path)
+    model_name = get_model_name_from_filepath(file_path)
     # data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
     data[EvalQueueColumn.model.name] = make_clickable_model(model_name)
     data[EvalQueueColumn.revision.name] = data.get("revision", "main")
src/submission/submit.py CHANGED
@@ -16,7 +16,7 @@ USERS_TO_SUBMISSION_DATES = None
 
 
 def add_new_eval(
-    model: str,
+    model_name: str,
     # base_model: str,
     revision: str,
     # precision: str,
@@ -32,12 +32,6 @@
     if not REQUESTED_MODELS:
         REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
 
-    user_name = ""
-    model_path = model
-    if "/" in model:
-        user_name = model.split("/")[0]
-        model_path = model.split("/")[1]
-
     precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 
@@ -55,13 +49,13 @@
     # return styled_error(f'Base model "{base_model}" {error}')
 
     if weight_type != "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
+        model_on_hub, error, _ = is_model_on_hub(model_name=model_name, revision=revision, token=TOKEN, test_tokenizer=True)
         if not model_on_hub:
-            return styled_error(f'Model "{model}" {error}')
+            return styled_error(f'Model "{model_name}" {error}')
 
     # Is the model info correctly filled?
     try:
-        model_info = API.model_info(repo_id=model, revision=revision)
+        model_info = API.model_info(repo_id=model_name, revision=revision)
     except Exception:
         return styled_error("Could not get your model information. Please fill it up properly.")
 
@@ -76,7 +70,7 @@
     except Exception:
         return styled_error("Please select a license for your model")
 
-    is_model_card_ok, error_msg = check_model_card(model)
+    is_model_card_ok, error_msg = check_model_card(model_name)
     if not is_model_card_ok:
         return styled_error(error_msg)
 
@@ -91,23 +85,26 @@
         "weight_type": weight_type,
         "status": "PENDING",
         "submitted_time": current_time,
-        "model_type": model_type,
-        "likes": model_info.likes,
+        "model_type": model_type.split()[1],  # remove the emoji
+        # "likes": model_info.likes,
         "params": model_size,
         "license": license_title,
-        "private": False,
+        # "private": False,
     }
 
     # Check for duplicate submission
-    request_id = get_request_id(model, revision, precision)
+    request_id = get_request_id(model_name, revision, precision)
     if request_id in REQUESTED_MODELS:
         return styled_warning("This model has been already submitted.")
-    request_hash = get_request_hash(model, revision, precision)
+    request_hash = get_request_hash(model_name, revision, precision)
 
     print("Creating eval file")
-    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
+    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{model_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}/{request_hash}.json"
+
+    out_path = f"{OUT_DIR}/{request_hash}.json"
+    if os.path.exists(out_path):
+        os.remove(out_path)
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
@@ -115,15 +112,15 @@
     print("Uploading eval file")
     API.upload_file(
         path_or_fileobj=out_path,
-        path_in_repo=out_path.split("eval-requests/")[1],
+        path_in_repo='{}/{}.json'.format(model_name, request_hash),
         repo_id=REQUESTS_REPO,
         repo_type="dataset",
-        commit_message=f"Add {model} to eval requests",
+        commit_message=f"Add {model_name} to eval requests",
     )
 
     # Remove the local file
     os.remove(out_path)
 
     return styled_message(
-        "Your request has been submitted to the evaluation requests!\nPlease wait for up to an hour for the model to show in the PENDING list."
+        "Your model has been submitted."
     )
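Two details in these hunks are easy to miss. First, the dropdown value arrives as an emoji-prefixed string (built with `to_str(" ")` in app.py), so `model_type.split()[1]` keeps only the type name; this assumes single-token type names. Second, because `model_name` is a full `org/model` repo id, the new `OUT_DIR` nests request files as `{EVAL_REQUESTS_PATH}/{org}/{model}/{hash}.json`, which is exactly the layout the renamed helpers in src/utils.py (below) parse. A hedged illustration, all values hypothetical:

# Emoji stripping: dropdown values look like "<emoji> <name>".
model_type = "🟢 pretrained"          # hypothetical value from ModelType.to_str(" ")
model_type.split()[1]                 # -> 'pretrained'; would truncate multi-word names

# New on-disk layout for request files.
EVAL_REQUESTS_PATH = "eval-requests"  # hypothetical local path
model_name = "acme/awesome-7b"        # hypothetical org/model repo id
request_hash = "1a2b3c"               # hypothetical request hash
out_path = f"{EVAL_REQUESTS_PATH}/{model_name}/{request_hash}.json"
# -> 'eval-requests/acme/awesome-7b/1a2b3c.json'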
src/utils.py CHANGED
@@ -4,15 +4,15 @@
 import hashlib
 
 
-def get_org_and_model_names(file_path: str) -> str:
+def get_org_and_model_names_from_filepath(file_path: str) -> str:
     org, model = file_path.split("/")[-3:-1]
     model = model.removesuffix(".json")
     model = model.split('_request_')[0]
     return org, model
 
 
-def get_model_name(file_path: str) -> str:
-    org, model = get_org_and_model_names(file_path)
+def get_model_name_from_filepath(file_path: str) -> str:
+    org, model = get_org_and_model_names_from_filepath(file_path)
    return f"{org}/{model}"
 
 
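For reference, here is how the renamed helpers decompose such a path — a sketch with a hypothetical file; the `.json`/`_request_` stripping appears to guard against legacy file names:

file_path = "eval-requests/acme/awesome-7b/1a2b3c.json"  # hypothetical

parts = file_path.split("/")         # ['eval-requests', 'acme', 'awesome-7b', '1a2b3c.json']
org, model = parts[-3:-1]            # 'acme', 'awesome-7b'
model = model.removesuffix(".json")  # no-op here; guards legacy names
model = model.split('_request_')[0]  # no-op here; guards legacy names

get_org_and_model_names_from_filepath(file_path)  # -> ('acme', 'awesome-7b')
get_model_name_from_filepath(file_path)           # -> 'acme/awesome-7b'

Note that the `-> str` annotation on `get_org_and_model_names_from_filepath` is stale: the function returns an `(org, model)` tuple.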