eduagarcia committed • Commit a3b0a0f
Parent(s): 5a3de19

Permit different revision
Files changed:
- src/display/formatting.py     +5 -1
- src/display/utils.py          +4 -1
- src/leaderboard/read_evals.py +21 -7
- src/submission/submit.py      +7 -1
src/display/formatting.py CHANGED

@@ -24,7 +24,7 @@ def make_requests_clickable_model(model_name, json_path=None):

     return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")


-def make_clickable_model(model_name, json_path=None):
+def make_clickable_model(model_name, json_path=None, revision=None):
     link = f"https://huggingface.co/{model_name}"

     #details_model_name = model_name.replace("/", "__")

@@ -35,6 +35,10 @@ def make_clickable_model(model_name, json_path=None):
     if json_path is not None:
         details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"

+    if revision is not None and revision != "main":
+        if len(revision) > 12:
+            revision = revision[:7]
+        model_name += f" (rev: {revision})"
     return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")
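A quick sanity check of the new suffix logic — a minimal, runnable sketch in which model_hyperlink is stubbed and RESULTS_REPO is a placeholder (both are stand-ins for the real definitions in this module):

# Sketch only: model_hyperlink and RESULTS_REPO stand in for the module's
# real definitions in src/display/formatting.py.
RESULTS_REPO = "org/results"  # placeholder value

def model_hyperlink(link, text):
    return f'<a target="_blank" href="{link}">{text}</a>'

def make_clickable_model(model_name, json_path=None, revision=None):
    link = f"https://huggingface.co/{model_name}"
    details_link = f"https://huggingface.co/datasets/{RESULTS_REPO}/blob/main/{model_name}/{json_path}"
    if revision is not None and revision != "main":
        if len(revision) > 12:
            revision = revision[:7]  # long refs (e.g. 40-char SHAs) collapse to 7 chars
        model_name += f" (rev: {revision})"
    return model_hyperlink(link, model_name) + " " + model_hyperlink(details_link, "📑")

# A full SHA renders as "org/model (rev: aaaaaaa)"; revision=None or "main"
# leaves the label untouched.
print(make_clickable_model("org/model", "raw_results.json", revision="a" * 40))

Note that short refs (12 characters or fewer, e.g. a branch name like "dev") pass through unshortened; only longer revisions are truncated.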
src/display/utils.py CHANGED

@@ -61,7 +61,8 @@ auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub Licen
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
 auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, hidden=True)])
-auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(["model_sha", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Revision", "str", False, False)])
 auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
 auto_eval_column_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
 auto_eval_column_dict.append(["eval_time", ColumnContent, ColumnContent("Evaluation Time (s)", "number", False)])

@@ -88,6 +89,7 @@ class EvalQueueColumn: # Queue column
 baseline_row = {
     AutoEvalColumn.model.name: "<p>Baseline</p>",
     AutoEvalColumn.revision.name: "N/A",
+    AutoEvalColumn.model_sha.name: "N/A",
     AutoEvalColumn.precision.name: "?",
     AutoEvalColumn.merged.name: False,
     #AutoEvalColumn.average.name: 31.0,

@@ -131,6 +133,7 @@ if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
 human_baseline_row = {
     AutoEvalColumn.model.name: "<p>Human performance</p>",
     AutoEvalColumn.revision.name: "N/A",
+    AutoEvalColumn.model_sha.name: "N/A",
     AutoEvalColumn.precision.name: "?",
     #AutoEvalColumn.average.name: 92.75,
     AutoEvalColumn.merged.name: False,
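For context, how such appends become columns: the leaderboard builds an AutoEvalColumn dataclass from this list. A minimal sketch, assuming the upstream ColumnContent shape (label, type, displayed_by_default, plus optional flags) — the field names here are assumptions, not necessarily this repo's exact definitions:

from dataclasses import dataclass, make_dataclass

# Assumed ColumnContent layout, mirroring the positional arguments used in
# the appends above.
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    never_hidden: bool = False
    hidden: bool = False

auto_eval_column_dict = [
    ["model_sha", ColumnContent, ColumnContent("Model sha", "str", False, False)],
    ["revision", ColumnContent, ColumnContent("Revision", "str", False, False)],
]

# Each list entry is an (attribute, type, default) field spec, so the two new
# appends surface as AutoEvalColumn.model_sha and AutoEvalColumn.revision.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
print(AutoEvalColumn.model_sha.name)  # "Model sha"
print(AutoEvalColumn.revision.name)   # "Revision"

This is presumably why baseline_row and human_baseline_row gain the new AutoEvalColumn.model_sha.name key: each declared column is expected to have a value in those rows.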
src/leaderboard/read_evals.py CHANGED

@@ -22,7 +22,8 @@ class EvalResult:
     full_model: str # org/model (path on hub)
     org: str
     model: str
-    revision: str # commit hash, "" if main
+    model_sha: str # commit hash, "" if main
+    revision: str = "main"
     results: dict
     precision: Precision = Precision.Unknown
     model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...

@@ -120,8 +121,9 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision=config.get("model_sha", ""),
+            precision=precision,
+            model_sha=config.get("model_sha", ""),
+            revision=config.get("model_revision", "main"),
             json_filename=json_filename,
             eval_time=config.get("total_evaluation_time_seconds", 0.0),
             num_params=num_params

@@ -129,7 +131,7 @@ class EvalResult:

     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
-        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name, self.revision)

         try:
             with open(request_file, "r") as f:

@@ -144,7 +146,7 @@ class EvalResult:
             self.main_language = request.get("main_language", "?")
         except Exception as e:
             self.status = "FAILED"
-            print(f"Could not find request file for {self.org}/{self.model}")
+            print(f"Could not find request file for {self.org}/{self.model}, precision {self.precision.value.name}")

     def update_with_dynamic_file_dict(self, file_dict):
         self.license = file_dict.get("license", "?")

@@ -174,6 +176,12 @@ class EvalResult:
         average = round(sum(average)/len(average), 2)
         npm = round(sum(npm)/len(npm), 2)

+        rev_name = None
+        if self.revision != "main":
+            rev_name = self.revision
+            if len(rev_name) > 10:
+                rev_name = rev_name[:7]
+
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,

@@ -181,7 +189,7 @@ class EvalResult:
             AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
-            AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename),
+            AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.json_filename, revision=rev_name),
             AutoEvalColumn.dummy.name: self.full_model,
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,

@@ -207,7 +215,7 @@ class EvalResult:
         return data_dict


-def get_request_file_for_model(requests_path, model_name, precision):
+def get_request_file_for_model(requests_path, model_name, precision, revision):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
         requests_path,

@@ -215,15 +223,21 @@ def get_request_file_for_model(requests_path, model_name, precision):
     )
     request_files = glob.glob(request_files)

+    if revision is None or revision == "":
+        revision = "main"
+
     # Select correct request file (precision)
     request_file = ""
     request_files = sorted(request_files, reverse=True)
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
+            if req_content["revision"] is None or req_content["revision"] == "":
+                req_content["revision"] = "main"
             if (
                 req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"]
                 and req_content["precision"] == precision.split(".")[-1]
+                and req_content["revision"] == revision
             ):
                 request_file = tmp_request_file
     return request_file
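The revision matching can be exercised in isolation. A minimal sketch with the glob/JSON loading replaced by an in-memory list of request dicts (pick_request is a hypothetical name; the real function returns a file path):

SHOW_INCOMPLETE_EVALS = False  # assumed flag value for this sketch

def pick_request(requests, precision, revision):
    # Normalize empty/missing revisions to "main" on both sides, mirroring
    # the new get_request_file_for_model logic above.
    if revision is None or revision == "":
        revision = "main"
    picked = None
    for req_content in requests:
        if req_content["revision"] is None or req_content["revision"] == "":
            req_content["revision"] = "main"
        if (
            req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL" if SHOW_INCOMPLETE_EVALS else "FINISHED"]
            and req_content["precision"] == precision.split(".")[-1]
            and req_content["revision"] == revision
        ):
            picked = req_content
    return picked

requests = [
    {"status": "FINISHED", "precision": "float16", "revision": ""},
    {"status": "FINISHED", "precision": "float16", "revision": "step-1000"},
]
print(pick_request(requests, "float16", "step-1000"))  # second entry
print(pick_request(requests, "float16", None))         # first entry, treated as "main"

Before this commit, a model evaluated at two revisions would resolve to the same request file; the extra revision equality check is what keeps the runs apart.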
src/submission/submit.py CHANGED

@@ -146,7 +146,13 @@ def add_new_eval(
     print("Creating eval file")
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     os.makedirs(OUT_DIR, exist_ok=True)
-    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
+    if revision == "main":
+        out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
+    else:
+        rev_name = revision
+        if len(rev_name) > 30:
+            rev_name = rev_name[:7]
+        out_path = f"{OUT_DIR}/{model_path}_eval_request_{rev_name}_{private}_{precision}_{weight_type}.json"

     with open(out_path, "w", encoding="utf-8") as f:
         json.dump(eval_entry, f, indent=4, ensure_ascii=False)
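As a usage illustration, the filenames the new branch produces — a sketch with placeholder values standing in for the variables available inside add_new_eval (OUT_DIR, model_path, and friends are hypothetical here):

OUT_DIR = "./eval-queue/org"  # placeholders; the real values come from
model_path = "model"          # add_new_eval's arguments and config
private, precision, weight_type = False, "float16", "Original"

def request_filename(revision):
    # Same naming logic as the commit: "main" keeps the legacy name,
    # other revisions are embedded in the filename.
    if revision == "main":
        return f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"
    rev_name = revision
    if len(rev_name) > 30:
        rev_name = rev_name[:7]  # very long refs (e.g. full SHAs) are shortened
    return f"{OUT_DIR}/{model_path}_eval_request_{rev_name}_{private}_{precision}_{weight_type}.json"

print(request_filename("main"))     # ./eval-queue/org/model_eval_request_False_float16_Original.json
print(request_filename("step-1k"))  # ./eval-queue/org/model_eval_request_step-1k_False_float16_Original.json
print(request_filename("a" * 40))   # ./eval-queue/org/model_eval_request_aaaaaaa_False_float16_Original.json

Keeping the old name for revision == "main" means previously submitted requests keep resolving to the same files.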