new tasks

- src/about.py +3 -3
- src/leaderboard/read_evals.py +7 -0
src/about.py
CHANGED
@@ -37,9 +37,9 @@ class Tasks(Enum):
     task22 = Task("polish_polqa_open_book", "levenshtein,none", "polqa_open_book_g", "generate_until", 0.0) # generate_until
     task23 = Task("polish_polqa_closed_book", "levenshtein,none", "polqa_closed_book_g", "generate_until", 0.0) # generate_until
     task20 = Task("polish_poleval2018_task3_test_10k", "word_perplexity,none", "poleval2018_task3_test_10k", "other")
-    task24 = Task("polish_poquad_open_book", "levenshtein,none", "poquad_open_book", "other", 0.0)
-    task25 = Task("polish_eq_bench_first_turn", "first_eqbench,none", "eq_bench_first_turn", "other", 0.0)
-    task26 = Task("polish_eq_bench", "average_eqbench,none", "eq_bench", "other", 0.0)
+    # task24 = Task("polish_poquad_open_book", "levenshtein,none", "poquad_open_book", "other", 0.0)
+    # task25 = Task("polish_eq_bench_first_turn", "first_eqbench,none", "eq_bench_first_turn", "other", 0.0)
+    # task26 = Task("polish_eq_bench", "average_eqbench,none", "eq_bench", "other", 0.0)
     # task27 = Task("polish_eq_bench", "revised_eqbench,none", "eq_bench_revised", "other", 0.0)

 NUM_FEWSHOT = 0 # Change with your few shot
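For context: these entries are members of the Tasks enum that the leaderboard presumably uses to map lm-evaluation-harness results onto table columns, and this commit comments out task24-task26 (PoQuAD open book and the two EQ-Bench variants) so they are no longer picked up. Below is a minimal sketch of how such a Task container could be defined, assuming a dataclass whose positional fields match the five arguments used above; the field names (benchmark, metric, col_name, task_type, baseline) are illustrative guesses, not taken from this repository.

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    # Field names are assumptions inferred from the positional call sites above;
    # only those call sites are taken from the actual commit.
    benchmark: str             # lm-eval task name, e.g. "polish_polqa_open_book"
    metric: str                # metric key as reported by lm-eval, e.g. "levenshtein,none"
    col_name: str              # column name shown on the leaderboard
    task_type: str = "other"   # e.g. "generate_until" or "other"
    baseline: float = 0.0      # baseline score used when normalizing results

class Tasks(Enum):
    task22 = Task("polish_polqa_open_book", "levenshtein,none", "polqa_open_book_g", "generate_until", 0.0)
    task23 = Task("polish_polqa_closed_book", "levenshtein,none", "polqa_closed_book_g", "generate_until", 0.0)

With a layout like this, commenting out an enum member would simply drop that column the next time results are aggregated, without deleting any stored evaluation files.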
src/leaderboard/read_evals.py
CHANGED
@@ -428,6 +428,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
             print(f"not all eval values present {v.eval_name} {v.full_model}")
             continue

+    all_models = []
     missing_results_for_task = {}
     missing_metadata = []
     for v in eval_results.values():
@@ -441,6 +442,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
                 missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
         if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
             missing_metadata.append(f"{v.full_model}")
+        all_models.append((v.full_model, v.num_params))

     # print('missing_results_for_task', missing_results_for_task)
     for task, models in missing_results_for_task.items():
@@ -455,4 +457,9 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
             print(model)
         print()

+    print(f"All models:")
+    for model in all_models:
+        print(model)
+    print()
+
     return results
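The only functional change in this file is extra diagnostic bookkeeping: a new all_models list collects (full_model, num_params) for every parsed result and is printed at the end of get_raw_eval_results, next to the existing missing-results and missing-metadata reports. Below is a self-contained sketch of the same pattern with a stripped-down stand-in for EvalResult and hypothetical data; the real function builds eval_results from result files on disk and its EvalResult has many more fields.

from dataclasses import dataclass

@dataclass
class EvalResult:
    # Only the two attributes touched by this commit are modeled here.
    full_model: str
    num_params: float

def report_all_models(eval_results: dict[str, EvalResult]) -> None:
    # Collect (model name, parameter count) pairs and print them,
    # mirroring the diagnostic added to get_raw_eval_results.
    all_models = []
    for v in eval_results.values():
        all_models.append((v.full_model, v.num_params))

    print(f"All models:")
    for model in all_models:
        print(model)
    print()

# Hypothetical usage with stand-in data:
report_all_models({
    "org/model-7b": EvalResult("org/model-7b", 7.24),
    "org/model-1b": EvalResult("org/model-1b", 1.3),
})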