Clean codebase
- app.py +5 -2
- src/about.py +6 -2
- src/display/css_html_js.py +21 -10
- src/display/formatting.py +4 -0
- src/display/utils.py +14 -5
- src/envs.py +7 -3
- src/leaderboard/read_evals.py +26 -18
- src/populate.py +4 -0
- src/submission/check_validity.py +11 -5
- src/submission/submit.py +11 -11
app.py
CHANGED
@@ -1,5 +1,8 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 import gradio as gr
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 from huggingface_hub import snapshot_download
@@ -109,7 +112,7 @@ with demo:
                 EVALUATION_SCRIPT,
                 elem_classes="markdown-text",
             )
-
+
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
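The BackgroundScheduler import kept above is what the Space uses for periodic maintenance; the hunk does not show the scheduled job itself, so the snippet below is only a hedged sketch of typical APScheduler wiring with a placeholder refresh() function.

```python
from apscheduler.schedulers.background import BackgroundScheduler


def refresh():
    # Placeholder job; the real app.py schedules its own maintenance task (e.g. a Space restart).
    print("refreshing leaderboard data")


scheduler = BackgroundScheduler()
scheduler.add_job(refresh, "interval", seconds=1800)  # run every 30 minutes
scheduler.start()
```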
src/about.py
CHANGED
@@ -1,3 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 from dataclasses import dataclass
 from enum import Enum

@@ -75,13 +79,13 @@ If everything is done, check you can launch the EleutherAIHarness on your model
 EVALUATION_SCRIPT = '''
 To evaluate the model you can access the colab notebook at [this link](https://colab.research.google.com/drive/145KAGvgdAb8BrkObUrxAVWBd9EGDqy8N?usp=sharing).

-## First install the necessary libraries
+## First install the necessary libraries

 ```
 pip install accelerate openai anthropic datasets
 ```

-## Setup your :
+## Setup your :
 * OPENAI_API_KEY
 * ANTHROPIC_API_KEY
 * HF_TOKEN
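The EVALUATION_SCRIPT text above asks for three API keys; a minimal, repo-independent sketch of verifying they are set before running the notebook (the helper below is illustrative, not part of this commit).

```python
import os

# Keys named in EVALUATION_SCRIPT above.
REQUIRED_KEYS = ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "HF_TOKEN")

missing = [key for key in REQUIRED_KEYS if not os.environ.get(key)]
if missing:
    raise RuntimeError(f"Missing environment variables: {', '.join(missing)}")
```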
src/display/css_html_js.py
CHANGED
@@ -1,5 +1,7 @@
-custom_css = """
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-

+custom_css = """
 .markdown-text {
     font-size: 16px !important;
 }
@@ -33,12 +35,12 @@
     background: none;
     border: none;
 }
-
+
 #search-bar {
     padding: 0px;
 }

-/* Limit the width of the first AutoEvalColumn
+/* Limit the width of the first AutoEvalColumn */
 table td:first-child,
 table th:first-child {
     max-width: 400px;
@@ -62,44 +64,53 @@
 #scale-logo .download {
     display: none;
 }
+
 #filter_type{
     border: 0;
     padding-left: 0;
     padding-top: 0;
 }
+
 #filter_type label {
     display: flex;
 }
+
 #filter_type label > span{
     margin-top: var(--spacing-lg);
     margin-right: 0.5em;
 }
+
 #filter_type label > .wrap{
     width: 103px;
 }
-#filter_type label > .wrap .wrap-inner{
+
+#filter_type label > .wrap .wrap-inner {
     padding: 2px;
 }
+
 #filter_type label > .wrap .wrap-inner input{
     width: 1px
 }
+
 #filter-columns-type{
     border:0;
     padding:0.5;
 }
+
 #filter-columns-size{
     border:0;
     padding:0.5;
 }
+
 #box-filter > .form{
     border: 0
 }
 """

 get_window_url_params = """
-    function(url_params) {
-        const params = new URLSearchParams(window.location.search);
-        url_params = Object.fromEntries(params);
-        return url_params;
-        }
-    """
+function(url_params) {
+    const params = new URLSearchParams(window.location.search);
+    url_params = Object.fromEntries(params);
+    return url_params;
+}
+"""
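For context, custom_css is handed to the Gradio app when the Blocks demo is built; a minimal sketch assuming the repo layout above (the markdown text is a placeholder, and the get_window_url_params hookup is omitted because it depends on the Gradio version).

```python
import gradio as gr

from src.display.css_html_js import custom_css  # assumes the package layout of this repo

demo = gr.Blocks(css=custom_css)
with demo:
    # Components tagged with the "markdown-text" class pick up the 16px rule defined above.
    gr.Markdown("Placeholder leaderboard text", elem_classes="markdown-text")

if __name__ == "__main__":
    demo.launch()
```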
src/display/formatting.py
CHANGED
@@ -1,3 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
src/display/utils.py
CHANGED
@@ -1,3 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 from dataclasses import dataclass, make_dataclass
 from enum import Enum

@@ -7,7 +11,7 @@ from src.about import Tasks


 def fields(raw_class):
-    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+    return [v for k, v in raw_class.__dict__.items() if not k.startswith("__") and not k.endswith("__")]


 # These classes are for user facing column names,
@@ -23,13 +27,16 @@ class ColumnContent:

 ## Leaderboard columns
 auto_eval_column_dict = []
+
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
+
+# Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
@@ -44,7 +51,8 @@ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

-## For the queue columns in the submission tab
+
+# For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn: # Queue column
     model = ColumnContent("model", "markdown", True)
@@ -55,7 +63,7 @@ class EvalQueueColumn: # Queue column
     status = ColumnContent("status", "str", True)


-## All the model information that we might need
+# All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
@@ -93,6 +101,7 @@ class WeightType(Enum):
     Original = ModelDetails("Original")
     Delta = ModelDetails("Delta")

+
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
@@ -105,6 +114,7 @@ class Precision(Enum):
             return Precision.bfloat16
         return Precision.Unknown

+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]

@@ -112,4 +122,3 @@ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]

 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
-
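The pattern touched above, a list of [attribute_name, type, default] triples fed to make_dataclass and then read back through fields(), can be exercised on its own; the columns below are toy values, not the leaderboard's real schema.

```python
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


def fields(raw_class):
    # Same filter as in utils.py: keep only the non-dunder class attributes.
    return [v for k, v in raw_class.__dict__.items() if not k.startswith("__") and not k.endswith("__")]


# Toy column list in the same [attribute_name, type, default] shape as auto_eval_column_dict.
cols = [
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", cols, frozen=True)

print([c.name for c in fields(AutoEvalColumn) if not c.hidden])  # ['Model', 'Average']
```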
src/envs.py
CHANGED
@@ -1,12 +1,16 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 import os

 from huggingface_hub import HfApi

 # Info to change for your repository
 # ----------------------------------
-TOKEN = os.environ.get("HF_TOKEN")
+TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org

-OWNER = "braindao"
+OWNER = "braindao" # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------

 REPO_ID = f"{OWNER}/solidity-leaderboard"
@@ -14,7 +18,7 @@ QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"

 # If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")

 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
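CACHE_PATH is a plain environment-variable fallback; a small sketch of how the derived paths resolve, with an example HF_HOME value.

```python
import os

os.environ.setdefault("HF_HOME", "/tmp/hf-cache")  # example value only

CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
print(EVAL_REQUESTS_PATH)  # /tmp/hf-cache/eval-queue, unless HF_HOME was already set
```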
src/leaderboard/read_evals.py
CHANGED
@@ -1,6 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 import glob
 import json
-import math
 import os
 from dataclasses import dataclass

@@ -8,7 +11,7 @@ import dateutil
 import numpy as np

 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
+from src.display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
 from src.submission.check_validity import is_model_on_hub

@@ -16,24 +19,24 @@ from src.submission.check_validity import is_model_on_hub
 class EvalResult:
     """Represents one full evaluation. Built from a combination of the result and request file for a given run.
     """
-    eval_name: str
-    full_model: str
-    org: str
+    eval_name: str # org_model_precision (uid)
+    full_model: str # org/model (path on hub)
+    org: str
     model: str
-    revision: str
+    revision: str # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown
-    weight_type: WeightType = WeightType.Original
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = ""
+    date: str = "" # submission date of request file
     still_on_hub: bool = False

     @classmethod
-    def init_from_json_file(self, json_filepath):
+    def init_from_json_file(cls, json_filepath):
        """Inits the result from the specific model result file"""
        with open(json_filepath) as fp:
            data = json.load(fp)
@@ -58,7 +61,10 @@ class EvalResult:
        full_model = "/".join(org_and_model)

        still_on_hub, _, model_config = is_model_on_hub(
-            full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
+            full_model,
+            config.get("model_sha", "main"),
+            trust_remote_code=True,
+            test_tokenizer=False,
        )
        architecture = "?"
        if model_config is not None:
@@ -66,12 +72,14 @@ class EvalResult:
            if architectures:
                architecture = ";".join(architectures)

-        # Extract results available in this file (some results are split in several files)
+        # Extract results available in this file
+        # (some results are split in several files)
        results = {}
        for task in Tasks:
            task = task.value

-            # We average all scores of a given metric (not all metrics are present in all files)
+            # We average all scores of a given metric
+            # (not all metrics are present in all files)
            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
            if accs.size == 0 or any([acc is None for acc in accs]):
                continue
@@ -79,14 +87,14 @@ class EvalResult:
            mean_acc = np.mean(accs) * 100.0
            results[task.benchmark] = mean_acc

-        return self(
+        return cls(
            eval_name=result_key,
            full_model=full_model,
            org=org,
            model=model,
            results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
+            precision=precision,
+            revision=config.get("model_sha", ""),
            still_on_hub=still_on_hub,
            architecture=architecture
        )
@@ -188,7 +196,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
    results = []
    for v in eval_results.values():
        try:
-            v.to_dict()
+            v.to_dict() # we test if the dict version is complete
            results.append(v)
        except KeyError: # not all eval values present
            continue
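The scoring step in init_from_json_file reduces to averaging one metric over the matching benchmark entries and scaling to a percentage; a toy run of just that step, with made-up benchmark and metric names.

```python
import numpy as np

# Toy payload in the {"results": {benchmark: {metric: value}}} shape the parser reads.
data = {"results": {"solidity_pass_rate": {"acc": 0.5}}}
benchmark, metric = "solidity_pass_rate", "acc"

accs = np.array([v.get(metric, None) for k, v in data["results"].items() if k == benchmark])
if accs.size and not any(acc is None for acc in accs):
    print(np.mean(accs) * 100.0)  # 50.0
```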
src/populate.py
CHANGED
@@ -1,3 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 import json
 import os
src/submission/check_validity.py
CHANGED
@@ -1,8 +1,10 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 import json
 import os
-import re
 from collections import defaultdict
-from datetime import datetime, timedelta, timezone

 import huggingface_hub
 from huggingface_hub import ModelCard
@@ -10,6 +12,7 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer

+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:
@@ -31,20 +34,21 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:

     return True, ""

+
 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
     try:
         config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
         if test_tokenizer:
             try:
-                tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+                _ = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
             except ValueError as e:
                 return (
                     False,
                     f"uses a tokenizer which is not in a transformers release: {e}",
                     None
                 )
-            except Exception as e:
+            except Exception:
                 return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
         return True, None, config

@@ -55,7 +59,7 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
             None
         )

-    except Exception as e:
+    except Exception:
         return False, "was not found on hub!", None

@@ -70,10 +74,12 @@ def get_model_size(model_info: ModelInfo, precision: str):
     model_size = size_factor * model_size
     return model_size

+
 def get_model_arch(model_info: ModelInfo):
     """Gets the model architecture from the configuration"""
     return model_info.config.get("architectures", "Unknown")

+
 def already_submitted_models(requested_models_dir: str) -> set[str]:
     """Gather a list of already submitted models to avoid duplicates"""
     depth = 1
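is_model_on_hub ultimately relies on AutoConfig (and optionally AutoTokenizer) resolving the repo; a stripped-down sketch of the same check, where "gpt2" is just a placeholder model id and network access is required.

```python
from transformers import AutoConfig


def model_loads(model_name: str, revision: str = "main") -> bool:
    """Rough stand-in for the hub check above: can the config be fetched and parsed?"""
    try:
        AutoConfig.from_pretrained(model_name, revision=revision)
        return True
    except Exception:
        return False


print(model_loads("gpt2"))  # True when the Hub is reachable
```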
src/submission/submit.py
CHANGED
@@ -1,19 +1,19 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# flake8: noqa E501
+
 import json
 import os
 from datetime import datetime, timezone

 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
-from src.submission.check_validity import (
-    already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
-)
+from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
+from src.submission.check_validity import already_submitted_models, check_model_card, get_model_size, is_model_on_hub

 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None

+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -64,12 +64,12 @@ def add_new_eval(

     # Were the model card and license filled?
     try:
-        license = model_info.cardData["license"]
+        license_title = model_info.cardData["license"]
     except Exception:
         return styled_error("Please select a license for your model")

-    modelcard_OK, error_msg = check_model_card(model)
-    if not modelcard_OK:
+    is_model_card_ok, error_msg = check_model_card(model)
+    if not is_model_card_ok:
         return styled_error(error_msg)

     # Seems good, creating the eval
@@ -86,7 +86,7 @@ def add_new_eval(
         "model_type": model_type,
         "likes": model_info.likes,
         "params": model_size,
-        "license": license,
+        "license": license_title,
         "private": False,
     }
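The license lookup that add_new_eval performs before queueing a request can be reproduced with huggingface_hub directly; a hedged sketch, with "gpt2" as a placeholder repo id.

```python
from huggingface_hub import HfApi

api = HfApi()
model_info = api.model_info(repo_id="gpt2")

try:
    license_title = model_info.cardData["license"]
except Exception:
    license_title = None  # add_new_eval() rejects the submission in this case

print(license_title)  # e.g. "mit"
```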