eduagarcia
committed on
Commit
•
21f0ce7
1
Parent(s):
e9177b9
pr worker for portuguese models
Browse files- .gitignore +9 -0
- app.py +3 -0
- functions.py +32 -3
- pr_worker.py +41 -0
.gitignore
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
venv/
|
2 |
+
__pycache__/
|
3 |
+
.env*
|
4 |
+
.ipynb_checkpoints
|
5 |
+
*ipynb
|
6 |
+
.vscode/
|
7 |
+
|
8 |
+
eval-queue/
|
9 |
+
downloads/
|
app.py
CHANGED
@@ -7,6 +7,7 @@ import threading
|
|
7 |
import gradio as gr
|
8 |
from gradio_space_ci import enable_space_ci
|
9 |
from functions import commit
|
|
|
10 |
|
11 |
enable_space_ci()
|
12 |
|
@@ -65,4 +66,6 @@ with gr.Blocks() as demo:
|
|
65 |
submit_btn.click(commit, model_id, output)
|
66 |
|
67 |
threading.Thread(target=refresh).start()
|
|
|
|
|
68 |
demo.launch()
|
|
|
7 |
import gradio as gr
|
8 |
from gradio_space_ci import enable_space_ci
|
9 |
from functions import commit
|
10 |
+
from pr_worker import run_pr_worker
|
11 |
|
12 |
enable_space_ci()
|
13 |
|
|
|
66 |
submit_btn.click(commit, model_id, output)
|
67 |
|
68 |
threading.Thread(target=refresh).start()
|
69 |
+
threading.Thread(target=run_pr_worker).start()
|
70 |
+
|
71 |
demo.launch()
|
functions.py
CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
|
|
8 |
from openllm import get_json_format_data, get_datas
|
9 |
import pandas as pd
|
10 |
import traceback
|
|
|
11 |
|
12 |
BOT_HF_TOKEN = os.getenv('BOT_HF_TOKEN')
|
13 |
|
@@ -15,6 +16,9 @@ data = get_json_format_data()
|
|
15 |
finished_models = get_datas(data)
|
16 |
df = pd.DataFrame(finished_models)
|
17 |
|
|
|
|
|
|
|
18 |
desc = """
|
19 |
This is an automated PR created with https://huggingface.co/spaces/eduagarcia-temp/portuguese-leaderboard-results-to-modelcard
|
20 |
|
@@ -162,7 +166,7 @@ def get_edited_yaml_readme(repo, token: str | None):
|
|
162 |
card = ModelCard.load(repo, token=token)
|
163 |
results = search(df, repo)
|
164 |
|
165 |
-
common = {"task_type": 'text-generation', "task_name": 'Text Generation', "source_name":
|
166 |
|
167 |
tasks_results = get_task_summary(results)
|
168 |
|
@@ -176,9 +180,30 @@ def get_edited_yaml_readme(repo, token: str | None):
|
|
176 |
card.data['eval_results'].append(cur_result)
|
177 |
|
178 |
return str(card)
|
179 |
-
|
180 |
|
181 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
if oauth_token is None:
|
183 |
gr.Warning("You are not logged in; therefore, the leaderboard-pr-bot will open the pull request instead of you. Click on 'Sign in with Huggingface' to log in.")
|
184 |
token = BOT_HF_TOKEN
|
@@ -192,6 +217,10 @@ def commit(repo, pr_number=None, message="Adding Evaluation Results", oauth_toke
|
|
192 |
repo = RepoUrl(repo).repo_id
|
193 |
except Exception:
|
194 |
raise gr.Error(f"Not a valid repo id: {str(repo)}")
|
|
|
|
|
|
|
|
|
195 |
|
196 |
edited = {"revision": f"refs/pr/{pr_number}"} if pr_number else {"create_pr": True}
|
197 |
|
|
|
8 |
from openllm import get_json_format_data, get_datas
|
9 |
import pandas as pd
|
10 |
import traceback
|
11 |
+
from huggingface_hub import HfApi
|
12 |
|
13 |
BOT_HF_TOKEN = os.getenv('BOT_HF_TOKEN')
|
14 |
|
|
|
16 |
finished_models = get_datas(data)
|
17 |
df = pd.DataFrame(finished_models)
|
18 |
|
19 |
+
source_name = "Open Portuguese LLM Leaderboard"
|
20 |
+
default_pull_request_title = "Adding the Open Portuguese LLM Leaderboard Evaluation Results"
|
21 |
+
|
22 |
desc = """
|
23 |
This is an automated PR created with https://huggingface.co/spaces/eduagarcia-temp/portuguese-leaderboard-results-to-modelcard
|
24 |
|
|
|
166 |
card = ModelCard.load(repo, token=token)
|
167 |
results = search(df, repo)
|
168 |
|
169 |
+
common = {"task_type": 'text-generation', "task_name": 'Text Generation', "source_name": source_name, "source_url": get_query_url(repo)}
|
170 |
|
171 |
tasks_results = get_task_summary(results)
|
172 |
|
|
|
180 |
card.data['eval_results'].append(cur_result)
|
181 |
|
182 |
return str(card)
|
|
|
183 |
|
184 |
+
def pr_already_exists(repo, token: str | None = None):
    """Report whether leaderboard results were already added or proposed for ``repo``.

    Three checks run in order and the first match returns ``True``:

    1. the model card metadata holds an ``eval_results`` entry whose
       ``source_name`` equals the module-level ``source_name``;
    2. the model card text mentions the leaderboard under either of its names;
    3. a discussion on the repo carries the default pull-request title, was
       authored by "leaderboard-pt-pr-bot", or is a pull request authored by
       "eduagarcia".

    Args:
        repo: Repo id handed to ``ModelCard.load`` and ``get_repo_discussions``.
        token: Optional Hugging Face token used for both Hub accesses.

    Returns:
        ``True`` if any of the checks matches, ``False`` otherwise.
    """
    card = ModelCard.load(repo, token=token)

    # 1. Structured metadata already lists results from this leaderboard.
    if 'eval_results' in card.data and card.data['eval_results']:
        if any(entry.source_name == source_name for entry in card.data['eval_results']):
            return True

    # 2. The README text already refers to the leaderboard.
    leaderboard_mentions = ('Open Portuguese LLM Leaderboard', 'Open PT LLM Leaderboard')
    if any(mention in card.content for mention in leaderboard_mentions):
        return True

    # 3. A matching discussion / pull request exists on the repo. any() stops
    #    at the first hit, so no more discussions are consumed than needed.
    discussions = HfApi(token=token).get_repo_discussions(repo)
    return any(
        item.title == default_pull_request_title
        or item.author == "leaderboard-pt-pr-bot"
        or (item.author == "eduagarcia" and item.is_pull_request)
        for item in discussions
    )
|
205 |
+
|
206 |
+
def commit(repo, pr_number=None, message=default_pull_request_title, oauth_token: gr.OAuthToken | None = None, check_if_pr_exists=False): # specify pr number if you want to edit it, don't if you don't want
|
207 |
if oauth_token is None:
|
208 |
gr.Warning("You are not logged in; therefore, the leaderboard-pr-bot will open the pull request instead of you. Click on 'Sign in with Huggingface' to log in.")
|
209 |
token = BOT_HF_TOKEN
|
|
|
217 |
repo = RepoUrl(repo).repo_id
|
218 |
except Exception:
|
219 |
raise gr.Error(f"Not a valid repo id: {str(repo)}")
|
220 |
+
|
221 |
+
if check_if_pr_exists:
|
222 |
+
if pr_already_exists(repo, token):
|
223 |
+
return "PR already exists"
|
224 |
|
225 |
edited = {"revision": f"refs/pr/{pr_number}"} if pr_number else {"create_pr": True}
|
226 |
|
pr_worker.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from glob import glob
|
2 |
+
import pandas as pd
|
3 |
+
from huggingface_hub import snapshot_download
|
4 |
+
import json
|
5 |
+
from tqdm.auto import tqdm
|
6 |
+
import os
|
7 |
+
import traceback
|
8 |
+
from functions import pr_already_exists, commit
|
9 |
+
|
10 |
+
|
11 |
+
QUEUE_REPO = "eduagarcia-temp/llm_pt_leaderboard_requests"
|
12 |
+
EVAL_REQUESTS_PATH = "./eval-queue/"
|
13 |
+
|
14 |
+
def run_pr_worker():
    """Open leaderboard-result PRs for every eligible model in the request queue.

    Downloads a snapshot of the ``QUEUE_REPO`` dataset into
    ``EVAL_REQUESTS_PATH`` and walks every ``*.json`` request file beneath it.
    A request is eligible when its ``status`` is ``FINISHED``, its
    ``main_language`` is ``Portuguese``, its ``result_metrics_average`` is at
    least 0.25 and its ``model`` id does not contain ``ptpt``. For each
    eligible model without an existing PR, ``commit`` is called to open one.

    A request file that cannot be read, is not valid JSON, or lacks the fields
    needed to decide eligibility is skipped, so a single bad file does not stop
    the rest of the queue from being processed. Errors raised while checking
    for or opening a PR are printed and the loop moves on to the next file.
    """
    snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)

    pattern = os.path.join(EVAL_REQUESTS_PATH, '**/*.json')
    for filepath in glob(pattern, recursive=True):
        try:
            # Decode explicitly as UTF-8 instead of relying on the locale default.
            with open(filepath, 'r', encoding='utf-8') as f:
                model_data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            print(f"Skipping unreadable request file {filepath}: {str(e)}")
            continue
        if not isinstance(model_data, dict):
            continue

        # Eligibility filters; .get() keeps a partial request from raising.
        if model_data.get('status') != 'FINISHED':
            continue
        if model_data.get('main_language') != "Portuguese":
            continue
        average = model_data.get('result_metrics_average')
        if not isinstance(average, (int, float)) or average < 0.25:
            continue
        model_id = model_data.get('model')
        if not isinstance(model_id, str) or 'ptpt' in model_id:
            continue

        try:
            if not pr_already_exists(model_id):
                print(f"Opening PR for {model_id}")
                commit(model_id, check_if_pr_exists=True)
        except Exception as e:
            # Best-effort worker: report the failure and keep going.
            traceback.print_exc()
            print(f"Error on {model_id}: {str(e)}")
|
36 |
+
|
37 |
+
|
38 |
+
if __name__ == "__main__":
|
39 |
+
run_pr_worker()
|
40 |
+
|
41 |
+
|