eduagarcia committed on
Commit
21f0ce7
1 Parent(s): e9177b9

pr worker for portuguese models

Browse files
Files changed (4) hide show
  1. .gitignore +9 -0
  2. app.py +3 -0
  3. functions.py +32 -3
  4. pr_worker.py +41 -0
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ .env*
4
+ .ipynb_checkpoints
5
+ *ipynb
6
+ .vscode/
7
+
8
+ eval-queue/
9
+ downloads/
app.py CHANGED
@@ -7,6 +7,7 @@ import threading
7
  import gradio as gr
8
  from gradio_space_ci import enable_space_ci
9
  from functions import commit
 
10
 
11
  enable_space_ci()
12
 
@@ -65,4 +66,6 @@ with gr.Blocks() as demo:
65
  submit_btn.click(commit, model_id, output)
66
 
67
  threading.Thread(target=refresh).start()
 
 
68
  demo.launch()
 
7
  import gradio as gr
8
  from gradio_space_ci import enable_space_ci
9
  from functions import commit
10
+ from pr_worker import run_pr_worker
11
 
12
  enable_space_ci()
13
 
 
66
  submit_btn.click(commit, model_id, output)
67
 
68
  threading.Thread(target=refresh).start()
69
+ threading.Thread(target=run_pr_worker).start()
70
+
71
  demo.launch()
functions.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
8
  from openllm import get_json_format_data, get_datas
9
  import pandas as pd
10
  import traceback
 
11
 
12
  BOT_HF_TOKEN = os.getenv('BOT_HF_TOKEN')
13
 
@@ -15,6 +16,9 @@ data = get_json_format_data()
15
  finished_models = get_datas(data)
16
  df = pd.DataFrame(finished_models)
17
 
 
 
 
18
  desc = """
19
  This is an automated PR created with https://huggingface.co/spaces/eduagarcia-temp/portuguese-leaderboard-results-to-modelcard
20
 
@@ -162,7 +166,7 @@ def get_edited_yaml_readme(repo, token: str | None):
162
  card = ModelCard.load(repo, token=token)
163
  results = search(df, repo)
164
 
165
- common = {"task_type": 'text-generation', "task_name": 'Text Generation', "source_name": "Open Portuguese LLM Leaderboard", "source_url": get_query_url(repo)}
166
 
167
  tasks_results = get_task_summary(results)
168
 
@@ -176,9 +180,30 @@ def get_edited_yaml_readme(repo, token: str | None):
176
  card.data['eval_results'].append(cur_result)
177
 
178
  return str(card)
179
-
180
 
181
- def commit(repo, pr_number=None, message="Adding Evaluation Results", oauth_token: gr.OAuthToken | None = None): # specify pr number if you want to edit it, don't if you don't want
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  if oauth_token is None:
183
  gr.Warning("You are not logged in; therefore, the leaderboard-pr-bot will open the pull request instead of you. Click on 'Sign in with Huggingface' to log in.")
184
  token = BOT_HF_TOKEN
@@ -192,6 +217,10 @@ def commit(repo, pr_number=None, message="Adding Evaluation Results", oauth_toke
192
  repo = RepoUrl(repo).repo_id
193
  except Exception:
194
  raise gr.Error(f"Not a valid repo id: {str(repo)}")
 
 
 
 
195
 
196
  edited = {"revision": f"refs/pr/{pr_number}"} if pr_number else {"create_pr": True}
197
 
 
8
  from openllm import get_json_format_data, get_datas
9
  import pandas as pd
10
  import traceback
11
+ from huggingface_hub import HfApi
12
 
13
  BOT_HF_TOKEN = os.getenv('BOT_HF_TOKEN')
14
 
 
16
  finished_models = get_datas(data)
17
  df = pd.DataFrame(finished_models)
18
 
19
+ source_name = "Open Portuguese LLM Leaderboard"
20
+ default_pull_request_title = "Adding the Open Portuguese LLM Leaderboard Evaluation Results"
21
+
22
  desc = """
23
  This is an automated PR created with https://huggingface.co/spaces/eduagarcia-temp/portuguese-leaderboard-results-to-modelcard
24
 
 
166
  card = ModelCard.load(repo, token=token)
167
  results = search(df, repo)
168
 
169
+ common = {"task_type": 'text-generation', "task_name": 'Text Generation', "source_name": source_name, "source_url": get_query_url(repo)}
170
 
171
  tasks_results = get_task_summary(results)
172
 
 
180
  card.data['eval_results'].append(cur_result)
181
 
182
  return str(card)
 
183
 
184
def pr_already_exists(repo, token: str | None = None):
    """Tell whether `repo` already has leaderboard results or a PR for them.

    The checks run in this order and the first hit returns True:
      1. the model card metadata has an `eval_results` entry whose
         `source_name` equals the module-level `source_name`;
      2. the model card text mentions one of the leaderboard names;
      3. a repo discussion is titled `default_pull_request_title`, was
         authored by "leaderboard-pt-pr-bot", or is a pull request
         authored by "eduagarcia".

    Args:
        repo: repo id passed to `ModelCard.load` and `get_repo_discussions`.
        token: optional Hub token forwarded to both Hub calls.

    Returns:
        True if any check matches, otherwise False.
    """
    model_card = ModelCard.load(repo, token=token)

    # 1. Structured metadata already lists results from this leaderboard.
    if 'eval_results' in model_card.data and model_card.data['eval_results']:
        if any(entry.source_name == source_name for entry in model_card.data['eval_results']):
            return True

    # 2. The README body already references the leaderboard by name.
    leaderboard_mentions = ('Open Portuguese LLM Leaderboard', 'Open PT LLM Leaderboard')
    if any(mention in model_card.content for mention in leaderboard_mentions):
        return True

    # 3. A matching discussion / pull request is already open on the repo.
    hub = HfApi(token=token)
    return any(
        thread.title == default_pull_request_title
        or thread.author == "leaderboard-pt-pr-bot"
        or (thread.author == "eduagarcia" and thread.is_pull_request)
        for thread in hub.get_repo_discussions(repo)
    )
205
+
206
+ def commit(repo, pr_number=None, message=default_pull_request_title, oauth_token: gr.OAuthToken | None = None, check_if_pr_exists=False): # specify pr number if you want to edit it, don't if you don't want
207
  if oauth_token is None:
208
  gr.Warning("You are not logged in; therefore, the leaderboard-pr-bot will open the pull request instead of you. Click on 'Sign in with Huggingface' to log in.")
209
  token = BOT_HF_TOKEN
 
217
  repo = RepoUrl(repo).repo_id
218
  except Exception:
219
  raise gr.Error(f"Not a valid repo id: {str(repo)}")
220
+
221
+ if check_if_pr_exists:
222
+ if pr_already_exists(repo, token):
223
+ return "PR already exists"
224
 
225
  edited = {"revision": f"refs/pr/{pr_number}"} if pr_number else {"create_pr": True}
226
 
pr_worker.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from glob import glob
2
+ import pandas as pd
3
+ from huggingface_hub import snapshot_download
4
+ import json
5
+ from tqdm.auto import tqdm
6
+ import os
7
+ import traceback
8
+ from functions import pr_already_exists, commit
9
+
10
+
11
+ QUEUE_REPO = "eduagarcia-temp/llm_pt_leaderboard_requests"
12
+ EVAL_REQUESTS_PATH = "./eval-queue/"
13
+
14
def run_pr_worker():
    """Open leaderboard-result PRs for eligible models in the request queue.

    Downloads a snapshot of the `QUEUE_REPO` dataset into
    `EVAL_REQUESTS_PATH`, then reads every `*.json` request file found
    there (recursively). A request is eligible when all of these hold:

      * `status` is 'FINISHED'
      * `main_language` is "Portuguese"
      * `result_metrics_average` is a number >= 0.25
      * `model` is a non-empty string that does not contain 'ptpt'

    For each eligible model without an existing PR, `commit` is called
    to open one. Unreadable, malformed or incomplete request files are
    skipped so that one bad file cannot stop the rest of the queue.
    Errors raised while checking or opening a PR are printed and the
    loop moves on to the next request.
    """
    snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30)

    request_pattern = os.path.join(EVAL_REQUESTS_PATH, '**/*.json')
    for filepath in glob(request_pattern, recursive=True):
        # Parse defensively: JSONDecodeError and UnicodeDecodeError are both
        # ValueError subclasses, so a corrupt file is reported and skipped.
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                model_data = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Skipping unreadable request file {filepath}: {str(e)}")
            continue

        if not isinstance(model_data, dict):
            continue

        # Use .get() so a request missing a field is skipped instead of
        # raising KeyError outside the try block below.
        if model_data.get('status') != 'FINISHED':
            continue
        if model_data.get('main_language') != "Portuguese":
            continue

        average = model_data.get('result_metrics_average')
        if not isinstance(average, (int, float)) or average < 0.25:
            continue

        model = model_data.get('model')
        if not isinstance(model, str) or not model:
            continue
        if 'ptpt' in model:
            continue

        try:
            if not pr_already_exists(model):
                print(f"Opening PR for {model}")
                commit(model, check_if_pr_exists=True)
        except Exception as e:
            # Best-effort worker: log the failure and continue with the queue.
            traceback.print_exc()
            print(f"Error on {model}: {str(e)}")
36
+
37
+
38
+ if __name__ == "__main__":
39
+ run_pr_worker()
40
+
41
+