"""Open pull requests for finished, eligible models in the evaluation queue.

The worker downloads a snapshot of the request-queue dataset, walks every
JSON request file in it and, for each request that passes the eligibility
filters, asks the project helpers to open a PR unless one already exists.
"""

from glob import glob
import json
import os
import traceback

import pandas as pd
from huggingface_hub import snapshot_download
from tqdm.auto import tqdm

from functions import pr_already_exists, commit

QUEUE_REPO = "eduagarcia-temp/llm_pt_leaderboard_requests"
EVAL_REQUESTS_PATH = "./eval-queue/"

# Substrings that exclude a model when found anywhere in its name.
blacklist = ['PORTULAN', 'Weni', '22h', 't5']

# Requests whose 'result_metrics_average' is below this value are skipped.
_MIN_RESULT_AVERAGE = 0.25


def _is_eligible(model_data):
    """Return True when a decoded request entry passes every filter.

    An entry is eligible when it is a JSON object whose ``status`` is
    ``'FINISHED'``, whose ``main_language`` is ``"Portuguese"``, whose
    ``result_metrics_average`` is a number not below the minimum average,
    and whose ``model`` is a string containing no blacklisted substring.
    Entries with missing or wrongly typed fields are reported as not
    eligible instead of raising.
    """
    if not isinstance(model_data, dict):
        return False
    if model_data.get('status') != 'FINISHED':
        return False
    if model_data.get('main_language') != "Portuguese":
        return False

    average = model_data.get('result_metrics_average')
    # A missing or null average cannot be compared against the threshold.
    if not isinstance(average, (int, float)) or average < _MIN_RESULT_AVERAGE:
        return False

    model_name = model_data.get('model')
    if not isinstance(model_name, str):
        return False
    return not any(term in model_name for term in blacklist)


def run_pr_worker():
    """Download the request queue and open a PR for each eligible model.

    Request files that cannot be read or decoded are reported and skipped
    so that a single bad file does not stop the remaining requests from
    being processed. Errors raised while checking for or creating a PR are
    printed together with their traceback, and the loop moves on to the
    next request.
    """
    snapshot_download(
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
    )

    pattern = os.path.join(EVAL_REQUESTS_PATH, '**/*.json')
    for filepath in glob(pattern, recursive=True):
        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(filepath, 'r', encoding='utf-8') as f:
                model_data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Skipping unreadable request file {filepath}: {e}")
            continue

        if not _is_eligible(model_data):
            continue

        model_name = model_data['model']
        try:
            if not pr_already_exists(model_name):
                print(f"Opening PR for {model_name}")
                commit(model_name, check_if_pr_exists=True)
        except Exception as e:
            # Best-effort boundary: one failing model must not stop the rest.
            traceback.print_exc()
            print(f"Error on {model_name}: {str(e)}")


if __name__ == "__main__":
    run_pr_worker()