meg-huggingface committed on
Commit
64c3915
·
1 Parent(s): 5ea4d55

Backend toxicity

Browse files
Files changed (1) hide show
  1. main_backend_toxicity.py +85 -0
main_backend_toxicity.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import pprint
3
+ import re
4
+ from huggingface_hub import snapshot_download
5
+
6
+ logging.getLogger("openai").setLevel(logging.DEBUG)
7
+
8
+ from src.backend.inference_endpoint import create_endpoint
9
+ from src.backend.run_toxicity_eval import main
10
+ from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
11
+ from src.backend.sort_queue import sort_models_by_priority
12
+
13
+ from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
14
+ EVAL_RESULTS_PATH_BACKEND, API, TOKEN)
15
+ #, LIMIT, ACCELERATOR, VENDOR, REGION
16
+ from src.logging import setup_logger
17
+
18
# Module-level logger built by the project's logging helper.
logger = setup_logger(__name__)

# Pretty-printer used to format eval requests before they are logged.
pp = pprint.PrettyPrinter(width=80)

# Status values passed to the request-management helpers.
PENDING_STATUS = "PENDING"
RUNNING_STATUS = "RUNNING"
FINISHED_STATUS = "FINISHED"
FAILED_STATUS = "FAILED"

# Download the results and request-queue dataset repos into their local
# directories. This runs at import time, before any evaluation is started.
snapshot_download(
    repo_id=RESULTS_REPO,
    revision="main",
    local_dir=EVAL_RESULTS_PATH_BACKEND,
    repo_type="dataset",
    max_workers=60,
    token=TOKEN,
)
snapshot_download(
    repo_id=QUEUE_REPO,
    revision="main",
    local_dir=EVAL_REQUESTS_PATH_BACKEND,
    repo_type="dataset",
    max_workers=60,
    token=TOKEN,
)
30
+
31
def run_auto_eval():
    """Run the toxicity evaluation for the first pending eval request.

    Steps, in order:

    1. Call ``check_completed_evals`` for requests currently in the RUNNING
       status, passing the FINISHED and FAILED statuses to apply.
    2. Fetch the PENDING requests and order them with
       ``sort_models_by_priority``.
    3. Take the first request only, mark it RUNNING, create an inference
       endpoint for its model and run the toxicity evaluation (``main``)
       against that endpoint.

    Returns:
        None. Returns early, without creating an endpoint, when there is no
        pending request.
    """
    current_pending_status = [PENDING_STATUS]

    # Pull the eval dataset from the hub and parse any eval requests;
    # check completed evals and set them to finished.
    check_completed_evals(
        api=API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
        hf_repo_results=RESULTS_REPO,
        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
    )

    # Get all eval requests that are PENDING; to run other evals, change
    # current_pending_status.
    eval_requests = get_eval_requests(
        job_status=current_pending_status,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
    )
    # Sort the evals by priority (first submitted, first run).
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)

    logger.info(
        "Found %d %s eval requests",
        len(eval_requests),
        ",".join(current_pending_status),
    )

    if not eval_requests:
        return

    # Only the highest-priority request is handled per invocation.
    eval_request = eval_requests[0]
    logger.info(pp.pformat(eval_request))

    set_eval_request(
        api=API,
        eval_request=eval_request,
        set_to_status=RUNNING_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
    )

    logger.info(
        "Starting Evaluation of %s on Inference endpoints",
        eval_request.json_filepath,
    )

    model_repository = eval_request.model
    # Derive the endpoint name from the repo id: lowercase, "/" replaced by "-".
    # NOTE(review): Inference Endpoint names may have length/character limits
    # that long or dotted/underscored repo ids would violate -- confirm against
    # create_endpoint.
    endpoint_name = model_repository.lower().replace("/", "-") + "-toxicity-eval"
    endpoint_url = create_endpoint(endpoint_name, model_repository)
    logger.info("Created an endpoint url at %s", endpoint_url)

    # The return value of the evaluation is not used here.
    main(endpoint_url, model_repository)
    logger.debug("FINISHED!")
82
+
83
+
84
# Script entry point: process at most one pending eval request per run.
if __name__ == "__main__":
    run_auto_eval()