Clémentine committed on
Commit
412f8e5
·
1 Parent(s): a50a787

updated with meg's suggestions + cleaned up a bit

Browse files
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import logging
 
 
2
  from src.logging import configure_root_logger
 
3
  logging.getLogger("httpx").setLevel(logging.WARNING)
4
  logging.getLogger("numexpr").setLevel(logging.WARNING)
5
  logging.getLogger("absl").setLevel(logging.WARNING)
@@ -36,8 +39,8 @@ links_md = f"""
36
  | Results Repo | [{RESULTS_REPO}](https://huggingface.co/datasets/{RESULTS_REPO}) |
37
  """
38
 
39
- def button_auto_eval():
40
- logger.info("Manually triggering Auto Eval")
41
  run_auto_eval()
42
 
43
 
@@ -55,10 +58,14 @@ with gr.Blocks(js=dark_mode_gradio_js) as demo:
55
  button = gr.Button("Manually Run Evaluation")
56
  gr.Markdown(links_md)
57
 
58
- dummy = gr.Markdown(run_auto_eval, every=REFRESH_RATE, visible=False)
59
-
60
- button.click(fn=button_auto_eval, inputs=[], outputs=[])
61
 
 
62
 
63
  if __name__ == '__main__':
64
- demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0", show_error=True, server_port=7860)
 
 
 
 
 
 
1
  import logging
2
+ from apscheduler.schedulers.background import BackgroundScheduler
3
+
4
  from src.logging import configure_root_logger
5
+
6
  logging.getLogger("httpx").setLevel(logging.WARNING)
7
  logging.getLogger("numexpr").setLevel(logging.WARNING)
8
  logging.getLogger("absl").setLevel(logging.WARNING)
 
39
  | Results Repo | [{RESULTS_REPO}](https://huggingface.co/datasets/{RESULTS_REPO}) |
40
  """
41
 
42
+ def auto_eval():
43
+ logger.info("Triggering Auto Eval")
44
  run_auto_eval()
45
 
46
 
 
58
  button = gr.Button("Manually Run Evaluation")
59
  gr.Markdown(links_md)
60
 
61
+ #dummy = gr.Markdown(auto_eval, every=REFRESH_RATE, visible=False)
 
 
62
 
63
+ button.click(fn=auto_eval, inputs=[], outputs=[])
64
 
65
  if __name__ == '__main__':
66
+ scheduler = BackgroundScheduler()
67
+ scheduler.add_job(auto_eval, "interval", seconds=REFRESH_RATE)
68
+ scheduler.start()
69
+ demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0",
70
+ show_error=True,
71
+ server_port=7860)
main_backend_harness.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import snapshot_download
6
  logging.getLogger("openai").setLevel(logging.WARNING)
7
 
8
  from src.backend.run_eval_suite_harness import run_evaluation
9
- from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
10
  from src.backend.sort_queue import sort_models_by_priority
11
 
12
  from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
@@ -19,10 +19,6 @@ from src.logging import setup_logger
19
  logger = setup_logger(__name__)
20
  pp = pprint.PrettyPrinter(width=80)
21
 
22
- PENDING_STATUS = "PENDING"
23
- RUNNING_STATUS = "RUNNING"
24
- FINISHED_STATUS = "FINISHED"
25
- FAILED_STATUS = "FAILED"
26
 
27
  snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
28
  snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
 
6
  logging.getLogger("openai").setLevel(logging.WARNING)
7
 
8
  from src.backend.run_eval_suite_harness import run_evaluation
9
+ from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request, PENDING_STATUS, RUNNING_STATUS, FINISHED_STATUS, FAILED_STATUS
10
  from src.backend.sort_queue import sort_models_by_priority
11
 
12
  from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
 
19
  logger = setup_logger(__name__)
20
  pp = pprint.PrettyPrinter(width=80)
21
 
 
 
 
 
22
 
23
  snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
24
  snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
main_backend_lighteval.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import snapshot_download
6
  logging.getLogger("openai").setLevel(logging.WARNING)
7
 
8
  from src.backend.run_eval_suite_lighteval import run_evaluation
9
- from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
10
  from src.backend.sort_queue import sort_models_by_priority
11
 
12
  from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API, LIMIT, TOKEN, ACCELERATOR, VENDOR, REGION, TASKS_LIGHTEVAL
@@ -17,11 +17,6 @@ logger = setup_logger(__name__)
17
  # logging.basicConfig(level=logging.ERROR)
18
  pp = pprint.PrettyPrinter(width=80)
19
 
20
- PENDING_STATUS = "PENDING"
21
- RUNNING_STATUS = "RUNNING"
22
- FINISHED_STATUS = "FINISHED"
23
- FAILED_STATUS = "FAILED"
24
-
25
  snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
26
  snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
27
 
 
6
  logging.getLogger("openai").setLevel(logging.WARNING)
7
 
8
  from src.backend.run_eval_suite_lighteval import run_evaluation
9
+ from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request, PENDING_STATUS, RUNNING_STATUS, FINISHED_STATUS, FAILED_STATUS
10
  from src.backend.sort_queue import sort_models_by_priority
11
 
12
  from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API, LIMIT, TOKEN, ACCELERATOR, VENDOR, REGION, TASKS_LIGHTEVAL
 
17
  # logging.basicConfig(level=logging.ERROR)
18
  pp = pprint.PrettyPrinter(width=80)
19
 
 
 
 
 
 
20
  snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
21
  snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
22
 
src/backend/manage_requests.py CHANGED
@@ -9,6 +9,11 @@ from src.logging import setup_logger
9
 
10
  logger = setup_logger(__name__)
11
 
 
 
 
 
 
12
  @dataclass
13
  class EvalRequest:
14
  """This class represents one evaluation request file.
@@ -34,18 +39,10 @@ class EvalRequest:
34
  """
35
  model_args = f"pretrained={self.model},revision={self.revision}"
36
 
37
- if self.precision in ["float16", "bfloat16", "float32"]:
38
  model_args += f",dtype={self.precision}"
39
 
40
  # Quantized models need some added config, the install of bits and bytes, etc
41
-
42
- #elif self.precision == "8bit":
43
- # model_args += ",load_in_8bit=True"
44
- #elif self.precision == "4bit":
45
- # model_args += ",load_in_4bit=True"
46
- #elif self.precision == "GPTQ":
47
- # A GPTQ model does not need dtype to be specified,
48
- # it will be inferred from the config
49
  else:
50
  raise Exception(f"Unknown precision {self.precision}.")
51
 
@@ -95,6 +92,16 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[Ev
95
  return eval_requests
96
 
97
 
 
 
 
 
 
 
 
 
 
 
98
  def check_completed_evals(
99
  api: HfApi,
100
  hf_repo: str,
@@ -106,7 +113,14 @@ def check_completed_evals(
106
  local_dir_results: str,
107
  ):
108
  """Checks if the currently running evals are completed, if yes, update their status on the hub."""
109
- snapshot_download(repo_id=hf_repo_results, revision="main", local_dir=local_dir_results, repo_type="dataset", max_workers=60, token=TOKEN)
 
 
 
 
 
 
 
110
 
111
  running_evals = get_eval_requests(checked_status, hf_repo=hf_repo, local_dir=local_dir)
112
 
@@ -125,7 +139,8 @@ def check_completed_evals(
125
  )
126
  set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
127
  else:
128
- logger.info(
129
- f"No result file found for {model} setting it to {failed_status}"
130
- )
131
- set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)
 
 
9
 
10
  logger = setup_logger(__name__)
11
 
12
+ PENDING_STATUS = "PENDING"
13
+ RUNNING_STATUS = "RUNNING"
14
+ FINISHED_STATUS = "FINISHED"
15
+ FAILED_STATUS = "FAILED"
16
+
17
  @dataclass
18
  class EvalRequest:
19
  """This class represents one evaluation request file.
 
39
  """
40
  model_args = f"pretrained={self.model},revision={self.revision}"
41
 
42
+ if self.precision in ["float16", "bfloat16"]:
43
  model_args += f",dtype={self.precision}"
44
 
45
  # Quantized models need some added config, the install of bits and bytes, etc
 
 
 
 
 
 
 
 
46
  else:
47
  raise Exception(f"Unknown precision {self.precision}.")
48
 
 
92
  return eval_requests
93
 
94
 
95
+ def eval_was_running(eval_request: EvalRequest):
96
+ """Checks whether a file says it's RUNNING to determine whether to FAIL"""
97
+ json_filepath = eval_request.json_filepath
98
+
99
+ with open(json_filepath) as fp:
100
+ data = json.load(fp)
101
+
102
+ status = data["status"]
103
+ return status == RUNNING_STATUS
104
+
105
  def check_completed_evals(
106
  api: HfApi,
107
  hf_repo: str,
 
113
  local_dir_results: str,
114
  ):
115
  """Checks if the currently running evals are completed, if yes, update their status on the hub."""
116
+ snapshot_download(
117
+ repo_id=hf_repo_results,
118
+ revision="main",
119
+ local_dir=local_dir_results,
120
+ repo_type="dataset",
121
+ max_workers=60,
122
+ token=TOKEN
123
+ )
124
 
125
  running_evals = get_eval_requests(checked_status, hf_repo=hf_repo, local_dir=local_dir)
126
 
 
139
  )
140
  set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
141
  else:
142
+ if eval_was_running(eval_request=eval_request):
143
+ logger.info(
144
+ f"No result file found for {model} setting it to {failed_status}"
145
+ )
146
+ set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)