pminervini committed on
Commit
3be882c
1 Parent(s): 196121f
Files changed (1) hide show
  1. src/backend/run_eval_suite.py +10 -6
src/backend/run_eval_suite.py CHANGED
@@ -1,5 +1,5 @@
1
  from lm_eval import tasks, evaluator, utils
2
- from lm_eval.tasks import initialize_tasks, include_task_folder
3
 
4
  from src.backend.manage_requests import EvalRequest
5
 
@@ -16,13 +16,16 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
16
  if limit:
17
  print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
18
 
19
- include_task_folder("src/backend/tasks/")
20
- initialize_tasks('INFO')
 
 
 
21
 
22
  print(f"Considered Tasks: {task_names}")
23
- print(f"Allowed Tasks: {tasks.ALL_TASKS}")
24
 
25
- task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
26
 
27
  print(f"Selected Tasks: {task_names}")
28
  print(f"Eval Request: {eval_request.get_model_args()}")
@@ -36,7 +39,8 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
36
  device=device,
37
  use_cache=use_cache,
38
  limit=limit,
39
- write_out=True)
 
40
 
41
  results["config"]["model_dtype"] = eval_request.precision
42
  results["config"]["model_name"] = eval_request.model
 
1
  from lm_eval import tasks, evaluator, utils
2
+ from lm_eval.tasks import TaskManager
3
 
4
  from src.backend.manage_requests import EvalRequest
5
 
 
16
  if limit:
17
  print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
18
 
19
+ # include_task_folder("src/backend/tasks/")
20
+ # initialize_tasks('INFO')
21
+
22
+ task_manager = TaskManager(include_path="./src/backend/tasks/")
23
+ # task_manager.initialize_tasks('INFO')
24
 
25
  print(f"Considered Tasks: {task_names}")
26
+ # print(f"Allowed Tasks: {tasks.ALL_TASKS}")
27
 
28
+ # task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
29
 
30
  print(f"Selected Tasks: {task_names}")
31
  print(f"Eval Request: {eval_request.get_model_args()}")
 
39
  device=device,
40
  use_cache=use_cache,
41
  limit=limit,
42
+ write_out=True,
43
+ task_manager=task_manager)
44
 
45
  results["config"]["model_dtype"] = eval_request.precision
46
  results["config"]["model_name"] = eval_request.model