Adds a check for EAI Harness naming differences across versions

#7 by meg (HF staff) - opened
src/backend/run_eval_suite_harness.py CHANGED
@@ -4,6 +4,7 @@ import logging
 from datetime import datetime
 
 from lm_eval import tasks, evaluator, utils
+from lm_eval.tasks import TaskManager
 
 from src.envs import RESULTS_REPO, API
 from src.backend.manage_requests import EvalRequest
@@ -34,7 +35,12 @@ def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int
         "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
     )
 
-    task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
+    try:
+        all_tasks = tasks.ALL_TASKS
+    except AttributeError:
+        task_manager = TaskManager()
+        all_tasks = task_manager.all_tasks
+    task_names = utils.pattern_match(task_names, all_tasks)
 
     logger.info(f"Selected Tasks: {task_names}")
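
For context, this is the compatibility pattern the diff relies on: older lm-eval-harness releases expose the task registry as the module-level `tasks.ALL_TASKS`, while newer releases move it behind a `TaskManager` object whose `all_tasks` property lists the registered task names. Below is a minimal standalone sketch of the same shim, using only the names that appear in the diff above; the `resolve_task_names` helper itself is hypothetical, added here for illustration.

# Sketch of the version shim in isolation; assumes only that some
# version of lm_eval (the EleutherAI evaluation harness) is installed.
from lm_eval import tasks, utils


def resolve_task_names(patterns: list) -> list:
    """Expand task-name patterns against whichever task registry this
    lm_eval version provides (hypothetical helper for illustration)."""
    try:
        # Older harness versions: module-level registry.
        all_tasks = tasks.ALL_TASKS
    except AttributeError:
        # Newer versions removed ALL_TASKS; the registry lives behind
        # TaskManager, whose all_tasks property lists task names.
        from lm_eval.tasks import TaskManager
        all_tasks = TaskManager().all_tasks
    return utils.pattern_match(patterns, all_tasks)


# e.g. resolve_task_names(["hellaswag"]) should return ["hellaswag"]
# against either registry.

Catching AttributeError at runtime, rather than pinning or inspecting the harness version, keeps the backend working against whichever lm_eval release happens to be installed.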