pminervini committed
Commit 277f064
1 Parent(s): a647d17
backend-cli.py CHANGED
@@ -74,7 +74,7 @@ def request_to_result_name(request: EvalRequest) -> str:
 
 def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
     results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
-                             batch_size=1, device=DEVICE, no_cache=True, limit=LIMIT)
+                             batch_size=1, device=DEVICE, use_cache=False, limit=LIMIT)
 
     dumped = json.dumps(results, indent=2)
     print(dumped)
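
The call-site half of the rename: no_cache=True becomes use_cache=False, matching the updated run_evaluation signature in src/backend/run_eval_suite.py below. A minimal sketch of the new call, where run_evaluation is a dummy that only echoes its arguments and DEVICE/LIMIT are assumed stand-ins for the module constants in backend-cli.py, not the repo's real code:

# Sketch only: dummy stand-in for run_evaluation; not the repo's real function.
DEVICE = "cpu"   # assumed stand-in for the DEVICE constant in backend-cli.py
LIMIT = 10       # assumed stand-in for the LIMIT constant in backend-cli.py

def run_evaluation(task_names, num_fewshot, batch_size, device, use_cache=False, limit=None) -> dict:
    # Echo the arguments so the renamed keyword is visible at the call site.
    return {"tasks": list(task_names), "use_cache": use_cache, "limit": limit}

# New call site, mirroring the '+' line above:
results = run_evaluation(task_names=["hellaswag"], num_fewshot=0,
                         batch_size=1, device=DEVICE, use_cache=False, limit=LIMIT)
print(results)

# The old keyword now fails fast:
#   run_evaluation(..., no_cache=True) -> TypeError: unexpected keyword argument 'no_cache'
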
src/backend/run_eval_suite.py CHANGED
@@ -6,7 +6,7 @@ import logging
 logging.getLogger("openai").setLevel(logging.WARNING)
 
 
-def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, no_cache=True, limit=None) -> dict:
+def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, use_cache=False, limit=None) -> dict:
     if limit:
         print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
 
@@ -17,7 +17,7 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
     results = evaluator.simple_evaluate(model="hf-causal-experimental", # "hf-causal"
                                         model_args=eval_request.get_model_args(),
                                         tasks=task_names, num_fewshot=num_fewshot,
-                                        batch_size=batch_size, device=device, no_cache=no_cache,
+                                        batch_size=batch_size, device=device, use_cache=use_cache,
                                         limit=limit, write_out=True, output_base_path="logs")
 
     results["config"]["model_dtype"] = eval_request.precision
src/backend/tasks/__init__.py ADDED
File without changes