pminervini committed
Commit 6524ea0
1 Parent(s): 66337fd
Files changed (2)
  1. backend-cli.py +3 -3
  2. src/backend/run_eval_suite.py +8 -3
backend-cli.py CHANGED
@@ -6,8 +6,6 @@ import json
 import random
 from datetime import datetime
 
-from huggingface_hub import snapshot_download
-
 from src.backend.run_eval_suite import run_evaluation
 from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
 from src.backend.sort_queue import sort_models_by_priority
@@ -77,8 +75,10 @@ def request_to_result_name(request: EvalRequest) -> str:
 
 
 def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
+    # batch_size = 1
+    batch_size = "auto"
     results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
-                             batch_size=1, device=DEVICE, use_cache=None, limit=LIMIT)
+                             batch_size=batch_size, device=DEVICE, use_cache=None, limit=LIMIT)
 
     print('RESULTS', results)
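The change above replaces the fixed batch_size=1 with lm-evaluation-harness's "auto" mode, which probes at runtime for the largest batch that fits in memory instead of scoring one example per forward pass. As a hedged sketch (not part of the commit), the updated call site behaves roughly like this, assuming Task, EvalRequest, DEVICE, and LIMIT are defined elsewhere in backend-cli.py as in the rest of the file:

    # Sketch only: auto batch sizing at the run_evaluation call site.
    def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
        # batch_size = 1              # old: one example per forward pass
        batch_size = "auto"           # new: harness probes for the largest fitting batch
        results = run_evaluation(eval_request=eval_request,
                                 task_names=[task.benchmark],
                                 num_fewshot=task.num_fewshot,
                                 batch_size=batch_size,
                                 device=DEVICE,
                                 use_cache=None,
                                 limit=LIMIT)
        print('RESULTS', results)
        return results                # assumed: the -> dict annotation implies a return
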
src/backend/run_eval_suite.py CHANGED
@@ -25,9 +25,14 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
 
     results = evaluator.simple_evaluate(model="hf-auto", # "hf-causal-experimental", # "hf-causal"
                                         model_args=eval_request.get_model_args(),
-                                        tasks=task_names, num_fewshot=num_fewshot,
-                                        batch_size=batch_size, device=device, use_cache=use_cache,
-                                        limit=limit, write_out=True)
+                                        tasks=task_names,
+                                        num_fewshot=num_fewshot,
+                                        batch_size=batch_size,
+                                        max_batch_size=8,
+                                        device=device,
+                                        use_cache=use_cache,
+                                        limit=limit,
+                                        write_out=True)
 
     results["config"]["model_dtype"] = eval_request.precision
     results["config"]["model_name"] = eval_request.model