pminervini committed on
Commit
196121f
1 Parent(s): 8e10a29
backend-cli.py CHANGED
@@ -88,8 +88,8 @@ def request_to_result_name(request: EvalRequest) -> str:
88
 
89
 
90
  def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
91
- # batch_size = 1
92
- batch_size = "auto"
93
  results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
94
  batch_size=batch_size, device=DEVICE, use_cache=None, limit=LIMIT)
95
 
 
88
 
89
 
90
  def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
91
+ batch_size = 1
92
+ # batch_size = "auto"
93
  results = run_evaluation(eval_request=eval_request, task_names=[task.benchmark], num_fewshot=task.num_fewshot,
94
  batch_size=batch_size, device=DEVICE, use_cache=None, limit=LIMIT)
95
 
src/backend/manage_requests.py CHANGED
@@ -26,7 +26,7 @@ class EvalRequest:
26
  license: Optional[str] = ""
27
 
28
  def get_model_args(self) -> str:
29
- model_args = f"pretrained={self.model},revision={self.revision},parallelize=True,max_length=4096"
30
 
31
  if self.precision in ["float16", "float32", "bfloat16"]:
32
  model_args += f",dtype={self.precision}"
 
26
  license: Optional[str] = ""
27
 
28
  def get_model_args(self) -> str:
29
+ model_args = f"pretrained={self.model},revision={self.revision},parallelize=True" # ,max_length=4096"
30
 
31
  if self.precision in ["float16", "float32", "bfloat16"]:
32
  model_args += f",dtype={self.precision}"
src/backend/tasks/nq_swap/nq_swap.yaml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: nq_swap
2
+ dataset_path: pminervini/NQ-Swap
3
+ output_type: generate_until
4
+ validation_split: substituted
5
+ description: "Answer the following question based on the provided context:\n\n"
6
+ doc_to_text: "Context: {{context}}\nQuestion: {{question}}?\nAnswer:"
7
+ doc_to_target: "{{answer}}" # TODO: should be multi-target
8
+ fewshot_delimiter: "\n\n"
9
+ generation_kwargs:
10
+ until:
11
+ - "\n"
12
+ - "."
13
+ - ","
14
+ do_sample: false
15
+ temperature: 0.0
16
+ filter_list:
17
+ - name: remove_whitespace
18
+ filter:
19
+ - function: remove_whitespace
20
+ - function: take_first
21
+ target_delimiter: " "
22
+ metric_list:
23
+ - metric: exact_match
24
+ aggregation: mean
25
+ higher_is_better: true
26
+ ignore_case: true
27
+ ignore_punctuation: true
28
+ regexes_to_ignore:
29
+ - "\\b(?:The |the |An |A |The |a |an )"
30
+ metadata:
31
+ version: 0.0