dh-mc committed
Commit f754508 · Parent: e943e01

add env vars: MAX_NEW_TOKENS & REPETITION_PENALTY

llm_toolkit/eval_logical_reasoning.py CHANGED

@@ -24,6 +24,8 @@ data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
 results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
 use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
 using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
+max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
+repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
 
 dtype = (
     torch.bfloat16 if os.getenv("USE_BF16_FOR_INFERENCE") == "true" else torch.float16
@@ -66,7 +68,13 @@ if len(sys.argv) > 1:
 print_row_details(datasets["test"].to_pandas(), indices=[0, -1])
 
 print("Evaluating model: " + model_name)
-predictions = eval_model(model, tokenizer, datasets["test"])
+predictions = eval_model(
+    model,
+    tokenizer,
+    datasets["test"],
+    max_new_tokens=max_new_tokens,
+    repetition_penalty=repetition_penalty,
+)
 
 gpu_stats = torch.cuda.get_device_properties(0)
 start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
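
For reference, a minimal sketch of how the two new settings resolve at import time (plain os.getenv semantics; the defaults are the ones in the diff):

import os

# Unset vars fall back to the diff's defaults: 16 new tokens, penalty 1.0.
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
print(max_new_tokens, repetition_penalty)  # -> 16 1.0

# Exported values arrive as strings and are coerced explicitly.
os.environ["MAX_NEW_TOKENS"] = "2048"
os.environ["REPETITION_PENALTY"] = "1.1"
print(int(os.getenv("MAX_NEW_TOKENS", 16)))         # -> 2048
print(float(os.getenv("REPETITION_PENALTY", 1.0)))  # -> 1.1

The small default of 16 new tokens makes long generations opt-in via the environment, presumably because this eval only needs a short extracted answer.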
llm_toolkit/llm_utils.py CHANGED

@@ -133,7 +133,14 @@ def extract_answer(text, debug=False):
     return text
 
 
-def eval_model(model, tokenizer, eval_dataset, device="cuda", repetition_penalty=1.1):
+def eval_model(
+    model,
+    tokenizer,
+    eval_dataset,
+    device="cuda",
+    max_new_tokens=4096,
+    repetition_penalty=1.0,
+):
     total = len(eval_dataset)
     predictions = []
     for i in tqdm(range(total)):
@@ -144,7 +151,7 @@ def eval_model(model, tokenizer, eval_dataset, device="cuda", repetition_penalty
 
         outputs = model.generate(
             **inputs,
-            max_new_tokens=4096,
+            max_new_tokens=max_new_tokens,
             repetition_penalty=repetition_penalty,
             use_cache=False,
         )
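
Putting the two files together, a hedged, self-contained sketch of the updated loop (the per-row "text" field and the decode step are assumptions for illustration; only the signature and the generate kwargs come from this commit):

from tqdm import tqdm

def eval_model(
    model,
    tokenizer,
    eval_dataset,
    device="cuda",
    max_new_tokens=4096,
    repetition_penalty=1.0,
):
    total = len(eval_dataset)
    predictions = []
    for i in tqdm(range(total)):
        # Assumption: each row carries its prompt under a "text" column.
        inputs = tokenizer(eval_dataset[i]["text"], return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,          # env-tunable; was hard-coded to 4096
            repetition_penalty=repetition_penalty,  # env-tunable; default drops from 1.1 to 1.0
            use_cache=False,
        )
        # Assumption: keep only the newly generated tokens.
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        predictions.append(tokenizer.decode(new_tokens, skip_special_tokens=True))
    return predictions

Note that the new default repetition_penalty of 1.0 matches the transformers generation default, so behavior only changes when REPETITION_PENALTY is exported.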