loubnabnl HF staff committed on
Commit
91d44ed
1 Parent(s): a33001a

update check_correctness

Browse files
Files changed (1) hide show
  1. utils.py +4 -3
utils.py CHANGED
@@ -31,6 +31,7 @@ def check_correctness(sample, generation, timeout, debug=True):
31
  print(f"global timeout")
32
  return result[0]
33
 
 
34
  def evaluate_generations(generations: list, level: str = "all", debug: bool = False):
35
  """We take the list of code generations and try to compile them
36
  and the run their corresponding unit tests which are retrieved from the APPS dataset.
@@ -57,7 +58,7 @@ def evaluate_generations(generations: list, level: str = "all", debug: bool = Fa
57
  for o_idx, o in enumerate(problem_generations):
58
  curr_res = [-2]
59
  try:
60
- curr_res = run_test(sample, test=o, debug=debug)
61
  if debug:
62
  print(f"\nSuccessful compilation of task {index}!")
63
  fixed = []
@@ -207,5 +208,5 @@ def compute_metrics(generations, level="all", k_list=[1, 10, 100], count_errors=
207
  metrics = get_results(results, count_errors=count_errors, k_list=k_list)
208
  return metrics
209
 
210
- #import doctest
211
- #doctest.testmod()
 
31
  print(f"global timeout")
32
  return result[0]
33
 
34
+
35
  def evaluate_generations(generations: list, level: str = "all", debug: bool = False):
36
  """We take the list of code generations and try to compile them
37
  and the run their corresponding unit tests which are retrieved from the APPS dataset.
 
58
  for o_idx, o in enumerate(problem_generations):
59
  curr_res = [-2]
60
  try:
61
+ curr_res = check_correctness(sample, o, timeout=TIMEOUT, debug=debug)
62
  if debug:
63
  print(f"\nSuccessful compilation of task {index}!")
64
  fixed = []
 
208
  metrics = get_results(results, count_errors=count_errors, k_list=k_list)
209
  return metrics
210
 
211
+ # import doctest
212
+ # doctest.testmod()