vlff李飞飞
update md
2319518
raw
history blame
1.7 kB
import logging
import os
import re
import numpy as np
from utils.data_utils import load_jsonl, save_jsonl
INVALID_ANS = '[invalid]'
def extract_answer(completion):
def _get_last_digit(s):
_PAT_LAST_DIGIT = re.compile(
r'(?<=(\s|[\$%#{]))([+-])?(?=(\S))(0|([1-9](\d*|\d{0,2}(,\d{3})*)))?(\.\d*[1-9])?(?=(\s|[.,}]|$))'
)
match = list(_PAT_LAST_DIGIT.finditer(s))
if match:
last_digit = match[-1].group().replace(',', '').replace('+', '')
else:
last_digit = None
logging.warning(f'No digits found in {s!r}')
return last_digit
job_gen = completion.strip('.').replace('\n', '\\n')
last_digit = _get_last_digit(job_gen)
if last_digit:
return eval(last_digit)
else:
return INVALID_ANS
def is_correct(completion, answer):
gold = extract_answer(answer)
assert gold != INVALID_ANS, 'No ground truth answer found in the document.'
return extract_answer(completion) == gold
def eval_gsm8k_acc(output_fname):
data_list = load_jsonl(output_fname)
acc_res = [item['acc'] for item in data_list]
logging.info('=' * 60)
logging.info('{:^60}'.format('Math Acc.'))
logging.info('=' * 60)
logging.info('Total num={:.2f}'.format(len(acc_res)))
logging.info('Right num={:.2f}'.format(np.sum(acc_res)))
logging.info('Zero-shot Acc={:.2f}'.format(np.mean(acc_res) * 100))
error_data_list = [item for item in data_list if not item['acc']]
error_data_output_fname = os.path.splitext(
output_fname)[0] + '_gsm8k_error.jsonl'
save_jsonl(error_data_list, error_data_output_fname)
return {'math': np.mean(acc_res) * 100}