File size: 1,702 Bytes
2319518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import logging
import os
import re

import numpy as np
from utils.data_utils import load_jsonl, save_jsonl

# Sentinel returned by extract_answer() when no number can be pulled out of
# the text; is_correct() compares the extracted gold answer against it.
INVALID_ANS = '[invalid]'


def extract_answer(completion):
    """Return the last number mentioned in ``completion``.

    A number is recognised only when it is delimited on both sides:

    * on the left by the start of the text, whitespace, or one of ``$ % # {``;
    * on the right by the end of the text, whitespace, or one of ``. , }``.

    Supported forms are an optional sign, an integer part (plain digits or
    comma-grouped thousands such as ``1,234,567``) with an optional decimal
    part, or a bare decimal such as ``.5``.

    Args:
        completion: Text to scan (model output or reference solution).

    Returns:
        An ``int`` for integer literals, a ``float`` for literals with a
        decimal point, or ``INVALID_ANS`` when no number is found.
    """
    # Compiled here to keep the block self-contained; ``re`` caches compiled
    # patterns, so repeated calls do not pay for recompilation.
    number_pattern = re.compile(
        # Left delimiter: start of text, or just after whitespace / $ % # {
        r'(?:^|(?<=[\s$%#{]))'
        r'[+-]?'
        r'(?:'
        # Integer part. The comma-grouped alternative must come first:
        # otherwise plain ``\d*`` succeeds on "1" in "1,234" because the
        # right delimiter accepts ",", and the grouped form is never tried.
        r'(?:0|[1-9](?:\d{0,2}(?:,\d{3})+|\d*))'
        # Optional decimals; trailing zeros are allowed so "5.50" is kept
        # whole instead of being cut back to "5".
        r'(?:\.\d+)?'
        # Bare decimal without an integer part, e.g. ".5".
        r'|\.\d+'
        r')'
        # Right delimiter: whitespace, sentence/brace punctuation, or end.
        r'(?=[\s.,}]|$)'
    )

    # Only trailing periods are dropped (a leading "." may belong to ".5").
    # Newlines are left intact so they count as whitespace delimiters.
    text = completion.rstrip('.')

    # Every match contains at least one digit, so the last match is always
    # a real number rather than an empty string or a lone sign.
    matches = list(number_pattern.finditer(text))
    if not matches:
        logging.warning('No digits found in %r', text)
        return INVALID_ANS

    literal = matches[-1].group().replace(',', '')
    try:
        # Parse with int()/float() instead of eval(): the text comes from a
        # model and must never be executed as code.
        return float(literal) if '.' in literal else int(literal)
    except ValueError:
        # E.g. a digit run longer than the interpreter's int-parsing limit.
        logging.warning('Could not parse number %r', literal)
        return INVALID_ANS


def is_correct(completion, answer):
    """Check whether ``completion`` yields the same number as ``answer``.

    Both texts are passed through ``extract_answer`` and the extracted values
    are compared with ``==``.

    Args:
        completion: Text produced by the model.
        answer: Reference (ground-truth) text.

    Returns:
        ``True`` when the extracted values are equal, ``False`` otherwise
        (including when nothing could be extracted from ``completion``).

    Raises:
        AssertionError: If no number can be extracted from ``answer``.
    """
    gold = extract_answer(answer)
    if gold == INVALID_ANS:
        # Raised explicitly instead of via ``assert``: assert statements are
        # removed under ``python -O``, and an unparsable gold answer would
        # then compare equal to an unparsable completion and be scored as
        # correct. AssertionError is kept so existing callers see the same
        # exception type.
        raise AssertionError('No ground truth answer found in the document.')
    return extract_answer(completion) == gold


def eval_gsm8k_acc(output_fname):
    """Log accuracy statistics for a results file and dump the failures.

    The records are read with ``load_jsonl`` (presumably a list of dicts;
    each must carry an ``'acc'`` entry). The total count, the sum of the
    ``'acc'`` values and their mean as a percentage are logged. Records whose
    ``'acc'`` is falsy are written with ``save_jsonl`` to a sibling file named
    ``<output_fname without extension>_gsm8k_error.jsonl``.

    Args:
        output_fname: Path of the JSONL results file to summarise.

    Returns:
        A dict ``{'math': <mean of 'acc' values * 100>}``.
    """
    records = load_jsonl(output_fname)
    scores = [record['acc'] for record in records]

    # Banner followed by the three summary lines.
    rule = '=' * 60
    logging.info(rule)
    logging.info('{:^60}'.format('Math Acc.'))
    logging.info(rule)
    total = len(scores)
    logging.info('Total num={:.2f}'.format(total))
    right = np.sum(scores)
    logging.info('Right num={:.2f}'.format(right))
    percent = np.mean(scores) * 100
    logging.info('Zero-shot Acc={:.2f}'.format(percent))

    # Persist the failing records next to the input file.
    failures = [record for record in records if not record['acc']]
    stem, _ = os.path.splitext(output_fname)
    save_jsonl(failures, stem + '_gsm8k_error.jsonl')

    return {'math': np.mean(scores) * 100}