File size: 5,445 Bytes
a550e38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
from pathlib import Path
import json
import time
from datetime import datetime

from args import parse_args
from eval_utils import dump_jsonl, get_answer

def load_out_sep_by_assistant(pxout_txt_fn):
    """Load predictions from a file where each answer starts with 'assistant: '.

    Every line is stripped of surrounding whitespace. A line that begins with
    the marker opens a new prediction; any other line is appended to the
    prediction currently being built (joined with a newline), so multi-line
    answers stay together.

    Args:
        pxout_txt_fn: path of the prediction text file.

    Returns:
        A list of prediction strings, in file order.
    """
    marker = 'assistant: '
    # An empty answer is written as "assistant: "; stripping the line removes
    # the trailing space, so the bare form must be recognised as well.
    # Otherwise the empty answer is glued onto the previous prediction and
    # every following prediction is shifted by one position.
    bare_marker = marker.rstrip()

    predictions = []
    current = None  # None means no prediction has been started yet
    with open(pxout_txt_fn, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith(marker) or line == bare_marker:
                if current is not None:
                    predictions.append(current)
                # Slicing past the end of the bare marker yields ''.
                current = line[len(marker):]
            elif current is None:
                # Text before the first marker is kept as its own entry.
                current = '\n' + line
            else:
                current += '\n' + line
    if current is not None:
        predictions.append(current)
    return predictions

def load_out(pxout_txt_fn):
    """Read a prediction file with one answer per line.

    Args:
        pxout_txt_fn: path of the prediction text file.

    Returns:
        A list holding every line of the file with leading and trailing
        whitespace removed (blank lines become empty strings).
    """
    with open(pxout_txt_fn) as reader:
        return [raw.strip() for raw in reader]

def load_ref_longbook_qa_eng(pxref_json_fn):
    """Collect the 'answers' field of every sample in a JSON reference file.

    Args:
        pxref_json_fn: path of a JSON file whose top level is iterated as a
            sequence of samples, each indexable by 'answers'.

    Returns:
        A list with one entry per sample: that sample's 'answers' value,
        passed through unchanged (not flattened or converted).
    """
    with open(pxref_json_fn) as reader:
        samples = json.load(reader)
        return [sample['answers'] for sample in samples]

def load_ref_longbook_choice_eng(pxref_json_fn, task):
    """Build references for a multiple-choice task via `get_answer`.

    Each sample read from the JSON file gets two extra keys before being
    handed to `get_answer`: 'options' (copied from 'multichoice_options')
    and 'answer' (copied from 'answers').

    Args:
        pxref_json_fn: path of the JSON reference file.
        task: task name, forwarded to `get_answer`.

    Returns:
        A list with the `get_answer` result for every sample, in file order.
    """
    collected = []
    with open(pxref_json_fn) as reader:
        samples = json.loads(reader.read())
        for sample in samples:
            # Alias the fields under the key names passed on to get_answer.
            sample['options'] = sample['multichoice_options']
            sample['answer'] = sample['answers']
            collected.append(get_answer(sample, task))
    return collected

def load_ref_sets7(ref_jsonl_fn):
    """Read a JSON-lines reference file and return each record's 'answer'.

    Args:
        ref_jsonl_fn: path of a file holding one JSON document per line.

    Returns:
        A list with the 'answer' value of every line, in file order.
    """
    with open(ref_jsonl_fn, 'r') as reader:
        return [json.loads(raw)['answer'] for raw in reader]

def load_ref(pxref_json_fn, task):
    """Pick the reference loader that matches `task` and run it.

    Args:
        pxref_json_fn: path of the reference file.
        task: task name used to select the loader.

    Returns:
        The list of references produced by the selected loader.
    """
    answers_list_tasks = (
        'longbook_qa_eng',
        "longbook_sum_eng",
        "longdialogue_qa_eng",
        'longbook_qa_eng_ret',
    )
    multichoice_tasks = ('longbook_choice_eng', 'longbook_choice_eng_ret')

    if task in answers_list_tasks:
        return load_ref_longbook_qa_eng(pxref_json_fn)
    if task in multichoice_tasks:
        return load_ref_longbook_choice_eng(pxref_json_fn, task)
    # Every other task name falls back to the JSON-lines loader; nothing is
    # rejected here.
    return load_ref_sets7(pxref_json_fn)

def combine_to_infb(outs, refs, output_path):
    """Pair predictions with references by position and dump them as records.

    When the two inputs differ in length a warning is printed and only the
    first min(len(outs), len(refs)) pairs are written.

    Args:
        outs: predictions, one per sample.
        refs: references, one per sample.
        output_path: destination handed to `dump_jsonl`.
    """
    n_outs, n_refs = len(outs), len(refs)
    if n_outs < n_refs:
        print("Warning: {} lines in prediction, less than {} lines in ref".format(n_outs, n_refs))
    elif n_refs < n_outs:
        print("Warning: {} lines in prediction, larger than {} lines in ref".format(n_outs, n_refs))

    # zip stops at the shorter input, which performs the truncation.
    records = [
        {
            "id": idx,
            "prediction": prediction,
            "ground_truth": reference,  # TODO must be a list, not a str!
        }
        for idx, (prediction, reference) in enumerate(zip(outs, refs))
    ]
    dump_jsonl(records, output_path)
    print('done. saved id-pred-ref to {}'.format(output_path))

def is_sep_by_assistant(testout_txt_fn):
    """Tell whether any line of the file starts with 'assistant: '.

    Lines are tested as read (not stripped), so the marker has to sit at the
    very beginning of a line.

    Args:
        testout_txt_fn: path of the prediction text file.

    Returns:
        True if at least one line starts with the marker, otherwise False.
    """
    with open(testout_txt_fn) as reader:
        return any(raw.startswith('assistant: ') for raw in reader)

# Task names accepted by the command-line entry point below.
# NOTE(review): load_ref also recognises 'longbook_qa_eng_ret' and
# 'longbook_choice_eng_ret', which are not listed here, so the validation
# below rejects them -- confirm whether that is intended.
ALL_TASKS = [
    "passkey",
    "number_string",
    "kv_retrieval",
    "longdialogue_qa_eng",
    "longbook_sum_eng",
    "longbook_choice_eng",
    "longbook_qa_eng",
    "longbook_qa_chn",
    "math_find",
    "math_calc",
    "code_run",
    "code_debug",
]

if __name__ == "__main__":
    args = parse_args()
    # args.task for task name in ALL_TASKS
    # args.pxout_txt for predicted output file
    # args.pxref_json for test.json reference file

    # Raise real exception objects: `raise "<str>"` is itself a TypeError in
    # Python 3 ("exceptions must derive from BaseException"), which would
    # hide the intended message from the user.
    if args.task is None or args.task not in ALL_TASKS:
        raise ValueError('Error: task name [{}] is None or not in {}'.format(args.task, ALL_TASKS))

    if args.pxout_txt is None or not Path(args.pxout_txt).exists():
        raise FileNotFoundError('Error: system prediction file [{}] is None or not exists.'.format(args.pxout_txt))

    if args.pxref_json is None or not Path(args.pxref_json).exists():
        raise FileNotFoundError('Error: system reference file [{}] is None or not exists.'.format(args.pxref_json))

    # Use the marker-based loader only when it was requested AND the file
    # really contains an 'assistant: ' line; otherwise read line by line.
    if args.sep_by_assistant and is_sep_by_assistant(args.pxout_txt):
        outs = load_out_sep_by_assistant(args.pxout_txt)
    else:
        outs = load_out(args.pxout_txt)

    refs = load_ref(args.pxref_json, args.task)

    # determine the output json file name:
    if args.pxout_ref_json is None:
        # Derive a name from the prediction file plus a timestamp in which
        # spaces and colons are replaced by dashes.
        flag = str(datetime.now()).replace(' ', '-').replace(':', '-')
        output_path = args.pxout_txt + '.' + flag + '.json'
    else:
        output_path = args.pxout_ref_json

    print('combine tst.out and ref, output to file: {}'.format(output_path))

    # combine tst.out and ref to <ref, test.out> for next step scoring.
    # combine_to_infb has no return value, so its result is not stored.
    combine_to_infb(outs, refs, output_path)