|
from pathlib import Path |
|
import json |
|
import time |
|
from datetime import datetime |
|
|
|
from args import parse_args |
|
from eval_utils import dump_jsonl, get_answer |
|
|
|
def load_out_sep_by_assistant(pxout_txt_fn):
    """Load predictions from a file in which answers start with ``assistant: ``.

    A line whose first non-blank text is the ``assistant: `` marker opens a new
    prediction; the text after the marker (trailing whitespace removed) is its
    first line. Every other line is whitespace-stripped and appended to the
    current prediction after a newline, so multi-line answers stay together.
    Text that appears before the first marker forms a prediction of its own.

    Args:
        pxout_txt_fn: Path of the prediction text file.

    Returns:
        The list of prediction strings, in file order. Empty answers are kept
        as ``''`` so that positions stay aligned with the references.
    """
    marker = 'assistant: '
    predictions = []
    current = None  # None means no prediction has been opened yet
    with open(pxout_txt_fn, 'r') as f:
        for raw_line in f:
            # Test the marker before removing trailing whitespace: a fully
            # stripped empty answer ("assistant: ") would become "assistant:"
            # and stop matching, which merged it into the previous answer.
            unindented = raw_line.lstrip()
            if unindented.startswith(marker):
                if current is not None:
                    predictions.append(current)
                current = unindented[len(marker):].rstrip()
            else:
                head = '' if current is None else current
                current = head + '\n' + raw_line.strip()
    if current is not None:
        predictions.append(current)
    return predictions
|
|
|
def load_out(pxout_txt_fn):
    """Read a prediction file as one prediction per line.

    Args:
        pxout_txt_fn: Path of the prediction text file.

    Returns:
        A list with every line of the file, each stripped of leading and
        trailing whitespace (blank lines are kept as empty strings).
    """
    with open(pxout_txt_fn) as reader:
        return [row.strip() for row in reader]
|
|
|
def load_ref_longbook_qa_eng(pxref_json_fn):
    """Collect the ``answers`` field of every sample in a JSON reference file.

    Args:
        pxref_json_fn: Path of a JSON file whose top level is iterated as a
            sequence of samples, each indexable by ``'answers'``.

    Returns:
        A list holding ``sample['answers']`` for each sample, in file order.
    """
    with open(pxref_json_fn) as reader:
        samples = json.load(reader)
    return [sample['answers'] for sample in samples]
|
|
|
def load_ref_longbook_choice_eng(pxref_json_fn, task):
    """Build references for a multiple-choice task from a JSON reference file.

    Each sample gets two alias keys before being handed to ``get_answer``:
    ``'options'`` (copied from ``'multichoice_options'``) and ``'answer'``
    (copied from ``'answers'``).
    NOTE(review): presumably ``get_answer`` reads those alias keys - confirm
    against ``eval_utils``.

    Args:
        pxref_json_fn: Path of the JSON reference file.
        task: Task name, forwarded unchanged to ``get_answer``.

    Returns:
        A list with the value of ``get_answer(sample, task)`` for each sample,
        in file order.
    """
    with open(pxref_json_fn) as reader:
        samples = json.loads(reader.read())

    def _reference_for(sample):
        # Add the alias keys in place, then delegate to the shared helper.
        sample['options'] = sample['multichoice_options']
        sample['answer'] = sample['answers']
        return get_answer(sample, task)

    return [_reference_for(sample) for sample in samples]
|
|
|
def load_ref_sets7(ref_jsonl_fn):
    """Collect the ``answer`` field of every record in a JSON Lines file.

    Args:
        ref_jsonl_fn: Path of a file holding one JSON object per line, each
            with an ``'answer'`` key.

    Returns:
        A list holding ``record['answer']`` for each record, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``'answer'`` key.
    """
    refs = []
    with open(ref_jsonl_fn, 'r') as reader:
        for row in reader:
            # A blank line (commonly a trailing one) is not a record; passing
            # it to json.loads would raise JSONDecodeError.
            if not row.strip():
                continue
            refs.append(json.loads(row)['answer'])
    return refs
|
|
|
def load_ref(pxref_json_fn, task):
    """Load reference answers with the loader that matches ``task``.

    Args:
        pxref_json_fn: Path of the reference file.
        task: Task name used to pick the loader.

    Returns:
        The list produced by ``load_ref_longbook_qa_eng`` for the QA, summary
        and dialogue tasks, by ``load_ref_longbook_choice_eng`` for the
        multiple-choice tasks, and by ``load_ref_sets7`` for any other task.
    """
    answers_field_tasks = (
        'longbook_qa_eng',
        "longbook_sum_eng",
        "longdialogue_qa_eng",
        'longbook_qa_eng_ret',
    )
    multichoice_tasks = ('longbook_choice_eng', 'longbook_choice_eng_ret')

    if task in answers_field_tasks:
        return load_ref_longbook_qa_eng(pxref_json_fn)
    if task in multichoice_tasks:
        return load_ref_longbook_choice_eng(pxref_json_fn, task)
    # Every remaining task is read with the JSON Lines loader.
    return load_ref_sets7(pxref_json_fn)
|
|
|
def combine_to_infb(outs, refs, output_path):
    """Pair predictions with references by position and dump them as JSONL.

    When the two lists differ in length a warning is printed and only the
    first ``min(len(outs), len(refs))`` pairs are written. Each written record
    has the keys ``id`` (zero-based position), ``prediction`` and
    ``ground_truth``.

    Args:
        outs: Predictions, in order.
        refs: References, in order.
        output_path: Destination passed to ``dump_jsonl``.

    Returns:
        None.
    """
    n_outs, n_refs = len(outs), len(refs)

    # At most one of the two warnings can fire.
    if n_outs < n_refs:
        print("Warning: {} lines in prediction, less than {} lines in ref".format(n_outs, n_refs))
    elif n_refs < n_outs:
        print("Warning: {} lines in prediction, larger than {} lines in ref".format(n_outs, n_refs))

    # zip() stops at the shorter list, which performs the truncation.
    records = [
        {"id": idx, "prediction": pred, "ground_truth": ref}
        for idx, (pred, ref) in enumerate(zip(outs, refs))
    ]

    dump_jsonl(records, output_path)
    print('done. saved id-pred-ref to {}'.format(output_path))
|
|
|
def is_sep_by_assistant(testout_txt_fn):
    """Tell whether any line of the file begins with ``assistant: ``.

    The check is made on the raw line, so a line with leading whitespace
    before the marker does not count.

    Args:
        testout_txt_fn: Path of the prediction text file.

    Returns:
        True if at least one line starts with ``'assistant: '``, else False.
    """
    with open(testout_txt_fn) as reader:
        return any(row.startswith('assistant: ') for row in reader)
|
|
|
# Task names accepted by the command-line entry point of this script.
# NOTE(review): load_ref() also has branches for 'longbook_qa_eng_ret' and
# 'longbook_choice_eng_ret', which are not listed here - confirm whether that
# is intentional.
ALL_TASKS = [
    "passkey", "number_string", "kv_retrieval",
    "longdialogue_qa_eng", "longbook_sum_eng",
    "longbook_choice_eng", "longbook_qa_eng", "longbook_qa_chn",
    "math_find", "math_calc",
    "code_run", "code_debug",
]
|
|
|
if __name__ == "__main__":
    # Command-line entry point: validate the arguments, load predictions and
    # references, then write the paired id/prediction/ground_truth records.
    args = parse_args()

    # `raise` requires an exception instance or class. Raising a plain string
    # fails with "TypeError: exceptions must derive from BaseException" and
    # the intended message is lost, so real exception types are used here.
    if args.task is None or args.task not in ALL_TASKS:
        raise ValueError('Error: task name [{}] is None or not in {}'.format(args.task, ALL_TASKS))

    if args.pxout_txt is None or not Path(args.pxout_txt).exists():
        raise FileNotFoundError('Error: system prediction file [{}] is None or not exists.'.format(args.pxout_txt))

    if args.pxref_json is None or not Path(args.pxref_json).exists():
        raise FileNotFoundError('Error: system reference file [{}] is None or not exists.'.format(args.pxref_json))

    # Use the "assistant: "-separated parser only when it is requested AND the
    # file actually contains such a marker; otherwise read one answer per line.
    if args.sep_by_assistant and is_sep_by_assistant(args.pxout_txt):
        outs = load_out_sep_by_assistant(args.pxout_txt)
    else:
        outs = load_out(args.pxout_txt)

    refs = load_ref(args.pxref_json, args.task)

    if args.pxout_ref_json is None:
        # No explicit destination: derive one from the prediction file name
        # plus a timestamp with spaces and colons replaced by dashes.
        flag = str(datetime.now()).replace(' ', '-').replace(':', '-')
        output_path = args.pxout_txt + '.' + flag + '.json'
    else:
        output_path = args.pxout_ref_json

    print('combine tst.out and ref, output to file: {}'.format(output_path))

    combine_to_infb(outs, refs, output_path)
|
|
|
|
|
|
|
|