"""Pair question lines with previously collected answers and write qas.json.

Questions are read line by line from a fixed set of text files; answers are
read from every ``*.json`` file in the questions directory whose name
contains ``qas``.  The merged list is written to ``qas.json`` in that same
directory.
"""
import os
import json
import collections

# Directory holding both the question text files and the answer JSON files.
QUESTIONS_DIR = "./questions"

# Question files, read in this order; one question per line.
QUESTION_FILENAMES = (
    "math_question.txt",
    "qa_question.txt",
    "summarization_question.txt",
)


def read_qs(directory=QUESTIONS_DIR):
    """Read every question line from the known question files.

    Each line is kept verbatim (including its trailing newline) except that
    the substring "生成摘要" is replaced with "生成中文摘要".

    Args:
        directory: Folder containing the question text files.

    Returns:
        A list of question strings in file order.
    """
    qs = []
    for filename in QUESTION_FILENAMES:
        path = os.path.join(directory, filename)
        with open(path, "r", encoding="utf-8") as f:
            qs.extend(line.replace("生成摘要", "生成中文摘要") for line in f)
    print(f"read {len(qs)} queries from files")
    return qs


def read_qas(directory=QUESTIONS_DIR):
    """Load query-answer records from the ``*qas*.json`` files in *directory*.

    Each matching file is expected to hold a JSON array; its elements are
    concatenated into one list.

    Args:
        directory: Folder to scan for answer files.

    Returns:
        A list of the records found, in sorted-filename order.
    """
    qas = []
    # os.listdir returns entries in arbitrary order; sort so that the answer
    # later selected by merge() (the last one collected) is deterministic.
    # NOTE(review): the script's own output "qas.json" also matches this
    # filter, so a re-run reads the previous output back in -- confirm that
    # this is intended.
    for filename in sorted(os.listdir(directory)):
        if not (filename.endswith(".json") and "qas" in filename):
            continue
        path = os.path.join(directory, filename)
        with open(path, "r", encoding="utf-8") as f:
            qas.extend(json.load(f))
    print(f"read {len(qas)} query-answers from files")
    return qas


def merge(qs, qas):
    """Attach one collected answer to each question that has any left.

    Answers are grouped by their ``"q"`` text.  Walking *qs* in order, each
    question consumes the most recently collected answer still available for
    it; questions with no remaining answer are skipped.  Neither input is
    modified.

    Args:
        qs: Question strings.
        qas: Records with ``"q"`` (question) and ``"a"`` (answer) keys.

    Returns:
        A new list of ``{"q": ..., "a": ...}`` dicts in the order of *qs*.
    """
    q_to_as = collections.defaultdict(list)
    for qa in qas:
        q_to_as[qa["q"]].append(qa["a"])

    merged = []
    for q in qs:
        # .get() avoids inserting an empty list for every unseen question.
        answers = q_to_as.get(q)
        if not answers:
            continue
        # pop() takes from the end, i.e. the last answer that was collected.
        merged.append({"q": q, "a": answers.pop()})
    print(f"merge {len(merged)} query-answers from files")
    return merged


def main():
    """Read questions and answers, merge them, and write qas.json."""
    qs = read_qs()
    qas = merge(qs, read_qas())
    out_path = os.path.join(QUESTIONS_DIR, "qas.json")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(qas, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()