"""Build pairwise win-rate judging prompts from Excel sheets.

Every ``.xlsx`` file found in ``input_dir`` is read. Each data row (row 2
onwards) holds a prompt in the first column followed by four candidate
replies. For every row, six judging queries are generated (one per pair of
replies), and the combined list is written to ``qas.json`` in ``input_dir``.
"""
import os
import json
import pandas as pd
import openpyxl
from tqdm import tqdm
from win32com.client import Dispatch


def just_open(filename):
    """Open *filename* in a hidden Excel instance, save it, and close it.

    NOTE(review): presumably this round-trip makes Excel store computed
    formula results so that ``read_excel`` can read them with
    ``data_only=True`` -- confirm.

    NOTE(review): the Excel application itself is not quit here (same as the
    original). ``Dispatch`` may hand back an instance the user already has
    open, so confirm before adding ``xlApp.Quit()``.
    """
    xlApp = Dispatch("Excel.Application")
    xlApp.Visible = False
    xlBook = xlApp.Workbooks.Open(os.path.abspath(filename))
    try:
        xlBook.Save()
    finally:
        # Always release the workbook, even if Save() failed. Passing False
        # (SaveChanges) stops a hidden Excel from blocking on a "save
        # changes?" prompt; the workbook has just been saved, so nothing is
        # lost on the success path.
        xlBook.Close(False)


# Judging prompt template with three placeholders: {prompt}, {A} and {B}.
# NOTE(review): the literal is reproduced exactly as it appears in the source
# under review, where it sits on a single line; if the template originally
# contained line breaks they must be restored from the original file.
win_rate_prompt = """ 考虑以下问题: “{prompt}” 我们认定一个好的回复需要形式简约、内容详尽、回答正确,请判断以下哪一个回复更好地回答了这个问题? 回复A: “{A}” 回复B: “{B}” 请首先用一句话具体比较以上两个回复,阐述哪一个回复更好以及为什么。然后,在新的一行,写明(且仅写出)“A”或“B”以明确指示哪个回复在你的比较中胜出。按以下格式给出你的答复: 具体比较: 胜出的回复:<"A"或"B"> """.strip()


def make_query(prompt, A, B):
    """Return one judging query comparing reply *A* against reply *B*.

    Args:
        prompt: The question both replies answer.
        A: The reply placed in the "A" slot of the template.
        B: The reply placed in the "B" slot of the template.

    Returns:
        A dict ``{"q": <filled-in template>, "a": None}``; ``"a"`` is left
        as ``None`` by this function.
    """
    # Spreadsheet cells may hold numbers rather than text; str() keeps
    # .strip() from failing on them and is a no-op for str values.
    q = win_rate_prompt.format(
        prompt=str(prompt).strip(),
        A=str(A).strip(),
        B=str(B).strip(),
    )
    return {"q": q, "a": None}


def read_excel(file):
    """Read one workbook and build the judging queries for all of its rows.

    The file is first passed through ``just_open``. Then columns 1-5 of the
    active sheet are read from row 2 onwards as
    ``prompt, A, B, C, D`` and six queries are produced per row.

    Args:
        file: Path to the ``.xlsx`` file.

    Returns:
        A list of query dicts as produced by ``make_query``.

    Raises:
        AssertionError: If any of the five cells in a row is empty.
    """
    just_open(filename=file)
    workbook: openpyxl.Workbook = openpyxl.load_workbook(
        filename=file,
        read_only=True,
        data_only=True,
        keep_links=False,
        keep_vba=False,
    )
    qas = []
    try:
        sheet = workbook.active
        max_row = sheet.max_row
        # In read-only mode max_row can be None when the file carries no
        # dimension information; tqdm accepts total=None in that case.
        total = None if max_row is None else max(max_row - 1, 0)
        rows = sheet.iter_rows(
            min_row=2,
            max_row=max_row,
            min_col=1,
            max_col=5,
            values_only=True,
        )
        for row_number, row in enumerate(tqdm(rows, total=total), start=2):
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; the exception type is unchanged.
            if any(cell is None for cell in row):
                raise AssertionError(
                    f"{file}: row {row_number} has an empty cell in columns 1-5"
                )
            prompt, A, B, C, D = row
            # The pair order and the A/B slot of each reply are kept exactly
            # as in the original script.
            pairs = ((A, B), (C, A), (A, D), (B, C), (D, B), (C, D))
            qas.extend(make_query(prompt, left, right) for left, right in pairs)
    finally:
        # Read-only workbooks keep the underlying file open until closed.
        workbook.close()
    print(f"include {len(qas)} qas from {file}")
    return qas


input_dir = "./"
qas = []
# Sorted so the order of entries in qas.json is reproducible across runs.
for file in sorted(os.listdir(input_dir)):
    # Skip "~$"-prefixed files (presumably Excel's temporary owner files).
    if file.startswith("~$"):
        continue
    if not file.endswith(".xlsx"):
        continue
    qas += read_excel(os.path.join(input_dir, file))

with open(os.path.join(input_dir, "qas.json"), "w", encoding="utf-8") as f:
    json.dump(qas, f, ensure_ascii=False)