|
import os |
|
import json |
|
import pandas as pd |
|
import openpyxl |
|
from tqdm import tqdm |
|
from win32com.client import Dispatch |
|
|
|
def just_open(filename):
    """Open *filename* in Excel through COM, save it in place, and close it.

    NOTE(review): presumably this round-trip exists so that Excel stores
    freshly calculated formula results which ``openpyxl`` can later read
    with ``data_only=True`` -- confirm with the author.

    The Excel application object is not quit here.
    NOTE(review): ``Dispatch`` may attach to an Excel instance the user
    already has open, so quitting it could be destructive -- verify before
    adding a ``Quit()`` call.

    Args:
        filename: Path of the workbook. It is converted to an absolute
            path before being handed to Excel.
    """
    excel = Dispatch("Excel.Application")
    excel.Visible = False
    book = excel.Workbooks.Open(os.path.abspath(filename))
    try:
        book.Save()
    finally:
        # Always release the workbook, even when Save() raises; otherwise
        # it stays open in the hidden Excel instance. The first argument
        # of Workbook.Close is SaveChanges: False closes without saving
        # again and without showing a "save changes?" prompt.
        book.Close(False)
|
|
|
# Chinese-language template for a pairwise "which reply is better" judgment.
# Placeholders: {prompt} is the question, {A} and {B} are the two candidate
# replies. The text asks for a one-sentence comparison followed, on a new
# line, by only "A" or "B" naming the winning reply.
win_rate_prompt = "\n".join(
    (
        "考虑以下问题:",
        "“{prompt}”",
        "我们认定一个好的回复需要形式简约、内容详尽、回答正确,请判断以下哪一个回复更好地回答了这个问题?",
        "回复A:",
        "“{A}”",
        "回复B:",
        "“{B}”",
        "请首先用一句话具体比较以上两个回复,阐述哪一个回复更好以及为什么。然后,在新的一行,写明(且仅写出)“A”或“B”以明确指示哪个回复在你的比较中胜出。按以下格式给出你的答复:",
        "具体比较:",
        '胜出的回复:<"A"或"B">',
    )
)
|
|
|
def make_query(prompt, A, B, *, template=None):
    """Build one pairwise-comparison query record.

    Args:
        prompt: The question text.
        A: The reply shown in the "A" slot.
        B: The reply shown in the "B" slot.
        template: Optional format string with ``{prompt}``, ``{A}`` and
            ``{B}`` placeholders. Defaults to the module-level
            ``win_rate_prompt``.

    Returns:
        A dict ``{"q": <filled-in template>, "a": None}``; the answer slot
        is always left as ``None`` here.

    Raises:
        ValueError: If ``prompt``, ``A`` or ``B`` is ``None``.
    """
    if template is None:
        template = win_rate_prompt

    fields = {"prompt": prompt, "A": A, "B": B}
    cleaned = {}
    for name, value in fields.items():
        if value is None:
            # str(None) would silently put the text "None" into the query.
            raise ValueError(f"make_query: {name} must not be None")
        # Spreadsheet cells may hold numbers or dates rather than str, so
        # convert to text before stripping surrounding whitespace.
        cleaned[name] = str(value).strip()

    return {"q": template.format(**cleaned), "a": None}
|
|
|
def read_excel(file):
    """Read one workbook and build pairwise comparison queries from it.

    The workbook is first re-saved through Excel via ``just_open`` and then
    loaded read-only with ``openpyxl``. Starting at row 2 of the active
    sheet, the first five columns of each row are read as
    ``prompt, A, B, C, D``. Every row yields six queries, one per unordered
    pair of the four replies, in the fixed order
    (A, B), (C, A), (A, D), (B, C), (D, B), (C, D).

    Args:
        file: Path of the ``.xlsx`` workbook.

    Returns:
        A list of query dicts as produced by ``make_query``.

    Raises:
        ValueError: If any of the five cells in a data row is empty.
    """
    just_open(filename=file)
    workbook: openpyxl.Workbook = openpyxl.load_workbook(
        filename=file,
        read_only=True,
        data_only=True,
        keep_links=False,
        keep_vba=False,
    )
    try:
        sheet = workbook.active
        last_row = sheet.max_row
        # max_row can be None when a read-only sheet carries no dimension
        # information; tqdm accepts total=None for an unknown length.
        total = last_row - 1 if last_row else None
        rows = sheet.iter_rows(
            min_row=2, max_row=last_row, min_col=1, max_col=5, values_only=True
        )

        qas = []
        for row_number, row in enumerate(tqdm(rows, total=total), start=2):
            # Explicit check instead of ``assert`` so validation still runs
            # under ``python -O``.
            if any(cell is None for cell in row):
                raise ValueError(
                    f"{file}: row {row_number} has an empty cell in columns A-E"
                )
            prompt, A, B, C, D = row
            # All six unordered pairs of the four replies, in the original
            # order and A/B slot assignment.
            for first, second in ((A, B), (C, A), (A, D), (B, C), (D, B), (C, D)):
                qas.append(make_query(prompt, first, second))
    finally:
        # A read-only workbook keeps the file open until closed explicitly.
        workbook.close()

    print(f"include {len(qas)} qas from {file}")
    return qas
|
|
|
# Script body: gather the queries from every .xlsx workbook in input_dir and
# write them all to <input_dir>/qas.json as a single JSON array.
input_dir = "./"
qas = []

# os.listdir returns names in arbitrary order; sorting keeps the order of
# the generated queries reproducible between runs and machines.
for file in sorted(os.listdir(input_dir)):
    # Names starting with "~$" are skipped (presumably the temporary
    # owner/lock files Excel creates next to an open workbook).
    if file.startswith("~$") or not file.endswith(".xlsx"):
        continue
    qas += read_excel(os.path.join(input_dir, file))

with open(os.path.join(input_dir, "qas.json"), "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps the Chinese text readable in the output file.
    json.dump(qas, f, ensure_ascii=False)