compare_dpo_generation / make_qas_comparison.py
zhangjf's picture
Update make_qas_comparison.py
2a686f9
raw
history blame
2.16 kB
import os
import json
import pandas as pd
import openpyxl
from tqdm import tqdm
from win32com.client import Dispatch
def just_open(filename: str) -> None:
    """Open ``filename`` in Excel through COM, save it in place, and close it.

    The workbook is opened in an Excel application whose window is hidden,
    saved without any other modification, and closed again.

    Args:
        filename: Path of the workbook. It is converted to an absolute path
            before being handed to Excel.

    Raises:
        Exception: Whatever the COM layer raises when Excel cannot open or
            save the workbook is propagated to the caller.
    """
    xlApp = Dispatch("Excel.Application")
    xlApp.Visible = False
    xlBook = xlApp.Workbooks.Open(os.path.abspath(filename))
    try:
        xlBook.Save()
    except Exception:
        # Do not leave the workbook open (and the file locked) when saving
        # fails. The first positional argument of Close is SaveChanges;
        # passing False discards pending changes instead of letting the
        # hidden Excel window wait on a "save changes?" prompt.
        xlBook.Close(False)
        raise
    xlBook.Close()
    # NOTE(review): the Excel application itself is not quit here, exactly as
    # before. Dispatch may hand back an instance that is shared with other
    # workbooks, so calling Quit() could close unrelated work -- confirm
    # before adding it.
# Prompt template (written in Chinese) for a pairwise comparison query.
# It presents a question and two replies labelled A and B, asks which reply
# answers the question better, and requests a one-sentence comparison followed
# by the winner ("A" or "B") on its own line.
# Placeholders filled through str.format: {prompt}, {A}, {B}.
# The trailing .strip() removes the newlines that surround the literal.
win_rate_prompt = """
考虑以下问题:
“{prompt}”
我们认定一个好的回复需要形式简约、内容详尽、回答正确,请判断以下哪一个回复更好地回答了这个问题?
回复A:
“{A}”
回复B:
“{B}”
请首先用一句话具体比较以上两个回复,阐述哪一个回复更好以及为什么。然后,在新的一行,写明(且仅写出)“A”或“B”以明确指示哪个回复在你的比较中胜出。按以下格式给出你的答复:
具体比较:
胜出的回复:<"A"或"B">
""".strip()
def make_query(prompt, A, B):
    """Build one comparison query for a prompt and two candidate replies.

    Each argument is stripped of surrounding whitespace and substituted into
    the module-level ``win_rate_prompt`` template.

    Args:
        prompt: The question text.
        A: The reply shown in the "A" slot of the template.
        B: The reply shown in the "B" slot of the template.

    Returns:
        A dict with key ``"q"`` holding the filled-in template and key
        ``"a"`` set to ``None``.
    """
    question = win_rate_prompt.format(
        prompt=prompt.strip(),
        A=A.strip(),
        B=B.strip(),
    )
    return {"q": question, "a": None}
def read_excel(file):
    """Build pairwise comparison queries from every data row of a workbook.

    The first row of the active sheet is skipped. From each following row the
    first five cells are unpacked as ``prompt, A, B, C, D`` and six queries
    are produced with ``make_query``, one per pair of the four replies, in
    the fixed order (A, B), (C, A), (A, D), (B, C), (D, B), (C, D).

    Args:
        file: Path of the ``.xlsx`` workbook to read.

    Returns:
        A list with the dicts returned by ``make_query``, six per data row,
        in sheet order.

    Raises:
        ValueError: If any of the first five cells of a data row is empty.
    """
    # The file is opened and re-saved by Excel before openpyxl reads it.
    # NOTE(review): presumably this refreshes the cached values and sheet
    # dimensions that data_only / read_only loading depends on -- confirm.
    just_open(filename=file)
    workbook: openpyxl.Workbook = openpyxl.load_workbook(
        filename=file,
        read_only=True,
        data_only=True,
        keep_links=False,
        keep_vba=False,
    )
    qas = []
    try:
        sheet = workbook.active
        rows = sheet.iter_rows(
            min_row=2,
            max_row=sheet.max_row,
            min_col=1,
            max_col=5,
            values_only=True,
        )
        progress = tqdm(rows, total=sheet.max_row - 1)
        for row_number, row in enumerate(progress, start=2):
            # Explicit check instead of `assert`, which is removed under -O.
            if any(cell is None for cell in row):
                raise ValueError(
                    f"{file}: row {row_number} has an empty cell "
                    f"in columns 1-5: {row!r}"
                )
            prompt, A, B, C, D = row
            # Pair order and orientation are kept as-is; downstream output
            # order depends on it.
            pairs = ((A, B), (C, A), (A, D), (B, C), (D, B), (C, D))
            for first, second in pairs:
                qas.append(make_query(prompt, first, second))
    finally:
        # A workbook loaded with read_only=True keeps the file open until it
        # is closed explicitly.
        workbook.close()
    print(f"include {len(qas)} qas from {file}")
    return qas
# Gather the comparison queries of every workbook found directly inside
# `input_dir` and write them all to a single JSON file in that directory.
input_dir = "./"
qas = []
for file in os.listdir(input_dir):
    # Keep only names ending in ".xlsx" that do not start with "~$".
    # NOTE(review): "~$" is presumably the prefix of Excel's temporary
    # owner/lock files -- confirm.
    if file.endswith(".xlsx") and not file.startswith("~$"):
        qas.extend(read_excel(f"{input_dir}/{file}"))

# ensure_ascii=False keeps non-ASCII text readable in the output file.
with open(f"{input_dir}/qas.json", "w", encoding="utf-8") as f:
    json.dump(qas, f, ensure_ascii=False)