"""Generate json file for webpage."""
import json
import os
import re
models = ["alpaca", "llama", "gpt35", "bard"]


def read_jsonl(path: str, key: str = None):
    """Read a .jsonl file into a list, or a dict keyed by `key` when given."""
    data = []
    with open(os.path.expanduser(path)) as f:
        for line in f:
            # Skip blank lines so json.loads does not fail on them.
            if not line.strip():
                continue
            data.append(json.loads(line))
    if key is not None:
        data.sort(key=lambda x: x[key])
        data = {item[key]: item for item in data}
    return data
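
# Illustrative usage (assumed row shape, inferred from the fields accessed
# below): each line of table/question.jsonl is a JSON object like
#   {"question_id": 1, "category": "generic", "text": "..."}
# so read_jsonl(path, key="question_id") returns {1: {...}, 2: {...}, ...}.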


def trim_hanging_lines(s: str, n: int) -> str:
    """Strip the first `n` lines of `s`."""
    s = s.strip()
    for _ in range(n):
        s = s.split("\n", 1)[1].strip()
    return s
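
# Illustrative (this helper is currently unused in this script):
#   trim_hanging_lines("a\nb\nc", 1) == "b\nc"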


if __name__ == "__main__":
    questions = read_jsonl("table/question.jsonl", key="question_id")

    # Per-model answers, keyed by question_id.
    alpaca_answers = read_jsonl(
        "table/answer/answer_alpaca-13b.jsonl", key="question_id"
    )
    bard_answers = read_jsonl("table/answer/answer_bard.jsonl", key="question_id")
    gpt35_answers = read_jsonl("table/answer/answer_gpt35.jsonl", key="question_id")
    llama_answers = read_jsonl(
        "table/answer/answer_llama-13b.jsonl", key="question_id"
    )
    vicuna_answers = read_jsonl(
        "table/answer/answer_vicuna-13b.jsonl", key="question_id"
    )

    # Pairwise reviews of each model against vicuna-13b, keyed by question_id.
    review_alpaca = read_jsonl(
        "table/review/review_alpaca-13b_vicuna-13b.jsonl", key="question_id"
    )
    review_bard = read_jsonl(
        "table/review/review_bard_vicuna-13b.jsonl", key="question_id"
    )
    review_gpt35 = read_jsonl(
        "table/review/review_gpt35_vicuna-13b.jsonl", key="question_id"
    )
    review_llama = read_jsonl(
        "table/review/review_llama-13b_vicuna-13b.jsonl", key="question_id"
    )
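
    # Assumed row shape for the review files, inferred from the fields read
    # below: {"question_id": ..., "text": "<review text>", "score": ...}.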

    records = []
    for qid in questions.keys():
        r = {
            "id": qid,
            "category": questions[qid]["category"],
            "question": questions[qid]["text"],
            "answers": {
                "alpaca": alpaca_answers[qid]["text"],
                "llama": llama_answers[qid]["text"],
                "bard": bard_answers[qid]["text"],
                "gpt35": gpt35_answers[qid]["text"],
                "vicuna": vicuna_answers[qid]["text"],
            },
            "evaluations": {
                "alpaca": review_alpaca[qid]["text"],
                "llama": review_llama[qid]["text"],
                "bard": review_bard[qid]["text"],
                "gpt35": review_gpt35[qid]["text"],
            },
            "scores": {
                "alpaca": review_alpaca[qid]["score"],
                "llama": review_llama[qid]["score"],
                "bard": review_bard[qid]["score"],
                "gpt35": review_gpt35[qid]["score"],
            },
        }

        # Clean up the review text: drop a leading score line if present, and
        # bold the assistant labels for markdown rendering.
        cleaned_evals = {}
        for k, v in r["evaluations"].items():
            v = v.strip()
            lines = v.split("\n")
            # Trim the first line if it is a pair of numbers (the raw scores).
            if re.match(r"\d+[, ]+\d+", lines[0]):
                lines = lines[1:]
            v = "\n".join(lines)
            cleaned_evals[k] = v.replace("Assistant 1", "**Assistant 1**").replace(
                "Assistant 2", "**Assistant 2**"
            )
        r["evaluations"] = cleaned_evals
        records.append(r)
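
    # Illustrative effect of the cleanup above: a review whose text begins
    # with "8, 9\nAssistant 1 provided ..." is emitted as
    # "**Assistant 1** provided ...".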

    # Reorder the records for display; this is optional. The three passes
    # apply a fixed permutation to the question ids before sorting by id.
    for r in records:
        if r["id"] <= 20:
            r["id"] += 60
        else:
            r["id"] -= 20
    for r in records:
        if r["id"] <= 50:
            r["id"] += 10
        elif 50 < r["id"] <= 60:
            r["id"] -= 50
    for r in records:
        if r["id"] == 7:
            r["id"] = 1
        elif r["id"] < 7:
            r["id"] += 1
    records.sort(key=lambda x: x["id"])

    # Write the combined records to the webpage data file.
    with open("webpage/data.json", "w") as f:
        json.dump({"questions": records, "models": models}, f, indent=2)
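
# The resulting webpage/data.json has this overall shape (illustrative
# values):
# {
#   "questions": [
#     {
#       "id": 1,
#       "category": "...",
#       "question": "...",
#       "answers": {"alpaca": "...", "llama": "...", "bard": "...",
#                   "gpt35": "...", "vicuna": "..."},
#       "evaluations": {"alpaca": "...", "llama": "...",
#                       "bard": "...", "gpt35": "..."},
#       "scores": {"alpaca": ..., "llama": ..., "bard": ..., "gpt35": ...}
#     },
#     ...
#   ],
#   "models": ["alpaca", "llama", "gpt35", "bard"]
# }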