# Source: calahealthgpt — fastchat/serve/monitor/clean_battle_data.py
# (uploaded via huggingface_hub, revision e72aedf, 7.07 kB)
"""
Clean chatbot arena battle log.
"""
import argparse
import datetime
import json
import os
from pytz import timezone
import time
from tqdm import tqdm
from fastchat.serve.monitor.basic_stats import get_log_files
from fastchat.utils import detect_language
# Vote event types that count as a completed battle.
VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]

# Substrings that reveal a model's identity; any conversation containing one
# of these is dropped so anonymous battles stay anonymous.
# NOTE(review): the conversation text is lowercased before matching, so the
# final all-caps entry can only match if the comparison lowercases the probe
# word as well — verify in clean_battle_data.
IDENTITY_WORDS = [
    "vicuna",
    "lmsys",
    "koala",
    "uc berkeley",
    "open assistant",
    "laion",
    "chatglm",
    "chatgpt",
    "openai",
    "anthropic",
    "claude",
    "bard",
    "palm",
    "lamda",
    "google",
    "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**",
]
def get_log_files(max_num_files=None):
    """Collect existing arena conversation logs for April-June 2023.

    Scans ``~/fastchat_logs/server{0..11}/{date}-conv.json`` for every
    calendar date string in the window (impossible dates such as 04-31 are
    harmless — they simply never exist on disk) and returns the paths that
    are present. When ``max_num_files`` is given, only the last that many
    paths are kept.
    """
    candidate_dates = [
        f"2023-{month:02d}-{day:02d}" for month in (4, 5, 6) for day in range(1, 32)
    ]
    found = []
    for date_str in candidate_dates:
        for server_idx in range(12):
            path = os.path.expanduser(
                f"~/fastchat_logs/server{server_idx}/{date_str}-conv.json"
            )
            if os.path.exists(path):
                found.append(path)
    limit = max_num_files or len(found)
    return found[-limit:]
def remove_html(raw):
    """Strip the arena's ``<h3>Label: model</h3>`` wrapper from a model name.

    Strings that do not start with ``<h3>`` are returned unchanged.
    """
    if not raw.startswith("<h3>"):
        return raw
    # Keep only the text between ": " and the trailing "</h3>\n".
    start = raw.find(": ") + 2
    return raw[start : -len("</h3>\n")]
def to_openai_format(messages):
    """Convert ``[(role_label, text), ...]`` pairs to OpenAI chat dicts.

    Even positions become "user" turns and odd positions "assistant" turns;
    only the text (second element) of each pair is kept.
    """
    role_cycle = ("user", "assistant")
    return [
        {"role": role_cycle[idx % 2], "content": pair[1]}
        for idx, pair in enumerate(messages)
    ]
def clean_battle_data(log_files):
    """Clean raw arena vote logs into a sorted list of battle records.

    Reads vote events from ``log_files``, drops malformed or
    identity-leaking conversations, resolves anonymous model names, and
    returns battle dicts sorted by timestamp. Prints summary statistics
    as a side effect.

    Raises:
        FileNotFoundError: if a log file is still missing after retries.
    """
    data = []
    for filename in tqdm(log_files, desc="read files"):
        # Logs may be mid-rotation; retry briefly before giving up.
        lines = None
        for _retry in range(5):
            try:
                with open(filename) as fin:
                    lines = fin.readlines()
                break
            except FileNotFoundError:
                time.sleep(2)
        if lines is None:
            # BUG FIX: the original left `lines` unbound (NameError) when
            # the first file failed all retries, and silently reprocessed
            # the previous file's lines when a later one failed. Fail loudly.
            raise FileNotFoundError(filename)
        for l in lines:
            row = json.loads(l)
            if row["type"] in VOTES:
                data.append(row)

    convert_type = {
        "leftvote": "model_a",
        "rightvote": "model_b",
        "tievote": "tie",
        "bothbad_vote": "tie (bothbad)",
    }

    all_models = set()
    ct_anony = 0
    ct_invalid = 0
    ct_leaked_identity = 0
    battles = []
    for row in data:
        # Resolve model names: the hidden names stored in the states take
        # priority over the public (possibly "Model A"/"Model B") labels.
        models_public = [remove_html(row["models"][0]), remove_html(row["models"][1])]
        if "model_name" in row["states"][0]:
            models_hidden = [
                row["states"][0]["model_name"],
                row["states"][1]["model_name"],
            ]
            if models_hidden[0] is None:
                models_hidden = models_public
        else:
            models_hidden = models_public

        # Exactly one empty public name means a malformed record.
        if (models_public[0] == "" and models_public[1] != "") or (
            models_public[1] == "" and models_public[0] != ""
        ):
            ct_invalid += 1
            continue

        if models_public[0] == "" or models_public[0] == "Model A":
            anony = True
            models = models_hidden
            ct_anony += 1
        else:
            anony = False
            models = models_public
            if models_public != models_hidden:
                ct_invalid += 1
                continue

        # Detect language of the first user message.
        state = row["states"][0]
        if state["offset"] >= len(state["messages"]):
            ct_invalid += 1
            continue
        lang_code = detect_language(state["messages"][state["offset"]][1])
        rounds = (len(state["messages"]) - state["offset"]) // 2

        # Drop conversations if the model names are leaked
        leaked_identity = False
        messages = ""
        for i in range(2):
            state = row["states"][i]
            for role, msg in state["messages"][state["offset"] :]:
                if msg:
                    messages += msg.lower()
        for word in IDENTITY_WORDS:
            # BUG FIX: `messages` is lowercased, so uppercase IDENTITY_WORDS
            # entries (the network-error banner) never matched; lowercase the
            # probe word before comparing.
            if word.lower() in messages:
                leaked_identity = True
                break
        if leaked_identity:
            ct_leaked_identity += 1
            continue

        # Replace bard with palm
        models = [m.replace("bard", "palm-2") for m in models]

        question_id = row["states"][0]["conv_id"]
        conversation_a = to_openai_format(
            row["states"][0]["messages"][row["states"][0]["offset"] :]
        )
        conversation_b = to_openai_format(
            row["states"][1]["messages"][row["states"][1]["offset"] :]
        )

        # Save the result
        battles.append(
            dict(
                question_id=question_id,
                model_a=models[0],
                model_b=models[1],
                winner=convert_type[row["type"]],
                judge="arena_user",
                conversation_a=conversation_a,
                conversation_b=conversation_b,
                turn=len(conversation_a) // 2,
                anony=anony,
                rounds=rounds,
                language=lang_code,
                tstamp=row["tstamp"],
            )
        )
        all_models.update(models_hidden)

    battles.sort(key=lambda x: x["tstamp"])
    last_updated_tstamp = battles[-1]["tstamp"]
    last_updated_datetime = datetime.datetime.fromtimestamp(
        last_updated_tstamp, tz=timezone("US/Pacific")
    ).strftime("%Y-%m-%d %H:%M:%S %Z")

    print(
        f"#votes: {len(data)}, #invalid votes: {ct_invalid}, "
        f"#leaked_identity: {ct_leaked_identity}"
    )
    print(f"#battles: {len(battles)}, #anony: {ct_anony}")
    print(f"#models: {len(all_models)}, {all_models}")
    print(f"last-updated: {last_updated_datetime}")

    return battles
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-num-files", type=int)
    parser.add_argument(
        "--mode", type=str, choices=["simple", "conv_release"], default="simple"
    )
    args = parser.parse_args()

    log_files = get_log_files(args.max_num_files)
    battles = clean_battle_data(log_files)
    last_updated_tstamp = battles[-1]["tstamp"]
    cutoff_date = datetime.datetime.fromtimestamp(
        last_updated_tstamp, tz=timezone("US/Pacific")
    ).strftime("%Y%m%d")

    if args.mode == "simple":
        # Strip heavy / identifying fields for the summary release.
        for x in battles:
            for key in [
                "conversation_a",
                "conversation_b",
                "judge",
                "question_id",
                "turn",
            ]:
                del x[key]
        print("Samples:")
        # BUG FIX: the original always printed 4 samples and raised
        # IndexError when fewer battles survived cleaning.
        for i in range(min(4, len(battles))):
            print(battles[i])
        output = f"clean_battle_{cutoff_date}.json"
    elif args.mode == "conv_release":
        # Only anonymous battles are safe to release with full conversations.
        new_battles = []
        for x in battles:
            if not x["anony"]:
                continue
            for key in ["rounds"]:
                del x[key]
            new_battles.append(x)
        battles = new_battles
        output = f"clean_battle_conv_release_{cutoff_date}.json"

    with open(output, "w") as fout:
        json.dump(battles, fout, indent=2, ensure_ascii=False)
    print(f"Write cleaned data to {output}")