mini-agent / scripts /preprocess_apigen_dataset.py
qninhdt
cc
9a9a2f5
raw
history blame contribute delete
706 Bytes
import json
# Location of the raw APIGen dump and of the flattened file this script emits.
INPUT_PATH = "./datasets/apigen/xlam_function_calling_60k.json"
OUTPUT_PATH = "./datasets/apigen/output.json"


def normalize_tool_name(name):
    """Return *name* with every ``.`` replaced by ``_``."""
    return name.replace(".", "_")


def convert_sample(sample):
    """Convert one raw APIGen record into the flattened training record.

    Args:
        sample: Mapping with a ``"query"`` value and two JSON-encoded string
            fields: ``"answers"`` (a list of objects carrying a ``"name"``)
            and ``"tools"`` (a list of objects carrying ``"name"`` and
            ``"description"``).

    Returns:
        A dict with keys ``"instruction"``, ``"tools"`` and ``"used_tools"``,
        or ``None`` when the record's answers call no tool at all.

    Raises:
        KeyError: If an expected field is missing.
        json.JSONDecodeError: If ``"answers"`` (or ``"tools"``, for a record
            that is kept) is not valid JSON.
    """
    calls = json.loads(sample["answers"])

    # Apply the same normalisation used for the tool definitions below so that
    # every entry of ``used_tools`` can be looked up by name in ``tools``.
    # ``dict.fromkeys`` de-duplicates while keeping first-call order; a plain
    # ``set`` would order strings by hash, which varies from run to run.
    used_tools = list(
        dict.fromkeys(normalize_tool_name(call["name"]) for call in calls)
    )
    if not used_tools:
        return None

    # Parsed only for records that are kept, so a skipped record never fails
    # on its tool list.
    tools = [
        {
            "name": normalize_tool_name(tool["name"]),
            "description": tool["description"],
        }
        for tool in json.loads(sample["tools"])
    ]
    return {
        "instruction": sample["query"],
        "tools": tools,
        "used_tools": used_tools,
    }


def main():
    """Read the raw APIGen dataset, convert each record, and write the result."""
    # Imported here so the conversion helpers above stay importable without
    # the progress-bar dependency.
    from tqdm import tqdm

    with open(INPUT_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)

    results = []
    for sample in tqdm(data, desc="Processing APIGen samples"):
        converted = convert_sample(sample)
        if converted is not None:
            results.append(converted)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)


if __name__ == "__main__":
    main()