Text Generation
Transformers
English
mixtral
legal
conversational
Inference Endpoints
redactable-dolphin-mixtral / convertToShareGpt.py
d-delaurier's picture
Upload 11 files
30e605a
raw
history blame
921 Bytes
import argparse
import jsonlines
import json
from tqdm import tqdm
import uuid
parser = argparse.ArgumentParser()
parser.add_argument(
"--in-file", type=str, required=True, default="flan5m-alpaca-uncensored.jsonl"
)
parser.add_argument(
"--out-file", type=str, required=True, default="flan5m-sharegpt.json"
)
args = parser.parse_args()
in_file = args.in_file
out_file = args.out_file
f = open(out_file, "w", encoding="utf-8")
out = []
with jsonlines.open(in_file) as reader:
for obj in tqdm(reader):
out.append(
{
"id": f"{uuid.uuid4()}",
"bot": "dolphin",
"training": obj["instruction"],
"conversations": [
{"from": "human", "value": obj["input"]},
{"from": "gpt", "value": obj["output"]},
],
}
)
json.dump(out, f, ensure_ascii=False)
f.close()