"""Convert the Databricks Dolly 15k JSONL dump into an Alpaca-style JSON file.

Each input record's ``instruction`` / ``context`` / ``response`` fields are
mapped to ``instruction`` / ``input`` / ``output``, one extra hand-written
identity example is appended, and the resulting list is written as a single
JSON array.
"""
import json

SOURCE_PATH = "databricks-dolly-15k.jsonl"
TARGET_PATH = "data.json"

# Hand-written example appended after all converted records.
IDENTITY_EXAMPLE = {
    "instruction": "What is your name?",
    "input": "",
    "output": "My name is LibreAlpaca.",
}


def load_jsonl(path):
    """Return the list of objects parsed from the JSON Lines file at *path*.

    The file is decoded as UTF-8 regardless of the platform default.
    Whitespace-only lines are skipped.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, encoding="utf-8") as file:
        # Iterate the file object instead of ``read().splitlines()``: the
        # latter also splits on U+2028/U+2029/U+0085, which may legally occur
        # unescaped inside a JSON string and would cut a record in half.
        return [json.loads(line) for line in file if line.strip()]


def to_alpaca(records):
    """Map Dolly-style records to Alpaca-style dicts plus the identity example.

    Args:
        records: iterable of mappings with ``instruction``, ``context`` and
            ``response`` keys; any other keys are ignored.

    Returns:
        A new list of ``{"instruction", "input", "output"}`` dicts, in input
        order, followed by a copy of ``IDENTITY_EXAMPLE``.

    Raises:
        KeyError: if a record lacks one of the three required keys.
    """
    converted = [
        {
            "instruction": ex["instruction"],
            "input": ex["context"],
            "output": ex["response"],
        }
        for ex in records
    ]
    # Copy so callers mutating the result cannot alter the module constant.
    converted.append(dict(IDENTITY_EXAMPLE))
    return converted


def convert(src_path=SOURCE_PATH, dst_path=TARGET_PATH):
    """Read the JSONL file at *src_path* and write Alpaca-style JSON to *dst_path*.

    The dataset is fully built before the output file is opened, so a
    malformed input does not truncate an existing output file.
    """
    ds = to_alpaca(load_jsonl(src_path))
    with open(dst_path, "w", encoding="utf-8") as file:
        json.dump(ds, file)


if __name__ == "__main__":
    convert()