# Spaces: Runtime error (page-status banner captured with this file; not part of the script)
import argparse
import json
import pathlib
# Prompt templates taken from Stanford Alpaca's training script.
# Each template is filled with ``str.format_map`` from an example mapping:
# "prompt_input" needs the keys "instruction" and "input";
# "prompt_no_input" needs only "instruction".
PROMPT_DICT = {
    # Chosen by main() when the example has a non-empty "input" field.
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
    ),
    # Chosen by main() when "input" is missing or is the empty string.
    "prompt_no_input": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response:"
    ),
}
def main(args):
    """Convert an Alpaca-style instruction dataset to conversation records.

    Loads the JSON file at ``args.data_path``, renders every example into a
    prompt with the templates in ``PROMPT_DICT``, and writes the result to
    ``args.output_path`` as an indented JSON list. Each output record has a
    1-based string ``"id"`` and a two-turn ``"conversations"`` list: the
    rendered prompt from ``"human"`` and the example's ``"output"`` from
    ``"gpt"``.

    Args:
        args: Object exposing ``data_path`` and ``output_path`` attributes
            (the parsed command-line arguments).

    Raises:
        KeyError: If an example lacks a key required by the chosen template
            (``"instruction"``, or ``"input"``) or lacks ``"output"``.
    """
    data_path = pathlib.Path(args.data_path)
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with data_path.open(encoding="utf-8") as f:
        data = json.load(f)

    prompt_input = PROMPT_DICT["prompt_input"]
    prompt_no_input = PROMPT_DICT["prompt_no_input"]

    new_data = []
    for idx, example in enumerate(data, start=1):
        # A missing or empty "input" field selects the instruction-only template.
        if example.get("input", "") != "":
            template = prompt_input
        else:
            template = prompt_no_input
        new_data.append(
            {
                "id": str(idx),
                "conversations": [
                    {"from": "human", "value": template.format_map(example)},
                    {"from": "gpt", "value": example["output"]},
                ],
            }
        )

    # The context manager guarantees the output is flushed and closed,
    # even if serialization fails part-way through.
    with open(args.output_path, "w", encoding="utf-8") as f:
        json.dump(new_data, f, indent=2)
# Script entry point: parse the input/output paths and run the conversion.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Convert an Alpaca-style JSON dataset to conversation format."
    )
    parser.add_argument(
        "--data_path",
        type=str,
        default="alpaca-data.json",
        help="Path of the input JSON file (default: %(default)s).",
    )
    parser.add_argument(
        "--output_path",
        type=str,
        default="alpaca-data-conversation.json",
        help="Path of the JSON file to write (default: %(default)s).",
    )
    args = parser.parse_args()
    main(args)