Spaces:
Running
Running
import argparse | |
import json | |
def extract_first_sen(content):
    """Return copies of the items with only the first conversation turn kept.

    Args:
        content: Iterable of dict-like items, each with a ``'conversations'``
            key whose value is a sequence.

    Returns:
        A new list of new dicts. Every key of the source item is preserved
        (in the same order); ``'conversations'`` is replaced by a list holding
        at most its first element.

    Raises:
        KeyError: If an item has no ``'conversations'`` key.

    Unlike a plain ``tmp = item`` assignment, the input items are not
    modified, so the caller's data keeps its full conversations.
    """
    # Slicing with [:1] (rather than indexing [0]) yields an empty list for an
    # item without any turns instead of raising IndexError.
    return [
        {**item, 'conversations': list(item['conversations'][:1])}
        for item in content
    ]
def main(args):
    """Load a JSON file, keep each item's first conversation turn, and save it.

    Args:
        args: Mapping with ``"in_file"`` (path of the JSON file to read) and
            ``"out_file"`` (path of the JSON file to write).

    The input is read completely and closed before the output is opened, so
    both paths may refer to the same file.
    """
    # Context managers guarantee both handles are closed (and the output is
    # flushed) even if parsing or serialisation raises.
    with open(args["in_file"], "r", encoding="utf-8") as in_fp:
        content = json.load(in_fp)

    content = extract_first_sen(content)

    with open(args["out_file"], "w", encoding="utf-8") as out_fp:
        json.dump(content, out_fp, indent=2)
if __name__ == "__main__":
    # Script entry point: collect the input/output paths from the command
    # line and pass them to main() as a plain dict keyed by "in_file" and
    # "out_file".
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--in-file",
        type=str,
        default='sg_90k_part1_html_cleaned_lang.json',
    )
    cli.add_argument(
        "--out-file",
        type=str,
        default="sg_90k_part1_html_cleaned_lang_first.json",
    )
    main(vars(cli.parse_args()))