Spaces:
Running
Running
File size: 720 Bytes
906b628 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import argparse
import json
def extract_first_sen(content):
    """Return copies of the records in *content* truncated to their first turn.

    Each element of *content* is expected to be a dict with a
    ``'conversations'`` entry that supports indexing (a list, judging by how
    it is rebuilt here -- TODO confirm against the input data).

    Args:
        content: Iterable of record dicts.

    Returns:
        A new list with one shallow copy per record, in input order, whose
        ``'conversations'`` value is a one-element list holding the original
        first entry, or an empty list when the record had no entries.

    Raises:
        KeyError: If a record has no ``'conversations'`` key.
    """
    result = []
    for item in content:
        conversations = item['conversations']
        # Guard the empty case explicitly instead of failing with IndexError.
        first_turn = [conversations[0]] if conversations else []
        # Build a new dict rather than assigning into ``item``: the caller's
        # records must stay intact. 'conversations' already exists in ``item``,
        # so it keeps its original position in the key order.
        result.append({**item, 'conversations': first_turn})
    return result
def main(args):
    """Load a JSON file, truncate each record's conversations, and save it.

    Args:
        args: Mapping with the keys ``"in_file"`` (path of the JSON file to
            read) and ``"out_file"`` (path of the JSON file to write).
    """
    # Context managers guarantee both handles are closed (and the output is
    # flushed) even if parsing or serialization raises.
    with open(args["in_file"], "r", encoding="utf-8") as in_fp:
        content = json.load(in_fp)

    content = extract_first_sen(content)

    with open(args["out_file"], "w", encoding="utf-8") as out_fp:
        json.dump(content, out_fp, indent=2)
if __name__ == "__main__":
    # Script entry point: read the input/output paths from the command line
    # (both optional, with dataset-specific defaults) and hand them to main()
    # as a plain dict.
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument(
        "--in-file",
        type=str,
        default='sg_90k_part1_html_cleaned_lang.json',
    )
    cli_parser.add_argument(
        "--out-file",
        type=str,
        default="sg_90k_part1_html_cleaned_lang_first.json",
    )
    main(vars(cli_parser.parse_args()))
|