File size: 720 Bytes
906b628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import argparse
import json


def extract_first_sen(content):
    """Return copies of the records in *content* keeping only the first turn.

    Args:
        content: Iterable of dicts, each holding a ``"conversations"``
            sequence.

    Returns:
        A new list with one shallow copy per input record, in input order.
        In each copy ``"conversations"`` is a list containing only the first
        element of the original sequence (or an empty list when the original
        sequence was empty). All other keys are carried over unchanged.

    Raises:
        KeyError: If a record has no ``"conversations"`` key.
    """
    # Build a fresh dict per record so the caller's data is not truncated in
    # place; ``{**item, ...}`` keeps the original key order. Slicing with
    # ``[:1]`` instead of indexing ``[0]`` tolerates an empty conversation.
    return [
        {**item, "conversations": list(item["conversations"][:1])}
        for item in content
    ]


def main(args):
    """Read a JSON file, keep the first conversation turn of each record, and write the result.

    Args:
        args: Mapping with the keys ``"in_file"`` (path of the JSON file to
            read) and ``"out_file"`` (path of the JSON file to write).
    """
    # Context managers guarantee both handles are closed (and the output is
    # flushed) even if parsing or serialization raises.
    with open(args["in_file"], "r", encoding="utf-8") as in_fp:
        content = json.load(in_fp)

    content = extract_first_sen(content)

    with open(args["out_file"], "w", encoding="utf-8") as out_fp:
        json.dump(content, out_fp, indent=2)

if __name__ == "__main__":
    # Command-line entry point: both paths are optional and fall back to the
    # defaults declared below.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--in-file",
        type=str,
        default="sg_90k_part1_html_cleaned_lang.json",
    )
    parser.add_argument(
        "--out-file",
        type=str,
        default="sg_90k_part1_html_cleaned_lang_first.json",
    )
    args = parser.parse_args()
    # main() takes a plain dict, so convert the Namespace with vars().
    main(vars(args))