# leaderboard/sharegpt/extract_first.py
# AmberLJC's picture
# sharegpt
# 906b628
# raw
# history blame
# 720 Bytes
import argparse
import json
def extract_first_sen(content):
    """Keep only the first conversation turn of every record.

    Args:
        content: Iterable of dict records, each holding a "conversations"
            sequence.

    Returns:
        A new list of shallow-copied records in the same order, where each
        record's "conversations" entry is a list containing at most the
        first turn. A record whose conversation sequence is empty yields an
        empty list rather than raising IndexError.
    """
    # Build a fresh dict per record so the caller's input is left untouched;
    # slicing with [:1] (instead of indexing [0]) tolerates empty sequences.
    return [
        {**item, "conversations": list(item["conversations"][:1])}
        for item in content
    ]
def main(args):
    """Load the input JSON, truncate every record, and write the result.

    Args:
        args: Mapping with the keys "in_file" (path of the JSON file to
            read) and "out_file" (path of the JSON file to write).
    """
    # Context managers guarantee both handles are closed (and the output is
    # flushed) even if parsing or serialization raises.
    with open(args["in_file"], "r", encoding="utf-8") as in_fp:
        content = json.load(in_fp)
    content = extract_first_sen(content)
    with open(args["out_file"], "w", encoding="utf-8") as out_fp:
        json.dump(content, out_fp, indent=2)
if __name__ == "__main__":
    # Both paths are optional; each flag falls back to the default file name
    # paired with it below.
    parser = argparse.ArgumentParser()
    for flag, fallback in (
        ("--in-file", "sg_90k_part1_html_cleaned_lang.json"),
        ("--out-file", "sg_90k_part1_html_cleaned_lang_first.json"),
    ):
        parser.add_argument(flag, type=str, default=fallback)
    # main() indexes its argument like a dict, so convert the namespace.
    args = parser.parse_args()
    main(vars(args))