|
import json |
|
import os |
|
from argparse import ArgumentParser |
|
|
|
from tools.framenet.retokenize_fn import load_nltk_exemplars, load_nltk_fully_annotated |
|
|
|
|
|
def main(src_path, dst_path):
    """Export the FrameNet 1.7 splits as JSON-lines files under ``dst_path``.

    Two sub-directories are written, ``full`` and ``full_exe``. Each one
    contains ``train.jsonl``, ``dev.jsonl``, ``test.jsonl`` and a combined
    ``full.jsonl`` (train + dev + test, in that order). In ``full_exe`` the
    training split is the fully-annotated training data followed by the
    exemplar data; dev and test are identical in both directories.

    Args:
        src_path: Directory holding ``full.17.json`` and ``exe.17.json``.
            When ``None``, the data is obtained from
            ``load_nltk_fully_annotated('1.7')`` and
            ``load_nltk_exemplars('1.7')`` instead.
        dst_path: Output root directory; sub-directories are created as
            needed.

    Note:
        Assumes the fully-annotated data is a mapping with ``'train'``,
        ``'dev'`` and ``'test'`` keys whose values are lists of
        JSON-serializable items, and that the exemplar data is a list
        (it is concatenated with ``+``) -- confirm against the loaders.
    """
    if src_path is not None:
        # Context managers guarantee the handles are closed right away
        # instead of whenever the garbage collector gets to them.
        with open(os.path.join(src_path, 'full.17.json'), encoding='utf-8') as fp:
            full = json.load(fp)
        with open(os.path.join(src_path, 'exe.17.json'), encoding='utf-8') as fp:
            exe = json.load(fp)
    else:
        full = load_nltk_fully_annotated('1.7')
        exe = load_nltk_exemplars('1.7')
    train, dev, test = full['train'], full['dev'], full['test']

    def write_jsonl(file_path, records):
        # One JSON document per line; no trailing newline after the last one.
        with open(file_path, 'w', encoding='utf-8') as fp:
            fp.write('\n'.join(map(json.dumps, records)))

    def dump(train_set, path):
        # Only the training split varies between the two exports; dev and
        # test are taken from the enclosing scope.
        os.makedirs(path, exist_ok=True)
        for split, data_set in zip(['train', 'dev', 'test'], [train_set, dev, test]):
            write_jsonl(os.path.join(path, split + '.jsonl'), data_set)
        write_jsonl(os.path.join(path, 'full.jsonl'), train_set + dev + test)

    dump(train, os.path.join(dst_path, 'full'))
    dump(train + exe, os.path.join(dst_path, 'full_exe'))
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: one positional output directory plus an
    # optional ``-s`` pointing at previously retokenized data.
    arg_parser = ArgumentParser()
    arg_parser.add_argument('dst', metavar='destination')
    arg_parser.add_argument(
        '-s',
        metavar='data',
        default=None,
        help='Path to retokenized framenet. If not provided, will re-load.',
    )
    options = arg_parser.parse_args()
    # ``-s`` stays None when omitted, which makes main() reload the data.
    main(options.s, options.dst)
|
|