sociolome / tools /framenet /gen_fn_data.py
Gosse Minnema
Initial commit
05922fb
import json
import os
from argparse import ArgumentParser
from tools.framenet.retokenize_fn import load_nltk_exemplars, load_nltk_fully_annotated
def _write_jsonl(path, records):
    """Write ``records`` to ``path`` in JSON Lines form, one document per line.

    Lines are separated by a single newline character and no trailing newline
    is emitted, so the output is byte-identical to what this script produced
    before the file handling was moved into a context manager.
    """
    with open(path, 'w', encoding='utf-8') as out_file:
        out_file.write('\n'.join(map(json.dumps, records)))


def main(src_path, dst_path):
    """Write the FrameNet 1.7 splits as JSONL files under ``dst_path``.

    Two directories are created (if missing) and filled:

    * ``<dst_path>/full``     -- the ``train``/``dev``/``test`` splits as loaded.
    * ``<dst_path>/full_exe`` -- same ``dev``/``test``, but the training split
      is extended with the exemplar data.

    Each directory receives ``train.jsonl``, ``dev.jsonl``, ``test.jsonl`` and
    ``full.jsonl`` (train + dev + test concatenated, in that order).

    Args:
        src_path: Directory containing ``full.17.json`` and ``exe.17.json``.
            When ``None``, the data is obtained from
            ``load_nltk_fully_annotated('1.7')`` and
            ``load_nltk_exemplars('1.7')`` instead.
        dst_path: Directory under which the output directories are written.

    Note:
        The splits and the exemplar data are concatenated with ``+``, so they
        are assumed to be lists of JSON-serializable items -- TODO confirm
        against the loaders.
    """
    if src_path is not None:
        # Context managers guarantee the handles are closed even if parsing
        # fails; the original relied on garbage collection to close them.
        with open(os.path.join(src_path, 'full.17.json'), encoding='utf-8') as full_file:
            full = json.load(full_file)
        with open(os.path.join(src_path, 'exe.17.json'), encoding='utf-8') as exe_file:
            exe = json.load(exe_file)
    else:
        full = load_nltk_fully_annotated('1.7')
        exe = load_nltk_exemplars('1.7')
    train, dev, test = full['train'], full['dev'], full['test']

    def dump(train_set, path):
        """Write the per-split files plus the combined ``full.jsonl`` into ``path``."""
        os.makedirs(path, exist_ok=True)
        # Only the training split varies between outputs; dev/test are shared.
        for split, data_set in (('train', train_set), ('dev', dev), ('test', test)):
            _write_jsonl(os.path.join(path, split + '.jsonl'), data_set)
        _write_jsonl(os.path.join(path, 'full.jsonl'), train_set + dev + test)

    # Full text only
    dump(train, os.path.join(dst_path, 'full'))
    # Full text + exemplar
    dump(train + exe, os.path.join(dst_path, 'full_exe'))
if __name__ == '__main__':
    # Command-line entry point: one required output directory and an optional
    # directory holding the already retokenized FrameNet JSON files.
    arg_parser = ArgumentParser()
    arg_parser.add_argument('dst', metavar='destination')
    arg_parser.add_argument(
        '-s',
        metavar='data',
        default=None,
        help='Path to retokenized framenet. If not provided, will re-load.',
    )
    parsed = arg_parser.parse_args()
    source_dir = parsed.s
    target_dir = parsed.dst
    main(source_dir, target_dir)