Spaces:
Build error
Build error
File size: 2,719 Bytes
6c25ddb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os
import json
import argparse
from copy import deepcopy
import spacy
from spacy import displacy
import re
# Role names of the form ``evt<digits>arg<digits><role>``; group 1 is the bare role.
_ROLE_PATTERN = re.compile(r'evt\d+arg\d+(\w+)')


def _word_char_offsets(words):
    """Map each word index to its ``(start, end)`` span in ``' '.join(words)``.

    ``end`` is exclusive.
    """
    offsets = {}
    cursor = 0
    for idx, word in enumerate(words):
        end = cursor + len(word)
        offsets[idx] = (cursor, end)
        cursor = end + 1  # step over the single joining space
    return offsets


def _role_label(arg_name):
    """Return the role with any ``evt..arg..`` prefix removed; other names pass through."""
    match = _ROLE_PATTERN.match(arg_name)
    return match.group(1) if match else arg_name


def build_render_dict(ex, gold=False):
    """Convert one parsed example into a dict for ``displacy.render(..., manual=True)``.

    Args:
        ex: One JSON record. Reads ``doc_key``, ``sentences`` (flattened into a
            single word list), ``evt_triggers`` (only the first trigger is
            used) and the link list: ``ref_evt_links`` when ``gold`` is true,
            ``gold_evt_links`` otherwise. Each link is unpacked as
            ``(trigger_span, arg_span, arg_name)`` where ``arg_span`` holds
            inclusive word indices.
        gold: Selects the link list (see above) and appends ``_gold`` to the
            title.

    Returns:
        A dict shaped like::

            {"text": "But Google is starting from behind.",
             "ents": [{"start": 4, "end": 10, "label": "ORG"}],
             "title": "doc1"}

        with ``ents`` ordered by the first word index of each span. The trigger
        is included as one more entity labelled with ``trigger[2][0][0]``
        (presumably the event type -- confirm against the data format).

    Raises:
        KeyError, IndexError: If ``ex`` lacks an expected field or a span
            points outside the word list.
    """
    doc_key = ex['doc_key']
    words = [w for sent in ex['sentences'] for w in sent]
    offsets = _word_char_offsets(words)

    trigger = ex['evt_triggers'][0]
    trigger_span = (trigger[0], trigger[1])
    trigger_type = trigger[2][0][0]

    # Copy before appending so the caller's record is not modified.
    links = list(ex['ref_evt_links'] if gold else ex['gold_evt_links'])
    # The trigger is rendered like an argument whose span is the trigger itself.
    links.append([trigger_span, trigger_span, trigger_type])
    links.sort(key=lambda link: link[1][0])

    ents = [
        {
            "start": offsets[arg_span[0]][0],
            "end": offsets[arg_span[1]][1],
            "label": _role_label(arg_name),
        }
        for _trigger_span, arg_span, arg_name in links
    ]

    return {
        "text": ' '.join(words),
        "ents": ents,
        "title": '{}_gold'.format(doc_key) if gold else doc_key,
    }


def output_html_path(result_file, gold=False):
    """Return the HTML path written for ``result_file``.

    Only the final extension is replaced, so dots elsewhere in the path
    (``./preds.jsonl``, ``runs/v1.0/preds.jsonl``) are preserved. With
    ``gold`` the stem gets a ``.gold`` suffix before ``.html``.
    """
    stem = os.path.splitext(result_file)[0]
    if gold:
        stem += '.gold'
    return '{}.html'.format(stem)


def main(argv=None):
    """Render every example of a JSONL result file to one displaCy HTML page.

    Args:
        argv: Argument list for argparse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--result-file', type=str, default='gen-trigger-pred-output.jsonl')
    parser.add_argument('--gold', action='store_true')
    args = parser.parse_args(argv)

    render_dicts = []
    with open(args.result_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            render_dicts.append(build_render_dict(json.loads(line), gold=args.gold))

    html = displacy.render(render_dicts, style="ent", manual=True, page=True)

    with open(output_html_path(args.result_file, gold=args.gold), 'w', encoding='utf-8') as f:
        f.write(html)


if __name__ == "__main__":
    main()
|