Spaces:
Build error
Build error
File size: 702 Bytes
b11ac48 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import os
import json
import spacy
# Coarse part-of-speech tags (compared against spaCy's ``Token.pos_``) whose
# tokens are recorded as predicate candidates in the output.
TARGET_POS = ["NOUN", "VERB", "ADJ", "ADV"]
def do_frameid(
    input_path="data/migration/corpus_titoli_all_raw.txt",
    output_path="output/migration/pos_based_targetid/corpus_titoli_all_raw.jsonl",
    nlp=None,
    target_pos=None,
):
    """Tokenize a corpus line by line and mark POS-selected target tokens.

    Every line of ``input_path`` is stripped, run through ``nlp`` and written
    to ``output_path`` as one JSON object per line (JSONL) with two keys:

    * ``"tokens"``: the text of every token in the line;
    * ``"predicates"``: the indices of the tokens whose ``pos_`` is one of
      the target tags.

    Blank input lines still produce a record (with two empty lists), so
    output line *i* always corresponds to input line *i*.

    Args:
        input_path: UTF-8 text file to read, one sentence/title per line.
        output_path: JSONL file to (over)write. Missing parent directories
            are created.
        nlp: Callable mapping a string to an iterable of tokens exposing
            ``.text`` and ``.pos_``. Defaults to the spaCy pipeline
            ``it_core_news_md``, loaded on demand.
        target_pos: Iterable of POS tags to keep. Defaults to the
            module-level ``TARGET_POS``.

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
    """
    if nlp is None:
        nlp = spacy.load("it_core_news_md")

    # A set gives O(1) membership tests inside the per-token loop.
    wanted = frozenset(TARGET_POS if target_pos is None else target_pos)

    # The original crashed when the output directory had not been created.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(input_path, encoding="utf-8") as f_in, \
            open(output_path, "w", encoding="utf-8") as f_out:
        for line in f_in:
            doc = nlp(line.strip())
            out = {
                "tokens": [t.text for t in doc],
                "predicates": [i for i, t in enumerate(doc) if t.pos_ in wanted],
            }
            # Write "\n", not os.linesep: text-mode files already translate
            # "\n" to the platform terminator, so os.linesep would yield
            # "\r\r\n" on Windows.
            f_out.write(json.dumps(out) + "\n")
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    do_frameid()
|