# NOTE: page-header residue captured together with this file (not part of the
# program): "Spaces: Build error"
import json
import os

import spacy
# Coarse-grained part-of-speech tags, compared against each token's ``pos_``
# value in do_frameid(); tokens carrying one of these tags are recorded as
# "predicates".
TARGET_POS = [
    "NOUN",
    "VERB",
    "ADJ",
    "ADV",
]
def do_frameid():
    """Tag each line of the raw titles corpus and write one JSON record per line.

    Loads the spaCy pipeline ``it_core_news_md``, reads
    ``data/migration/corpus_titoli_all_raw.txt`` line by line, and writes to
    ``output/migration/pos_based_targetid/corpus_titoli_all_raw.jsonl`` one
    JSON object per input line with two keys:

    * ``"tokens"``: the text of every token the pipeline produced for the
      stripped line;
    * ``"predicates"``: the indices (positions in ``"tokens"``) of the tokens
      whose ``pos_`` is listed in ``TARGET_POS``.

    Every input line, including blank ones, is passed to the pipeline and
    yields exactly one output line, so input and output stay line-aligned.

    Raises:
        OSError: if the input file cannot be read or the output file cannot
            be created.
    """
    input_path = "data/migration/corpus_titoli_all_raw.txt"
    output_path = "output/migration/pos_based_targetid/corpus_titoli_all_raw.jsonl"

    nlp = spacy.load("it_core_news_md")

    # open(..., "w") does not create missing parent directories, so make sure
    # the nested output directory exists before writing.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with open(input_path, encoding="utf-8") as f_in, \
            open(output_path, "w", encoding="utf-8") as f_out:
        for line in f_in:
            doc = nlp(line.strip())
            out = {
                "tokens": [t.text for t in doc],
                "predicates": [i for i, t in enumerate(doc) if t.pos_ in TARGET_POS],
            }
            # Text-mode files already translate "\n" into the platform line
            # ending; writing os.linesep here would emit "\r\r\n" on Windows.
            f_out.write(json.dumps(out) + "\n")
# Run the tagging job only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    do_frameid()