# -*- coding: utf-8 -*-
"""Gradio demo: Slovak named-entity recognition rendered with displaCy."""
import os

# Install the CPU-only PyTorch wheels before the heavy imports below so that
# `transformers` finds a usable torch backend. Best-effort: the exit status is
# intentionally not checked.
os.system("pip3 install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1+cpu -f "
          "https://download.pytorch.org/whl/cpu/torch_stable.html")

import gradio as gr
from transformers import pipeline
import spacy
from spacy import displacy

# Class id -> tag. Tags starting with 'B' open an entity, tags starting with
# 'I' continue one, and id 0 is the "no entity" class.
ner_map = {0: '0',
           1: 'B-OSOBA',
           2: 'I-OSOBA',
           3: 'B-ORGANIZÁCIA',
           4: 'I-ORGANIZÁCIA',
           5: 'B-LOKALITA',
           6: 'I-LOKALITA'}

# displaCy rendering options: which labels to show and their highlight colors.
options = {"ents": ["OSOBA",
                    "ORGANIZÁCIA",
                    "LOKALITA"],
           "colors": {"OSOBA": "lightblue",
                      "ORGANIZÁCIA": "lightcoral",
                      "LOKALITA": "lightgreen"}}

ner_pipeline = pipeline(task='ner', model="crabz/slovakbert-ner")
# Blank Slovak pipeline: used only to tokenize text and carry entity spans.
nlp = spacy.blank("sk")


def _find_mergeable_pair(entities):
    """Return the first two same-label entities that touch, or None.

    Entities are ``(label, start, end)`` tuples. Two entities "touch" when one
    ends exactly where the other starts. Pairs are searched in list order
    (outer index first, then inner index).
    """
    for i, first in enumerate(entities):
        for second in entities[i + 1:]:
            same_label = first[0] == second[0]
            touching = first[2] == second[1] or first[1] == second[2]
            if first != second and same_label and touching:
                return first, second
    return None


def postprocess(classifications):
    """Collapse token-level predictions into ``(label, start, end)`` entities.

    Args:
        classifications: sequence of dicts as produced by ``ner_pipeline``;
            the keys read here are ``'entity'``, ``'start'`` and ``'end'``.
            NOTE(review): ``'entity'`` is used directly as a key of
            ``ner_map``, so it is assumed to be an integer class id --
            confirm against the model configuration.

    Returns:
        A list of ``(label, start_char, end_char)`` tuples. Merged entities
        are appended at the end of the list, so the result is not necessarily
        ordered by position.
    """
    entities = []
    total = len(classifications)
    for i, token in enumerate(classifications):
        if token['entity'] == 0:
            continue
        tag = ner_map[token['entity']]
        if tag[0] != 'B':
            continue
        # Extend the entity over every directly following 'I' tagged token.
        j = i + 1
        while j < total and ner_map[classifications[j]['entity']][0] == 'I':
            j += 1
        entities.append((tag.split('-')[1],
                         token['start'],
                         classifications[j - 1]['end']))

    # Repeatedly fuse same-label entities whose character ranges touch (one
    # ends exactly where the other begins). Each pass merges a single pair,
    # so the loop terminates once no such pair is left.
    while True:
        pair = _find_mergeable_pair(entities)
        if pair is None:
            break
        first, second = pair
        entities.remove(first)
        entities.remove(second)
        entities.append((first[0],
                         min(first[1], second[1]),
                         max(first[2], second[2])))
    return entities


def set_entities(sentence, entities):
    """Tokenize ``sentence`` and attach ``entities`` to the resulting Doc.

    Args:
        sentence: raw input text.
        entities: iterable of ``(label, start_char, end_char)`` tuples.

    Returns:
        The spaCy ``Doc`` with ``doc.ents`` set to the spans that could be
        aligned to token boundaries.
    """
    doc = nlp(sentence)
    spans = []
    for label, start, end in entities:
        span = doc.char_span(start, end, label)
        # char_span returns None when the character offsets do not line up
        # with spaCy token boundaries; assigning None to doc.ents would raise,
        # so such entities are skipped instead of crashing the request.
        if span is not None:
            spans.append(span)
    doc.ents = spans
    return doc


def apply_ner(sentence: str):
    """Run NER on ``sentence`` and return displaCy HTML with the entities."""
    classifications = ner_pipeline(sentence)
    entities = postprocess(classifications)
    doc = set_entities(sentence, entities)
    displacy_html = displacy.render(doc, style="ent", options=options)
    return displacy_html


intf = gr.Interface(fn=apply_ner, inputs="text", outputs="html", title='Slovak Named Entity Recognition',
                    allow_flagging=False,
                    examples=[["Laboratóriá Úradu verejného zdravotníctva sekvenovaním potvrdili výskyt ďalších "
                               "štyroch prípadov variantu omikron na Slovensku."],
                              ["Čaputová opakovane tvrdí, že \"spravodlivosť na Slovensku neplatí vždy pre všetkých "
                               "rovnako\"."],
                              ["Informácie o týchto veľkolepých plánoch prišli týždeň po tom, ako sa japonský "
                               "miliardár Jusaku Maezawa vrátil z 12-dňového pobytu na Medzinárodnej vesmírnej stanici "
                               "(ISS), čím sa stal prvým vesmírnym turistom, ktorý cestoval na ISS za viac ako desať "
                               "rokov."],
                              ["Minister financií a líder mandátovo najsilnejšieho hnutia OĽaNO Igor Matovič "
                               "upozorňuje, že následky tretej vlny budú na Slovensku veľmi veľké."],
                              ["Začiatkom roka 2021 sa objavili nezhody medzi Richardom Sulíkom a šéfom hnutia OĽANO "
                               "Igorom Matovičom, ktoré v istej miere pretrvávajú aj dodnes."]],
                    description="Named-entity recognition (NER) labels named-entities in unstructured text. This "
                                "implementation supports three labels: person (OSOBA), organization (ORGANIZÁCIA) and "
                                "location (LOKALITA). You can try out one of the examples below or type your own "
                                "sentence. Don't forget to use double quotes (\" \") instead of curved quotes („ “)",
                    article="")
intf.launch()