|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline |
|
|
|
import spacy |
|
from spacy import displacy |
|
|
|
# Maps the class ids emitted by the model to BIO tags. The part after the
# dash is the entity label shown to the user; '0' marks "no entity".
# NOTE: the organization label is spelled with a Latin "Á" (U+00C1); it had
# been corrupted to a Greek capital gamma by an encoding mix-up.
ner_map = {
    0: '0',
    1: 'B-OSOBA',
    2: 'I-OSOBA',
    3: 'B-ORGANIZÁCIA',
    4: 'I-ORGANIZÁCIA',
    5: 'B-LOKALITA',
    6: 'I-LOKALITA',
}

# displaCy rendering options. The names in "ents" (and the keys of "colors")
# must match the label suffixes used in ner_map exactly, otherwise displaCy
# will not highlight the corresponding entities.
options = {
    "ents": [
        "OSOBA",
        "ORGANIZÁCIA",
        "LOKALITA",
    ],
    "colors": {
        "OSOBA": "lightblue",
        "ORGANIZÁCIA": "lightcoral",
        "LOKALITA": "lightgreen",
    },
}
|
|
|
# Hugging Face Hub id shared by the tokenizer and the token-classification model.
_MODEL_ID = "crabz/slovakbert-ner"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(_MODEL_ID)

# Token-level NER pipeline; apply_ner() merges its per-token output into spans.
ner_pipeline = pipeline(
    task='ner',
    model=model,
    tokenizer=tokenizer,
)

# Blank spaCy pipeline: used only to turn the input text into a Doc that
# displaCy can render, not to predict entities.
nlp = spacy.blank("en")
|
|
|
|
|
def _collect_entity_spans(classifications):
    """Merge token-level BIO predictions into ``(label, start, end)`` triples.

    Each item of *classifications* is read for its ``'entity'``, ``'start'``
    and ``'end'`` keys. A span opens at every token whose tag (looked up in
    ``ner_map``) starts with ``'B'`` and extends over all immediately
    following tokens whose tag starts with ``'I'``. ``'I'`` tokens that do
    not follow a ``'B'`` token are ignored.
    """
    spans = []
    total = len(classifications)
    for idx, item in enumerate(classifications):
        if item['entity'] == 0:
            # Class id 0 means "no entity".
            continue
        tag = ner_map[item['entity']]
        if not tag.startswith('B'):
            continue
        # Advance past the continuation tokens belonging to this entity.
        nxt = idx + 1
        while nxt < total and ner_map[classifications[nxt]['entity']].startswith('I'):
            nxt += 1
        spans.append((tag.split('-')[1], item['start'], classifications[nxt - 1]['end']))
    return spans


def apply_ner(text: str):
    """Run NER over *text* and return displaCy HTML with entities highlighted.

    The text is classified with ``ner_pipeline``, the token-level predictions
    are merged into character spans, and those spans are attached to a spaCy
    ``Doc`` which is rendered with ``displacy.render`` (style ``"ent"``).

    Args:
        text: The raw input text to analyse.

    Returns:
        The value returned by ``displacy.render`` for the annotated document.
    """
    classifications = ner_pipeline(text)
    doc = nlp(text)

    ents = []
    for label, start, end in _collect_entity_spans(classifications):
        # The model's character offsets come from its own subword tokenizer
        # and need not coincide with spaCy token boundaries. In the default
        # strict mode char_span() returns None for such offsets, and a None
        # inside doc.ents raises. Snap outwards to whole tokens instead.
        span = doc.char_span(start, end, label, alignment_mode="expand")
        if span is not None:
            ents.append(span)

    # Expanding can make neighbouring spans share a token; doc.ents rejects
    # overlapping spans, so keep a non-overlapping subset.
    doc.ents = spacy.util.filter_spans(ents)

    return displacy.render(doc, style="ent", options=options)
|
|
|
|
|
# Web UI: a plain text box in, rendered displaCy HTML out, flagging disabled.
intf = gr.Interface(
    fn=apply_ner,
    inputs="text",
    outputs="html",
    title='Slovak Named Entity Recognition',
    allow_flagging=False,
)

# Start the Gradio server (runs on import/execution of this script).
intf.launch()
|
|