File size: 1,985 Bytes
0fd6af9 44562bb f1d4807 44562bb f1d4807 44562bb f1d4807 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# -*- coding: utf-8 -*-
import os
# Install pinned CPU-only builds of torch / torchvision / torchaudio at start-up.
# This runs before the `gradio` and `transformers` imports below. The exit
# status returned by os.system is not checked, so a failed install goes unnoticed here.
os.system("pip3 install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1+cpu -f "
"https://download.pytorch.org/whl/cpu/torch_stable.html")
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import spacy
from spacy import displacy
# Maps the integer class id found in each prediction's 'entity' field to a
# tag. Tags starting with 'B' open an entity, tags starting with 'I' continue
# one; the part after the hyphen is the entity label shown to the user.
ner_map = {
    0: '0',
    1: 'B-OSOBA',
    2: 'I-OSOBA',
    3: 'B-ORGANIZÁCIA',
    4: 'I-ORGANIZÁCIA',
    5: 'B-LOKALITA',
    6: 'I-LOKALITA',
}

# displaCy rendering options: which entity labels to display and the highlight
# colour of each. The label strings must match the suffixes used in ner_map
# exactly, otherwise displaCy silently leaves those entities out.
options = {
    "ents": [
        "OSOBA",
        "ORGANIZÁCIA",
        "LOKALITA",
    ],
    "colors": {
        "OSOBA": "lightblue",
        "ORGANIZÁCIA": "lightcoral",
        "LOKALITA": "lightgreen",
    },
}
# Hub identifier shared by the tokenizer and the token-classification model.
_MODEL_ID = "crabz/slovakbert-ner"

# Load both halves of the model once at import time and wrap them in a
# 'ner' pipeline that apply_ner() calls for every request.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(_MODEL_ID)
ner_pipeline = pipeline(task='ner', model=model, tokenizer=tokenizer)

# Blank spaCy pipeline: used to build the Doc object that displaCy renders.
nlp = spacy.blank("en")
def apply_ner(text: str) -> str:
    """Tag named entities in *text* and return them rendered as displaCy HTML.

    The token-level predictions from ``ner_pipeline`` are merged into entity
    spans: every ``B-*`` prediction opens a span that is extended over the
    ``I-*`` predictions directly following it in the result list. The spans
    are attached to a spaCy ``Doc`` built by ``nlp`` and rendered with
    ``displacy`` using the module-level ``options``.

    Args:
        text: Raw input text to analyse.

    Returns:
        The markup produced by ``displacy.render`` in ``"ent"`` style.
    """
    classifications = ner_pipeline(text)

    # Collect one (label, start_char, end_char) triple per B-* prediction.
    entities = []
    for i, token in enumerate(classifications):
        if token['entity'] == 0:
            continue
        tag = ner_map[token['entity']]
        if tag[0] != 'B':
            continue
        # Extend the entity over the run of I-* predictions that follows.
        j = i + 1
        while j < len(classifications) and ner_map[classifications[j]['entity']][0] == 'I':
            j += 1
        entities.append((tag.split('-')[1], token['start'], classifications[j - 1]['end']))

    doc = nlp(text)
    spans = []
    for label, start, end in entities:
        # The model's character offsets need not coincide with spaCy token
        # boundaries. In the default strict mode char_span returns None for
        # such offsets, and assigning None to doc.ents raises; "expand" snaps
        # the span outwards to the enclosing tokens instead.
        span = doc.char_span(start, end, label, alignment_mode="expand")
        if span is not None:
            spans.append(span)

    # doc.ents rejects overlapping spans, which expansion can produce for
    # neighbouring entities; filter_spans keeps a non-overlapping subset.
    doc.ents = spacy.util.filter_spans(spans)

    return displacy.render(doc, style="ent", options=options)
# Web UI: a single text box in, rendered entity HTML out, flagging disabled.
intf = gr.Interface(
    fn=apply_ner,
    inputs="text",
    outputs="html",
    title='Slovak Named Entity Recognition',
    allow_flagging=False,
)

# Start serving the interface.
intf.launch()
|