sk-ner / app.py
crabz's picture
add pytorch install command
0fd6af9
raw
history blame
1.99 kB
# -*- coding: utf-8 -*-
import os
# Install the pinned CPU-only PyTorch wheels through the shell at start-up.
# This runs before the third-party imports that follow it in this file.
_torch_install_parts = [
    "pip3 install",
    "torch==1.10.1+cpu",
    "torchvision==0.11.2+cpu",
    "torchaudio==0.10.1+cpu",
    "-f",
    "https://download.pytorch.org/whl/cpu/torch_stable.html",
]
os.system(" ".join(_torch_install_parts))
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import spacy
from spacy import displacy
# Lookup from integer class id to tag string. The list position is the id:
# index 0 is the non-entity tag, followed by B-/I- pairs for
# OSOBA (person), ORGANIZÁCIA (organisation) and LOKALITA (location).
ner_map = dict(enumerate([
    '0',
    'B-OSOBA',
    'I-OSOBA',
    'B-ORGANIZÁCIA',
    'I-ORGANIZÁCIA',
    'B-LOKALITA',
    'I-LOKALITA',
]))
# Rendering options handed to displaCy: the entity labels to show ("ents")
# and the highlight colour assigned to each label ("colors").
_entity_labels = ["OSOBA", "ORGANIZÁCIA", "LOKALITA"]
_entity_colours = ["lightblue", "lightcoral", "lightgreen"]
options = dict(
    ents=_entity_labels,
    colors=dict(zip(_entity_labels, _entity_colours)),
)
# Load the tokenizer and token-classification model from the same checkpoint
# and wrap them in a Hugging Face 'ner' pipeline.
_checkpoint = "crabz/slovakbert-ner"
tokenizer = AutoTokenizer.from_pretrained(_checkpoint)
model = AutoModelForTokenClassification.from_pretrained(_checkpoint)
ner_pipeline = pipeline(
    task='ner',
    model=model,
    tokenizer=tokenizer,
)

# Blank spaCy pipeline; apply_ner uses it to build the Doc that displaCy renders.
nlp = spacy.blank("en")
def apply_ner(text: str):
    """Run the NER pipeline on ``text`` and return displaCy entity markup.

    The per-token predictions from ``ner_pipeline`` are merged into entity
    spans: every token tagged ``B-*`` opens a span, which is extended over
    the ``I-*`` tokens immediately following it. The spans are attached to a
    spaCy ``Doc`` built from the same text and rendered with ``displacy``
    using the module-level ``options``.

    Args:
        text: The raw input text to analyse.

    Returns:
        The value returned by ``displacy.render`` for the ``"ent"`` style.

    Raises:
        KeyError: If a prediction carries an ``'entity'`` value that is not a
            key of ``ner_map``.
    """
    classifications = ner_pipeline(text)
    # Resolve every predicted class id to its tag string once, up front.
    tags = [ner_map[prediction['entity']] for prediction in classifications]

    entities = []
    for i, tag in enumerate(tags):
        # Only a B- tag opens an entity; the non-entity tag and stray I- tags
        # are skipped.
        if not tag.startswith('B'):
            continue
        # Walk forward over the run of I- tags that directly follows.
        # NOTE: the I- tag's entity type is not compared with the B- tag's.
        j = i + 1
        while j < len(tags) and tags[j].startswith('I'):
            j += 1
        entities.append((tag.split('-')[1],
                         classifications[i]['start'],
                         classifications[j - 1]['end']))

    doc = nlp(text)
    ents = []
    for label, start, end in entities:
        span = doc.char_span(start, end, label)
        # char_span returns None when the character offsets do not coincide
        # with token boundaries of ``doc``; None cannot be stored in doc.ents,
        # so such spans are dropped instead of failing the whole request.
        if span is not None:
            ents.append(span)
    doc.ents = ents
    return displacy.render(doc, style="ent", options=options)
# Build the web UI: one text input passed to apply_ner, whose return value is
# shown as HTML. Flagging is turned off.
intf = gr.Interface(
    fn=apply_ner,
    inputs="text",
    outputs="html",
    title='Slovak Named Entity Recognition',
    allow_flagging=False,
)

# Start serving the interface.
intf.launch()