# -*- coding: utf-8 -*-
"""Gradio demo for Slovak named entity recognition.

Loads the ``crabz/slovakbert-ner`` token-classification model, merges its
per-token predictions into entity spans and renders the result as displaCy
HTML inside a small Gradio interface.
"""
import os

# Install the pinned CPU-only PyTorch wheels at start-up. This runs before the
# imports below on purpose (presumably so that torch is present by the time
# transformers loads the model), hence the imports are not all at the top.
os.system("pip3 install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1+cpu -f "
          "https://download.pytorch.org/whl/cpu/torch_stable.html")

import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import spacy
from spacy import displacy

# Maps the label id reported by the pipeline to a BIO tag
# (B- = first token of an entity, I- = continuation, '0' = no entity).
ner_map = {0: '0', 1: 'B-OSOBA', 2: 'I-OSOBA', 3: 'B-ORGANIZÁCIA', 4: 'I-ORGANIZÁCIA', 5: 'B-LOKALITA',
           6: 'I-LOKALITA'}

# displaCy rendering options: which entity labels to show and their colors.
options = {"ents": ["OSOBA", "ORGANIZÁCIA", "LOKALITA"],
           "colors": {"OSOBA": "lightblue", "ORGANIZÁCIA": "lightcoral", "LOKALITA": "lightgreen"}}

tokenizer = AutoTokenizer.from_pretrained("crabz/slovakbert-ner")
model = AutoModelForTokenClassification.from_pretrained("crabz/slovakbert-ner")
ner_pipeline = pipeline(task='ner', model=model, tokenizer=tokenizer)

# A blank pipeline is used only to tokenize the text into a Doc that displaCy
# can render; no spaCy model is involved in the predictions.
nlp = spacy.blank("en")


def apply_ner(text: str):
    """Run NER on *text* and return displaCy HTML with the entities marked.

    Consecutive pipeline predictions are merged into one entity: a ``B-`` tag
    opens an entity and every directly following ``I-`` tag extends it. The
    entity covers the characters from the ``start`` of the opening token to
    the ``end`` of the last merged token.

    Args:
        text: The input sentence(s) to analyse.

    Returns:
        The markup produced by ``displacy.render`` in ``"ent"`` style.
    """
    classifications = ner_pipeline(text)

    # Collect (label, start_char, end_char) triples.
    entities = []
    for i, token in enumerate(classifications):
        # NOTE(review): comparing with the integer 0 and indexing ner_map by
        # this value assumes the model reports integer label ids -- confirm
        # against the model configuration.
        if token['entity'] == 0:
            continue
        if ner_map[token['entity']][0] != 'B':
            # A continuation tag without an opening B- tag is ignored.
            continue
        j = i + 1
        while j < len(classifications) and ner_map[classifications[j]['entity']][0] == 'I':
            j += 1
        entities.append((ner_map[token['entity']].split('-')[1],
                         token['start'],
                         classifications[j - 1]['end']))

    doc = nlp(text)
    ents = []
    for label, start, end in entities:
        span = doc.char_span(start, end, label)
        # char_span yields None when the character offsets do not line up with
        # spaCy token boundaries; assigning None to doc.ents would raise, so
        # such spans are skipped instead of failing the whole request.
        if span is not None:
            ents.append(span)
    doc.ents = ents

    displacy_html = displacy.render(doc, style="ent", options=options)
    return displacy_html


intf = gr.Interface(fn=apply_ner, inputs="text", outputs="html", title='Slovak Named Entity Recognition',
                    allow_flagging=False)
intf.launch()