# -*- coding: utf-8 -*-
import os
import subprocess
import sys

# Install the pinned CPU-only PyTorch wheels at start-up, ahead of the
# third-party imports further down (presumably so they find torch in place).
# pip is invoked through the running interpreter (`sys.executable -m pip`) so
# the packages land in the environment this script actually executes in, and
# the command is passed as an argument list so no shell is involved.
_TORCH_INSTALL_CMD = [
    sys.executable, "-m", "pip", "install",
    "torch==1.10.1+cpu",
    "torchvision==0.11.2+cpu",
    "torchaudio==0.10.1+cpu",
    "-f", "https://download.pytorch.org/whl/cpu/torch_stable.html",
]
# A failed install stays non-fatal (as before), but it is no longer silent.
_torch_install_status = subprocess.run(_TORCH_INSTALL_CMD, check=False).returncode
if _torch_install_status != 0:
    print(f"warning: PyTorch installation exited with status {_torch_install_status}",
          file=sys.stderr)

import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

import spacy
from spacy import displacy

# Class index -> BIO tag. Index 0 maps to the string '0' (digit zero), which
# starts with neither 'B' nor 'I' and is therefore never treated as part of an
# entity by apply_ner. Tag names are Slovak: OSOBA = person,
# ORGANIZÁCIA = organisation, LOKALITA = location.
ner_map = dict(enumerate([
    '0',
    'B-OSOBA',
    'I-OSOBA',
    'B-ORGANIZÁCIA',
    'I-ORGANIZÁCIA',
    'B-LOKALITA',
    'I-LOKALITA',
]))

# Options handed to displacy.render: which entity labels to draw and the
# highlight colour used for each of them.
options = {
    "ents": ["OSOBA", "ORGANIZÁCIA", "LOKALITA"],
    "colors": {
        "OSOBA": "lightblue",
        "ORGANIZÁCIA": "lightcoral",
        "LOKALITA": "lightgreen",
    },
}

# Identifier shared by the tokenizer and the token-classification weights.
_MODEL_NAME = "crabz/slovakbert-ner"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(_MODEL_NAME)
# Token-level NER pipeline; apply_ner reads 'entity', 'start' and 'end' from
# each item it returns.
ner_pipeline = pipeline(
    task='ner',
    tokenizer=tokenizer,
    model=model,
)
# Blank spaCy pipeline: apply_ner uses it only to turn the input text into a
# Doc that entity spans can be attached to for displaCy rendering.
nlp = spacy.blank("en")


def _collect_entity_spans(classifications):
    """Merge token-level BIO predictions into whole-entity character spans.

    Each item of *classifications* must provide 'entity' (a key of
    ``ner_map``), 'start' and 'end'. An entity begins at a token whose tag
    starts with 'B' and extends over the immediately following tokens whose
    tag starts with 'I'.

    Returns a list of ``(label, start_char, end_char)`` tuples, where *label*
    is the tag without its 'B-' prefix.
    """
    total = len(classifications)
    spans = []
    for idx, token in enumerate(classifications):
        tag = ner_map[token['entity']]
        # Only 'B-*' tags open an entity; '0' and stray 'I-*' tags are skipped.
        if not tag.startswith('B'):
            continue
        # Walk forward to the last consecutive 'I-*' token of this entity.
        last = idx
        while last + 1 < total and ner_map[classifications[last + 1]['entity']].startswith('I'):
            last += 1
        spans.append((tag.split('-', 1)[1], token['start'], classifications[last]['end']))
    return spans


def apply_ner(text: str) -> str:
    """Run NER over *text* and return displaCy HTML with entities highlighted.

    The text is classified with ``ner_pipeline``, the per-token predictions
    are merged into entity spans, and the spans are attached to a spaCy Doc
    that is rendered with ``displacy`` using the module-level ``options``.

    Args:
        text: Raw input text to annotate.

    Returns:
        The markup produced by ``displacy.render`` in "ent" style.
    """
    entities = _collect_entity_spans(ner_pipeline(text))
    doc = nlp(text)

    ents = []
    for label, start, end in entities:
        span = doc.char_span(start, end, label)
        # char_span yields None when the model's character offsets do not fall
        # on spaCy token boundaries; assigning None to doc.ents would raise,
        # so such spans are left out instead of failing the whole request.
        if span is not None:
            ents.append(span)
    doc.ents = ents

    return displacy.render(doc, style="ent", options=options)


# Web UI: a single text box in, rendered HTML out, with flagging turned off.
intf = gr.Interface(
    fn=apply_ner,
    inputs="text",
    outputs="html",
    title='Slovak Named Entity Recognition',
    allow_flagging=False,
)
# Start serving the interface as soon as the script is executed.
intf.launch()