poltextlab's picture
label legend
fa1f65b verified
import gradio as gr
import os
import torch
import numpy as np
import spacy
import huspacy
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from huggingface_hub import HfApi
from spacy.glossary import GLOSSARY as NER_DICT
# Choices offered by the language dropdown in the UI.
languages = ["English", "Hungarian", "Multilingual"]
def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
    """Download the spaCy / HuSpaCy model packages used by this app.

    Args:
        models: Iterable of model package names. Names starting with ``"hu"``
            are fetched through ``huspacy.download()``; every other name is
            passed to ``spacy.cli.download``. The default is an immutable
            tuple so it cannot be altered between calls.
    """
    for model in models:
        if model.startswith("hu"):
            # NOTE(review): called without arguments, so the name in `model`
            # is not forwarded and HuSpaCy fetches whatever its default model
            # is -- confirm that default is the intended package.
            huspacy.download()
        else:
            spacy.cli.download(model)
def build_spacy_path(language: str):
    """Return the spaCy model package name for a language name.

    The match is case-insensitive. English and Hungarian have dedicated
    models; any other value resolves to the multilingual model.
    """
    model_by_language = {
        "english": "en_core_web_lg",
        "hungarian": "hu_core_news_lg",
    }
    return model_by_language.get(language.lower(), "xx_ent_wiki_sm")
# Loaded pipelines keyed by model id, so each model is loaded only once per
# process instead of on every request.
_PIPELINE_CACHE = {}


def _load_pipeline(model_id):
    """Return the pipeline for *model_id*, loading and caching it on first use."""
    if model_id not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[model_id] = (
            huspacy.load() if model_id.startswith("hu") else spacy.load(model_id)
        )
    return _PIPELINE_CACHE[model_id]


def named_entity_recognition(text, language):
    """Run named entity recognition on *text* and build the UI outputs.

    Args:
        text: The text to analyse.
        language: Language name; passed to ``build_spacy_path`` to pick the
            model.

    Returns:
        A 2-tuple ``(output, output_info)``. ``output`` is a dict with the
        keys ``"text"`` (the input text) and ``"entities"`` (a list of dicts
        with ``"entity"``, ``"start"`` and ``"end"``, the latter two being
        character offsets into the text). ``output_info`` is an HTML string
        holding a legend of the labels found and a link to the model used.
    """
    model_id = build_spacy_path(language)
    doc = _load_pipeline(model_id)(text)

    entities = [
        {"entity": ent.label_, "start": ent.start_char, "end": ent.end_char}
        for ent in doc.ents
    ]

    # Sorted so the legend order is stable; set iteration order is not.
    labels_used = sorted({ent.label_ for ent in doc.ents})
    # Fall back to the raw label when the glossary has no description for it,
    # rather than failing the whole request with a KeyError.
    legend_items = "".join(
        f"<li> <b>{label}</b> = <i>{NER_DICT.get(label, label)}</i> </li>"
        for label in labels_used
    )
    legend = (
        '<p style="text-align: left; display: block">Legend:</p>'
        '<ul style="text-align: left; display: block">'
        + legend_items
        + "</ul>"
    )

    output = {"text": text, "entities": entities}

    model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}"
    # The original string ended with a stray, never-closed "<ul>"; it is dropped
    # here so the emitted markup is well-formed.
    output_info = legend + (
        '<p style="text-align: center; display: block">Prediction was made using the '
        f'<a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
    )
    return output, output_info
# Gradio UI: a text box and a language selector feed named_entity_recognition;
# its two return values go to the highlighted-text view and the Markdown panel.
demo = gr.Interface(
    fn=named_entity_recognition,
    inputs=[
        gr.Textbox(lines=6, label="Input"),
        gr.Dropdown(languages, label="Language"),
    ],
    outputs=[
        gr.HighlightedText(label="Output"),
        gr.Markdown(),
    ],
)