vickeee465
full text ner output
12eab55
raw
history blame
No virus
1.62 kB
import functools
import os

import gradio as gr
import torch
import numpy as np
import spacy
import huspacy
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from huggingface_hub import HfApi
# Choices offered in the "Language" dropdown of the demo UI.
languages = [
    "English",
    "Hungarian",
    "Multilingual",
]
def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
    """Download the spaCy / HuSpaCy model packages used by this app.

    Args:
        models: Iterable of model package names. The default is an
            immutable tuple so it cannot be mutated between calls.

    Names starting with ``"hu"`` are fetched through ``huspacy.download()``;
    that call is made without arguments, so the name itself is not
    forwarded. Every other name is passed to ``spacy.cli.download``.
    """
    for model in models:
        if model.startswith("hu"):
            huspacy.download()
        else:
            spacy.cli.download(model)
def build_spacy_path(language: str):
    """Return the spaCy model id to use for the given language name.

    Args:
        language: Language name; matching is case-insensitive. ``None`` is
            tolerated and treated like any other unrecognised value.

    Returns:
        ``"en_core_web_lg"`` for English, ``"hu_core_news_lg"`` for
        Hungarian, and the multilingual ``"xx_ent_wiki_sm"`` for everything
        else (including ``None`` and the empty string).
    """
    model_by_language = {
        "english": "en_core_web_lg",
        "hungarian": "hu_core_news_lg",
    }
    # NOTE(review): a dropdown with no selection presumably passes None here;
    # fall back to the multilingual model instead of raising AttributeError.
    key = (language or "").lower()
    return model_by_language.get(key, "xx_ent_wiki_sm")
@functools.lru_cache(maxsize=None)
def _load_pipeline(model_id):
    """Load the pipeline for *model_id*, reusing it on subsequent calls."""
    # The key space is limited to the ids returned by build_spacy_path,
    # so an unbounded cache cannot grow without limit.
    if model_id.startswith("hu"):
        return huspacy.load()
    return spacy.load(model_id)


def named_entity_recognition(text, language):
    """Run named-entity recognition on *text* with the model for *language*.

    Args:
        text: Input text to analyse.
        language: Language name, resolved to a model id by
            ``build_spacy_path``.

    Returns:
        A 2-tuple ``(output, output_info)``:

        * ``output`` is a dict with the original ``"text"`` and a list of
          ``"entities"``, each a dict with the entity label (``"entity"``)
          and its ``"start"`` / ``"end"`` character offsets.
        * ``output_info`` is an HTML snippet linking to the model on the
          Hugging Face Hub.
    """
    model_id = build_spacy_path(language)
    # Pipelines are cached so the model is not reloaded on every request.
    doc = _load_pipeline(model_id)(text)

    entities = [
        {"entity": ent.label_, "start": ent.start_char, "end": ent.end_char}
        for ent in doc.ents
    ]
    output = {"text": text, "entities": entities}

    # Hungarian models are linked under the "huspacy" Hub namespace, all
    # others under "spacy".
    model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}"
    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
    return output, output_info
# Gradio UI: a text box and a language dropdown feed
# named_entity_recognition; its two return values are rendered as
# highlighted text and a Markdown component, in that order.
demo = gr.Interface(
    fn=named_entity_recognition,
    inputs=[
        gr.Textbox(lines=6, label="Input"),
        gr.Dropdown(languages, label="Language"),
    ],
    outputs=[
        gr.HighlightedText(label="Output"),
        gr.Markdown(),
    ],
)