import gradio as gr import os import torch import numpy as np import spacy import huspacy from transformers import AutoModelForSequenceClassification from transformers import AutoTokenizer from huggingface_hub import HfApi from spacy.glossary import GLOSSARY as NER_DICT languages = [ "English", "Hungarian", "Multilingual" ] def download_models(models=["en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg"]): for model in models: if model.startswith("hu"): huspacy.download() else: spacy.cli.download(model) def build_spacy_path(language: str): language = language.lower() if language == "english": return "en_core_web_lg" if language == "hungarian": return "hu_core_news_lg" else: return "xx_ent_wiki_sm" def named_entity_recognition(text, language): model_id = build_spacy_path(language) pipeline = huspacy.load() if model_id.startswith("hu") else spacy.load(model_id) doc = pipeline(text) entities = [{"entity":ent.label_, "start":ent.start_char, "end":ent.end_char} for ent in doc.ents] labels_used = [ent.label_ for ent in doc.ents] legend = '

Legend:

" output = {"text":text, "entities":entities} model_id_hf = f"huspacy/{model_id}" if model_id.startswith("hu") else f"spacy/{model_id}" output_info = legend + f'

Prediction was made using the {model_id_hf} model.