Spaces:

poltextlab
/

babelmachine

Running

vickeee465

full text ner output

12eab55 5 months ago

No virus

1.62 kB

	import gradio as gr

	import os
	import torch
	import numpy as np
	import spacy
	import huspacy
	from transformers import AutoModelForSequenceClassification
	from transformers import AutoTokenizer
	from huggingface_hub import HfApi

	# Language labels shown in the UI dropdown; build_spacy_path() maps each
	# label (case-insensitively) to a spaCy model name.
	languages = ["English", "Hungarian", "Multilingual"]

	def download_models(models=("en_core_web_lg", "xx_ent_wiki_sm", "hu_core_news_lg")):
	    """Download the spaCy / HuSpaCy models used for named entity recognition.

	    Args:
	        models: Iterable of model names. Names starting with ``"hu"`` are
	            fetched through ``huspacy.download()``; every other name is
	            passed to ``spacy.cli.download``. The default is an immutable
	            tuple so it cannot be mutated across calls.
	    """
	    for model in models:
	        if model.startswith("hu"):
	            # NOTE(review): called without arguments, so HuSpaCy fetches its
	            # own default model rather than `model` -- confirm that default
	            # is the one build_spacy_path() expects.
	            huspacy.download()
	        else:
	            spacy.cli.download(model)

	def build_spacy_path(language: str):
	    """Map a language label to the name of the spaCy model to use.

	    Args:
	        language: Language label, matched case-insensitively. ``None`` or an
	            empty string (e.g. nothing selected in the UI) is accepted and
	            treated like any other unrecognised label.

	    Returns:
	        ``"en_core_web_lg"`` for English, ``"hu_core_news_lg"`` for
	        Hungarian, and ``"xx_ent_wiki_sm"`` for everything else.
	    """
	    model_by_language = {
	        "english": "en_core_web_lg",
	        "hungarian": "hu_core_news_lg",
	    }
	    # `or ""` keeps a missing selection (None) from crashing on .lower();
	    # it then falls through to the multilingual fallback below.
	    key = (language or "").lower()
	    return model_by_language.get(key, "xx_ent_wiki_sm")

	# Loaded pipelines keyed by model name, so each model is read from disk at
	# most once per process instead of on every request.
	_PIPELINE_CACHE = {}


	def _load_pipeline(model_id):
	    """Return the pipeline for ``model_id``, loading it on first use only."""
	    if model_id not in _PIPELINE_CACHE:
	        _PIPELINE_CACHE[model_id] = (
	            huspacy.load() if model_id.startswith("hu") else spacy.load(model_id)
	        )
	    return _PIPELINE_CACHE[model_id]


	def named_entity_recognition(text, language):
	    """Run named entity recognition on ``text`` with the model for ``language``.

	    Args:
	        text: Input text to analyse.
	        language: Language label; resolved to a model name through
	            ``build_spacy_path``.

	    Returns:
	        A ``(output, output_info)`` tuple. ``output`` is a dict with the
	        original ``"text"`` and an ``"entities"`` list of dicts holding the
	        entity label (``"entity"``) and its character offsets (``"start"``,
	        ``"end"``). ``output_info`` is an HTML snippet linking to the model
	        on the Hugging Face Hub.
	    """
	    model_id = build_spacy_path(language)
	    doc = _load_pipeline(model_id)(text)

	    entities = [
	        {"entity": ent.label_, "start": ent.start_char, "end": ent.end_char}
	        for ent in doc.ents
	    ]
	    output = {"text": text, "entities": entities}

	    # Hungarian models are published under the "huspacy" organisation,
	    # everything else under "spacy".
	    hub_owner = "huspacy" if model_id.startswith("hu") else "spacy"
	    model_id_hf = f"{hub_owner}/{model_id}"
	    output_info = f'<p style="text-align: center; display: block">Prediction was made using the <a href="https://huggingface.co/{model_id_hf}">{model_id_hf}</a> model.</p>'
	    return output, output_info

	# Gradio interface: a multi-line text box plus a language dropdown feed
	# named_entity_recognition(); its two return values are rendered as
	# highlighted text and a Markdown note.
	demo = gr.Interface(
	    fn=named_entity_recognition,
	    inputs=[
	        gr.Textbox(lines=6, label="Input"),
	        gr.Dropdown(languages, label="Language"),
	    ],
	    outputs=[
	        gr.HighlightedText(label="Output"),
	        gr.Markdown(),
	    ],
	)