# app.py -- "Create app.py", commit 5c763a2 (verified), author: 1Noura
from transformers import pipeline
import gradio as gr
# Load one NER pipeline per supported language at import time so that every
# request reuses the already-initialised models.
# `aggregation_strategy='simple'` merges word pieces into whole entities; it is
# the replacement for the deprecated `grouped_entities=True` flag and selects
# the same grouping behaviour.
ner_pipeline_en = pipeline('ner', aggregation_strategy='simple')  # English model (no explicit checkpoint given)
ner_pipeline_ar = pipeline(
    'ner',
    model='CAMeL-Lab/bert-base-arabic-camelbert-msa-ner',
    aggregation_strategy='simple',
)  # Arabic model
def get_ner_pipeline(language='English'):
    """Return the NER pipeline that matches ``language``.

    ``'Arabic'`` selects the Arabic pipeline; every other value selects the
    English pipeline.
    """
    wants_arabic = language == 'Arabic'
    return ner_pipeline_ar if wants_arabic else ner_pipeline_en
def highlight_entities(text, language='English'):
    """Extract named entities and return ``text`` split into labelled segments.

    Args:
        text: The input string to analyse.
        language: Passed to ``get_ner_pipeline`` to choose the model.

    Returns:
        A list of ``(segment, label)`` tuples covering ``text`` from left to
        right. ``label`` is the entity group for entity segments and ``None``
        for the plain text between and after them.
    """
    ner_pipeline = get_ner_pipeline(language)  # Get the appropriate NER model
    entities = ner_pipeline(text)  # Process the input text

    highlighted_text_data = []
    last_index = 0
    for entity in entities:
        entity_type = entity['entity_group']
        start = entity.get('start')
        end = entity.get('end')

        if start is None or end is None:
            # No character offsets reported: fall back to searching for the
            # decoded entity string. Use find() once instead of index() twice
            # so a non-literal match (e.g. spacing altered by decoding) does
            # not raise ValueError.
            entity_name = entity['word']
            start = text.find(entity_name, last_index)
            if start == -1:
                continue  # Cannot locate the entity in the original text
            end = start + len(entity_name)

        if start < last_index:
            # Overlapping or out-of-order span; emitting it would duplicate text.
            continue

        # Add text before the entity
        highlighted_text_data.append((text[last_index:start], None))
        # Add the entity (as it appears in the input) with its type
        highlighted_text_data.append((text[start:end], entity_type))
        last_index = end

    # Add any remaining text after the last entity
    highlighted_text_data.append((text[last_index:], None))
    return highlighted_text_data
# Custom CSS for right-to-left (RTL) text alignment.
# The rule targets the element whose id is "output" (the id given to the
# HighlightedText output component) and sets RTL direction and right alignment.
# NOTE(review): the stylesheet is static, so it looks like the rule is applied
# regardless of the language selected in the UI -- confirm this is intended.
custom_css = """
#output {
direction: rtl; /* Right-to-left for Arabic */
text-align: right; /* Align right for Arabic */
}
"""
# Gradio interface setup: build the components first, then wire them together.
text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here...")
language_input = gr.Radio(label="Select Language", choices=["English", "Arabic"], value="English")
highlight_output = gr.HighlightedText(label="Highlighted NER Results", elem_id="output")

# One example row per supported language: [input text, language].
example_rows = [
    ["Hugging Face Inc. is a company based in New York City.", "English"],
    ["أحمد هو عالم في مجال الذكاء الاصطناعي", "Arabic"],
]

interface = gr.Interface(
    fn=highlight_entities,
    inputs=[text_input, language_input],
    outputs=highlight_output,
    title="Named Entity Recognition",
    description="Select a language and enter text to extract and highlight named entities.",
    examples=example_rows,
    css=custom_css,  # Styles the element with id "output"
)

# Start the Gradio app.
interface.launch()