Spaces:
Runtime error
Runtime error
import html
import re

import gradio as gr
from transformers import pipeline
# Right-to-left, bordered, horizontally scrollable container; the "{}" slot
# is filled via str.format with the markup to display.
HTML_WRAPPER = (
    '<div dir="rtl" style="overflow-x: auto; '
    'border: 1px solid #e6e9ef; '
    'border-radius: 0.25rem; '
    'padding: 1rem; '
    'margin-bottom: 2.5rem">'
    '{}</div>'
)
# Checkpoint identifier handed to the pipeline; replace it here to use a
# different (e.g. newer) checkpoint.
model_checkpoint = "Montazer/arabert-finetuned-caner"

# Token-classification pipeline built once at import time and reused for
# every request.
token_classifier = pipeline(
    task="token-classification",
    model=model_checkpoint,
    aggregation_strategy="simple",
)
import re
import unicodedata

# Arabic marks to strip from input text. Only the keys matter to the code
# that consumes this mapping; every value is None.
diacritics = dict.fromkeys(
    (
        '\u064B',  # FATHATAN
        '\u064C',  # DAMMATAN
        '\u064D',  # KASRATAN
        '\u064E',  # FATHA
        '\u064F',  # DAMMA
        '\u0650',  # KASRA
        '\u0651',  # SHADDA
        '\u0652',  # SUKUN
    )
)
def remove_diacritics(text):
    """Return *text* in NFKD form with the listed diacritic marks deleted.

    The string is first decomposed with Unicode NFKD normalization, then
    every code point that is a key of the module-level ``diacritics``
    mapping is removed. The result is left in decomposed form; it is not
    recomposed afterwards.
    """
    # str.translate deletes any code point that maps to None.
    deletion_table = {ord(mark): None for mark in diacritics}
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed.translate(deletion_table)
def remove_punctuation(text):
    """Delete every character that is neither a word character nor whitespace.

    "Word character" follows the regex ``\\w`` class, so letters, digits and
    the underscore are kept.
    """
    not_word_or_space = re.compile(r'[^\w\s]')
    return not_word_or_space.sub('', text)
def preprocess_arabic_text(text):
    """Normalize raw input before it is passed to the classifier.

    Steps, in order: strip diacritics, strip punctuation, collapse each run
    of whitespace to a single space, and lowercase the result.
    """
    # Diacritics are removed first, then punctuation.
    stripped = remove_punctuation(remove_diacritics(text))
    # Any whitespace run (including newlines and tabs) becomes one space.
    single_spaced = re.sub(r'\s+', ' ', stripped)
    return single_spaced.lower()
# Define a function to highlight different labels in the text
def highlight_text(text, entities):
    """Return *text* as HTML with every recognised entity wrapped in ``<mark>``.

    Args:
        text: The string that the entity offsets index into.
        entities: Iterable of mappings providing ``start``, ``end``, ``word``,
            ``entity_group`` and ``score``. They are consumed left to right,
            so they are assumed to be ordered by ``start`` and
            non-overlapping -- TODO confirm against the pipeline output.

    Returns:
        ``HTML_WRAPPER`` filled with the space-joined plain words and
        ``<mark>`` elements. Each mark shows the entity word, its label as a
        subscript and its score (two decimals) as a superscript.
    """
    entity_colors = {
        "Allah": "#ffe5cc", "Book": "#b3daff", "Clan": "#faedcb", "Crime": "#ffb3d9",
        "Date": "#cce6ff", "Day": "#cce6ff", "Hell": "#d9d9d9", "Loc": "#d9b3ff",
        "Meas": "#e6ccff", "Mon": "#ffd6cc", "Month": "#ffd6cc", "NatOb": "#ffe0b3",
        "Number": "#ffe0cc", "Org": "#c1ffb3", "Para": "#f2f2f2", "Pers": "#b3ffb3",
        "Prophet": "#e6ccff", "Rlig": "#ffff80", "Sect": "#b3d9ff", "Time": "#ffb3ba",
    }
    # Neutral colour for labels missing from the table, instead of a KeyError.
    fallback_color = "#e0e0e0"

    highlighted = []
    cursor = 0
    for entity in entities:
        start = int(entity['start'])
        end = int(entity['end'])

        # Plain text between the previous entity and this one. Escaping keeps
        # user-supplied characters from being interpreted as markup.
        highlighted.extend(html.escape(text[cursor:start]).split())

        entity_group = entity['entity_group']
        score = entity['score']
        color = entity_colors.get(entity_group, fallback_color)
        safe_group = html.escape(str(entity_group))
        safe_word = html.escape(str(entity['word']))
        marked_text = (
            f'<mark class="{safe_group}" style="background-color: {color}">'
            f'{safe_word}<sub>{safe_group}</sub><sup>{score:.2f}</sup></mark>'
        )
        highlighted.append(marked_text)

        # Resume exactly at the entity end. Skipping one extra character
        # would drop it whenever it is not whitespace; split() already
        # discards a separating space.
        cursor = end

    highlighted.extend(html.escape(text[cursor:]).split())
    return HTML_WRAPPER.format(' '.join(highlighted))
# Callback used by the Gradio interface
def predict_ner(text):
    """Preprocess *text*, run the token classifier and return highlighted HTML.

    Any exception raised along the way is printed and its message is
    returned as the result instead of being propagated.
    """
    try:
        cleaned = preprocess_arabic_text(text)
        # Entities are computed on the cleaned text, so their offsets refer
        # to that same string when it is highlighted.
        return highlight_text(cleaned, token_classifier(cleaned))
    except Exception as err:
        print(err)
        return str(err)
# Label shown on the input box: a prompt, an "Example:" line and a sample text.
# NOTE(review): the sample below does not look like Arabic script; it appears
# to be mis-encoded text -- confirm against the original file before shipping.
label_text = "\n".join(
    (
        "Enter Hadith in Arabic:",
        "Example:",
        ' "ุญูุฏููุซูููุง ุนูุจูุฏ ุงููููููุ ุญูุฏููุซูููู ุนูุจูููุฏู ุงูููููู ุจููู ุนูู ูุฑู ุงููููููุงุฑููุฑููููุ ุญูุฏููุซูููุง ูููููุณู ุจููู ุฃูุฑูููู ูุ ุญูุฏููุซูููุง ููุฒููุฏู ุจููู ุฃูุจูู ุฒูููุงุฏูุ ุนููู ุนูุจูุฏู ุงูุฑููุญูู ููู ุจููู ุฃูุจูู ูููููููุ ููุงูู ุดูููุฏูุชู ุนููููููุง ุฑูุถููู ุงูููููู ุนููููู ููู ุงูุฑููุญูุจูุฉู ููููุดูุฏู ุงููููุงุณู ุฃูููุดูุฏู ุงูููููู ู ููู ุณูู ูุนู ุฑูุณูููู ุงูููููู ุตููููู ุงูููููู ุนููููููู ููุณููููู ู ููููููู ููููู ู ุบูุฏููุฑู ุฎูู ูู ู ููู ููููุชู ู ูููููุงูู ููุนูููููู ู ูููููุงูู ููู ููุง ููุงู ู ููุดูููุฏู ููุงูู ุนูุจูุฏู ุงูุฑููุญูู ููู ููููุงู ู ุงุซูููุง ุนูุดูุฑู ุจูุฏูุฑููููุง ููุฃููููู ุฃูููุธูุฑู ุฅูููู ุฃูุญูุฏูููู ู ููููุงูููุง ููุดูููุฏู ุฃููููุง ุณูู ูุนูููุง ุฑูุณูููู ุงูููููู ุตููููู ุงูููููู ุนููููููู ููุณููููู ู ููููููู ููููู ู ุบูุฏููุฑู ุฎูู ูู ุฃูููุณูุชู ุฃูููููู ุจูุงููู ูุคูู ูููููู ู ููู ุฃูููููุณูููู ู ููุฃูุฒูููุงุฌูู ุฃูู ููููุงุชูููู ู ููููููููุง ุจูููู ููุง ุฑูุณูููู ุงูููููู ููุงูู ููู ููู ููููุชู ู ูููููุงูู ููุนูููููู ู ูููููุงูู ุงููููููู ูู ููุงูู ู ููู ููุงููุงูู ููุนูุงุฏู ู ููู ุนูุงุฏูุงูู"',
    )
)
# Build the web UI: one text input handled by predict_ner, one HTML output.
# Components are taken from the top-level gradio namespace (gr.Textbox,
# gr.HTML); the older gr.inputs / gr.outputs namespaces are not available in
# current Gradio releases and fail with AttributeError at import time.
iface = gr.Interface(
    fn=predict_ner,
    inputs=gr.Textbox(label=label_text),
    outputs=gr.HTML(label="Predicted Labels"),
    title="Hadith Analysis",
)

# Launch the interface
iface.launch()