import html
import re

import gradio as gr
from transformers import pipeline
# Template for the final output: the rendered text is substituted for "{}"
# via str.format and ends up surrounded by a leading and trailing newline.
HTML_WRAPPER = "\n{}\n"
# Identifier of the fine-tuned NER model to load. Point this at a newer
# checkpoint when one becomes available.
model_checkpoint = "Montazer/arabert-finetuned-caner"

# Token-classification pipeline, created once when the module is loaded and
# reused for every request handled by predict_ner.
token_classifier = pipeline(
    task="token-classification",
    model=model_checkpoint,
    aggregation_strategy="simple",
)
import re
import unicodedata
# Arabic diacritic marks stripped by remove_diacritics. Only the keys are
# used; every value is None.
diacritics = dict.fromkeys((
    '\u064B',  # FATHATAN
    '\u064C',  # DAMMATAN
    '\u064D',  # KASRATAN
    '\u064E',  # FATHA
    '\u064F',  # DAMMA
    '\u0650',  # KASRA
    '\u0651',  # SHADDA
    '\u0652',  # SUKUN
))
def remove_diacritics(text):
    """Return *text* NFKD-normalized with the listed Arabic diacritics removed.

    The string is first put through Unicode NFKD normalization, then every
    code point that appears as a key of the module-level ``diacritics``
    mapping is deleted. Note that NFKD normalization itself may rewrite
    characters other than the diacritics.
    """
    # Mapping a code point to None makes str.translate drop that character.
    deletion_table = {ord(mark): None for mark in diacritics}
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed.translate(deletion_table)
def remove_punctuation(text):
    """Return *text* with every non-word, non-whitespace character deleted.

    Word characters (letters, digits, underscore) and whitespace are kept
    exactly as they are; everything else is removed without replacement.
    """
    not_word_or_space = re.compile(r'[^\w\s]')
    return not_word_or_space.sub('', text)
def preprocess_arabic_text(text):
    """Normalize raw input text before it is handed to the classifier.

    Steps, in order: strip diacritics, strip punctuation, collapse every
    run of whitespace into a single space, and lowercase the result.
    """
    without_marks = remove_punctuation(remove_diacritics(text))
    single_spaced = re.sub(r'\s+', ' ', without_marks)
    return single_spaced.lower()
# Background colour used to highlight each entity label.
_ENTITY_COLORS = {
    "Allah": "#ffe5cc", "Book": "#b3daff", "Clan": "#faedcb", "Crime": "#ffb3d9",
    "Date": "#cce6ff", "Day": "#cce6ff", "Hell": "#d9d9d9", "Loc": "#d9b3ff",
    "Meas": "#e6ccff", "Mon": "#ffd6cc", "Month": "#ffd6cc", "NatOb": "#ffe0b3",
    "Number": "#ffe0cc", "Org": "#c1ffb3", "Para": "#f2f2f2", "Pers": "#b3ffb3",
    "Prophet": "#e6ccff", "Rlig": "#ffff80", "Sect": "#b3d9ff", "Time": "#ffb3ba",
}
# Colour for any label that is missing from the table above.
_DEFAULT_ENTITY_COLOR = "#e0e0e0"


def highlight_text(text, entities):
    """Render *text* as HTML with every recognised entity highlighted.

    Args:
        text: The string that was given to the token classifier.
        entities: Iterable of dicts, each providing ``"start"`` and ``"end"``
            (character offsets into *text*; ``end`` is treated as
            exclusive), ``"word"``, ``"entity_group"`` and ``"score"``.

    Returns:
        ``HTML_WRAPPER`` formatted with the space-joined output: plain words
        are HTML-escaped, and each entity becomes a ``<mark>`` element
        coloured by its label, followed by the label and the score
        (two decimals) in a ``<sub>`` element.
    """
    pieces = []
    cursor = 0
    for entity in entities:
        start = int(entity['start'])
        end = int(entity['end'])
        # Plain text between the previous entity and this one.
        pieces.extend(html.escape(word) for word in text[cursor:start].split())

        label = entity['entity_group']
        color = _ENTITY_COLORS.get(label, _DEFAULT_ENTITY_COLOR)
        pieces.append(
            f'<mark style="background-color: {color}; color: #000; '
            f'padding: 0.1em 0.3em; border-radius: 0.3em;">'
            f'{html.escape(entity["word"])} '
            f'<sub>{html.escape(label)} {entity["score"]:.2f}</sub></mark>'
        )
        # Resume exactly at the entity's end. Skipping one extra character
        # would drop the first letter of whatever follows when no space
        # separates them; split() already discards the whitespace.
        cursor = max(cursor, end)
    pieces.extend(html.escape(word) for word in text[cursor:].split())
    return HTML_WRAPPER.format(' '.join(pieces))
def predict_ner(text):
    """Run the full NER flow on *text* and return the output to display.

    The input is preprocessed, classified, and rendered by highlight_text.
    If any of those steps raises, the exception is printed and its message
    is returned in place of the rendered output.
    """
    try:
        cleaned = preprocess_arabic_text(text)
        rendered = highlight_text(cleaned, token_classifier(cleaned))
    except Exception as err:
        print(err)
        return str(err)
    return rendered
# Label displayed on the input textbox: a short prompt followed by a sample
# hadith written with full diacritics. "\u000A" is the line-feed character,
# so the prompt, the word "Example:" and the sample sit on separate lines.
label_text = (
"Enter Hadith in Arabic:\u000A"
"Example:\u000A"
' "حَدَّثَنَا عَبْد اللَّهِ، حَدَّثَنِي عُبَيْدُ اللَّهِ بْنُ عُمَرَ الْقَوَارِيرِيُّ، حَدَّثَنَا يُونُسُ بْنُ أَرْقَمَ، حَدَّثَنَا يَزِيدُ بْنُ أَبِي زِيَادٍ، عَنْ عَبْدِ الرَّحْمَنِ بْنِ أَبِي لَيْلَى، قَالَ شَهِدْتُ عَلِيًّا رَضِيَ اللَّهُ عَنْهُ فِي الرَّحَبَةِ يَنْشُدُ النَّاسَ أَنْشُدُ اللَّهَ مَنْ سَمِعَ رَسُولَ اللَّهِ صَلَّى اللَّهُ عَلَيْهِ وَسَلَّمَ يَقُولُ يَوْمَ غَدِيرِ خُمٍّ مَنْ كُنْتُ مَوْلَاهُ فَعَلِيٌّ مَوْلَاهُ لَمَّا قَامَ فَشَهِدَ قَالَ عَبْدُ الرَّحْمَنِ فَقَامَ اثْنَا عَشَرَ بَدْرِيًّا كَأَنِّي أَنْظُرُ إِلَى أَحَدِهِمْ فَقَالُوا نَشْهَدُ أَنَّا سَمِعْنَا رَسُولَ اللَّهِ صَلَّى اللَّهُ عَلَيْهِ وَسَلَّمَ يَقُولُ يَوْمَ غَدِيرِ خُمٍّ أَلَسْتُ أَوْلَى بِالْمُؤْمِنِينَ مِنْ أَنْفُسِهِمْ وَأَزْوَاجِي أُمَّهَاتُهُمْ فَقُلْنَا بَلَى يَا رَسُولَ اللَّهِ قَالَ فَمَنْ كُنْتُ مَوْلَاهُ فَعَلِيٌّ مَوْلَاهُ اللَّهُمَّ وَالِ مَنْ وَالَاهُ وَعَادِ مَنْ عَادَاهُ"'
)
# Input and output components for the demo page.
# NOTE(review): gr.inputs / gr.outputs look like the legacy component
# namespaces; newer Gradio releases expose gr.Textbox / gr.HTML directly.
# Confirm against the Gradio version this app is pinned to before changing.
_hadith_input = gr.inputs.Textbox(label=label_text)
_labels_output = gr.outputs.HTML(label="Predicted Labels")

# Wire predict_ner to the components above.
iface = gr.Interface(
    fn=predict_ner,
    inputs=_hadith_input,
    outputs=_labels_output,
    title="Hadith Analysis",
)

# Start the app as soon as the module is executed.
iface.launch()