"""Gradio demo: named-entity tagging of Arabic hadith text.

The script loads a token-classification pipeline, normalises the user's
input (diacritics, punctuation, whitespace, case), runs the model on the
normalised text and renders the detected entities as highlighted HTML.
"""
import html
import re
import unicodedata
from typing import Any, Iterable, Mapping

import gradio as gr
from transformers import pipeline

HTML_WRAPPER = """
{}
"""

# Replace this with the latest fine-tuned checkpoint when one is available.
model_checkpoint = "Montazer/arabert-finetuned-caner"
token_classifier = pipeline(
    "token-classification",
    model=model_checkpoint,
    aggregation_strategy="simple",
)

diacritics = {
    '\u064B': None,  # FATHATAN
    '\u064C': None,  # DAMMATAN
    '\u064D': None,  # KASRATAN
    '\u064E': None,  # FATHA
    '\u064F': None,  # DAMMA
    '\u0650': None,  # KASRA
    '\u0651': None,  # SHADDA
    '\u0652': None,  # SUKUN
}

# str.translate needs code points as keys; build the table once, not per call.
_DIACRITICS_TABLE = dict.fromkeys(map(ord, diacritics))

# Patterns are compiled once because they run on every request.
_NON_WORD_RE = re.compile(r'[^\w\s]')
_WHITESPACE_RE = re.compile(r'\s+')

# Background colour used for each entity label in the rendered HTML.
_ENTITY_COLORS = {
    "Allah": "#ffe5cc",
    "Book": "#b3daff",
    "Clan": "#faedcb",
    "Crime": "#ffb3d9",
    "Date": "#cce6ff",
    "Day": "#cce6ff",
    "Hell": "#d9d9d9",
    "Loc": "#d9b3ff",
    "Meas": "#e6ccff",
    "Mon": "#ffd6cc",
    "Month": "#ffd6cc",
    "NatOb": "#ffe0b3",
    "Number": "#ffe0cc",
    "Org": "#c1ffb3",
    "Para": "#f2f2f2",
    "Pers": "#b3ffb3",
    "Prophet": "#e6ccff",
    "Rlig": "#ffff80",
    "Sect": "#b3d9ff",
    "Time": "#ffb3ba",
}
# Fallback for labels missing from the colour map above.
_DEFAULT_ENTITY_COLOR = "#eeeeee"


def remove_diacritics(text: str) -> str:
    """Return *text* NFKD-normalised with the marks in ``diacritics`` removed."""
    normalized_text = unicodedata.normalize('NFKD', text)
    return normalized_text.translate(_DIACRITICS_TABLE)


def remove_punctuation(text: str) -> str:
    """Return *text* without characters that are neither word chars nor whitespace."""
    return _NON_WORD_RE.sub('', text)


def preprocess_arabic_text(text: str) -> str:
    """Normalise *text* before it is passed to the token classifier.

    Steps, in order: strip diacritics, strip punctuation, collapse every
    whitespace run to a single space, lowercase.
    """
    text = remove_diacritics(text)
    text = remove_punctuation(text)
    text = _WHITESPACE_RE.sub(' ', text)
    return text.lower()


def highlight_text(text: str, entities: Iterable[Mapping[str, Any]]) -> str:
    """Render *text* as HTML with each detected entity highlighted.

    Args:
        text: The exact string that was given to the classifier, so the
            ``start``/``end`` offsets in *entities* index into it.
        entities: Mappings with the keys ``start``, ``end``, ``word``,
            ``entity_group`` and ``score``, in order of appearance.

    Returns:
        ``HTML_WRAPPER`` filled with the space-joined plain words and
        ``<mark>`` elements (one per entity, coloured by label).
    """
    pieces = []
    cursor = 0
    for entity in entities:
        start = int(entity['start'])
        end = int(entity['end'])

        # Plain text between the previous entity and this one.
        pieces.extend(html.escape(word) for word in text[cursor:start].split())

        entity_group = entity['entity_group']
        score = entity['score']
        color = _ENTITY_COLORS.get(entity_group, _DEFAULT_ENTITY_COLOR)
        pieces.append(
            f'<mark style="background-color: {color};">'
            f'{html.escape(str(entity["word"]))} '
            f'<small>{html.escape(str(entity_group))} {score:.2f}</small>'
            f'</mark>'
        )

        # Resume exactly at the entity's end offset. Advancing one further
        # (as the code used to) drops a character whenever the entity is
        # not followed by whitespace.
        cursor = end

    pieces.extend(html.escape(word) for word in text[cursor:].split())
    return HTML_WRAPPER.format(' '.join(pieces))


def predict_ner(text):
    """Gradio callback: preprocess *text*, tag entities, return HTML.

    Blank input short-circuits to an empty wrapper without calling the
    model. Any exception is printed and its (HTML-escaped) message is
    returned so the UI shows the failure instead of breaking.
    """
    if not text or not text.strip():
        return HTML_WRAPPER.format('')
    try:
        text = preprocess_arabic_text(text)
        entities = token_classifier(text)
        return highlight_text(text, entities)
    except Exception as e:  # UI boundary: report the error rather than crash.
        print(e)
        return html.escape(str(e))


label_text = (
    "Enter Hadith in Arabic:\u000A"
    "Example:\u000A"
    ' "حَدَّثَنَا عَبْد اللَّهِ، حَدَّثَنِي عُبَيْدُ اللَّهِ بْنُ عُمَرَ الْقَوَارِيرِيُّ، حَدَّثَنَا يُونُسُ بْنُ أَرْقَمَ، حَدَّثَنَا يَزِيدُ بْنُ أَبِي زِيَادٍ، عَنْ عَبْدِ الرَّحْمَنِ بْنِ أَبِي لَيْلَى، قَالَ شَهِدْتُ عَلِيًّا رَضِيَ اللَّهُ عَنْهُ فِي الرَّحَبَةِ يَنْشُدُ النَّاسَ أَنْشُدُ اللَّهَ مَنْ سَمِعَ رَسُولَ اللَّهِ صَلَّى اللَّهُ عَلَيْهِ وَسَلَّمَ يَقُولُ يَوْمَ غَدِيرِ خُمٍّ مَنْ كُنْتُ مَوْلَاهُ فَعَلِيٌّ مَوْلَاهُ لَمَّا قَامَ فَشَهِدَ قَالَ عَبْدُ الرَّحْمَنِ فَقَامَ اثْنَا عَشَرَ بَدْرِيًّا كَأَنِّي أَنْظُرُ إِلَى أَحَدِهِمْ فَقَالُوا نَشْهَدُ أَنَّا سَمِعْنَا رَسُولَ اللَّهِ صَلَّى اللَّهُ عَلَيْهِ وَسَلَّمَ يَقُولُ يَوْمَ غَدِيرِ خُمٍّ أَلَسْتُ أَوْلَى بِالْمُؤْمِنِينَ مِنْ أَنْفُسِهِمْ وَأَزْوَاجِي أُمَّهَاتُهُمْ فَقُلْنَا بَلَى يَا رَسُولَ اللَّهِ قَالَ فَمَنْ كُنْتُ مَوْلَاهُ فَعَلِيٌّ مَوْلَاهُ اللَّهُمَّ وَالِ مَنْ وَالَاهُ وَعَادِ مَنْ عَادَاهُ"'
)

# NOTE(review): the gr.inputs / gr.outputs namespaces appear to be deprecated
# in newer Gradio releases — confirm against the pinned Gradio version before
# upgrading.
iface = gr.Interface(
    fn=predict_ner,
    inputs=gr.inputs.Textbox(label=label_text),
    outputs=gr.outputs.HTML(label="Predicted Labels"),
    title="Hadith Analysis",
)

# Launch the interface
iface.launch()