import gradio as gr from transformers import pipeline import re HTML_WRAPPER = """
{}
""" # Replace this with above latest checkpoint model_checkpoint = "ArefSadeghian/arabert-finetuned-caner" token_classifier = pipeline( "token-classification", model=model_checkpoint, aggregation_strategy="simple" ) import re import unicodedata diacritics = { '\u064B': None, # FATHATAN '\u064C': None, # DAMMATAN '\u064D': None, # KASRATAN '\u064E': None, # FATHA '\u064F': None, # DAMMA '\u0650': None, # KASRA '\u0651': None, # SHADDA '\u0652': None, # SUKUN } def remove_diacritics(text): normalized_text = unicodedata.normalize('NFKD', text) return normalized_text.translate(dict.fromkeys(map(ord, diacritics))) def remove_punctuation(text): return re.sub(r'[^\w\s]', '', text) def preprocess_arabic_text(text): # Remove diacritics text = remove_diacritics(text) # Remove punctuation text = remove_punctuation(text) # Normalize whitespace text = re.sub(r'\s+', ' ', text) # Convert to lowercase text = text.lower() return text # Define a function to highlight different labels in the text def highlight_text(text, entities): entity_colors = {"Allah": "#ffe5cc", "Book": "#b3daff", "Clan": "#faedcb", "Crime": "#ffb3d9", "Date": "#cce6ff", "Day": "#cce6ff", "Hell": "#d9d9d9", "Loc": "#d9b3ff", "Meas": "#e6ccff", "Mon": "#ffd6cc", "Month": "#ffd6cc", "NatOb": "#ffe0b3", "Number": "#ffe0cc", "Org": "#c1ffb3", "Para": "#f2f2f2", "Pers": "#b3ffb3", "Prophet": "#e6ccff", "Rlig": "#ffff80", "Sect": "#b3d9ff", "Time": "#ffb3ba"} highlighted = [] i = 0 for entity in entities: highlighted.extend(text[i:int(entity['start'])].split()) entity_group = entity['entity_group'] score = entity['score'] marked_text = f'{entity["word"]}{entity_group}{score:.2f}' highlighted.append(marked_text) i = int(entity['end']) + 1 highlighted.extend(text[i:].split()) return HTML_WRAPPER.format(' '.join(highlighted)) # Create the Gradio interface def predict_ner(text): try: text = preprocess_arabic_text(text) entities = token_classifier(text) highlighted_text = highlight_text(text, entities) return highlighted_text + '\n\n' + str(entities) except Exception as e: print(e) return str(e) iface = gr.Interface( fn=predict_ner, inputs=gr.inputs.Textbox(label="Enter Hadith in Arabic"), outputs=gr.outputs.HTML(label="Predicted Labels"), title="Hadith Analysis" ) # Launch the interface iface.launch()