Spaces:

Montazer
/

arabert-finetuned-on-caner

Runtime error

App Files Files Community

arabert-finetuned-on-caner / app.py

Montazer

Update app.py

d18a284 11 months ago

raw

history blame contribute delete

No virus

4.68 kB

	import gradio as gr
	from transformers import pipeline
	import re

	# Right-to-left container <div>; the single "{}" placeholder receives the
	# already-rendered inner HTML via str.format.
	HTML_WRAPPER = (
	    '<div dir="rtl" style="overflow-x: auto; border: 1px solid #e6e9ef; '
	    'border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>'
	)

	# Model id of the fine-tuned checkpoint; point this at a newer checkpoint
	# to switch models.
	model_checkpoint = "Montazer/arabert-finetuned-caner"

	# Token-classification pipeline built once at import time and reused for
	# every request.
	token_classifier = pipeline(
	    "token-classification",
	    aggregation_strategy="simple",
	    model=model_checkpoint,
	)

	import re
	import unicodedata

	# Arabic diacritical marks to delete. Every key maps to None so the mapping
	# can be turned directly into a str.translate deletion table.
	diacritics = dict.fromkeys(
	    (
	        '\u064B',  # FATHATAN
	        '\u064C',  # DAMMATAN
	        '\u064D',  # KASRATAN
	        '\u064E',  # FATHA
	        '\u064F',  # DAMMA
	        '\u0650',  # KASRA
	        '\u0651',  # SHADDA
	        '\u0652',  # SUKUN
	    )
	)

	def remove_diacritics(text):
	    """Return *text* NFKD-normalised with the marks in ``diacritics`` removed."""
	    decomposed = unicodedata.normalize('NFKD', text)
	    # str.translate deletes every code point that maps to None.
	    deletion_table = {ord(mark): None for mark in diacritics}
	    return decomposed.translate(deletion_table)

	def remove_punctuation(text):
	    """Drop every character that is neither a word character nor whitespace."""
	    non_word_or_space = re.compile(r'[^\w\s]')
	    return non_word_or_space.sub('', text)

	def preprocess_arabic_text(text):
	    """Clean *text* before it is passed to the classifier.

	    Steps, in order: remove diacritics, remove punctuation, collapse every
	    whitespace run to a single space, and lowercase the result.
	    """
	    without_marks = remove_punctuation(remove_diacritics(text))
	    single_spaced = re.sub(r'\s+', ' ', without_marks)
	    return single_spaced.lower()


	# Define a function to highlight different labels in the text
	def highlight_text(text, entities):
	    """Render *text* as HTML with every recognised entity wrapped in ``<mark>``.

	    Args:
	        text: The string that the entity offsets index into.
	        entities: Iterable of dicts carrying ``start``, ``end``,
	            ``entity_group``, ``score`` and ``word`` keys, expected in
	            left-to-right order (presumably the aggregated output of the
	            token-classification pipeline; confirm against the caller).

	    Returns:
	        ``HTML_WRAPPER`` filled with the space-joined plain words and
	        ``<mark>`` elements. Each mark shows the entity word, its label as a
	        subscript and its score (two decimals) as a superscript.
	    """
	    # Local import so this block does not rely on a file-level import.
	    from html import escape

	    entity_colors = {"Allah": "#ffe5cc", "Book": "#b3daff", "Clan": "#faedcb", "Crime": "#ffb3d9",
	                     "Date": "#cce6ff", "Day": "#cce6ff", "Hell": "#d9d9d9", "Loc": "#d9b3ff",
	                     "Meas": "#e6ccff", "Mon": "#ffd6cc", "Month": "#ffd6cc", "NatOb": "#ffe0b3",
	                     "Number": "#ffe0cc", "Org": "#c1ffb3", "Para": "#f2f2f2", "Pers": "#b3ffb3",
	                     "Prophet": "#e6ccff", "Rlig": "#ffff80", "Sect": "#b3d9ff", "Time": "#ffb3ba"}
	    # Neutral fallback so a label missing from the palette is still shown
	    # instead of raising KeyError and aborting the whole render.
	    default_color = "#eeeeee"

	    highlighted = []
	    cursor = 0
	    for entity in entities:
	        start = int(entity['start'])
	        end = int(entity['end'])

	        # Plain text between the previous entity and this one. It is escaped
	        # because the result is emitted as raw HTML.
	        highlighted.extend(escape(word) for word in text[cursor:start].split())

	        entity_group = str(entity['entity_group'])
	        color = entity_colors.get(entity_group, default_color)
	        label = escape(entity_group)
	        score = entity['score']
	        highlighted.append(
	            f'<mark class="{label}" style="background-color: {color}">'
	            f'{escape(str(entity["word"]))}<sub>{label}</sub>'
	            f'<sup>{score:.2f}</sup></mark>'
	        )

	        # Resume exactly at the entity's end. Skipping one extra character
	        # would drop it whenever the entity is not followed by whitespace;
	        # split() already discards a separating space. max() keeps the
	        # cursor from moving backwards if entities overlap.
	        cursor = max(cursor, end)

	    highlighted.extend(escape(word) for word in text[cursor:].split())
	    return HTML_WRAPPER.format(' '.join(highlighted))


	# Create the Gradio interface
	def predict_ner(text):
	    """Gradio callback: preprocess *text*, tag it, and return highlighted HTML.

	    Any exception raised along the way is printed and its message is
	    returned in place of the HTML.
	    """
	    try:
	        cleaned = preprocess_arabic_text(text)
	        return highlight_text(cleaned, token_classifier(cleaned))
	    except Exception as err:
	        print(err)
	        return str(err)


	# Multi-line label for the input box: a prompt, an "Example:" line, and a
	# sample hadith, separated by newlines.
	label_text = "\n".join(
	    (
	        "Enter Hadith in Arabic:",
	        "Example:",
	        ' "حَدَّثَنَا عَبْد اللَّهِ، حَدَّثَنِي عُبَيْدُ اللَّهِ بْنُ عُمَرَ الْقَوَارِيرِيُّ، حَدَّثَنَا يُونُسُ بْنُ أَرْقَمَ، حَدَّثَنَا يَزِيدُ بْنُ أَبِي زِيَادٍ، عَنْ عَبْدِ الرَّحْمَنِ بْنِ أَبِي لَيْلَى، قَالَ شَهِدْتُ عَلِيًّا رَضِيَ اللَّهُ عَنْهُ فِي الرَّحَبَةِ يَنْشُدُ النَّاسَ أَنْشُدُ اللَّهَ مَنْ سَمِعَ رَسُولَ اللَّهِ صَلَّى اللَّهُ عَلَيْهِ وَسَلَّمَ يَقُولُ يَوْمَ غَدِيرِ خُمٍّ مَنْ كُنْتُ مَوْلَاهُ فَعَلِيٌّ مَوْلَاهُ لَمَّا قَامَ فَشَهِدَ قَالَ عَبْدُ الرَّحْمَنِ فَقَامَ اثْنَا عَشَرَ بَدْرِيًّا كَأَنِّي أَنْظُرُ إِلَى أَحَدِهِمْ فَقَالُوا نَشْهَدُ أَنَّا سَمِعْنَا رَسُولَ اللَّهِ صَلَّى اللَّهُ عَلَيْهِ وَسَلَّمَ يَقُولُ يَوْمَ غَدِيرِ خُمٍّ أَلَسْتُ أَوْلَى بِالْمُؤْمِنِينَ مِنْ أَنْفُسِهِمْ وَأَزْوَاجِي أُمَّهَاتُهُمْ فَقُلْنَا بَلَى يَا رَسُولَ اللَّهِ قَالَ فَمَنْ كُنْتُ مَوْلَاهُ فَعَلِيٌّ مَوْلَاهُ اللَّهُمَّ وَالِ مَنْ وَالَاهُ وَعَادِ مَنْ عَادَاهُ"',
	    )
	)

	# Build the Gradio UI: one text input wired to predict_ner, one HTML output.
	# Components are taken from the top-level gradio namespace because the
	# gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and removed
	# in Gradio 4, where accessing them raises AttributeError at import time.
	iface = gr.Interface(
	    fn=predict_ner,
	    inputs=gr.Textbox(label=label_text),
	    outputs=gr.HTML(label="Predicted Labels"),
	    title="Hadith Analysis",
	)

	# Launch the interface
	iface.launch()