Spaces:

blaikhole
/

urdu-ner

Sleeping

App Files Files Community

urdu-ner / app.py

JalalHxmi

Update app.py

342c209 verified 12 days ago

raw

history blame contribute delete

2.86 kB

	import gradio as gr
	import torch
	from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

	# 🔥 Define Label Mapping Dictionary
	# Model class index -> Urdu display label. The position of each entry in the
	# tuple below IS its label ID, so the order must not be changed.
	LABEL_MAP = dict(
	    enumerate(
	        (
	            "تاریخ",  # 0: date
	            "عہدہ",  # 1: designation
	            "مقام",  # 2: location
	            "نمبر",  # 3: number
	            "تنظیم",  # 4: organization
	            "غیر متعلقہ",  # 5: other / not an entity
	            "نام",  # 6: name
	            "وقت",  # 7: time
	        )
	    )
	)

	# 🔹 Replace with your Hugging Face Model
	MODEL_NAME = "blaikhole/distilbert-urdu-ner"

	# Decide the compute target once. pipeline() is given a device index
	# (0 when CUDA is available, otherwise -1), while Module.to() is given the
	# matching device string.
	device = 0 if torch.cuda.is_available() else -1
	_torch_device = "cuda" if device == 0 else "cpu"

	# Load the token-classification weights, move them to the chosen device,
	# then load the tokenizer published under the same model name.
	model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
	model = model.to(_torch_device)
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

	# Token-level NER pipeline built from the objects loaded above.
	ner_pipeline = pipeline(task="ner", model=model, tokenizer=tokenizer, device=device)

	def merge_subwords_and_decode(entities, label_map=None, *, other_label="غیر متعلقہ"):
	    """Merge word-piece tokens into labelled text segments.

	    Tokens whose text starts with ``##`` are glued onto the segment being
	    built (without a space). Consecutive whole-word tokens that share a
	    label are joined with a single space into one segment. Tokens labelled
	    ``other_label``, and tokens whose label ID is missing from
	    ``label_map``, are treated as plain text (label ``None``).

	    Args:
	        entities: Iterable of token dicts, each holding a ``"word"`` string
	            and an ``"entity"`` string of the form ``"LABEL_<id>"``.
	        label_map: Mapping from integer label ID to display label.
	            Defaults to the module-level ``LABEL_MAP``.
	        other_label: Display label that marks non-entity tokens.

	    Returns:
	        A list of ``(text, label)`` tuples in input order, where ``label``
	        is a display label or ``None`` for plain text.

	    Raises:
	        ValueError: If an ``"entity"`` value is not ``"LABEL_<int>"``.
	    """
	    if label_map is None:
	        label_map = LABEL_MAP

	    merged_entities = []
	    current_word = ""
	    current_label = None

	    for entity in entities:
	        word = entity["word"]
	        label_id = int(entity["entity"].replace("LABEL_", ""))
	        # Unknown IDs resolve to None so they render as plain text instead
	        # of surfacing as a spurious category.
	        label = label_map.get(label_id)
	        if label == other_label:
	            label = None

	        if word.startswith("##"):
	            # A continuation piece with no word in progress starts the
	            # segment itself, so it must bring its own label along.
	            if not current_word:
	                current_label = label
	            current_word += word[2:]
	        elif current_word and current_label == label:
	            # Same label as the running segment: extend it.
	            current_word += " " + word
	        else:
	            # Label changed (or first token): flush and start a new segment.
	            if current_word:
	                merged_entities.append((current_word, current_label))
	            current_word = word
	            current_label = label

	    # Flush whatever segment is still open after the last token.
	    if current_word:
	        merged_entities.append((current_word, current_label))
	    return merged_entities



	# 🔹 Function to Run NER
	def analyze_text(text):
	    """Run the NER pipeline on ``text`` and return highlightable segments.

	    Args:
	        text: Raw input string from the UI. May be empty or ``None``.

	    Returns:
	        The list of ``(text, label)`` tuples produced by
	        ``merge_subwords_and_decode``; an empty list when the input is
	        empty or whitespace-only.
	    """
	    # Nothing to tag: skip model inference on blank input.
	    if not text or not text.strip():
	        return []

	    entities = ner_pipeline(text)  # raw per-token predictions
	    return merge_subwords_and_decode(entities)

	# 🔹 Example Sentence
	EXAMPLE_SENT = "بات یہ ہے تین بجے احمد لاہور میں بطور مینیجر گوگل کمپنی کے آفس میں گیا۔"

	# Page layout: heading, a text box pre-filled with the example sentence,
	# a run button, and a highlighted-text view for the result. Components are
	# created in the order they should appear.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🏥 Urdu language Named Entity Recognition (NER)")

	    text_input = gr.Textbox(value=EXAMPLE_SENT, lines=5, label="Enter Text")
	    analyze_button = gr.Button("Run NER Model")
	    output = gr.HighlightedText(combine_adjacent=True, label="NER Result")

	    # Clicking the button sends the text box contents through analyze_text
	    # and shows the returned segments in the highlighted-text view.
	    analyze_button.click(fn=analyze_text, inputs=text_input, outputs=output)

	demo.launch()