TaNER

Runtime error

App Files Files Community

TaNER / app.py

livinNector

added IndicBERTv2-MLM-only-NER

dfd905e about 2 years ago

raw

history blame contribute delete

3.26 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

	def get_ner_bio(pipe, text):
	    """Label every whitespace-separated word of *text* with its raw model tag.

	    The words are handed to the tokenizer as pre-split input, so the word ids
	    it reports index directly into the word list. A word receives the label
	    predicted for its first sub-token; later sub-tokens and special tokens
	    (word id ``None``) are ignored.

	    Args:
	        pipe: Object exposing ``tokenizer`` and ``model`` attributes, such as
	            the token-classification pipelines built in this file. The
	            tokenizer's output must support ``word_ids()``.
	        text: Sentence to tag. It is split on any run of whitespace.

	    Returns:
	        A list of ``(word, label)`` tuples, one per word, in input order.
	        ``label`` is ``None`` when the prediction is ``"O"`` or when the word
	        produced no token (for example, it was truncated away).
	    """
	    words = text.split()
	    if not words:
	        # Nothing to tag; avoid running the model on an empty sequence.
	        return []

	    # Pre-split input keeps tokenizer word ids aligned with `words`, even when
	    # the tokenizer would otherwise split punctuation into separate words.
	    encoding = pipe.tokenizer(
	        words,
	        is_split_into_words=True,
	        truncation=True,
	        return_tensors="pt",
	    )
	    with torch.no_grad():
	        class_ids = pipe.model(**encoding).logits.argmax(-1)[0].tolist()

	    id2label = pipe.model.config.id2label
	    first_label = {}
	    for class_id, word_id in zip(class_ids, encoding.word_ids()):
	        if word_id is None:
	            continue
	        # setdefault keeps only the first sub-token's label for each word.
	        first_label.setdefault(word_id, id2label[class_id])

	    tagged = []
	    for index, word in enumerate(words):
	        label = first_label.get(index, "O")
	        tagged.append((word, None if label == "O" else label))
	    return tagged

	def get_ner(pipe, text, aggregation_strategy="first"):
	    """Run one NER pipeline on *text* and shape the result for display.

	    Args:
	        pipe: Callable pipeline accepting ``(text, aggregation_strategy=...)``.
	        text: Sentence to tag.
	        aggregation_strategy: ``"bio_first"`` delegates to ``get_ner_bio``;
	            any other value is forwarded to ``pipe`` unchanged.

	    Returns:
	        For ``"bio_first"``, the list of ``(word, label)`` tuples returned by
	        ``get_ner_bio``. Otherwise a dict ``{"text": text, "entities": [...]}``
	        in which every entity carries an ``"entity"`` key.
	    """
	    if aggregation_strategy == "bio_first":
	        return get_ner_bio(pipe, text)

	    entities = pipe(text, aggregation_strategy=aggregation_strategy)
	    for entity in entities:
	        # Aggregated results name the label "entity_group"; un-aggregated ones
	        # already use "entity", so only copy when the grouped key is present.
	        if "entity_group" in entity:
	            entity["entity"] = entity["entity_group"]
	    return {"text": text, "entities": entities}

	# Model identifiers passed to pipeline(); each one also labels an output panel.
	ner_models = [
	    "livinNector/TaNER-500",
	    "livinNector/TaNER-1k",
	    "livinNector/IndicBERTv2-MLM-only-NER",
	    "ai4bharat/IndicNER",
	    "livinNector/IndicBERTNER",
	    "livinNector/IndicNER",
	    "livinNector/xlm-roberta-base-ner",
	    "livinNector/distilbert-multilingual-base-ner",
	]

	# One token-classification pipeline per identifier, in the same order as
	# ner_models so outputs can be matched to models by position.
	ner_pipes = [
	    pipeline("token-classification", model=checkpoint)
	    for checkpoint in ner_models
	]

	def get_ner_outputs(text, aggregation_strategy):
	    """Tag *text* with every pipeline in ``ner_pipes``.

	    Returns a list holding one ``get_ner`` result per pipeline, in the same
	    order as ``ner_pipes``.
	    """
	    outputs = []
	    for ner_pipe in ner_pipes:
	        outputs.append(get_ner(ner_pipe, text, aggregation_strategy))
	    return outputs
	# Sample inputs for the interface: each row is [sentence, aggregation strategy].
	examples = [
	    [
	        "ஆனந்த் மற்றும் லிவின் நெக்டர் ஆகியொர் அண்ணாமலை பல்கலைக்கழகத்தில் படித்து வருகின்றனர்.",
	        "first",
	    ],
	    [
	        "இந்தியன் இன்ஸ்டிட்யூட் ஆஃப் டெக்னாலஜி மெட்ராஸ் கிண்டியில் அமைந்துள்ளது.",
	        "average",
	    ],
	    [
	        "சச்சின் டெண்டுல்கர் மும்பை மாநகரத்தைச் சேர்ந்த ஒரு நடுத்தரக் குடும்பத்தில் நான்காவது குழந்தையாகப் பிறந்தார். பல துடுப்பாட்ட வீரர்களை உருவாக்கிய சாரதாஷ்ரம் வித்யாமந்திர் பள்ளியில் சேர்ந்தார்.",
	        "bio_first",
	    ],
	]

	# Comparison UI: one text box and one strategy selector feed get_ner_outputs,
	# which returns one result per model; each result gets its own highlighted
	# panel labelled with the model identifier.
	iface = gr.Interface(
	    get_ner_outputs,
	    [
	        gr.Textbox(value=examples[0][0]),
	        gr.Dropdown(["bio_first", "first", "max", "average"], value=examples[0][1]),
	    ],
	    # HighlightedText is the canonical component name; `Highlight` was only a
	    # legacy alias for it.
	    [gr.HighlightedText(label=model) for model in ner_models],
	    description="Named Entity Recognition Interface Comparing Various Transformer Based NER models for Tamil Language.",
	    examples=examples,
	    title="TaNER",
	)

	# launch(enable_queue=True) is no longer accepted by current Gradio releases;
	# queue() enables request queuing and returns the interface for chaining.
	iface.queue().launch()