Spaces:

liewchooichin
/

ner_from_pretrained

Runtime error

App Files Files Community

ner_from_pretrained / app.py

liewchooichin

get only score at least 0.8

7ab4587 verified 3 months ago

raw

history blame contribute delete

No virus

3.31 kB

	# Gradio
	import gradio as gr

	# Hugging Face libraries
	from transformers import pipeline
	from transformers import AutoTokenizer

	# Hub identifier of the pretrained checkpoint; shared by the pipeline and
	# the tokenizer below so both load the same model files.
	model_checkpoint = "dbmdz/bert-large-cased-finetuned-conll03-english"

	# NER pipeline used by `predict`, which reads the `entity_group`, `word`
	# and `score` keys of every result it returns.
	ner_task = pipeline(
	    task="ner",
	    model=model_checkpoint,
	    aggregation_strategy="simple",
	)

	# Stand-alone tokenizer for the same checkpoint; `predict` uses it to show
	# the token pieces of the input sentence.
	tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

	# Sample sentences offered as clickable examples in the interface.
	sentence1 = "Herbert Akroyd Stuart patented the first diesel engine, 1890"
	# Adjacent string literals are joined at compile time, so the sentence no
	# longer picks up the leading whitespace of a continuation line (which a
	# backslash continuation inside the literal would embed in the text).
	sentence2 = (
	    "May 10 A delegation tells Leopold III his return would be "
	    "illtimed, 1945"
	)
	sentence3 = "Fri May 10 Fred Astaire (Frederick Austerlitz) born in Omaha, Nebraska, 1899"
	sentence4 = "Fri May 10 Germany invades Low Countries, 1940"
	sentence5 = "Fri May 10 Nazi bookburning, 1933"
	sentence6 = "Fri May 10 Confederate Memorial Day in South Carolina"
	sentence7 = "Fri May 10 Mothers Day in Guatemala"
	sentence8 = "Fri May 10 Dave Mason is born in Worcester, England, 1945"


	# Prediction callback used by the Gradio interface
	def predict(sentence, min_score=0.8):
	    """
	    Tokenize a sentence and report the named entities predicted for it.

	    The module-level ``tokenizer`` supplies the token pieces and the
	    module-level ``ner_task`` pipeline supplies the predictions.

	    Args:
	        sentence: Text to analyse.
	        min_score: Lowest prediction score an entity needs in order to be
	            reported; predictions scoring below it are skipped.
	            Defaults to 0.8.

	    Returns:
	        A ``(token_pieces, formatted_ner)`` pair: the tokens the tokenizer
	        produced for ``sentence``, and a text report holding one numbered
	        section per reported entity followed by the total number of
	        reported entities.
	    """
	    # Token pieces exactly as the tokenizer reports them for this sentence.
	    token_pieces = tokenizer(sentence).tokens()

	    # One text section per reported entity; joined once at the end instead
	    # of growing a string with repeated concatenation.
	    sections = []
	    for result in ner_task(sentence):
	        # Guard clause: drop low-confidence predictions.
	        if result["score"] < min_score:
	            continue
	        # Number the reported entities from 1 for intuitive reading.
	        number = len(sections) + 1
	        sections.append(
	            f"Number: {number} \n"
	            f"Entity: {result['entity_group']}\n"
	            f"Word group: {result['word']}\n"
	            f"Score: {result['score']}\n"
	            # Raw result appended so every field of the prediction is visible.
	            f"{result}\n\n"
	        )

	    summary = f"Number of predicted entities: {len(sections)}\n\n"
	    formatted_ner = "".join(sections) + summary

	    return token_pieces, formatted_ner

	# Main Gradio interface
	# The description is assembled from single-line literals with explicit
	# newlines so its content never depends on how this file is indented
	# (Markdown renders indented lines as a code block). The entity list uses
	# MISC, the label name for miscellaneous entities, and every <li> is closed.
	_description = (
	    f"\n## Using model {model_checkpoint} to predict entities type\n"
	    '<p style="font-size: 1.2rem;">Notes: </p>\n'
	    '<ul style="font-size: 1.2rem; list-style-type:square">\n'
	    "<li> The examples are from the calendar utility in Linux.</li>\n"
	    "<li> The model cannot recognize date and time.</li>\n"
	    "<li> It can recognize PER (person), LOC (location), "
	    "ORG (organization) and MISC (miscellaneous) entities.</li>\n"
	    "</ul>\n"
	)

	demo = gr.Interface(
	    fn=predict,
	    # Single free-text input holding the sentence to analyse.
	    inputs=[
	        gr.TextArea(
	            label="Place your sentence here",
	            lines=10,
	            show_copy_button=True,
	        ),
	    ],
	    # Two read-only outputs, in the order `predict` returns them.
	    outputs=[
	        gr.TextArea(
	            label="Tokens input to the model",
	            interactive=False,
	            lines=10,
	            show_copy_button=True,
	        ),
	        gr.TextArea(
	            label="Prediction of entities",
	            interactive=False,
	            lines=10,
	            show_copy_button=True,
	        ),
	    ],
	    # Each example is a one-element list because there is one input.
	    examples=[
	        [sentence1],
	        [sentence2],
	        [sentence3],
	        [sentence4],
	        [sentence5],
	        [sentence6],
	        [sentence7],
	        [sentence8],
	    ],
	    title="NER (Named Entities Recognition)",
	    description=_description,
	)
	demo.launch()