Spaces:

carolanderson
/

food-ner

Runtime error

App Files Files Community

food-ner / app.py

carolanderson

add link to model card

62090fa over 1 year ago

raw

history blame contribute delete

3.21 kB

	import gradio as gr
	from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline


	def convert_hf_ents_to_gradio(hf_ents):
	    """Convert Hugging Face NER entities into Gradio-style entity dicts.

	    Args:
	        hf_ents: Iterable of mappings, each providing ``start``, ``end``
	            and ``entity_group`` keys. Any other keys are ignored.

	    Returns:
	        A list with one dict per input entity, holding ``start``, ``end``
	        and ``entity`` keys; ``entity`` carries the input's
	        ``entity_group`` value.
	    """
	    return [
	        {"start": ent["start"], "end": ent["end"], "entity": ent["entity_group"]}
	        for ent in hf_ents
	    ]


	def tag(text):
	    """Run the NER pipeline on ``text`` and package the result for display.

	    Relies on the module-level ``nlp`` pipeline, which is created in the
	    ``__main__`` section of this file before the UI is launched.

	    Args:
	        text: The input string to tag.

	    Returns:
	        A dict with ``text`` (the input string) and ``entities`` (the list
	        produced by ``convert_hf_ents_to_gradio``). For empty,
	        whitespace-only or ``None`` input the entity list is empty and the
	        model is not invoked.
	    """
	    # Nothing to tag: skip the model call entirely for blank input.
	    if not text or not text.strip():
	        return {"text": text or "", "entities": []}

	    hf_ents = nlp(text, aggregation_strategy="first")
	    return {"text": text, "entities": convert_hf_ents_to_gradio(hf_ents)}


	if __name__ == "__main__":
	    # NOTE(review): the nesting below was reconstructed from a copy of the
	    # file whose indentation had been lost -- confirm the Row/Column layout
	    # and the position of demo.launch() against the original.

	    # Model weights come from the fine-tuned checkpoint; the tokenizer is
	    # loaded from "roberta-base" with add_prefix_space=True.
	    model_ckpt = "carolanderson/roberta-base-food-ner"
	    model = AutoModelForTokenClassification.from_pretrained(model_ckpt)
	    tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)
	    # `nlp` is a module-level name; tag() looks it up at call time.
	    nlp = pipeline("ner", model=model, tokenizer=tokenizer)

	    # Explicit encoding so the markdown is read the same way regardless of
	    # the host's locale default.
	    with open("app_text/blog_text.md", "r", encoding="utf-8") as f:
	        blog_text = f.read()

	    # Each example is a one-element list: one value for the single input.
	    examples = [
	        ["Saute the onions in olive oil until browned."],
	        ["Add bourbon and sweet vermouth to the shaker."],
	        ["Salt the water and butter the bread."],
	        ["Add salt to the water and spread butter on the bread."],
	    ]

	    with gr.Blocks() as demo:
	        gr.Markdown("# Extracting Food Mentions from Text")
	        html = ("<div style='max-width:100%; max-height:200px; overflow:auto'>"
	                + "<img src='file=app_images/featured.jpg' alt='Cookbook'>"
	                + "</div>"
	                )
	        gr.HTML(html)
	        gr.Markdown("This is a model I trained to extract food terms from text. "
	                    "I fine tuned RoBERTa base on a dataset I created by labeling a set of recipes.")
	        gr.Markdown("Details about the training data and training process are below.")

	        # Input controls in one column, predictions alongside them.
	        with gr.Row():
	            with gr.Column():
	                inp = gr.Textbox(placeholder="Enter text here...", lines=4, label="Input text")
	                btn = gr.Button("Tag food")
	                gr.Examples(examples, inp, label="Examples (click to use)")
	            out = gr.HighlightedText(label="Predictions")
	        btn.click(fn=tag, inputs=inp, outputs=out)

	        gr.Markdown(blog_text)
	        html_2 = ("<div style='max-width:100%; max-height:50px; overflow:auto'>"
	                  + "<img src='file=app_images/salt_butter_old.png' alt='Butter and Salt (old model)'>"
	                  + "</div>"
	                  )
	        gr.HTML(html_2)
	        gr.Markdown("I speculated then that these kinds of errors could probably be reduced by using"
	                    " contextual word embeddings, such as ELMo or BERT embeddings, or by using BERT itself "
	                    "(fine-tuning it on the NER task)."
	                    " That turned out to be true -- the current, RoBERTa model correctly handles these cases:")
	        html_3 = ("<div style='max-width:100%; max-height:50px; overflow:auto'>"
	                  + "<img src='file=app_images/salt_butter_new.png' alt='Butter and Salt (new model)'>"
	                  + "</div>"
	                  )
	        gr.HTML(html_3)
	        gr.Markdown("To use this model yourself, see the "
	                    "[model card.](https://huggingface.co/carolanderson/roberta-base-food-ner)")

	    demo.launch()