# food-ner / app.py
# carolanderson: "add link to model card" (commit 62090fa)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
def convert_hf_ents_to_gradio(hf_ents):
    """Convert Hugging Face NER pipeline entities into Gradio highlight spans.

    Args:
        hf_ents: Iterable of entity dicts. Each must provide ``start`` and
            ``end`` and a label under ``entity_group`` (grouped pipeline
            output) or, failing that, ``entity`` (ungrouped output).

    Returns:
        A new list of dicts with exactly the keys ``start``, ``end`` and
        ``entity``; any other keys on the input dicts are dropped.

    Raises:
        KeyError: If an entity lacks ``start``, ``end`` or both label keys.
    """
    return [
        {
            "start": hf_ent["start"],
            "end": hf_ent["end"],
            # Prefer the grouped label; fall back to the per-token label so
            # output produced without aggregation can be converted as well.
            "entity": (
                hf_ent["entity_group"]
                if "entity_group" in hf_ent
                else hf_ent["entity"]
            ),
        }
        for hf_ent in hf_ents
    ]
def tag(text):
    """Run the NER pipeline on *text* and package the result for Gradio.

    Relies on the module-level ``nlp`` pipeline, which is created in the
    ``__main__`` section of this file, and on ``convert_hf_ents_to_gradio``.

    Args:
        text: The string entered in the input textbox.

    Returns:
        A dict with ``"text"`` (the input string) and ``"entities"`` (a list
        of ``start``/``end``/``entity`` dicts), the payload handed to the
        ``HighlightedText`` output component.
    """
    # Blank input (empty, whitespace-only or None) has nothing to tag, so skip
    # the model call and return an empty highlight payload.
    if not text or not text.strip():
        return {"text": text or "", "entities": []}

    hf_ents = nlp(text, aggregation_strategy="first")
    return {"text": text, "entities": convert_hf_ents_to_gradio(hf_ents)}
if __name__ == "__main__":
    # Model weights come from the fine-tuned checkpoint; the tokenizer is
    # loaded from "roberta-base" with add_prefix_space=True.
    model_ckpt = "carolanderson/roberta-base-food-ner"
    model = AutoModelForTokenClassification.from_pretrained(model_ckpt)
    tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)
    # Assigned at module level (this `if` is not a function scope), which is
    # how tag() is able to reference `nlp`.
    nlp = pipeline("ner", model=model, tokenizer=tokenizer)

    # Explicit encoding so the Markdown decodes the same way on every host
    # instead of depending on the locale's default.
    with open("app_text/blog_text.md", "r", encoding="utf-8") as f:
        blog_text = f.read()

    # Each example is a one-element list: one value per input component.
    examples = [
        ["Saute the onions in olive oil until browned."],
        ["Add bourbon and sweet vermouth to the shaker."],
        ["Salt the water and butter the bread."],
        ["Add salt to the water and spread butter on the bread."],
    ]

    with gr.Blocks() as demo:
        gr.Markdown("# Extracting Food Mentions from Text")
        html = ("<div style='max-width:100%; max-height:200px; overflow:auto'>"
                + "<img src='file=app_images/featured.jpg' alt='Cookbook'>"
                + "</div>"
                )
        gr.HTML(html)
        gr.Markdown("This is a model I trained to extract food terms from text. "
                    "I fine tuned RoBERTa base on a dataset I created by labeling a set of recipes.")
        gr.Markdown("Details about the training data and training process are below.")

        # NOTE(review): the Row/Column nesting here was reconstructed; confirm
        # that `out` is meant to sit beside the input column inside the Row.
        with gr.Row():
            with gr.Column():
                inp = gr.Textbox(placeholder="Enter text here...", lines=4, label="Input text")
                btn = gr.Button("Tag food")
                gr.Examples(examples, inp, label="Examples (click to use)")
            out = gr.HighlightedText(label="Predictions")
        btn.click(fn=tag, inputs=inp, outputs=out)

        # Long-form write-up loaded from disk, followed by the old-model and
        # new-model screenshots with a connecting paragraph between them.
        gr.Markdown(blog_text)
        html_2 = ("<div style='max-width:100%; max-height:50px; overflow:auto'>"
                  + "<img src='file=app_images/salt_butter_old.png' alt='Butter and Salt (old model)'>"
                  + "</div>"
                  )
        gr.HTML(html_2)
        gr.Markdown("I speculated then that these kinds of errors could probably be reduced by using"
                    " contextual word embeddings, such as ELMo or BERT embeddings, or by using BERT itself "
                    "(fine-tuning it on the NER task)."
                    " That turned out to be true -- the current, RoBERTa model correctly handles these cases:")
        html_3 = ("<div style='max-width:100%; max-height:50px; overflow:auto'>"
                  + "<img src='file=app_images/salt_butter_new.png' alt='Butter and Salt (new model)'>"
                  + "</div>"
                  )
        gr.HTML(html_3)
        gr.Markdown("To use this model yourself, see the "
                    "[model card.](https://huggingface.co/carolanderson/roberta-base-food-ner)")

    demo.launch()