"""Gradio demo that highlights food mentions found by a RoBERTa NER model."""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline


def convert_hf_ents_to_gradio(hf_ents):
    """Convert aggregated pipeline entities to Gradio highlight entities.

    Args:
        hf_ents: Iterable of dicts, each providing ``start``, ``end`` and
            ``entity_group`` keys. Any other keys are ignored.

    Returns:
        A list of dicts with ``start``, ``end`` and ``entity`` keys, where
        ``entity`` carries the value of the input's ``entity_group``.

    Raises:
        KeyError: If an input dict lacks one of the required keys.
    """
    return [
        {
            "start": hf_ent["start"],
            "end": hf_ent["end"],
            "entity": hf_ent["entity_group"],
        }
        for hf_ent in hf_ents
    ]


def tag(text):
    """Run the NER pipeline on *text* and package the result for display.

    Relies on the module-level ``nlp`` pipeline, which is only created in
    the ``__main__`` block below; calling this function before that block
    has run raises ``NameError``.

    Args:
        text: The raw input string to tag.

    Returns:
        A dict with the original ``text`` and an ``entities`` list as
        produced by ``convert_hf_ents_to_gradio``.
    """
    hf_ents = nlp(text, aggregation_strategy="first")
    return {"text": text, "entities": convert_hf_ents_to_gradio(hf_ents)}


if __name__ == "__main__":
    model_ckpt = "carolanderson/roberta-base-food-ner"
    model = AutoModelForTokenClassification.from_pretrained(model_ckpt)
    # The tokenizer is loaded from the base RoBERTa checkpoint, not from
    # model_ckpt, and is configured with add_prefix_space=True.
    tokenizer = AutoTokenizer.from_pretrained("roberta-base", add_prefix_space=True)
    nlp = pipeline("ner", model=model, tokenizer=tokenizer)

    # Explicit encoding so the Markdown is decoded the same way regardless
    # of the host's default locale.
    with open("app_text/blog_text.md", "r", encoding="utf-8") as f:
        blog_text = f.read()

    examples = [
        ["Saute the onions in olive oil until browned."],
        ["Add bourbon and sweet vermouth to the shaker."],
        ["Salt the water and butter the bread."],
        ["Add salt to the water and spread butter on the bread."],
    ]

    # NOTE(review): the original indentation of this file was lost, so the
    # nesting of the Row/Column children below is a reconstruction -- confirm
    # against the intended layout.
    with gr.Blocks() as demo:
        gr.Markdown("# Extracting Food Mentions from Text")
        # NOTE(review): the markup in this snippet (and in html_2 / html_3
        # below) appears to have been stripped; only whitespace and the label
        # text survive. Restore the original tags before deploying.
        html = ("\n\n"
                + " Cookbook\n\n"
                + "\n\n"
                )
        gr.HTML(html)
        gr.Markdown("This is a model I trained to extract food terms from text. "
                    "I fine tuned RoBERTa base on a dataset I created by labeling a set of recipes.")
        gr.Markdown("Details about the training data and training process are below.")
        with gr.Row():
            with gr.Column():
                inp = gr.Textbox(placeholder="Enter text here...", lines=4, label="Input text")
                btn = gr.Button("Tag food")
                gr.Examples(examples, inp, label="Examples (click to use)")
            out = gr.HighlightedText(label="Predictions")
        btn.click(fn=tag, inputs=inp, outputs=out)
        gr.Markdown(blog_text)
        html_2 = ("\n\n"
                  + " Butter and Salt (old model)\n\n"
                  + "\n\n"
                  )
        gr.HTML(html_2)
        gr.Markdown("I speculated then that these kinds of errors could probably be reduced by using"
                    " contextual word embeddings, such as ELMo or BERT embeddings, or by using BERT itself "
                    "(fine-tuning it on the NER task)."
                    " That turned out to be true -- the current, RoBERTa model correctly handles these cases:")
        html_3 = ("\n\n"
                  + " Butter and Salt (new model)\n\n"
                  + "\n\n"
                  )
        gr.HTML(html_3)
        gr.Markdown("To use this model yourself, see the "
                    "[model card.](https://huggingface.co/carolanderson/roberta-base-food-ner)")

    demo.launch()