Added streamlit demo v0

Browse files

Files changed (9) hide show

demo/.gitignore +2 -0
demo/Build Ingredients Vocab.ipynb +213 -0
demo/README.md +10 -0
demo/beam_search.py +63 -0
demo/config.json +1 -0
demo/images/chef-transformer.png +0 -0
demo/images/logo.png +0 -0
demo/server.py +80 -0
demo/top_sampling.py +63 -0

demo/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ *.pyc
2	+ .ipynb_checkpoints/

demo/Build Ingredients Vocab.ipynb ADDED Viewed

	@@ -0,0 +1,213 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-07-14T12:54:01.369853Z",
+     "start_time": "2021-07-14T12:49:27.961404Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using custom data configuration default-fdc6acb780b42528\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading and preparing dataset recipe_nlg/default (download: Unknown size, generated: 2.04 GiB, post-processed: Unknown size, total: 2.04 GiB) to /home/rtx/.cache/huggingface/datasets/recipe_nlg/default-fdc6acb780b42528/1.0.0/20c969e1192265af03a7186457bdb4a9109d5d68b92cad04c3ec894d6e5aee61...\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "Dataset recipe_nlg downloaded and prepared to /home/rtx/.cache/huggingface/datasets/recipe_nlg/default-fdc6acb780b42528/1.0.0/20c969e1192265af03a7186457bdb4a9109d5d68b92cad04c3ec894d6e5aee61. Subsequent calls will reuse this data.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "DATA_DIR = \"~/Downloads/dataset/\"\n",
+    "dataset = load_dataset(\"recipe_nlg\", data_dir=DATA_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-07-14T12:58:25.150105Z",
+     "start_time": "2021-07-14T12:55:27.486385Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 2231142/2231142 [02:57<00:00, 12558.59it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from collections import Counter\n",
+    "from tqdm import tqdm\n",
+    "ctr = Counter()\n",
+    "\n",
+    "for row in tqdm(dataset[\"train\"]):\n",
+    "    for item in row[\"ner\"]:\n",
+    "        ctr[item] += 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-07-14T13:02:09.315817Z",
+     "start_time": "2021-07-14T13:02:09.259046Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "first_500 = list(set([x[0].lower() for x in ctr.most_common()[0:500]]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-07-14T13:02:28.864546Z",
+     "start_time": "2021-07-14T13:02:28.856279Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "443"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(first_500)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-07-14T13:02:53.656711Z",
+     "start_time": "2021-07-14T13:02:53.653868Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "first_100 = sorted(first_500[:100])\n",
+    "next_100 = sorted(first_500[100:200])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-07-14T13:03:35.640538Z",
+     "start_time": "2021-07-14T13:03:35.634368Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "d = {\n",
+    "    \"first_100\": first_100,\n",
+    "    \"next_100\": next_100\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2021-07-14T13:03:52.682190Z",
+     "start_time": "2021-07-14T13:03:52.679624Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "with open(\"config.json\", \"w\") as f:\n",
+    "    f.write(json.dumps(d))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  },
+  "toc": {
+   "base_numbering": 1,
+   "nav_menu": {},
+   "number_sections": true,
+   "sideBar": true,
+   "skip_h1_title": false,
+   "title_cell": "Table of Contents",
+   "title_sidebar": "Contents",
+   "toc_cell": false,
+   "toc_position": {},
+   "toc_section_display": true,
+   "toc_window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

demo/README.md ADDED Viewed

	@@ -0,0 +1,10 @@

+# Streamlit demo for Chef Transformers
+### Launch demo:
+```
+streamlit run server.py
+```
+### Modify config
+Add any custom ingredient to display in `config.json` with key `first_100` to be displayed in multi-select. `next_100` are for custom ingredient adding section (to provide autocomplete assist as we type)

demo/beam_search.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+from transformers import AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer
+from transformers import pipeline
+from pprint import pprint
+import re
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# MODEL_NAME_OR_PATH = "flax-community/t5-recipe-generation"
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH, use_fast=True)
+# model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME_OR_PATH)
+def skip_special_tokens_and_prettify(text, tokenizer):
+    recipe_maps = {"<sep>": "--", "<section>": "\n"}
+    recipe_map_pattern = "|".join(map(re.escape, recipe_maps.keys()))
+    text = re.sub(
+        recipe_map_pattern,
+        lambda m: recipe_maps[m.group()],
+        re.sub("|".join(tokenizer.all_special_tokens), "", text)
+    )
+    data = {"title": "", "ingredients": [], "directions": []}
+    for section in text.split("\n"):
+        section = section.strip()
+        section = section.strip()
+        if section.startswith("title:"):
+            data["title"] = section.replace("title:", "").strip()
+        elif section.startswith("ingredients:"):
+            data["ingredients"] = [s.strip() for s in section.replace("ingredients:", "").split('--')]
+        elif section.startswith("directions:"):
+            data["directions"] = [s.strip() for s in section.replace("directions:", "").split('--')]
+        else:
+            pass
+    return data
+def post_generator(output_tensors, tokenizer):
+    output_tensors = [output_tensors[i]["generated_token_ids"] for i in range(len(output_tensors))]
+    texts = tokenizer.batch_decode(output_tensors, skip_special_tokens=False)
+    texts = [skip_special_tokens_and_prettify(text, tokenizer) for text in texts]
+    return texts
+# Example
+generate_kwargs = {
+    "max_length": 512,
+    "min_length": 64,
+    "no_repeat_ngram_size": 3,
+    "early_stopping": True,
+    "num_beams": 5,
+    "length_penalty": 1.5,
+    "num_return_sequences": 2
+}
+items = "potato, cheese"
+# generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
+# generated = generator(items, return_tensors=True, return_text=False, **generate_kwargs)
+# outputs = post_generator(generated, tokenizer)
+# pprint(outputs)

demo/config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"first_100": ["allspice", "almond extract", "applesauce", "avocado", "balsamic vinegar", "basil", "bay leaf", "beets", "bread crumbs", "bread flour", "buns", "catsup", "cayenne", "cherry tomatoes", "chicken breasts", "chives", "chocolate cake", "coconut milk", "cold butter", "cold milk", "cooking oil", "cornstarch", "crab meat", "crackers", "cream of chicken soup", "cream of tartar", "cumin", "cumin seeds", "curry powder", "egg yolk", "extra-virgin olive oil", "feta cheese", "flaked coconut", "flat leaf parsley", "flour tortillas", "fresh chives", "fresh cilantro", "fresh mint", "fresh oregano", "fresh rosemary", "frozen strawberries", "gingerroot", "green olives", "ground allspice", "ground chuck", "ground coriander", "ground cumin", "ground pork", "ground red pepper", "hamburger", "hazelnuts", "heavy cream", "heavy whipping cream", "hot pepper", "italian dressing", "lean ground beef", "lemon juice", "lemon pepper", "marjoram", "miracle", "noodles", "nuts", "oatmeal", "oats", "oleo", "olive oil", "onion salt", "onions", "orange", "paprika", "parmesan cheese", "parsley", "pasta", "peaches", "pecans", "pork sausage", "pork tenderloin", "poultry seasoning", "powdered sugar", "pumpkin", "red potatoes", "red wine vinegar", "rosemary", "salmon", "scallion", "sesame oil", "shell", "stalks celery", "tabasco sauce", "tarragon", "tomatoes", "unsalted butter", "vanilla wafers", "vegetables", "warm water", "whipping cream", "white wine vinegar", "whole wheat flour", "yellow squash", "yogurt"], "next_100": ["active dry yeast", "almonds", "apple", "apple cider", "apple cider vinegar", "avocados", "baby spinach", "bay leaves", "bean sprouts", "beef", "beef broth", "broccoli", "cabbage", "capers", "cashews", "celery", "celery salt", "cherries", "cherry pie filling", "chicken", "chicken broth", "chicken stock", "chickpeas", "chili sauce", "chocolate", "cinnamon", "cloves", "corn", "cottage cheese", "cranberry sauce", "egg noodles", "egg yolks", "extra virgin olive oil", "freshly ground pepper", "garlic powder", "golden raisins", "graham cracker crust", "graham crackers", "green peppers", "ground black pepper", "ground nutmeg", "ground pepper", "ground turmeric", "kosher salt", "lemon rind", "mango", "mint", "mustard", "nutmeg", "orange juice", "orange zest", "oregano", "peanut butter", "peas", "pecan halves", "pepperoni", "pine nuts", "pinto beans", "pizza sauce", "plain yogurt", "potatoes", "raisins", "red", "red bell peppers", "red pepper", "red peppers", "rhubarb", "ricotta cheese", "salad oil", "sauce", "scallions", "sesame seeds", "sherry", "shredded cheese", "skinless", "soda", "soy sauce", "spinach", "strawberries", "sugar", "sweet onion", "sweet potatoes", "swiss cheese", "t", "tomato", "tomato paste", "tomato soup", "tuna", "vanilla bean", "vanilla ice cream", "vanilla pudding", "vegetable broth", "vegetable oil", "vegetable shortening", "whipped cream", "white onion", "white sugar", "yellow cake", "yellow cornmeal", "zucchini"]}

demo/images/chef-transformer.png ADDED Viewed

demo/images/logo.png ADDED Viewed

demo/server.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+from datetime import datetime as dt
+import streamlit as st
+from streamlit_tags import st_tags
+import beam_search
+import top_sampling
+from pprint import pprint
+import json
+with open("config.json") as f:
+    cfg = json.loads(f.read())
+st.set_page_config(layout="wide")
+@st.cache(allow_output_mutation=True)
+def load_model():
+    tokenizer = AutoTokenizer.from_pretrained("flax-community/t5-recipe-generation")
+    model = AutoModelForSeq2SeqLM.from_pretrained("flax-community/t5-recipe-generation")
+    generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
+    return generator, tokenizer
+def sampling_changed(obj):
+    print(obj)
+with st.spinner('Loading model...'):
+    generator, tokenizer = load_model()
+# st.image("images/chef-transformer.png", width=400)
+st.header("Chef transformers (flax-community)")
+st.markdown("This demo uses [t5 trained on recipe-nlg](https://huggingface.co/flax-community/t5-recipe-generation) to generate recipe from a given set of ingredients")
+img = st.sidebar.image("images/chef-transformer.png", width=200)
+add_text_sidebar = st.sidebar.title("Popular recipes:")
+add_text_sidebar = st.sidebar.text("Recipe preset(example#1)")
+add_text_sidebar = st.sidebar.text("Recipe preset(example#2)")
+add_text_sidebar = st.sidebar.title("Mode:")
+sampling_mode = st.sidebar.selectbox("select a Mode", index=0, options=["Beam Search", "Top-k Sampling"])
+original_keywords = st.multiselect("Choose ingredients",
+    cfg["first_100"],
+    ["parmesan cheese", "fresh oregano", "basil", "whole wheat flour"]
+)
+st.write("Add custom ingredients here:")
+custom_keywords = st_tags(
+    label="",
+    text='Press enter to add more',
+    value=['salt'],
+    suggestions=cfg["next_100"],
+    maxtags = 15,
+    key='1')
+all_ingredients = []
+all_ingredients.extend(original_keywords)
+all_ingredients.extend(custom_keywords)
+all_ingredients = ", ".join(all_ingredients)
+st.markdown("**Generate recipe for:** "+all_ingredients)
+submit = st.button('Get Recipe!')
+if submit:
+    with st.spinner('Generating recipe...'):
+        if sampling_mode == "Beam Search":
+            generated = generator(all_ingredients, return_tensors=True, return_text=False, **beam_search.generate_kwargs)
+            outputs = beam_search.post_generator(generated, tokenizer)
+        elif sampling_mode == "Top-k Sampling":
+            generated = generator(all_ingredients, return_tensors=True, return_text=False, **top_sampling.generate_kwargs)
+            outputs = top_sampling.post_generator(generated, tokenizer)
+    output = outputs[0]
+    markdown_output = ""
+    markdown_output += f"## {output['title'].capitalize()}\n"
+    markdown_output += f"#### Ingredients:\n"
+    for o in output["ingredients"]:
+        markdown_output += f"- {o}\n"
+    markdown_output += f"#### Directions:\n"
+    for o in output["directions"]:
+        markdown_output += f"- {o}\n"
+    st.markdown(markdown_output)
+    st.balloons()

demo/top_sampling.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+from transformers import AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer
+from transformers import pipeline
+from pprint import pprint
+import re
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# MODEL_NAME_OR_PATH = "flax-community/t5-recipe-generation"
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH, use_fast=True)
+# model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME_OR_PATH)
+def skip_special_tokens_and_prettify(text, tokenizer):
+    recipe_maps = {"<sep>": "--", "<section>": "\n"}
+    recipe_map_pattern = "|".join(map(re.escape, recipe_maps.keys()))
+    text = re.sub(
+        recipe_map_pattern,
+        lambda m: recipe_maps[m.group()],
+        re.sub("|".join(tokenizer.all_special_tokens), "", text)
+    )
+    data = {"title": "", "ingredients": [], "directions": []}
+    for section in text.split("\n"):
+        section = section.strip()
+        section = section.strip()
+        if section.startswith("title:"):
+            data["title"] = section.replace("title:", "").strip()
+        elif section.startswith("ingredients:"):
+            data["ingredients"] = [s.strip() for s in section.replace("ingredients:", "").split('--')]
+        elif section.startswith("directions:"):
+            data["directions"] = [s.strip() for s in section.replace("directions:", "").split('--')]
+        else:
+            pass
+    return data
+def post_generator(output_tensors, tokenizer):
+    output_tensors = [output_tensors[i]["generated_token_ids"] for i in range(len(output_tensors))]
+    texts = tokenizer.batch_decode(output_tensors, skip_special_tokens=False)
+    texts = [skip_special_tokens_and_prettify(text, tokenizer) for text in texts]
+    return texts
+# Example
+generate_kwargs = {
+    "max_length": 512,
+    "min_length": 64,
+    "no_repeat_ngram_size": 3,
+    "do_sample": True,
+    "top_k": 60,
+    "top_p": 0.95,
+    "num_return_sequences": 3
+}
+# items = "potato, cheese"
+# generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
+# generated = generator(items, return_tensors=True, return_text=False, **generate_kwargs)
+# outputs = post_generator(generated, tokenizer)
+# pprint(outputs)