Spaces:
Sleeping
Sleeping
import nltk | |
import string | |
import ast | |
import re | |
import unidecode | |
import pandas as pd | |
import streamlit as st | |
import nltk | |
nltk.download('wordnet') | |
from nltk.stem import WordNetLemmatizer | |
from nltk.corpus import wordnet | |
from collections import Counter | |
from sklearn.metrics.pairwise import cosine_similarity | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
import pickle | |
# Measurement words/units dropped from ingredient strings. Tokens are compared
# after lowercasing and lemmatizing, so only lowercase single-word entries can
# ever match; the remaining entries are kept unchanged from the original list.
_MEASURES = frozenset([
    'teaspoon', 't', 'tsp', 'tablespoon', 'T', 'tbl.', 'tb', 'tbsp.',
    'fluid ounce', 'fl oz', 'gill', 'cup', 'c', 'pint', 'p', 'pt', 'fl pt',
    'quart', 'q', 'qt', 'fl qt', 'gallon', 'g', 'gal', 'ml', 'milliliter',
    'millilitre', 'cc', 'mL', 'l', 'liter', 'litre', 'L', 'dl', 'deciliter',
    'decilitre', 'dL', 'bulb', 'level', 'heaped', 'rounded', 'whole', 'pinch',
    'medium', 'slice', 'pound', 'lb', '#', 'ounce', 'oz', 'mg', 'milligram',
    'milligramme', 'g', 'gram', 'gramme', 'kg', 'kilogram', 'kilogramme', 'x',
    'of', 'mm', 'millimetre', 'millimeter', 'cm', 'centimeter', 'centimetre',
    'm', 'meter', 'metre', 'inch', 'in', 'milli', 'centi', 'deci', 'hecto',
    'kilo',
])

# Very common descriptive words that carry little signal for matching recipes.
_WORDS_TO_REMOVE = frozenset([
    'fresh', 'oil', 'a', 'red', 'bunch', 'and', 'clove', 'or', 'leaf',
    'chilly', 'chillies', 'large', 'extra', 'sprig', 'ground', 'handful',
    'free', 'small', 'pepper', 'virgin', 'range', 'from', 'dried',
    'sustainable', 'black', 'peeled', 'higher', 'welfare', 'seed', 'for',
    'finely', 'freshly', 'sea', 'quality', 'white', 'ripe', 'few', 'piece',
    'source', 'to', 'organic', 'flat', 'smoked', 'ginger', 'sliced', 'green',
    'picked', 'the', 'stick', 'plain', 'plus', 'mixed', 'mint', 'bay',
    'basil', 'your', 'cumin', 'optional', 'fennel', 'serve', 'mustard',
    'unsalted', 'baby', 'paprika', 'fat', 'ask', 'natural', 'skin', 'roughly',
    'into', 'such', 'cut', 'good', 'brown', 'grated', 'trimmed', 'oregano',
    'powder', 'yellow', 'dusting', 'knob', 'frozen', 'on', 'deseeded', 'low',
    'runny', 'balsamic', 'cooked', 'streaky', 'nutmeg', 'sage', 'rasher',
    'zest', 'pin', 'groundnut', 'breadcrumb', 'turmeric', 'halved', 'grating',
    'stalk', 'light', 'tinned', 'dry', 'soft', 'rocket', 'bone', 'colour',
    'washed', 'skinless', 'leftover', 'splash', 'removed', 'dijon', 'thick',
    'big', 'hot', 'drained', 'sized', 'chestnut', 'watercress', 'fishmonger',
    'english', 'dill', 'caper', 'raw', 'worcestershire', 'flake', 'cider',
    'cayenne', 'tbsp', 'leg', 'pine', 'wild', 'if', 'fine', 'herb', 'almond',
    'shoulder', 'cube', 'dressing', 'with', 'chunk', 'spice', 'thumb',
    'garam', 'new', 'little', 'punnet', 'peppercorn', 'shelled', 'saffron',
    'other', 'chopped', 'salt', 'olive', 'taste', 'can', 'sauce', 'water',
    'diced', 'package', 'italian', 'shredded', 'divided', 'parsley',
    'vinegar', 'all', 'purpose', 'crushed', 'juice', 'more', 'coriander',
    'bell', 'needed', 'thinly', 'boneless', 'half', 'thyme', 'cubed',
    'cinnamon', 'cilantro', 'jar', 'seasoning', 'rosemary', 'extract',
    'sweet', 'baking', 'beaten', 'heavy', 'seeded', 'tin', 'vanilla',
    'uncooked', 'crumb', 'style', 'thin', 'nut', 'coarsely', 'spring',
    'chili', 'cornstarch', 'strip', 'cardamom', 'rinsed', 'honey', 'cherry',
    'root', 'quartered', 'head', 'softened', 'container', 'crumbled',
    'frying', 'lean', 'cooking', 'roasted', 'warm', 'whipping', 'thawed',
    'corn', 'pitted', 'sun', 'kosher', 'bite', 'toasted', 'lasagna', 'split',
    'melted', 'degree', 'lengthwise', 'romano', 'packed', 'pod', 'anchovy',
    'rom', 'prepared', 'juiced', 'fluid', 'floret', 'room', 'active',
    'seasoned', 'mix', 'deveined', 'lightly', 'anise', 'thai', 'size',
    'unsweetened', 'torn', 'wedge', 'sour', 'basmati', 'marinara', 'dark',
    'temperature', 'garnish', 'bouillon', 'loaf', 'shell', 'reggiano',
    'canola', 'parmigiano', 'round', 'canned', 'ghee', 'crust', 'long',
    'broken', 'ketchup', 'bulk', 'cleaned', 'condensed', 'sherry',
    'provolone', 'cold', 'soda', 'cottage', 'spray', 'tamarind', 'pecorino',
    'shortening', 'part', 'bottle', 'sodium', 'cocoa', 'grain', 'french',
    'roast', 'stem', 'link', 'firm', 'asafoetida', 'mild', 'dash', 'boiling',
])

# Deletes every punctuation character except the hyphen, which is needed as a
# word separator by the split inside ingredient_parser.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation.replace('-', ''))


def ingredient_parser(ingreds):
    """Reduce a list of raw ingredient strings to one string of key words.

    Each ingredient has its punctuation stripped and is split on spaces and
    hyphens. Tokens that are not purely alphabetic are discarded; the rest
    are lowercased, de-accented, lemmatized, and filtered against the
    measurement-word and common-word sets.

    Args:
        ingreds: Either a list of ingredient strings, or a string holding a
            Python literal for such a list (e.g. ``"['1 cup flour']"``).

    Returns:
        A single space-separated string of the surviving words from all
        ingredients, in input order. Empty string if nothing survives.

    Raises:
        ValueError, SyntaxError: If ``ingreds`` is not a list and is not a
            valid Python literal (propagated from ``ast.literal_eval``).
    """
    if isinstance(ingreds, list):
        ingredients = ingreds
    else:
        ingredients = ast.literal_eval(ingreds)

    lemmatizer = WordNetLemmatizer()
    ingred_list = []
    for ingredient in ingredients:
        # str.translate returns a new string; the result must be kept,
        # otherwise tokens such as "tomato," fail isalpha() and are lost.
        cleaned = ingredient.translate(_PUNCTUATION_TABLE)
        # We split up with hyphens as well as spaces
        items = re.split(' |-', cleaned)
        # Get rid of words containing non alphabet letters
        items = [word for word in items if word.isalpha()]
        # Turn everything to lowercase
        items = [word.lower() for word in items]
        # Remove accents
        items = [unidecode.unidecode(word) for word in items]
        # Lemmatize words so we can compare words to measuring words
        items = [lemmatizer.lemmatize(word) for word in items]
        # Drop measuring words (e.g. heaped teaspoon) and common filler words
        items = [
            word for word in items
            if word not in _MEASURES and word not in _WORDS_TO_REMOVE
        ]
        if items:
            ingred_list.append(' '.join(items))
    return " ".join(ingred_list)
def scorefunc(ingredients):
    """Score the stored recipe encodings against the given ingredients.

    Loads the pickled TF-IDF encodings and TF-IDF model from the working
    directory, parses ``ingredients`` with ``ingredient_parser``, encodes
    the parsed text with the model, and computes the cosine similarity
    between that encoding and each element of the stored encodings.

    Args:
        ingredients: A list of ingredient strings, a string holding a Python
            list literal, or free text. Input that cannot be parsed as a
            list is treated as a single ingredient string.

    Returns:
        A list with one ``cosine_similarity`` result per element obtained by
        iterating the stored encodings, in the same order.
    """
    # NOTE: pickle.load can execute arbitrary code from the file. These two
    # files must be trusted artefacts shipped with the app, never user data.
    with open('tfidf_encodings.pkl', 'rb') as f:
        tfidf_encodings = pickle.load(f)
    with open('tfidf_model.pkl', "rb") as f:
        tfidf = pickle.load(f)

    # Parse the ingredients. Free text is not a valid Python literal
    # (ValueError/SyntaxError), and a literal that is not a list of strings
    # fails while being iterated (TypeError/AttributeError); in those cases
    # fall back to treating the whole input as one ingredient. Anything else
    # (e.g. KeyboardInterrupt, missing NLTK data) is allowed to propagate.
    try:
        ingredients_parsed = ingredient_parser(ingredients)
    except (ValueError, SyntaxError, TypeError, AttributeError):
        ingredients_parsed = ingredient_parser([ingredients])

    # Use the pretrained TF-IDF model to encode the input ingredients
    ingredients_tfidf = tfidf.transform([ingredients_parsed])

    # Cosine similarity between the input and each stored recipe encoding
    return [
        cosine_similarity(ingredients_tfidf, encoding)
        for encoding in tfidf_encodings
    ]
def get_recommendations(N, scores):
    """Build a table of the N best-scoring recipes.

    Reads the recipe dataset from ``JO_parsed.csv`` in the working directory
    and picks the positions of the N largest entries in ``scores``
    (descending order; fewer than N when ``scores`` is shorter).

    Args:
        N: Maximum number of recommendations to return.
        scores: Sequence of similarity scores; position ``i`` is used to look
            up row ``i`` of the recipe dataset.

    Returns:
        A DataFrame with columns ``Recipe``, ``Ingredients``, ``Score`` and
        ``Url``, one row per recommendation, best first. ``Score`` is the
        score formatted as a string with three decimals.
    """
    # Load the recipe dataset
    recipes = pd.read_csv('JO_parsed.csv')

    # Rank every position by its score, best first, and keep the first N
    ranked = sorted(
        range(len(scores)),
        key=lambda position: scores[position],
        reverse=True,
    )
    best = ranked[:N]

    # Fill the result table one row per selected recipe
    recommendation = pd.DataFrame(
        columns=['Recipe', 'Ingredients', 'Score', 'Url']
    )
    for row, position in enumerate(best):
        recommendation.at[row, 'Recipe'] = recipes['recipe_name'][position]
        recommendation.at[row, 'Ingredients'] = recipes['ingredients'][position]
        recommendation.at[row, 'Url'] = recipes['recipe_urls'][position]
        recommendation.at[row, 'Score'] = "{:.3f}".format(float(scores[position]))
    return recommendation
def app():
    """Render the Streamlit page for the recipe recommender.

    Shows a heading, a select box choosing between five recommendations and
    a single one, and a text input for ingredients. Once ingredients are
    entered, the recipes are scored and the recommendations are displayed.
    """
    st.markdown("*Recipe Recommendation System*")
    choice = st.selectbox(
        "Display the top 5 recommendations or pick a particular recipe",
        ["Show the top picks", "Select a single recipe"],
    )
    # Five results for the "top picks" mode, otherwise a single recipe
    N = 5 if choice == "Show the top picks" else 1

    ing = st.text_input("Enter the ingredients you would like to cook with")
    if not ing:
        # Nothing entered yet; leave the page with just the input widgets
        return

    rec = get_recommendations(N, scorefunc(ing))
    st.write("These are some recommendation(s) for you")
    st.write(rec.head(N))