Spaces:

shrut27
/

Calorie-Calculator

Sleeping

App Files Files Community

Calorie-Calculator / rec_system.py

shrut27

Upload 14 files

48fd95e about 2 years ago

raw

history blame contribute delete

7.33 kB

	import nltk
	import string
	import ast
	import re
	import unidecode
	import pandas as pd
	import streamlit as st
	import nltk
	nltk.download('wordnet')
	from nltk.stem import WordNetLemmatizer
	from nltk.corpus import wordnet
	from collections import Counter
	from sklearn.metrics.pairwise import cosine_similarity
	from sklearn.feature_extraction.text import TfidfVectorizer
	import pickle
	# Units and measuring words that carry no information about the ingredient.
	_MEASURES = frozenset({
	    'teaspoon', 't', 'tsp', 'tablespoon', 'T', 'tbl.', 'tb', 'tbsp.', 'fluid ounce', 'fl oz',
	    'gill', 'cup', 'c', 'pint', 'p', 'pt', 'fl pt', 'quart', 'q', 'qt', 'fl qt', 'gallon', 'g',
	    'gal', 'ml', 'milliliter', 'millilitre', 'cc', 'mL', 'l', 'liter', 'litre', 'L', 'dl',
	    'deciliter', 'decilitre', 'dL', 'bulb', 'level', 'heaped', 'rounded', 'whole', 'pinch',
	    'medium', 'slice', 'pound', 'lb', '#', 'ounce', 'oz', 'mg', 'milligram', 'milligramme',
	    'g', 'gram', 'gramme', 'kg', 'kilogram', 'kilogramme', 'x', 'of', 'mm', 'millimetre',
	    'millimeter', 'cm', 'centimeter', 'centimetre', 'm', 'meter', 'metre', 'inch', 'in',
	    'milli', 'centi', 'deci', 'hecto', 'kilo',
	})

	# Very common recipe words that are dropped before vectorising.
	_WORDS_TO_REMOVE = frozenset({
	    'fresh', 'oil', 'a', 'red', 'bunch', 'and', 'clove', 'or', 'leaf', 'chilly', 'chillies',
	    'large', 'extra', 'sprig', 'ground', 'handful', 'free', 'small', 'pepper', 'virgin',
	    'range', 'from', 'dried', 'sustainable', 'black', 'peeled', 'higher', 'welfare', 'seed',
	    'for', 'finely', 'freshly', 'sea', 'quality', 'white', 'ripe', 'few', 'piece', 'source',
	    'to', 'organic', 'flat', 'smoked', 'ginger', 'sliced', 'green', 'picked', 'the', 'stick',
	    'plain', 'plus', 'mixed', 'mint', 'bay', 'basil', 'your', 'cumin', 'optional', 'fennel',
	    'serve', 'mustard', 'unsalted', 'baby', 'paprika', 'fat', 'ask', 'natural', 'skin',
	    'roughly', 'into', 'such', 'cut', 'good', 'brown', 'grated', 'trimmed', 'oregano',
	    'powder', 'yellow', 'dusting', 'knob', 'frozen', 'on', 'deseeded', 'low', 'runny',
	    'balsamic', 'cooked', 'streaky', 'nutmeg', 'sage', 'rasher', 'zest', 'pin', 'groundnut',
	    'breadcrumb', 'turmeric', 'halved', 'grating', 'stalk', 'light', 'tinned', 'dry', 'soft',
	    'rocket', 'bone', 'colour', 'washed', 'skinless', 'leftover', 'splash', 'removed',
	    'dijon', 'thick', 'big', 'hot', 'drained', 'sized', 'chestnut', 'watercress',
	    'fishmonger', 'english', 'dill', 'caper', 'raw', 'worcestershire', 'flake', 'cider',
	    'cayenne', 'tbsp', 'leg', 'pine', 'wild', 'if', 'fine', 'herb', 'almond', 'shoulder',
	    'cube', 'dressing', 'with', 'chunk', 'spice', 'thumb', 'garam', 'new', 'little',
	    'punnet', 'peppercorn', 'shelled', 'saffron', 'other', 'chopped', 'salt', 'olive',
	    'taste', 'can', 'sauce', 'water', 'diced', 'package', 'italian', 'shredded', 'divided',
	    'parsley', 'vinegar', 'all', 'purpose', 'crushed', 'juice', 'more', 'coriander', 'bell',
	    'needed', 'thinly', 'boneless', 'half', 'thyme', 'cubed', 'cinnamon', 'cilantro', 'jar',
	    'seasoning', 'rosemary', 'extract', 'sweet', 'baking', 'beaten', 'heavy', 'seeded',
	    'tin', 'vanilla', 'uncooked', 'crumb', 'style', 'thin', 'nut', 'coarsely', 'spring',
	    'chili', 'cornstarch', 'strip', 'cardamom', 'rinsed', 'honey', 'cherry', 'root',
	    'quartered', 'head', 'softened', 'container', 'crumbled', 'frying', 'lean', 'cooking',
	    'roasted', 'warm', 'whipping', 'thawed', 'corn', 'pitted', 'sun', 'kosher', 'bite',
	    'toasted', 'lasagna', 'split', 'melted', 'degree', 'lengthwise', 'romano', 'packed',
	    'pod', 'anchovy', 'rom', 'prepared', 'juiced', 'fluid', 'floret', 'room', 'active',
	    'seasoned', 'mix', 'deveined', 'lightly', 'anise', 'thai', 'size', 'unsweetened', 'torn',
	    'wedge', 'sour', 'basmati', 'marinara', 'dark', 'temperature', 'garnish', 'bouillon',
	    'loaf', 'shell', 'reggiano', 'canola', 'parmigiano', 'round', 'canned', 'ghee', 'crust',
	    'long', 'broken', 'ketchup', 'bulk', 'cleaned', 'condensed', 'sherry', 'provolone',
	    'cold', 'soda', 'cottage', 'spray', 'tamarind', 'pecorino', 'shortening', 'part',
	    'bottle', 'sodium', 'cocoa', 'grain', 'french', 'roast', 'stem', 'link', 'firm',
	    'asafoetida', 'mild', 'dash', 'boiling',
	})

	# Every word that is filtered out of an ingredient line.
	_IGNORED_WORDS = _MEASURES | _WORDS_TO_REMOVE

	# Tokens are separated by whitespace or hyphens ("extra-virgin" -> two words).
	_TOKEN_SPLIT_RE = re.compile(r"[\s-]+")

	# Deletes every ASCII punctuation character from a token.
	_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


	def ingredient_parser(ingreds):
	    """Reduce raw ingredient lines to a single string of key ingredient words.

	    Each ingredient line is split on whitespace and hyphens, punctuation is
	    stripped from every token, tokens that are not purely alphabetic are
	    dropped, and the rest are lower-cased, transliterated to ASCII and
	    lemmatized. Measuring words and common filler words are then removed.

	    Args:
	        ingreds: Either a list of ingredient strings, or a string holding a
	            Python literal (parsed with ``ast.literal_eval``) that evaluates
	            to an iterable of ingredient strings.

	    Returns:
	        One string containing the surviving words of all ingredients,
	        separated by single spaces (empty if nothing survives).

	    Raises:
	        ValueError, SyntaxError: If ``ingreds`` is a string that is not a
	            valid Python literal. The caller is expected to handle this.
	    """
	    if isinstance(ingreds, list):
	        ingredients = ingreds
	    else:
	        ingredients = ast.literal_eval(ingreds)

	    lemmatizer = WordNetLemmatizer()
	    parsed = []
	    for ingredient in ingredients:
	        words = []
	        # Split first, then strip punctuation: stripping first would delete
	        # the hyphens we want to split on.
	        for token in _TOKEN_SPLIT_RE.split(ingredient):
	            token = token.translate(_PUNCTUATION_TABLE)
	            # Drops empty tokens, quantities ("2", "1/2") and mixed tokens.
	            if not token.isalpha():
	                continue
	            # Lowercase, remove accents, then lemmatize so plural forms
	            # compare equal to the singular entries in the stop lists.
	            word = lemmatizer.lemmatize(unidecode.unidecode(token.lower()))
	            if word not in _IGNORED_WORDS:
	                words.append(word)
	        if words:
	            parsed.append(' '.join(words))
	    return " ".join(parsed)
	# load in tfidf model and encodings
	def scorefunc(ingredients):
	    """Score the given ingredients against the stored recipe encodings.

	    Loads ``tfidf_encodings.pkl`` and ``tfidf_model.pkl`` from the current
	    working directory, parses ``ingredients`` with ``ingredient_parser``,
	    encodes the result with the loaded model and computes the cosine
	    similarity against each stored encoding.

	    Args:
	        ingredients: A list of ingredient strings, a string containing a
	            Python list literal, or free text. Anything ``ingredient_parser``
	            rejects is retried as a single-element list.

	    Returns:
	        A list with one ``cosine_similarity`` result per item obtained by
	        iterating over the loaded encodings, in the same order.
	    """
	    # NOTE(security): pickle.load can execute arbitrary code. These files
	    # must only ever be trusted artifacts shipped with the application.
	    with open('tfidf_encodings.pkl', 'rb') as f:
	        tfidf_encodings = pickle.load(f)
	    with open('tfidf_model.pkl', "rb") as f:
	        tfidf = pickle.load(f)

	    # Free-text input is not a valid Python literal, so the first attempt
	    # fails; fall back to treating the whole input as one ingredient line.
	    # `except Exception` (rather than a bare except) keeps the best-effort
	    # fallback without swallowing KeyboardInterrupt / SystemExit.
	    try:
	        ingredients_parsed = ingredient_parser(ingredients)
	    except Exception:
	        ingredients_parsed = ingredient_parser([ingredients])

	    # Encode the query with the pretrained TF-IDF model.
	    ingredients_tfidf = tfidf.transform([ingredients_parsed])

	    # One similarity result per stored recipe encoding.
	    return [
	        cosine_similarity(ingredients_tfidf, encoding)
	        for encoding in tfidf_encodings
	    ]
	def _score_to_float(score):
	    """Collapse a score (plain number or single-element array) to a float."""
	    # Array-likes expose .item(), which extracts the sole element without
	    # relying on implicit ndim>0 array-to-scalar conversion.
	    item = getattr(score, "item", None)
	    return float(item()) if callable(item) else float(score)


	def get_recommendations(N, scores, recipes_path='JO_parsed.csv'):
	    """Build a table of the ``N`` best-scoring recipes.

	    Args:
	        N: Maximum number of recommendations to return.
	        scores: Sequence of similarity scores, one per row of the recipe
	            file and in the same order. Each entry may be a plain number
	            or a single-element array.
	        recipes_path: CSV file with the columns ``recipe_name``,
	            ``ingredients`` and ``recipe_urls``.

	    Returns:
	        A ``pandas.DataFrame`` with the columns ``Recipe``, ``Ingredients``,
	        ``Score`` (formatted to three decimals as a string) and ``Url``,
	        ordered from highest to lowest score.
	    """
	    # load in recipe dataset
	    df_recipes = pd.read_csv(recipes_path)

	    values = [_score_to_float(score) for score in scores]

	    # Row positions of the N highest scores, best first. The sort is stable,
	    # so ties keep their original file order.
	    top = sorted(range(len(values)), key=values.__getitem__, reverse=True)[:N]

	    rows = [
	        {
	            'Recipe': df_recipes['recipe_name'].iloc[i],
	            'Ingredients': df_recipes['ingredients'].iloc[i],
	            'Score': "{:.3f}".format(values[i]),
	            'Url': df_recipes['recipe_urls'].iloc[i],
	        }
	        for i in top
	    ]
	    # Passing `columns` keeps the layout stable even when `rows` is empty.
	    return pd.DataFrame(rows, columns=['Recipe', 'Ingredients', 'Score', 'Url'])
	def app():
	    """Render the recipe recommendation page.

	    Shows a selector for how many recommendations to display (five or one)
	    and a text box for ingredients. Once ingredients are entered, they are
	    scored with ``scorefunc`` and the best matches from
	    ``get_recommendations`` are written to the page.
	    """
	    st.markdown("Recipe Recommendation System")
	    recipe_box = st.selectbox(
	        "Display the top 5 recommendations or pick a particular recipe",
	        ["Show the top picks", "Select a single recipe"],
	    )
	    N = 5 if recipe_box == "Show the top picks" else 1

	    ing = st.text_input("Enter the ingredients you would like to cook with")
	    # Whitespace-only input is truthy but contains no ingredients; strip it
	    # so it is treated the same as an empty box.
	    ing = ing.strip() if ing else ing
	    if ing:
	        scores = scorefunc(ing)
	        rec = get_recommendations(N, scores)
	        st.write("These are some recommendation(s) for you")
	        st.write(rec.head(N))