Spaces:

chuanenlin
/

foodnet

Running

App Files Files Community

foodnet / foodnet.py

chuanenlin

Update foodnet.py

3682312 over 1 year ago

raw

history blame contribute delete

14.9 kB

	import requests
	from io import BytesIO
	import numpy as np
	from gensim.models.fasttext import FastText
	from scipy import spatial
	import itertools
	import gdown
	import warnings
	import nltk
	warnings.filterwarnings('ignore')

	import pickle
	import pdb
	from concurrent.futures import ProcessPoolExecutor

	import matplotlib.pyplot as plt
	import streamlit as st
	import argparse
	import logging
	from pyunsplash import PyUnsplash
	import blacklists
	import os

	# SECURITY: an access key committed to source control is readable by anyone
	# who can see the repository. Supply the key through the UNSPLASH_API_KEY
	# environment variable instead. The literal below is kept only as a
	# backward-compatible fallback and should be rotated and then removed.
	api_key = os.environ.get('UNSPLASH_API_KEY') or 'hzcKZ0e4we95wSd8_ip2zTB3m2DrOMWehAxrYjqjwg0'

	# instantiate PyUnsplash object
	py_un = PyUnsplash(api_key=api_key)

	# pyunsplash logger defaults to level logging.ERROR
	# If you need to change that, use getLogger/setLevel
	# on the module logger, like this:
	logging.getLogger("pyunsplash").setLevel(logging.DEBUG)

	# TODO:
	# Image search: Option 1 -> Google Image Search API || Option 2 -> OpenAI CLIP search
	from PIL import Image


	# NLTK Datasets
	# Each resource is requested via nltk.download when this module is executed.
	for _nltk_resource in ('wordnet', 'punkt', 'averaged_perceptron_tagger'):
	    nltk.download(_nltk_resource)

	# Average embedding → Compare
	def recommend_ingredients(yum, leftovers, n=10):
	    '''
	    Uses a mean aggregation method: the normalised vectors of all leftovers
	    are averaged and the vocabulary entries closest to that average are returned.

	    :params
	    yum -> FastText Word2Vec Obj (must provide get_vector and similar_by_vector)
	    leftovers -> list of str, ingredient keys as stored in the model ('_' between words)
	    n -> int top_n to return

	    :returns
	    output -> list of (name, score) tuples, at most n long, names with '_'
	              replaced by ' '. Empty list when leftovers is empty.
	    '''
	    if not leftovers:
	        # Nothing to average; dividing by len(leftovers) == 0 would produce NaNs.
	        return []

	    # Let the vectors themselves define the dimensionality instead of
	    # hard-coding it, so models trained with a different vector_size work too.
	    # float64 matches the accumulator dtype the previous np.zeros-based sum used.
	    vectors = [yum.get_vector(ingredient, norm=True) for ingredient in leftovers]
	    leftovers_embedding = np.mean(vectors, axis=0, dtype=np.float64)  # Embedding for leftovers

	    top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
	    top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]
	    readable_leftovers = [x.replace('_', ' ') for x in leftovers]

	    # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
	    output = [
	        x for x in top_matches
	        if not any(ignore in x[0] for ignore in readable_leftovers)
	    ]
	    return output[:n]

	# Compare → Find intersection
	def recommend_ingredients_intersect(yum, leftovers, n=10):
	    '''
	    Finds the candidates that appear in the nearest-neighbour list of every
	    leftover. Order and scores are those of the first leftover's list.

	    :params
	    yum -> FastText Word2Vec Obj (must provide get_vector and similar_by_vector)
	    leftovers -> list of str, ingredient keys as stored in the model ('_' between words)
	    n -> int top_n to return

	    :returns
	    output -> list of (name, score) tuples, at most n long, names with '_'
	              replaced by ' '. Empty list when leftovers is empty.
	    '''
	    # Candidate names have '_' replaced by ' ', so the ignore list must use the
	    # same form or multi-word leftovers would never be filtered out.
	    ignored = [x.replace('_', ' ') for x in leftovers]

	    output = None  # None until the first leftover has been processed
	    for ingredient in leftovers:
	        ingredient_embedding = yum.get_vector(ingredient, norm=True)
	        ingredient_matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
	        ingredient_matches = [(x[0].replace('_', ' '), x[1]) for x in ingredient_matches]
	        # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
	        ingredient_output = [
	            x for x in ingredient_matches
	            if not any(ignore in x[0] for ignore in ignored)
	        ]
	        if output is None:
	            output = ingredient_output
	        else:
	            # Set lookup keeps the intersection linear instead of comparing
	            # every pair of the (up to 10000-element) lists.
	            shared_names = {x[0] for x in ingredient_output}
	            output = [x for x in output if x[0] in shared_names]

	    if output is None:
	        # No leftovers were supplied, so there is nothing to intersect.
	        return []
	    return output[:n]

	def recommend_ingredients_subsets(model, yum,leftovers, subset_size):
	    '''
	    Returns recommendations for every subset of the leftovers of a given size.
	    Each subset is handled like recommend_ingredients: mean of the normalised
	    vectors, nearest neighbours, minus entries that contain a subset member.

	    :params
	    model -> FastText Obj (kept for interface compatibility; lookups go through yum)
	    yum -> FastText Word2Vec Obj (must provide get_vector and similar_by_vector)
	    leftovers -> list of str, ingredient keys as stored in the model ('_' between words)
	    subset_size -> int, number of leftovers in each combination

	    :returns
	    all_outputs -> dict mapping each subset (tuple of str) to its top 10
	                   (name, score) recommendations
	    '''
	    all_outputs = {}
	    for leftovers_subset in itertools.combinations(leftovers, subset_size):
	        if not leftovers_subset:
	            # subset_size == 0 yields one empty combination; nothing to average.
	            continue

	        # Average real vectors instead of accumulating into np.empty (whose
	        # contents are uninitialised) and take the dimensionality from the
	        # vectors rather than a hard-coded size. get_vector(norm=True) is the
	        # same accessor the sibling recommenders use.
	        vectors = [yum.get_vector(ingredient, norm=True) for ingredient in leftovers_subset]
	        leftovers_embedding = np.mean(vectors, axis=0, dtype=np.float64)  # Embedding for leftovers

	        # Query the keyed vectors, as the sibling recommenders do.
	        top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
	        top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]

	        # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
	        # Compare in the space-separated form the candidate names now use.
	        ignored = [x.replace('_', ' ') for x in leftovers_subset]
	        output = [x for x in top_matches if not any(ignore in x[0] for ignore in ignored)]
	        all_outputs[leftovers_subset] = output[:10]
	    return all_outputs



	def filter_adjectives(data):
	    '''
	    Remove adjectives that are not associated with a food item.
	    An entry is kept when it tokenizes to more than one token, or when its
	    single token is POS-tagged 'NN' or 'NNS'.

	    :params
	    data -> list of str

	    :returns
	    data -> list of str, the kept entries in their original order
	    '''
	    kept = []
	    for entry in data:
	        tags = [tag for _, tag in nltk.pos_tag(nltk.word_tokenize(entry))]
	        # Multi-token entries always pass; single tokens must be nouns.
	        if len(tags) > 1 or 'NN' in tags or 'NNS' in tags:
	            kept.append(entry)
	    return kept

	def plural_to_singular(lemma, recipe):
	    '''
	    Lemmatize every entry of a recipe.

	    :params
	    lemma -> nltk lemma Obj (anything exposing lemmatize(str))
	    recipe -> list of str

	    :returns
	    recipe -> list of str, one lemmatized entry per input entry, same order
	    '''
	    return list(map(lemma.lemmatize, recipe))

	def filter_lemma(data):
	    '''
	    Convert plural to roots by running plural_to_singular over every recipe
	    in a process pool.

	    :params
	    data -> list of lists

	    :returns
	    data -> list of lists, the lemmatized recipes in input order
	    '''
	    # Lemmatizer used to reduce plurals to stems; the same instance is paired
	    # with every recipe through itertools.repeat.
	    lemmatizer = nltk.wordnet.WordNetLemmatizer()
	    lemmatizer_per_recipe = itertools.repeat(lemmatizer)

	    # NOTE: This uses all the computational resources of your computer
	    with ProcessPoolExecutor() as pool:
	        return list(pool.map(plural_to_singular, lemmatizer_per_recipe, data))


	def train_model(data):
	    '''
	    Train a FastText model on the recipe corpus.
	    NOTE: gensim==4.1.2

	    :params
	    data -> list of lists of all recipes

	    :returns
	    model -> trained FastText model obj
	    '''
	    # Fixed training configuration; vector_size=32 is the embedding width
	    # the rest of this file works with.
	    hyperparameters = dict(vector_size=32, window=99, min_count=5, workers=40, sg=1)
	    return FastText(data, **hyperparameters)

	@st.cache_resource
	def load_model(filename):
	    '''
	    Load a saved FastText model together with its keyed vectors.

	    :params:
	    filename -> path to the model

	    :returns
	    model -> this is the full FastText obj
	    yum -> this is the FastText Word2Vec obj (model.wv)
	    '''
	    fasttext_model = FastText.load(filename)
	    return fasttext_model, fasttext_model.wv

	@st.cache_resource
	def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
	    '''
	    Load the pickled recipe dataset.

	    :params:
	    filename -> path to dataset

	    :return
	    data -> list of all recipes (whatever object the pickle file contains)
	    '''
	    # SECURITY: pickle.load executes arbitrary code embedded in the file, so
	    # only point this at dataset files you produced or trust.
	    # The context manager closes the handle even if unpickling fails; the
	    # previous bare open() call never closed it.
	    with open(filename, 'rb') as dataset_file:
	        return pickle.load(dataset_file)

	def plot_results(names, probs, n=5):
	    '''
	    Plots a bar chart of the names of the items vs. probability of similarity

	    :params:
	    names -> list of str
	    probs -> list of float values
	    n -> int of how many bars to show NOTE: Max = 100

	    :return
	    fig -> return figure for plotting
	    '''
	    # Honour n as documented: show at most n bars, so the chart always
	    # agrees with the "Top {n}" title.
	    names = list(names)[:n]
	    probs = list(probs)[:n]

	    # Draw on a fresh figure. Using the implicit global pyplot figure made
	    # bars from earlier calls accumulate on the same axes.
	    fig, ax = plt.subplots()
	    positions = list(range(len(names)))
	    ax.bar(positions, probs, align='center')

	    ax.set_xticks(positions)
	    ax.set_xticklabels(names, rotation=45, ha='right')
	    ax.set_ylabel('Probability',fontsize='large', fontweight='bold')
	    ax.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
	    ax.xaxis.labelpad = 10
	    # The title reads the current selection from Streamlit session state.
	    ax.set_title(f'FoodNet Top {n} Predictions = {st.session_state.leftovers}')

	    return fig

	def load_image(image_file):
	    '''
	    Open an image with PIL.

	    :params
	    image_file -> value accepted by PIL's Image.open

	    :returns
	    img -> the object returned by Image.open
	    '''
	    return Image.open(image_file)

	st.set_page_config(
	    page_title="FoodNet",
	    page_icon = "🍔",
	    layout = "centered",
	    initial_sidebar_state = "auto",
	)

	##### UI/UX #####
	## Sidebar ##
	# The selected page name drives the if/elif page branches further down.
	page_names = ("FoodNet Recommender", "Food Donation Resources", "Contact Team")
	add_selectbox = st.sidebar.selectbox("Pages", page_names)

	# Loaded for every page; load_model is wrapped in st.cache_resource.
	model, yum = load_model('fastfood.pth')

	if add_selectbox == "FoodNet Recommender":
	    # Recommender page: pick ingredients, choose how many suggestions to
	    # show, optionally apply a dietary filter, then show one Unsplash photo
	    # per suggestion in a three-column grid.
	    st.title("FoodNet 🍔")
	    st.write("Search for similar food ingredients. Select two or more ingredients to find complementary ingredients.")
	    # Vocabulary keys use '_' between words; show them with spaces.
	    ingredients = list(yum.key_to_index.keys())
	    ingredients = [x.replace('_',' ') for x in ingredients]
	    st.multiselect("Type or select food ingredients", ingredients, default=['bread', 'lettuce'], key="leftovers")

	    ## Slider ##
	    st.slider("Select number of recommendations to show", min_value=1, max_value=10, value=3, step=1, key='top_n')

	    ## Get food recommendation ##
	    selected_leftovers = st.session_state.get('leftovers') or []
	    if selected_leftovers:
	        # Convert back to the '_' form the model vocabulary uses.
	        ingredients_no_space = [x.replace(' ','_') for x in selected_leftovers]
	        # Fetch the whole candidate pool (recommend_ingredients considers the
	        # top 100 matches) so the dietary filter below runs BEFORE truncating
	        # to top_n. Filtering after truncation could leave fewer than top_n.
	        candidate_pool = 100
	        out = recommend_ingredients(yum, ingredients_no_space, n=candidate_pool)
	    else:
	        # With nothing selected there is no embedding to average.
	        out = []

	    # if 'probs' not in st.session_state:
	    # st.session_state['probs'] = False

	    # if st.session_state.probs:
	    # st.table(data=out)
	    # else:
	    # st.table(data=names)

	    # st.checkbox(label="Show model scores", value=False, key="probs")
	    # ## Plot Results ##
	    # st.checkbox(label="Show results bar chart", value=False, key="plot")
	    # if st.session_state.plot:
	    # fig = plot_results(names, probs, st.session_state.top_n)

	    # ## Show Plot ##
	    # st.pyplot(fig)
	    st.selectbox(label="Select dietary restriction", options=('None', 'Kosher', 'Vegetarian'), key="diet")
	    # Drop every candidate whose name contains a blacklisted term.
	    # ('vegitarian' is the attribute name as spelled in the blacklists module.)
	    if st.session_state.diet == 'Vegetarian':
	        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.vegitarian)]
	    elif st.session_state.diet == 'Kosher':
	        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.kosher)]

	    out = out[:st.session_state.top_n]
	    names = [o[0] for o in out]
	    probs = [o[1] for o in out]

	    if not selected_leftovers:
	        st.info("Select at least one ingredient to see recommendations.")

	    col1, col2, col3 = st.columns(3)
	    image_columns = (col1, col2, col3)

	    for i, name in enumerate(names):
	        search = py_un.search(type_='photos', query=name)
	        # Only the first photo of each search is shown; results are laid out
	        # left to right, wrapping every three items.
	        first_photo = next(iter(search.entries), None)
	        if first_photo is not None:
	            image_columns[i % 3].image(first_photo.link_download, caption=name, use_column_width=True)

	elif add_selectbox == "Food Donation Resources":
	    # Static informational page with links to local food donation groups.
	    st.title('Food Donation Resources')
	    st.subheader('Pittsburgh Food Bank:')
	    # Adjacent literals are concatenated, so each piece must end with a
	    # space; the first one previously did not ("Duquesne.This facility").
	    st.write("In 2000, the Food Bank opened the doors on its facility in Duquesne. "
	             "This facility was the first LEED-certified building in Pittsburgh and the first LEED-certified "
	             "food bank in the nation. Learn more about that facility here. "
	             "Today, we work with a network of more than 850 partners across the 11 counties we serve. "
	             "In addition to sourcing, warehousing and distributing food, the Food Bank is actively engaged in "
	             "stabilizing lives and confronting issues of chronic hunger, poor nutrition and health. "
	             "And, through our advocacy efforts, we have become a primary driver in comprehensive anti-hunger "
	             "endeavors regionally, statewide and at the national level."
	             )
	    st.write("Check out this [link](https://pittsburghfoodbank.org/)👈")
	    st.subheader('412 Food Rescue:')
	    # Same fix here: a space was missing after "landfills.".
	    st.write("412 Food Rescue is a nonprofit organization dedicated to ending hunger by organizing "
	             "volunteers to deliver surplus food to insecure communities instead of landfills. "
	             "Since its creation in 2015, the organization has redistributed over three million pounds of food through "
	             "the use of its mobile application, Food Rescue Hero. They are currently rolling out the app nationwide."
	             )
	    st.write("Check out this [link](https://412foodrescue.org/)👈")

	# st.subheader('Image')
	# st.multiselect("Select leftovers:", list(yum.key_to_index.keys()), key="leftovers")
	# image_file = st.file_uploader("Upload Food Image:", type=["png", "jpg", "jpeg"])
	# if image_file is not None:
	# # To See details
	# file_details = {"filename": image_file.name, "filetype": image_file.type,
	# "filesize": image_file.size}
	# st.write(file_details)
	#
	# # To View Uploaded Image
	# st.image(load_image(image_file), width=250)
	if add_selectbox == "Contact Team":
	    # Contact page: one section per team member, with the text lines in a
	    # wide left column and the portrait in a narrower right column.
	    st.title('Contact Team')

	    # (subheader, lines written to the text column, portrait URL)
	    # NOTE(review): the last two portrait URLs carry what look like expiry
	    # tokens in their query strings; confirm they still resolve.
	    team_profiles = (
	        (
	            'David Chuan-En Lin',
	            (
	                'Pronouns: he/him/his',
	                'Research/career interests: Human-AI Co-Design by (1) building ML-infused creativity support tools and '
	                '(2) investigating how such tools augment design processes',
	                'Favorite Food: Ice cream sandwich',
	                'A painfully boring fact: Second-year PhD at HCII SCS',
	                'Hobbies: Making travel videos, graphic design, music',
	                'Email: chuanenl@andrew.cmu.edu',
	            ),
	            'https://chuanenlin.com/images/me.jpg',
	        ),
	        (
	            'Mitchell Fogelson',
	            (
	                'Pronouns: he/him/his',
	                'Research/career interests: Robotics, AI',
	                'Favorite Food: Deep Dish Pizza',
	                'A painfully boring fact: Am a middle child',
	                'Hobbies: Golf, Traveling, Games',
	                'Email: mfogelso@andrew.cmu.edu',
	            ),
	            'https://images.squarespace-cdn.com/content/v1/562661f3e4b0ae7c10f0a2cc/1590528961389-2142HA48O7LRZ9FWGP0F/about_image.jpg?format=2500w',
	        ),
	        (
	            'Sunny Yang',
	            (
	                'Pronouns: She/Her/Hers',
	                'Research/career interests: Product Manager',
	                'Favorite Food: Sushi',
	                'A painfully boring fact: I do not like rainy:(',
	                'Hobbies: Viola, Basketball',
	                'Email: yundiy@andrew.cmu.edu',
	            ),
	            'https://media-exp1.licdn.com/dms/image/C4D03AQF37KjK_GYwzA/profile-displayphoto-shrink_400_400/0/1638326708803?e=1643846400&v=beta&t=q10CTNCG6h5guez1YT0j4j_oLlrGJB_8NugaBOUSAGg',
	        ),
	        (
	            'Shihao Xu',
	            (
	                'Pronouns: he/him/his',
	                'Research/career interests: Autonomous Vehicle',
	                'Favorite Food: Dumplings',
	                'A painfully boring fact: Covid is still not gone',
	                'Hobbies: photography',
	                'Email: shihaoxu@andrew.cmu.edu',
	            ),
	            'https://scontent-ort2-1.xx.fbcdn.net/v/t39.30808-6/261420667_131245119324840_3342182275866550937_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=IP7khn2w6cwAX_wC85x&_nc_ht=scontent-ort2-1.xx&oh=063c2b6b0ed5e9fc10adb2c391c471cf&oe=61AA72C1',
	        ),
	    )

	    for member_name, profile_lines, portrait_url in team_profiles:
	        st.subheader(member_name)
	        # The middle column is never written to; it only separates the two.
	        col1, mid, col2 = st.columns([20, 2, 10])
	        with col1:
	            for profile_line in profile_lines:
	                st.write(profile_line)
	        with col2:
	            st.image(portrait_url, width=300)