foodnet / foodnet.py
chuanenlin's picture
Update foodnet.py
3682312
import requests
from io import BytesIO
import numpy as np
from gensim.models.fasttext import FastText
from scipy import spatial
import itertools
import gdown
import warnings
import nltk
warnings.filterwarnings('ignore')
import pickle
import pdb
from concurrent.futures import ProcessPoolExecutor
import matplotlib.pyplot as plt
import streamlit as st
import argparse
import logging
from pyunsplash import PyUnsplash
import blacklists
# NOTE(review): this API key is committed in plain text. Consider loading it
# from an environment variable or a secrets store instead, and rotating it.
api_key = 'hzcKZ0e4we95wSd8_ip2zTB3m2DrOMWehAxrYjqjwg0'
# instantiate PyUnsplash object (shared client used for the photo searches below)
py_un = PyUnsplash(api_key=api_key)
# pyunsplash logger defaults to level logging.ERROR
# If you need to change that, use getLogger/setLevel
# on the module logger, like this:
# NOTE(review): DEBUG is the most verbose level; confirm this is intended
# outside of development.
logging.getLogger("pyunsplash").setLevel(logging.DEBUG)
# TODO:
# Image search: Option 1 -> google image search api || Option 2 -> open ai clip search
from PIL import Image
# NLTK resources, requested every time this module is executed.
# Presumably: 'wordnet' backs the lemmatizer, 'punkt' the tokenizer and
# 'averaged_perceptron_tagger' the POS tagger used further down.
for _nltk_resource in ('wordnet', 'punkt', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)
# Average embedding → Compare
def recommend_ingredients(yum, leftovers, n=10):
    '''
    Recommend ingredients close to the mean embedding of the leftovers
    :params
        yum -> FastText Word2Vec Obj (needs get_vector and similar_by_vector)
        leftovers -> list of str, vocabulary keys (words joined with '_')
        n -> int top_n to return
    :returns
        output -> list of (name, score) tuples, at most n long, with '_' in the
                  names replaced by ' '. Empty list when leftovers is empty.
    '''
    if not leftovers:
        # Nothing to average; dividing by len(leftovers) would be a division by zero.
        return []
    # Average the normalised vectors. Stacking them lets numpy infer the
    # embedding size instead of hard-coding it.
    vectors = [yum.get_vector(ingredient, norm=True) for ingredient in leftovers]
    leftovers_embedding = np.mean(vectors, axis=0, dtype=np.float64)  # Embedding for leftovers
    # Over-fetch (100) so that enough candidates survive the filter below.
    top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
    top_matches = [(name.replace('_', ' '), score) for name, score in top_matches]
    ignored = [x.replace('_', ' ') for x in leftovers]
    # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
    output = [match for match in top_matches if not any(ignore in match[0] for ignore in ignored)]
    return output[:n]
# Compare → Find intersection
def recommend_ingredients_intersect(yum, leftovers, n=10):
    '''
    Keeps only the matches that are shared by every leftover ingredient
    :params
        yum -> FastText Word2Vec Obj (needs get_vector and similar_by_vector)
        leftovers -> list of str, vocabulary keys (words joined with '_')
        n -> int top_n to return
    :returns
        output -> list of (name, score) tuples, at most n long, ordered and
                  scored by similarity to the FIRST leftover. Empty list when
                  leftovers is empty.
    '''
    # Match names have '_' replaced by ' ', so the ignore list must use the
    # same form or multi-word leftovers would never be filtered out.
    ignored = [x.replace('_', ' ') for x in leftovers]
    output = None
    for ingredient in leftovers:
        ingredient_embedding = yum.get_vector(ingredient, norm=True)
        ingredient_matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
        ingredient_matches = [(name.replace('_', ' '), score) for name, score in ingredient_matches]
        # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
        ingredient_output = [m for m in ingredient_matches if not any(ignore in m[0] for ignore in ignored)]
        if output is None:
            output = ingredient_output
        else:
            # Set lookup instead of a nested scan over two 10000-item lists.
            shared_names = {name for name, _ in ingredient_output}
            output = [m for m in output if m[0] in shared_names]
    if output is None:
        # No leftovers were given, so there is nothing to intersect.
        return []
    return output[:n]
def recommend_ingredients_subsets(model, yum, leftovers, subset_size):
    '''
    Returns recommendations for every subset of the leftovers
    :params
        model -> FastText Obj (kept for backward compatibility, not used:
                 the similarity query goes through yum)
        yum -> FastText Word2Vec Obj (needs get_vector and similar_by_vector)
        leftovers -> list of str, vocabulary keys (words joined with '_')
        subset_size -> int number of ingredients in each subset
    :returns
        all_outputs -> dict mapping each subset (tuple of str) to its list of
                       up to 10 (name, score) recommendations
    '''
    all_outputs = {}
    for leftovers_subset in itertools.combinations(leftovers, subset_size):
        if not leftovers_subset:
            # subset_size == 0 yields one empty tuple; there is nothing to average.
            continue
        # Mean of the normalised vectors. The previous accumulator started from
        # uninitialised memory with a fixed length of 100.
        vectors = [yum.get_vector(ingredient, norm=True) for ingredient in leftovers_subset]
        leftovers_embedding = np.mean(vectors, axis=0, dtype=np.float64)  # Embedding for leftovers
        top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
        top_matches = [(name.replace('_', ' '), score) for name, score in top_matches]
        ignored = [x.replace('_', ' ') for x in leftovers_subset]
        # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
        output = [m for m in top_matches if not any(ignore in m[0] for ignore in ignored)]
        all_outputs[leftovers_subset] = output[:10]
    return all_outputs
def filter_adjectives(data):
    '''
    Drop single-word entries that are not tagged as a noun
    :params
        data -> list of str
    :returns
        kept -> list of str: every entry with more than one token, plus the
                single-token entries whose POS tag is 'NN' or 'NNS'
    '''
    kept = []
    for entry in data:
        tags = [tag for _, tag in nltk.pos_tag(nltk.word_tokenize(entry))]
        # Multi-word entries are always kept; a lone word must be a noun.
        if len(tags) > 1 or 'NN' in tags or 'NNS' in tags:
            kept.append(entry)
    return kept
def plural_to_singular(lemma, recipe):
    '''
    Lemmatize every ingredient of a single recipe
    :params
        lemma -> nltk lemma Obj (anything exposing lemmatize(str))
        recipe -> list of str
    :returns
        recipe -> list of str, one lemmatized entry per input entry
    '''
    return list(map(lemma.lemmatize, recipe))
def filter_lemma(data):
    '''
    Lemmatize every recipe in parallel (plural -> root form)
    :params
        data -> list of lists of str (one inner list per recipe)
    :returns
        data -> list of lists, the lemmatized recipes in the same order
    '''
    # One lemmatizer, handed to every plural_to_singular call.
    lemmatizer = nltk.wordnet.WordNetLemmatizer()
    # NOTE: ProcessPoolExecutor() with no arguments spawns one worker per
    # available CPU, so this uses all the computational resources of your computer
    with ProcessPoolExecutor() as pool:
        lemmatizers = itertools.repeat(lemmatizer)
        return list(pool.map(plural_to_singular, lemmatizers, data))
def train_model(data):
    '''
    Train a FastText model on the recipes
    NOTE: gensim==4.1.2
    :params
        data -> list of lists of all recipes
    :returns
        model -> trained FastText model obj
    '''
    hyperparameters = {
        'vector_size': 32,  # embedding dimension
        'window': 99,
        'min_count': 5,
        'workers': 40,
        'sg': 1,  # skip-gram
    }
    return FastText(data, **hyperparameters)  # Train model
@st.cache_resource
def load_model(filename):
    '''
    Load a saved FastText model (cached by Streamlit via st.cache_resource)
    :params:
        filename -> path to the model
    :returns
        model -> this is the full FastText obj
        yum -> this is the FastText Word2Vec obj (model.wv)
    '''
    fasttext_model = FastText.load(filename)
    return fasttext_model, fasttext_model.wv
@st.cache_resource
def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
    '''
    Load the pickled dataset (cached by Streamlit via st.cache_resource)
    :params:
        filename -> path to dataset
    :return
        data -> list of all recipes
    '''
    # NOTE(security): pickle can execute arbitrary code while loading; only
    # point this at files you produced yourself.
    # The context manager closes the file handle, which the bare open() leaked.
    with open(filename, 'rb') as handle:
        return pickle.load(handle)
def plot_results(names, probs, n=5):
    '''
    Plots a bar chart of the names of the items vs. probability of similarity
    :params:
        names -> list of str
        probs -> list of float values
        n -> int of how many bars to show NOTE: Max = 100
    :return
        fig -> return figure for plotting
    '''
    # Honour n as documented: draw only the first n items.
    names = list(names)[:n]
    probs = list(probs)[:n]
    # pyplot keeps one "current" figure alive between calls; clear it so bars
    # from an earlier call do not pile up in the new chart.
    plt.clf()
    ax = plt.gca()
    positions = range(len(names))
    ax.bar(positions, probs, align='center')
    # One tick per bar, labelled with the ingredient name.
    ax.xaxis.set_major_locator(plt.FixedLocator(positions))
    ax.xaxis.set_major_formatter(plt.FixedFormatter(names))
    ax.set_ylabel('Probability',fontsize='large', fontweight='bold')
    ax.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
    ax.xaxis.labelpad = 10
    # The title reads the current selection from Streamlit's session state.
    ax.set_title(f'FoodNet Top {n} Predictions = {st.session_state.leftovers}')
    plt.xticks(rotation=45, ha='right')
    fig = plt.gcf()
    return fig
def load_image(image_file):
    '''
    Open an image with PIL
    :params:
        image_file -> whatever Image.open accepts (path or file-like object)
    :return
        img -> the opened PIL image
    '''
    return Image.open(image_file)
# Page icon restored from a mis-encoded byte sequence (presumably the hamburger emoji).
st.set_page_config(page_title="FoodNet", page_icon="🍔", layout="centered", initial_sidebar_state="auto")
##### UI/UX #####
## Sidebar ##
# The selected page name decides which of the sections below is rendered.
add_selectbox = st.sidebar.selectbox("Pages", ("FoodNet Recommender", "Food Donation Resources", "Contact Team"))
# model is the full FastText object, yum its word vectors.
model, yum = load_model('fastfood.pth')
if add_selectbox == "FoodNet Recommender":
    # Title emoji restored from a mis-encoded byte sequence (presumably the hamburger emoji).
    st.title("FoodNet 🍔")
    st.write("Search for similar food ingredients. Select two or more ingredients to find complementary ingredients.")
    # Vocabulary keys use '_' between words; show them to the user with spaces.
    ingredients = list(yum.key_to_index.keys())
    ingredients = [x.replace('_',' ') for x in ingredients]
    st.multiselect("Type or select food ingredients", ingredients, default=['bread', 'lettuce'], key="leftovers")

    ## Slider ##
    st.slider("Select number of recommendations to show", min_value=1, max_value=10, value=3, step=1, key='top_n')

    ## Show Images ##
    # search = py_un.search(type_="photos", query="cookie")
    # py_un.photos(type_="single", photo_id='l0_kVknpO2g')
    # st.image(search)
    ## Images
    # for leftover in st.session_state.leftovers:
    #     search = py_un.search(type_='photos', query=leftover)
    #     for photo in search.entries:
    #         # print(photo.id, photo.link_download)
    #         st.image(photo.link_download, caption=leftover, width=200)
    #         break
    # (f"![Alt Text]({search.link_next})")

    ## Get food recommendation ##
    # Convert the displayed names back to vocabulary keys.
    ingredients_no_space = [x.replace(' ','_') for x in (st.session_state.get('leftovers') or [])]
    if ingredients_no_space:
        out = recommend_ingredients(yum, ingredients_no_space, n=st.session_state.top_n)
    else:
        # Nothing selected: skip the model call, which cannot average zero vectors.
        out = []
    names = [o[0] for o in out]
    probs = [o[1] for o in out]

    # if 'probs' not in st.session_state:
    #     st.session_state['probs'] = False
    # if st.session_state.probs:
    #     st.table(data=out)
    # else:
    #     st.table(data=names)
    # st.checkbox(label="Show model scores", value=False, key="probs")
    # ## Plot Results ##
    # st.checkbox(label="Show results bar chart", value=False, key="plot")
    # if st.session_state.plot:
    #     fig = plot_results(names, probs, st.session_state.top_n)
    #     ## Show Plot ##
    #     st.pyplot(fig)

    st.selectbox(label="Select dietary restriction", options=('None', 'Kosher', 'Vegetarian'), key="diet")
    # Drop recommendations whose name contains a blacklisted term for the chosen diet.
    # NOTE: this runs after the top_n cut, so fewer than top_n items may remain.
    if st.session_state.diet == 'Vegetarian':
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.vegitarian)]
    elif st.session_state.diet == 'Kosher':
        out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.kosher)]
    names = [o[0] for o in out]
    probs = [o[1] for o in out]

    # Lay the result photos out round-robin over three columns.
    col1, col2, col3 = st.columns(3)
    columns = (col1, col2, col3)
    for i, name in enumerate(names):
        search = py_un.search(type_='photos', query=name)
        for photo in search.entries:
            columns[i % 3].image(photo.link_download, caption=name, use_column_width=True)
            # Only the first search hit is shown for each ingredient.
            break

elif add_selectbox == "Food Donation Resources":
    st.title('Food Donation Resources')
    st.subheader('Pittsburgh Food Bank:')
    st.write("In 2000, the Food Bank opened the doors on its facility in Duquesne. "
             "This facility was the first LEED-certified building in Pittsburgh and the first LEED-certified "
             "food bank in the nation. Learn more about that facility here. "
             "Today, we work with a network of more than 850 partners across the 11 counties we serve. "
             "In addition to sourcing, warehousing and distributing food, the Food Bank is actively engaged in "
             "stabilizing lives and confronting issues of chronic hunger, poor nutrition and health. "
             "And, through our advocacy efforts, we have become a primary driver in comprehensive anti-hunger "
             "endeavors regionally, statewide and at the national level."
             )
    st.write("Check out this [link](https://pittsburghfoodbank.org/)👈")
    st.subheader('412 Food Rescue:')
    st.write("412 Food Rescue is a nonprofit organization dedicated to ending hunger by organizing "
             "volunteers to deliver surplus food to insecure communities instead of landfills. "
             "Since its creation in 2015, the organization has redistributed over three million pounds of food through "
             "the use of its mobile application, Food Rescue Hero. They are currently rolling out the app nationwide."
             )
    st.write("Check out this [link](https://412foodrescue.org/)👈")
    # st.subheader('Image')
    # st.multiselect("Select leftovers:", list(yum.key_to_index.keys()), key="leftovers")
    # image_file = st.file_uploader("Upload Food Image:", type=["png", "jpg", "jpeg"])
    # if image_file is not None:
    #     # To See details
    #     file_details = {"filename": image_file.name, "filetype": image_file.type,
    #                     "filesize": image_file.size}
    #     st.write(file_details)
    #
    #     # To View Uploaded Image
    #     st.image(load_image(image_file), width=250)
if add_selectbox == "Contact Team":
    st.title('Contact Team')
    # One entry per team member: heading, the text lines shown in the wide
    # left column, and the photo shown in the narrow right column.
    team_members = (
        {
            'name': 'David Chuan-En Lin',
            'lines': (
                'Pronouns: he/him/his',
                'Research/career interests: Human-AI Co-Design by (1) building ML-infused creativity support tools and '
                '(2) investigating how such tools augment design processes',
                'Favorite Food: Ice cream sandwich',
                'A painfully boring fact: Second-year PhD at HCII SCS',
                'Hobbies: Making travel videos, graphic design, music',
                'Email: chuanenl@andrew.cmu.edu',
            ),
            'photo': 'https://chuanenlin.com/images/me.jpg',
        },
        {
            'name': 'Mitchell Fogelson',
            'lines': (
                'Pronouns: he/him/his',
                'Research/career interests: Robotics, AI',
                'Favorite Food: Deep Dish Pizza',
                'A painfully boring fact: Am a middle child',
                'Hobbies: Golf, Traveling, Games',
                'Email: mfogelso@andrew.cmu.edu',
            ),
            'photo': 'https://images.squarespace-cdn.com/content/v1/562661f3e4b0ae7c10f0a2cc/1590528961389-2142HA48O7LRZ9FWGP0F/about_image.jpg?format=2500w',
        },
        {
            'name': 'Sunny Yang',
            'lines': (
                'Pronouns: She/Her/Hers',
                'Research/career interests: Product Manager',
                'Favorite Food: Sushi',
                'A painfully boring fact: I do not like rainy:(',
                'Hobbies: Viola, Basketball',
                'Email: yundiy@andrew.cmu.edu',
            ),
            'photo': 'https://media-exp1.licdn.com/dms/image/C4D03AQF37KjK_GYwzA/profile-displayphoto-shrink_400_400/0/1638326708803?e=1643846400&v=beta&t=q10CTNCG6h5guez1YT0j4j_oLlrGJB_8NugaBOUSAGg',
        },
        {
            'name': 'Shihao Xu',
            'lines': (
                'Pronouns: he/him/his',
                'Research/career interests: Autonomous Vehicle',
                'Favorite Food: Dumplings',
                'A painfully boring fact: Covid is still not gone',
                'Hobbies: photography',
                'Email: shihaoxu@andrew.cmu.edu',
            ),
            'photo': 'https://scontent-ort2-1.xx.fbcdn.net/v/t39.30808-6/261420667_131245119324840_3342182275866550937_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=IP7khn2w6cwAX_wC85x&_nc_ht=scontent-ort2-1.xx&oh=063c2b6b0ed5e9fc10adb2c391c471cf&oe=61AA72C1',
        },
    )
    for member in team_members:
        st.subheader(member['name'])
        # The middle column is only a spacer between text and photo.
        col1, mid, col2 = st.columns([20, 2, 10])
        with col1:
            for line in member['lines']:
                st.write(line)
        with col2:
            st.image(member['photo'], width=300)