import requests
from io import BytesIO
import numpy as np
from gensim.models.fasttext import FastText
from scipy import spatial
import itertools
import gdown
import warnings
import nltk
warnings.filterwarnings('ignore')
import pickle
import pdb
from concurrent.futures import ProcessPoolExecutor
import matplotlib.pyplot as plt
import streamlit as st
import argparse
import logging
from pyunsplash import PyUnsplash
import blacklists
api_key = 'hzcKZ0e4we95wSd8_ip2zTB3m2DrOMWehAxrYjqjwg0'

# instantiate PyUnsplash object
py_un = PyUnsplash(api_key=api_key)

# pyunsplash logger defaults to level logging.ERROR
# If you need to change that, use getLogger/setLevel
# on the module logger, like this:
logging.getLogger("pyunsplash").setLevel(logging.DEBUG)

# TODO:
# Image search: Option 1 -> google image search api || Option 2 -> open ai clip search
from PIL import Image

# NLTK Datasets
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Average embedding -> Compare
def recommend_ingredients(yum, leftovers, n=10):
    '''
    Uses a mean aggregation method
    :params
        yum -> FastText Word2Vec Obj
        leftovers -> list of str
        n -> int top_n to return
    :returns
        output -> top_n recommendations
    '''
    leftovers_embedding_sum = np.zeros([32,])
    for ingredient in leftovers:
        # pdb.set_trace()
        ingredient_embedding = yum.get_vector(ingredient, norm=True)
        leftovers_embedding_sum += ingredient_embedding
    leftovers_embedding = leftovers_embedding_sum / len(leftovers)  # Embedding for leftovers
    top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
    top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]
    leftovers = [x.replace('_', ' ') for x in leftovers]
    output = [x for x in top_matches if not any(ignore in x[0] for ignore in leftovers)]  # Remove boring same-item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
    return output[:n]
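
# Example usage (illustrative sketch; variable names here are examples only and assume a
# trained model has been saved as 'fastfood.pth', the same file loaded further below):
#   _, yum_kv = load_model('fastfood.pth')
#   recommend_ingredients(yum_kv, ['bread', 'lettuce'], n=5)
#   # returns a list of (ingredient, cosine-similarity) tuples, highest scores first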

# Compare -> Find intersection
def recommend_ingredients_intersect(yum, leftovers, n=10):
    '''
    Finds top combined probabilities
    :params
        yum -> FastText Word2Vec Obj
        leftovers -> list of str
        n -> int top_n to return
    :returns
        output -> top_n recommendations
    '''
    first = True
    for ingredient in leftovers:
        ingredient_embedding = yum.get_vector(ingredient, norm=True)
        ingredient_matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
        ingredient_matches = [(x[0].replace('_', ' '), x[1]) for x in ingredient_matches]
        ingredient_output = [x for x in ingredient_matches if not any(ignore in x[0] for ignore in leftovers)]  # Remove boring same-item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
        if first:
            output = ingredient_output
            first = False
        else:
            output = [x for x in output for y in ingredient_output if x[0] == y[0]]
    return output[:n]
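
# Example usage (illustrative sketch): keeps only candidates that rank in the top 10000
# neighbours of every leftover, i.e. the intersection of the per-ingredient lists:
#   recommend_ingredients_intersect(yum_kv, ['bread', 'lettuce'], n=5)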

def recommend_ingredients_subsets(model, yum, leftovers, subset_size):
    '''
    Returns recommendations for every subset of the leftovers of a given size
    :params
        model -> FastText Obj
        yum -> FastText Word2Vec Obj
        leftovers -> list of str
        subset_size -> int size of each leftover subset
    :returns
        all_outputs -> dict mapping each subset to its top 10 recommendations
    '''
    all_outputs = {}
    for leftovers_subset in itertools.combinations(leftovers, subset_size):
        leftovers_embedding_sum = np.zeros([32,])  # zero-initialised accumulator; must match the model's vector_size
        for ingredient in leftovers_subset:
            ingredient_embedding = yum.get_vector(ingredient, norm=True)
            leftovers_embedding_sum += ingredient_embedding
        leftovers_embedding = leftovers_embedding_sum / len(leftovers_subset)  # Embedding for leftovers
        top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
        top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]
        output = [x for x in top_matches if not any(ignore in x[0] for ignore in leftovers_subset)]  # Remove boring same-item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
        all_outputs[leftovers_subset] = output[:10]
    return all_outputs
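
# Example usage (illustrative sketch): with four leftovers and subset_size=2 this scores
# all six pairs independently and returns a dict keyed by each pair:
#   recommend_ingredients_subsets(model, yum_kv, ['bread', 'lettuce', 'tomato', 'egg'], subset_size=2)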

def filter_adjectives(data):
    '''
    Remove single-word entries that are not nouns (e.g. stray adjectives not tied to a food item)
    :params
        data -> list of ingredient strings
    :returns
        data -> filtered list of ingredient strings
    '''
    recipe_ingredients_token = [nltk.word_tokenize(x) for x in data]
    inds = []
    for i, r in enumerate(recipe_ingredients_token):
        out = nltk.pos_tag(r)
        out = [x[1] for x in out]
        if len(out) > 1:
            inds.append(int(i))
        elif 'NN' in out or 'NNS' in out:
            inds.append(int(i))
    return [data[i] for i in inds]
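
# Example usage (illustrative sketch; exact behaviour depends on NLTK's perceptron tagger):
#   filter_adjectives(['fresh', 'tomato', 'olive oil'])
#   # multi-word items are always kept; single-word items are kept only when tagged NN/NNS,
#   # so a lone adjective like 'fresh' would typically be dropped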

def plural_to_singular(lemma, recipe):
    '''
    :params
        lemma -> nltk lemma Obj
        recipe -> list of str
    :returns
        recipe -> converted recipe
    '''
    return [lemma.lemmatize(r) for r in recipe]

def filter_lemma(data):
    '''
    Convert plural to roots
    :params
        data -> list of lists
    :returns
        data -> returns filtered data
    '''
    # Initialize Lemmatizer (to reduce plurals to stems)
    lemma = nltk.wordnet.WordNetLemmatizer()
    # NOTE: This uses all the computational resources of your computer
    with ProcessPoolExecutor() as executor:
        out = list(executor.map(plural_to_singular, itertools.repeat(lemma), data))
    return out
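
# Example usage (illustrative sketch; spawns one worker process per CPU core):
#   filter_lemma([['eggs', 'tomatoes'], ['onions']])
#   # -> [['egg', 'tomato'], ['onion']]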

def train_model(data):
    '''
    Train the FastText model
    NOTE: gensim==4.1.2
    :params
        data -> list of lists of all recipes
    :returns
        model -> FastText model obj
    '''
    model = FastText(data, vector_size=32, window=99, min_count=5, workers=40, sg=1)  # Train model
    return model
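
# Example usage (illustrative sketch; 'fastfood.pth' is assumed to be the path that load_model below expects):
#   recipes = load_data()
#   ft_model = train_model(recipes)
#   ft_model.save('fastfood.pth')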

def load_model(filename):
    '''
    Load the FastText Model
    :params:
        filename -> path to the model
    :returns
        model -> this is the full FastText obj
        yum -> this is the FastText Word2Vec obj
    '''
    # Load Models
    model = FastText.load(filename)
    yum = model.wv
    return model, yum

def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
    '''
    Load data
    :params:
        filename -> path to dataset
    :return
        data -> list of all recipes
    '''
    return pickle.load(open(filename, 'rb'))

def plot_results(names, probs, n=5):
    '''
    Plots a bar chart of the names of the items vs. probability of similarity
    :params:
        names -> list of str
        probs -> list of float values
        n -> int of how many bars to show NOTE: Max = 100
    :return
        fig -> return figure for plotting
    '''
    plt.bar(range(len(names)), probs, align='center')
    ax = plt.gca()
    ax.xaxis.set_major_locator(plt.FixedLocator(range(len(names))))
    ax.xaxis.set_major_formatter(plt.FixedFormatter(names))
    ax.set_ylabel('Probability', fontsize='large', fontweight='bold')
    ax.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
    ax.xaxis.labelpad = 10
    ax.set_title(f'FoodNet Top {n} Predictions = {st.session_state.leftovers}')
    # mpld3.show()
    plt.xticks(rotation=45, ha='right')
    fig = plt.gcf()
    return fig
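
# Example usage (illustrative sketch, inside a running Streamlit session where
# st.session_state.leftovers is set and `names`/`probs` come from recommend_ingredients):
#   fig = plot_results(names, probs, n=len(names))
#   st.pyplot(fig)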

def load_image(image_file):
    img = Image.open(image_file)
    return img


st.set_page_config(page_title="FoodNet", page_icon="π", layout="centered", initial_sidebar_state="auto")

##### UI/UX #####
## Sidebar ##
add_selectbox = st.sidebar.selectbox("Pages", ("FoodNet Recommender", "Food Donation Resources", "Contact Team"))

model, yum = load_model('fastfood.pth')

if add_selectbox == "FoodNet Recommender":
    st.title("FoodNet π")
    st.write("Search for similar food ingredients. Select two or more ingredients to find complementary ingredients.")
    ingredients = list(yum.key_to_index.keys())
    ingredients = [x.replace('_', ' ') for x in ingredients]
    st.multiselect("Type or select food ingredients", ingredients, default=['bread', 'lettuce'], key="leftovers")
    ## Slider ##
    st.slider("Select number of recommendations to show", min_value=1, max_value=10, value=3, step=1, key='top_n')

    ## Show Images ##
    # search = py_un.search(type_="photos", query="cookie")
    # py_un.photos(type_="single", photo_id='l0_kVknpO2g')
    # st.image(search)

    ## Images
    # for leftover in st.session_state.leftovers:
    #     search = py_un.search(type_='photos', query=leftover)
    #     for photo in search.entries:
    #         # print(photo.id, photo.link_download)
    #         st.image(photo.link_download, caption=leftover, width=200)
    #         break
    # (f"![Alt Text]({search.link_next})")
    ## Get food recommendation ##
    ingredients_no_space = [x.replace(' ', '_') for x in st.session_state.get('leftovers')]
    out = recommend_ingredients(yum, ingredients_no_space, n=st.session_state.top_n)
    names = [o[0] for o in out]
    probs = [o[1] for o in out]

    # if 'probs' not in st.session_state:
    #     st.session_state['probs'] = False
    # if st.session_state.probs:
    #     st.table(data=out)
    # else:
    #     st.table(data=names)
    # st.checkbox(label="Show model scores", value=False, key="probs")

    # ## Plot Results ##
    # st.checkbox(label="Show results bar chart", value=False, key="plot")
    # if st.session_state.plot:
    #     fig = plot_results(names, probs, st.session_state.top_n)
    #     ## Show Plot ##
    #     st.pyplot(fig)
    st.selectbox(label="Select dietary restriction", options=('None', 'Kosher', 'Vegetarian'), key="diet")
    if st.session_state.diet != 'None':
        if st.session_state.diet == 'Vegetarian':
            out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.vegitarian)]
        if st.session_state.diet == 'Kosher':
            out = [o for o in out if not any(ignore in o[0] for ignore in blacklists.kosher)]
    names = [o[0] for o in out]
    probs = [o[1] for o in out]

    col1, col2, col3 = st.columns(3)
    for i, name in enumerate(names):
        search = py_un.search(type_='photos', query=name)
        for photo in search.entries:
            col_id = i % 3
            if col_id == 0:
                col1.image(photo.link_download, caption=name, use_column_width=True)
            elif col_id == 1:
                col2.image(photo.link_download, caption=name, use_column_width=True)
            elif col_id == 2:
                col3.image(photo.link_download, caption=name, use_column_width=True)
            break

elif add_selectbox == "Food Donation Resources":
    st.title('Food Donation Resources')

    st.subheader('Pittsburgh Food Bank:')
    st.write("In 2000, the Food Bank opened the doors on its facility in Duquesne. "
             "This facility was the first LEED-certified building in Pittsburgh and the first LEED-certified "
             "food bank in the nation. Learn more about that facility here. "
             "Today, we work with a network of more than 850 partners across the 11 counties we serve. "
             "In addition to sourcing, warehousing and distributing food, the Food Bank is actively engaged in "
             "stabilizing lives and confronting issues of chronic hunger, poor nutrition and health. "
             "And, through our advocacy efforts, we have become a primary driver in comprehensive anti-hunger "
             "endeavors regionally, statewide and at the national level."
             )
    st.write("Check out this [link](https://pittsburghfoodbank.org/)π")

    st.subheader('412 Food Rescue:')
    st.write("412 Food Rescue is a nonprofit organization dedicated to ending hunger by organizing "
             "volunteers to deliver surplus food to food-insecure communities instead of landfills. "
             "Since its creation in 2015, the organization has redistributed over three million pounds of food through "
             "the use of its mobile application, Food Rescue Hero. They are currently rolling out the app nationwide."
             )
    st.write("Check out this [link](https://412foodrescue.org/)π")

    # st.subheader('Image')
    # st.multiselect("Select leftovers:", list(yum.key_to_index.keys()), key="leftovers")
    # image_file = st.file_uploader("Upload Food Image:", type=["png", "jpg", "jpeg"])
    # if image_file is not None:
    #     # To See details
    #     file_details = {"filename": image_file.name, "filetype": image_file.type,
    #                     "filesize": image_file.size}
    #     st.write(file_details)
    #
    #     # To View Uploaded Image
    #     st.image(load_image(image_file), width=250)

if add_selectbox == "Contact Team":
    st.title('Contact Team')

    st.subheader('David Chuan-En Lin')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write(
            'Research/career interests: Human-AI Co-Design by (1) building ML-infused creativity support tools and '
            '(2) investigating how such tools augment design processes')
        st.write('Favorite Food: Ice cream sandwich')
        st.write('A painfully boring fact: Second-year PhD at HCII SCS')
        st.write('Hobbies: Making travel videos, graphic design, music')
        st.write('Email: chuanenl@andrew.cmu.edu')
    with col2:
        st.image('https://chuanenlin.com/images/me.jpg', width=300)

    st.subheader('Mitchell Fogelson')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write('Research/career interests: Robotics, AI')
        st.write('Favorite Food: Deep Dish Pizza')
        st.write('A painfully boring fact: Am a middle child')
        st.write('Hobbies: Golf, Traveling, Games')
        st.write('Email: mfogelso@andrew.cmu.edu')
    with col2:
        st.image('https://images.squarespace-cdn.com/content/v1/562661f3e4b0ae7c10f0a2cc/1590528961389-2142HA48O7LRZ9FWGP0F/about_image.jpg?format=2500w', width=300)

    st.subheader('Sunny Yang')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: She/Her/Hers')
        st.write('Research/career interests: Product Manager')
        st.write('Favorite Food: Sushi')
        st.write('A painfully boring fact: I do not like rainy days :(')
        st.write('Hobbies: Viola, Basketball')
        st.write('Email: yundiy@andrew.cmu.edu')
    with col2:
        st.image('https://media-exp1.licdn.com/dms/image/C4D03AQF37KjK_GYwzA/profile-displayphoto-shrink_400_400/0/1638326708803?e=1643846400&v=beta&t=q10CTNCG6h5guez1YT0j4j_oLlrGJB_8NugaBOUSAGg', width=300)

    st.subheader('Shihao Xu')
    col1, mid, col2 = st.columns([20, 2, 10])
    with col1:
        st.write('Pronouns: he/him/his')
        st.write('Research/career interests: Autonomous Vehicles')
        st.write('Favorite Food: Dumplings')
        st.write('A painfully boring fact: Covid is still not gone')
        st.write('Hobbies: Photography')
        st.write('Email: shihaoxu@andrew.cmu.edu')
    with col2:
        st.image('https://scontent-ort2-1.xx.fbcdn.net/v/t39.30808-6/261420667_131245119324840_3342182275866550937_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=IP7khn2w6cwAX_wC85x&_nc_ht=scontent-ort2-1.xx&oh=063c2b6b0ed5e9fc10adb2c391c471cf&oe=61AA72C1', width=300)