Spaces:
Running
Running
| import requests | |
| from io import BytesIO | |
| import numpy as np | |
| from gensim.models.fasttext import FastText | |
| from scipy import spatial | |
| import itertools | |
| import gdown | |
| import warnings | |
| import nltk | |
| warnings.filterwarnings('ignore') | |
| import pickle | |
| import pdb | |
| from concurrent.futures import ProcessPoolExecutor | |
| import matplotlib.pyplot as plt | |
| import streamlit as st | |
| import argparse | |
| import logging | |
| from pyunsplash import PyUnsplash | |
| import blacklists | |
import os

# SECURITY: a live Unsplash access key was committed here. Prefer the
# UNSPLASH_API_KEY environment variable; the embedded value is kept only as a
# backward-compatible fallback and should be rotated.
api_key = os.environ.get('UNSPLASH_API_KEY', 'hzcKZ0e4we95wSd8_ip2zTB3m2DrOMWehAxrYjqjwg0')
# instantiate PyUnsplash object
py_un = PyUnsplash(api_key=api_key)
# pyunsplash logger defaults to level logging.ERROR
# If you need to change that, use getLogger/setLevel
# on the module logger, like this:
logging.getLogger("pyunsplash").setLevel(logging.DEBUG)
# TODO:
# Image search: Option 1 -> google image search api || Option 2 -> open ai clip search
from PIL import Image
# NLTK datasets (downloads are skipped if the corpora are already on disk)
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
| # Average embedding β Compare | |
def recommend_ingredients(yum, leftovers, n=10):
    '''
    Recommend complementary ingredients via a mean-aggregated embedding.
    :params
        yum -> FastText Word2Vec Obj
        leftovers -> list of str (vocabulary tokens, underscores for spaces)
        n -> int top_n to return
    :returns
        output -> top_n recommendations as (name, score) tuples
    '''
    # Sum the normalized embedding of every leftover, then average.
    embedding_total = sum(
        (yum.get_vector(item, norm=True) for item in leftovers),
        np.zeros([32,]),
    )
    query_embedding = embedding_total / len(leftovers)
    # Pull a generous candidate pool and prettify the token names.
    candidates = yum.similar_by_vector(query_embedding, topn=100)
    candidates = [(name.replace('_', ' '), score) for name, score in candidates]
    known = [item.replace('_', ' ') for item in leftovers]
    # Drop boring same-item matches, e.g. "romaine lettuce" when the
    # leftovers already contain "lettuce".
    recommendations = [
        pair for pair in candidates
        if not any(ignore in pair[0] for ignore in known)
    ]
    return recommendations[:n]
# Compare per-ingredient matches -> keep the intersection
def recommend_ingredients_intersect(yum, leftovers, n=10):
    '''
    Recommend ingredients that rank highly for EVERY leftover (intersection of
    the per-ingredient top-10000 match lists). Scores kept are the ones from
    the first ingredient's match list.
    :params
        yum -> FastText Word2Vec Obj
        leftovers -> list of str
        n -> int top_n to return
    :returns
        output -> top_n recommendations as (name, score) tuples;
                  [] when leftovers is empty (previously raised NameError)
    '''
    output = []  # FIX: was undefined when leftovers is empty
    first = True
    for ingredient in leftovers:
        ingredient_embedding = yum.get_vector(ingredient, norm=True)
        ingredient_matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
        ingredient_matches = [(name.replace('_', ' '), score) for name, score in ingredient_matches]
        # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
        ingredient_output = [m for m in ingredient_matches
                             if not any(ignore in m[0] for ignore in leftovers)]
        if first:
            output = ingredient_output
            first = False
        else:
            # FIX: set-membership intersection is O(len(a)+len(b)) instead of
            # the previous nested scan, which was quadratic over two
            # ~10000-entry lists per ingredient.
            current_names = {name for name, _ in ingredient_output}
            output = [m for m in output if m[0] in current_names]
    return output[:n]
def recommend_ingredients_subsets(model, yum, leftovers, subset_size):
    '''
    Recommend ingredients for every subset of the leftovers of a given size.
    :params
        model -> FastText Obj (kept for interface compatibility; no longer used)
        yum -> FastText Word2Vec Obj
        leftovers -> list of str
        subset_size -> int size of each combination
    :returns
        all_outputs -> dict mapping each subset tuple to its top-10 recommendations
    '''
    all_outputs = {}
    for leftovers_subset in itertools.combinations(leftovers, subset_size):
        # FIX: previously accumulated into np.empty([100,]) — uninitialized
        # memory, and the wrong dimensionality (the model elsewhere uses
        # 32-dim vectors). Average real vectors instead; np.mean also infers
        # the correct dimension.
        # FIX: yum.word_vec(use_norm=...) was removed in gensim 4
        # (file pins gensim==4.1.2); use get_vector(norm=True) like the
        # other recommenders.
        vectors = [yum.get_vector(ingredient, norm=True) for ingredient in leftovers_subset]
        leftovers_embedding = np.mean(vectors, axis=0)
        # FIX: similar_by_vector lives on the KeyedVectors (yum), not on the
        # full FastText model object.
        top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
        top_matches = [(name.replace('_', ' '), score) for name, score in top_matches]
        # Remove boring same item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce".
        output = [m for m in top_matches
                  if not any(ignore in m[0] for ignore in leftovers_subset)]
        all_outputs[leftovers_subset] = output[:10]
    return all_outputs
def filter_adjectives(data):
    '''
    Remove adjectives that are not associated with a food item.
    Keeps any multi-word entry, and single-word entries only when the POS
    tagger marks them as a noun (NN/NNS).
    :params
        data -> list of str ingredient phrases
    :returns
        data -> filtered list, original order preserved
    '''
    kept = []
    for phrase in data:
        tags = [tag for _, tag in nltk.pos_tag(nltk.word_tokenize(phrase))]
        # Multi-token phrases pass unconditionally; lone tokens must be nouns.
        if len(tags) > 1 or 'NN' in tags or 'NNS' in tags:
            kept.append(phrase)
    return kept
def plural_to_singular(lemma, recipe):
    '''
    Lemmatize every ingredient in a recipe (e.g. "apples" -> "apple").
    :params
        lemma -> nltk lemma Obj
        recipe -> list of str
    :returns
        recipe -> converted recipe
    '''
    return list(map(lemma.lemmatize, recipe))
def filter_lemma(data):
    '''
    Convert plurals to their singular roots across all recipes.
    :params
        data -> list of lists (one list of ingredient strings per recipe)
    :returns
        data -> list of lemmatized recipes
    '''
    # Lemmatizer reduces plurals to stems.
    lemmatizer = nltk.wordnet.WordNetLemmatizer()
    # NOTE: fans out across every CPU core on the machine.
    with ProcessPoolExecutor() as pool:
        results = pool.map(plural_to_singular, itertools.repeat(lemmatizer), data)
        return list(results)
def train_model(data):
    '''
    Train the FastText "fastfood" embedding model.
    NOTE: gensim==4.1.2
    :params
        data -> list of lists of all recipes
    :returns
        model -> trained FastText model obj
    '''
    return FastText(
        data,
        vector_size=32,  # must match the 32-dim assumption in recommend_ingredients
        window=99,
        min_count=5,
        workers=40,
        sg=1,  # skip-gram
    )
def load_model(filename):
    '''
    Load a trained FastText model from disk.
    :params:
        filename -> path to the model
    :returns
        model -> the full FastText obj
        yum -> its keyed word vectors (word -> embedding lookup)
    '''
    model = FastText.load(filename)
    return model, model.wv
def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
    '''
    Load the pickled recipe dataset.
    NOTE: pickle.load can execute arbitrary code — only load trusted files.
    :params:
        filename -> path to dataset
    :return
        data -> list of all recipes
    '''
    # FIX: pickle.load(open(...)) leaked the file handle; the context manager
    # closes it even if unpickling raises.
    with open(filename, 'rb') as handle:
        return pickle.load(handle)
def plot_results(names, probs, n=5):
    '''
    Plot a bar chart of ingredient names vs. similarity probability.
    :params:
        names -> list of str (one bar per entry)
        probs -> list of float values
        n -> int, shown in the chart title only
    :return
        fig -> matplotlib figure for st.pyplot
    '''
    positions = range(len(names))
    plt.bar(positions, probs, align='center')
    axes = plt.gca()
    # Pin one tick per ingredient and label it with the ingredient name.
    axes.xaxis.set_major_locator(plt.FixedLocator(positions))
    axes.xaxis.set_major_formatter(plt.FixedFormatter(names))
    axes.set_ylabel('Probability', fontsize='large', fontweight='bold')
    axes.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
    axes.xaxis.labelpad = 10
    axes.set_title(f'FoodNet Top {n} Predictions = {st.session_state.leftovers}')
    plt.xticks(rotation=45, ha='right')
    return plt.gcf()
def load_image(image_file):
    '''Open an uploaded file object as a PIL Image.'''
    return Image.open(image_file)
# NOTE(review): set_page_config must run before any other st.* call.
st.set_page_config(page_title="FoodNet", page_icon="π", layout="centered", initial_sidebar_state="auto")
##### UI/UX #####
## Sidebar navigation ##
add_selectbox = st.sidebar.selectbox("Pages", ("FoodNet Recommender", "Food Donation Resources", "Contact Team"))
# Load the trained FastText model once; every page below shares it.
model, yum = load_model('fastfood.pth')
if add_selectbox == "FoodNet Recommender":
    st.title("FoodNet π")
    st.write("Search for similar food ingredients. Select two or more ingredients to find complementary ingredients.")
    # Model vocabulary with underscores prettified for display.
    vocab = list(yum.key_to_index.keys())
    vocab = [token.replace('_', ' ') for token in vocab]
    # Widgets publish their values into st.session_state under `key`.
    st.multiselect("Type or select food ingredients", vocab, default=['bread', 'lettuce'], key="leftovers")
    ## Slider ##
    st.slider("Select number of recommendations to show", min_value=1, max_value=10, value=3, step=1, key='top_n')
    ## Get food recommendation ##
    # The model vocabulary uses underscores, so undo the display formatting.
    query_tokens = [token.replace(' ', '_') for token in st.session_state.get('leftovers')]
    out = recommend_ingredients(yum, query_tokens, n=st.session_state.top_n)
    names = [pair[0] for pair in out]
    probs = [pair[1] for pair in out]
    st.selectbox(label="Select dietary restriction", options=('None', 'Kosher', 'Vegetarian'), key="diet")
    if st.session_state.diet != 'None':
        # Substring match against the diet blacklists drops offending items.
        if st.session_state.diet == 'Vegetarian':
            out = [pair for pair in out if not any(banned in pair[0] for banned in blacklists.vegitarian)]
        if st.session_state.diet == 'Kosher':
            out = [pair for pair in out if not any(banned in pair[0] for banned in blacklists.kosher)]
        names = [pair[0] for pair in out]
        probs = [pair[1] for pair in out]
    ## Show Images ##
    col1, col2, col3 = st.columns(3)
    cols = (col1, col2, col3)
    for i, name in enumerate(names):
        search = py_un.search(type_='photos', query=name)
        # Show only the first Unsplash hit per ingredient, round-robin
        # across the three columns.
        for photo in search.entries:
            cols[i % 3].image(photo.link_download, caption=name, use_column_width=True)
            break
| elif add_selectbox == "Food Donation Resources": | |
| st.title('Food Donation Resources') | |
| st.subheader('Pittsburgh Food Bank:') | |
| st.write("In 2000, the Food Bank opened the doors on its facility in Duquesne." | |
| "This facility was the first LEED-certified building in Pittsburgh and the first LEED-certified " | |
| "food bank in the nation. Learn more about that facility here. " | |
| "Today, we work with a network of more than 850 partners across the 11 counties we serve. " | |
| "In addition to sourcing, warehousing and distributing food, the Food Bank is actively engaged in " | |
| "stabilizing lives and confronting issues of chronic hunger, poor nutrition and health. " | |
| "And, through our advocacy efforts, we have become a primary driver in comprehensive anti-hunger " | |
| "endeavors regionally, statewide and at the national level." | |
| ) | |
| st.write("Check out this [link](https://pittsburghfoodbank.org/)π") | |
| st.subheader('412 Food Rescue:') | |
| st.write("412 Food Rescue is a nonprofit organization dedicated to ending hunger by organizing " | |
| "volunteers to deliver surplus food to insecure communities instead of landfills." | |
| "Since its creation in 2015, the organization has redistributed over three million pounds of food through " | |
| "the use of its mobile application, Food Rescue Hero. They are currently rolling out the app nationwide." | |
| ) | |
| st.write("Check out this [link](https://412foodrescue.org/)π") | |
| # st.subheader('Image') | |
| # st.multiselect("Select leftovers:", list(yum.key_to_index.keys()), key="leftovers") | |
| # image_file = st.file_uploader("Upload Food Image:", type=["png", "jpg", "jpeg"]) | |
| # if image_file is not None: | |
| # # To See details | |
| # file_details = {"filename": image_file.name, "filetype": image_file.type, | |
| # "filesize": image_file.size} | |
| # st.write(file_details) | |
| # | |
| # # To View Uploaded Image | |
| # st.image(load_image(image_file), width=250) | |
if add_selectbox == "Contact Team":
    st.title('Contact Team')
    # (name, bio lines, photo URL) — every member shares the same layout,
    # so render them from data instead of four copy-pasted column blocks.
    team = [
        ('David Chuan-En Lin',
         ('Pronouns: he/him/his',
          'Research/career interests: Human-AI Co-Design by (1) building ML-infused creativity support tools and (2) investigating how such tools augment design processes',
          'Favorite Food: Ice cream sandwich',
          'A painfully boring fact: Second-year PhD at HCII SCS',
          'Hobbies: Making travel videos, graphic design, music',
          'Email: chuanenl@andrew.cmu.edu'),
         'https://chuanenlin.com/images/me.jpg'),
        ('Mitchell Fogelson',
         ('Pronouns: he/him/his',
          'Research/career interests: Robotics, AI',
          'Favorite Food: Deep Dish Pizza',
          'A painfully boring fact: Am a middle child',
          'Hobbies: Golf, Traveling, Games',
          'Email: mfogelso@andrew.cmu.edu'),
         'https://images.squarespace-cdn.com/content/v1/562661f3e4b0ae7c10f0a2cc/1590528961389-2142HA48O7LRZ9FWGP0F/about_image.jpg?format=2500w'),
        ('Sunny Yang',
         ('Pronouns: She/Her/Hers',
          'Research/career interests: Product Manager',
          'Favorite Food: Sushi',
          'A painfully boring fact: I do not like rainy:(',
          'Hobbies: Viola, Basketball',
          'Email: yundiy@andrew.cmu.edu'),
         'https://media-exp1.licdn.com/dms/image/C4D03AQF37KjK_GYwzA/profile-displayphoto-shrink_400_400/0/1638326708803?e=1643846400&v=beta&t=q10CTNCG6h5guez1YT0j4j_oLlrGJB_8NugaBOUSAGg'),
        ('Shihao Xu',
         ('Pronouns: he/him/his',
          'Research/career interests: Autonomous Vehicle',
          'Favorite Food: Dumplings',
          'A painfully boring fact: Covid is still not gone',
          'Hobbies: photography',
          'Email: shihaoxu@andrew.cmu.edu'),
         'https://scontent-ort2-1.xx.fbcdn.net/v/t39.30808-6/261420667_131245119324840_3342182275866550937_n.jpg?_nc_cat=100&ccb=1-5&_nc_sid=730e14&_nc_ohc=IP7khn2w6cwAX_wC85x&_nc_ht=scontent-ort2-1.xx&oh=063c2b6b0ed5e9fc10adb2c391c471cf&oe=61AA72C1'),
    ]
    for member_name, bio_lines, photo_url in team:
        st.subheader(member_name)
        col1, mid, col2 = st.columns([20, 2, 10])
        with col1:
            for line in bio_lines:
                st.write(line)
        with col2:
            st.image(photo_url, width=300)