David Chuan-En Lin committed on
Commit a618fc8
1 Parent(s): c9bf118
Files changed (4)
  1. .DS_Store +0 -0
  2. README.md +118 -25
  3. foodnet.py +290 -0
  4. requirements.txt +10 -0
.DS_Store ADDED
Binary file (6.15 kB).
README.md CHANGED
@@ -1,37 +1,130 @@
  ---
- title: Foodnet
- emoji: 😻
- colorFrom: blue
- colorTo: green
  sdk: streamlit
- app_file: app.py
- pinned: false
  ---

- # Configuration

- `title`: _string_
- Display title for the Space

- `emoji`: _string_
- Space emoji (emoji-only character allowed)

- `colorFrom`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)

- `colorTo`: _string_
- Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)

- `sdk`: _string_
- Can be either `gradio` or `streamlit`

- `sdk_version`: _string_
- Only applicable for `streamlit` SDK.
- See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.

- `app_file`: _string_
- Path to your main application file (which contains either `gradio` or `streamlit` Python code).
- Path is relative to the root of the repository.

- `pinned`: _boolean_
- Whether the Space stays on top of your list.
  ---
+ title: FoodNet
+ emoji: 🍔
+ colorFrom: purple
+ colorTo: purple
  sdk: streamlit
+ app_file: foodnet.py
+
  ---
 
+ # 24-679 FoodNet Project
+
+ ## Authors
+
+ David Chuan-En Lin: chuanenl@cs.cmu.edu
+
+ Mitch Fogelson: mfogelso@andrew.cmu.edu
+
+ Sunny Yang: yundiy@andrew.cmu.edu
+
+ Shihao Xu: shihaoxu@andrew.cmu.edu
+
+ ## TODO
+
+ ### Must Have
+
+ 1. Cooking method (how to do this?) (TBD)
+ 2. Ingredients -> recipe (recipe query?) (Mitch)
+ 3. Cuisine metadata (where to get it?) (TBD)
+ 4. Deployment on the cloud (David)
+
+ ### Nice to Have
+
+ 1. Related images, via:
+    * [Google Image Search API](https://pypi.org/project/Google-Images-Search/)
+    * [OpenAI CLIP](https://openai.com/api/)
+ 2. User studies
+
+ ### Moonshot
+
+ 1. Recipe masking prediction
+ 2.
46
+ ## Description
47
+
48
+ We wanted to help students and households in the Pittsburgh to reduce their food waste. We developed a model that suggests recipes based on current leftovers availible.
49
+
50
+ * Model -> Facebook's [FastText](https://radimrehurek.com/gensim/models/fasttext.html)
51
+ * Dataset -> [Simplified 1M+ Recipes](https://github.com/schmidtdominik/RecipeNet)
52
+ * [Dominick Schmidt Blog](https://dominikschmidt.xyz/simplified-recipes-1M/#dataset-sources)
53
+
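+
+ The core idea: embed each leftover ingredient with FastText, average the embeddings, and look up the nearest ingredients in the vector space. A minimal sketch (assuming a trained gensim 4.x model saved as `fastfood.model`; the ingredient names are illustrative):
+
+ ```python
+ import numpy as np
+ from gensim.models.fasttext import FastText
+
+ model = FastText.load("fastfood.model")
+ yum = model.wv  # KeyedVectors
+ leftovers = ["bread", "lettuce"]
+ vec = np.mean([yum.get_vector(i, norm=True) for i in leftovers], axis=0)
+ print(yum.similar_by_vector(vec, topn=5))  # top-5 complementary ingredients
+ ```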
+
+ ## Try the Web App
+
+ https://huggingface.co/spaces/chuanenlin/foodnet
+
+ ## Quick Start
+
+ 1. Clone the repository:
+
+ ```
+ git clone git@github.com:chuanenlin/foodnet.git
+ ```
+
+ 2. Move into the repository:
+
+ ```
+ cd foodnet
+ ```
+
+ (Optional: create and activate a conda environment.)
+
+ 3. Install gdown:
+
+ ```
+ pip install gdown
+ ```
+
+ 4. Download the models:
+
+ ```
+ gdown https://drive.google.com/drive/folders/1LlQpd45E71dSfC8FgvIhJjQjqxnlBC9j -O ./models --folder
+ ```
+
+ 5. (Optional) Download the datasets:
+
+ ```
+ gdown https://drive.google.com/drive/folders/18aA3BFKqzkqNz5L4N5vN6bFnp8Ch2CQV -O ./data --folder
+ ```
+
+ 6. Install the dependencies:
+
+ ```
+ pip install -r requirements.txt
+ ```
+
+ 7. Run the app:
+
+ ```
+ streamlit run foodnet.py
+ ```
+
+ ## Args
+
+ Train a new model (the `--` separator tells Streamlit to pass the remaining flags to the script instead of consuming them itself):
+
+ ```
+ streamlit run foodnet.py -- --dataset /PATH/TO/DATASET --train True
+ ```
+
+ Load an alternative model:
+
+ ```
+ streamlit run foodnet.py -- --model /PATH/TO/MODEL
+ ```
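+
+ For reference, a minimal sketch of the `argparse` wiring these flags assume (the repo's version is currently commented out in `foodnet.py`; `--train` is shown here with `action='store_true'` because `type=bool` treats any non-empty string, including "False", as `True` — with this form the command becomes `streamlit run foodnet.py -- --train` with no value):
+
+ ```python
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-d', '--dataset', default='data/all_recipes_ingredients_lemma.pkl', type=str, help="filepath of the dataset")
+ parser.add_argument('-t', '--train', action='store_true', help="train a new model instead of loading one")
+ parser.add_argument('-m', '--model', default='models/fastfood_lemma_4.model', type=str, help="filepath of the model")
+ args = parser.parse_args()
+ ```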
+
+ ## Requirements
+
+ * python>=3.6
+ * gensim>=4.0.x
+ * streamlit
+ * gdown
+ * nltk
+ * pickle (standard library)
+ * matplotlib
+
+ ## References
+
+ TODO

foodnet.py ADDED
@@ -0,0 +1,290 @@
+ import requests
+ from io import BytesIO
+ import numpy as np
+ from gensim.models.fasttext import FastText
+ from scipy import spatial
+ import itertools
+ import gdown
+ import warnings
+ import nltk
+ # warnings.filterwarnings('ignore')
+
+ import pickle
+ import pdb
+ from concurrent.futures import ProcessPoolExecutor
+
+ import matplotlib.pyplot as plt
+ import streamlit as st
+ import argparse
+
+
+ # NLTK datasets (tokenizer, POS tagger, and WordNet lemmatizer data)
+ nltk.download('wordnet')
+ nltk.download('punkt')
+ nltk.download('averaged_perceptron_tagger')
+
+ # Average embedding → Compare
+ def recommend_ingredients(yum, leftovers, n=10):
+     '''
+     Uses a mean aggregation method
+
+     :params
+         yum -> FastText Word2Vec (KeyedVectors) obj
+         leftovers -> list of str
+         n -> int, top_n to return
+
+     :returns
+         output -> top_n recommendations
+     '''
+     leftovers_embedding_sum = np.zeros(yum.vector_size)  # use the model's dimensionality rather than a hardcoded 100
+     for ingredient in leftovers:
+         ingredient_embedding = yum.get_vector(ingredient, norm=True)
+         leftovers_embedding_sum += ingredient_embedding
+     leftovers_embedding = leftovers_embedding_sum / len(leftovers)  # Mean embedding of the leftovers
+     top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)
+     top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]
+     output = [x for x in top_matches if not any(ignore in x[0] for ignore in leftovers)]  # Remove boring same-item matches, e.g. "romaine lettuce" if leftovers already contain "lettuce"
+     return output[:n]
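+
+ # Example (illustrative ingredient names; assumes the model loaded via load_model below):
+ #   model, yum = load_model('fastfood.model')
+ #   recommend_ingredients(yum, ['bread', 'lettuce'], n=5)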
+
+ # Compare → Find intersection
+ def recommend_ingredients_intersect(yum, leftovers, n=10):
+     '''
+     Finds top combined probabilities
+
+     :params
+         yum -> FastText Word2Vec (KeyedVectors) obj
+         leftovers -> list of str
+         n -> int, top_n to return
+
+     :returns
+         output -> top_n recommendations
+     '''
+     first = True
+     for ingredient in leftovers:
+         ingredient_embedding = yum.get_vector(ingredient, norm=True)
+         ingredient_matches = yum.similar_by_vector(ingredient_embedding, topn=10000)
+         ingredient_matches = [(x[0].replace('_', ' '), x[1]) for x in ingredient_matches]
+         ingredient_output = [x for x in ingredient_matches if not any(ignore in x[0] for ignore in leftovers)]  # Remove boring same-item matches
+         if first:
+             output = ingredient_output
+             first = False
+         else:
+             # Keep only items that also match the current ingredient (scores come from the first ingredient's list)
+             output = [x for x in output for y in ingredient_output if x[0] == y[0]]
+     return output[:n]
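+
+ # Example (illustrative; returns only items ranked highly for *every* leftover):
+ #   recommend_ingredients_intersect(yum, ['bread', 'lettuce'], n=5)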
+
+ def recommend_ingredients_subsets(model, yum, leftovers, subset_size):
+     '''
+     Returns recommendations for every subset of the leftovers
+
+     :params
+         model -> FastText obj (kept for API compatibility; unused)
+         yum -> FastText Word2Vec (KeyedVectors) obj
+         leftovers -> list of str
+         subset_size -> int, size of each leftover subset
+
+     :returns
+         all_outputs -> dict mapping each subset to its top-10 recommendations
+     '''
+     all_outputs = {}
+     for leftovers_subset in itertools.combinations(leftovers, subset_size):
+         leftovers_embedding_sum = np.zeros(yum.vector_size)  # np.zeros, not np.empty: the sum must start from 0
+         for ingredient in leftovers_subset:
+             ingredient_embedding = yum.get_vector(ingredient, norm=True)  # gensim 4.x API (word_vec was removed in 4.0)
+             leftovers_embedding_sum += ingredient_embedding
+         leftovers_embedding = leftovers_embedding_sum / len(leftovers_subset)  # Mean embedding of the subset
+         top_matches = yum.similar_by_vector(leftovers_embedding, topn=100)  # similar_by_vector lives on the KeyedVectors, not the FastText model
+         top_matches = [(x[0].replace('_', ' '), x[1]) for x in top_matches]
+         output = [x for x in top_matches if not any(ignore in x[0] for ignore in leftovers_subset)]  # Remove boring same-item matches
+         all_outputs[leftovers_subset] = output[:10]
+     return all_outputs
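+
+ # Example (illustrative; scores every pair drawn from three leftovers):
+ #   recommend_ingredients_subsets(model, yum, ['bread', 'lettuce', 'tomato'], subset_size=2)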
+
+
+ def filter_adjectives(data):
+     '''
+     Remove adjectives that are not associated with a food item
+
+     :params
+         data -> list of ingredient strings
+
+     :returns
+         data -> filtered list
+     '''
+     recipe_ingredients_token = [nltk.word_tokenize(x) for x in data]
+     inds = []
+     for i, r in enumerate(recipe_ingredients_token):
+         out = nltk.pos_tag(r)
+         out = [x[1] for x in out]
+         # Keep multi-word ingredients as-is; keep single words only if tagged as a noun
+         if len(out) > 1:
+             inds.append(i)
+         elif 'NN' in out or 'NNS' in out:
+             inds.append(i)
+
+     return [data[i] for i in inds]
+
+ def plural_to_singular(lemma, recipe):
+     '''
+     :params
+         lemma -> nltk WordNetLemmatizer obj
+         recipe -> list of str
+
+     :returns
+         recipe -> lemmatized recipe
+     '''
+     return [lemma.lemmatize(r) for r in recipe]
+
+ def filter_lemma(data):
+     '''
+     Convert plurals to their root forms
+
+     :params
+         data -> list of lists (one ingredient list per recipe)
+
+     :returns
+         data -> lemmatized data
+     '''
+     # Initialize lemmatizer (to reduce plurals to stems)
+     lemma = nltk.wordnet.WordNetLemmatizer()
+
+     # NOTE: This uses all of your machine's cores
+     with ProcessPoolExecutor() as executor:
+         out = list(executor.map(plural_to_singular, itertools.repeat(lemma), data))
+
+     return out
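+
+ # Example (illustrative recipes; returns the same structure with plurals lemmatized):
+ #   filter_lemma([['tomatoes', 'onions'], ['eggs', 'butter']])
+ #   # -> [['tomato', 'onion'], ['egg', 'butter']]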
+
+
+ def train_model(data):
+     '''
+     Train the FastText model
+     NOTE: gensim==4.1.2
+
+     :params
+         data -> list of lists of all recipes
+
+     :returns
+         model -> FastText model obj
+     '''
+     # window=99 so a whole recipe acts as one context; sg=1 selects skip-gram
+     model = FastText(data, vector_size=32, window=99, min_count=5, workers=40, sg=1)
+
+     return model
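+
+ # Example (hypothetical paths; retraining is optional since a pretrained model is downloaded via gdown):
+ #   data = load_data('data/all_recipes_ingredients_lemma.pkl')
+ #   model = train_model(data)
+ #   model.save('models/new_model.model')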
+
+ @st.cache(allow_output_mutation=True)
+ def load_model(filename='models/fastfood_orig_4.model'):
+     '''
+     Load the FastText model
+
+     :params
+         filename -> path to the model
+
+     :returns
+         model -> the full FastText obj
+         yum -> the FastText Word2Vec (KeyedVectors) obj
+     '''
+     model = FastText.load(filename)
+     yum = model.wv
+
+     return model, yum
+
+ @st.cache(allow_output_mutation=True)
+ def load_data(filename='data/all_recipes_ingredients_lemma.pkl'):
+     '''
+     Load data
+
+     :params
+         filename -> path to the dataset
+
+     :returns
+         data -> list of all recipes
+     '''
+     return pickle.load(open(filename, 'rb'))
+
+ def plot_results(names, probs, n=5):
+     '''
+     Plots a bar chart of item names vs. similarity score
+
+     :params
+         names -> list of str
+         probs -> list of float values
+         n -> int, how many bars to show (NOTE: max = 100)
+
+     :returns
+         fig -> figure for plotting
+     '''
+     plt.bar(range(len(names)), probs, align='center')
+     ax = plt.gca()
+
+     ax.xaxis.set_major_locator(plt.FixedLocator(range(len(names))))
+     ax.xaxis.set_major_formatter(plt.FixedFormatter(names))
+     ax.set_ylabel('Probability', fontsize='large', fontweight='bold')
+     ax.set_xlabel('Ingredients', fontsize='large', fontweight='bold')
+     ax.xaxis.labelpad = 10
+     ax.set_title(f'FastFood Top {n} Predictions for Leftovers = {st.session_state.leftovers}')
+     fig = plt.gcf()
+
+     return fig
+
+
+ if __name__ == "__main__":
+     # Argument parsing is currently disabled; see the "Args" section of the README.
+     # parser = argparse.ArgumentParser()
+
+     # Defaults
+     # data_path = 'data/all_recipes_ingredients_lemma.pkl'
+     # model_path = 'models/fastfood_lemma_4.model'
+
+     # Arguments
+     # parser.add_argument('-d', '--dataset', default=data_path, type=str, help="the filepath of the dataset")
+     # parser.add_argument('-t', '--train', default=False, type=bool, help="whether to train a new model")
+     # parser.add_argument('-m', '--model', default=model_path, type=str, help="the filepath of the model")
+     # args = parser.parse_args()
+
+     ## Train or Test ##
+     # if args.train:
+     #     data = load_data(args.dataset)
+     #     model = train_model(data)
+     #     model_path = input("Model filename and directory [e.g. models/new_model.model]: ")
+     #     model.save(model_path)
+     # else:
+     model, yum = load_model('fastfood.model')
+
+     ##### UI/UX #####
+     ## Sidebar ##
+     add_selectbox = st.sidebar.selectbox(
+         "Food Utilization App",
+         ("FastFood Recommendation Model", "Food Donation Resources", "Contact Team")
+     )
+
+     ## Selection tool ##
+     st.multiselect("Select leftovers", list(yum.key_to_index.keys()), default=['bread', 'lettuce'], key="leftovers")
+
+     ## Slider ##
+     st.slider("Number of Recommendations", min_value=1, max_value=100, value=5, step=1, key='top_n')
+
+     ## Get food recommendations ##
+     out = recommend_ingredients(yum, st.session_state.leftovers, n=st.session_state.top_n)
+     names = [o[0] for o in out]
+     probs = [o[1] for o in out]
+
+     st.checkbox(label="Show model score", value=False, key="probs")
+     if st.session_state.probs:
+         st.table(data=out)
+     else:
+         st.table(data=names)
+
+     ## Plot results ##
+     st.checkbox(label="Show model bar chart", value=False, key="plot")
+     if st.session_state.plot:
+         fig = plot_results(names, probs, st.session_state.top_n)
+         st.pyplot(fig)  # render inside the branch: fig is undefined when the checkbox is off
+
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ gdown==4.2.0
+ gensim==4.1.2
+ matplotlib==3.4.3
+ nltk==3.6.5
+ numpy==1.21.2
+ pandas==1.3.4
+ pickleshare==0.7.5
+ scipy==1.7.1
+ streamlit==1.2.0
+