Spaces:

map222
/

recipe-spice

Runtime error

App Files Files Community

map222 committed on Nov 1, 2022

Commit

9eb13de

•

1 Parent(s): 7de1be9

sketch of app.py

Browse files

Modifying from
https://huggingface.co/spaces/simonschoe/Call2Vec/blob/main/app.py

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

import json
import os

import gradio as gr
import pandas as pd
from gensim.models import KeyedVectors
from huggingface_hub import hf_hub_url, cached_download
# Read once at startup; nothing in the visible source uses it afterwards.
ACCESS_KEY = os.environ.get('ACCESS_KEY')

# Setup model
# Every artifact lives in the same Hub repo. The value returned by
# cached_download is used below as a local file path.
# NOTE(review): newer huggingface_hub releases deprecate cached_download in
# favour of hf_hub_download -- confirm against the pinned version.
url = hf_hub_url(repo_id="map222/recipe-spice-model", filename="recipe_w2v.gensim")
recipe_w2v = KeyedVectors.load(cached_download(url))

url = hf_hub_url(repo_id="map222/recipe-spice-model", filename="ingredient_count.json")
# json.load expects an open file object, not a path string, so open it first.
with open(cached_download(url), encoding="utf-8") as count_file:
    ingredient_count = json.load(count_file)

url = hf_hub_url(repo_id="map222/recipe-spice-model", filename="recipe_NER.tsv")
recipe_NER = pd.read_csv(cached_download(url), sep='\t')

# Root Gradio container; the layout is attached to it further down the file.
app = gr.Blocks()
def get_fusion_ingredients(ingredient: str,
                           recipe_model, #gensim model
                           recipes, #iterable of recipes
                           ingredient_count: dict,
                           max_candidates = 20,
                           min_occurence_factor = 100 # divisor applied to the largest co-occurrence count
                           ):
    """Rank ingredients similar to ``ingredient``, favouring rarer pairings.

    Each candidate's score is its model similarity divided by
    (co-occurrence count + 1) and by its count in ``ingredient_count``, so
    higher scores go to similar ingredients that co-occur less often.

    Args:
        ingredient: Query ingredient. It is looked up in ``recipe_model`` and
            tested against every element of ``recipes``.
        recipe_model: Object exposing ``most_similar(key, topn=...)`` that
            returns ``(name, similarity)`` pairs.
        recipes: pandas object supporting ``.apply`` and ``.loc``. Each
            element handed to the callback is tested with
            ``ingredient in element``.
            NOTE(review): on a DataFrame ``apply`` runs per column by
            default -- a Series of recipes is presumably expected; confirm.
        ingredient_count: Mapping of ingredient name to a count, used as a
            divisor in the score.
        max_candidates: Maximum number of pruned candidates to score.
        min_occurence_factor: A candidate is kept only when its co-occurrence
            count is greater than the largest co-occurrence count divided by
            this factor.

    Returns:
        A tuple ``(top_candidates, cooccurrence_counts, ingredient_candidates)``.
        ``top_candidates`` holds at most five ``(name, score)`` pairs in
        ascending score order. The other two values are returned for
        debugging.
    """
    # Keep only the recipes that contain the query ingredient.
    ingredient_recipes = recipes.loc[recipes.apply(lambda row: ingredient in row)]

    # get top similar ingredients
    ingredient_candidates = recipe_model.most_similar(ingredient, topn=50)
    # Built without zip(*...) so an empty candidate list does not raise IndexError.
    candidate_names = tuple(name for name, _ in ingredient_candidates)

    # clean up candidates to remove duplicates (e.g. "gala apple"); a set
    # keeps the membership test below constant-time.
    pruned_candidates = set(
        [name for name in candidate_names if ingredient not in name][:max_candidates]
    )

    # get counts for normalization
    # NOTE(review): calc_cooccurrence is not defined in the visible source;
    # it is presumably a mapping of candidate name -> co-occurrence count.
    cooccurrence_counts = calc_cooccurrence(ingredient, candidate_names, ingredient_recipes)

    # default=0 avoids a ValueError when there are no counts at all.
    min_occurences = max(cooccurrence_counts.values(), default=0) / min_occurence_factor
    print(min_occurences)

    # final score for sorting: similarity / how often co-occur / total occurences
    freq_norm_candidates = {}
    for name, similarity in ingredient_candidates:
        if name not in pruned_candidates:
            continue
        cooccurrences = cooccurrence_counts[name]
        if cooccurrences <= min_occurences:
            continue
        freq_norm_candidates[name] = similarity / (cooccurrences + 1) / ingredient_count[name]

    # Ascending sort; the slice keeps the five highest scores.
    top_candidates = sorted(freq_norm_candidates.items(), key=lambda item: item[1])[-5:]
    return top_candidates, cooccurrence_counts, ingredient_candidates # return multiple for debugging
def helper_func(text):
    """Gradio click handler: return the fusion suggestions for ``text``.

    Args:
        text: Ingredient typed into the search box.

    Returns:
        A ``pandas.DataFrame`` built from the top candidates returned by
        ``get_fusion_ingredients``. It is empty when the query is missing or
        blank.
    """
    # A blank query cannot be looked up in the model, so skip the search.
    if not text or not text.strip():
        return pd.DataFrame()

    # Arguments follow the callee's order:
    # (ingredient, recipe_model, recipes, ingredient_count).
    # NOTE(review): recipe_NER is the whole DataFrame read from the TSV, and
    # the callee filters it with recipes.apply(...), which runs per column on
    # a DataFrame. Confirm whether a single column should be passed instead.
    top_candidates, _, _ = get_fusion_ingredients(
        text, recipe_w2v, recipe_NER, ingredient_count
    )
    return pd.DataFrame(top_candidates)
# Lay out the page inside the Blocks container: a title, a query row, a
# results row and a short description, then start the server.
with app:
    gr.Markdown("# Recipe Spice")

    # Query row: the ingredient text box and the button that runs the search.
    with gr.Row():
        text_in = gr.Textbox(
            label="Search Query",
            placeholder="Ingredient",
            lines=1,
        )
        compute_bt = gr.Button("Search")

    # Results row: read-only table filled in by the click handler.
    with gr.Row():
        df_out = gr.Dataframe(interactive=False)

    with gr.Row():
        gr.Markdown(
            """
            #### Project Description
            This finds cool new recipe stuff
            """
        )

    # Clicking the button sends the text box value to helper_func and shows
    # the returned value in the table.
    compute_bt.click(fn=helper_func, inputs=[text_in], outputs=[df_out])

app.launch()