map222 committed on
Commit
9eb13de
1 Parent(s): 7de1be9

sketch of app.py

Browse files

Modifying from
https://huggingface.co/spaces/simonschoe/Call2Vec/blob/main/app.py

Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os

import gradio as gr
import pandas as pd
from gensim.models import KeyedVectors
from huggingface_hub import hf_hub_url, cached_download
6
+
7
# Read from the environment; not referenced anywhere else in the visible code.
ACCESS_KEY = os.environ.get('ACCESS_KEY')


# Setup model
# cached_download() returns the local path of the fetched file, so each
# artefact is downloaded once and the returned path is handed to its loader.
# NOTE(review): newer huggingface_hub releases favour hf_hub_download() over
# hf_hub_url() + cached_download() -- confirm against the pinned version.
url = hf_hub_url(repo_id="map222/recipe-spice-model", filename="recipe_w2v.gensim")
recipe_w2v = KeyedVectors.load(cached_download(url))

url = hf_hub_url(repo_id="map222/recipe-spice-model", filename="ingredient_count.json")
# json.load() needs an open file object, not the path string.
with open(cached_download(url), encoding="utf-8") as count_file:
    ingredient_count = json.load(count_file)

url = hf_hub_url(repo_id="map222/recipe-spice-model", filename="recipe_NER.tsv")
recipe_NER = pd.read_csv(cached_download(url), sep='\t')

app = gr.Blocks()
24
+
25
def get_fusion_ingredients(ingredient: str,
                           recipe_model,  # gensim model
                           recipes,  # iterable of recipes
                           ingredient_count: dict,
                           max_candidates = 20,
                           min_occurence_factor = 100
                           ):
    """Rank ingredients that are similar to `ingredient` but rarely paired with it.

    Args:
        ingredient: Name of the ingredient to search around.
        recipe_model: Object exposing ``most_similar(word, topn=...)`` that
            returns ``(name, similarity)`` pairs.
        recipes: Recipe collection; must support ``.apply`` and ``.loc``.
        ingredient_count: Mapping from ingredient name to the count used to
            normalise the score (the "total occurrences" term).
        max_candidates: Maximum number of similar ingredients kept after
            dropping names that contain `ingredient` (e.g. "gala apple").
        min_occurence_factor: A candidate is kept only if its co-occurrence
            count exceeds the largest co-occurrence count divided by this
            factor.

    Returns:
        A tuple ``(top_candidates, cooccurrence_counts, ingredient_candidates)``.
        ``top_candidates`` holds at most five ``(name, score)`` pairs in
        ascending score order; the other two values are intermediate results
        returned for debugging.
    """
    # NOTE(review): on a DataFrame, apply() runs per column by default and
    # `in` on a Series tests its index -- confirm this mask selects the
    # recipes that actually contain the ingredient.
    ingredient_recipes = recipes.loc[recipes.apply(lambda row: ingredient in row)]

    # Top similar ingredients as (name, similarity) pairs.
    ingredient_candidates = recipe_model.most_similar(ingredient, topn=50)
    # Built without zip(*...)[0] so an empty neighbour list yields an empty tuple.
    candidate_names = tuple(name for name, _ in ingredient_candidates)
    # Remove near-duplicates of the query itself (e.g. "gala apple").
    pruned_candidates = [name for name in candidate_names if ingredient not in name][:max_candidates]
    pruned_lookup = set(pruned_candidates)

    # NOTE(review): calc_cooccurrence is not defined in the visible source; it
    # is expected to return a mapping keyed by candidate name.
    cooccurrence_counts = calc_cooccurrence(ingredient, candidate_names, ingredient_recipes)  # counts for normalization

    # default=0 keeps an empty count mapping from raising ValueError.
    min_occurences = max(cooccurrence_counts.values(), default=0) / min_occurence_factor

    # Final score for sorting: similarity / how often co-occur / total occurrences.
    freq_norm_candidates = {
        name: similarity / (cooccurrence_counts[name] + 1) / ingredient_count[name]
        for name, similarity in ingredient_candidates
        if name in pruned_lookup and cooccurrence_counts[name] > min_occurences
    }
    top_candidates = sorted(freq_norm_candidates.items(), key=lambda item: item[1])[-5:]
    return top_candidates, cooccurrence_counts, ingredient_candidates  # return multiple for debugging
45
+
46
def helper_func(text):
    """Gradio click handler: look up fusion suggestions for the typed ingredient.

    Args:
        text: Ingredient name taken from the search textbox.

    Returns:
        A pandas DataFrame built from the top ``(name, score)`` candidates.
    """
    # Keyword arguments pin each global to the matching parameter of
    # get_fusion_ingredients (recipes vs. ingredient_count).
    top_candidates, _, _ = get_fusion_ingredients(
        text,
        recipe_w2v,
        recipes=recipe_NER,
        ingredient_count=ingredient_count,
    )
    return pd.DataFrame(top_candidates)
49
+
50
# Page layout: a title, a search row, a results row and a description row.
with app:
    gr.Markdown("# Recipe Spice")

    # Search box and its trigger button.
    with gr.Row():
        text_in = gr.Textbox(lines=1, placeholder="Ingredient", label="Search Query")
        compute_bt = gr.Button("Search")

    # Read-only table that receives the DataFrame returned by helper_func.
    with gr.Row():
        df_out = gr.Dataframe(interactive=False)

    with gr.Row():
        gr.Markdown(
            """
            #### Project Description
            This finds cool new recipe stuff
            """
        )

    # Clicking "Search" sends the textbox value to helper_func and shows the result.
    compute_bt.click(fn=helper_func, inputs=[text_in], outputs=[df_out])

app.launch()