Jasminder committed on
Commit
a468d98
1 Parent(s): 94e7cfe

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/restaurant-menus.csv filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ data/
3
+ .venv/
4
+ .vscode/
5
+ *.csv
6
+ .env
README.md CHANGED
@@ -1,13 +1,16 @@
1
- ---
2
- title: Food Feud
3
- emoji: 🏃
4
- colorFrom: pink
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.40.2
8
- app_file: app.py
9
- pinned: false
10
- short_description: Survey game to recommend recipes.
11
- ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
1
+ # Food Feud
2
+ ## How to run locally
3
+ Python 3 and git are required.
4
+ 1) `git clone https://github.com/jsgarcha/food-feud`
5
+ 2) `cd ./food-feud`
6
+ 3) `pip install -r requirements.txt`
7
+ 4) `python clean_data.py`
8
+ 5) `streamlit run main.py`
 
 
 
9
 
10
+ NOTE: JAX v0.4.36 does not work for this Huggingface model (https://huggingface.co/flax-community/t5-recipe-generation)
11
+ `pip install --force-reinstall -v "jax==0.4.34"`
12
+
13
+ Running the first time may take a minute or so, depending on your internet connection, because the model has to be downloaded from Huggingface (~900 MB).
14
+ Subsequent executions will not pause for long since the model will already be in cache.
15
+
16
+ You also need to provide your own key for Gemini in `.env` under the `GEMINI_API_KEY` key.
clean_data.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Download the Uber Eats USA restaurants dataset from Kaggle, clean it, and
export the top restaurants per cuisine category to data/top_restaurants.csv.

Run directly: ``python clean_data.py``.
"""
import os
import shutil

import pandas as pd

DATA_PATH = "data/"
RESULT_FILE = "top_restaurants.csv"

# Cuisine categories kept in the cleaned dataset. Immutable tuple so it is
# safe to use as a default argument.
SELECTED_CATEGORIES = ('steak', 'chinese', 'japanese', 'italian', 'indian', 'mediterranean')


def download_data():
    """Download the Kaggle dataset and copy its files into DATA_PATH."""
    # Imported lazily so the module can be imported (e.g. for testing the
    # pure helpers below) without kagglehub installed.
    import kagglehub

    kaggle_path = kagglehub.dataset_download("ahmedshahriarsakib/uber-eats-usa-restaurants-menus")
    print("Downloaded datasets from Kaggle.")

    os.makedirs(DATA_PATH, exist_ok=True)
    for file in os.listdir(kaggle_path):
        source = os.path.join(kaggle_path, file)
        if os.path.isfile(source):
            shutil.copy(source, os.path.join(DATA_PATH, file))
    print("Moved datasets to " + DATA_PATH)


def clean_restaurants(restaurants_df):
    """Drop rows unusable for the survey.

    Removes rows with a null 'category', keeps only price ranges $$ to $$$$,
    and keeps only ratings of 3.5 and above. Returns a new DataFrame.
    """
    restaurants_df = restaurants_df.dropna(subset=['category'])
    return restaurants_df[
        (restaurants_df['price_range'].isin(['$$', '$$$', '$$$$'])) &
        (restaurants_df['score'] >= 3.5)
    ]


def select_top_restaurants(restaurants_df, categories=SELECTED_CATEGORIES, top_n=100):
    """Return the top ``top_n`` highest-scored restaurants for each category.

    A restaurant whose 'category' string contains the category name
    (case-insensitive) counts as a match; a restaurant matching several
    categories appears once per match. Result is sorted by score descending.
    """
    # Collect per-category frames and concatenate once (avoids the quadratic
    # cost of concatenating inside the loop).
    frames = []
    for category in categories:
        filtered = restaurants_df[restaurants_df['category'].str.contains(category, case=False, na=False)]
        frames.append(filtered.sort_values(by='score', ascending=False).head(top_n))

    final_result = pd.concat(frames).reset_index(drop=True)
    return final_result.sort_values(by='score', ascending=False)


def main():
    """Full pipeline: download, clean, select, export."""
    download_data()

    restaurants_df = pd.read_csv(DATA_PATH + 'restaurants.csv')
    print("Loaded data.")

    final_result = select_top_restaurants(clean_restaurants(restaurants_df))

    final_result.to_csv(DATA_PATH + RESULT_FILE, index=False)
    print("Cleaned data and exported to " + DATA_PATH + RESULT_FILE)


if __name__ == "__main__":
    main()
data/restaurant-menus.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fe16b49b5db6b35b7522c6f6861c52f965c16ab610c7b24113dd7cc9ec50c20
3
+ size 870834478
data/restaurants.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/top_restaurants.csv ADDED
The diff for this file is too large to render. See raw diff
 
gemini.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
"""Configure the Gemini SDK and expose a ready-to-use chat session."""
import os

import google.generativeai as genai
from dotenv import load_dotenv

# Read GEMINI_API_KEY from the local .env file and register it with the SDK.
load_dotenv()
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

# High temperature for varied ingredient suggestions; JSON mime type so the
# caller can json.loads() the response text directly.
GENERATION_CONFIG = {
    "temperature": 2,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "application/json",
}

model = genai.GenerativeModel(model_name="gemini-1.5-flash", generation_config=GENERATION_CONFIG)
chat_session = model.start_chat()
main.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import random
import re

import pandas as pd
import streamlit as st

from recipe_generator import generation_function
from gemini import chat_session

data_path = "data/"
data_file = "top_restaurants.csv"

# App stages: survey first, then recipe generation.
RESTAURANT_SURVEY_STAGE = 1
RECIPE_GENERATION_STAGE = 2

# How many restaurants the user must like before moving to recipe generation.
LIKE_NUMBER = 20

# "Top" is relative to our data set; meaning, these categories exhibited the
# "cleanest" data. To be changed later.
top_food_categories = ['Steak', 'Chinese', 'Japanese', 'Italian', 'Indian', 'Mediterranean']

st.markdown("<h1 style='text-align: center'>Food Feud</h1>", unsafe_allow_html=True)

# Initialize session state exactly once per browser session; Streamlit reruns
# the whole script on every interaction, so only missing keys are set.
_session_defaults = {
    'stage': RESTAURANT_SURVEY_STAGE,  # Start stage
    'like': [],
    'dislike': [],
    'like_count': LIKE_NUMBER,
    'survey_progress': 0,
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
35
+
36
@st.cache_data
def load_restaurant_data():
    """Read the cleaned top-restaurants CSV once; cached across app reruns."""
    csv_path = data_path + data_file
    return pd.read_csv(csv_path)
39
+
40
def clear_string(s):
    """Normalize a restaurant name for display.

    Strips any parenthesized text, keeps only the part before the first
    hyphen, decodes '&amp;' HTML entities, and trims surrounding whitespace.
    """
    without_parens = re.sub(r"\(.*?\)", "", s)
    before_dash = without_parens.split('-')[0]
    return before_dash.replace("&amp;", "&").strip()
42
+
43
def add_like(like):  # Row in a DataFrame
    """Record a liked restaurant and advance the survey progress bar."""
    st.session_state.like.append(like)  # Build up likes
    # Cap the bar at 100 while still counting down the remaining selections.
    if st.session_state.survey_progress < 100:
        st.session_state.survey_progress += 100 // LIKE_NUMBER
    st.session_state.like_count -= 1
    # survey_progress_bar is created by the survey-stage UI code below.
    survey_progress_bar.progress(
        st.session_state.survey_progress,
        text=f"Select {st.session_state.like_count} more.",
    )
49
+
50
def add_dislike(dislike):
    """Record a disliked restaurant (the list is not read elsewhere in this file)."""
    st.session_state.dislike.append(dislike)
52
+
53
def generate_recipe(ingredients):
    """Generate one recipe from a comma-separated ingredient string and render it.

    The generator emits newline-separated sections prefixed with "title:",
    "ingredients:" and "directions:"; items inside a section are separated by
    "--". The title renders as a centered heading, the other sections as a
    heading plus a bulleted list.
    """
    generated = generation_function(ingredients)
    sections = generated.split("\n")
    # Fix: previously 'headline' was unbound (UnboundLocalError) if the first
    # section carried no known prefix; initialize and skip such sections.
    headline = None
    for section in sections:
        section = section.strip()
        if not section:
            continue  # Nothing to render for blank lines between sections
        if section.startswith("title:"):
            section = section.replace("title:", "")
            headline = "TITLE"
        elif section.startswith("ingredients:"):
            section = section.replace("ingredients:", "")
            headline = "Ingredients"
        elif section.startswith("directions:"):
            section = section.replace("directions:", "")
            headline = "Directions"

        if headline is None:
            continue  # Unrecognized leading section: no headline to render under

        if headline == "TITLE":
            st.markdown("<h3 style='text-align: center'>"+str(section.strip().capitalize())+"</h3>", unsafe_allow_html=True)
        else:
            section_info = [f" - {info.strip().capitalize()}" for info in section.split("--")]
            st.markdown("<h4>"+f'{headline}'+"</h4>", unsafe_allow_html=True)
            st.write("\n".join(section_info))
74
+
75
df_restaurants = load_restaurant_data()

# Placeholder lets the survey UI be cleared in place when the stage advances.
placeholder = st.empty()

if st.session_state.stage == RESTAURANT_SURVEY_STAGE:
    with placeholder.container():
        st.markdown("<h4 style='text-align: center'>Start by taking our survey of eating establishments whose food you enjoy.</h4>", unsafe_allow_html=True)
        survey_progress_bar = st.progress(st.session_state.survey_progress, text=f"Select {st.session_state.like_count} more.")
        # One random restaurant per rerun; the buttons record the answer.
        random_restaurant = df_restaurants.sample()
        st.markdown("<h3 style='text-align: center'>" + clear_string(random_restaurant.iloc[0]['name']) + "</h3>", unsafe_allow_html=True)
        yes_col, no_col = st.columns(2)
        if yes_col.button('Yes 👍', type="secondary", use_container_width=True):
            add_like(random_restaurant)
        if no_col.button('No 👎', type="secondary", use_container_width=True):
            add_dislike(random_restaurant)

# Enough likes collected: clear the survey UI and advance the stage.
if st.session_state.like_count == 0 and st.session_state.stage != RECIPE_GENERATION_STAGE:
    placeholder.empty()
    st.balloons()
    st.session_state.stage = RECIPE_GENERATION_STAGE
95
+
96
if st.session_state.stage == RECIPE_GENERATION_STAGE:
    # Combine all liked single-row DataFrames into one table to sample from.
    df_restaurant_likes = pd.concat(st.session_state.like)

    # Fix: user-facing typo "restaurants your liked" -> "you liked".
    st.markdown("<h4 style='text-align: center'>Now generate recipes based on the restaurants you liked!</h4>", unsafe_allow_html=True)
    col = st.columns([1])[0]  # One column with equal width
    with col:
        if st.button('Generate Recipe!', type='primary', use_container_width=True):
            liked_restaurant = df_restaurant_likes.sample()
            liked_restaurant_categories = liked_restaurant['category'].values[0]
            # Fix: previously '[...][0]' raised IndexError when none of the
            # curated categories appeared in the restaurant's category string;
            # fall back to a random curated category instead of crashing.
            liked_restaurant_category = next(
                (category for category in top_food_categories if category in liked_restaurant_categories),
                random.choice(top_food_categories),
            )

            # Ask Gemini (configured for JSON output) for typical ingredients.
            response = chat_session.send_message(f"List common ingredients in {liked_restaurant_category} food.")
            model_response = response.text
            response = json.loads(model_response)
            ingredients = response['ingredients']
            random.shuffle(ingredients)  # Change

            st.markdown(
                "<h4 style='text-align: center'>Based on your like of <span style='color: red;'>"
                + clear_string(liked_restaurant.iloc[0]['name']) +
                "</span>, survey says...</h4>",
                unsafe_allow_html=True
            )
            generate_recipe(','.join(map(str, ingredients)))

# 2 major things to be fixed:
# 1) Huggingface model input giving more than 1 recipe, but limiting to 1 produces the same recipe
# 2) Decision function - do more analytics to determine top restaurant; also randomize ingredients list
recipe_generator.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import FlaxAutoModelForSeq2SeqLM
from transformers import AutoTokenizer
import streamlit  # NOTE(review): unused in this module; kept to avoid changing file-level imports

MODEL_NAME_OR_PATH = "flax-community/t5-recipe-generation"

# Loaded once at import time; the first run downloads the model from the
# Hugging Face Hub (see README: ~900 MB), later runs hit the local cache.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME_OR_PATH, use_fast=True)
model = FlaxAutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME_OR_PATH)

# Every model input is prefixed with this marker before the ingredient list.
prefix = "items: "

# Sampling settings passed to model.generate().
generation_kwargs = {
    "max_length": 512,
    "min_length": 64,
    "no_repeat_ngram_size": 3,
    "do_sample": True,
    "top_k": 60,
    "top_p": 0.95
}

special_tokens = tokenizer.all_special_tokens
# Maps the model's structural tokens to the plain-text separators the UI
# parser expects ("--" between items, newline between sections).
tokens_map = {
    "<sep>": "--",
    "<section>": "\n"
}
24
+
25
def skip_special_tokens(text, special_tokens):
    """Return *text* with every token in *special_tokens* stripped out."""
    cleaned = text
    for token in special_tokens:
        cleaned = cleaned.replace(token, "")
    return cleaned
29
+
30
def target_postprocessing(texts, special_tokens):
    """Clean decoded model output.

    Strips the given special tokens and replaces the model's structural
    tokens (via module-level ``tokens_map``) with plain-text separators.
    Accepts a single string or a list of strings; always returns a list.
    """
    if not isinstance(texts, list):
        texts = [texts]

    processed = []
    for text in texts:
        cleaned = skip_special_tokens(text, special_tokens)
        for token, replacement in tokens_map.items():
            cleaned = cleaned.replace(token, replacement)
        processed.append(cleaned)

    return processed
44
+
45
def generation_function(text):
    """Generate one recipe string from a comma-separated ingredient list.

    Returns the raw model text after postprocessing: sections separated by
    newlines and items by "--" (see ``tokens_map``); the caller parses the
    "title:" / "ingredients:" / "directions:" sections.
    """
    # Ensure the input is a single string with the required task prefix.
    model_input = prefix + str(text)
    encoded = tokenizer(
        model_input,
        max_length=256,
        padding="max_length",
        truncation=True,
        return_tensors="jax"
    )

    # Sample an output token sequence (settings in generation_kwargs).
    output = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        **generation_kwargs
    )

    decoded = tokenizer.batch_decode(output.sequences, skip_special_tokens=False)
    recipes = target_postprocessing(decoded, special_tokens)
    return recipes[0]  # Only return the first recipe generated
requirements.txt ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ altair==5.5.0
3
+ annotated-types==0.7.0
4
+ attrs==24.2.0
5
+ blinker==1.9.0
6
+ cachetools==5.5.0
7
+ certifi==2024.8.30
8
+ charset-normalizer==3.4.0
9
+ chex==0.1.87
10
+ click==8.1.7
11
+ etils==1.11.0
12
+ filelock==3.16.1
13
+ flax==0.10.2
14
+ fsspec==2024.10.0
15
+ gitdb==4.0.11
16
+ GitPython==3.1.43
17
+ google-ai-generativelanguage==0.6.10
18
+ google-api-core==2.23.0
19
+ google-api-python-client==2.154.0
20
+ google-auth==2.36.0
21
+ google-auth-httplib2==0.2.0
22
+ google-generativeai==0.8.3
23
+ googleapis-common-protos==1.66.0
24
+ grpcio==1.68.1
25
+ grpcio-status==1.68.1
26
+ httplib2==0.22.0
27
+ huggingface-hub==0.26.5
28
+ humanize==4.11.0
29
+ idna==3.10
30
+ importlib_resources==6.4.5
31
+ jax==0.4.34
32
+ jaxlib==0.4.34
33
+ Jinja2==3.1.4
34
+ jsonschema==4.23.0
35
+ jsonschema-specifications==2024.10.1
36
+ kagglehub==0.3.4
37
+ markdown-it-py==3.0.0
38
+ MarkupSafe==3.0.2
39
+ mdurl==0.1.2
40
+ ml_dtypes==0.5.0
41
+ msgpack==1.1.0
42
+ narwhals==1.16.0
43
+ nest-asyncio==1.6.0
44
+ numpy==2.1.3
45
+ opt_einsum==3.4.0
46
+ optax==0.2.4
47
+ orbax-checkpoint==0.10.2
48
+ packaging==24.2
49
+ pandas==2.2.3
50
+ pillow==11.0.0
51
+ proto-plus==1.25.0
52
+ protobuf==5.29.1
53
+ pyarrow==18.1.0
54
+ pyasn1==0.6.1
55
+ pyasn1_modules==0.4.1
56
+ pydantic==2.10.3
57
+ pydantic_core==2.27.1
58
+ pydeck==0.9.1
59
+ Pygments==2.18.0
60
+ pyparsing==3.2.0
61
+ python-dateutil==2.9.0.post0
62
+ python-dotenv==1.0.1
63
+ pytz==2024.2
64
+ PyYAML==6.0.2
65
+ referencing==0.35.1
66
+ regex==2024.11.6
67
+ requests==2.32.3
68
+ rich==13.9.4
69
+ rpds-py==0.22.3
70
+ rsa==4.9
71
+ safetensors==0.4.5
72
+ scipy==1.14.1
73
+ simplejson==3.19.3
74
+ six==1.17.0
75
+ smmap==5.0.1
76
+ streamlit==1.40.2
77
+ tenacity==9.0.0
78
+ tensorstore==0.1.69
79
+ tokenizers==0.21.0
80
+ toml==0.10.2
81
+ toolz==1.0.0
82
+ tornado==6.4.2
83
+ tqdm==4.67.1
84
+ transformers==4.47.0
85
+ typing_extensions==4.12.2
86
+ tzdata==2024.2
87
+ uritemplate==4.1.1
88
+ urllib3==2.2.3
89
+ watchdog==6.0.0
90
+ zipp==3.21.0