Lovisticsdev committed on
Commit a6b3953 · 1 Parent(s): 6e2de00
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ recipes_raw_nosource_ar.json filter=lfs diff=lfs merge=lfs -text
+ recipes_raw_nosource_epi.json filter=lfs diff=lfs merge=lfs -text
+ recipes_raw_nosource_fn.json filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,110 @@
+ # Necessary libraries
+ import pandas as pd
+ import numpy as np
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import cosine_similarity
+ import re
+ import gradio as gr
+ import json
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.stem import WordNetLemmatizer
+ from nltk.tokenize import word_tokenize
+ from multiprocessing import Pool
+
+ # Download necessary NLTK data
+ nltk.download('punkt')
+ nltk.download('wordnet')
+ nltk.download('stopwords')
+
+ # Initialize a WordNet lemmatizer
+ lemmatizer = WordNetLemmatizer()
+
+ # Preprocessing functions
+ def remove_digits(s):
+     """Remove digits from a string."""
+     return re.sub(r'\d', '', str(s))
+
+ def remove_ads(s):
+     """Remove the word 'ADVERTISEMENT' from a string."""
+     return re.sub(r'ADVERTISEMENT', '', str(s))
+
+ stop_words = set(stopwords.words('english'))
+
+ def preprocess_text(text):
+     """Preprocess text: tokenize, lower-case, remove punctuation and stopwords, and lemmatize."""
+     # Tokenize and convert to lower case
+     words = word_tokenize(text.lower())
+
+     # Keep only alphabetic tokens, dropping punctuation and special characters
+     words = [word for word in words if word.isalpha()]
+
+     # Remove stopwords
+     words = [word for word in words if word not in stop_words]
+
+     # Lemmatize the words
+     words = [lemmatizer.lemmatize(word) for word in words]
+
+     return ' '.join(words)
+
+ # Function to load data
+ def load_data(file_path):
+     """Load recipes from a JSON file and clean the ingredients column."""
+     data = pd.read_json(file_path).T
+     data = data[['title', 'ingredients', 'instructions']].dropna(how='any')
+     data['ingredients'] = data['ingredients'].apply(remove_digits)
+     data['ingredients'] = data['ingredients'].apply(remove_ads)
+     # preprocess_text is applied once, in parallel, after all files are loaded
+     return data
+
+ # Paths to the data files
+ file_paths = [
+     'recipes_raw_nosource_ar.json',
+     'recipes_raw_nosource_epi.json',
+     'recipes_raw_nosource_fn.json'
+ ]
+
+ # Load and concatenate data from all files
+ data = pd.concat([load_data(file_path) for file_path in file_paths])
+ data.index = range(len(data))
+
+ # Multiprocessing pool to apply the preprocessing function to each ingredient list
+ with Pool() as pool:
+     data['ingredients'] = pool.map(preprocess_text, data['ingredients'])
+
+ # Vectorize the ingredients text
+ vectorizer = TfidfVectorizer()
+ tfidf_matrix = vectorizer.fit_transform(data['ingredients'])
+
+ def recommend_recipes(input_ingredients, n=5):
+     """Recommend recipes based on input ingredients."""
+     # Preprocess and vectorize the input ingredients
+     input_ingredients = preprocess_text(input_ingredients)
+     input_vector = vectorizer.transform([input_ingredients])
+
+     # Compute cosine similarity between the input and all recipes
+     cosine_similarities = cosine_similarity(input_vector, tfidf_matrix).flatten()
+
+     # Indices of the n most similar recipes, best match first
+     top_indices = cosine_similarities.argsort()[:-n-1:-1]
+
+     # Collect the full text of the most similar recipes
+     recommended_recipes = []
+     for i, index in enumerate(top_indices, start=1):
+         recipe = data.iloc[index]
+         recipe_dict = {
+             "Title": recipe['title'],
+             "Ingredients": recipe['ingredients'],
+             "Instructions": recipe['instructions']
+         }
+         # Format the recipe with numbering
+         recipe_md = f"{i}. {recipe_dict['Title']}\n\n_Ingredients:_\n\n{recipe_dict['Ingredients']}\n\n_Instructions:_\n\n{recipe_dict['Instructions']}"
+         recommended_recipes.append(recipe_md)
+
+     # Join the recommended recipes with blank lines between them
+     recommended_recipes_str = "\n\n\n".join(recommended_recipes)
+     return recommended_recipes_str
+
+ # Create and launch the Gradio interface
+ iface = gr.Interface(fn=recommend_recipes, inputs="text", outputs="text")
+ iface.launch()
recipes_raw_nosource_ar.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93da2202eacb85ad81b50e49f9c1ceba33eb298f1c82a6d02eb59cab7d550cb5
+ size 49784325
recipes_raw_nosource_epi.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08c7c8103a9c0dd114dc3fe01490fdf86ec9dee05d4db7d96504a61b5e8a886e
+ size 61133971
recipes_raw_nosource_fn.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea2487a1a6f81cd395754dc355fa12c47b51e5fac21d23f26fb0fd00479307f7
+ size 93702755
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ pandas
+ numpy
+ scikit-learn
+ gradio
+ nltk