RAG / app.py
krisha06's picture
Update app.py
8f45aef verified
import streamlit as st
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import chromadb
# Load the recipe dataset
@st.cache_resource
def _load_recipes_cached():
    """Load the ``train`` split of ``mbien/recipe_nlg`` once per server process.

    Streamlit re-executes the whole script on every widget interaction, so the
    load is routed through ``st.cache_resource`` (the same mechanism this file
    uses for the embedding model). The function lets exceptions propagate so
    that a failed load is not stored in the cache and can be retried.
    """
    return load_dataset("mbien/recipe_nlg", split="train", trust_remote_code=True)


def load_recipes():
    """Return the recipe dataset, or ``None`` if it could not be loaded.

    Returns:
        The object returned by ``load_dataset`` for the ``train`` split, or
        ``None`` when loading raised. The error is printed to stdout; callers
        are expected to check for ``None``.
    """
    try:
        dataset = _load_recipes_cached()
    except Exception as e:
        # Top-level boundary: any loader failure (network, missing files, ...)
        # is reported and turned into ``None`` rather than crashing the app.
        print(f"❌ Error loading dataset: {e}")
        return None
    print("✅ Dataset loaded successfully!")
    return dataset
# Fetch the recipes up front; every later section of the app reads from them.
recipes_df = load_recipes()
dataset_missing = recipes_df is None
if dataset_missing:
    # Surface the failure in the page and halt this script run.
    st.error("❌ Failed to load dataset! Check internet or dataset availability.")
    st.stop()
# Load embedding model
@st.cache_resource
def load_embedding_model():
    """Build the sentence-embedding model used for indexing and for queries.

    Decorated with ``st.cache_resource`` so the model object is created once
    and reused across Streamlit reruns.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    model = SentenceTransformer(model_name)
    return model
embed_model = load_embedding_model()

# Initialize ChromaDB: a persistent client keeps the vectors on disk under
# CHROMA_DB_PATH, so the "recipes" collection is reopened if it already exists.
CHROMA_DB_PATH = "./chroma_db"
RECIPE_COLLECTION_NAME = "recipes"
chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
recipe_collection = chroma_client.get_or_create_collection(
    name=RECIPE_COLLECTION_NAME
)
# Index the recipes into ChromaDB the first time the app runs.
#
# ``load_dataset(..., split="train")`` returns a ``datasets.Dataset``, which is
# neither a ``list`` nor a ``dict``, so the format check is on behaviour (sized
# and iterable) rather than on concrete type. A ``dict`` is rejected explicitly:
# iterating it yields keys, not recipe rows.
INDEX_BATCH_SIZE = 256  # rows embedded and written per round trip


def _flush_batch(collection, model, ids, titles, metadatas):
    """Embed the pending titles in one call and add them to the collection."""
    if not ids:
        return
    # Encoding a list of strings yields one vector per title.
    embeddings = model.encode(titles).tolist()
    collection.add(ids=ids, embeddings=embeddings, metadatas=metadatas)


is_row_sequence = (
    not isinstance(recipes_df, dict)
    and hasattr(recipes_df, "__iter__")
    and hasattr(recipes_df, "__len__")
)
if is_row_sequence:
    # NOTE: only an empty collection triggers indexing, so an interrupted run
    # leaves a partial index that is not resumed.
    if recipe_collection.count() == 0:
        st.info("Indexing recipes... This will take a few minutes.")
        pending_ids, pending_titles, pending_metadatas = [], [], []
        for i, recipe in enumerate(recipes_df):
            # ``or`` also covers keys that are present but hold None/empty.
            title = recipe.get("title") or "Unknown Title"
            ingredients = ", ".join(recipe.get("ingredients") or [])
            pending_ids.append(str(i))
            pending_titles.append(title)
            pending_metadatas.append(
                {"title": title, "ingredients": ingredients, "index": i}
            )
            if len(pending_ids) >= INDEX_BATCH_SIZE:
                _flush_batch(
                    recipe_collection,
                    embed_model,
                    pending_ids,
                    pending_titles,
                    pending_metadatas,
                )
                pending_ids, pending_titles, pending_metadatas = [], [], []
        # Write whatever is left over from the final, partially filled batch.
        _flush_batch(
            recipe_collection,
            embed_model,
            pending_ids,
            pending_titles,
            pending_metadatas,
        )
else:
    st.error("❌ Dataset is not in the correct format!")
# UI: free-text search over the indexed recipe titles.
st.title("🍽️ AI Recipe Finder with ChromaDB RAG")
query = st.text_input("🔍 Search for a recipe (e.g., pasta, cake)")
if query:
    # The query is embedded with the same model that produced the stored vectors.
    query_embedding = embed_model.encode(query).tolist()
    results = recipe_collection.query(
        query_embeddings=[query_embedding], n_results=5
    )
    st.subheader("🔎 Most relevant recipes:")
    # Results hold one inner list per query embedding; only one was sent.
    matches = results["metadatas"][0]
    if not matches:
        st.warning("No matching recipes found.")
    for result in matches:
        # "index" is the row position stored alongside the vector at indexing time.
        index = result["index"]
        recipe = recipes_df[index]
        # RecipeNLG names the preparation steps "directions" (a list of strings);
        # "instructions" is kept as a fallback for datasets that use that key.
        steps = recipe.get("directions") or recipe.get("instructions")
        if not steps:
            instructions = "No instructions available"
        elif isinstance(steps, str):
            instructions = steps
        else:
            instructions = " ".join(steps)
        ingredients = ", ".join(recipe.get("ingredients") or [])
        st.write(f"**🍴 {recipe.get('title', 'No title available')}**")
        st.write(f"**Ingredients:** {ingredients}")
        st.write(f"**Instructions:** {instructions}")
        st.write("---")
else:
    st.info("Type a recipe name to find similar recipes.")