Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import pickle | |
| import numpy as np | |
| import os | |
| import random | |
| import base64 | |
| from huggingface_hub import InferenceClient | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from datasets import load_dataset | |
| from IO_pipeline import RecipeDigitalizerPipeline | |
| # ========================================== | |
| # 1. SETUP & DATA LOADING (HYBRID) | |
| # ========================================== | |
# Hugging Face credentials and the model used to embed search queries.
hf_token = os.getenv("HF_TOKEN")
API_MODEL = "BAAI/bge-small-en-v1.5"
# No token -> no client; the embedding helper raises at call time instead.
client = InferenceClient(token=hf_token) if hf_token else None

print("โณ Initializing Data Loading...")

# --- A. Load Text Data from Hugging Face Dataset ---
try:
    print(" ...Downloading recipes from HF Dataset (Liori25/10k_recipes)")
    dataset = load_dataset("Liori25/10k_recipes", split="train")
    df_recipes = dataset.to_pandas()
    print(f"โ Recipes Loaded! Count: {len(df_recipes)}")
except Exception as e:
    # Best effort: keep the app importable with an empty table so the UI can
    # still start and report "Database error" instead of crashing on import.
    print(f"โ Error loading HF Dataset: {e}")
    df_recipes = pd.DataFrame({'Title': [], 'Raw_Output': []})

# --- B. Load Embeddings from Local File (Space Repo) ---
try:
    print(" ...Loading embeddings from local 'recipe_embeddings.pkl'")
    if os.path.exists('recipe_embeddings.pkl'):
        # NOTE(security): pickle.load can execute arbitrary code. This is only
        # acceptable because the file ships with the app; never point it at
        # user-supplied data.
        with open('recipe_embeddings.pkl', 'rb') as f:
            data = pickle.load(f)

        # Logic to handle different pickle formats
        if isinstance(data, dict):
            stored_embeddings = np.array(data['embeddings'])
        elif isinstance(data, pd.DataFrame):
            target_col = next(
                (c for c in ['embedding', 'embeddings', 'vectors'] if c in data.columns),
                None,
            )
            stored_embeddings = np.vstack(data[target_col].values) if target_col else data
        else:
            # A pickled plain list/tuple of vectors has no `.shape`; previously
            # the print below raised AttributeError and the valid embeddings
            # were thrown away by the outer `except`. Array-likes that already
            # expose `.shape` are kept untouched.
            stored_embeddings = data if hasattr(data, "shape") else np.asarray(data)
        print(f"โ Embeddings Loaded! Shape: {stored_embeddings.shape}")
    else:
        print("โ 'recipe_embeddings.pkl' not found locally.")
        stored_embeddings = None
except Exception as e:
    print(f"โ Error loading pickle file: {e}")
    stored_embeddings = None

# --- C. Safety Check ---
# Similarity results are mapped back to recipes by row position, so the two
# sources must have the same number of rows. This only warns; it does not fix.
if stored_embeddings is not None and not df_recipes.empty:
    if len(stored_embeddings) != len(df_recipes):
        print(f"โ ๏ธ WARNING: Row mismatch! Recipes: {len(df_recipes)}, Embeddings: {len(stored_embeddings)}")
| # ========================================== | |
| # 2. HELPER: IMAGE TO BASE64 | |
| # ========================================== | |
def image_to_base64(image_path):
    """Return the contents of *image_path* encoded as a base64 text string.

    If the path does not exist, a hard-coded base64 placeholder image is
    returned instead, so callers can always embed the result in a data URI.
    """
    if os.path.exists(image_path):
        with open(image_path, "rb") as img_file:
            raw_bytes = img_file.read()
        return base64.b64encode(raw_bytes).decode('utf-8')
    # Missing asset: fall back to the built-in placeholder.
    return "R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"
# Encode the three static UI images once at import time; the layout embeds
# them in inline data URIs.
logo_b64, profile_b64, process_b64 = (
    image_to_base64(asset_path)
    for asset_path in ("cookbook logo.png", "chef.avif", "preview of process.jpg")
)
| # ========================================== | |
| # 3. BACKEND LOGIC | |
| # ========================================== | |
def get_embedding_via_api(text):
    """Embed *text* with the hosted feature-extraction model.

    Returns the API response wrapped in a NumPy array.

    Raises:
        ValueError: if no inference client was created (HF_TOKEN not set).
    """
    if client:
        embedding = client.feature_extraction(text, model=API_MODEL)
        return np.array(embedding)
    raise ValueError("HF_TOKEN missing")
def _clean_recipe_field(raw_text):
    """Return display-ready text for one recipe field, or None to hide it."""
    val = str(raw_text).strip()
    # Stringified Python lists ("['a', 'b']"): drop the brackets and quotes.
    if val.startswith("[") and val.endswith("]"):
        val = val[1:-1].replace("'", "").replace('"', "").strip()
    val_lower = val.lower()
    # Placeholders that mean "no data".
    if val_lower in ('nan', 'none', 'null', '[]', ''):
        return None
    # Hide fields that carry an error marker. "parse error" always contains
    # "error", so a single substring test covers both cases.
    if "error" in val_lower:
        return None
    return val


def _build_recipe_card(row, score, ing_col, inst_col):
    """Render one recipe row and its similarity score as a Markdown/HTML card."""
    title = row.get('Title', 'Unknown Recipe')
    score_display = f"{score:.3%}"

    content_parts = []
    if ing_col:
        cleaned_ing = _clean_recipe_field(row[ing_col])
        if cleaned_ing:
            content_parts.append(f"<b>๐ INGREDIENTS:</b><br>{cleaned_ing}")
    if inst_col:
        cleaned_inst = _clean_recipe_field(row[inst_col])
        if cleaned_inst:
            content_parts.append(f"<b>๐ณ INSTRUCTIONS:</b><br>{cleaned_inst}")

    if content_parts:
        display_text = "<br><br>".join(content_parts)
    else:
        # Both fields were empty or hidden: fall back to the raw model output,
        # unless that also carries an error marker.
        raw_out = str(row.get('Raw_Output', 'No details available.'))
        if "error" in raw_out.lower():
            display_text = "<i>Details unavailable for this recipe.</i>"
        else:
            display_text = raw_out

    return (
        f"### ๐ {title}\n"
        f"<span style='color:#1877f2; font-weight:bold; font-size:14px;'>Match Score: {score_display}</span>\n\n"
        f"<div class='sim-scroll'>{display_text}</div>"
    )


def find_similar_recipes_list(query_text):
    """Return three display cards for the recipes most similar to *query_text*.

    The query is embedded through the inference API and compared by cosine
    similarity against the preloaded embeddings; rows are mapped back to
    ``df_recipes`` by position.

    Returns:
        A list of exactly three strings: recipe cards ordered best match
        first (padded with "" if fewer than three rows exist), or three
        copies of a "Database error" message when data is missing.

    Raises:
        ValueError: propagated from the embedding helper when no API client
            is configured.
    """
    if stored_embeddings is None:
        return ["Database error: Embeddings missing."] * 3
    if df_recipes.empty:
        return ["Database error: Recipes missing."] * 3

    query_vec = get_embedding_via_api(
        "Represent this recipe for retrieving similar dishes: " + query_text
    )
    if len(query_vec.shape) == 1:
        query_vec = query_vec.reshape(1, -1)

    # Startup only warns when the embedding and recipe row counts differ.
    # Score just the rows both sides share, so a surplus embedding can never
    # produce an index that `df_recipes.iloc` rejects with IndexError.
    usable_rows = min(len(stored_embeddings), len(df_recipes))
    scores = cosine_similarity(query_vec, stored_embeddings[:usable_rows])[0]
    top_indices = scores.argsort()[-3:][::-1]

    # Column names vary between dataset versions; match them loosely.
    cols = df_recipes.columns
    ing_col = next((c for c in cols if 'ingredient' in c.lower()), None)
    inst_col = next((c for c in cols if 'instruction' in c.lower()), None)

    results_list = [
        _build_recipe_card(df_recipes.iloc[idx], scores[idx], ing_col, inst_col)
        for idx in top_indices
    ]
    # The UI always binds three output slots.
    results_list.extend([""] * (3 - len(results_list)))
    return results_list
def format_recipe(json_data):
    """Turn a digitized-recipe dict into display text and a search query.

    Args:
        json_data: mapping that either contains an ``"error"`` key or the
            optional keys ``"title"``, ``"ingredients"`` and
            ``"instructions"``.

    Returns:
        A ``(text, query)`` tuple. ``text`` is the formatted recipe for the
        result box; ``query`` is ``"<title> <ingredients> <instructions>"``
        for the similarity search. On error, ``text`` is the error message
        and ``query`` is the empty string.
    """
    if "error" in json_data:
        return f"Error: {json_data['error']}", ""

    def _as_items(key):
        # A null field would make the comprehension below raise TypeError,
        # and a bare string would be split into single characters.
        value = json_data.get(key)
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return value

    # Treat a null/empty title like a missing one instead of printing "None".
    title = json_data.get("title") or "Unknown"
    ing = "\n".join(f"- {x}" for x in _as_items("ingredients"))
    inst = "\n".join(
        f"{i}. {x}" for i, x in enumerate(_as_items("instructions"), start=1)
    )
    text = f"๐ฝ๏ธ {title}\n\n๐ INGREDIENTS:\n{ing}\n\n๐ณ INSTRUCTIONS:\n{inst}"
    return text, f"{title} {ing} {inst}"
def ui_update_pipeline(image_path):
    """Digitize the uploaded recipe image and look up similar recipes.

    Returns a 7-tuple in the order the click handler binds its outputs:
    (result text, card 1 text, card 1 buttons update, card 2 box update,
    card 2 text, card 3 box update, card 3 text). On failure the first slot
    holds the error message and the visibility updates are no-ops.
    """
    if not hf_token:
        return "Error: HF_TOKEN missing", "", gr.update(), gr.update(), "", gr.update(), ""

    try:
        os.environ["HF_TOKEN"] = hf_token
        json_res = RecipeDigitalizerPipeline().run_pipeline(image_path)
        readable, query = format_recipe(json_res)

        # An empty query means the pipeline reported an error; skip the search.
        sim_list = (
            find_similar_recipes_list(query)
            if query
            else ["No query generated.", "", ""]
        )

        return (
            readable,
            sim_list[0],
            gr.update(visible=True),
            gr.update(visible=True),
            sim_list[1],
            gr.update(visible=True),
            sim_list[2],
        )
    except Exception as e:
        return f"Error: {e}", "Error", gr.update(), gr.update(), "", gr.update(), ""
| # ========================================== | |
| # 4. MODERN UI THEME & CSS | |
| # ========================================== | |
# Soft base theme: indigo/blue accents on slate neutrals, with Inter first in
# the font stack and generic sans-serif fallbacks after it.
theme = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"],
    neutral_hue="slate",
    secondary_hue="blue",
    primary_hue="indigo",
)
| # Custom stylesheet, passed to demo.launch(css=...) at the bottom of the file. | |
| # The class names defined here (custom-header, nav-btn, content-card, sim-card, | |
| # sim-scroll, trend-box, contact-item, gap-fix, ...) are the ones the layout | |
| # attaches through elem_classes or writes into its inline HTML snippets. | |
| # NOTE(review): `.gradio-examples` and `button.gallery-item` look like selectors | |
| # for Gradio's own generated markup - confirm they still match the installed version. | |
| modern_css = """ | |
| body, .gradio-container { background-color: #f0f2f5; } | |
| /* Sticky Header */ | |
| .custom-header { | |
| background: rgba(255, 255, 255, 0.95); | |
| backdrop-filter: blur(10px); | |
| border-bottom: 1px solid #e4e6eb; | |
| padding: 15px 20px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| position: sticky; | |
| top: 0; | |
| z-index: 1000; | |
| box-shadow: 0 2px 10px rgba(0,0,0,0.05); | |
| } | |
| .logo-area { display: flex; align-items: center; gap: 20px; } | |
| .logo-img { height: 120px; width: 120px; border-radius: 12px; object-fit: cover; border: 1px solid #ddd; } | |
| .text-area { display: flex; flex-direction: column; } | |
| .app-name { | |
| font-weight: 800; | |
| font-size: 32px; | |
| background: -webkit-linear-gradient(45deg, #1877f2, #6b21a8); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| line-height: 1.2; | |
| } | |
| .app-slogan { font-size: 16px; color: #65676b; font-weight: 500; } | |
| /* Sidebar Navigation */ | |
| .nav-btn { | |
| text-align: left !important; | |
| justify-content: flex-start !important; | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| color: #65676b !important; | |
| font-weight: 600 !important; | |
| font-size: 16px !important; | |
| padding: 12px 16px !important; | |
| border-radius: 10px !important; | |
| transition: all 0.2s ease; | |
| } | |
| .nav-btn:hover { background-color: #e4e6eb !important; color: #050505 !important; } | |
| .nav-btn.selected { | |
| background-color: #e7f3ff !important; | |
| color: #1877f2 !important; | |
| border-left: 4px solid #1877f2 !important; | |
| } | |
| /* Feed Styling */ | |
| #feed-container { | |
| gap: 0px !important; | |
| padding: 0px !important; | |
| } | |
| #feed-container > .form { | |
| gap: 0px !important; | |
| } | |
| .content-card { | |
| background-color: #ffffff !important; | |
| background: #ffffff !important; | |
| border-radius: 12px; | |
| box-shadow: 0 1px 2px rgba(0,0,0,0.1); | |
| border: 1px solid #ddd; | |
| padding: 20px; | |
| margin-bottom: 7px !important; | |
| margin-top: 0px !important; | |
| width: 100%; | |
| display: block; | |
| } | |
| /* Similar Recipe Cards */ | |
| .sim-card { | |
| background: #fff; | |
| border: 1px solid #eee; | |
| border-radius: 8px; | |
| padding: 15px; | |
| height: 100%; | |
| border-top: 4px solid #1877f2; | |
| display: flex; | |
| flex-direction: column; | |
| justify-content: space-between; | |
| } | |
| .sim-scroll { | |
| height: 400px; | |
| overflow-y: auto; | |
| margin-bottom: 10px; | |
| padding-right: 5px; | |
| font-size: 14px; | |
| color: #4b4f56; | |
| } | |
| .trend-box { | |
| background:white; | |
| padding:10px; | |
| border-radius:8px; | |
| margin-bottom:10px; | |
| box-shadow:0 1px 2px rgba(0,0,0,0.1); | |
| transition: background 0.2s; | |
| } | |
| .trend-box:hover { background: #f0f2f5; cursor: pointer; } | |
| /* Contact List Styling */ | |
| .contact-item { | |
| display: flex; | |
| align-items: center; | |
| padding: 10px; | |
| background: white; | |
| border-radius: 8px; | |
| margin-bottom: 8px; | |
| box-shadow: 0 1px 2px rgba(0,0,0,0.1); | |
| cursor: pointer; | |
| transition: 0.2s; | |
| } | |
| .contact-item:hover { background: #f0f2f5; } | |
| .avatar-circle { | |
| width: 36px; height: 36px; | |
| background: #e4e6eb; | |
| border-radius: 50%; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| font-weight: 700; | |
| color: #050505; | |
| margin-right: 12px; | |
| position: relative; | |
| font-size: 13px; | |
| border: 1px solid #ddd; | |
| } | |
| .status-badge { | |
| width: 12px; height: 12px; | |
| border-radius: 50%; | |
| border: 2px solid white; | |
| position: absolute; | |
| bottom: -1px; right: -2px; | |
| } | |
| .status-green { background-color: #31a24c; } | |
| .status-red { background-color: #f02849; } | |
| .gap-fix { gap: 25px !important; } | |
| .gradio-examples { | |
| display: flex; | |
| justify-content: center; | |
| width: 100%; | |
| } | |
| /* EXAMPLE IMAGES STYLING UPDATED */ | |
| button.gallery-item { | |
| transition: transform 0.2s ease, box-shadow 0.2s ease !important; | |
| z-index: 1; | |
| width: 80px !important; /* Force width */ | |
| height: 80px !important; /* Force height */ | |
| overflow: hidden !important; | |
| } | |
| button.gallery-item img { | |
| width: 100% !important; | |
| height: 100% !important; | |
| object-fit: cover !important; | |
| } | |
| button.gallery-item:hover { | |
| transform: scale(2.5) !important; | |
| z-index: 1000 !important; | |
| box-shadow: 0 10px 25px rgba(0,0,0,0.3) !important; | |
| border: 2px solid white !important; | |
| border-radius: 8px !important; | |
| } | |
| """ | |
| # ========================================== | |
| # 5. LAYOUT CONSTRUCTION | |
| # ========================================== | |
| # Builds the whole UI inside one Blocks context: a header, then a row with a | |
| # left navigation sidebar, a center column holding three switchable views | |
| # (digitizer, feed, about) and a right column of static "trending"/"contacts" | |
| # widgets. Event handlers are registered at the end of the block. | |
| with gr.Blocks(title="CookBook AI") as demo: | |
| # --- HEADER --- | |
| # NOTE(review): the data URI says image/jpeg, but the logo file loaded for | |
| # logo_b64 is "cookbook logo.png" (and the profile image is .avif) - confirm | |
| # browsers render these correctly or use the matching MIME type. | |
| gr.HTML(f""" | |
| <div class="custom-header"> | |
| <div class="logo-area"> | |
| <img src="data:image/jpeg;base64,{logo_b64}" class="logo-img"> | |
| <div class="text-area"> | |
| <span class="app-name">CookBook AI</span> | |
| <span class="app-slogan">Turning Handwritten Recipes into a Digital Recipe.</span> | |
| </div> | |
| </div> | |
| <div style="color: #65676b; font-weight: 600;">v4.4</div> | |
| </div> | |
| """) | |
| with gr.Row(): | |
| # --- LEFT SIDEBAR --- | |
| with gr.Column(scale=1, min_width=200): | |
| gr.HTML(f""" | |
| <div style="display:flex; align-items:center; padding: 10px 10px 5px 10px;"> | |
| <img src="data:image/jpeg;base64,{profile_b64}" style="width:40px; height:40px; border-radius:50%; margin-right:10px; object-fit:cover;"> | |
| <b style="font-size: 16px;">My Profile</b> | |
| </div> | |
| """) | |
| gr.HTML("<hr style='border: 0; border-top: 1px solid #e4e6eb; margin: 10px 0 20px 0;'>") | |
| # Navigation buttons; the "selected" class marks the active view. | |
| nav_digital = gr.Button("โจ AI Digitizer", elem_classes=["nav-btn", "selected"]) | |
| nav_feed = gr.Button("๐ฐ News Feed", elem_classes=["nav-btn"]) | |
| nav_about = gr.Button("โน๏ธ About", elem_classes=["nav-btn"]) | |
| # --- CENTER CONTENT --- | |
| with gr.Column(scale=3): | |
| # === VIEW 1: AI DIGITALIZER === | |
| with gr.Group(visible=True) as digitalizer_view: | |
| with gr.Row(elem_classes=["gap-fix"]): | |
| with gr.Column(scale=1): | |
| with gr.Group(elem_classes=["content-card"]): | |
| # type="filepath": the click handler receives a path string, not image data. | |
| input_img = gr.Image(type="filepath", label="Upload", height=300) | |
| magic_btn = gr.Button("โจ Convert to Digital", variant="primary", size="lg") | |
| # --- UPDATED EXAMPLES HERE --- | |
| gr.Examples( | |
| examples=[ | |
| ["quick_tries_images/applecrisp.jpg"], | |
| ["quick_tries_images/meatballs recipe.jpg"], | |
| ["quick_tries_images/Apple Dapple (aka Fresh Apple Cake).jfif"] | |
| ], | |
| inputs=input_img, | |
| label="Or try these examples:", | |
| cache_examples=False | |
| ) | |
| with gr.Column(scale=1): | |
| with gr.Group(elem_classes=["content-card"]): | |
| out_text = gr.Textbox(label="Result", value="Here your digitalized recipe will be presented", lines=20, interactive=False, show_label=False) | |
| gr.HTML("<div style='height: 35px;'></div>") | |
| gr.Markdown("### 3. Similar Recipes from Database") | |
| # Three result cards. Card 1 is visible from the start with a hint text and | |
| # hidden buttons; cards 2 and 3 stay hidden until the click handler reveals them. | |
| with gr.Row(): | |
| with gr.Column(elem_classes=["sim-card"]) as c1_box: | |
| sim1 = gr.Markdown("Once you will upload your scanned recipe, we will share similar recipes!") | |
| with gr.Row(visible=False) as c1_btns: | |
| gr.Button("๐ Like", size="sm", variant="secondary") | |
| gr.Button("โ๏ธ Share", size="sm", variant="secondary") | |
| with gr.Column(elem_classes=["sim-card"], visible=False) as c2_box: | |
| sim2 = gr.Markdown("") | |
| with gr.Row(): | |
| gr.Button("๐ Like", size="sm", variant="secondary") | |
| gr.Button("โ๏ธ Share", size="sm", variant="secondary") | |
| with gr.Column(elem_classes=["sim-card"], visible=False) as c3_box: | |
| sim3 = gr.Markdown("") | |
| with gr.Row(): | |
| gr.Button("๐ Like", size="sm", variant="secondary") | |
| gr.Button("โ๏ธ Share", size="sm", variant="secondary") | |
| # The output order here must match the 7-tuple returned by ui_update_pipeline. | |
| magic_btn.click(ui_update_pipeline, input_img, [out_text, sim1, c1_btns, c2_box, sim2, c3_box, sim3]) | |
| # === VIEW 2: FEED === | |
| with gr.Column(visible=False, elem_id="feed-container") as feed_view: | |
| if not df_recipes.empty: | |
| # The feed is sampled once, while the layout is built, not per page view. | |
| # NOTE(review): sample(10) raises ValueError when the table has fewer than | |
| # 10 rows - consider sample(min(10, len(df_recipes))). | |
| feed_samples = df_recipes.sample(10) | |
| for index, row in feed_samples.iterrows(): | |
| # Author, emoji and post time are random placeholders, not dataset fields. | |
| user_name = random.choice(["Grandma Rose", "Chef Mike", "Sarah J."]) | |
| emoji = random.choice(["๐ฅ", "๐ฅ", "๐ฐ", "๐ฎ"]) | |
| time_options = ["2h", "3h", "4h", "6h", "9h", "12h", "a day ago", "2 days ago"] | |
| post_time = random.choice(time_options) | |
| # Preview text is the first 250 characters of the raw output. | |
| raw_desc = str(row.get('Raw_Output', 'Delicious recipe...'))[:250] | |
| title_feed = row.get('Title', 'Recipe') | |
| with gr.Group(elem_classes=["content-card"]): | |
| gr.HTML(f""" | |
| <div style="display:flex; gap:10px; align-items:center; margin-bottom:12px;"> | |
| <div style="width:40px; height:40px; background:#e4e6eb; border-radius:50%; display:flex; align-items:center; justify-content:center; font-size:20px;">{emoji}</div> | |
| <div><b>{user_name}</b><br><span style="color:gray; font-size:12px;">{post_time} ยท ๐ Public</span></div> | |
| </div> | |
| """) | |
| gr.Markdown(f"### {title_feed}") | |
| gr.Markdown(f"{raw_desc}...") | |
| with gr.Row(): | |
| gr.Button("๐ Like", size="sm", variant="secondary") | |
| gr.Button("๐ฌ Comment", size="sm", variant="secondary") | |
| gr.Button("โ๏ธ Share", size="sm", variant="secondary") | |
| else: | |
| gr.Markdown("โ ๏ธ Database is empty.") | |
| # === VIEW 3: ABOUT (UPDATED) === | |
| with gr.Group(visible=False) as about_view: | |
| with gr.Group(elem_classes=["content-card"]): | |
| gr.Markdown(""" | |
| # Goal Project | |
| The goal of this project is to develop an app that takes a scanned image of a handwritten recipe as input, generates text using a VLM, and based on the extracted text, suggests 3 similar recipes from a 10K dataset of synthetic recipes. Our app will bridge the gap between analog culinary heritage and digital discovery. | |
| ### About Us | |
| This app was developed by **Shahar Firshtman** and **Lior Feinstein**, 2nd year students for Economics and data science. | |
| """) | |
| # Process Image | |
| gr.HTML(f""" | |
| <div style="margin-top: 20px;"> | |
| <h3 style="color: #444;">Process Overview</h3> | |
| <img src="data:image/jpeg;base64,{process_b64}" style="width: 100%; height: auto; border-radius: 8px; border: 1px solid #ddd;"> | |
| </div> | |
| """) | |
| # --- RIGHT COLUMN --- | |
| with gr.Column(scale=1, min_width=200): | |
| # 1. Trending | |
| gr.Markdown("### Trending Recipes") | |
| # Returns the HTML for one static "trending" tile (title plus like count). | |
| def trend_box(title, likes): | |
| return f"<div class='trend-box'><b>{title}</b><br><span style='color:gray; font-size:12px;'>{likes} likes</span></div>" | |
| gr.HTML(trend_box("๐ Ramen Hack", "12k") + trend_box("๐ช Best Cookies", "8k") + trend_box("๐ฐ Cheese Cake", "15k") + trend_box("๐ช Nana's Tahini Cookies", "9k")) | |
| # 2. Contacts | |
| gr.HTML("<div style='height: 20px;'></div>") | |
| gr.Markdown("### Quick Contacts") | |
| # Returns the HTML for one contact row; status_color selects the | |
| # `status-<color>` CSS class of the badge ("green" and "red" are defined). | |
| def contact_box(name, initials, status_color): | |
| return f""" | |
| <div class='contact-item'> | |
| <div class='avatar-circle'> | |
| {initials} | |
| <div class='status-badge status-{status_color}'></div> | |
| </div> | |
| <div style='font-weight:600; font-size:14px; color:#050505;'>{name}</div> | |
| </div> | |
| """ | |
| contact_html = ( | |
| contact_box("Elon Musk", "EM", "green") + | |
| contact_box("Gordon Ramsay", "GR", "red") + | |
| contact_box("Guy Fieri", "GF", "green") + | |
| contact_box("Bobby Flay", "BF", "red") | |
| ) | |
| gr.HTML(contact_html) | |
| # ========================================== | |
| # 6. NAVIGATION LOGIC (Python callbacks, no custom JavaScript) | |
| # ========================================== | |
| # Each handler returns six updates in the order of `outputs_ui`: visibility for | |
| # the three views, then CSS classes for the three nav buttons, so exactly one | |
| # view is shown and its button carries the "selected" class. | |
| def go_digi(): | |
| return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(elem_classes=["nav-btn", "selected"]), gr.update(elem_classes=["nav-btn"]), gr.update(elem_classes=["nav-btn"])) | |
| def go_feed(): | |
| return (gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(elem_classes=["nav-btn"]), gr.update(elem_classes=["nav-btn", "selected"]), gr.update(elem_classes=["nav-btn"])) | |
| def go_about(): | |
| return (gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(elem_classes=["nav-btn"]), gr.update(elem_classes=["nav-btn"]), gr.update(elem_classes=["nav-btn", "selected"])) | |
| outputs_ui = [digitalizer_view, feed_view, about_view, nav_digital, nav_feed, nav_about] | |
| nav_digital.click(go_digi, None, outputs_ui) | |
| nav_feed.click(go_feed, None, outputs_ui) | |
| nav_about.click(go_about, None, outputs_ui) | |
| # Start the app only when this file is run as a script. | |
| # NOTE(review): theme and css are passed to launch() rather than gr.Blocks() - | |
| # confirm the installed Gradio version accepts them there. | |
| if __name__ == "__main__": | |
| demo.launch(theme=theme, css=modern_css) | |