Spaces:

BinKhoaLe1812
/

Cooking_Tutor

Sleeping

App Files Community

LiamKhoaLe committed on Oct 18

Commit

543e178

1 Parent(s): f12a3b4

Upd inline img

Browse files

Files changed (2) hide show

api/chatbot.py +88 -24
search/engines/image.py +113 -11

api/chatbot.py CHANGED Viewed

@@ -18,7 +18,7 @@ class GeminiClient:
             logger.warning("FlashAPI not set - Gemini client will use fallback responses")
             self.client = None
         else:
-            self.client = genai.Client(api_key=gemini_flash_api_key)
     def generate_content(self, prompt: str, model: str = "gemini-2.5-flash", temperature: float = 0.7) -> str:
         """Generate content using Gemini API"""
@@ -230,13 +230,18 @@ class CookingTutorChatbot:
             title = image.get('title', '')
             source_url = image.get('source_url', '')
             source = image.get('source', 'unknown')
             # Generate contextual alt text and caption
             alt_text = self._generate_image_alt_text(title, query, i)
             caption = self._generate_image_caption(title, query, i)
-            # Determine image placement context
-            placement_context = self._determine_image_placement(query, i)
             enhanced_image = {
                 'id': f"img_{i+1}",
@@ -250,13 +255,27 @@ class CookingTutorChatbot:
                 'display_order': i + 1,
                 'aspect_ratio': '16:9',  # Default, can be detected later
                 'loading': 'lazy',  # For performance
-                'type': 'cooking_image'
             }
             enhanced_images.append(enhanced_image)
         return enhanced_images
     def _generate_image_alt_text(self, title: str, query: str, index: int) -> str:
         """Generate descriptive alt text for accessibility"""
         if title and len(title) > 10:
@@ -274,36 +293,72 @@ class CookingTutorChatbot:
             return f"Related cooking image {index + 1}"
     def _generate_image_caption(self, title: str, query: str, index: int) -> str:
-        """Generate contextual caption for the image"""
         if title and len(title) > 5:
             return title
-        # Generate contextual captions
         query_lower = query.lower()
-        if 'pad thai' in query_lower:
-            return f"Pad Thai cooking example {index + 1}"
-        elif 'fusion' in query_lower:
-            return f"Fusion cooking inspiration {index + 1}"
-        elif 'western' in query_lower:
-            return f"Western cooking technique {index + 1}"
         else:
-            return f"Related cooking example {index + 1}"
     def _determine_image_placement(self, query: str, index: int) -> str:
-        """Determine where the image should be placed in the text"""
         query_lower = query.lower()
         if index == 0:
-            if 'recipe' in query_lower or 'ingredient' in query_lower:
                 return 'after_ingredients'
-            elif 'technique' in query_lower or 'method' in query_lower:
                 return 'after_technique_intro'
             else:
                 return 'after_intro'
         elif index == 1:
             return 'after_instructions'
-        else:
             return 'after_tips'
     def _integrate_images_inline(self, text: str, images: List[Dict]) -> str:
         """Integrate images inline with text using placeholders for frontend rendering"""
@@ -327,16 +382,25 @@ class CookingTutorChatbot:
         for line in lines:
             line_lower = line.lower().strip()
-            # Detect section types
-            if any(keyword in line_lower for keyword in ['ingredients:', 'ingredient list:', 'what you need:']):
                 if current_section['content'].strip():
                     sections.append(current_section)
                 current_section = {'type': 'ingredients', 'content': line + '\n', 'images': []}
-            elif any(keyword in line_lower for keyword in ['instructions:', 'directions:', 'how to cook:', 'steps:']):
                 if current_section['content'].strip():
                     sections.append(current_section)
                 current_section = {'type': 'instructions', 'content': line + '\n', 'images': []}
-            elif any(keyword in line_lower for keyword in ['tips:', 'troubleshooting:', 'notes:', 'variations:']):
                 if current_section['content'].strip():
                     sections.append(current_section)
                 current_section = {'type': 'tips', 'content': line + '\n', 'images': []}
@@ -482,8 +546,8 @@ class CookingTutorChatbot:
                 doc_id = extract_numeric_id(citation_id)
                 if doc_id is not None and doc_id in url_mapping:
-                    url = url_mapping[doc_id]
-                    urls.append(f'<{url}>')
                     logger.info(f"[CITATION] Replacing <#{citation_id}> with {url}")
                 else:
                     if doc_id is None:
@@ -506,7 +570,7 @@ class CookingTutorChatbot:
                 # Process citations with this pattern
                 processed_response = re.sub(pattern, replace_citation, processed_response)
                 total_citations_processed += sum(len([id_str.strip() for id_str in citation_content.split(',')])
-                                               for citation_content in citations_found)
                 logger.info(f"[CITATION] Processed {len(citations_found)} citation groups with pattern: {pattern}")
         # Fallback: Handle any remaining malformed citations

             logger.warning("FlashAPI not set - Gemini client will use fallback responses")
             self.client = None
         else:
+        self.client = genai.Client(api_key=gemini_flash_api_key)
     def generate_content(self, prompt: str, model: str = "gemini-2.5-flash", temperature: float = 0.7) -> str:
         """Generate content using Gemini API"""
             title = image.get('title', '')
             source_url = image.get('source_url', '')
             source = image.get('source', 'unknown')
+            image_type = image.get('image_type', 'general')
+            query_context = image.get('query_context', 'general')
+            # Set current image type for caption generation
+            self._current_image_type = image_type
             # Generate contextual alt text and caption
             alt_text = self._generate_image_alt_text(title, query, i)
             caption = self._generate_image_caption(title, query, i)
+            # Determine image placement context based on image type
+            placement_context = self._determine_image_placement_by_type(image_type, query, i)
             enhanced_image = {
                 'id': f"img_{i+1}",
                 'display_order': i + 1,
                 'aspect_ratio': '16:9',  # Default, can be detected later
                 'loading': 'lazy',  # For performance
+                'type': 'cooking_image',
+                'image_type': image_type,
+                'query_context': query_context
             }
             enhanced_images.append(enhanced_image)
         return enhanced_images
+    def _determine_image_placement_by_type(self, image_type: str, query: str, index: int) -> str:
+        """Determine image placement based on image type for optimal inline display"""
+        if image_type == 'ingredients':
+            return 'after_ingredients'
+        elif image_type == 'technique':
+            return 'after_instructions'
+        elif image_type == 'final_dish':
+            return 'after_tips'
+        else:
+            # Fallback to original logic
+            return self._determine_image_placement(query, index)
     def _generate_image_alt_text(self, title: str, query: str, index: int) -> str:
         """Generate descriptive alt text for accessibility"""
         if title and len(title) > 10:
             return f"Related cooking image {index + 1}"
     def _generate_image_caption(self, title: str, query: str, index: int) -> str:
+        """Generate contextual caption for the image based on image type"""
         if title and len(title) > 5:
             return title
+        # Generate contextual captions based on image type
         query_lower = query.lower()
+        # Check if we have image type information
+        image_type = getattr(self, '_current_image_type', 'general')
+        if image_type == 'ingredients':
+            if 'pad thai' in query_lower:
+                return "Fresh ingredients for Pad Thai"
+            elif 'fusion' in query_lower:
+                return "Ingredients for fusion cooking"
+            else:
+                return f"Fresh ingredients {index + 1}"
+        elif image_type == 'technique':
+            if 'pad thai' in query_lower:
+                return "Pad Thai cooking technique"
+            elif 'fusion' in query_lower:
+                return "Fusion cooking technique"
+            else:
+                return f"Cooking technique {index + 1}"
+        elif image_type == 'final_dish':
+            if 'pad thai' in query_lower:
+                return "Completed Pad Thai dish"
+            elif 'fusion' in query_lower:
+                return "Fusion cooking result"
+            else:
+                return f"Final dish {index + 1}"
         else:
+            # Fallback to original logic
+            if 'pad thai' in query_lower:
+                return f"Pad Thai cooking example {index + 1}"
+            elif 'fusion' in query_lower:
+                return f"Fusion cooking inspiration {index + 1}"
+            elif 'western' in query_lower:
+                return f"Western cooking technique {index + 1}"
+            else:
+                return f"Related cooking example {index + 1}"
     def _determine_image_placement(self, query: str, index: int) -> str:
+        """Determine where the image should be placed in the text for optimal inline display"""
         query_lower = query.lower()
+        # More intelligent placement based on content type and image index
         if index == 0:
+            # First image: place early in the content for immediate visual impact
+            if any(keyword in query_lower for keyword in ['ingredient', 'ingredients', 'what you need']):
                 return 'after_ingredients'
+            elif any(keyword in query_lower for keyword in ['technique', 'method', 'how to']):
                 return 'after_technique_intro'
+            elif any(keyword in query_lower for keyword in ['recipe', 'cook', 'make']):
+                return 'after_intro'
             else:
                 return 'after_intro'
         elif index == 1:
+            # Second image: place in the middle of instructions
             return 'after_instructions'
+        elif index == 2:
+            # Third image: place after tips or at the end
             return 'after_tips'
+        else:
+            # Additional images: distribute evenly
+            return 'after_instructions'
     def _integrate_images_inline(self, text: str, images: List[Dict]) -> str:
         """Integrate images inline with text using placeholders for frontend rendering"""
         for line in lines:
             line_lower = line.lower().strip()
+            # Detect section types with more comprehensive patterns
+            if any(keyword in line_lower for keyword in [
+                'ingredients:', 'ingredient list:', 'what you need:', 'materials:',
+                'you will need:', 'ingredients list:', 'for this recipe:'
+            ]):
                 if current_section['content'].strip():
                     sections.append(current_section)
                 current_section = {'type': 'ingredients', 'content': line + '\n', 'images': []}
+            elif any(keyword in line_lower for keyword in [
+                'instructions:', 'directions:', 'how to cook:', 'steps:', 'method:',
+                'cooking steps:', 'preparation:', 'how to make:', 'procedure:'
+            ]):
                 if current_section['content'].strip():
                     sections.append(current_section)
                 current_section = {'type': 'instructions', 'content': line + '\n', 'images': []}
+            elif any(keyword in line_lower for keyword in [
+                'tips:', 'troubleshooting:', 'notes:', 'variations:', 'suggestions:',
+                'pro tips:', 'helpful hints:', 'cooking tips:', 'advice:'
+            ]):
                 if current_section['content'].strip():
                     sections.append(current_section)
                 current_section = {'type': 'tips', 'content': line + '\n', 'images': []}
                 doc_id = extract_numeric_id(citation_id)
                 if doc_id is not None and doc_id in url_mapping:
+                        url = url_mapping[doc_id]
+                        urls.append(f'<{url}>')
                     logger.info(f"[CITATION] Replacing <#{citation_id}> with {url}")
                 else:
                     if doc_id is None:
                 # Process citations with this pattern
                 processed_response = re.sub(pattern, replace_citation, processed_response)
                 total_citations_processed += sum(len([id_str.strip() for id_str in citation_content.split(',')])
+                            for citation_content in citations_found)
                 logger.info(f"[CITATION] Processed {len(citations_found)} citation groups with pattern: {pattern}")
         # Fallback: Handle any remaining malformed citations

search/engines/image.py CHANGED Viewed

@@ -18,14 +18,17 @@ class ImageSearchEngine:
         self.timeout = timeout
     def search_cooking_images(self, query: str, num_results: int = 3, language: str = "en") -> List[Dict]:
-        """Search for cooking-related images with robust error handling"""
         if not query or not query.strip():
             logger.warning("Empty query provided for image search")
             return []
-        results = []
-        # Try multiple image search strategies
         strategies = [
             self._search_google_images,
             self._search_bing_images,
@@ -34,25 +37,124 @@ class ImageSearchEngine:
         for strategy in strategies:
             try:
-                strategy_results = strategy(query, num_results, language)
                 if strategy_results:
                     # Filter and validate results
                     valid_results = self._validate_image_results(strategy_results)
                     if valid_results:
-                        results.extend(valid_results)
                         logger.info(f"Image search strategy found {len(valid_results)} valid results")
-                        if len(results) >= num_results:
                             break
             except Exception as e:
                 logger.warning(f"Image search strategy failed: {e}")
                 continue
-        # Remove duplicates and return
-        unique_results = self._remove_duplicate_images(results)
-        final_results = unique_results[:num_results]
-        logger.info(f"Image search completed: {len(final_results)} unique results from {len(results)} total")
-        return final_results
     def _validate_image_results(self, results: List[Dict]) -> List[Dict]:
         """Validate and clean image results"""

         self.timeout = timeout
     def search_cooking_images(self, query: str, num_results: int = 3, language: str = "en") -> List[Dict]:
+        """Search for diverse cooking-related images including ingredients, techniques, and final dishes"""
         if not query or not query.strip():
             logger.warning("Empty query provided for image search")
             return []
+        # Generate diverse search queries for comprehensive visual coverage
+        search_queries = self._generate_diverse_cooking_queries(query, num_results)
+        all_results = []
+        # Try multiple image search strategies with diverse queries
         strategies = [
             self._search_google_images,
             self._search_bing_images,
         for strategy in strategies:
             try:
+                strategy_results = []
+                for search_query in search_queries:
+                    query_results = strategy(search_query['query'], search_query['max_results'], language)
+                    if query_results:
+                        # Add query context to results
+                        for result in query_results:
+                            result['query_context'] = search_query['context']
+                            result['image_type'] = search_query['type']
+                        strategy_results.extend(query_results)
                 if strategy_results:
                     # Filter and validate results
                     valid_results = self._validate_image_results(strategy_results)
                     if valid_results:
+                        all_results.extend(valid_results)
                         logger.info(f"Image search strategy found {len(valid_results)} valid results")
+                        if len(all_results) >= num_results * 2:  # Get more to filter
                             break
             except Exception as e:
                 logger.warning(f"Image search strategy failed: {e}")
                 continue
+        # Remove duplicates and prioritize diverse results
+        unique_results = self._remove_duplicate_images(all_results)
+        diverse_results = self._prioritize_diverse_images(unique_results, num_results)
+        logger.info(f"Image search completed: {len(diverse_results)} diverse results from {len(all_results)} total")
+        return diverse_results
+    def _generate_diverse_cooking_queries(self, original_query: str, num_results: int) -> List[Dict]:
+        """Generate diverse search queries for comprehensive cooking image coverage"""
+        queries = []
+        # Extract key cooking terms from the original query
+        query_lower = original_query.lower()
+        # 1. Final dish query (original focus)
+        final_dish_query = f"{original_query} final dish completed recipe"
+        queries.append({
+            'query': final_dish_query,
+            'context': 'final_dish',
+            'type': 'final_dish',
+            'max_results': max(1, num_results // 3)
+        })
+        # 2. Ingredients query
+        ingredients_query = f"{original_query} ingredients fresh raw materials"
+        queries.append({
+            'query': ingredients_query,
+            'context': 'ingredients',
+            'type': 'ingredients',
+            'max_results': max(1, num_results // 3)
+        })
+        # 3. Cooking technique/process query
+        technique_query = f"{original_query} cooking technique process step by step"
+        queries.append({
+            'query': technique_query,
+            'context': 'technique',
+            'type': 'technique',
+            'max_results': max(1, num_results // 3)
+        })
+        # Add more specific queries based on the original query content
+        if any(keyword in query_lower for keyword in ['pad thai', 'noodles', 'pasta']):
+            queries.append({
+                'query': f"{original_query} noodle preparation cooking technique",
+                'context': 'noodle_technique',
+                'type': 'technique',
+                'max_results': 1
+            })
+        if any(keyword in query_lower for keyword in ['fusion', 'western', 'technique']):
+            queries.append({
+                'query': f"{original_query} fusion cooking western technique",
+                'context': 'fusion_technique',
+                'type': 'technique',
+                'max_results': 1
+            })
+        return queries
+    def _prioritize_diverse_images(self, results: List[Dict], num_results: int) -> List[Dict]:
+        """Prioritize diverse image types for better visual instruction"""
+        # Group results by type
+        type_groups = {
+            'final_dish': [],
+            'ingredients': [],
+            'technique': [],
+            'other': []
+        }
+        for result in results:
+            image_type = result.get('image_type', 'other')
+            if image_type in type_groups:
+                type_groups[image_type].append(result)
+            else:
+                type_groups['other'].append(result)
+        # Select diverse results
+        diverse_results = []
+        # Prioritize: 1 final dish, 1 ingredients, 1 technique, then fill with others
+        if type_groups['final_dish']:
+            diverse_results.append(type_groups['final_dish'][0])
+        if type_groups['ingredients'] and len(diverse_results) < num_results:
+            diverse_results.append(type_groups['ingredients'][0])
+        if type_groups['technique'] and len(diverse_results) < num_results:
+            diverse_results.append(type_groups['technique'][0])
+        # Fill remaining slots with other results
+        all_remaining = []
+        for group in type_groups.values():
+            all_remaining.extend(group[1:])  # Skip first item (already used)
+        diverse_results.extend(all_remaining[:num_results - len(diverse_results)])
+        return diverse_results[:num_results]
     def _validate_image_results(self, results: List[Dict]) -> List[Dict]:
         """Validate and clean image results"""