Spaces:

Prathamesh1420
/

recommendation_system

Runtime error

App Files Files Community

Prathamesh1420 committed on Sep 28

Commit

296f87c

verified ·

1 Parent(s): 3b95758

Update chatbot.py

Browse files

Files changed (1) hide show

chatbot.py +320 -111

chatbot.py CHANGED Viewed

@@ -1,120 +1,329 @@
-import os
-import pickle
 import torch
-import matplotlib.pyplot as plt
-from langchain_community.document_loaders import TextLoader
-from datasets import load_dataset
-from sentence_transformers import SentenceTransformer, util
-from transformers import GPT2LMHeadModel, GPT2Tokenizer
-from transformers import BertModel, BertTokenizer
-from langchain_core.prompts import PromptTemplate
-from transformers import BlipProcessor, BlipForConditionalGeneration
-from PIL import Image
-os.environ['HUGGINGFACEHUB_API_TOKEN'] = "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"
 class Chatbot:
     def __init__(self):
-        self.load_data()
         self.load_models()
-        self.load_embeddings()
-        self.load_template()
-    def load_data(self):
-        self.data = load_dataset("ashraq/fashion-product-images-small", split="train")
-        self.images = self.data["image"]
-        self.product_frame = self.data.remove_columns("image").to_pandas()
-        self.product_data = self.product_frame.reset_index(drop=True).to_dict(orient='index')
-    def load_template(self):
-        self.template = """
-        You are a fashion shopping assistant that wants to convert customers based on the information given.
-        Describe season and usage given in the context in your interaction with the customer.
-        Use a bullet list when describing each product.
-        If user ask general question then answer them accordingly, the question may be like when the store will open, where is your store located.
-        Context: {context}
-        User question: {question}
-        Your response: {response}
-        """
-        self.prompt = PromptTemplate.from_template(self.template)
     def load_models(self):
-        self.model = SentenceTransformer('clip-ViT-B-32')
-        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-        self.blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
-    def load_embeddings(self):
-        if os.path.exists("embeddings_cache.pkl"):
-            with open("embeddings_cache.pkl", "rb") as f:
-                embeddings_cache = pickle.load(f)
-            self.image_embeddings = embeddings_cache["image_embeddings"]
-            self.text_embeddings = embeddings_cache["text_embeddings"]
-        else:
-            self.image_embeddings = self.model.encode([image for image in self.images])
-            self.text_embeddings = self.model.encode(self.product_frame['productDisplayName'])
-            embeddings_cache = {"image_embeddings": self.image_embeddings, "text_embeddings": self.text_embeddings}
-            with open("embeddings_cache.pkl", "wb") as f:
-                pickle.dump(embeddings_cache, f)
-    def create_docs(self, results):
-        docs = []
-        for result in results:
-            pid = result['corpus_id']
-            score = result['score']
-            result_string = ''
-            result_string += "Product Name:" + self.product_data[pid]['productDisplayName'] + \
-                             ';' + "Category:" + self.product_data[pid]['masterCategory'] + \
-                             ';' + "Article Type:" + self.product_data[pid]['articleType'] + \
-                             ';' + "Usage:" + self.product_data[pid]['usage'] + \
-                             ';' + "Season:" + self.product_data[pid]['season'] + \
-                             ';' + "Gender:" + self.product_data[pid]['gender']
-            # Assuming text is imported from somewhere else
-            doc = text(page_content=result_string)
-            doc.metadata['pid'] = str(pid)
-            doc.metadata['score'] = score
-            docs.append(doc)
-        return docs
-    def get_results(self, query, embeddings, top_k=5):
-        query_embedding = self.model.encode([query])
-        cos_scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]
-        top_results = torch.topk(cos_scores, k=top_k)
-        indices = top_results.indices.tolist()
-        scores = top_results.values.tolist()
-        results = [{'corpus_id': idx, 'score': score} for idx, score in zip(indices, scores)]
-        return results
-    def display_text_and_images(self, results_text):
-        for result in results_text:
-            pid = result['corpus_id']
-            product_info = self.product_data[pid]
-            print("Product Name:", product_info['productDisplayName'])
-            print("Category:", product_info['masterCategory'])
-            print("Article Type:", product_info['articleType'])
-            print("Usage:", product_info['usage'])
-            print("Season:", product_info['season'])
-            print("Gender:", product_info['gender'])
-            print("Score:", result['score'])
-            plt.imshow(self.images[pid])
-            plt.axis('off')
-            plt.show()
     def generate_image_caption(self, image_path):
-        raw_image = Image.open(image_path).convert('RGB')
-        inputs = self.blip_processor(raw_image, return_tensors="pt")
-        out = self.blip_model.generate(**inputs)
-        caption = self.blip_processor.decode(out[0], skip_special_tokens=True)
-        return caption
     def generate_response(self, query):
-        # Process the user query and generate a response
-        results_text = self.get_results(query, self.text_embeddings)
-        # Generate chatbot response
-        chatbot_response = "This is a placeholder response from the chatbot."  # Placeholder, replace with actual response
-        # Display recommended products
-        self.display_text_and_images(results_text)
-        # Return both chatbot response and recommended products
-        return chatbot_response, results_text

 import torch
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import pandas as pd
+from PIL import Image, ImageDraw, ImageFont
+import random
+import logging
+import json
+import os
+# Set up logging: configure the root logger at INFO level and create the
+# module-level logger that the Chatbot methods below write to.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 class Chatbot:
     def __init__(self):
+        """Create the chatbot: set placeholder state, then load model and data.
+
+        Every attribute is given an empty/None value first so it exists even
+        if ``load_models`` fails; ``setup_sample_data`` then fills in the
+        product catalogue, images and embeddings.
+        """
+        # NOTE(review): self.device is not read anywhere in the visible code;
+        # load_models passes the literal 'cpu' itself.
+        self.device = 'cpu'  # Force CPU usage for Hugging Face Spaces
+        logger.info("🚀 Initializing Fashion Chatbot with CPU...")
+        self.model = None  # SentenceTransformer instance, or None if loading failed
+        self.product_data = {}  # product id -> dict of product attributes
+        self.images = {}  # product id -> generated PIL image
+        self.product_embeddings = None  # array with one embedding row per product
         self.load_models()
+        self.setup_sample_data()
     def load_models(self):
+        """Load the sentence-embedding model on CPU.
+
+        On success ``self.model`` holds a ``SentenceTransformer`` instance.
+        On any failure the error is logged and ``self.model`` is set to
+        ``None``; the methods that use the model check for ``None`` and fall
+        back to non-model behaviour, so construction never raises from here.
+        """
+        try:
+            logger.info("📥 Loading SentenceTransformer model on CPU...")
+            # The device is pinned through the constructor argument. (A bare
+            # ``torch.device('cpu')`` call only builds a device object and
+            # has no global effect, so it is not used here.)
+            self.model = SentenceTransformer(
+                'all-MiniLM-L6-v2',  # Lightweight model for CPU
+                device='cpu'
+            )
+            logger.info("✅ Model loaded successfully on CPU")
+        except Exception as e:
+            logger.error(f"❌ Error loading model: {e}")
+            # Leave the model unset so callers take their fallback paths.
+            self.model = None
+    def setup_sample_data(self):
+        """Populate the in-memory demo catalogue, its images and embeddings.
+
+        Replaces ``self.product_data`` with six hard-coded products keyed by
+        integer id, builds one generated image per product into
+        ``self.images``, then calls ``create_sample_embeddings``.
+        """
+        logger.info("🛍️ Setting up sample fashion data...")
+        # Sample fashion products data. Every record carries the same keys:
+        # productDisplayName, masterCategory, articleType, usage, season,
+        # gender, baseColour and price.
+        self.product_data = {
+            0: {
+                'productDisplayName': 'Classic White T-Shirt',
+                'masterCategory': 'Apparel',
+                'articleType': 'T-Shirt',
+                'usage': 'Casual',
+                'season': 'All Season',
+                'gender': 'Unisex',
+                'baseColour': 'White',
+                'price': 29.99
+            },
+            1: {
+                'productDisplayName': 'Denim Jacket',
+                'masterCategory': 'Apparel',
+                'articleType': 'Jacket',
+                'usage': 'Casual',
+                'season': 'Spring, Fall',
+                'gender': 'Unisex',
+                'baseColour': 'Blue',
+                'price': 89.99
+            },
+            2: {
+                'productDisplayName': 'Black Leather Boots',
+                'masterCategory': 'Footwear',
+                'articleType': 'Boots',
+                'usage': 'Casual',
+                'season': 'Winter, Fall',
+                'gender': 'Unisex',
+                'baseColour': 'Black',
+                'price': 129.99
+            },
+            3: {
+                'productDisplayName': 'Summer Floral Dress',
+                'masterCategory': 'Apparel',
+                'articleType': 'Dress',
+                'usage': 'Casual',
+                'season': 'Summer',
+                'gender': 'Women',
+                'baseColour': 'Multicolor',
+                'price': 59.99
+            },
+            4: {
+                'productDisplayName': 'Sports Running Shoes',
+                'masterCategory': 'Footwear',
+                'articleType': 'Sports Shoes',
+                'usage': 'Sports',
+                'season': 'All Season',
+                'gender': 'Unisex',
+                'baseColour': 'White',
+                'price': 79.99
+            },
+            5: {
+                'productDisplayName': 'Wool Winter Scarf',
+                'masterCategory': 'Accessories',
+                'articleType': 'Scarf',
+                'usage': 'Casual',
+                'season': 'Winter',
+                'gender': 'Unisex',
+                'baseColour': 'Grey',
+                'price': 34.99
+            }
+        }
+        # Generate sample product images (must run after product_data is set,
+        # because generate_sample_image reads each product's record).
+        self.images = {}
+        for pid in self.product_data.keys():
+            self.images[pid] = self.generate_sample_image(pid)
+        # Create sample embeddings for products
+        self.create_sample_embeddings()
+        logger.info(f"✅ Loaded {len(self.product_data)} sample products")
+    def generate_sample_image(self, product_id):
+        """Generate a placeholder product image for demonstration.
+
+        Builds a 200x200 RGB image filled with the product's swatch colour
+        (from ``get_color_for_product``) and draws the product's
+        ``articleType`` on it.
+
+        Args:
+            product_id: Key into ``self.product_data``.
+
+        Returns:
+            The generated ``PIL.Image.Image``.
+
+        Raises:
+            KeyError: If ``product_id`` is not in ``self.product_data``.
+        """
+        background = self.get_color_for_product(product_id)
+        img = Image.new('RGB', (200, 200), color=background)
+        draw = ImageDraw.Draw(img)
+        # Choose a label colour that contrasts with the background: a fixed
+        # white label is invisible on the white swatches. The weights are the
+        # usual perceptual luma coefficients.
+        red, green, blue = background
+        luminance = 0.299 * red + 0.587 * green + 0.114 * blue
+        text_fill = 'black' if luminance > 160 else 'white'
+        # Add product type text
+        product_type = self.product_data[product_id]['articleType']
+        draw.text((50, 90), product_type, fill=text_fill)
+        return img
+    def get_color_for_product(self, product_id):
+        """Return the RGB swatch for a product's ``baseColour``.
+
+        Colour names without an entry in the table map to light grey
+        ``(200, 200, 200)``. Raises ``KeyError`` if the product id or its
+        ``baseColour`` field is missing.
+        """
+        colour_name = self.product_data[product_id]['baseColour']
+        swatches = {
+            'White': (255, 255, 255),
+            'Blue': (0, 0, 255),
+            'Black': (0, 0, 0),
+            'Multicolor': (255, 0, 0),
+            'Grey': (128, 128, 128),
+        }
+        if colour_name in swatches:
+            return swatches[colour_name]
+        return (200, 200, 200)
+    def create_sample_embeddings(self):
+        """Fill ``self.product_embeddings`` with one row per product.
+
+        With a loaded model, each product is described by its name, article
+        type, usage, season and gender and the descriptions are encoded.
+        Without a model, or if anything raises, random 384-wide rows are
+        used instead so the attribute is always populated.
+        """
+        try:
+            if self.model is None:
+                # No model available: stand-in random vectors.
+                self.product_embeddings = np.random.randn(len(self.product_data), 384)
+            else:
+                product_descriptions = [
+                    f"{data['productDisplayName']} {data['articleType']} {data['usage']} {data['season']} {data['gender']}"
+                    for data in self.product_data.values()
+                ]
+                self.product_embeddings = self.model.encode(product_descriptions)
+        except Exception as e:
+            logger.error(f"Error creating embeddings: {e}")
+            self.product_embeddings = np.random.randn(len(self.product_data), 384)
+    def load_data(self):
+        """Log that product data is being loaded; performs no loading itself.
+
+        The sample catalogue is populated by ``setup_sample_data``, so there
+        is nothing further to do here.
+        """
+        logger.info("📊 Loading product data...")
     def generate_image_caption(self, image_path):
+        """Generate a generic caption for an uploaded image.
+
+        No captioning model is used: the caption is one of four templates
+        chosen at random and filled in with a size word derived from the
+        image width and the name of the image's average colour.
+
+        Args:
+            image_path: Path (or file object) accepted by ``Image.open``.
+
+        Returns:
+            A caption string. If the image cannot be opened or analysed, the
+            error is logged and a fixed generic caption is returned.
+        """
+        try:
+            # Open via a context manager so the underlying file handle is
+            # released as soon as the pixel analysis is done.
+            with Image.open(image_path) as image:
+                width, _ = image.size
+                dominant_color = self.get_dominant_color(image)
+            # Generate descriptive caption based on image properties
+            if width > 1000:
+                size_desc = "large"
+            elif width > 500:
+                size_desc = "medium"
+            else:
+                size_desc = "small"
+            color_desc = self.get_color_name(dominant_color)
+            captions = [
+                f"A {size_desc} {color_desc} fashion item perfect for your style",
+                f"Stylish {color_desc} clothing item that matches current trends",
+                f"Fashionable {size_desc} apparel in {color_desc} color",
+                f"Trendy {color_desc} fashion piece suitable for various occasions"
+            ]
+            return random.choice(captions)
+        except Exception as e:
+            logger.error(f"Error generating caption: {e}")
+            return "A fashionable clothing item that suits your style"
+    def get_dominant_color(self, image):
+        """Return the average colour of *image* as an ``(r, g, b)`` tuple.
+
+        The image is converted to RGB and shrunk to 50x50 before averaging,
+        so grayscale, palette and RGBA inputs all yield three channel values.
+
+        Args:
+            image: A PIL image (anything providing ``convert`` and ``resize``).
+
+        Returns:
+            Three plain ``int`` channel means (truncated). Falls back to mid
+            gray ``(128, 128, 128)`` if the image cannot be processed.
+        """
+        try:
+            # Normalise the mode first: without this a grayscale image gives a
+            # scalar mean, RGBA gives four values, and palette images average
+            # palette indices rather than colours.
+            thumbnail = image.convert('RGB').resize((50, 50))
+            channel_means = np.mean(np.array(thumbnail), axis=(0, 1))
+            return tuple(int(channel) for channel in channel_means)
+        except Exception:
+            # Deliberate best-effort: colour detection is cosmetic, so any
+            # processing failure degrades to a neutral default.
+            return (128, 128, 128)  # Default gray
+    def get_color_name(self, rgb):
+        """Name the reference colour nearest to *rgb*.
+
+        Nearness is the squared Euclidean distance over the paired
+        components; on a tie the earlier table entry wins. ``"colored"`` is
+        returned only if no entry compares closer than infinity.
+        """
+        palette = {
+            (255, 255, 255): "white",
+            (0, 0, 0): "black",
+            (255, 0, 0): "red",
+            (0, 255, 0): "green",
+            (0, 0, 255): "blue",
+            (255, 255, 0): "yellow",
+            (128, 128, 128): "gray",
+            (255, 165, 0): "orange",
+            (128, 0, 128): "purple",
+        }
+        best_name, best_distance = "colored", float('inf')
+        for reference, label in palette.items():
+            distance = sum(
+                (component - target) ** 2
+                for component, target in zip(rgb, reference)
+            )
+            if distance < best_distance:
+                best_name, best_distance = label, distance
+        return best_name
     def generate_response(self, query):
+        """Build a reply for *query* together with product recommendations.
+
+        The reply is the canned sentence for the first keyword (in table
+        order) found in the lower-cased query, or a randomly chosen generic
+        sentence quoting the query when no keyword matches.
+
+        Returns:
+            ``(reply, recommendations)`` where ``recommendations`` comes from
+            ``get_recommendations``. On any error the problem is logged and an
+            apology with an empty list is returned.
+        """
+        try:
+            keyword_replies = {
+                'casual': "Great choice! Casual wear is perfect for everyday comfort and style.",
+                'formal': "Elegant choice! Formal wear always makes a strong impression.",
+                'sports': "Active lifestyle! Sports wear combines comfort and performance.",
+                'summer': "Perfect for warm weather! Light and breathable fabrics work best.",
+                'winter': "Stay warm and stylish! Layering is key for winter fashion.",
+                'dress': "Dresses are versatile and always in style!",
+                'shirt': "Classic shirts never go out of fashion!",
+                'shoes': "The right shoes can complete any outfit!",
+                'jacket': "Jackets add style and functionality to any outfit!"
+            }
+            lowered = query.lower()
+            # First keyword contained in the query, following table order.
+            matched = next(
+                (keyword for keyword in keyword_replies if keyword in lowered),
+                None,
+            )
+            if not matched:
+                bot_response = random.choice([
+                    f"I found some great fashion items related to '{query}'!",
+                    f"Based on your interest in '{query}', here are my recommendations:",
+                    f"Here are some stylish options for '{query}':",
+                    f"Perfect! I have some fashion suggestions for '{query}':"
+                ])
+            else:
+                bot_response = keyword_replies[matched]
+            return bot_response, self.get_recommendations(query)
+        except Exception as e:
+            logger.error(f"Error generating response: {e}")
+            return "I apologize, but I'm having trouble processing your request right now.", []
+    def get_recommendations(self, query, top_k=3):
+        """Return up to *top_k* product recommendations for *query*.
+
+        With a model and embeddings available, products are ranked by cosine
+        similarity between the encoded query and each product embedding, and
+        that similarity is reported as the score. Embedding rows are mapped
+        back to product ids by position in ``self.product_data``, so ids are
+        correct even when the keys are not ``0..n-1``.
+
+        Without a model, a random sample of products is returned with
+        placeholder scores that decrease by rank (0.9, 0.8, ...).
+
+        Args:
+            query: Free-text user query.
+            top_k: Maximum number of products to return.
+
+        Returns:
+            A list of ``{'corpus_id': <product id>, 'score': <float>}`` dicts,
+            best first. On any error the problem is logged and up to three
+            products are returned with a fixed score of 0.8.
+        """
+        try:
+            # Row i of the embedding matrix describes the i-th product key.
+            product_ids = list(self.product_data)
+            if self.model is not None and self.product_embeddings is not None:
+                query_vec = np.asarray(self.model.encode([query]), dtype=float).reshape(-1)
+                catalogue = np.asarray(self.product_embeddings, dtype=float)
+                # Cosine similarity; the floor on the norms avoids dividing by
+                # zero for an all-zero vector.
+                norms = np.linalg.norm(catalogue, axis=1) * np.linalg.norm(query_vec)
+                similarities = (catalogue @ query_vec) / np.maximum(norms, 1e-12)
+                ranked_rows = np.argsort(similarities)[::-1][:top_k]
+                return [
+                    {
+                        'corpus_id': product_ids[int(row)],
+                        'score': float(similarities[row]),
+                    }
+                    for row in ranked_rows
+                ]
+            # Fallback: random recommendations with rank-based scores.
+            chosen = random.sample(product_ids, min(top_k, len(product_ids)))
+            return [
+                {'corpus_id': pid, 'score': 0.9 - rank * 0.1}
+                for rank, pid in enumerate(chosen)
+            ]
+        except Exception as e:
+            logger.error(f"Error getting recommendations: {e}")
+            # Last resort: the first few real product ids with a fixed score.
+            return [
+                {'corpus_id': pid, 'score': 0.8}
+                for pid in list(self.product_data)[:3]
+            ]
+    def get_product_info(self, product_id):
+        """Return a display-ready dict for one product, or ``None``.
+
+        The result renames the catalogue fields to ``name``, ``category``,
+        ``article_type``, ``usage``, ``season``, ``gender``, ``color`` and
+        ``price``, and adds ``image`` from ``self.images`` (``None`` when no
+        image is stored). Unknown ids yield ``None``; any error is logged and
+        also yields ``None``.
+        """
+        try:
+            if product_id not in self.product_data:
+                return None
+            record = self.product_data[product_id]
+            # (output key, catalogue key) pairs, in output order.
+            field_map = (
+                ('name', 'productDisplayName'),
+                ('category', 'masterCategory'),
+                ('article_type', 'articleType'),
+                ('usage', 'usage'),
+                ('season', 'season'),
+                ('gender', 'gender'),
+                ('color', 'baseColour'),
+                ('price', 'price'),
+            )
+            info = {public: record[source] for public, source in field_map}
+            info['image'] = self.images.get(product_id)
+            return info
+        except Exception as e:
+            logger.error(f"Error getting product info: {e}")
+            return None