DawnC committed on
Commit
6a3bd1f
·
verified ·
1 Parent(s): 72d88d1

Upload 22 files

Browse files

Create Pixcribe Project

app.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ import spaces
5
+
6
+ from pixcribe_pipeline import PixcribePipeline
7
+ from ui_manager import UIManager
8
+
9
# Initialize Pipeline and UI Manager
# Built once at import time so every Gradio request reuses the already-loaded
# models instead of paying the 60-90 s load cost per call.
print("Initializing Pixcribe...")
print("⏳ Loading models (this may take 60-90 seconds on first run)...")
pipeline = PixcribePipeline(yolo_variant='l')  # 'l' (balanced) matches the UI's default Detection Mode
ui_manager = UIManager()  # supplies the custom CSS and HTML-formatting helpers used below
print("✅ All models loaded successfully!")
15
+
16
@spaces.GPU(duration=180)
def process_wrapper(image, yolo_variant, caption_language):
    """Run the full Pixcribe pipeline on one image and format the results.

    Args:
        image: PIL image from the Gradio upload widget (None when nothing
            has been uploaded yet).
        yolo_variant: YOLO size key chosen in the UI ('m', 'l' or 'x').
        caption_language: caption output language, 'zh' or 'en'.

    Returns:
        (annotated_image, captions_html) on success, (None, error_html)
        on missing input or failure.

    GPU-backed stages (hence the @spaces.GPU decorator): YOLOv11 detection,
    OpenCLIP ViT-H/14, EasyOCR, Places365 scene analysis and Qwen2.5-VL-7B
    caption generation — roughly 2-3 seconds total on an L4 GPU.
    """
    if image is None:
        return None, "<div style='color: #E74C3C; padding: 24px; text-align: center;'>Please upload an image</div>"

    try:
        # Platform is fixed to Instagram in the current UI.
        results = pipeline.process_image(image, 'instagram', yolo_variant, caption_language)

        if results is None:
            return None, "<div style='color: #E74C3C; padding: 24px; text-align: center;'>Processing failed. Check terminal logs for details.</div>"

    except Exception as e:
        import traceback

        trace_text = traceback.format_exc()
        divider = "=" * 60
        print(divider)
        print("ERROR DETAILS:")
        print(trace_text)
        print(divider)

        # Surface the full traceback to the user inside a collapsible panel.
        error_html = f"""
        <div style='background: #FADBD8; border: 2px solid #E74C3C; border-radius: 20px; padding: 28px; margin: 16px 0;'>
            <h3 style='color: #C0392B; margin-top: 0; font-size: 22px;'>❌ Processing Error</h3>
            <p style='color: #E74C3C; font-weight: bold; font-size: 17px; margin-bottom: 16px;'>{str(e)}</p>
            <details style='margin-top: 12px;'>
                <summary style='cursor: pointer; color: #C0392B; font-weight: bold; font-size: 16px;'>View Full Error Trace</summary>
                <pre style='background: white; padding: 16px; border-radius: 12px; overflow-x: auto; font-size: 13px; color: #2C3E50; margin-top: 12px;'>{trace_text}</pre>
            </details>
        </div>
        """
        return None, error_html

    # Success path: annotated image (falls back to the original upload when
    # the pipeline produced no overlay) plus the rendered caption HTML.
    annotated = results.get('visualized_image', image)
    rendered_captions = ui_manager.format_captions_with_copy(results['captions'])
    return annotated, rendered_captions
69
+
70
# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
with gr.Blocks(css=ui_manager.custom_css, title="Pixcribe - AI Social Media Captions") as app:

    # Page header plus the "models take a while to load" notice banner.
    ui_manager.create_header()
    ui_manager.create_info_banner()

    # Top row: image upload on the left, detection overlay on the right.
    with gr.Row(elem_classes="main-row"):
        with gr.Column(scale=1):
            with gr.Group(elem_classes="upload-card"):
                uploaded_image = gr.Image(
                    type="pil",
                    label="Upload Image",
                    elem_classes="upload-area",
                )

        with gr.Column(scale=1):
            with gr.Group(elem_classes="results-card"):
                gr.Markdown("### Detected Objects", elem_classes="section-title")
                detection_preview = gr.Image(
                    label="",
                    elem_classes="image-container",
                )

    # Settings: caption language and YOLO variant, side by side (full width).
    with gr.Group(elem_classes="settings-container"):
        gr.Markdown("### Settings", elem_classes="section-title-left")

        with gr.Row(elem_classes="settings-row"):
            language_choice = gr.Radio(
                choices=[
                    ('繁體中文', 'zh'),
                    ('English', 'en'),
                ],
                value='en',
                label="Caption Language",
                elem_classes="radio-group-inline",
            )

            detector_choice = gr.Radio(
                choices=[
                    ('Fast (m)', 'm'),
                    ('Balanced (l)', 'l'),
                    ('Accurate (x)', 'x'),
                ],
                value='l',
                label="Detection Mode",
                elem_classes="radio-group-inline",
            )

    # Single centred action button.
    with gr.Row(elem_classes="button-row"):
        generate_button = gr.Button(
            "Generate Captions",
            variant="primary",
            elem_classes="generate-button",
        )

    # Static note about processing latency.
    gr.HTML("""
    <div style="text-align: center; margin-top: 16px; color: #7F8C8D; font-size: 14px;">
        <span style="opacity: 0.8;">⚡ Please be patient - AI processing may take some time</span>
    </div>
    """)

    # Caption results panel, populated by process_wrapper (full width).
    with gr.Group(elem_classes="caption-results-container"):
        gr.Markdown("### 📝 Generated Captions", elem_classes="section-title")
        caption_panel = gr.HTML(
            label="",
            elem_id="caption-results",
        )

    ui_manager.create_footer()

    # Wire the button: (image, detector variant, language) -> (overlay, captions).
    generate_button.click(
        fn=process_wrapper,
        inputs=[uploaded_image, detector_choice, language_choice],
        outputs=[detection_preview, caption_panel],
    )

if __name__ == "__main__":
    app.launch(share=True)
brand_detection_optimizer.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ from PIL import Image
4
+ from typing import Dict, List, Tuple
5
+ import numpy as np
6
+
7
class BrandDetectionOptimizer:
    """Balance brand-detection speed against accuracy.

    Cheap pre-screening (OCR text matching plus a coarse CLIP category pass)
    prunes the brand list so the expensive per-brand deep detection only runs
    on plausible candidates.
    """

    def __init__(self, clip_manager, ocr_manager, prompt_library):
        # Injected collaborators; this class owns no models of its own.
        self.clip_manager = clip_manager
        self.ocr_manager = ocr_manager
        self.prompt_library = prompt_library

    def quick_brand_prescreening(self, image) -> List[str]:
        """Return the brands plausibly present in *image*.

        Combines three signals, from strongest to weakest:
        1. OCR alias matching, 2. coarse CLIP category classification,
        3. a small default set when neither signal fires.

        Returns:
            List of brand names worth running deep detection on.
        """
        candidates = set()

        # --- Pass 1: OCR scan (fastest and most reliable signal). ---
        ocr_hits = self.ocr_manager.extract_text(image, use_brand_preprocessing=True)
        for hit in ocr_hits:
            token = hit['text'].upper()

            # Skip tiny fragments that would match almost anything.
            if len(token) < 2:
                continue

            for name, info in self.prompt_library.get_all_brands().items():
                for alias in (a.upper() for a in info.get('aliases', [])):
                    if alias == token:
                        candidates.add(name)
                        break
                    # Partial overlap only counts for aliases of length >= 3
                    # and with a high enough length ratio in either direction.
                    if len(alias) >= 3 and (
                        (alias in token and len(alias) / len(token) > 0.6)
                        or (token in alias and len(token) / len(alias) > 0.6)
                    ):
                        candidates.add(name)
                        break

        # --- Pass 2: coarse visual category classification via CLIP. ---
        category_prompts = {
            'luxury': 'luxury brand product with monogram pattern and leather details',
            'sportswear': 'sportswear brand product with athletic logo and swoosh design',
            'tech': 'technology brand product with minimalist design and metal finish',
            'automotive': 'luxury car brand with distinctive grille and emblem',
            'watches': 'luxury watch with distinctive dial and brand logo',
            'fashion': 'fashion brand product with signature pattern or logo'
        }

        scores = self.clip_manager.classify_zero_shot(
            image, list(category_prompts.values())
        )

        # Keep only the two highest-scoring categories.
        ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:2]

        # Invert the prompt map so scored prompt text leads back to its category.
        prompt_to_category = {text: cat for cat, text in category_prompts.items()}

        for prompt_text, score in ranked:
            # Threshold raised from 0.15 to 0.30 to cut false positives.
            if score > 0.30:
                category = prompt_to_category[prompt_text]
                # Every brand in a confidently-matched category stays in play.
                candidates.update(
                    self.prompt_library.get_brands_by_category(category).keys()
                )

        # --- Pass 3: fallback when neither signal produced any candidate. ---
        # Not hard-coding results — just a sensible default of visually
        # distinctive, common brands when there is no evidence at all.
        if not candidates:
            candidates.update(['Louis Vuitton', 'Gucci', 'Nike'])

        # No count cap here; downstream quality filtering handles pruning.
        return list(candidates)

    def smart_region_selection(self, image,
                               saliency_regions: List[Dict]) -> List[Tuple[int, int, int, int]]:
        """Pick image regions worth scanning for brands.

        Replaces an exhaustive grid scan with: top saliency regions
        (padded), the image centre, and finally the full frame as a
        last resort.

        Args:
            image: image object exposing a PIL-style ``.size`` (width, height).
            saliency_regions: saliency detector output, each with a ``bbox``.

        Returns:
            List of (x1, y1, x2, y2) boxes to scan.
        """
        width, height = image.size
        picks = []

        # Strategy 1: top-3 salient regions, padded for surrounding context.
        for region in (saliency_regions or [])[:3]:
            bbox = region.get('bbox')
            if not bbox:
                continue
            x1, y1, x2, y2 = bbox
            pad = 20
            x1, y1 = max(0, x1 - pad), max(0, y1 - pad)
            x2, y2 = min(width, x2 + pad), min(height, y2 + pad)
            # Discard regions too small to contain a recognisable logo.
            if (x2 - x1) > 100 and (y2 - y1) > 100:
                picks.append((x1, y1, x2, y2))

        # Strategy 2: the centre crop — brands are usually framed centrally.
        cx, cy = width // 2, height // 2
        span = min(width, height) // 2
        picks.append((
            max(0, cx - span // 2),
            max(0, cy - span // 2),
            min(width, cx + span // 2),
            min(height, cy + span // 2),
        ))

        # Strategy 3: full frame when nothing else was selected.
        if not picks:
            picks.append((0, 0, width, height))

        return picks

    def compute_brand_confidence_boost(self, brand_name: str,
                                       ocr_results: List[Dict],
                                       base_confidence: float) -> float:
        """Boost a brand's visual confidence using OCR evidence.

        An exact alias hit adds up to 0.40 (scaled by OCR confidence); a
        partial hit up to 0.25. The result is capped at 0.95.

        Args:
            brand_name: brand to look up in the prompt library.
            ocr_results: OCR items with ``text`` and ``confidence`` keys.
            base_confidence: confidence from visual matching alone.

        Returns:
            The boosted confidence score.
        """
        info = self.prompt_library.get_brand_prompts(brand_name)
        if not info:
            return base_confidence

        aliases = [a.upper() for a in info.get('aliases', [])]

        best_boost = 0.0
        for item in ocr_results:
            token = item['text'].upper()
            ocr_conf = item['confidence']

            for alias in aliases:
                if alias == token:
                    # Exact match: strongest evidence, up to +0.40.
                    best_boost = max(best_boost, 0.40 * ocr_conf)
                elif (alias in token or token in alias) and len(alias) > 2:
                    # Partial overlap (short aliases excluded to avoid noise).
                    best_boost = max(best_boost, 0.25 * ocr_conf)

        # Never let the boosted score exceed 0.95.
        return min(base_confidence + best_boost, 0.95)
186
+
187
# Module-load confirmation, matching this project's notebook-style progress messages.
print("✓ BrandDetectionOptimizer (performance and accuracy optimizer) defined")
brand_prompts.py ADDED
@@ -0,0 +1,970 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Dict, List, Optional
3
+
4
+ class BrandPrompts:
5
+ """
6
+ 品牌視覺特徵與多模態識別資料庫
7
+ 提供品牌的視覺線索、OpenCLIP prompts、Hashtags
8
+ """
9
+
10
+ def __init__(self):
11
+ """初始化品牌資料庫"""
12
+
13
+ self.brand_prompts = {
14
+ # ===== 奢侈品牌 Luxury Brands =====
15
+ 'luxury': {
16
+ "Louis Vuitton": {
17
+ "strong_cues": [
18
+ "LV monogram pattern with interlocking L and V letters on brown canvas",
19
+ "Brown canvas with golden hardware and leather trim showing Louis Vuitton signature",
20
+ "Damier checkerboard pattern in brown and tan showing LV design",
21
+ "Monogram flower motifs with LV initials repeated across surface"
22
+ ],
23
+ "weak_cues": [
24
+ "Luxury handbag with structured silhouette and top handles",
25
+ "Premium leather goods with golden metal accents",
26
+ "Designer bag with classic proportions and elegant hardware"
27
+ ],
28
+ "region_contexts": ["bag_panel", "luggage_surface", "wallet_front", "accessory_detail"],
29
+ "openclip_prompts": {
30
+ "bag_panel": [
31
+ "Louis Vuitton monogram canvas handbag with leather trim and brass hardware",
32
+ "LV brown monogram pattern on luxury bag with golden clasps",
33
+ "Designer handbag showing Louis Vuitton signature canvas and leather details"
34
+ ],
35
+ "luggage_surface": [
36
+ "Louis Vuitton monogram luggage with brown canvas and leather corners",
37
+ "LV travel bag showing iconic monogram pattern with metallic accents",
38
+ "Luxury suitcase with Louis Vuitton canvas and protective leather trim"
39
+ ],
40
+ "wallet_front": [
41
+ "Louis Vuitton monogram wallet with compact folding design",
42
+ "LV small leather good showing monogram canvas and card slots",
43
+ "Designer wallet with Louis Vuitton pattern and golden hardware"
44
+ ]
45
+ },
46
+ "aliases": ["LV", "Louis Vuitton Monogram", "VUITTON", "LOUIS VUITTON"],
47
+ "hashtags": {
48
+ "zh": ["LV", "路易威登", "奢侈品", "精品包", "時尚"],
49
+ "en": ["LouisVuitton", "LV", "LuxuryFashion", "DesignerBag", "Luxury"]
50
+ },
51
+ "visual_distinctive": True,
52
+ "text_prominent": False
53
+ },
54
+
55
+ "Gucci": {
56
+ "strong_cues": [
57
+ "Interlocking double G logo in gold or silver metal with heart shape design",
58
+ "GG logo in shiny gold brass hardware on black quilted leather",
59
+ "Green and red web stripe on beige or brown canvas background",
60
+ "GG monogram pattern repeated across fabric or leather surface",
61
+ "Chevron matelassé quilted leather with V-shaped stitching pattern",
62
+ "Heart-shaped double G logo with antique gold finish",
63
+ "Bamboo handle detail on handbag with curved shape and metal hardware"
64
+ ],
65
+ "weak_cues": [
66
+ "Luxury fashion item with bold logo placement and premium materials",
67
+ "Designer accessory with distinctive hardware and Italian branding",
68
+ "High-end quilted leather goods with geometric stitching pattern",
69
+ "Black leather handbag with gold chain strap and structured silhouette",
70
+ "Luxury bag with chevron quilting and metallic hardware accents"
71
+ ],
72
+ "region_contexts": ["bag_front", "bag_panel", "belt_buckle", "shoe_detail", "accessory_surface", "logo_area"],
73
+ "openclip_prompts": {
74
+ "bag_front": [
75
+ "Gucci Marmont handbag with heart-shaped GG logo in antique gold on quilted black leather",
76
+ "Designer bag showing Gucci chevron matelassé quilted pattern with gold GG hardware",
77
+ "Luxury handbag with double G heart logo and V-shaped quilting on black leather",
78
+ "Gucci bag with interlocking GG logo web stripe and canvas texture",
79
+ "Black quilted leather Gucci bag with shiny gold double G emblem and chain strap",
80
+ "Gucci Marmont camera bag with chevron quilted leather and gold hardware",
81
+ "Designer handbag featuring Gucci signature GG logo with geometric quilting pattern"
82
+ ],
83
+ "bag_panel": [
84
+ "Gucci matelassé quilted leather surface with chevron V-pattern stitching",
85
+ "Black quilted leather panel with Gucci heart-shaped GG logo in center",
86
+ "Luxury leather with geometric quilting showing Gucci craftsmanship and gold hardware",
87
+ "Chevron stitched leather surface with interlocking GG emblem in antique gold",
88
+ "Gucci quilted pattern with V-shaped chevron design and metallic logo placement"
89
+ ],
90
+ "belt_buckle": [
91
+ "Gucci belt with large interlocking GG buckle in polished gold metal",
92
+ "Designer belt showing double G logo buckle with black or brown leather strap",
93
+ "Luxury belt with Gucci signature GG buckle in brass finish and Italian leather",
94
+ "Gucci GG Marmont belt with textured double G buckle and leather band"
95
+ ],
96
+ "logo_area": [
97
+ "Close-up of Gucci interlocking GG logo in gold metal with heart shape",
98
+ "Gucci double G emblem in antique gold brass on black leather background",
99
+ "Heart-shaped GG logo with metallic gold finish showing Gucci branding",
100
+ "Shiny gold Gucci GG hardware on quilted matelassé leather surface"
101
+ ]
102
+ },
103
+ "aliases": ["GG", "GUCCI", "Gucci Marmont"],
104
+ "hashtags": {
105
+ "zh": ["Gucci", "古馳", "奢侈品", "精品", "義大利時尚", "Marmont"],
106
+ "en": ["Gucci", "LuxuryFashion", "DesignerBrand", "ItalianFashion", "GG", "GucciMarmont"]
107
+ },
108
+ "visual_distinctive": True,
109
+ "text_prominent": False
110
+ },
111
+
112
+ "Chanel": {
113
+ "strong_cues": [
114
+ "Interlocking double C logo in metal or quilted leather",
115
+ "Quilted diamond pattern leather with chain strap",
116
+ "Black and white color scheme with gold or silver chain",
117
+ "Camellia flower motif as decorative element"
118
+ ],
119
+ "weak_cues": [
120
+ "Elegant quilted leather handbag with chain details",
121
+ "Luxury fashion item with classic French design",
122
+ "Designer accessory with sophisticated minimalist styling"
123
+ ],
124
+ "region_contexts": ["bag_flap", "jewelry_detail", "perfume_bottle", "clothing_label"],
125
+ "openclip_prompts": {
126
+ "bag_flap": [
127
+ "Chanel quilted bag with interlocking CC logo and chain strap",
128
+ "Classic flap bag showing Chanel diamond quilting and gold hardware",
129
+ "Luxury handbag with Chanel CC closure and leather chain"
130
+ ]
131
+ },
132
+ "aliases": ["CC", "CHANEL"],
133
+ "hashtags": {
134
+ "zh": ["Chanel", "香奈兒", "奢侈品", "精品包", "法國時尚"],
135
+ "en": ["Chanel", "ChanelBag", "Luxury", "FrenchFashion", "ClassicBag"]
136
+ },
137
+ "visual_distinctive": True,
138
+ "text_prominent": False
139
+ },
140
+
141
+ "Hermès": {
142
+ "strong_cues": [
143
+ "Orange box or shopping bag with brown ribbon",
144
+ "Birkin or Kelly bag with distinctive silhouette and hardware",
145
+ "H logo belt buckle in polished metal",
146
+ "Saddle stitching on leather goods with equestrian heritage"
147
+ ],
148
+ "weak_cues": [
149
+ "Ultra-luxury leather handbag with exceptional craftsmanship",
150
+ "Designer accessory with understated elegance and premium materials",
151
+ "High-end fashion item with classic proportions and hardware"
152
+ ],
153
+ "region_contexts": ["bag_structure", "belt_buckle", "scarf_pattern", "packaging"],
154
+ "openclip_prompts": {
155
+ "bag_structure": [
156
+ "Hermès Birkin bag with structured leather and golden padlock",
157
+ "Luxury handbag showing Hermès Kelly bag silhouette with turnlock",
158
+ "Designer bag with Hermès craftsmanship and distinctive hardware"
159
+ ]
160
+ },
161
+ "aliases": ["HERMES", "HERMÈS", "BIRKIN", "KELLY"],
162
+ "hashtags": {
163
+ "zh": ["Hermès", "愛馬仕", "柏金包", "奢侈品", "頂級精品"],
164
+ "en": ["Hermes", "Birkin", "KellyBag", "Luxury", "UltraLuxury"]
165
+ },
166
+ "visual_distinctive": True,
167
+ "text_prominent": False
168
+ },
169
+
170
+ "Prada": {
171
+ "strong_cues": [
172
+ "Triangular metal logo plate with Prada Milano text",
173
+ "Saffiano leather with crosshatch texture pattern",
174
+ "Black nylon bag with triangular logo badge",
175
+ "Minimalist design with subtle branding placement"
176
+ ],
177
+ "weak_cues": [
178
+ "Italian luxury handbag with clean modern lines",
179
+ "Designer accessory with understated contemporary styling",
180
+ "High-end leather goods with minimalist aesthetic"
181
+ ],
182
+ "region_contexts": ["bag_front", "wallet_surface", "shoe_heel", "clothing_tag"],
183
+ "openclip_prompts": {
184
+ "bag_front": [
185
+ "Prada bag with triangular metal logo and saffiano leather",
186
+ "Designer handbag showing Prada Milano badge with textured leather",
187
+ "Luxury bag with Prada signature triangle and minimalist design"
188
+ ]
189
+ },
190
+ "aliases": ["PRADA", "MILANO"],
191
+ "hashtags": {
192
+ "zh": ["Prada", "普拉達", "奢侈品", "義大利精品", "時尚"],
193
+ "en": ["Prada", "ItalianLuxury", "DesignerBag", "LuxuryFashion", "Minimalist"]
194
+ },
195
+ "visual_distinctive": True,
196
+ "text_prominent": False
197
+ }
198
+ },
199
+
200
+ # ===== 運動品牌 Sportswear Brands =====
201
+ 'sportswear': {
202
+ "Nike": {
203
+ "strong_cues": [
204
+ "Swoosh logo in black white or colored variations",
205
+ "Just Do It slogan text accompanying swoosh",
206
+ "Air Jordan jumpman silhouette logo",
207
+ "Nike Air branding on shoe midsole or tongue"
208
+ ],
209
+ "weak_cues": [
210
+ "Athletic footwear with sporty performance design",
211
+ "Sportswear with moisture-wicking technical fabric",
212
+ "Running shoe with cushioned midsole and branded details"
213
+ ],
214
+ "region_contexts": ["shoe_side", "apparel_chest", "equipment_surface", "logo_placement"],
215
+ "openclip_prompts": {
216
+ "shoe_side": [
217
+ "Nike sneaker with swoosh logo on side panel",
218
+ "Athletic shoe showing Nike branding and Air technology",
219
+ "Running shoe with Nike swoosh and performance design"
220
+ ],
221
+ "apparel_chest": [
222
+ "Nike athletic wear with swoosh logo on chest",
223
+ "Sports apparel showing Nike branding and technical fabric",
224
+ "Performance clothing with Nike swoosh and Just Do It text"
225
+ ]
226
+ },
227
+ "aliases": ["NIKE", "JUST DO IT", "swoosh"],
228
+ "hashtags": {
229
+ "zh": ["Nike", "耐吉", "運動", "球鞋", "運動品牌"],
230
+ "en": ["Nike", "JustDoIt", "Sneakers", "Athletic", "Sportswear"]
231
+ },
232
+ "visual_distinctive": True,
233
+ "text_prominent": True
234
+ },
235
+
236
+ "Adidas": {
237
+ "strong_cues": [
238
+ "Three stripes design on side of shoes or apparel",
239
+ "Trefoil logo with three-leaf clover design",
240
+ "Performance logo with three bars forming mountain shape",
241
+ "Boost technology branding on shoe midsole"
242
+ ],
243
+ "weak_cues": [
244
+ "Athletic footwear with three-stripe design element",
245
+ "Sportswear with retro or performance styling",
246
+ "Running shoe with distinctive midsole technology"
247
+ ],
248
+ "region_contexts": ["shoe_side", "apparel_sleeve", "equipment_detail", "logo_area"],
249
+ "openclip_prompts": {
250
+ "shoe_side": [
251
+ "Adidas sneaker with three stripes on side panel",
252
+ "Athletic shoe showing Adidas branding and Boost sole",
253
+ "Sports footwear with Adidas three-stripe design"
254
+ ]
255
+ },
256
+ "aliases": ["ADIDAS", "ORIGINALS", "three stripes"],
257
+ "hashtags": {
258
+ "zh": ["Adidas", "愛迪達", "三條線", "運動", "球鞋"],
259
+ "en": ["Adidas", "ThreeStripes", "Sneakers", "Sportswear", "Athletic"]
260
+ },
261
+ "visual_distinctive": True,
262
+ "text_prominent": True
263
+ },
264
+
265
+ "Puma": {
266
+ "strong_cues": [
267
+ "Leaping puma cat logo in silhouette form",
268
+ "Puma wordmark text in distinctive font",
269
+ "Formstrip design on side of shoes",
270
+ "Cat logo combined with Puma text branding"
271
+ ],
272
+ "weak_cues": [
273
+ "Athletic footwear with sleek performance design",
274
+ "Sportswear with modern styling and branding",
275
+ "Running shoe with lightweight construction"
276
+ ],
277
+ "region_contexts": ["shoe_side", "apparel_detail", "equipment_logo"],
278
+ "openclip_prompts": {
279
+ "shoe_side": [
280
+ "Puma sneaker with cat logo and formstrip design",
281
+ "Athletic shoe showing Puma branding on side",
282
+ "Sports footwear with Puma leaping cat emblem"
283
+ ]
284
+ },
285
+ "aliases": ["PUMA"],
286
+ "hashtags": {
287
+ "zh": ["Puma", "彪馬", "運動品牌", "球鞋"],
288
+ "en": ["Puma", "Sneakers", "Athletic", "Sportswear"]
289
+ },
290
+ "visual_distinctive": True,
291
+ "text_prominent": True
292
+ },
293
+
294
+ "Under Armour": {
295
+ "strong_cues": [
296
+ "Interlocking UA logo design",
297
+ "HeatGear or ColdGear technology branding",
298
+ "Under Armour wordmark in athletic font",
299
+ "Performance fabric with visible texture pattern"
300
+ ],
301
+ "weak_cues": [
302
+ "Athletic apparel with technical performance features",
303
+ "Sportswear with moisture management technology",
304
+ "Training gear with modern athletic design"
305
+ ],
306
+ "region_contexts": ["apparel_chest", "shoe_detail", "equipment_surface"],
307
+ "openclip_prompts": {
308
+ "apparel_chest": [
309
+ "Under Armour shirt with UA logo on chest",
310
+ "Athletic wear showing Under Armour branding and HeatGear",
311
+ "Performance apparel with Under Armour logo and technical fabric"
312
+ ]
313
+ },
314
+ "aliases": ["UA", "UNDER ARMOUR"],
315
+ "hashtags": {
316
+ "zh": ["UnderArmour", "安德瑪", "運動服飾", "訓練裝備"],
317
+ "en": ["UnderArmour", "UA", "Athletic", "PerformanceGear", "Training"]
318
+ },
319
+ "visual_distinctive": False,
320
+ "text_prominent": True
321
+ }
322
+ },
323
+
324
+ # ===== 科技品牌 Tech Brands =====
325
+ 'tech': {
326
+ "Apple": {
327
+ "strong_cues": [
328
+ "Bitten apple logo in silver white or black",
329
+ "Minimalist aluminum or glass device design",
330
+ "iPhone with distinctive notch or dynamic island",
331
+ "MacBook with glowing apple logo on lid"
332
+ ],
333
+ "weak_cues": [
334
+ "Sleek electronic device with premium materials",
335
+ "Smartphone with edge-to-edge display design",
336
+ "Laptop with thin profile and minimal branding"
337
+ ],
338
+ "region_contexts": ["device_back", "laptop_lid", "packaging", "product_front"],
339
+ "openclip_prompts": {
340
+ "device_back": [
341
+ "iPhone back with apple logo and camera array",
342
+ "Apple device showing bitten apple emblem and glass back",
343
+ "Smartphone with Apple branding and premium finish"
344
+ ],
345
+ "laptop_lid": [
346
+ "MacBook with glowing apple logo on aluminum lid",
347
+ "Apple laptop showing minimalist design and apple emblem",
348
+ "Premium notebook with Apple branding and sleek profile"
349
+ ]
350
+ },
351
+ "aliases": ["APPLE", "IPHONE", "IPAD", "MACBOOK", "apple logo"],
352
+ "hashtags": {
353
+ "zh": ["Apple", "蘋果", "iPhone", "科技", "蘋果產品"],
354
+ "en": ["Apple", "iPhone", "MacBook", "Tech", "iOS"]
355
+ },
356
+ "visual_distinctive": True,
357
+ "text_prominent": False
358
+ },
359
+
360
+ "Samsung": {
361
+ "strong_cues": [
362
+ "Samsung wordmark logo in blue or white",
363
+ "Galaxy branding on smartphone",
364
+ "Curved edge display on premium devices",
365
+ "S Pen stylus with Samsung device"
366
+ ],
367
+ "weak_cues": [
368
+ "Android smartphone with large display",
369
+ "Electronic device with modern design",
370
+ "Tech product with screen and branding"
371
+ ],
372
+ "region_contexts": ["device_front", "product_back", "packaging"],
373
+ "openclip_prompts": {
374
+ "device_front": [
375
+ "Samsung Galaxy phone with curved display and minimal bezels",
376
+ "Smartphone showing Samsung branding and screen",
377
+ "Android device with Samsung logo and modern design"
378
+ ]
379
+ },
380
+ "aliases": ["SAMSUNG", "Galaxy"],
381
+ "hashtags": {
382
+ "zh": ["Samsung", "三星", "Galaxy", "安卓", "科技"],
383
+ "en": ["Samsung", "Galaxy", "Android", "Tech", "Smartphone"]
384
+ },
385
+ "visual_distinctive": False,
386
+ "text_prominent": True
387
+ },
388
+
389
+ "Microsoft": {
390
+ "strong_cues": [
391
+ "Four-colored square window logo",
392
+ "Surface branding on devices",
393
+ "Windows logo on keyboard or device",
394
+ "Xbox green logo on gaming products"
395
+ ],
396
+ "weak_cues": [
397
+ "Premium laptop or tablet device",
398
+ "Gaming console or controller",
399
+ "Computer hardware with modern design"
400
+ ],
401
+ "region_contexts": ["device_surface", "keyboard_area", "product_branding"],
402
+ "openclip_prompts": {
403
+ "device_surface": [
404
+ "Microsoft Surface laptop with logo and premium build",
405
+ "Device showing Microsoft branding and sleek design",
406
+ "Surface product with distinctive kickstand and logo"
407
+ ]
408
+ },
409
+ "aliases": ["MICROSOFT", "Surface", "Windows"],
410
+ "hashtags": {
411
+ "zh": ["Microsoft", "微軟", "Surface", "科技", "Windows"],
412
+ "en": ["Microsoft", "Surface", "Windows", "Tech", "Xbox"]
413
+ },
414
+ "visual_distinctive": False,
415
+ "text_prominent": True
416
+ }
417
+ },
418
+
419
+ # ===== 汽車品牌 Automotive Brands =====
420
+ 'automotive': {
421
+ "Mercedes-Benz": {
422
+ "strong_cues": [
423
+ "Three-pointed star logo in circle",
424
+ "Mercedes-Benz wordmark on vehicle",
425
+ "Large star emblem on front grille",
426
+ "Hood ornament with standing star"
427
+ ],
428
+ "weak_cues": [
429
+ "Luxury vehicle with premium design",
430
+ "Car with elegant styling and badge",
431
+ "Automobile with refined details"
432
+ ],
433
+ "region_contexts": ["front_grille", "hood_ornament", "wheel_center", "badge"],
434
+ "openclip_prompts": {
435
+ "front_grille": [
436
+ "Mercedes-Benz front with three-pointed star on grille",
437
+ "Luxury car showing Mercedes logo and elegant grille design",
438
+ "Vehicle with Mercedes-Benz star emblem and premium styling"
439
+ ]
440
+ },
441
+ "aliases": ["Mercedes", "Benz", "MB", "MERCEDES-BENZ"],
442
+ "hashtags": {
443
+ "zh": ["Mercedes", "賓士", "豪華車", "汽車", "德國車"],
444
+ "en": ["Mercedes", "Benz", "LuxuryCar", "German", "Automotive"]
445
+ },
446
+ "visual_distinctive": True,
447
+ "text_prominent": False
448
+ },
449
+
450
+ "BMW": {
451
+ "strong_cues": [
452
+ "Blue and white roundel logo with BMW letters",
453
+ "Kidney grille design on front",
454
+ "Hofmeister kink in rear window design",
455
+ "BMW M badge for performance models"
456
+ ],
457
+ "weak_cues": [
458
+ "Luxury sports sedan with dynamic styling",
459
+ "Premium vehicle with distinctive design",
460
+ "Car with performance-oriented features"
461
+ ],
462
+ "region_contexts": ["front_badge", "wheel_center", "rear_emblem"],
463
+ "openclip_prompts": {
464
+ "front_badge": [
465
+ "BMW front with blue and white roundel and kidney grille",
466
+ "Luxury car showing BMW logo and distinctive grille design",
467
+ "Vehicle with BMW emblem and sporty styling"
468
+ ]
469
+ },
470
+ "aliases": ["BMW"],
471
+ "hashtags": {
472
+ "zh": ["BMW", "寶馬", "豪華車", "德國車", "性能車"],
473
+ "en": ["BMW", "LuxuryCar", "German", "Performance", "Ultimate Driving Machine"]
474
+ },
475
+ "visual_distinctive": True,
476
+ "text_prominent": False
477
+ },
478
+
479
+ "Tesla": {
480
+ "strong_cues": [
481
+ "T-shaped logo resembling cross-section of electric motor",
482
+ "Tesla wordmark on vehicle",
483
+ "Minimalist design with flush door handles",
484
+ "Large touchscreen display in interior"
485
+ ],
486
+ "weak_cues": [
487
+ "Electric vehicle with modern design",
488
+ "Car with clean aerodynamic styling",
489
+ "Automobile with minimal exterior branding"
490
+ ],
491
+ "region_contexts": ["front_badge", "rear_emblem", "wheel_center"],
492
+ "openclip_prompts": {
493
+ "front_badge": [
494
+ "Tesla front with T logo and minimalist design",
495
+ "Electric vehicle showing Tesla branding and clean styling",
496
+ "Car with Tesla emblem and aerodynamic profile"
497
+ ]
498
+ },
499
+ "aliases": ["TESLA"],
500
+ "hashtags": {
501
+ "zh": ["Tesla", "特斯拉", "電動車", "科技", "環保"],
502
+ "en": ["Tesla", "ElectricVehicle", "EV", "Tech", "Sustainable"]
503
+ },
504
+ "visual_distinctive": True,
505
+ "text_prominent": False
506
+ }
507
+ },
508
+
509
+ # ===== 鐘錶品牌 Watch Brands =====
510
+ 'watches': {
511
+ "Rolex": {
512
+ "strong_cues": [
513
+ "Crown logo at 12 o'clock position",
514
+ "Rolex wordmark on dial with Oyster Perpetual text",
515
+ "Cyclops date magnifier on crystal",
516
+ "Jubilee or Oyster bracelet design"
517
+ ],
518
+ "weak_cues": [
519
+ "Luxury watch with metal bracelet",
520
+ "Timepiece with classic round case",
521
+ "Wristwatch with premium finish"
522
+ ],
523
+ "region_contexts": ["watch_dial", "bracelet_clasp", "case_side"],
524
+ "openclip_prompts": {
525
+ "watch_dial": [
526
+ "Rolex watch dial with crown logo and Oyster Perpetual text",
527
+ "Luxury timepiece showing Rolex branding and date window",
528
+ "Wristwatch with Rolex crown emblem and classic design"
529
+ ]
530
+ },
531
+ "aliases": ["ROLEX", "OYSTER PERPETUAL"],
532
+ "hashtags": {
533
+ "zh": ["Rolex", "勞力士", "手錶", "奢華", "瑞士錶"],
534
+ "en": ["Rolex", "LuxuryWatch", "Swiss", "Timepiece", "OysterPerpetual"]
535
+ },
536
+ "visual_distinctive": True,
537
+ "text_prominent": True
538
+ },
539
+
540
+ "Omega": {
541
+ "strong_cues": [
542
+ "Omega symbol Ω on dial or case",
543
+ "Seamaster or Speedmaster model branding",
544
+ "Co-Axial escapement text on dial",
545
+ "Distinctive bracelet or strap design"
546
+ ],
547
+ "weak_cues": [
548
+ "Swiss luxury watch with sporty design",
549
+ "Timepiece with professional appearance",
550
+ "Wristwatch with precision craftsmanship"
551
+ ],
552
+ "region_contexts": ["watch_dial", "case_back", "bracelet"],
553
+ "openclip_prompts": {
554
+ "watch_dial": [
555
+ "Omega watch dial with Ω symbol and Seamaster branding",
556
+ "Luxury timepiece showing Omega logo and Co-Axial text",
557
+ "Wristwatch with Omega emblem and professional design"
558
+ ]
559
+ },
560
+ "aliases": ["OMEGA", "Ω"],
561
+ "hashtags": {
562
+ "zh": ["Omega", "歐米茄", "手錶", "瑞士錶", "奢華"],
563
+ "en": ["Omega", "Seamaster", "Speedmaster", "SwissWatch", "Luxury"]
564
+ },
565
+ "visual_distinctive": True,
566
+ "text_prominent": True
567
+ }
568
+ },
569
+
570
+ # ===== 時尚品牌 Fashion Brands =====
571
+ 'fashion': {
572
+ "Zara": {
573
+ "strong_cues": [
574
+ "Zara wordmark in sans-serif font",
575
+ "Minimalist clothing tag design",
576
+ "Fast fashion styling with current trends",
577
+ "Zara logo on shopping bag or packaging"
578
+ ],
579
+ "weak_cues": [
580
+ "Contemporary fashion apparel",
581
+ "Trendy clothing with modern cut",
582
+ "Affordable fashion item"
583
+ ],
584
+ "region_contexts": ["clothing_tag", "shopping_bag", "label"],
585
+ "openclip_prompts": {
586
+ "clothing_tag": [
587
+ "Zara clothing tag with brand logo",
588
+ "Fashion item showing Zara label",
589
+ "Apparel with Zara branding"
590
+ ]
591
+ },
592
+ "aliases": ["ZARA"],
593
+ "hashtags": {
594
+ "zh": ["Zara", "時尚", "快時尚", "穿搭"],
595
+ "en": ["Zara", "Fashion", "FastFashion", "Style", "OOTD"]
596
+ },
597
+ "visual_distinctive": False,
598
+ "text_prominent": True
599
+ },
600
+
601
+ "H&M": {
602
+ "strong_cues": [
603
+ "H&M logo in red and white",
604
+ "Hennes & Mauritz full brand name",
605
+ "Conscious collection labeling",
606
+ "Distinctive red shopping bag"
607
+ ],
608
+ "weak_cues": [
609
+ "Affordable fashion clothing",
610
+ "Casual apparel with trendy design",
611
+ "Fast fashion item"
612
+ ],
613
+ "region_contexts": ["clothing_tag", "label", "shopping_bag"],
614
+ "openclip_prompts": {
615
+ "clothing_tag": [
616
+ "H&M clothing tag with red and white logo",
617
+ "Fashion item showing H&M branding",
618
+ "Apparel with Hennes & Mauritz label"
619
+ ]
620
+ },
621
+ "aliases": ["HM", "H&M", "HENNES", "MAURITZ"],
622
+ "hashtags": {
623
+ "zh": ["HM", "時尚", "快時尚", "平價時尚"],
624
+ "en": ["HM", "Fashion", "FastFashion", "Style", "AffordableFashion"]
625
+ },
626
+ "visual_distinctive": False,
627
+ "text_prominent": True
628
+ },
629
+
630
+ "Ralph Lauren": {
631
+ "strong_cues": [
632
+ "Polo player on horse logo",
633
+ "Polo Ralph Lauren text branding",
634
+ "Preppy American style clothing",
635
+ "Polo shirt with collar and logo"
636
+ ],
637
+ "weak_cues": [
638
+ "Classic American fashion item",
639
+ "Preppy styled clothing",
640
+ "Casual wear with logo detail"
641
+ ],
642
+ "region_contexts": ["shirt_chest", "clothing_tag", "logo_placement"],
643
+ "openclip_prompts": {
644
+ "shirt_chest": [
645
+ "Polo shirt with Ralph Lauren polo player logo",
646
+ "Casual wear showing Polo Ralph Lauren emblem",
647
+ "Apparel with Ralph Lauren polo player branding"
648
+ ]
649
+ },
650
+ "aliases": ["Polo", "RALPH LAUREN", "RL"],
651
+ "hashtags": {
652
+ "zh": ["RalphLauren", "Polo", "美式風格", "經典時尚"],
653
+ "en": ["RalphLauren", "Polo", "AmericanStyle", "Preppy", "Classic"]
654
+ },
655
+ "visual_distinctive": True,
656
+ "text_prominent": True
657
+ },
658
+
659
+ "Tommy Hilfiger": {
660
+ "strong_cues": [
661
+ "Red white and blue flag logo",
662
+ "Tommy Hilfiger wordmark text",
663
+ "Preppy American sportswear styling",
664
+ "Flag emblem on clothing"
665
+ ],
666
+ "weak_cues": [
667
+ "Casual American fashion",
668
+ "Sporty preppy clothing",
669
+ "Logo-embellished apparel"
670
+ ],
671
+ "region_contexts": ["clothing_chest", "tag", "logo_area"],
672
+ "openclip_prompts": {
673
+ "clothing_chest": [
674
+ "Tommy Hilfiger apparel with flag logo",
675
+ "Casual wear showing Tommy Hilfiger branding",
676
+ "Clothing with red white blue Tommy emblem"
677
+ ]
678
+ },
679
+ "hashtags": {
680
+ "zh": ["TommyHilfiger", "美式休閒", "時尚", "經典"],
681
+ "en": ["TommyHilfiger", "American", "Preppy", "Fashion", "Classic"]
682
+ }
683
+ },
684
+
685
+ "Uniqlo": {
686
+ "strong_cues": [
687
+ "Uniqlo wordmark in red and white",
688
+ "LifeWear philosophy branding",
689
+ "Minimalist Japanese design aesthetic",
690
+ "HeatTech or AIRism technology labels"
691
+ ],
692
+ "weak_cues": [
693
+ "Simple functional clothing",
694
+ "Basic casual apparel",
695
+ "Affordable everyday wear"
696
+ ],
697
+ "region_contexts": ["clothing_tag", "label", "shopping_bag"],
698
+ "openclip_prompts": {
699
+ "clothing_tag": [
700
+ "Uniqlo clothing tag with brand logo",
701
+ "Apparel showing Uniqlo LifeWear branding",
702
+ "Clothing with Uniqlo label and technology marker"
703
+ ]
704
+ },
705
+ "hashtags": {
706
+ "zh": ["Uniqlo", "優衣庫", "日系", "簡約", "基本款"],
707
+ "en": ["Uniqlo", "LifeWear", "Japanese", "Minimalist", "Basics"]
708
+ }
709
+ },
710
+
711
+ "Gap": {
712
+ "strong_cues": [
713
+ "Gap logo in blue square",
714
+ "Classic American casual styling",
715
+ "Denim and khaki product focus",
716
+ "Gap wordmark on tags"
717
+ ],
718
+ "weak_cues": [
719
+ "Casual American clothing",
720
+ "Basic everyday apparel",
721
+ "Classic wardrobe staples"
722
+ ],
723
+ "region_contexts": ["clothing_tag", "label", "logo_placement"],
724
+ "openclip_prompts": {
725
+ "clothing_tag": [
726
+ "Gap clothing tag with blue logo",
727
+ "Apparel showing Gap branding",
728
+ "Casual wear with Gap label"
729
+ ]
730
+ },
731
+ "hashtags": {
732
+ "zh": ["Gap", "美式休閒", "經典", "基本款"],
733
+ "en": ["Gap", "American", "Casual", "Classic", "Everyday"]
734
+ }
735
+ },
736
+
737
+ "Lacoste": {
738
+ "strong_cues": [
739
+ "Green crocodile logo",
740
+ "Polo shirt with crocodile emblem",
741
+ "French sportswear styling",
742
+ "Crocodile on left chest area"
743
+ ],
744
+ "weak_cues": [
745
+ "Tennis-inspired fashion",
746
+ "Sporty casual clothing",
747
+ "Preppy athletic wear"
748
+ ],
749
+ "region_contexts": ["shirt_chest", "clothing_detail", "logo_area"],
750
+ "openclip_prompts": {
751
+ "shirt_chest": [
752
+ "Lacoste polo shirt with green crocodile logo",
753
+ "Sportswear showing Lacoste emblem on chest",
754
+ "Tennis apparel with Lacoste crocodile branding"
755
+ ]
756
+ },
757
+ "hashtags": {
758
+ "zh": ["Lacoste", "鱷魚", "法國", "網球", "運動時尚"],
759
+ "en": ["Lacoste", "Crocodile", "French", "Tennis", "Sporty"]
760
+ }
761
+ },
762
+
763
+ "Calvin Klein": {
764
+ "strong_cues": [
765
+ "CK logo or Calvin Klein wordmark",
766
+ "Minimalist modern design aesthetic",
767
+ "Monochromatic color schemes",
768
+ "Underwear waistband with CK logo"
769
+ ],
770
+ "weak_cues": [
771
+ "Contemporary minimalist fashion",
772
+ "Modern casual clothing",
773
+ "Designer basics"
774
+ ],
775
+ "region_contexts": ["clothing_tag", "waistband", "logo_area"],
776
+ "openclip_prompts": {
777
+ "clothing_tag": [
778
+ "Calvin Klein clothing with CK logo",
779
+ "Apparel showing Calvin Klein minimalist branding",
780
+ "Fashion item with CK monogram"
781
+ ]
782
+ },
783
+ "hashtags": {
784
+ "zh": ["CalvinKlein", "CK", "簡約", "美式時尚", "現代"],
785
+ "en": ["CalvinKlein", "CK", "Minimalist", "Modern", "Designer"]
786
+ }
787
+ },
788
+
789
+ "Levi's": {
790
+ "strong_cues": [
791
+ "Red tab on back pocket of jeans",
792
+ "Two horse leather patch on waistband",
793
+ "501 or other style number branding",
794
+ "Arcuate stitching pattern on back pockets"
795
+ ],
796
+ "weak_cues": [
797
+ "Classic denim jeans",
798
+ "American workwear styling",
799
+ "Vintage-inspired casual wear"
800
+ ],
801
+ "region_contexts": ["jeans_pocket", "waistband_patch", "back_detail"],
802
+ "openclip_prompts": {
803
+ "jeans_pocket": [
804
+ "Levi's jeans with red tab on back pocket",
805
+ "Denim showing Levi's two horse patch and arcuate stitching",
806
+ "Jeans with Levi's 501 branding and classic details"
807
+ ]
808
+ },
809
+ "hashtags": {
810
+ "zh": ["Levis", "李維斯", "牛仔褲", "丹寧", "美式"],
811
+ "en": ["Levis", "Denim", "Jeans", "American", "501"]
812
+ }
813
+ },
814
+
815
+ "The North Face": {
816
+ "strong_cues": [
817
+ "Half dome logo design",
818
+ "The North Face wordmark",
819
+ "Outdoor technical gear styling",
820
+ "Logo patch on jacket or backpack"
821
+ ],
822
+ "weak_cues": [
823
+ "Outdoor athletic apparel",
824
+ "Technical outdoor gear",
825
+ "Adventure clothing"
826
+ ],
827
+ "region_contexts": ["jacket_chest", "backpack_front", "apparel_sleeve"],
828
+ "openclip_prompts": {
829
+ "jacket_chest": [
830
+ "The North Face jacket with half dome logo",
831
+ "Outdoor apparel showing North Face branding",
832
+ "Technical gear with The North Face emblem"
833
+ ]
834
+ },
835
+ "hashtags": {
836
+ "zh": ["TheNorthFace", "北臉", "戶外", "機能", "登山"],
837
+ "en": ["TheNorthFace", "Outdoor", "Adventure", "Technical", "Hiking"]
838
+ }
839
+ }
840
+ }
841
+ }
842
+
843
+ print(f"✓ Brand Prompts initialized with {self._count_brands()} brands across {len(self.brand_prompts)} categories")
844
+
845
+ def _count_brands(self) -> int:
846
+ """計算總品牌數量"""
847
+ total = 0
848
+ for category in self.brand_prompts.values():
849
+ total += len(category)
850
+ return total
851
+
852
+ def get_prompts(self, brand_name: str) -> Optional[Dict]:
853
+ """
854
+ 取得特定品牌的完整 prompt 資料
855
+
856
+ Args:
857
+ brand_name: 品牌名稱
858
+
859
+ Returns:
860
+ 品牌資料字典,若不存在則返回 None
861
+ """
862
+ for category in self.brand_prompts.values():
863
+ if brand_name in category:
864
+ result = category[brand_name].copy()
865
+ result['category'] = self.get_brand_category(brand_name)
866
+ return result
867
+ return None
868
+
869
+ def get_brand_category(self, brand_name: str) -> str:
870
+ """
871
+ 取得品牌類別
872
+
873
+ Args:
874
+ brand_name: 品牌名稱
875
+
876
+ Returns:
877
+ 品牌類別(luxury, sportswear, tech, etc.)
878
+ """
879
+ for category_name, brands in self.brand_prompts.items():
880
+ if brand_name in brands:
881
+ return category_name
882
+ return 'unknown'
883
+
884
+ def get_all_brands(self) -> Dict:
885
+ """
886
+ 取得所有品牌的扁平化字典
887
+
888
+ Returns:
889
+ 扁平化的品牌字典 {brand_name: brand_data}
890
+ """
891
+ flat_brands = {}
892
+ for category_name, brands in self.brand_prompts.items():
893
+ for brand_name, brand_data in brands.items():
894
+ brand_data_copy = brand_data.copy()
895
+ brand_data_copy['category'] = category_name
896
+ flat_brands[brand_name] = brand_data_copy
897
+ return flat_brands
898
+
899
+ def get_brands_by_category(self, category: str) -> Dict:
900
+ """
901
+ 取得特定類別的所有品牌
902
+
903
+ Args:
904
+ category: 類別名稱
905
+
906
+ Returns:
907
+ 該類別的品牌字典
908
+ """
909
+ return self.brand_prompts.get(category, {})
910
+
911
+ def search_brand_by_alias(self, alias: str) -> Optional[str]:
912
+ """
913
+ 根據別名搜尋品牌名稱(模糊匹配)
914
+
915
+ Args:
916
+ alias: 品牌別名或簡稱
917
+
918
+ Returns:
919
+ 品牌正式名稱,若找不到則返回 None
920
+ """
921
+ alias_lower = alias.lower()
922
+
923
+ # 簡單的別名映射
924
+ alias_map = {
925
+ 'lv': 'Louis Vuitton',
926
+ 'ck': 'Calvin Klein',
927
+ 'tnf': 'The North Face',
928
+ 'ua': 'Under Armour',
929
+ 'hm': 'H&M'
930
+ }
931
+
932
+ if alias_lower in alias_map:
933
+ return alias_map[alias_lower]
934
+
935
+ # 模糊匹配品牌名稱
936
+ for brand_name in self.get_all_brands().keys():
937
+ if alias_lower in brand_name.lower():
938
+ return brand_name
939
+
940
+ return None
941
+
942
+ def get_hashtags(self, brand_name: str, language: str = 'zh') -> List[str]:
943
+ """
944
+ 取得品牌的 hashtags
945
+
946
+ Args:
947
+ brand_name: 品牌名稱
948
+ language: 語言 ('zh', 'en', 或 'zh-en')
949
+
950
+ Returns:
951
+ Hashtag 列表
952
+ """
953
+ brand_data = self.get_prompts(brand_name)
954
+ if not brand_data:
955
+ return []
956
+
957
+ hashtags = brand_data.get('hashtags', {})
958
+
959
+ if language == 'zh':
960
+ return hashtags.get('zh', [])
961
+ elif language == 'en':
962
+ return hashtags.get('en', [])
963
+ elif language == 'zh-en' or language == 'both':
964
+ zh_tags = hashtags.get('zh', [])
965
+ en_tags = hashtags.get('en', [])
966
+ return zh_tags + en_tags
967
+ else:
968
+ return hashtags.get('zh', [])
969
+
970
+ print("✓ BrandPrompts defined")
brand_recognition_manager.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import math
3
+ from PIL import Image
4
+ from typing import Dict, List, Tuple
5
+ from rapidfuzz import fuzz
6
+ from prompt_library_manager import PromptLibraryManager
7
+ from brand_detection_optimizer import BrandDetectionOptimizer
8
+
9
+ class BrandRecognitionManager:
10
+ """Multi-modal brand recognition with detailed prompts (Visual + Text)"""
11
+
12
+ def __init__(self, clip_manager, ocr_manager, prompt_library=None):
13
+ self.clip_manager = clip_manager
14
+ self.ocr_manager = ocr_manager
15
+ self.prompt_library = prompt_library
16
+ self.flat_brands = prompt_library.get_all_brands()
17
+
18
+ # Initialize optimizer for smart brand detection
19
+ self.optimizer = BrandDetectionOptimizer(clip_manager, ocr_manager, prompt_library)
20
+
21
+ print(f"✓ Brand Recognition Manager loaded with {len(self.flat_brands)} brands (with optimizer)")
22
+
23
    def recognize_brand(self, image_region: Image.Image, full_image: Image.Image,
                       region_bbox: List[int] = None) -> List[Tuple[str, float, List[int]]]:
        """Recognize brands using detailed context-aware prompts

        Fusion pipeline: classify the crop's context, score every brand
        visually with context-specific CLIP prompts, refine via multi-scale
        matching, fuzzy-match OCR text, then fuse the signals with per-brand
        adaptive weights.

        Args:
            image_region: Cropped region containing potential brand
            full_image: Full image for OCR
            region_bbox: Bounding box [x1, y1, x2, y2] for visualization

        Returns:
            List of (brand_name, confidence, bbox) tuples
        """

        # Step 1: Classify region context (e.g. bag_panel, shoe_side)
        region_context = self._classify_region_context(image_region)
        print(f" [DEBUG] Region context classified as: {region_context}")

        # Step 2: Use context-specific OpenCLIP prompts
        brand_scores = {}

        for brand_name, brand_info in self.flat_brands.items():
            # Get best matching context for this brand
            best_context = self._match_region_to_brand_context(region_context, brand_info['region_contexts'])

            if best_context and best_context in brand_info['openclip_prompts']:
                # Use context-specific prompts
                prompts = brand_info['openclip_prompts'][best_context]
                visual_scores = self.clip_manager.classify_zero_shot(image_region, prompts)

                # Average scores from all prompts
                avg_score = sum(visual_scores.values()) / len(visual_scores) if visual_scores else 0.0
            else:
                # Fallback to strong cues
                prompts = brand_info['strong_cues'][:5] # Top 5 strong cues
                visual_scores = self.clip_manager.classify_zero_shot(image_region, prompts)
                avg_score = sum(visual_scores.values()) / len(visual_scores) if visual_scores else 0.0

            brand_scores[brand_name] = avg_score

        # Step 2.5: Multi-scale visual matching for better robustness
        # (takes the max score across 0.8x/1.0x/1.2x resizes)
        brand_scores = self._multi_scale_visual_matching(image_region, brand_scores)

        # Step 3: OCR text matching with brand-optimized preprocessing
        # NOTE: OCR runs on the *full* image, not the crop, so wordmarks
        # outside the region can still contribute to a brand's score.
        ocr_results = self.ocr_manager.extract_text(full_image, use_brand_preprocessing=True)
        text_matches = self._fuzzy_text_matching(ocr_results)

        print(f" [DEBUG] OCR found {len(ocr_results)} text regions")
        if text_matches:
            print(f" [DEBUG] OCR brand matches: {text_matches}")

        # Step 4: Adaptive weighted fusion (dynamic weights per brand)
        final_scores = {}
        for brand_name in self.flat_brands.keys():
            visual_score = brand_scores.get(brand_name, 0.0)
            # text_matches maps brand -> (fuzzy_ratio, ocr_confidence)
            text_score, ocr_conf = text_matches.get(brand_name, (0.0, 0.0))

            # Calculate adaptive weights based on brand characteristics
            visual_weight, text_weight, ocr_weight = self._calculate_adaptive_weights(
                brand_name, visual_score, text_score, ocr_conf
            )

            # Weighted fusion with adaptive weights; visual score passes
            # through a sigmoid (_scale_visual) before weighting.
            final_score = (
                visual_weight * self._scale_visual(visual_score) +
                text_weight * text_score +
                ocr_weight * ocr_conf
            )
            final_scores[brand_name] = final_score

        sorted_scores = sorted(final_scores.items(), key=lambda x: x[1], reverse=True)[:5]
        print(f" [DEBUG] Top 5 brand scores:")
        for brand, score in sorted_scores:
            print(f" {brand}: {score:.4f} (visual={brand_scores.get(brand, 0):.4f}, text={text_matches.get(brand, (0, 0))[0]:.4f})")

        # Return confident matches with bounding boxes.
        # Every brand above the (deliberately lenient) 0.10 threshold is
        # reported, all sharing the same region_bbox.
        confident_brands = []
        for brand_name, score in final_scores.items():
            if score > 0.10:
                confident_brands.append((brand_name, score, region_bbox))
                print(f" [DEBUG] ✓ Brand detected: {brand_name} (confidence: {score:.4f})")

        confident_brands.sort(key=lambda x: x[1], reverse=True)

        if not confident_brands:
            print(f" [DEBUG] ✗ No brands passed threshold 0.10")

        return confident_brands
110
+
111
+ def _classify_region_context(self, image_region: Image.Image) -> str:
112
+ """Classify what type of region this is (bag_panel, shoe_side, etc.)"""
113
+ context_labels = [
114
+ 'bag panel with pattern',
115
+ 'luggage surface with branding',
116
+ 'luxury trunk with monogram pattern',
117
+ 'vintage travel trunk with hardware',
118
+ 'shoe side view',
119
+ 'device back cover',
120
+ 'apparel chest area',
121
+ 'belt buckle',
122
+ 'storefront sign',
123
+ 'product tag or label',
124
+ 'wallet surface',
125
+ 'perfume bottle',
126
+ 'watch dial or face',
127
+ 'car front grille',
128
+ 'laptop lid'
129
+ ]
130
+
131
+ scores = self.clip_manager.classify_zero_shot(image_region, context_labels)
132
+
133
+ # Map to simplified contexts
134
+ context_mapping = {
135
+ 'bag panel with pattern': 'bag_panel',
136
+ 'luggage surface with branding': 'luggage_surface',
137
+ 'luxury trunk with monogram pattern': 'trunk_body',
138
+ 'vintage travel trunk with hardware': 'trunk_body',
139
+ 'shoe side view': 'shoe_side',
140
+ 'device back cover': 'device_back',
141
+ 'apparel chest area': 'apparel_chest',
142
+ 'belt buckle': 'belt_buckle',
143
+ 'storefront sign': 'storefront',
144
+ 'product tag or label': 'product_tag',
145
+ 'wallet surface': 'wallet',
146
+ 'perfume bottle': 'perfume_bottle',
147
+ 'watch dial or face': 'watch_dial',
148
+ 'car front grille': 'car_front',
149
+ 'laptop lid': 'laptop_lid'
150
+ }
151
+
152
+ top_context = max(scores.items(), key=lambda x: x[1])[0]
153
+ return context_mapping.get(top_context, 'unknown')
154
+
155
+ def _match_region_to_brand_context(self, region_context: str, brand_contexts: List[str]) -> str:
156
+ """Match detected region context to brand's available contexts"""
157
+ if region_context in brand_contexts:
158
+ return region_context
159
+ # Fuzzy matching
160
+ for brand_context in brand_contexts:
161
+ if region_context.split('_')[0] in brand_context:
162
+ return brand_context
163
+ return None
164
+
165
+ def _fuzzy_text_matching(self, ocr_results: List[Dict]) -> Dict[str, Tuple[float, float]]:
166
+ """Fuzzy text matching using brand aliases (optimized for logo text)"""
167
+ matches = {}
168
+
169
+ for ocr_item in ocr_results:
170
+ text = ocr_item['text']
171
+ conf = ocr_item['confidence']
172
+
173
+ for brand_name, brand_info in self.flat_brands.items():
174
+ # Check all aliases
175
+ all_names = [brand_name] + brand_info.get('aliases', [])
176
+
177
+ for alias in all_names:
178
+ ratio = fuzz.ratio(text, alias) / 100.0
179
+ if ratio > 0.70: # Lowered threshold for better recall
180
+ if brand_name not in matches or ratio > matches[brand_name][0]:
181
+ matches[brand_name] = (ratio, conf)
182
+
183
+ return matches
184
+
185
+ def _scale_visual(self, score: float) -> float:
186
+ """Scale visual score using sigmoid"""
187
+ return 1 / (1 + math.exp(-10 * (score - 0.5)))
188
+
189
+ def _calculate_adaptive_weights(self, brand_name: str, visual_score: float,
190
+ text_score: float, ocr_conf: float) -> tuple:
191
+ """
192
+ Calculate adaptive weights based on brand characteristics and signal strengths
193
+
194
+ Args:
195
+ brand_name: Name of the brand
196
+ visual_score: Visual similarity score
197
+ text_score: Text matching score
198
+ ocr_conf: OCR confidence
199
+
200
+ Returns:
201
+ Tuple of (visual_weight, text_weight, ocr_weight)
202
+ """
203
+ brand_info = self.prompt_library.get_brand_prompts(brand_name)
204
+
205
+ if not brand_info:
206
+ # Default balanced weights
207
+ return 0.50, 0.30, 0.20
208
+
209
+ # Base weights based on brand characteristics
210
+ if brand_info.get('visual_distinctive', False):
211
+ # Visually distinctive brands (LV, Burberry)
212
+ visual_weight = 0.65
213
+ text_weight = 0.20
214
+ ocr_weight = 0.15
215
+ elif brand_info.get('text_prominent', False):
216
+ # Text-prominent brands (Nike, Adidas)
217
+ visual_weight = 0.30
218
+ text_weight = 0.30
219
+ ocr_weight = 0.40
220
+ else:
221
+ # Balanced for general brands
222
+ visual_weight = 0.50
223
+ text_weight = 0.30
224
+ ocr_weight = 0.20
225
+
226
+ # Dynamic adjustment based on signal strength
227
+ # If visual signal is very strong, boost its weight
228
+ if visual_score > 0.7:
229
+ boost = 0.10
230
+ visual_weight += boost
231
+ text_weight -= boost * 0.5
232
+ ocr_weight -= boost * 0.5
233
+
234
+ # If OCR has very high confidence, boost its weight
235
+ if ocr_conf > 0.85:
236
+ boost = 0.10
237
+ ocr_weight += boost
238
+ visual_weight -= boost * 0.6
239
+ text_weight -= boost * 0.4
240
+
241
+ # If text match is very strong, boost its weight
242
+ if text_score > 0.80:
243
+ boost = 0.08
244
+ text_weight += boost
245
+ visual_weight -= boost * 0.5
246
+ ocr_weight -= boost * 0.5
247
+
248
+ # Normalize weights to sum to 1
249
+ total = visual_weight + text_weight + ocr_weight
250
+ return visual_weight / total, text_weight / total, ocr_weight / total
251
+
252
    def _multi_scale_visual_matching(self, image_region: Image.Image,
                                   initial_scores: Dict[str, float]) -> Dict[str, float]:
        """
        Apply multi-scale matching to improve robustness

        Re-scores every brand at 0.8x / 1.0x / 1.2x resizes of the crop and
        keeps the maximum score per brand across the scales that ran.

        Args:
            image_region: Image region to analyze
            initial_scores: Initial brand scores from single-scale matching

        Returns:
            Updated brand scores with multi-scale matching
        """
        scales = [0.8, 1.0, 1.2]  # Three scales
        multi_scale_scores = {brand: [] for brand in initial_scores.keys()}

        for scale in scales:
            # Resize image
            new_width = int(image_region.width * scale)
            new_height = int(image_region.height * scale)

            # Ensure minimum size; tiny crops are skipped entirely at 0.8x.
            if new_width < 50 or new_height < 50:
                continue

            try:
                scaled_img = image_region.resize((new_width, new_height), Image.Resampling.LANCZOS)

                # Re-run classification on each brand's prompts
                for brand_name, brand_info in self.flat_brands.items():
                    # Get context-specific prompts
                    # NOTE(review): the region context is hard-wired to
                    # 'bag_panel' here rather than reusing the context
                    # classified in recognize_brand -- confirm intent.
                    best_context = self._match_region_to_brand_context(
                        'bag_panel', # Default context, ideally should be passed as parameter
                        brand_info.get('region_contexts', [])
                    )

                    if best_context and best_context in brand_info.get('openclip_prompts', {}):
                        prompts = brand_info['openclip_prompts'][best_context]
                        visual_scores = self.clip_manager.classify_zero_shot(scaled_img, prompts)
                        avg_score = sum(visual_scores.values()) / len(visual_scores) if visual_scores else 0.0
                    else:
                        prompts = brand_info.get('strong_cues', [])[:3]
                        visual_scores = self.clip_manager.classify_zero_shot(scaled_img, prompts)
                        avg_score = sum(visual_scores.values()) / len(visual_scores) if visual_scores else 0.0

                    multi_scale_scores[brand_name].append(avg_score)

            except Exception as e:
                # Skip this scale if error occurs
                # NOTE(review): broad catch -- any CLIP failure here is
                # silently swallowed and the scale is dropped.
                continue

        # Aggregate multi-scale scores (use max score across scales)
        final_scores = {}
        for brand_name, scores in multi_scale_scores.items():
            if scores:
                final_scores[brand_name] = max(scores)
            else:
                # No scale ran (e.g. crop too small): keep the single-scale score.
                final_scores[brand_name] = initial_scores.get(brand_name, 0.0)

        return final_scores
311
+
312
+ def scan_full_image_for_brands(self, full_image: Image.Image,
313
+ exclude_bboxes: List[List[int]] = None,
314
+ saliency_regions: List[Dict] = None) -> List[Tuple[str, float, List[int]]]:
315
+ """
316
+ 智能全圖品牌掃描 - 性能優化版本
317
+ 使用預篩選和智能區域選擇大幅減少檢測時間
318
+
319
+ Args:
320
+ full_image: PIL Image (full image)
321
+ exclude_bboxes: List of bboxes to exclude (already detected)
322
+ saliency_regions: Saliency detection results for smart region selection
323
+
324
+ Returns:
325
+ List of (brand_name, confidence, bbox) tuples
326
+ """
327
+ if exclude_bboxes is None:
328
+ exclude_bboxes = []
329
+
330
+ detected_brands = {} # brand_name -> (confidence, bbox)
331
+ img_width, img_height = full_image.size
332
+
333
+ # OPTIMIZATION 1: 快速品牌預篩選
334
+ likely_brands = self.optimizer.quick_brand_prescreening(full_image)
335
+ print(f" Quick prescreening found {len(likely_brands)} potential brands")
336
+
337
+ # OPTIMIZATION 2: 智能區域選擇(只掃描有意義的區域)
338
+ regions_to_scan = self.optimizer.smart_region_selection(full_image, saliency_regions or [])
339
+ print(f" Scanning {len(regions_to_scan)} intelligent regions")
340
+
341
+ # 掃描選定的區域
342
+ for region_bbox in regions_to_scan:
343
+ x1, y1, x2, y2 = region_bbox
344
+
345
+ # 跳過已檢測區域
346
+ if self._bbox_overlap(list(region_bbox), exclude_bboxes):
347
+ continue
348
+
349
+ # 提取區域
350
+ region = full_image.crop(region_bbox)
351
+
352
+ # 只檢測預篩選的品牌(而非所有20+品牌)
353
+ for brand_name in likely_brands:
354
+ brand_info = self.flat_brands.get(brand_name)
355
+ if not brand_info:
356
+ continue
357
+
358
+ # only use strong_cues
359
+ strong_cues = brand_info.get('strong_cues', [])[:5] # Top 5
360
+ if not strong_cues:
361
+ continue
362
+
363
+ visual_scores = self.clip_manager.classify_zero_shot(region, strong_cues)
364
+ avg_score = sum(visual_scores.values()) / len(visual_scores) if visual_scores else 0.0
365
+
366
+ # OCR 增強
367
+ ocr_results = self.ocr_manager.extract_text(full_image, use_brand_preprocessing=True)
368
+ boosted_score = self.optimizer.compute_brand_confidence_boost(
369
+ brand_name, ocr_results, avg_score
370
+ )
371
+
372
+ # 極度寬鬆的閾值以最大化檢測率
373
+ if boosted_score > 0.08: # 降低到 0.08
374
+ # 更新最佳結果
375
+ if brand_name not in detected_brands or boosted_score > detected_brands[brand_name][0]:
376
+ detected_brands[brand_name] = (boosted_score, list(region_bbox))
377
+
378
+ # 轉換為列表格式
379
+ final_brands = [
380
+ (brand_name, confidence, bbox)
381
+ for brand_name, (confidence, bbox) in detected_brands.items()
382
+ ]
383
+
384
+ # 按信心度排序
385
+ final_brands.sort(key=lambda x: x[1], reverse=True)
386
+
387
+ return final_brands[:5] # 返回前5個
388
+
389
+ def _bbox_overlap(self, bbox1: List[int], bbox_list: List[List[int]]) -> bool:
390
+ """Check if bbox1 overlaps significantly with any bbox in bbox_list"""
391
+ if not bbox_list:
392
+ return False
393
+
394
+ x1_1, y1_1, x2_1, y2_1 = bbox1
395
+
396
+ for bbox2 in bbox_list:
397
+ if bbox2 is None:
398
+ continue
399
+
400
+ x1_2, y1_2, x2_2, y2_2 = bbox2
401
+
402
+ # Calculate intersection
403
+ x_left = max(x1_1, x1_2)
404
+ y_top = max(y1_1, y1_2)
405
+ x_right = min(x2_1, x2_2)
406
+ y_bottom = min(y2_1, y2_2)
407
+
408
+ if x_right < x_left or y_bottom < y_top:
409
+ continue
410
+
411
+ intersection_area = (x_right - x_left) * (y_bottom - y_top)
412
+ bbox1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
413
+
414
+ # 如果重疊超過 30%,視為重疊
415
+ if intersection_area / bbox1_area > 0.3:
416
+ return True
417
+
418
+ return False
419
+
420
+ print("✓ BrandRecognitionManager (with full-image scan for commercial use) defined")
brand_verification_manager.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import json
3
+ import re
4
+ from PIL import Image
5
+ from typing import List, Dict, Tuple
6
+ from datetime import datetime
7
+ from caption_generation_manager import CaptionGenerationManager
8
+
9
+ class BrandVerificationManager:
10
+ """VLM-based brand verification and three-way voting system"""
11
+
12
def __init__(self, caption_generator: CaptionGenerationManager = None):
    """Initialize the verification manager.

    Args:
        caption_generator: CaptionGenerationManager instance for VLM access.
            When None, a new instance is constructed here (this loads the
            VLM weights, so passing a shared instance avoids a second load).
    """
    if caption_generator is None:
        caption_generator = CaptionGenerationManager()

    self.caption_generator = caption_generator

    # Confidence mapping for VLM responses: maps the qualitative confidence
    # words the verification prompt asks the VLM to emit onto the numeric
    # scores consumed by three_way_voting().
    self.confidence_map = {
        'high': 0.9,
        'medium': 0.7,
        'low': 0.5,
        'very high': 0.95,
        'very low': 0.3
    }

    print("✓ Brand Verification Manager initialized with VLM")
32
+
33
def verify_brands(self, image: Image.Image, detected_brands: List[Tuple[str, float, list]]) -> Dict:
    """
    Use VLM to verify detected brands.

    Builds a JSON-answer prompt listing the top detections and asks the
    vision-language model to confirm, reject, or extend them.

    Args:
        image: PIL Image
        detected_brands: List of (brand_name, confidence, bbox) tuples

    Returns:
        Dictionary with verification results ('verified_brands',
        'false_positives', 'additional_brands'; on empty input also
        'confidence'). On VLM failure, falls back to echoing the input
        detections at 'medium' confidence.
    """
    if not detected_brands:
        return {
            'verified_brands': [],
            'false_positives': [],
            'additional_brands': [],
            'confidence': 0.0
        }

    # Construct verification prompt — only the top 3 detections are listed
    # to keep the prompt focused.
    brand_list = ', '.join([f"{brand[0]} (confidence: {brand[1]:.2f})"
                            for brand in detected_brands[:3]])  # Top 3 brands

    # NOTE: the doubled braces {{ }} below are literal braces in the
    # f-string, showing the VLM the expected JSON schema.
    verification_prompt = f"""Analyze this image carefully. Our computer vision system detected the following brands: {brand_list}.

Please verify each brand identification:

1. Are these brand identifications correct based on visible logos, patterns, text, or distinctive features?
2. If incorrect, what brands do you actually see (if any)?
3. Describe the visual evidence (logo shape, text, pattern, color scheme, hardware) that supports your conclusion.

Respond in JSON format:
{{
    "verified_brands": [
        {{"name": "Brand Name", "confidence": "high/medium/low", "evidence": "description of visual evidence"}}
    ],
    "false_positives": ["brand names that were incorrectly detected"],
    "additional_brands": ["brands we missed but you can see"]
}}

IMPORTANT: Only include brands you can clearly identify with visual evidence. If unsure, use "low" confidence."""

    # Generate VLM response and parse it (JSON first, rule-based fallback).
    try:
        response = self._generate_vlm_response(image, verification_prompt)
        parsed_result = self._parse_verification_response(response)
        return parsed_result

    except Exception as e:
        print(f"VLM verification error: {e}")
        # Fallback to original detections: verification must never make the
        # pipeline worse than having no verifier at all.
        return {
            'verified_brands': [
                {'name': brand[0], 'confidence': 'medium', 'evidence': 'VLM verification failed'}
                for brand in detected_brands
            ],
            'false_positives': [],
            'additional_brands': []
        }
92
+
93
def three_way_voting(self, openclip_brands: List[Tuple], ocr_brands: Dict,
                     vlm_result: Dict) -> List[Tuple[str, float, list]]:
    """
    Combine OpenCLIP, OCR and VLM brand evidence by weighted voting.

    Each detector casts votes per brand (the VLM counts double, as the most
    reliable source) and contributes a weighted confidence score. Brands the
    VLM flags as false positives lose two votes; multi-source agreement
    earns a 15% confidence boost, capped at 0.95. Only brands above 0.30
    final confidence are returned.

    Args:
        openclip_brands: List of (brand_name, confidence, bbox) from OpenCLIP.
        ocr_brands: Dict of {brand_name: (text_score, ocr_conf)} from OCR.
        vlm_result: Verification result dict from the VLM.

    Returns:
        List of (brand_name, final_confidence, bbox) tuples, sorted by
        confidence in descending order.
    """
    # Per-source weights for the weighted confidence average.
    source_weights = {'vlm': 1.0, 'openclip': 0.6, 'ocr': 0.4}

    tally = {}      # brand -> {'votes': int, 'sources': list, 'bbox': list|None}
    evidence = {}   # brand -> [(source, weighted_score), ...]

    def register(brand, source, score, n_votes, box=None):
        """Record one detector's vote and score for a brand."""
        entry = tally.setdefault(brand, {'votes': 0, 'sources': [], 'bbox': box})
        entry['votes'] += n_votes
        entry['sources'].append(source)
        evidence.setdefault(brand, []).append((source, score))

    # Vote 1: OpenCLIP (scores discounted to 0.8x).
    for name, conf, box in openclip_brands:
        register(name, 'openclip', conf * 0.8, 1, box)

    # Vote 2: OCR (average of text match and OCR confidence, discounted 0.7x).
    for name, (text_score, ocr_conf) in ocr_brands.items():
        register(name, 'ocr', (text_score + ocr_conf) / 2 * 0.7, 1)

    # Vote 3: VLM — double weight, most reliable source.
    for info in vlm_result.get('verified_brands', []):
        level = info.get('confidence', 'medium')
        register(info['name'], 'vlm',
                 self.confidence_map.get(level.lower(), 0.7), 2)

    # Brands the VLM explicitly rejected lose two votes (floored at zero).
    for bogus in vlm_result.get('false_positives', []):
        if bogus in tally:
            tally[bogus]['votes'] = max(0, tally[bogus]['votes'] - 2)

    ranked = []
    for name, entry in tally.items():
        if entry['votes'] <= 0:
            continue  # voted out (e.g. VLM false positive)

        scores = evidence.get(name, [])
        if not scores:
            continue

        # Weighted average of per-source scores.
        weight_total = sum(source_weights.get(src, 0.4) for src, _ in scores)
        weighted_sum = sum(sc * source_weights.get(src, 0.4) for src, sc in scores)
        final_conf = weighted_sum / weight_total if weight_total > 0 else 0.0

        # Agreement bonus: two or more votes earn a 15% boost.
        if entry['votes'] >= 2:
            final_conf *= 1.15

        final_conf = min(final_conf, 0.95)

        # Drop low-confidence results entirely.
        if final_conf > 0.30:
            ranked.append((name, final_conf, entry['bbox']))

    ranked.sort(key=lambda item: item[1], reverse=True)
    return ranked
195
+
196
def extract_visual_evidence(self, image: Image.Image, brand_name: str) -> Dict:
    """
    Extract detailed visual evidence for an identified brand.

    Asks the VLM to describe the concrete visual features (logo, text,
    patterns, colors, product design) that justify the identification.

    Args:
        image: PIL Image
        brand_name: Identified brand name

    Returns:
        Dictionary with 'brand', 'evidence_description' and an ISO-8601
        'timestamp'. Never raises: VLM failures are reported inside
        'evidence_description' instead.
    """
    evidence_prompt = f"""You identified {brand_name} in this image. Please describe the specific visual evidence:

1. Logo appearance: Describe the logo's shape, style, color, and exact location in the image
2. Text elements: What text did you see? (exact wording, font style, placement)
3. Distinctive patterns: Any signature patterns, textures, or design elements
4. Color scheme: Brand-specific colors used
5. Product features: Distinctive product design characteristics

Be specific and detailed. Focus on objective visual features."""

    try:
        evidence_description = self._generate_vlm_response(image, evidence_prompt)

        return {
            'brand': brand_name,
            'evidence_description': evidence_description,
            'timestamp': datetime.now().isoformat()
        }

    except Exception as e:
        # Degrade gracefully: surface the failure in the payload rather
        # than propagating an exception to the caller.
        return {
            'brand': brand_name,
            'evidence_description': f"Evidence extraction failed: {str(e)}",
            'timestamp': datetime.now().isoformat()
        }
232
+
233
def _generate_vlm_response(self, image: Image.Image, prompt: str) -> str:
    """
    Generate a VLM response for the given image and prompt.

    Runs one chat turn through the shared CaptionGenerationManager's
    processor/model pair (Qwen-VL style API) with a low temperature so the
    answer stays factual rather than creative.

    Args:
        image: PIL Image
        prompt: Text prompt

    Returns:
        Decoded VLM response string (prompt tokens stripped).
    """
    # Imported lazily so this module can load without qwen_vl_utils installed.
    from qwen_vl_utils import process_vision_info

    # Single-turn chat message in the Qwen-VL content format.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt}
        ]
    }]

    text = self.caption_generator.processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    image_inputs, video_inputs = process_vision_info(messages)
    inputs = self.caption_generator.processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt"
    ).to(self.caption_generator.model.device)

    # Generate with low temperature for factual responses (verification,
    # not creative captioning).
    generation_config = {
        'temperature': 0.3,  # Low temperature for factual verification
        'top_p': 0.9,
        'max_new_tokens': 300,
        'repetition_penalty': 1.1
    }

    generated_ids = self.caption_generator.model.generate(
        **inputs,
        **generation_config
    )

    # Trim the echoed input tokens so only newly generated tokens remain.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]

    output_text = self.caption_generator.processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    return output_text
292
+
293
+ def _parse_verification_response(self, response: str) -> Dict:
294
+ """
295
+ Parse VLM verification response
296
+
297
+ Args:
298
+ response: VLM response string
299
+
300
+ Returns:
301
+ Parsed dictionary
302
+ """
303
+ try:
304
+ # Try to extract JSON from response
305
+ json_match = re.search(r'\{.*\}', response, re.DOTALL)
306
+ if json_match:
307
+ result = json.loads(json_match.group())
308
+ return result
309
+ except json.JSONDecodeError:
310
+ pass
311
+
312
+ # Fallback: rule-based parsing
313
+ return self._rule_based_parse(response)
314
+
315
+ def _rule_based_parse(self, response: str) -> Dict:
316
+ """
317
+ Fallback rule-based parsing if JSON fails
318
+
319
+ Args:
320
+ response: VLM response string
321
+
322
+ Returns:
323
+ Parsed dictionary
324
+ """
325
+ result = {
326
+ 'verified_brands': [],
327
+ 'false_positives': [],
328
+ 'additional_brands': []
329
+ }
330
+
331
+ # Simple pattern matching
332
+ lines = response.lower().split('\n')
333
+
334
+ for line in lines:
335
+ # Look for brand names mentioned with positive sentiment
336
+ if any(word in line for word in ['correct', 'yes', 'visible', 'see', 'identified']):
337
+ # Extract potential brand names (capitalize words)
338
+ words = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', response)
339
+ for word in words:
340
+ if len(word) > 2: # Avoid short words
341
+ result['verified_brands'].append({
342
+ 'name': word,
343
+ 'confidence': 'medium',
344
+ 'evidence': 'Extracted from VLM response'
345
+ })
346
+
347
+ return result
348
+
349
+ print("✓ BrandVerificationManager (VLM verification and voting) defined")
brand_visualization_manager.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ from typing import List, Tuple, Dict
6
+
7
class BrandVisualizationManager:
    """Visualize detected brands with bounding boxes and labels (like YOLO)"""

    def __init__(self):
        """Initialize visualization manager"""
        # Color palette for different brand categories (BGR-friendly RGB
        # triples; drawing below happens on an OpenCV BGR image).
        self.colors = {
            'luxury': (218, 165, 32),       # Gold
            'sportswear': (0, 191, 255),    # Deep Sky Blue
            'tech': (169, 169, 169),        # Dark Gray
            'automotive': (220, 20, 60),    # Crimson
            'fashion': (186, 85, 211),      # Medium Orchid
            'watches': (184, 134, 11),      # Dark Goldenrod
            'default': (0, 255, 0)          # Green
        }

        print("✓ Brand Visualization Manager initialized")

    def draw_brand_detections(self, image: Image.Image, brand_detections: List[Dict],
                              show_confidence: bool = True) -> Image.Image:
        """Draw bounding boxes and labels for detected brands

        Args:
            image: PIL Image
            brand_detections: List of dicts with keys: 'name', 'confidence', 'bbox', 'category'
            show_confidence: Whether to show confidence scores

        Returns:
            Image with drawn bounding boxes (the input image is returned
            unchanged when there is nothing to draw)
        """
        if not brand_detections:
            return image

        # Convert PIL (RGB) to OpenCV (BGR) format for drawing.
        img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        for detection in brand_detections:
            brand_name = detection.get('name', 'Unknown')
            confidence = detection.get('confidence', 0.0)
            bbox = detection.get('bbox')
            category = detection.get('category', 'default')

            # Detections without a localized bbox (e.g. OCR/VLM-only hits)
            # cannot be drawn.
            if bbox is None:
                continue

            x1, y1, x2, y2 = bbox
            color = self.colors.get(category, self.colors['default'])

            # Draw bounding box
            cv2.rectangle(img_cv, (int(x1), int(y1)), (int(x2), int(y2)), color, 3)

            # Prepare label text
            if show_confidence:
                label = f"{brand_name} {confidence:.2f}"
            else:
                label = brand_name

            # Calculate text size so the background rectangle fits the label.
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_scale = 0.7
            thickness = 2
            (text_width, text_height), baseline = cv2.getTextSize(label, font, font_scale, thickness)

            # Draw filled label background above the box's top-left corner.
            # NOTE(review): this can extend past the image top when y1 is
            # small — OpenCV clips it silently, but the label may be cut off.
            cv2.rectangle(img_cv,
                          (int(x1), int(y1) - text_height - 10),
                          (int(x1) + text_width + 10, int(y1)),
                          color, -1)

            # Draw label text in white on the colored background.
            cv2.putText(img_cv, label,
                        (int(x1) + 5, int(y1) - 5),
                        font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)

        # Convert back to PIL (RGB).
        img_pil = Image.fromarray(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))
        return img_pil

    def format_brand_list(self, brand_detections: List[Dict]) -> str:
        """Format brand detections as readable text

        Args:
            brand_detections: List of brand detection dicts

        Returns:
            Comma-separated string of "Name (confidence)" entries, or a
            placeholder message when the list is empty
        """
        if not brand_detections:
            return "No brands identified"

        formatted = []
        for detection in brand_detections:
            brand_name = detection.get('name', 'Unknown')
            confidence = detection.get('confidence', 0.0)
            # Category tag intentionally omitted to keep the output concise.

            formatted.append(f"{brand_name} ({confidence:.2f})")

        return ", ".join(formatted)
106
+
107
+ print("✓ BrandVisualizationManager defined")
caption_generation_manager.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForImageTextToText, AutoProcessor
3
+ from qwen_vl_utils import process_vision_info
4
+ from PIL import Image
5
+ from typing import List, Dict
6
+ import json
7
+ from opencc import OpenCC
8
+ import warnings
9
+
10
+ class CaptionGenerationManager:
11
+ """Caption generation using Vision-Language Models (supports Qwen2.5-VL, Qwen3-VL, etc.)"""
12
+
13
+ def __init__(self, model_name: str = "Qwen/Qwen2.5-VL-7B-Instruct"):
14
+ """
15
+ Args:
16
+ model_name: Vision-Language model name, e.g.:
17
+ - "Qwen/Qwen2.5-VL-7B-Instruct" (default)
18
+ - "Qwen/Qwen3-VL-8B-Instruct" (2025 latest)
19
+ """
20
+ print(f"Loading Vision-Language Model: {model_name}...")
21
+
22
+ # Suppress processor warning
23
+ warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")
24
+
25
+ # Use Auto* classes for flexibility (supports Qwen2.5-VL, Qwen3-VL, etc.)
26
+ self.processor = AutoProcessor.from_pretrained(model_name, use_fast=False)
27
+ self.model = AutoModelForImageTextToText.from_pretrained(
28
+ model_name,
29
+ dtype=torch.bfloat16, # Changed from torch_dtype to dtype
30
+ device_map="auto"
31
+ )
32
+
33
+ # Simplified Chinese to Traditional Chinese converter
34
+ self.cc = OpenCC('s2t') # Simplified to Traditional
35
+
36
+ self.generation_config = {
37
+ 'temperature': 0.7,
38
+ 'top_p': 0.9,
39
+ 'max_new_tokens': 300, # Increased from 200 to prevent truncation
40
+ 'repetition_penalty': 1.1
41
+ }
42
+
43
+ # Platform-specific templates
44
+ self.platform_templates = {
45
+ 'instagram': {
46
+ 'style': 'storytelling, aesthetic',
47
+ 'emoji_count': '2-3',
48
+ 'hashtag_count': '8-10',
49
+ 'min_length': 120, # Increased for richer content
50
+ 'max_length': 220, # Allow more detailed descriptions
51
+ 'features': ['call-to-action', 'question', 'relatable']
52
+ },
53
+ 'tiktok': {
54
+ 'style': 'brief, punchy',
55
+ 'emoji_count': '1-2',
56
+ 'hashtag_count': '5-8',
57
+ 'min_length': 60,
58
+ 'max_length': 120,
59
+ 'features': ['trending', 'POV', 'relatable']
60
+ },
61
+ 'xiaohongshu': {
62
+ 'style': 'structured, informative, detailed',
63
+ 'emoji_count': '5-8',
64
+ 'hashtag_count': '8-12',
65
+ 'min_length': 180,
66
+ 'max_length': 500,
67
+ 'features': ['tips', 'bullets', 'sharing-tone']
68
+ }
69
+ }
70
+
71
+ print(f"✓ {model_name.split('/')[-1]} loaded successfully (using Auto* classes for flexibility)")
72
+
73
+ def construct_prompt(self, analysis_results: Dict, platform: str = 'instagram', language: str = 'zh') -> str:
74
+ """Construct prompt with language support ensuring consistency
75
+
76
+ Args:
77
+ language: 'zh' (Traditional Chinese), 'en' (English), 'zh-en' (Bilingual)
78
+ """
79
+ platform_config = self.platform_templates.get(platform, self.platform_templates['instagram'])
80
+
81
+ # Language-specific instructions
82
+ language_instructions = {
83
+ 'zh': '請使用繁體中文生成標題和標籤。語言要自然流暢,符合華語社群媒體的表達習慣。避免使用簡體字。當偵測到品牌時,必須在標題中提及品牌名稱。',
84
+ 'en': '''🚨 CRITICAL LANGUAGE REQUIREMENT 🚨
85
+ Generate captions and hashtags EXCLUSIVELY in English.
86
+ - NEVER use Chinese characters (Traditional or Simplified)
87
+ - NEVER mix languages
88
+ - Use natural, engaging language suitable for international social media
89
+ - When brands are detected, mention them naturally in English
90
+ - All text output must be 100% English only
91
+ This is MANDATORY and NON-NEGOTIABLE.''',
92
+ 'zh-en': '''生成雙語內容:標題使用繁體中文,同時提供英文翻譯。標籤混合使用中英文以擴大觸及範圍。當偵測到品牌時,必須在標題中提及品牌名稱。
93
+
94
+ 🚨 重要:雙語一致性要求 🚨
95
+ - 中文和英文必須表達相同的核心意義
96
+ - 允許表達方式的差異(形容詞、語法不同)
97
+ - 但整體訊息、語氣、品牌提及必須一致
98
+ - 兩種語言都要朝同一方向詮釋內容'''
99
+ }
100
+
101
+ system_instruction = f"""You are a professional social media content strategist.
102
+
103
+ {language_instructions.get(language, language_instructions['zh'])}
104
+
105
+ Target platform: {platform}
106
+ Content style: Authentic, creative, and optimized for engagement.
107
+
108
+ CRITICAL RULE: Never include hashtags (symbols starting with #) in the caption text. Hashtags must only appear in the separate 'hashtags' array."""
109
+
110
+ # Extract analysis context
111
+ objects = analysis_results.get('detections', [])
112
+ brands = analysis_results.get('brands', [])
113
+ scene_info = analysis_results.get('scene_analysis', {})
114
+ composition = analysis_results.get('composition', {})
115
+
116
+ # FIXED: Get fused lighting from scene_info (it's been updated by DetectionFusionManager)
117
+ lighting = scene_info.get('lighting', {}).get('top', 'natural light')
118
+ lighting_confidence = scene_info.get('lighting', {}).get('confidence', 0.7)
119
+
120
+ # Provide explicit Chinese translations to ensure consistency
121
+ lighting_translations_zh = {
122
+ 'soft diffused light': '柔和漫射光',
123
+ 'overcast atmosphere': '陰天氛圍',
124
+ 'natural daylight': '自然日光',
125
+ 'warm ambient light': '溫暖環境光',
126
+ 'evening light': '傍晚光線',
127
+ 'bright sunlight': '明亮陽光',
128
+ 'golden hour': '金黃時刻',
129
+ 'blue hour': '藍調時刻'
130
+ }
131
+
132
+ # Get appropriate lighting description based on language
133
+ if language == 'zh':
134
+ lighting_zh = lighting_translations_zh.get(lighting, lighting)
135
+ lighting_display = lighting_zh
136
+ else:
137
+ # For English and bilingual, use English only
138
+ lighting_display = lighting
139
+ lighting_zh = lighting
140
+
141
+ objects_str = ', '.join([obj['class_name'] for obj in objects[:10]])
142
+
143
+ # CRITICAL: Emphasize brands EXTREMELY prominently - repeat multiple times
144
+ if brands:
145
+ brands_list = [b[0] for b in brands[:5]]
146
+ brands_str = ', '.join(brands_list)
147
+ brand_emphasis = f"""
148
+
149
+ 🚨 CRITICAL BRAND REQUIREMENT 🚨
150
+ The following brands were POSITIVELY IDENTIFIED in this image: {brands_str}
151
+
152
+ YOU ABSOLUTELY MUST:
153
+ 1. Mention the brand name "{brands_list[0]}" explicitly in the FIRST sentence
154
+ 2. Use the exact brand name - do not use generic terms like "bag" or "accessory" without the brand
155
+ 3. Write naturally as if you're excited to share this {brands_list[0]} item
156
+ 4. Example: "在傍晚光線下,這款{brands_list[0]}經典黑色菱格紋皮革包..." (CORRECT)
157
+ 5. NOT acceptable: "在傍晚光線下,這款經典黑色菱格紋皮革包..." (WRONG - missing brand name!)
158
+
159
+ THIS IS MANDATORY - The caption will be rejected if it doesn't mention {brands_str}.
160
+ """
161
+ else:
162
+ brands_str = 'None detected'
163
+ brand_emphasis = ""
164
+
165
+ # Enhanced scene description
166
+ urban_scene = scene_info.get('urban', {}).get('top', 'unknown')
167
+ mood = scene_info.get('mood', {}).get('top', 'neutral')
168
+ comp_type = composition.get('composition_type', 'standard')
169
+
170
+ context = f"""
171
+ Analyze this image and generate an engaging, DETAILED social media caption with rich visual descriptions.
172
+
173
+ **Visual Elements (Describe in Detail):**
174
+ - Detected objects: {objects_str}
175
+ - Scene composition: {comp_type}
176
+ - Urban environment: {urban_scene}
177
+ - **IMPORTANT**: Include specific details about:
178
+ * Materials (leather, metal, fabric, canvas, etc.)
179
+ * Colors (use descriptive terms: jet black, antique gold, midnight blue, etc.)
180
+ * Textures (quilted, smooth, matte, glossy, metallic, etc.)
181
+ * Design features (stitching patterns, hardware, logos, emblems, etc.)
182
+ * Reflections and lighting effects on surfaces
183
+
184
+ **Atmosphere:**
185
+ - Lighting (analyzed with Places365 + CV): {lighting_display} (confidence: {lighting_confidence:.2f})
186
+ - Mood: {mood}
187
+
188
+ **Brand Detection:**
189
+ - Identified brands: {brands_str}{brand_emphasis}
190
+
191
+ **Caption Structure (Required - BE SPECIFIC AND DETAILED):**
192
+ 1. Opening hook - Most striking visual element with SPECIFIC details (1-2 sentences)
193
+ {f"- 🚨 MANDATORY: Start with the BRAND NAME '{brands_list[0]}' in the FIRST sentence!" if brands else ""}
194
+ {f"- Example (CORRECT): '這款{brands_list[0]}經典黑色菱格紋皮革包...'" if brands else ""}
195
+ {f"- Example (WRONG): '這款經典黑色菱格紋皮革包...' (missing {brands_list[0]}!)" if brands else ""}
196
+ - Be SPECIFIC: Include material, color, design features WITH the brand name
197
+
198
+ 2. Visual details - Describe materials, textures, colors, and design elements (2-3 sentences)
199
+ - Be SPECIFIC: mention quilting patterns, metal finishes, chain details, logo placements
200
+ - Describe how light interacts with materials (reflections on leather, gleam of metal)
201
+ - MUST use the EXACT lighting description: "{lighting_display}"
202
+
203
+ 3. Atmospheric context - How lighting and mood create the scene's character (1-2 sentences)
204
+ - Connect lighting to the overall visual impact
205
+ - Describe depth, shadows, contrasts
206
+
207
+ 4. Emotional connection & Engagement - How this resonates with viewers + call-to-action (1 sentence)
208
+
209
+ **Content Requirements:**
210
+ - Minimum information: 3-4 specific visual details per caption
211
+ - Include material types, color descriptions, design features
212
+ - Describe how lighting affects the appearance
213
+ - Make it vivid and immersive
214
+
215
+ Platform style: {platform_config['style']}
216
+ """
217
+
218
+ # Language-specific examples with DETAILED visual descriptions AND BRAND NAMES
219
+ if language == 'zh':
220
+ brand_name_zh = brands_list[0] if brands else "Gucci" # Use detected brand or example
221
+ example_correct = f"""正確範例 - 詳細描述 + 品牌提及 (繁體中文):
222
+ "在{lighting_zh}的映襯下,這款{brand_name_zh}經典黑色菱格紋皮革包展現奢華質感,V字形縫線在柔軟小牛皮上勾勒出精緻的幾何圖案,復古金色雙G標誌在深色背景中熠熠生輝。金屬鏈條肩帶反射著{lighting_zh},增添層次感與立體效果。皮革表面細膩的光澤與霧面質地形成迷人對比,每個細節都彰顯義大利工藝的極致追求。這樣的{brand_name_zh}單品不只是配件,更是品味與格調的完美詮釋。你的衣櫃裡有哪件經典單品?✨🖤"
223
+
224
+ 注意:品牌名稱 "{brand_name_zh}" 出現在第一句!這是正確的做法。
225
+
226
+ CRITICAL:
227
+ - 必須包含材質描述(皮革、金屬等)
228
+ - 必須包含顏色細節(黑色、復古金色等)
229
+ - 必須包含設計特點(縫線、標誌、鏈條等)
230
+ - 必須使用"{lighting_zh}"來描述光線
231
+ """
232
+ elif language == 'en':
233
+ brand_name_en = brands_list[0] if brands else "Gucci" # Use detected brand or example
234
+ example_correct = f"""CORRECT EXAMPLE - Detailed Description + Brand Mention (ENGLISH ONLY - NO CHINESE):
235
+ "Under the {lighting}, this {brand_name_en} classic black quilted leather bag showcases luxurious craftsmanship. V-shaped stitching traces intricate geometric patterns across supple calfskin, while the antique gold double-G logo gleams against the dark backdrop. The metal chain strap catches and reflects the {lighting}, adding dimension and depth to the piece. The leather surface presents a captivating contrast between fine sheen and matte texture, with every detail exemplifying Italian artisanship at its finest. This {brand_name_en} piece isn't just an accessory – it's a perfect expression of taste and sophistication. What's your timeless wardrobe essential? ✨🖤"
236
+
237
+ NOTE: Brand name "{brand_name_en}" appears in the FIRST sentence! This is the correct approach.
238
+
239
+ 🚨 ABSOLUTE REQUIREMENT FOR ENGLISH MODE 🚨
240
+ - Output must be 100% ENGLISH - zero Chinese characters allowed
241
+ - MUST include material descriptions (leather, metal, etc.)
242
+ - MUST include color details (black, antique gold, etc.)
243
+ - MUST include design features (stitching, logo, chain, etc.)
244
+ - MUST use "{lighting}" to describe the lighting
245
+ - NO Chinese characters anywhere in the output
246
+ """
247
+ else: # zh-en bilingual
248
+ brand_name_en = brands_list[0] if brands else "Gucci"
249
+ example_correct = f"""BILINGUAL EXAMPLE - 雙語範例:
250
+ Caption in Traditional Chinese, with English hashtags support.
251
+ (Details omitted for brevity)
252
+ """
253
+
254
+ # Language-specific hashtag instructions
255
+ if language == 'zh':
256
+ hashtag_instruction = """
257
+ 【CRITICAL HASHTAG REQUIREMENT - 繁體中文】:
258
+ - ALL hashtags MUST be in Traditional Chinese (繁體中文)
259
+ - NEVER use English hashtags when language is 繁體中文
260
+ - Examples of CORRECT hashtags: ["時尚包包", "奢華風格", "皮革工藝", "精品配件"]
261
+ - Examples of WRONG hashtags: ["FashionBlogger", "LuxuryLifestyle"] - DO NOT USE THESE
262
+ """
263
+ elif language == 'en':
264
+ hashtag_instruction = """
265
+ 【CRITICAL HASHTAG REQUIREMENT - English】:
266
+ - ALL hashtags MUST be in English
267
+ - NEVER use Chinese characters in hashtags
268
+ - Examples of CORRECT hashtags: ["FashionBlogger", "LuxuryLifestyle", "LeatherCraft"]
269
+ """
270
+ else: # zh-en
271
+ hashtag_instruction = """
272
+ 【CRITICAL HASHTAG REQUIREMENT - Bilingual】:
273
+ - Hashtags should MIX Traditional Chinese and English
274
+ - First half in Chinese, second half in English
275
+ - Example: ["時尚包包", "奢華風格", "FashionBlogger", "LuxuryLifestyle"]
276
+ """
277
+
278
+ output_format = f"""
279
+ Generate output in JSON format:
280
+ {{
281
+ "caption": "string (minimum {platform_config['min_length']} chars, maximum {platform_config['max_length']} chars, engaging and descriptive)",
282
+ "hashtags": ["tag1", "tag2", ...] ({platform_config['hashtag_count']} relevant hashtags),
283
+ "tone": "casual|professional|playful",
284
+ "platform": "{platform}"
285
+ }}
286
+
287
+ {hashtag_instruction}
288
+
289
+ STRICT REQUIREMENTS:
290
+ 1. Caption length: {platform_config['min_length']}-{platform_config['max_length']} characters
291
+ 2. 🚨 EMOJI REQUIREMENT 🚨 - MUST use EXACTLY {platform_config['emoji_count']} emojis naturally integrated into caption text
292
+ - Professional style: 1-2 emojis (e.g., ✨💼🌟)
293
+ - Creative style: 2-3 emojis (e.g., 🎨✨💫🌙)
294
+ - Authentic style: 2-3 emojis (e.g., 💖👜✨🖤)
295
+ - Place emojis naturally within or at end of sentences
296
+ 3. Caption must be pure descriptive text only - absolutely NO hashtags allowed
297
+ 4. 🚨 CALL-TO-ACTION REQUIREMENT 🚨 - MUST include an engaging question or CTA at the end
298
+ - Professional: Brief professional question (e.g., "What's your go-to piece?")
299
+ - Creative: Thought-provoking question (e.g., "How does this speak to you?")
300
+ - Authentic: Personal question (e.g., "What's your favorite timeless accessory?")
301
+ 5. Write 3-4 complete sentences following the structure above
302
+ 6. Be specific and vivid - describe what you see in detail
303
+ 7. 【CRITICAL】 MUST use the EXACT lighting description: "{lighting_display}"
304
+ - DO NOT substitute with similar terms
305
+ - DO NOT use "金黃時刻" if the lighting is "{lighting_zh if language == 'zh' else lighting}"
306
+ - DO NOT invent your own lighting description
307
+ 8. 🚨 HASHTAG REQUIREMENT 🚨 - Generate {platform_config['hashtag_count']} relevant hashtags
308
+ - Hashtags go ONLY in the 'hashtags' array, NEVER in the caption text
309
+ - Mix of broad and specific tags
310
+ - Include brand name as hashtag if detected
311
+ 9. {"🚨 CRITICAL BRAND REQUIREMENT 🚨 - The brand name '" + brands_list[0] + "' MUST appear in the FIRST sentence of your caption. This is MANDATORY and NON-NEGOTIABLE. Example: " + ("'這款" + brands_list[0] + "經典黑色...'" if language == 'zh' else "'This " + brands_list[0] + " classic black...'") if brands else "No brands detected to mention"}
312
+ 10. {"🚨 LANGUAGE REQUIREMENT 🚨 - Output must be 100% ENGLISH ONLY. NO Chinese characters allowed anywhere." if language == 'en' else ""}
313
+
314
+ WRONG EXAMPLE (DO NOT DO THIS):
315
+ "Lost in the city's towering skyscrapers 🏙️✨ | #UrbanVibes #CityLife"
316
+
317
+ {example_correct}
318
+ """
319
+
320
+ full_prompt = f"{system_instruction}\n\n{context}\n\n{output_format}"
321
+ return full_prompt
322
+
323
def generate_captions(self, analysis_results: Dict, image: Image.Image,
                      platform: str = 'instagram', language: str = 'zh') -> List[Dict]:
    """Generate 3 captions with distinct styles: Professional, Creative, Authentic.

    Args:
        analysis_results: Aggregated vision-analysis output. Only the 'brands'
            entry (a list whose items' first element is the brand name) is read
            directly here; the rest is consumed by construct_prompt().
        image: PIL image forwarded to the vision-language model.
        platform: Platform key forwarded to construct_prompt() and the fallback.
        language: 'zh' or 'zh-en' triggers Simplified->Traditional conversion of
            the parsed caption; any other value leaves the text as generated.

    Returns:
        A list with one parsed caption dict per style that produced valid JSON,
        or a single fallback caption when every generation failed to parse.
    """

    # Extract brands for style instructions (top 3 brand names at most)
    brands_in_image = analysis_results.get('brands', [])
    brand_names = [b[0] for b in brands_in_image[:3]] if brands_in_image else []
    brand_mention_requirement = f" CRITICAL: Mention {', '.join(brand_names)} brand(s) naturally in the caption." if brand_names else ""

    # Define 3 distinct styles. Each carries its own sampling temperature;
    # 'length_modifier' is not consumed anywhere in this method (informational only).
    styles = [
        {
            'name': 'professional',
            'temp': 0.6,
            'instruction': f'Professional style: Concise, elegant, sophisticated. Focus on quality and craftsmanship. Use refined language.{brand_mention_requirement}',
            'length_modifier': 0.8  # Shorter, more concise
        },
        {
            'name': 'creative',
            'temp': 0.7,
            'instruction': f'Creative style: Artistic, expressive, imaginative. Use vivid metaphors and sensory descriptions. Balance detail with flair.{brand_mention_requirement}',
            'length_modifier': 1.0  # Medium length
        },
        {
            'name': 'authentic',
            'temp': 0.8,
            'instruction': f'Authentic style: Personal, detailed, storytelling. Share rich observations and genuine feelings. Most descriptive and engaging.{brand_mention_requirement}',
            'length_modifier': 1.2  # Longer, more detailed
        }
    ]

    variations = []

    for style in styles:
        # Build style-specific prompt on top of the shared base prompt
        base_prompt = self.construct_prompt(analysis_results, platform, language)

        # Add style instruction so the three outputs are clearly differentiated
        style_prompt = f"""{base_prompt}

**STYLE REQUIREMENT FOR THIS CAPTION:**
{style['instruction']}

Adjust tone to be clearly '{style['name']}' - this should be noticeably different from other styles."""

        # Qwen2.5-VL chat format: one user turn containing the image + text prompt
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": style_prompt}
            ]
        }]

        text = self.processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # process_vision_info is a project helper (qwen-vl-utils style) that
        # extracts image/video tensors from the chat messages.
        image_inputs, video_inputs = process_vision_info(messages)
        inputs = self.processor(
            text=[text],
            images=image_inputs,
            videos=video_inputs,
            padding=True,
            return_tensors="pt"
        )

        if torch.cuda.is_available():
            inputs = inputs.to("cuda")

        # Generate with style-specific temperature; copy so the shared
        # generation_config is not mutated between styles.
        config = self.generation_config.copy()
        config['temperature'] = style['temp']

        with torch.no_grad():
            generated_ids = self.model.generate(**inputs, **config)

        # Strip the prompt tokens so only the newly generated tokens are decoded
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        output_text = self.processor.batch_decode(
            generated_ids_trimmed,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )[0]

        # Parse the model's JSON block; a failed parse silently skips this style
        parsed = self._parse_json_output(output_text)
        if parsed:
            # Force the correct tone regardless of what the model reported
            parsed['tone'] = style['name']

            # Remove any hashtags that leaked into caption
            if 'caption' in parsed:
                parsed['caption'] = self._remove_hashtags_from_caption(parsed['caption'])

            # Convert Simplified Chinese to Traditional if language is 'zh'
            if language == 'zh' or language == 'zh-en':
                parsed = self._convert_to_traditional(parsed)

            variations.append(parsed)

    return variations if variations else [self._get_fallback_caption(platform, language)]
425
+
426
+ def _remove_hashtags_from_caption(self, caption: str) -> str:
427
+ """Remove any hashtags, pipes, and debug info that leaked into caption text"""
428
+ import re
429
+
430
+ # CRITICAL FIX: Remove pipe symbol and everything after it (debug info)
431
+ # Example: "Text 🕰️🌉 | SoftDiffusedLight" -> "Text 🕰️🌉"
432
+ if '|' in caption:
433
+ caption = caption.split('|')[0].strip()
434
+
435
+ # Remove hashtags (words starting with #)
436
+ caption = re.sub(r'#\w+', '', caption)
437
+ caption = re.sub(r'#[\u4e00-\u9fff]+', '', caption) # Remove Chinese hashtags
438
+
439
+ # Remove standalone weird text patterns (like "BLACKBELT")
440
+ # If there's a suspicious all-caps word at the end without context, remove it
441
+ words = caption.split()
442
+ if len(words) > 0:
443
+ last_word = words[-1].strip('✨💎👗🌟💫🖤')
444
+ # If last "word" is all caps and doesn't look like a normal sentence word, remove it
445
+ if last_word.isupper() and len(last_word) > 3 and not any(char in last_word for char in '.,!?'):
446
+ caption = ' '.join(words[:-1])
447
+
448
+ # Remove excessive emojis at the end (more than 3)
449
+ emoji_pattern = r'[\U0001F300-\U0001F9FF]{4,}$'
450
+ caption = re.sub(emoji_pattern, '', caption)
451
+
452
+ # Remove multiple spaces
453
+ caption = re.sub(r'\s+', ' ', caption)
454
+
455
+ # Remove trailing/leading whitespace
456
+ caption = caption.strip()
457
+
458
+ # Final cleanup: if caption ends with weird patterns like "✨X 👗💎", clean it
459
+ if re.search(r'[✨💎👗🌟💫🖤]{2,}\s*$', caption):
460
+ caption = re.sub(r'[✨💎👗🌟💫🖤\s]+$', '', caption).strip()
461
+
462
+ return caption
463
+
464
+ def _convert_to_traditional(self, caption: Dict) -> Dict:
465
+ """Convert Simplified Chinese to Traditional Chinese"""
466
+ if 'caption' in caption:
467
+ caption['caption'] = self.cc.convert(caption['caption'])
468
+ return caption
469
+
470
+ def _parse_json_output(self, text: str) -> Dict:
471
+ """Parse JSON output"""
472
+ try:
473
+ start = text.find('{')
474
+ end = text.rfind('}') + 1
475
+ if start != -1 and end > start:
476
+ json_str = text[start:end]
477
+ return json.loads(json_str)
478
+ except:
479
+ pass
480
+ return None
481
+
482
+ def _get_fallback_caption(self, platform: str, language: str) -> Dict:
483
+ """Fallback caption"""
484
+ if language == 'en':
485
+ return {
486
+ 'caption': 'Every moment tells a story worth sharing. The world around us is filled with beauty waiting to be discovered. Take a pause and appreciate the details that make life extraordinary. What caught your eye today? ✨',
487
+ 'hashtags': ['photography', 'daily', 'lifestyle', 'moment', 'capture'],
488
+ 'tone': 'casual',
489
+ 'platform': platform
490
+ }
491
+ else:
492
+ return {
493
+ 'caption': '每個瞬間都值得被記錄與分享。生活中充滿了等待被發現的美好細節。停下腳步,用心感受周遭的一切。今天什麼畫面觸動了你的心?✨',
494
+ 'hashtags': ['攝影', '日常', '生活', '瞬間', '分享'],
495
+ 'tone': 'casual',
496
+ 'platform': platform
497
+ }
498
+
499
+ print("✓ CaptionGenerationManager (with Auto* classes for flexible model support) defined")
detection_fusion_manager.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict
2
+ import numpy as np
3
+
4
class DetectionFusionManager:
    """Integrate and prioritize detection results with intelligent lighting fusion.

    Combines YOLO detections with CLIP-classified salient regions, ranks them
    by an attention heuristic, analyzes composition, and fuses CV/Places365
    lighting analysis with CLIP's lighting prediction.
    """

    def __init__(self, clip_manager):
        # clip_manager must expose classify_hierarchical(image) -> dict with
        # 'top_prediction' and 'confidence' (used in fuse_detections)
        self.clip_manager = clip_manager

    def fuse_lighting_analysis(self, cv_lighting: Dict, clip_scene: Dict) -> Dict:
        """Intelligently fuse CV+Places365 lighting with CLIP scene understanding.

        Strategy (in priority order):
          1. CV confidence > 0.85 -> trust CV outright ('cv_dominant')
          2. CV and CLIP descriptions semantically agree -> keep CV's wording,
             boost confidence ('consensus')
          3. Otherwise weight by confidence; CV wins above 0.6 weight
             ('cv_weighted'), else fall back to a generalized feature-based
             description ('generalized').

        Returns a dict with the fused lighting type, capped confidence, both
        source predictions/confidences, and which fusion path was taken.
        """

        cv_lighting_type = cv_lighting.get('lighting_type', 'soft diffused light')
        cv_confidence = cv_lighting.get('confidence', 0.7)
        cv_features = cv_lighting.get('cv_features', {})

        # Get CLIP's lighting prediction
        clip_lighting_data = clip_scene.get('lighting', {})
        clip_lighting_type = clip_lighting_data.get('top', 'natural light')
        clip_confidence = clip_lighting_data.get('confidence', 0.5)

        # Intelligent fusion strategy:
        # 1. If CV has high confidence (>0.85), trust it
        # 2. If CV and CLIP semantically agree, boost confidence
        # 3. Otherwise, weighted average based on confidence

        if cv_confidence > 0.85:
            # High confidence from CV+Places365
            final_lighting = cv_lighting_type
            final_confidence = cv_confidence
            fusion_method = 'cv_dominant'

        elif self._lighting_semantically_similar(cv_lighting_type, clip_lighting_type):
            # Semantic agreement between CV and CLIP
            final_lighting = cv_lighting_type  # Prefer CV's specific description
            # Boost confidence when both agree (1.15x, capped at 0.95)
            final_confidence = min(cv_confidence * 1.15, 0.95)
            fusion_method = 'consensus'

        else:
            # Weighted fusion based on confidence
            # NOTE(review): assumes cv_confidence + clip_confidence > 0;
            # zero-confidence inputs would divide by zero — confirm upstream
            # guarantees non-zero confidences.
            cv_weight = cv_confidence / (cv_confidence + clip_confidence)
            clip_weight = 1.0 - cv_weight

            # If CV weight is higher, use CV result
            if cv_weight > 0.6:
                final_lighting = cv_lighting_type
                final_confidence = cv_confidence * 0.9  # penalize disagreement
                fusion_method = 'cv_weighted'
            else:
                # Use more generic description when uncertain
                final_lighting = self._generalize_lighting_description(
                    cv_lighting_type, clip_lighting_type, cv_features
                )
                final_confidence = (cv_confidence * cv_weight + clip_confidence * clip_weight) * 0.85
                fusion_method = 'generalized'

        return {
            'lighting_type': final_lighting,
            'confidence': min(final_confidence, 0.95),
            'cv_analysis': cv_lighting_type,
            'clip_prediction': clip_lighting_type,
            'fusion_method': fusion_method,
            'cv_confidence': cv_confidence,
            'clip_confidence': clip_confidence
        }

    def _lighting_semantically_similar(self, cv_type: str, clip_type: str) -> bool:
        """Check if two lighting descriptions are semantically similar.

        True when both descriptions contain a word from the same semantic
        group, or when they share at least one literal word.
        """
        # Define semantic similarity groups
        similarity_groups = [
            {'soft', 'diffused', 'overcast', 'cloudy'},
            {'bright', 'sunny', 'sunlight', 'clear'},
            {'warm', 'golden', 'amber', 'evening'},
            {'natural', 'daylight', 'outdoor'},
            {'cool', 'blue', 'twilight'},
        ]

        cv_words = set(cv_type.lower().split())
        clip_words = set(clip_type.lower().split())

        # Check if both descriptions share words from same semantic group
        for group in similarity_groups:
            cv_match = cv_words & group
            clip_match = clip_words & group
            if cv_match and clip_match:
                return True

        # Direct word overlap (e.g. both contain "light")
        common_words = cv_words & clip_words
        return len(common_words) >= 1

    def _generalize_lighting_description(self, cv_type: str, clip_type: str,
                                         cv_features: Dict) -> str:
        """Generate a generalized lighting description when CV and CLIP disagree.

        Maps raw CV features (brightness 0-255, contrast, color temperature
        ratio) to one of three safe generic descriptions. cv_type/clip_type
        are accepted for interface symmetry but not consulted here.
        """

        brightness = cv_features.get('brightness', 128)
        contrast = cv_features.get('contrast', 50)
        color_temp = cv_features.get('color_temp', 1.0)

        # Use feature-based generalization (not hard thresholds)
        brightness_norm = brightness / 255.0
        contrast_norm = min(contrast / 100.0, 1.0)

        # Decision tree based on physical features
        if contrast_norm < 0.5:
            # Low contrast
            if color_temp < 1.0:
                return 'soft diffused light'
            else:
                return 'warm ambient light'
        elif brightness_norm > 0.7:
            # High brightness
            return 'natural daylight'
        elif color_temp > 1.1:
            # Warm temperature
            return 'warm ambient light'
        else:
            # Default safe description
            return 'soft diffused light'

    def analyze_composition(self, image, detections: List[Dict]) -> Dict:
        """Analyze image composition from detection bounding boxes.

        Classifies the frame by the ratio of taller-than-wide boxes; `image`
        is accepted but not used in the current implementation.
        """
        if not detections:
            return {'composition_type': 'empty', 'vertical_ratio': 0.0}

        # Calculate vertical element ratio (boxes taller than wide)
        vertical_objects = [
            d for d in detections
            if (d['bbox'][3] - d['bbox'][1]) > (d['bbox'][2] - d['bbox'][0])
        ]
        vertical_ratio = len(vertical_objects) / max(len(detections), 1)

        # Determine composition type from the vertical ratio thresholds
        if vertical_ratio > 0.6:
            composition_type = 'urban canyon'
        elif vertical_ratio > 0.4:
            composition_type = 'vertical emphasis'
        else:
            composition_type = 'standard street view'

        return {
            'composition_type': composition_type,
            'vertical_ratio': vertical_ratio,
            'vertical_objects_count': len(vertical_objects),
            'total_objects': len(detections)
        }

    def fuse_detections(self, yolo_results: List[Dict], unknown_regions: List[Dict],
                        scene_info: Dict, image=None, cv_lighting: Dict = None) -> Dict:
        """Fuse all detection results with intelligent lighting fusion.

        Args:
            yolo_results: YOLO detections (each with 'bbox', 'confidence', ...).
            unknown_regions: Salient regions with an 'image' crop to classify
                via OpenCLIP; entries without 'image' are skipped.
            scene_info: CLIP scene analysis; its 'lighting' entry is replaced
                in place when cv_lighting is provided.
            image: Optional full image, forwarded to analyze_composition.
            cv_lighting: Optional CV/Places365 lighting dict to fuse.

        Returns:
            Dict with ranked 'detections' (top-15 + high-confidence brands),
            updated 'scene_info', 'composition', and 'total_objects'.
        """
        all_detections = []

        # Process YOLO detections with attention scores
        for det in yolo_results:
            attention_score = self._calculate_attention_score(det)
            det['attention_score'] = attention_score  # mutates input dicts
            all_detections.append(det)

        # Classify unknown regions using OpenCLIP
        for region in unknown_regions:
            if 'image' not in region:
                continue

            classification = self.clip_manager.classify_hierarchical(region['image'])

            detection = {
                'class_name': classification['top_prediction'],
                'bbox': region['bbox'],
                'confidence': classification.get('confidence', 0.5),
                'attention_score': region.get('saliency_score', 0.5),
                'source': 'openclip'
            }
            all_detections.append(detection)

        # Sort by attention score, highest first
        ranked_detections = sorted(
            all_detections,
            key=lambda x: x['attention_score'],
            reverse=True
        )

        # Filter top 15; beyond that, keep only confident branded detections.
        # NOTE(review): the `break` stops scanning at the first non-brand item
        # past slot 15, so branded detections ranked below it are dropped —
        # confirm whether `continue` was intended instead.
        filtered = []
        for det in ranked_detections:
            if len(filtered) >= 15:
                if det.get('brand') and det.get('brand_confidence', 0) > 0.45:
                    filtered.append(det)
                else:
                    break
            else:
                filtered.append(det)

        # Analyze composition (skipped when no image is supplied)
        composition = self.analyze_composition(image, filtered) if image else {}

        # Intelligent lighting fusion
        if cv_lighting:
            fused_lighting = self.fuse_lighting_analysis(cv_lighting, scene_info)
            # Update scene_info with fused lighting (in-place mutation)
            scene_info['lighting'] = {
                'top': fused_lighting['lighting_type'],
                'confidence': fused_lighting['confidence'],
                'fusion_details': fused_lighting
            }

        return {
            'detections': filtered,
            'scene_info': scene_info,
            'composition': composition,
            'total_objects': len(all_detections)
        }

    def _calculate_attention_score(self, detection: Dict) -> float:
        """Calculate attention score based on position, size, and confidence.

        Weighted blend: 0.3 * position + 0.3 * size + 0.4 * confidence.
        """
        bbox = detection['bbox']
        x1, y1, x2, y2 = bbox

        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2

        # Heuristic: x2 > 100 appears to mean "bbox is in pixel coordinates"
        # (vs normalized 0-1) — TODO confirm with the detector's output format.
        if x2 > 100:
            position_score = 0.5  # neutral score; center math only valid for normalized coords
        else:
            # Closer to image center (0.5, 0.5) -> higher score
            position_score = 1.0 - (abs(center_x - 0.5) + abs(center_y - 0.5))

        area = abs((x2 - x1) * (y2 - y1))
        if x2 > 100:
            # Normalize pixel-space area; assumes roughly 1000x1000 images —
            # NOTE(review): confirm this matches actual input resolution.
            area = area / (1000 * 1000)
        size_score = min(area, 0.5)  # cap so huge boxes don't dominate

        conf_score = detection.get('confidence', 0.5)

        attention = (
            0.3 * position_score +
            0.3 * size_score +
            0.4 * conf_score
        )

        return attention
240
+ return attention
241
+
242
+ print("✓ DetectionFusionManager (V2 with intelligent fusion) defined")
image_processor_manager.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import numpy as np
4
+ from PIL import Image
5
+ from typing import Tuple, Optional, Union
6
+ import torchvision.transforms as transforms
7
+
8
class ImageProcessorManager:
    """Image validation, preprocessing and format standardization.

    Provides loaders and per-model preprocessing entry points (YOLO, CLIP,
    Qwen2.5-VL) plus aspect-ratio-preserving resizing.
    """

    def __init__(self):
        self.supported_formats = ['JPEG', 'PNG', 'WEBP', 'JPG']
        self.min_resolution = (224, 224)

        # OpenAI-CLIP normalization constants; 336x336 bicubic resize
        clip_mean = [0.48145466, 0.4578275, 0.40821073]
        clip_std = [0.26862954, 0.26130258, 0.27577711]
        self.clip_transform = transforms.Compose([
            transforms.Resize((336, 336), interpolation=transforms.InterpolationMode.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize(mean=clip_mean, std=clip_std)
        ])

    def load_image(self, file_path: Union[str, Image.Image]) -> Image.Image:
        """Load an image from a path (or accept a PIL image directly),
        normalize it to RGB, and validate the minimum resolution.

        Raises:
            ValueError: if the file cannot be opened or is below min_resolution.
        """
        if not isinstance(file_path, Image.Image):
            try:
                image = Image.open(file_path)
            except Exception as e:
                raise ValueError(f"Failed to load image: {e}")
        else:
            image = file_path

        # Normalize mode (handles RGBA, grayscale, palette, ...)
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Reject images smaller than the minimum on either axis
        width, height = image.size
        if width < self.min_resolution[0] or height < self.min_resolution[1]:
            raise ValueError(f"Image resolution too low, minimum required: {self.min_resolution}")

        return image

    def preprocess_for_yolo(self, image: Image.Image) -> np.ndarray:
        """Preprocess image for YOLO (keep original format as an ndarray)."""
        return np.array(image)

    def preprocess_for_clip(self, image: Image.Image) -> torch.Tensor:
        """Preprocess image for CLIP (336x336, CLIP normalization)."""
        return self.clip_transform(image)

    def preprocess_for_qwen(self, image: Image.Image) -> Image.Image:
        """Preprocess image for Qwen2.5-VL (model handles dynamic resolution)."""
        return image

    def resize_with_aspect_ratio(self, image: Image.Image, max_size: int = 1024) -> Image.Image:
        """Resize so the longest side is at most max_size, preserving aspect
        ratio; returns the image unchanged when already small enough."""
        width, height = image.size
        longest = max(width, height)
        if longest > max_size:
            scale = max_size / longest
            if width > height:
                target = (max_size, int(height * scale))
            else:
                target = (int(width * scale), max_size)
            image = image.resize(target, Image.Resampling.LANCZOS)
        return image
69
+
70
+ print("✓ ImageProcessorManager defined")
landmark_prompts.py ADDED
@@ -0,0 +1,1030 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Dict, List, Optional
3
+
4
+ class LandmarkPrompts:
5
+ """
6
+ 世界地標視覺描述與 Hashtag 資料庫
7
+ 提供 20 個世界知名地標的詳細資料
8
+ """
9
+
10
+ def __init__(self):
11
+ """初始化地標資料庫"""
12
+
13
+ self.landmarks = {
14
+ # ===== 歐洲 Europe =====
15
+ "Big Ben": {
16
+ "name": "Big Ben",
17
+ "official_name": "Elizabeth Tower",
18
+ "location": {
19
+ "city": "London",
20
+ "country": "United Kingdom",
21
+ "region": "Westminster",
22
+ "continent": "Europe"
23
+ },
24
+ "visual_cues": {
25
+ "iconic_view": [
26
+ "Gothic Revival clock tower with four ornate clock faces rising beside Westminster Palace and Thames River",
27
+ "Tall Victorian tower with intricate stone detailing golden clock faces and pointed spire against London sky",
28
+ "Famous clock tower landmark showing detailed Gothic architecture with Palace of Westminster backdrop",
29
+ "Majestic bell tower with elaborate Victorian Gothic design overlooking Westminster Bridge"
30
+ ],
31
+ "architectural_details": [
32
+ "Ornate clock faces with Roman numerals surrounded by decorative Gothic stonework and gilded details",
33
+ "Victorian Gothic Revival architecture featuring pointed arches flying buttresses and limestone facade",
34
+ "Detailed carved stonework showing Gothic tracery pinnacles and decorative moldings on tower exterior",
35
+ "Cast iron and gold leaf clock mechanisms visible within ornamental Gothic Revival tower framework"
36
+ ],
37
+ "contextual_view": [
38
+ "Clock tower rising above Westminster Bridge with red double-decker buses and Thames River in foreground",
39
+ "Big Ben silhouetted against dramatic London sunset with Westminster Palace and river reflections",
40
+ "Famous landmark viewed from Parliament Square with traffic pedestrians and London Eye in distance",
41
+ "Tower seen through tree branches in nearby park with Westminster Abbey and government buildings visible"
42
+ ],
43
+ "seasonal_lighting": [
44
+ "Tower illuminated at night with golden clock faces glowing against dark sky creating iconic London scene",
45
+ "Soft morning light highlighting limestone details as mist rises from Thames creating atmospheric mood",
46
+ "Dramatic storm clouds gathering behind tower with contrasting sunlight illuminating Gothic stonework",
47
+ "Winter scene with tower emerging from fog as streetlights reflect on wet Westminster Bridge pavement"
48
+ ]
49
+ },
50
+ "hashtags": {
51
+ "zh": ["大笨鐘", "倫敦地標", "西敏寺", "泰晤士河", "英國旅遊", "倫敦", "英國"],
52
+ "en": ["BigBen", "London", "Westminster", "Thames", "UKTravel", "LondonLandmarks", "ElizabethTower"]
53
+ },
54
+ "cultural_info": {
55
+ "built_year": 1859,
56
+ "architect": "Augustus Pugin",
57
+ "architectural_style": "Gothic Revival"
58
+ }
59
+ },
60
+
61
+ "Eiffel Tower": {
62
+ "name": "Eiffel Tower",
63
+ "official_name": "La Tour Eiffel",
64
+ "location": {
65
+ "city": "Paris",
66
+ "country": "France",
67
+ "region": "Champ de Mars",
68
+ "continent": "Europe"
69
+ },
70
+ "visual_cues": {
71
+ "iconic_view": [
72
+ "Iconic iron lattice tower rising 330 meters above Champ de Mars with distinctive tapering silhouette",
73
+ "Wrought iron structure with three observation levels showing intricate lattice framework against Paris sky",
74
+ "Famous Parisian landmark with characteristic brown paint and elegant art nouveau iron lattice design",
75
+ "Monumental tower structure displaying puddle iron construction with four massive arched base legs"
76
+ ],
77
+ "architectural_details": [
78
+ "Intricate wrought iron lattice work showing 18000 metallic parts joined by 2.5 million rivets",
79
+ "Distinctive curved base arches with elevator shafts and lattice framework creating transparent appearance",
80
+ "Observation deck platforms with iron railings providing panoramic views across Paris rooftops",
81
+ "Antique elevators and iron staircases winding through lattice structure between three viewing levels"
82
+ ],
83
+ "contextual_view": [
84
+ "Tower framed by Trocadéro fountains with reflecting pools and Parisian cityscape in background",
85
+ "Eiffel Tower viewed from Seine River with tourist boats and bridges in romantic Parisian setting",
86
+ "Landmark rising above Champ de Mars gardens with visitors and green lawns in foreground",
87
+ "Tower seen from Montparnasse showing Paris rooftops Sacré-Cœur and urban landscape panorama"
88
+ ],
89
+ "seasonal_lighting": [
90
+ "Tower illuminated at night with golden lights creating magical sparkling effect every hour",
91
+ "Sunset silhouette with tower's iron structure outlined against orange and pink Paris sky",
92
+ "Cherry blossoms framing tower in spring with soft natural light on iron lattice",
93
+ "Winter scene with tower emerging from clouds as snow dusts Champ de Mars gardens"
94
+ ]
95
+ },
96
+ "hashtags": {
97
+ "zh": ["艾菲爾鐵塔", "巴黎鐵塔", "巴黎地標", "法國旅遊", "巴黎", "鐵塔"],
98
+ "en": ["EiffelTower", "Paris", "ParisLandmark", "TourEiffel", "France", "ParisTravel"]
99
+ },
100
+ "cultural_info": {
101
+ "built_year": 1889,
102
+ "architect": "Gustave Eiffel",
103
+ "architectural_style": "Structural Expressionism"
104
+ }
105
+ },
106
+
107
+ "Colosseum": {
108
+ "name": "Colosseum",
109
+ "official_name": "Flavian Amphitheatre",
110
+ "location": {
111
+ "city": "Rome",
112
+ "country": "Italy",
113
+ "region": "Lazio",
114
+ "continent": "Europe"
115
+ },
116
+ "visual_cues": {
117
+ "iconic_view": [
118
+ "Ancient Roman amphitheater with massive oval structure showing three tiers of arches in weathered stone",
119
+ "Iconic ruined arena with partially collapsed walls revealing internal chambers and underground passages",
120
+ "Monumental stone amphitheater displaying Roman engineering with distinctive arched facade and columns",
121
+ "Historic gladiatorial arena showing travertine limestone construction with Doric Ionic and Corinthian orders"
122
+ ],
123
+ "architectural_details": [
124
+ "Three stories of arches supported by columns showing progression of classical orders from ground to top",
125
+ "Weathered travertine blocks and brick revealing ancient construction techniques and earthquake damage",
126
+ "Hypogeum underground chambers visible through arena floor showing complex staging machinery areas",
127
+ "Massive exterior wall with remaining arches brackets and column fragments from original four-story height"
128
+ ],
129
+ "contextual_view": [
130
+ "Colosseum rising above Roman Forum with ancient temples columns and ruins in surrounding area",
131
+ "Amphitheater viewed from Palatine Hill showing relationship to Imperial Palace and Roman landscape",
132
+ "Monument surrounded by modern Rome with traffic tourists and urban development contrasting ancient stone",
133
+ "Arena illuminated at dusk with Constantine's Arch and Roman ruins visible in archaeological park"
134
+ ],
135
+ "seasonal_lighting": [
136
+ "Golden hour light warming travertine stone with dramatic shadows emphasizing architectural depth",
137
+ "Night illumination creating dramatic effect on ancient arches with warm amber lighting",
138
+ "Overcast sky providing even light showing weathering patterns and stone texture details",
139
+ "Bright midday sun creating strong contrast between light and shadow in deep archways"
140
+ ]
141
+ },
142
+ "hashtags": {
143
+ "zh": ["羅馬競技場", "古羅馬", "羅馬", "義大利旅遊", "古蹟", "世界遺產"],
144
+ "en": ["Colosseum", "Rome", "AncientRome", "Italy", "Roman", "WorldHeritage"]
145
+ },
146
+ "cultural_info": {
147
+ "built_year": 80,
148
+ "architect": "Emperor Vespasian",
149
+ "architectural_style": "Ancient Roman"
150
+ }
151
+ },
152
+
153
+ "Sagrada Familia": {
154
+ "name": "Sagrada Familia",
155
+ "official_name": "Basílica de la Sagrada Família",
156
+ "location": {
157
+ "city": "Barcelona",
158
+ "country": "Spain",
159
+ "region": "Catalonia",
160
+ "continent": "Europe"
161
+ },
162
+ "visual_cues": {
163
+ "iconic_view": [
164
+ "Extraordinary basilica with soaring organic towers showing Gaudí's distinctive naturalistic Gothic design",
165
+ "Unfinished cathedral with multiple spires featuring intricate stone carving and colorful mosaic details",
166
+ "Fantastical church architecture combining Gothic and Art Nouveau with nature-inspired sculptural forms",
167
+ "Massive religious monument with elaborate facades showing biblical scenes in highly detailed stonework"
168
+ ],
169
+ "architectural_details": [
170
+ "Organic columns branching like trees supporting vaulted ceiling with natural light filtering through",
171
+ "Nativity facade with detailed sculptural groups showing biblical narratives in stone",
172
+ "Colorful stained glass windows creating rainbow light effects throughout cathedral interior",
173
+ "Hyperboloid structures and ruled surfaces demonstrating Gaudí's mathematical geometric approach"
174
+ ],
175
+ "contextual_view": [
176
+ "Basilica towers rising above Barcelona cityscape with Mediterranean architecture and urban landscape",
177
+ "Church viewed from Plaça de Gaudí with reflecting pool mirroring elaborate facades",
178
+ "Construction cranes visible around towers showing ongoing building work on Gaudí's vision",
179
+ "Interior forest of columns with visitors experiencing spectacular light and space"
180
+ ],
181
+ "seasonal_lighting": [
182
+ "Sunset light streaming through stained glass creating vibrant color patterns on stone columns",
183
+ "Night illumination highlighting intricate facade details with dramatic architectural lighting",
184
+ "Morning light revealing texture and depth of carved stone with soft shadows",
185
+ "Bright Mediterranean sun emphasizing colorful mosaic work on tower exteriors"
186
+ ]
187
+ },
188
+ "hashtags": {
189
+ "zh": ["聖家堂", "巴塞隆納", "高第建築", "西班牙旅遊", "世界遺產", "教堂"],
190
+ "en": ["SagradaFamilia", "Barcelona", "Gaudi", "Spain", "Cathedral", "Architecture"]
191
+ },
192
+ "cultural_info": {
193
+ "built_year": 1882,
194
+ "architect": "Antoni Gaudí",
195
+ "architectural_style": "Catalan Modernism"
196
+ }
197
+ },
198
+
199
+ "Brandenburg Gate": {
200
+ "name": "Brandenburg Gate",
201
+ "official_name": "Brandenburger Tor",
202
+ "location": {
203
+ "city": "Berlin",
204
+ "country": "Germany",
205
+ "region": "Mitte",
206
+ "continent": "Europe"
207
+ },
208
+ "visual_cues": {
209
+ "iconic_view": [
210
+ "Neoclassical triumphal arch with twelve Doric columns supporting entablature and Quadriga sculpture",
211
+ "Monumental city gate with goddess of victory chariot crowning sandstone classical structure",
212
+ "Historic gateway showing Greek Revival architecture with columned portico and sculptural decoration",
213
+ "Famous Berlin landmark with symmetrical design and copper Quadriga statue against sky"
214
+ ],
215
+ "architectural_details": [
216
+ "Twelve Doric columns arranged in six pairs creating five passageways through gate structure",
217
+ "Quadriga sculpture showing goddess Victoria in four-horse chariot with Prussian eagle and Iron Cross",
218
+ "Sandstone construction with classical Greek proportions and restrained decorative elements",
219
+ "Relief sculptures in metopes showing mythological scenes and Prussian military symbolism"
220
+ ],
221
+ "contextual_view": [
222
+ "Gate standing at Pariser Platz with modern buildings and historic square surrounding monument",
223
+ "Brandenburg Gate viewed down Unter den Linden boulevard with linden trees and embassies",
224
+ "Monument at edge of Tiergarten park showing relationship to green space and city",
225
+ "Gate illuminated with Reichstag building and government district visible in background"
226
+ ],
227
+ "seasonal_lighting": [
228
+ "Dramatic night lighting in various colors for events creating stunning visual effects",
229
+ "Soft morning light highlighting sandstone texture and classical architectural details",
230
+ "Sunset silhouette with Quadriga outlined against colorful Berlin sky",
231
+ "Winter scene with gate surrounded by Christmas market lights and seasonal decorations"
232
+ ]
233
+ },
234
+ "hashtags": {
235
+ "zh": ["布蘭登堡門", "柏林", "德國旅遊", "歷史建築", "柏林地標"],
236
+ "en": ["BrandenburgGate", "Berlin", "Germany", "BerlinLandmark", "GermanHistory"]
237
+ },
238
+ "cultural_info": {
239
+ "built_year": 1791,
240
+ "architect": "Carl Gotthard Langhans",
241
+ "architectural_style": "Neoclassicism"
242
+ }
243
+ },
244
+
245
+ # ===== 亞洲 Asia =====
246
+ "Tokyo Tower": {
247
+ "name": "Tokyo Tower",
248
+ "official_name": "東京タワー",
249
+ "location": {
250
+ "city": "Tokyo",
251
+ "country": "Japan",
252
+ "region": "Minato",
253
+ "continent": "Asia"
254
+ },
255
+ "visual_cues": {
256
+ "iconic_view": [
257
+ "Red and white lattice steel tower inspired by Eiffel Tower rising 333 meters above Tokyo",
258
+ "Iconic communication tower with distinctive orange and white paint showing two observation decks",
259
+ "Tall broadcasting tower with lattice framework and observation platforms overlooking Tokyo cityscape",
260
+ "Famous Japanese landmark tower with red-orange color scheme and tapering lattice structure"
261
+ ],
262
+ "architectural_details": [
263
+ "Steel lattice framework painted international orange and white for aviation safety",
264
+ "Two observation decks at 150m and 250m heights with panoramic windows and viewing platforms",
265
+ "Four massive support legs with elevators and emergency stairs running through lattice structure",
266
+ "Broadcasting antennas and equipment at tower top with decorative lighting systems"
267
+ ],
268
+ "contextual_view": [
269
+ "Tower rising above Shiba Park with traditional temple buildings and modern Tokyo skyscrapers",
270
+ "Tokyo Tower viewed from Roppongi Hills with Mount Fuji visible in distant background",
271
+ "Landmark tower dominating skyline with Rainbow Bridge and Tokyo Bay in view",
272
+ "Tower surrounded by cherry blossoms in spring with pink petals and urban landscape"
273
+ ],
274
+ "seasonal_lighting": [
275
+ "Tower illuminated at night in orange creating warm glow against Tokyo night sky",
276
+ "Special lighting displays in various colors for holidays and events creating festive atmosphere",
277
+ "Sunset view with tower silhouetted against orange and pink sky",
278
+ "Winter illumination with tower and surrounding trees decorated with seasonal lights"
279
+ ]
280
+ },
281
+ "hashtags": {
282
+ "zh": ["東京鐵塔", "東京", "日本旅遊", "東京地標", "日本"],
283
+ "en": ["TokyoTower", "Tokyo", "Japan", "TokyoLandmark", "JapanTravel"]
284
+ },
285
+ "cultural_info": {
286
+ "built_year": 1958,
287
+ "architect": "Tachū Naitō",
288
+ "architectural_style": "Lattice Tower"
289
+ }
290
+ },
291
+
292
+ "Taipei 101": {
293
+ "name": "Taipei 101",
294
+ "official_name": "台北101",
295
+ "location": {
296
+ "city": "Taipei",
297
+ "country": "Taiwan",
298
+ "region": "Xinyi District",
299
+ "continent": "Asia"
300
+ },
301
+ "visual_cues": {
302
+ "iconic_view": [
303
+ "Massive skyscraper with bamboo-inspired segmented design rising 508 meters above Taipei",
304
+ "101-story tower with distinctive eight-segment structure and traditional Chinese architectural elements",
305
+ "Iconic green-glass building with pagoda-like tiers showing postmodern Asian design",
306
+ "Supertall skyscraper with gold-tinted windows and traditional motifs in modern interpretation"
307
+ ],
308
+ "architectural_details": [
309
+ "Eight eight-story modules stacked vertically representing prosperity in Chinese numerology",
310
+ "Traditional ruyi ornaments at corners of each section adding cultural architectural elements",
311
+ "Massive tuned mass damper sphere visible to visitors providing earthquake protection",
312
+ "Double-deck elevators with pressurization system ascending at world-record speeds"
313
+ ],
314
+ "contextual_view": [
315
+ "Tower dominating Taipei skyline with Elephant Mountain and lush green hills in background",
316
+ "Building viewed from Xiangshan with city sprawl and mountains creating dramatic setting",
317
+ "Taipei 101 rising above Xinyi shopping district with modern urban development below",
318
+ "Tower illuminated against night sky with busy streets and city lights surrounding base"
319
+ ],
320
+ "seasonal_lighting": [
321
+ "New Year's Eve fireworks display launched from building creating spectacular light show",
322
+ "LED lighting system displaying colors for holidays and special occasions",
323
+ "Sunset illumination with building's glass reflecting golden and orange tones",
324
+ "Night view with tower lit in green and gold standing out against dark sky"
325
+ ]
326
+ },
327
+ "hashtags": {
328
+ "zh": ["台北101", "台北", "台灣", "台北地標", "摩天大樓", "台灣旅遊"],
329
+ "en": ["Taipei101", "Taipei", "Taiwan", "TaipeiLandmark", "Skyscraper", "TaiwanTravel"]
330
+ },
331
+ "cultural_info": {
332
+ "built_year": 2004,
333
+ "architect": "C.Y. Lee & Partners",
334
+ "architectural_style": "Postmodern"
335
+ }
336
+ },
337
+
338
+ "Burj Khalifa": {
339
+ "name": "Burj Khalifa",
340
+ "official_name": "برج خليفة",
341
+ "location": {
342
+ "city": "Dubai",
343
+ "country": "United Arab Emirates",
344
+ "region": "Downtown Dubai",
345
+ "continent": "Asia"
346
+ },
347
+ "visual_cues": {
348
+ "iconic_view": [
349
+ "World's tallest building at 828 meters with Y-shaped floor plan and sleek tapering design",
350
+ "Supertall skyscraper with reflective glass facade and setback design inspired by desert flower",
351
+ "Iconic needle-like tower piercing clouds with distinctive spire and observation decks",
352
+ "Neo-futurist architecture with Islamic geometric patterns in modern glass and steel construction"
353
+ ],
354
+ "architectural_details": [
355
+ "Buttressed core structural system with wings extending from central hexagonal hub",
356
+ "Reflective glazing with aluminum and textured stainless steel spandrel panels",
357
+ "Observation decks on 124th 125th and 148th floors offering panoramic views",
358
+ "Spire adding 200 meters to height with communication equipment and decorative elements"
359
+ ],
360
+ "contextual_view": [
361
+ "Tower rising from Downtown Dubai with Dubai Mall fountain show and urban development below",
362
+ "Building dominating skyline with Persian Gulf and Palm Jumeirah visible in distance",
363
+ "Burj Khalifa viewed from desert showing contrast between modern architecture and natural landscape",
364
+ "Tower at center of Dubai's business district with surrounding high-rises and infrastructure"
365
+ ],
366
+ "seasonal_lighting": [
367
+ "LED light show on facade creating dynamic patterns and colors for celebrations",
368
+ "Night illumination with tower glowing against dark sky as city lights spread below",
369
+ "Sunset view with building's glass reflecting orange and gold desert light",
370
+ "New Year's Eve spectacular with building covered in coordinated light and firework display"
371
+ ]
372
+ },
373
+ "hashtags": {
374
+ "zh": ["哈里發塔", "杜拜", "阿聯酋", "世界最高", "摩天大樓", "杜拜旅遊"],
375
+ "en": ["BurjKhalifa", "Dubai", "UAE", "WorldsTallest", "Skyscraper", "DubaiTravel"]
376
+ },
377
+ "cultural_info": {
378
+ "built_year": 2010,
379
+ "architect": "Adrian Smith (SOM)",
380
+ "architectural_style": "Neo-futurism"
381
+ }
382
+ },
383
+
384
+ "Petronas Towers": {
385
+ "name": "Petronas Towers",
386
+ "official_name": "Menara Berkembar Petronas",
387
+ "location": {
388
+ "city": "Kuala Lumpur",
389
+ "country": "Malaysia",
390
+ "region": "KLCC",
391
+ "continent": "Asia"
392
+ },
393
+ "visual_cues": {
394
+ "iconic_view": [
395
+ "Twin skyscrapers with Islamic-inspired design connected by sky bridge at 452 meters height",
396
+ "Matching 88-story towers with distinctive postmodern style and geometric floor plans",
397
+ "Iconic twin towers with stainless steel and glass facades showing eight-pointed star motif",
398
+ "Symmetrical tower pair with sky bridge and spires creating recognizable Kuala Lumpur silhouette"
399
+ ],
400
+ "architectural_details": [
401
+ "Floor plan based on Islamic geometric patterns with two interlocking squares creating eight-pointed star",
402
+ "Stainless steel and glass curtain wall with Islamic art-inspired design elements",
403
+ "Double-deck sky bridge on 41st and 42nd floors connecting towers at 170 meters height",
404
+ "Pinnacles adding 73 meters to height with Islamic architectural styling and lighting"
405
+ ],
406
+ "contextual_view": [
407
+ "Towers dominating KLCC Park with fountain lake and green space in foreground",
408
+ "Twin buildings viewed from KL Tower showing relationship to city and surrounding jungle hills",
409
+ "Petronas Towers as centerpiece of business district with modern urban development",
410
+ "Towers reflecting in KLCC Park water features with tropical landscaping and city backdrop"
411
+ ],
412
+ "seasonal_lighting": [
413
+ "Towers illuminated at night with synchronized lighting creating mirror image effect",
414
+ "Special lighting displays for Malaysian holidays in national colors",
415
+ "Blue hour with towers glowing against twilight sky as city lights emerge",
416
+ "Dramatic storm clouds behind towers with lightning and architectural lighting contrast"
417
+ ]
418
+ },
419
+ "hashtags": {
420
+ "zh": ["雙子星大樓", "吉隆坡", "馬來西亞", "雙峰塔", "吉隆坡地標"],
421
+ "en": ["PetronasTowers", "KualaLumpur", "Malaysia", "TwinTowers", "KLCC"]
422
+ },
423
+ "cultural_info": {
424
+ "built_year": 1998,
425
+ "architect": "César Pelli",
426
+ "architectural_style": "Postmodern Islamic"
427
+ }
428
+ },
429
+
430
+ "Forbidden City": {
431
+ "name": "Forbidden City",
432
+ "official_name": "故宮",
433
+ "location": {
434
+ "city": "Beijing",
435
+ "country": "China",
436
+ "region": "Dongcheng District",
437
+ "continent": "Asia"
438
+ },
439
+ "visual_cues": {
440
+ "iconic_view": [
441
+ "Massive imperial palace complex with yellow-glazed roof tiles and red walls showing traditional Chinese architecture",
442
+ "Ancient palace with multiple courtyards ceremonial halls and gates in classical Chinese design",
443
+ "Historic royal residence with distinctive golden roofs and vermilion walls in orthogonal layout",
444
+ "Imperial complex with 980 buildings showing Ming and Qing dynasty architectural grandeur"
445
+ ],
446
+ "architectural_details": [
447
+ "Yellow glazed roof tiles symbolizing imperial authority with elaborate ceramic figurine decorations",
448
+ "Vermilion walls and columns with golden door studs arranged in traditional Chinese numerical symbolism",
449
+ "Marble terraces and balustrades with dragon and phoenix carved relief decorations",
450
+ "Wooden architecture using traditional dougong bracket system without nails in construction"
451
+ ],
452
+ "contextual_view": [
453
+ "Palace viewed through Meridian Gate with vast courtyard and Hall of Supreme Harmony beyond",
454
+ "Forbidden City from Jingshan Park showing complete palace layout and Beijing cityscape",
455
+ "Palace moat and walls with modern Beijing visible in background showing old and new contrast",
456
+ "Interior courtyard with tourists and traditional architecture under blue Beijing sky"
457
+ ],
458
+ "seasonal_lighting": [
459
+ "Winter snow covering golden roofs creating dramatic color contrast with white and gold",
460
+ "Autumn light warming red walls with traditional Chinese architecture in clear air",
461
+ "Sunset illuminating yellow roof tiles with golden hour light creating magical atmosphere",
462
+ "Night opening events with palace buildings subtly illuminated showing architectural details"
463
+ ]
464
+ },
465
+ "hashtags": {
466
+ "zh": ["故宮", "北京", "紫禁城", "中國", "古蹟", "世界遺產"],
467
+ "en": ["ForbiddenCity", "Beijing", "China", "ImperialPalace", "WorldHeritage", "Palace"]
468
+ },
469
+ "cultural_info": {
470
+ "built_year": 1420,
471
+ "architect": "Kuai Xiang",
472
+ "architectural_style": "Traditional Chinese"
473
+ }
474
+ },
475
+
476
+ # ===== 美洲 Americas =====
477
+ "Statue of Liberty": {
478
+ "name": "Statue of Liberty",
479
+ "official_name": "Liberty Enlightening the World",
480
+ "location": {
481
+ "city": "New York",
482
+ "country": "United States",
483
+ "region": "Liberty Island",
484
+ "continent": "North America"
485
+ },
486
+ "visual_cues": {
487
+ "iconic_view": [
488
+ "Colossal neoclassical sculpture with copper patina holding torch aloft on Liberty Island",
489
+ "Famous statue with crown and tablet showing robed female figure representing Libertas",
490
+ "Iconic green copper statue on pedestal with torch raised and broken chains at feet",
491
+ "Monument with seven-ray crown tablet and torch symbolizing freedom and democracy"
492
+ ],
493
+ "architectural_details": [
494
+ "Copper skin with green patina over iron framework designed by Gustave Eiffel",
495
+ "Crown with seven rays representing seven continents and seas with 25 windows",
496
+ "Tablet inscribed with July 4 1776 in Roman numerals held in left hand",
497
+ "Broken shackles and chains at feet symbolizing freedom from oppression"
498
+ ],
499
+ "contextual_view": [
500
+ "Statue viewed from Battery Park with New York Harbor and Manhattan skyline behind",
501
+ "Liberty Island with statue and star-shaped Fort Wood pedestal from aerial view",
502
+ "Statue with Staten Island Ferry passing in foreground and Ellis Island nearby",
503
+ "Sunset silhouette with statue outlined against orange sky and New York City lights"
504
+ ],
505
+ "seasonal_lighting": [
506
+ "Statue illuminated at night with dramatic uplighting showing sculptural details",
507
+ "Golden hour light warming copper patina with soft shadows on draped clothing",
508
+ "Fourth of July fireworks surrounding statue with patriotic celebration",
509
+ "Misty morning with statue emerging from harbor fog creating mystical atmosphere"
510
+ ]
511
+ },
512
+ "hashtags": {
513
+ "zh": ["自由女神", "紐約", "美國", "紐約地標", "自由女神像"],
514
+ "en": ["StatueOfLiberty", "NewYork", "NYC", "Liberty", "USA", "America"]
515
+ },
516
+ "cultural_info": {
517
+ "built_year": 1886,
518
+ "architect": "Frédéric Auguste Bartholdi",
519
+ "architectural_style": "Neoclassicism"
520
+ }
521
+ },
522
+
523
+ "Golden Gate Bridge": {
524
+ "name": "Golden Gate Bridge",
525
+ "official_name": "Golden Gate Bridge",
526
+ "location": {
527
+ "city": "San Francisco",
528
+ "country": "United States",
529
+ "region": "California",
530
+ "continent": "North America"
531
+ },
532
+ "visual_cues": {
533
+ "iconic_view": [
534
+ "Suspension bridge with distinctive International Orange color spanning Golden Gate strait",
535
+ "Art Deco bridge with two towers and cables connecting San Francisco to Marin County",
536
+ "Famous orange bridge with 1.7-mile span over blue Pacific waters and hills beyond",
537
+ "Iconic suspension structure with tall towers and sweeping cables against San Francisco Bay"
538
+ ],
539
+ "architectural_details": [
540
+ "Art Deco towers rising 227 meters above water with distinctive vertical ribbing",
541
+ "Main suspension cables made of 27000 wires in distinctive orange color",
542
+ "Deck structure with six traffic lanes suspended from vertical cables",
543
+ "Art Deco design elements including tower portals and lighting fixtures in period style"
544
+ ],
545
+ "contextual_view": [
546
+ "Bridge viewed from Marin Headlands with San Francisco skyline and bay in background",
547
+ "Golden Gate from Baker Beach with bridge spanning across water to northern hills",
548
+ "Bridge emerging from famous fog with towers visible above marine layer",
549
+ "Aerial view showing complete span connecting two peninsulas across Golden Gate strait"
550
+ ],
551
+ "seasonal_lighting": [
552
+ "Sunset with bridge silhouetted against orange and purple sky over Pacific Ocean",
553
+ "Bridge partially obscured by fog creating mysterious atmospheric effect",
554
+ "Blue hour with bridge illuminated and city lights twinkling in background",
555
+ "Clear day with International Orange color vibrant against blue sky and water"
556
+ ]
557
+ },
558
+ "hashtags": {
559
+ "zh": ["金門大橋", "舊金山", "美國", "三藩市", "加州"],
560
+ "en": ["GoldenGateBridge", "SanFrancisco", "SF", "California", "USA", "Bridge"]
561
+ },
562
+ "cultural_info": {
563
+ "built_year": 1937,
564
+ "architect": "Joseph Strauss",
565
+ "architectural_style": "Art Deco"
566
+ }
567
+ },
568
+
569
+ "Christ the Redeemer": {
570
+ "name": "Christ the Redeemer",
571
+ "official_name": "Cristo Redentor",
572
+ "location": {
573
+ "city": "Rio de Janeiro",
574
+ "country": "Brazil",
575
+ "region": "Corcovado Mountain",
576
+ "continent": "South America"
577
+ },
578
+ "visual_cues": {
579
+ "iconic_view": [
580
+ "Massive Art Deco statue of Jesus Christ with outstretched arms atop Corcovado mountain",
581
+ "Colossal soapstone and concrete sculpture overlooking Rio with arms spanning 28 meters",
582
+ "Iconic statue at 30 meters height standing on 8-meter pedestal above rainforest",
583
+ "Monument with distinctive silhouette of Christ figure blessing city from mountain peak"
584
+ ],
585
+ "architectural_details": [
586
+ "Reinforced concrete and soapstone construction with Art Deco styling",
587
+ "Triangular mosaic tiles covering exterior in whitish soapstone material",
588
+ "Internal chapel at pedestal base with access stairs and elevator system",
589
+ "Outstretched arms forming cross shape with detailed hands and robed figure"
590
+ ],
591
+ "contextual_view": [
592
+ "Statue viewed from Sugarloaf Mountain with Guanabara Bay and Rio sprawl below",
593
+ "Christ overlooking Copacabana and Ipanema beaches with Atlantic Ocean beyond",
594
+ "Monument surrounded by Tijuca Forest with lush tropical vegetation on mountain",
595
+ "Aerial view showing statue's position above city with both ocean and mountains visible"
596
+ ],
597
+ "seasonal_lighting": [
598
+ "Statue illuminated at night with dramatic lighting visible across Rio",
599
+ "Sunset silhouette with statue outlined against orange sky above darkening city",
600
+ "Stormy weather with lightning behind statue creating dramatic atmosphere",
601
+ "Special event lighting in various colors for holidays and celebrations"
602
+ ]
603
+ },
604
+ "hashtags": {
605
+ "zh": ["基督像", "里約熱內盧", "巴西", "救世基督像", "世界新七大奇蹟"],
606
+ "en": ["ChristTheRedeemer", "Rio", "Brazil", "RioDeJaneiro", "CristoRedentor"]
607
+ },
608
+ "cultural_info": {
609
+ "built_year": 1931,
610
+ "architect": "Paul Landowski",
611
+ "architectural_style": "Art Deco"
612
+ }
613
+ },
614
+
615
+ "CN Tower": {
616
+ "name": "CN Tower",
617
+ "official_name": "Canadian National Tower",
618
+ "location": {
619
+ "city": "Toronto",
620
+ "country": "Canada",
621
+ "region": "Ontario",
622
+ "continent": "North America"
623
+ },
624
+ "visual_cues": {
625
+ "iconic_view": [
626
+ "Concrete communication tower at 553 meters with distinctive pod and antenna spire",
627
+ "Iconic Toronto landmark with observation deck pod and long concrete shaft",
628
+ "Tall broadcasting tower with revolving restaurant and glass floor observation area",
629
+ "Slender concrete tower dominating Toronto skyline with characteristic Y-shaped floor plan"
630
+ ],
631
+ "architectural_details": [
632
+ "Hexagonal concrete shaft with three support legs forming Y-shaped base",
633
+ "SkyPod observation level with indoor and outdoor viewing areas at 447 meters",
634
+ "Glass floor section allowing visitors to look straight down to ground",
635
+ "Revolving restaurant completing 360-degree rotation every 72 minutes"
636
+ ],
637
+ "contextual_view": [
638
+ "Tower rising above Toronto skyline with Lake Ontario and city sprawl visible",
639
+ "CN Tower viewed from Toronto Islands with waterfront and downtown core",
640
+ "Tower dominating cityscape with Rogers Centre stadium and financial district nearby",
641
+ "Landmark visible from throughout Greater Toronto Area as defining skyline element"
642
+ ],
643
+ "seasonal_lighting": [
644
+ "Tower illuminated at night in various colors for events and causes",
645
+ "Canada Day celebration with tower lit in red and white national colors",
646
+ "Sunset with tower silhouetted against colorful sky over Lake Ontario",
647
+ "Winter scene with tower emerging from snow and city lights below"
648
+ ]
649
+ },
650
+ "hashtags": {
651
+ "zh": ["CN塔", "多倫多", "加拿大", "多倫多地標", "加拿大國家電視塔"],
652
+ "en": ["CNTower", "Toronto", "Canada", "TorontoLandmark", "YYZ"]
653
+ },
654
+ "cultural_info": {
655
+ "built_year": 1976,
656
+ "architect": "John Andrews",
657
+ "architectural_style": "Modern"
658
+ }
659
+ },
660
+
661
+ # ===== 大洋洲與其他 Oceania & Others =====
662
+ "Sydney Opera House": {
663
+ "name": "Sydney Opera House",
664
+ "official_name": "Sydney Opera House",
665
+ "location": {
666
+ "city": "Sydney",
667
+ "country": "Australia",
668
+ "region": "Bennelong Point",
669
+ "continent": "Oceania"
670
+ },
671
+ "visual_cues": {
672
+ "iconic_view": [
673
+ "Expressionist modern design with distinctive white shell-shaped roof sails on harbor peninsula",
674
+ "Multiple shell structures covered in white and cream tiles rising from water's edge",
675
+ "Iconic performance venue with overlapping concrete shells creating sail-like silhouette",
676
+ "Modernist architecture with innovative roof design of interlocking vaulted shells"
677
+ ],
678
+ "architectural_details": [
679
+ "Precast concrete ribs covered with 1056006 white and cream Swedish tiles",
680
+ "Shell structures based on spherical geometry creating self-supporting roof sections",
681
+ "Glass curtain walls filling spaces between shells and podium below",
682
+ "Multiple performance halls including Concert Hall and Joan Sutherland Theatre within shells"
683
+ ],
684
+ "contextual_view": [
685
+ "Opera House on Bennelong Point with Sydney Harbour Bridge in background",
686
+ "Building viewed from Circular Quay with harbor ferries and city skyline",
687
+ "Opera House at sunset with sails reflecting golden light over harbor waters",
688
+ "Aerial view showing building's position on peninsula with Royal Botanic Gardens adjacent"
689
+ ],
690
+ "seasonal_lighting": [
691
+ "Vivid Sydney festival with colorful projections on shell surfaces",
692
+ "Sunset illuminating white tiles with warm light and harbor reflections",
693
+ "Night lighting highlighting architectural forms against dark harbor",
694
+ "New Year's Eve with fireworks from Harbour Bridge framing Opera House"
695
+ ]
696
+ },
697
+ "hashtags": {
698
+ "zh": ["雪梨歌劇院", "雪梨", "澳洲", "澳大利亞", "世界遺產"],
699
+ "en": ["SydneyOperaHouse", "Sydney", "Australia", "OperaHouse", "WorldHeritage"]
700
+ },
701
+ "cultural_info": {
702
+ "built_year": 1973,
703
+ "architect": "Jørn Utzon",
704
+ "architectural_style": "Expressionist Modernism"
705
+ }
706
+ },
707
+
708
+ "Taj Mahal": {
709
+ "name": "Taj Mahal",
710
+ "official_name": "ताज महल",
711
+ "location": {
712
+ "city": "Agra",
713
+ "country": "India",
714
+ "region": "Uttar Pradesh",
715
+ "continent": "Asia"
716
+ },
717
+ "visual_cues": {
718
+ "iconic_view": [
719
+ "White marble mausoleum with central dome and four minarets in Mughal architecture style",
720
+ "Ivory-white marble structure with perfect symmetry reflected in long rectangular pool",
721
+ "Iconic domed monument with intricate inlay work and Islamic calligraphy decorations",
722
+ "Majestic tomb complex with main building flanked by symmetrical mosque and guest house"
723
+ ],
724
+ "architectural_details": [
725
+ "Central dome rising 35 meters surrounded by four smaller chattri domes",
726
+ "Pietra dura inlay work with semi-precious stones creating floral patterns",
727
+ "Four minarets at corners standing 40 meters high with tilted design for earthquake safety",
728
+ "Calligraphic inscriptions from Quran decorating entrance archways in black marble"
729
+ ],
730
+ "contextual_view": [
731
+ "Taj Mahal viewed through main gateway with frame creating first impression",
732
+ "Monument reflected in Yamuna River during calm conditions with gardens in foreground",
733
+ "Taj from Mehtab Bagh garden across river showing rear view and riverbank",
734
+ "Complex with charbagh Persian garden layout leading to mausoleum platform"
735
+ ],
736
+ "seasonal_lighting": [
737
+ "Sunrise with monument glowing pink and orange in soft morning light",
738
+ "Full moon night viewing with white marble luminous under moonlight",
739
+ "Sunset creating warm golden tones on marble with long shadows",
740
+ "Misty morning with Taj emerging from fog over Yamuna River"
741
+ ]
742
+ },
743
+ "hashtags": {
744
+ "zh": ["泰姬陵", "印度", "阿格拉", "世界遺產", "世界奇蹟"],
745
+ "en": ["TajMahal", "India", "Agra", "WorldHeritage", "Monument", "Mausoleum"]
746
+ },
747
+ "cultural_info": {
748
+ "built_year": 1653,
749
+ "architect": "Ustad Ahmad Lahauri",
750
+ "architectural_style": "Mughal"
751
+ }
752
+ },
753
+
754
+ "Pyramids of Giza": {
755
+ "name": "Pyramids of Giza",
756
+ "official_name": "أهرامات الجيزة",
757
+ "location": {
758
+ "city": "Giza",
759
+ "country": "Egypt",
760
+ "region": "Greater Cairo",
761
+ "continent": "Africa"
762
+ },
763
+ "visual_cues": {
764
+ "iconic_view": [
765
+ "Three ancient pyramids rising from desert plateau with Great Pyramid as largest structure",
766
+ "Massive limestone pyramids with Great Sphinx in foreground on Giza Plateau",
767
+ "Ancient Egyptian royal tombs with precise geometric forms against desert sky",
768
+ "Monumental pyramids showing weathered limestone blocks and missing outer casing"
769
+ ],
770
+ "architectural_details": [
771
+ "Great Pyramid originally 146 meters with 2.3 million limestone blocks",
772
+ "Precise alignment to cardinal directions with astronomical significance",
773
+ "Internal chambers and passages including King's Chamber and Grand Gallery",
774
+ "Remaining casing stones at apex showing original smooth white limestone covering"
775
+ ],
776
+ "contextual_view": [
777
+ "Pyramids with Great Sphinx in foreground and Cairo urban sprawl in background",
778
+ "Three pyramids aligned with smaller queens pyramids and ancient cemetery",
779
+ "Desert landscape with pyramids and camel riders providing scale",
780
+ "Aerial view showing pyramid complex relationship to Nile River and modern city"
781
+ ],
782
+ "seasonal_lighting": [
783
+ "Sound and light show with colorful illumination on pyramid faces at night",
784
+ "Sunrise with pyramids silhouetted against orange desert sky",
785
+ "Harsh midday sun creating strong shadows and highlighting weathered stone",
786
+ "Golden hour light warming limestone with dramatic shadows emphasizing geometry"
787
+ ]
788
+ },
789
+ "hashtags": {
790
+ "zh": ["金字塔", "埃及", "吉薩", "古埃及", "世界奇蹟", "人面獅身像"],
791
+ "en": ["Pyramids", "Egypt", "Giza", "GreatPyramid", "AncientEgypt", "Sphinx"]
792
+ },
793
+ "cultural_info": {
794
+ "built_year": -2560,
795
+ "architect": "Hemiunu",
796
+ "architectural_style": "Ancient Egyptian"
797
+ }
798
+ },
799
+
800
+ "Machu Picchu": {
801
+ "name": "Machu Picchu",
802
+ "official_name": "Machu Picchu",
803
+ "location": {
804
+ "city": "Cusco Region",
805
+ "country": "Peru",
806
+ "region": "Urubamba Province",
807
+ "continent": "South America"
808
+ },
809
+ "visual_cues": {
810
+ "iconic_view": [
811
+ "Ancient Incan citadel on mountain ridge with terraced structures and Huayna Picchu peak behind",
812
+ "Stone ruins at 2430 meters altitude with dramatic mountain setting and cloud forest",
813
+ "Archaeological site with precisely fitted stone walls temples and agricultural terraces",
814
+ "Lost city with iconic postcard view showing complete site with Wayna Picchu mountain"
815
+ ],
816
+ "architectural_details": [
817
+ "Dry-stone construction with precisely cut granite blocks without mortar",
818
+ "Agricultural terraces with sophisticated drainage systems on steep slopes",
819
+ "Temple of the Sun with curved wall and astronomical alignment features",
820
+ "Intihuatana ritual stone showing Incan astronomical and agricultural knowledge"
821
+ ],
822
+ "contextual_view": [
823
+ "Citadel viewed from Sun Gate after completing Inca Trail with morning light",
824
+ "Site from Huayna Picchu summit showing complete layout and surrounding mountains",
825
+ "Machu Picchu with Urubamba River valley and cloud forest below",
826
+ "Ruins with llamas grazing among ancient structures creating iconic Andean scene"
827
+ ],
828
+ "seasonal_lighting": [
829
+ "Sunrise illuminating ruins with first light as mist clears from valleys",
830
+ "Dramatic clouds surrounding peaks with ruins emerging from mountain fog",
831
+ "Afternoon light creating shadows that emphasize stone wall construction details",
832
+ "Rainy season with lush green terraces and dramatic cloud formations"
833
+ ]
834
+ },
835
+ "hashtags": {
836
+ "zh": ["馬丘比丘", "秘魯", "印加", "世界遺產", "失落之城"],
837
+ "en": ["MachuPicchu", "Peru", "Inca", "WorldHeritage", "LostCity", "Cusco"]
838
+ },
839
+ "cultural_info": {
840
+ "built_year": 1450,
841
+ "architect": "Pachacuti Inca Yupanqui",
842
+ "architectural_style": "Inca"
843
+ }
844
+ },
845
+
846
+ "Petra": {
847
+ "name": "Petra",
848
+ "official_name": "البتراء",
849
+ "location": {
850
+ "city": "Ma'an Governorate",
851
+ "country": "Jordan",
852
+ "region": "Wadi Musa",
853
+ "continent": "Asia"
854
+ },
855
+ "visual_cues": {
856
+ "iconic_view": [
857
+ "Rose-red sandstone Treasury building carved into cliff face with Hellenistic facade",
858
+ "Al-Khazneh temple with elaborate columns and sculptures in pink Nabataean rock",
859
+ "Ancient city carved from rock with dramatic facade revealed through narrow Siq canyon",
860
+ "Monumental rock-cut architecture with classical design in desert landscape"
861
+ ],
862
+ "architectural_details": [
863
+ "Hellenistic facade with Corinthian columns and ornate sculptural decorations",
864
+ "Rock-cut construction showing Nabataean engineering carved directly from sandstone cliff",
865
+ "Rose-red to pink sandstone with natural color variations in rock layers",
866
+ "Urn monument crowning upper level with classical Greek architectural influences"
867
+ ],
868
+ "contextual_view": [
869
+ "Treasury viewed through narrow opening of Siq canyon creating dramatic reveal",
870
+ "Petra archaeological park with multiple rock-cut structures and Roman amphitheater",
871
+ "Site in desert landscape with Bedouin presence and arid mountain scenery",
872
+ "Monastery building requiring climb up ancient steps with panoramic desert views"
873
+ ],
874
+ "seasonal_lighting": [
875
+ "Morning light illuminating Treasury facade with warm glow on rose-red stone",
876
+ "Petra by Night with Treasury lit by candlelight creating magical atmosphere",
877
+ "Harsh midday sun emphasizing color variations and carved details in rock",
878
+ "Late afternoon shadows creating depth and emphasizing architectural relief"
879
+ ]
880
+ },
881
+ "hashtags": {
882
+ "zh": ["佩特拉", "約旦", "玫瑰城", "世界遺產", "世界新七大奇蹟"],
883
+ "en": ["Petra", "Jordan", "Treasury", "AlKhazneh", "WorldHeritage", "RoseCity"]
884
+ },
885
+ "cultural_info": {
886
+ "built_year": -312,
887
+ "architect": "Nabataeans",
888
+ "architectural_style": "Nabataean"
889
+ }
890
+ },
891
+
892
+ "Stonehenge": {
893
+ "name": "Stonehenge",
894
+ "official_name": "Stonehenge",
895
+ "location": {
896
+ "city": "Wiltshire",
897
+ "country": "United Kingdom",
898
+ "region": "Salisbury Plain",
899
+ "continent": "Europe"
900
+ },
901
+ "visual_cues": {
902
+ "iconic_view": [
903
+ "Prehistoric monument with massive standing stones arranged in circular pattern on plain",
904
+ "Ancient stone circle with trilithons and sarsen stones in open landscape",
905
+ "Neolithic structure with distinctive stone archways and circular earthwork setting",
906
+ "Mysterious megalithic monument with bluestones and sarsen stones against sky"
907
+ ],
908
+ "architectural_details": [
909
+ "Sarsen stone trilithons with horizontal lintels connected by mortise and tenon joints",
910
+ "Bluestone arrangement within larger sarsen circle showing different stone types",
911
+ "Heel Stone and Avenue aligned to summer solstice sunrise",
912
+ "Weathered surfaces showing 5000 years of exposure to English weather"
913
+ ],
914
+ "contextual_view": [
915
+ "Stone circle in pastoral English landscape with sheep grazing on Salisbury Plain",
916
+ "Monument from distance showing relationship to surrounding earthworks and barrows",
917
+ "Stonehenge with visitors for scale showing massive size of individual stones",
918
+ "Site from aerial view revealing circular formation and astronomical alignments"
919
+ ],
920
+ "seasonal_lighting": [
921
+ "Summer solstice sunrise with sun aligned through stones as crowds gather",
922
+ "Winter solstice sunset creating dramatic silhouettes of standing stones",
923
+ "Moody overcast conditions with stones against dramatic English sky",
924
+ "Misty morning with stones emerging from fog creating mystical atmosphere"
925
+ ]
926
+ },
927
+ "hashtags": {
928
+ "zh": ["巨石陣", "英國", "史前遺跡", "世界遺產", "威爾特郡"],
929
+ "en": ["Stonehenge", "England", "UK", "Prehistoric", "WorldHeritage", "Wiltshire"]
930
+ },
931
+ "cultural_info": {
932
+ "built_year": -3000,
933
+ "architect": "Unknown (Neolithic peoples)",
934
+ "architectural_style": "Prehistoric"
935
+ }
936
+ }
937
+ }
938
+
939
+ print(f"✓ Landmark Prompts initialized with {len(self.landmarks)} world landmarks")
940
+
941
+ def get_prompts(self, landmark_name: str) -> Optional[Dict]:
942
+ """
943
+ 取得特定地標的完整 prompt 資料
944
+
945
+ Args:
946
+ landmark_name: 地標名稱
947
+
948
+ Returns:
949
+ 地標資料字典,若不存在則返回 None
950
+ """
951
+ return self.landmarks.get(landmark_name)
952
+
953
+ def get_all_landmarks(self) -> Dict:
954
+ """取得所有地標資料"""
955
+ return self.landmarks
956
+
957
+ def search_by_location(self, city: str = None, country: str = None) -> List[str]:
958
+ """
959
+ 根據地理位置搜尋地標
960
+
961
+ Args:
962
+ city: 城市名稱
963
+ country: 國家名稱
964
+
965
+ Returns:
966
+ 符合條件的地標名稱列表
967
+ """
968
+ results = []
969
+ for landmark_name, data in self.landmarks.items():
970
+ location = data.get('location', {})
971
+
972
+ if city and country:
973
+ if location.get('city') == city and location.get('country') == country:
974
+ results.append(landmark_name)
975
+ elif city:
976
+ if location.get('city') == city:
977
+ results.append(landmark_name)
978
+ elif country:
979
+ if location.get('country') == country:
980
+ results.append(landmark_name)
981
+
982
+ return results
983
+
984
+ def get_visual_prompts(self, landmark_name: str, context: str = 'iconic_view') -> List[str]:
985
+ """
986
+ 取得地標的視覺描述 prompts
987
+
988
+ Args:
989
+ landmark_name: 地標名稱
990
+ context: 情境類型 ('iconic_view', 'architectural_details', 'contextual_view', 'seasonal_lighting')
991
+
992
+ Returns:
993
+ 視覺描述列表
994
+ """
995
+ landmark = self.landmarks.get(landmark_name)
996
+ if not landmark:
997
+ return []
998
+
999
+ visual_cues = landmark.get('visual_cues', {})
1000
+ return visual_cues.get(context, [])
1001
+
1002
+ def get_hashtags(self, landmark_name: str, language: str = 'zh') -> List[str]:
1003
+ """
1004
+ 取得地標的 hashtags
1005
+
1006
+ Args:
1007
+ landmark_name: 地標名稱
1008
+ language: 語言 ('zh', 'en', 或 'zh-en')
1009
+
1010
+ Returns:
1011
+ Hashtag 列表
1012
+ """
1013
+ landmark = self.landmarks.get(landmark_name)
1014
+ if not landmark:
1015
+ return []
1016
+
1017
+ hashtags = landmark.get('hashtags', {})
1018
+
1019
+ if language == 'zh':
1020
+ return hashtags.get('zh', [])
1021
+ elif language == 'en':
1022
+ return hashtags.get('en', [])
1023
+ elif language == 'zh-en' or language == 'both':
1024
+ zh_tags = hashtags.get('zh', [])
1025
+ en_tags = hashtags.get('en', [])
1026
+ return zh_tags + en_tags
1027
+ else:
1028
+ return hashtags.get('zh', [])
1029
+
1030
+ print("✓ LandmarkPrompts defined")
lighting_analysis_manager.py ADDED
@@ -0,0 +1,453 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn as nn
5
+ from PIL import Image
6
+ from typing import Dict, Tuple
7
+ import torchvision.models as models
8
+ import torchvision.transforms as transforms
9
+
10
class LightingAnalysisManager:
    """Advanced lighting analysis using Places365 scene recognition + CV features.

    Fuses a Places365 ResNet18 scene classifier with hand-crafted OpenCV
    statistics (brightness, color temperature, contrast, shadow ratio,
    gradients) to label a PIL image with a lighting condition and a
    confidence score.
    """

    def __init__(self):
        print("Initializing Lighting Analysis Manager with Places365...")

        # Places365 ResNet18 (falls back to CPU when no GPU is present).
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self._load_places365_model()

        # CV feature weights (Places365 gets higher weight).
        # NOTE(review): these weights are informational only — the scoring
        # methods below use their own inline coefficients; confirm intent.
        self.feature_weights = {
            'places365': 0.50,  # Primary weight to Places365
            'brightness': 0.15,
            'color_temp': 0.15,
            'contrast': 0.08,
            'gradient': 0.05,  # Auxiliary features
            'laplacian': 0.04,
            'color_variation': 0.03
        }

        print("✓ Lighting Analysis Manager initialized with Places365 + advanced CV features")

    def _load_places365_model(self):
        """Load Places365 ResNet18 for scene attributes.

        Falls back to ImageNet weights when the Places365 checkpoint cannot
        be downloaded, and to CV-only mode (self.places_model = None) when
        model construction fails entirely.
        """
        try:
            # Use ResNet18 pretrained on Places365
            model = models.resnet18(weights=None)
            model.fc = nn.Linear(model.fc.in_features, 365)

            # Load Places365 weights (if available, otherwise use ImageNet as fallback)
            try:
                checkpoint_url = 'http://places2.csail.mit.edu/models_places365/resnet18_places365.pth.tar'
                checkpoint = torch.hub.load_state_dict_from_url(
                    checkpoint_url,
                    map_location=self.device,
                    progress=False
                )
                # Checkpoint was saved from a DataParallel model; strip the
                # 'module.' prefix so keys match a plain ResNet18.
                state_dict = {str.replace(k, 'module.', ''): v for k, v in checkpoint['state_dict'].items()}
                model.load_state_dict(state_dict)
                print(" Loaded Places365 ResNet18 weights")
            except Exception:
                # Fix: was a bare `except:`, which would also swallow
                # KeyboardInterrupt/SystemExit raised during the download.
                print(" Using ImageNet pretrained ResNet18 (fallback)")
                model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

            model = model.to(self.device)
            model.eval()
            self.places_model = model

            # Image preprocessing for Places365
            self.places_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ])

            # Scene categories related to lighting
            self.lighting_scenes = {
                'sunny': ['street', 'downtown', 'plaza', 'park', 'field'],
                'overcast': ['alley', 'covered_bridge', 'corridor'],
                'indoor': ['lobby', 'office', 'museum', 'restaurant'],
                'evening': ['street', 'downtown', 'plaza'],
                'natural': ['park', 'forest', 'mountain', 'coast']
            }

        except Exception as e:
            print(f" Warning: Places365 loading failed ({e}), using CV-only mode")
            self.places_model = None

    def analyze_lighting(self, image: Image.Image) -> Dict:
        """Comprehensive lighting analysis using Places365 + CV.

        Args:
            image: Input PIL image (RGB assumed).

        Returns:
            Dict with 'lighting_type' (label), 'confidence' (float in
            [0, 1]), plus the intermediate 'cv_features' and 'scene_info'.
        """
        # 1. CV-based physical features (including advanced features)
        cv_features = self._extract_cv_features(image)

        # 2. Places365 scene understanding (if available)
        scene_info = self._analyze_scene_places365(image)

        # 3. Determine lighting condition (adaptive with auxiliary features)
        lighting_condition, confidence = self._determine_lighting_adaptive(
            cv_features, scene_info
        )

        return {
            'lighting_type': lighting_condition,
            'confidence': confidence,
            'cv_features': cv_features,
            'scene_info': scene_info
        }

    def _extract_cv_features(self, image: Image.Image) -> Dict:
        """Extract CV-based features including advanced gradient and color analysis."""
        img_array = np.array(image)
        img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)

        # Basic Features (Primary)
        # Brightness (LAB L-channel)
        lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
        brightness = float(np.mean(lab[:, :, 0]))

        # Color temperature (R/B ratio; >1 warm, <1 cool)
        b_mean = np.mean(img_bgr[:, :, 0])
        r_mean = np.mean(img_bgr[:, :, 2])
        color_temp = float(r_mean / (b_mean + 1e-6))

        # Contrast (std of grayscale)
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
        contrast = float(np.std(gray))

        # Shadow ratio: fraction of pixels darker than gray level 80
        _, shadow_mask = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY_INV)
        shadow_ratio = float(np.sum(shadow_mask > 0) / shadow_mask.size)

        # Advanced Features
        # 1. First derivative: Sobel gradient magnitude (edge strength)
        # Strong gradients suggest directional lighting, weak suggest diffused
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)
        gradient_strength = float(np.mean(gradient_magnitude))

        # 2. Second derivative: Laplacian variance (lighting change detection)
        # High variance indicates complex lighting with many transitions
        laplacian = cv2.Laplacian(gray, cv2.CV_64F)
        laplacian_var = float(np.var(laplacian))

        # 3. Color difference in LAB space (color uniformity)
        # Low variation suggests overcast/diffused, high suggests mixed lighting
        a_std = float(np.std(lab[:, :, 1]))  # a* channel (green-red)
        b_std = float(np.std(lab[:, :, 2]))  # b* channel (blue-yellow)
        color_variation = (a_std + b_std) / 2

        return {
            # Primary features
            'brightness': brightness,
            'color_temp': color_temp,
            'contrast': contrast,
            'shadow_ratio': shadow_ratio,
            # Advanced auxiliary features (to assist Places365)
            'gradient_strength': gradient_strength,
            'laplacian_variance': laplacian_var,
            'color_variation': color_variation
        }

    def _analyze_scene_places365(self, image: Image.Image) -> Dict:
        """Analyze scene using Places365; returns 'unknown' on failure."""
        if self.places_model is None:
            return {'scene_category': 'unknown', 'confidence': 0.0}

        try:
            with torch.no_grad():
                img_tensor = self.places_transform(image).unsqueeze(0).to(self.device)
                logits = self.places_model(img_tensor)
                probs = torch.nn.functional.softmax(logits, dim=1)

                # Get top prediction
                top_prob, top_idx = torch.max(probs, 1)

                # Simple scene categories
                # Using index ranges for common outdoor scenes
                is_outdoor = top_idx.item() < 200  # Rough heuristic

                return {
                    'scene_category': 'outdoor' if is_outdoor else 'indoor',
                    'confidence': float(top_prob.item()),
                    'scene_idx': int(top_idx.item())
                }
        except Exception as e:
            print(f" Places365 inference failed: {e}")
            return {'scene_category': 'unknown', 'confidence': 0.0}

    def _detect_indoor_scene(self, cv_features: Dict, scene_info: Dict) -> bool:
        """
        Detect if scene is indoor or outdoor using multiple signals

        Args:
            cv_features: Computer vision features
            scene_info: Places365 scene information

        Returns:
            True if indoor, False if outdoor
        """
        indoor_score = 0.0

        # Signal 1: Places365 scene category (strongest signal)
        if scene_info.get('scene_category') == 'indoor':
            indoor_score += 0.5
        elif scene_info.get('scene_category') == 'outdoor':
            indoor_score -= 0.3

        # Signal 2: Brightness patterns
        # Indoor scenes typically have controlled brightness (not too bright, not too dark)
        brightness = cv_features['brightness']
        if 60 < brightness < 220:  # Relaxed range to cover more indoor scenes
            indoor_score += 0.15
        elif brightness > 230:  # Very bright suggests outdoor
            indoor_score -= 0.2

        # Signal 3: Low gradient suggests controlled/diffused indoor lighting
        gradient = cv_features['gradient_strength']
        if gradient < 20:  # Relaxed threshold so more indoor scenes qualify
            indoor_score += 0.15

        # Signal 4: Low laplacian variance suggests smooth indoor lighting
        laplacian = cv_features['laplacian_variance']
        if laplacian < 400:  # Relaxed threshold to include more indoor scenes
            indoor_score += 0.10

        # Signal 5: Shadow ratio - indoor scenes have less harsh shadows
        shadow_ratio = cv_features['shadow_ratio']
        if shadow_ratio < 0.25:  # Relaxed threshold to include more indoor scenes
            indoor_score += 0.10
        elif shadow_ratio > 0.5:  # Strong shadows suggest outdoor sunlight
            indoor_score -= 0.15

        # Threshold: indoor if score > 0.15 (lowered so indoor is chosen more readily)
        return indoor_score > 0.15

    def _determine_indoor_lighting(self, cv_features: Dict) -> Tuple[str, float]:
        """
        Determine lighting type for indoor scenes

        Returns indoor-specific lighting types with confidence
        """
        brightness = cv_features['brightness']
        color_temp = cv_features['color_temp']
        contrast = cv_features['contrast']
        shadow_ratio = cv_features['shadow_ratio']
        gradient = cv_features['gradient_strength']
        laplacian = cv_features['laplacian_variance']

        # Normalize features to [0, 1]
        brightness_norm = min(brightness / 255.0, 1.0)
        contrast_norm = min(contrast / 100.0, 1.0)
        gradient_norm = min(gradient / 50.0, 1.0)
        laplacian_norm = min(laplacian / 1000.0, 1.0)

        scores = {}

        # Studio/Product Lighting
        # Very controlled, bright, minimal shadows, low gradient
        studio_score = (
            0.35 * (1.0 if brightness_norm > 0.6 else 0.5) +  # Bright
            0.25 * (1.0 - shadow_ratio) +  # Minimal shadows
            0.20 * (1.0 - gradient_norm) +  # Smooth, even
            0.15 * (1.0 - laplacian_norm) +  # Very smooth
            0.05 * (1.0 - abs(color_temp - 1.0))  # Neutral temp
        )
        scores['studio lighting'] = studio_score

        # Indoor Natural Light (window light)
        # Medium-bright, some contrast, neutral to warm temp
        natural_indoor_score = (
            0.30 * (1.0 if 0.5 < brightness_norm < 0.8 else 0.5) +  # Medium-bright
            0.25 * min(contrast_norm, 0.6) +  # Some contrast
            0.20 * (1.0 if color_temp > 0.95 else 0.5) +  # Neutral to warm
            0.15 * min(gradient_norm, 0.5) +  # Some direction
            0.10 * (1.0 if shadow_ratio < 0.3 else 0.5)  # Some shadows
        )
        scores['indoor natural light'] = natural_indoor_score

        # Warm Artificial Lighting
        # Warm color temp, medium brightness, soft
        warm_artificial_score = (
            0.35 * (1.0 if color_temp > 1.1 else 0.3) +  # Warm temp
            0.25 * (1.0 - abs(brightness_norm - 0.5)) +  # Medium brightness
            0.20 * (1.0 - gradient_norm) +  # Soft
            0.15 * (1.0 - shadow_ratio) +  # Minimal shadows
            0.05 * (1.0 - laplacian_norm)  # Smooth
        )
        scores['warm artificial lighting'] = warm_artificial_score

        # Cool Artificial Lighting
        # Cool/neutral temp, medium-bright
        cool_artificial_score = (
            0.35 * (1.0 if color_temp < 1.05 else 0.4) +  # Cool/neutral temp
            0.25 * (1.0 if brightness_norm > 0.5 else 0.5) +  # Medium-bright
            0.20 * (1.0 - gradient_norm) +  # Smooth
            0.15 * (1.0 - shadow_ratio) +  # Minimal shadows
            0.05 * (1.0 - laplacian_norm)  # Even
        )
        scores['cool artificial lighting'] = cool_artificial_score

        # Soft Indoor Lighting
        # Low contrast, diffused, medium brightness
        soft_indoor_score = (
            0.30 * (1.0 - abs(brightness_norm - 0.5)) +  # Medium brightness
            0.30 * (1.0 - contrast_norm) +  # Low contrast
            0.20 * (1.0 - gradient_norm) +  # Very soft
            0.15 * (1.0 - shadow_ratio) +  # Minimal shadows
            0.05 * (1.0 - laplacian_norm)  # Smooth
        )
        scores['soft indoor lighting'] = soft_indoor_score

        # Dramatic Indoor Lighting
        # High contrast, directional, some shadows
        dramatic_score = (
            0.35 * contrast_norm +  # High contrast
            0.25 * gradient_norm +  # Directional
            0.20 * shadow_ratio +  # Shadows present
            0.15 * laplacian_norm +  # Sharp transitions
            0.05 * (1.0 if brightness_norm < 0.6 else 0.5)  # Can be darker
        )
        scores['dramatic indoor lighting'] = dramatic_score

        # Get best match
        best_condition = max(scores.items(), key=lambda x: x[1])

        # Confidence grows with the gap between the top two scores
        sorted_scores = sorted(scores.values(), reverse=True)
        if len(sorted_scores) > 1:
            score_gap = sorted_scores[0] - sorted_scores[1]
            confidence = min(0.7 + score_gap * 0.3, 0.95)
        else:
            confidence = 0.7

        return best_condition[0], confidence

    def _determine_lighting_adaptive(self, cv_features: Dict, scene_info: Dict) -> Tuple[str, float]:
        """Determine lighting using adaptive thresholds with indoor/outdoor detection."""

        # Extract all features
        brightness = cv_features['brightness']
        color_temp = cv_features['color_temp']
        contrast = cv_features['contrast']
        shadow = cv_features['shadow_ratio']
        gradient = cv_features['gradient_strength']
        laplacian = cv_features['laplacian_variance']
        color_var = cv_features['color_variation']

        # Detect indoor vs outdoor first; indoor scenes use the dedicated
        # indoor lighting vocabulary.
        is_indoor = self._detect_indoor_scene(cv_features, scene_info)
        if is_indoor:
            return self._determine_indoor_lighting(cv_features)
        # Otherwise fall through to the outdoor/general scoring below.

        # Normalize features to 0-1 scale
        brightness_norm = min(brightness / 255.0, 1.0)
        contrast_norm = min(contrast / 100.0, 1.0)
        gradient_norm = min(gradient / 50.0, 1.0)  # Typical range 0-50
        laplacian_norm = min(laplacian / 1000.0, 1.0)  # Typical range 0-1000
        color_var_norm = min(color_var / 50.0, 1.0)  # Typical range 0-50

        # Adaptive scoring (Places365 dominant, CV features assist)
        scores = {}

        # Soft diffused light
        # Characteristics: medium brightness, low contrast, neutral temp
        # Auxiliary: low gradient (no strong edges), low laplacian (smooth transitions)
        diffuse_score = (
            0.40 * (1.0 - abs(brightness_norm - 0.5)) +  # Medium brightness
            0.25 * (1.0 - contrast_norm) +  # Low contrast
            0.20 * (1.0 - abs(color_temp - 1.0)) +  # Neutral temp
            0.08 * (1.0 - gradient_norm) +  # Weak edges (diffused)
            0.05 * (1.0 - laplacian_norm) +  # Smooth transitions
            0.02 * (1.0 - color_var_norm)  # Uniform color
        )
        scores['soft diffused light'] = diffuse_score

        # Natural daylight
        # Characteristics: bright, moderate contrast
        # Auxiliary: moderate gradient, moderate color variation
        daylight_score = (
            0.40 * brightness_norm +  # Bright
            0.25 * min(contrast_norm, 0.7) +  # Moderate contrast
            0.20 * (1.0 - abs(color_temp - 1.0)) +  # Neutral temp
            0.08 * min(gradient_norm, 0.6) +  # Moderate edges
            0.05 * min(laplacian_norm, 0.6) +  # Some detail
            0.02 * min(color_var_norm, 0.5)  # Some color variation
        )
        scores['natural daylight'] = daylight_score

        # Overcast atmosphere
        # Characteristics: medium-low brightness, very low contrast, cool temp, minimal shadow
        # Auxiliary: very low gradient (flat), low laplacian, low color variation
        overcast_score = (
            0.35 * (1.0 - abs(brightness_norm - 0.45)) +  # Medium-low brightness
            0.25 * (1.0 - contrast_norm) +  # Very low contrast
            0.15 * (1.0 if color_temp < 1.05 else 0.5) +  # Cool temp
            0.10 * (1.0 - shadow) +  # Minimal shadows
            0.08 * (1.0 - gradient_norm) +  # Flat appearance
            0.05 * (1.0 - laplacian_norm) +  # Smooth lighting
            0.02 * (1.0 - color_var_norm)  # Uniform color
        )
        scores['overcast atmosphere'] = overcast_score

        # Warm ambient light
        # Characteristics: medium brightness, warm temp
        # Auxiliary: moderate gradient, warm color bias
        warm_score = (
            0.40 * (1.0 - abs(brightness_norm - 0.5)) +  # Medium brightness
            0.30 * (1.0 if color_temp > 1.1 else 0.5) +  # Warm temp
            0.15 * min(contrast_norm, 0.6) +  # Moderate contrast
            0.08 * min(gradient_norm, 0.5) +  # Soft edges
            0.05 * min(laplacian_norm, 0.5) +  # Soft transitions
            0.02 * color_var_norm  # Some color variation (warmth)
        )
        scores['warm ambient light'] = warm_score

        # Evening light
        # Characteristics: medium-low brightness, warm temp, medium contrast
        # Auxiliary: moderate gradient (directional), some color variation
        evening_score = (
            0.35 * (1.0 if brightness_norm < 0.6 else 0.5) +  # Lower brightness
            0.30 * (1.0 if color_temp > 1.05 else 0.5) +  # Slightly warm
            0.20 * contrast_norm +  # Some contrast
            0.08 * min(gradient_norm, 0.7) +  # Directional light
            0.05 * laplacian_norm +  # Detail present
            0.02 * color_var_norm  # Color variation
        )
        scores['evening light'] = evening_score

        # Bright sunlight
        # Characteristics: high brightness, high contrast, strong shadows
        # Auxiliary: high gradient (strong edges), high laplacian (sharp transitions)
        sunlight_score = (
            0.40 * (1.0 if brightness_norm > 0.7 else 0.3) +  # High brightness
            0.25 * contrast_norm +  # High contrast
            0.15 * shadow +  # Strong shadows
            0.10 * gradient_norm +  # Strong edges
            0.08 * laplacian_norm +  # Sharp detail
            0.02 * color_var_norm  # Color variation
        )
        scores['bright sunlight'] = sunlight_score

        # Get top scoring condition
        best_condition = max(scores.items(), key=lambda x: x[1])

        # Calculate confidence based on score separation
        sorted_scores = sorted(scores.values(), reverse=True)
        if len(sorted_scores) > 1:
            score_gap = sorted_scores[0] - sorted_scores[1]
            confidence = min(0.7 + score_gap * 0.3, 0.95)
        else:
            confidence = 0.7

        return best_condition[0], confidence
452
+
453
+ print("✓ LightingAnalysisManager (with Places365 + advanced CV features) defined")
ocr_engine_manager.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import easyocr
3
+ import numpy as np
4
+ import cv2
5
+ from PIL import Image
6
+ from typing import List, Dict
7
+ import re
8
+
9
class OCREngineManager:
    """Text extraction using EasyOCR with brand-optimized preprocessing.

    Wraps an easyocr.Reader configured for English + Traditional Chinese and
    offers an optional OpenCV preprocessing path tuned for brand logos
    (metallic/reflective text on products).
    """

    def __init__(self):
        # Instantiating easyocr.Reader downloads/loads detection and
        # recognition models, so this is slow on first run.
        print("Loading EasyOCR (English + Traditional Chinese)...")

        # Try GPU first, fallback to CPU if GPU fails
        try:
            if torch.cuda.is_available():
                print(" Attempting GPU initialization...")
                self.reader = easyocr.Reader(['en', 'ch_tra'], gpu=True)
                print(" ✓ EasyOCR loaded with GPU")
            else:
                print(" CUDA not available, using CPU...")
                self.reader = easyocr.Reader(['en', 'ch_tra'], gpu=False)
                print(" ✓ EasyOCR loaded with CPU")
        except Exception as e:
            # GPU init can fail even when CUDA reports available (driver or
            # memory issues); a CPU reader is always the safe fallback.
            print(f" ⚠️ GPU initialization failed: {e}")
            print(" Falling back to CPU...")
            self.reader = easyocr.Reader(['en', 'ch_tra'], gpu=False)
            print(" ✓ EasyOCR loaded with CPU (fallback)")

        print("✓ EasyOCR loaded")

    def extract_text(self, image: Image.Image, use_brand_preprocessing: bool = False) -> List[Dict]:
        """Extract text from image with optional brand-optimized preprocessing.

        Args:
            image: Source PIL image (or cropped region).
            use_brand_preprocessing: When True, run preprocess_for_brand_ocr()
                first and use more permissive readtext thresholds so small or
                low-contrast logo text is picked up.

        Returns:
            List of dicts with keys 'bbox', 'text' (cleaned, uppercased),
            'confidence', and 'raw_text' (as returned by EasyOCR).
        """
        if use_brand_preprocessing:
            # Apply brand-optimized preprocessing
            processed_image = self.preprocess_for_brand_ocr(image)
            img_array = np.array(processed_image)
        else:
            img_array = np.array(image)

        # Use more aggressive settings for brand detection
        if use_brand_preprocessing:
            results = self.reader.readtext(
                img_array,
                detail=1,
                paragraph=False,
                min_size=10,  # Lower to catch small brand text
                text_threshold=0.5,  # Lower threshold for brand logos
                link_threshold=0.3,
                contrast_ths=0.1,  # Lower to handle metallic/reflective text
                adjust_contrast=0.8  # Enhance contrast for logos
            )
        else:
            results = self.reader.readtext(
                img_array,
                detail=1,
                paragraph=False,
                min_size=20,
                text_threshold=0.7,
                link_threshold=0.4
            )

        structured_results = []
        for bbox, text, confidence in results:
            structured_results.append({
                'bbox': bbox,
                'text': self.clean_and_normalize(text),
                'confidence': confidence,
                'raw_text': text
            })

        return structured_results

    def clean_and_normalize(self, text: str) -> str:
        """Strip punctuation, collapse whitespace, and uppercase.

        CJK characters in the U+4E00..U+9FFF range are preserved so
        Traditional Chinese text survives the cleanup.
        """
        # Keep Traditional Chinese characters
        text = re.sub(r'[^\w\s\u4e00-\u9fff]', '', text)
        text = ' '.join(text.split())
        return text.upper()

    def preprocess_for_brand_ocr(self, image_region: Image.Image) -> Image.Image:
        """
        Preprocess image for brand OCR recognition.
        Optimizes for detecting brand logos and text on products (especially metallic logos).

        Pipeline (order matters): grayscale -> CLAHE -> denoise -> adaptive
        threshold -> morphological close -> sharpen.

        Args:
            image_region: PIL Image (typically a cropped region)

        Returns:
            Preprocessed PIL Image (single-channel binary-ish result)
        """
        # Convert to numpy array
        img_array = np.array(image_region)

        # Convert to grayscale (skip when already single-channel)
        if len(img_array.shape) == 3:
            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
        else:
            gray = img_array

        # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
        # Increased clipLimit for metallic logos (2.0 → 3.0)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Denoise (slightly reduced strength to preserve logo edges)
        denoised = cv2.fastNlMeansDenoising(enhanced, None, h=8, templateWindowSize=7, searchWindowSize=21)

        # Adaptive thresholding to handle varying lighting
        # Adjusted blockSize for better logo detection (11 → 15)
        binary = cv2.adaptiveThreshold(
            denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 15, 2
        )

        # Morphological operations to connect broken characters
        # Slightly larger kernel for logo text (2x2 → 3x3)
        kernel = np.ones((3, 3), np.uint8)
        morph = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

        # Sharpen to enhance edges (increased center weight 9 → 11)
        # NOTE(review): kernel sums to 3, so output is also brightened —
        # appears intentional per the tuning comment; confirm if revisited.
        kernel_sharp = np.array([[-1, -1, -1], [-1, 11, -1], [-1, -1, -1]])
        sharpened = cv2.filter2D(morph, -1, kernel_sharp)

        # Convert back to PIL Image
        return Image.fromarray(sharpened)
128
+
129
+ print("✓ OCREngineManager (with brand OCR preprocessing) defined")
openclip_semantic_manager.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import open_clip
4
+ from PIL import Image
5
+ from typing import List, Dict
6
+ import numpy as np
7
+
8
class OpenCLIPSemanticManager:
    """Zero-shot classification and visual feature extraction with enhanced scene understanding.

    Wraps OpenCLIP ViT-H/14: image/text encoding, zero-shot and hierarchical
    (coarse -> fine) classification, and scene analysis over pre-cached
    prompt vocabularies.
    """

    # Softmax temperature applied to cosine similarities (x / 0.01 == x * 100).
    # Previously a magic number repeated in every classification method.
    _TEMPERATURE = 0.01

    def __init__(self):
        print("Loading OpenCLIP ViT-H/14 model...")
        self.model, _, self.preprocess = open_clip.create_model_and_transforms(
            'ViT-H-14',
            pretrained='laion2b_s32b_b79k'
        )
        self.tokenizer = open_clip.get_tokenizer('ViT-H-14')

        if torch.cuda.is_available():
            self.model = self.model.cuda()
        self.model.eval()

        # Enhanced scene vocabularies (tokenized verbatim, no prompt prefix)
        self.scene_vocabularies = {
            'urban': [
                'city canyon with tall buildings',
                'downtown street with skyscrapers',
                'urban corridor between buildings',
                'busy city intersection',
                'metropolitan avenue'
            ],
            'lighting': [
                'overcast cloudy day',
                'bright sunny day',
                'golden hour warm glow',
                'blue hour twilight',
                'harsh midday sun',
                'soft diffused light',
                'dramatic evening light',
                'moody overcast atmosphere'
            ],
            'mood': [
                'bustling and energetic',
                'calm and contemplative',
                'dramatic and imposing',
                'intimate and cozy',
                'vibrant and lively'
            ]
        }

        # Hierarchical vocabularies: coarse categories, then per-domain labels
        self.coarse_labels = [
            'furniture', 'musical instrument', 'artwork',
            'appliance', 'decoration', 'tool', 'electronic device',
            'clothing', 'accessory', 'food', 'plant'
        ]

        self.domain_vocabularies = {
            'musical instrument': [
                'acoustic guitar', 'electric guitar', 'bass guitar',
                'classical guitar', 'ukulele', 'violin', 'cello',
                'piano', 'keyboard', 'drums', 'saxophone', 'trumpet'
            ],
            'furniture': [
                'chair', 'sofa', 'table', 'desk', 'shelf',
                'cabinet', 'bed', 'stool', 'bench', 'wardrobe'
            ],
            'electronic device': [
                'smartphone', 'laptop', 'tablet', 'camera',
                'headphones', 'speaker', 'monitor', 'keyboard', 'mouse'
            ],
            'clothing': [
                'shirt', 'pants', 'dress', 'jacket', 'coat',
                'sweater', 'skirt', 'jeans', 'hoodie'
            ],
            'accessory': [
                'watch', 'sunglasses', 'hat', 'scarf', 'belt',
                'bag', 'wallet', 'jewelry', 'tie'
            ]
        }

        self.text_features_cache = {}
        self._cache_text_features()

        print("✓ OpenCLIP loaded with enhanced scene understanding")

    def _cache_text_features(self):
        """Pre-compute and cache L2-normalized text features for all vocabularies."""
        with torch.no_grad():
            # Cache coarse labels
            prompts = [f"a photo of {label}" for label in self.coarse_labels]
            text = self.tokenizer(prompts)
            if torch.cuda.is_available():
                text = text.cuda()
            self.text_features_cache['coarse'] = self.model.encode_text(text)
            self.text_features_cache['coarse'] /= self.text_features_cache['coarse'].norm(dim=-1, keepdim=True)

            # Cache domain vocabularies
            for domain, labels in self.domain_vocabularies.items():
                prompts = [f"a photo of {label}" for label in labels]
                text = self.tokenizer(prompts)
                if torch.cuda.is_available():
                    text = text.cuda()
                features = self.model.encode_text(text)
                features /= features.norm(dim=-1, keepdim=True)
                self.text_features_cache[domain] = features

            # Cache scene vocabularies (used verbatim, without a prompt prefix)
            for scene_type, labels in self.scene_vocabularies.items():
                text = self.tokenizer(labels)
                if torch.cuda.is_available():
                    text = text.cuda()
                features = self.model.encode_text(text)
                features /= features.norm(dim=-1, keepdim=True)
                self.text_features_cache[f'scene_{scene_type}'] = features

    def _label_scores(self, image_features: torch.Tensor, text_features: torch.Tensor,
                      labels: List[str]) -> Dict[str, float]:
        """Temperature-scaled softmax over image-text similarities.

        Returns {label: probability}.  Consolidates the pattern that was
        previously duplicated across analyze_scene, classify_zero_shot and
        classify_hierarchical.
        """
        similarity = (image_features @ text_features.T) / self._TEMPERATURE
        probs = similarity.softmax(dim=-1)
        return {label: float(probs[0, i].cpu()) for i, label in enumerate(labels)}

    def analyze_scene(self, image: Image.Image) -> Dict:
        """Comprehensive scene analysis (urban character, lighting, mood).

        Returns:
            {aspect: {'top': label, 'confidence': float, 'all_scores': dict}}
        """
        image_features = self.encode_image(image)

        scene_analysis = {}
        for scene_type in ['urban', 'lighting', 'mood']:
            results = self._label_scores(
                image_features,
                self.text_features_cache[f'scene_{scene_type}'],
                self.scene_vocabularies[scene_type]
            )
            top_label, top_score = max(results.items(), key=lambda x: x[1])
            scene_analysis[scene_type] = {
                'top': top_label,
                'confidence': top_score,
                'all_scores': results
            }

        return scene_analysis

    def encode_image(self, image: Image.Image) -> torch.Tensor:
        """Encode image to an L2-normalized feature vector."""
        with torch.no_grad():
            image_tensor = self.preprocess(image).unsqueeze(0)
            if torch.cuda.is_available():
                image_tensor = image_tensor.cuda()

            image_features = self.model.encode_image(image_tensor)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            return image_features

    def encode_text(self, text_list: List[str]) -> torch.Tensor:
        """Encode labels (wrapped as "a photo of X" prompts) to normalized features."""
        with torch.no_grad():
            prompts = [f"a photo of {text}" for text in text_list]
            text = self.tokenizer(prompts)
            if torch.cuda.is_available():
                text = text.cuda()

            text_features = self.model.encode_text(text)
            text_features /= text_features.norm(dim=-1, keepdim=True)
            return text_features

    def classify_zero_shot(self, image: Image.Image, candidate_labels: List[str]) -> Dict[str, float]:
        """Zero-shot classification over arbitrary candidate labels."""
        image_features = self.encode_image(image)
        text_features = self.encode_text(candidate_labels)
        return self._label_scores(image_features, text_features, candidate_labels)

    def classify_hierarchical(self, image: Image.Image) -> Dict:
        """Two-stage coarse -> fine classification.

        Returns:
            Dict with 'coarse', 'fine', 'top_prediction', 'confidence'.
            'fine' is now always present — an empty dict when the coarse
            category has no fine-grained vocabulary — so callers get a
            consistent shape (previously the key was missing on that path).
        """
        image_features = self.encode_image(image)

        coarse_results = self._label_scores(
            image_features, self.text_features_cache['coarse'], self.coarse_labels
        )
        top_category = max(coarse_results, key=coarse_results.get)

        if top_category in self.domain_vocabularies:
            fine_labels = self.domain_vocabularies[top_category]
            fine_results = self._label_scores(
                image_features, self.text_features_cache[top_category], fine_labels
            )
            top_prediction = max(fine_results, key=fine_results.get)
            return {
                'coarse': top_category,
                'fine': fine_results,
                'top_prediction': top_prediction,
                'confidence': fine_results[top_prediction]
            }

        return {
            'coarse': top_category,
            'fine': {},
            'top_prediction': top_category,
            'confidence': coarse_results[top_category]
        }
215
+
216
+ print("✓ OpenCLIPSemanticManager defined")
output_processing_manager.py ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import re
3
+ from typing import Dict, List, Tuple, Optional
4
+ from prompt_library_manager import PromptLibraryManager
5
+
6
class OutputProcessingManager:
    """Output validation, formatting, and smart hashtag generation.

    Wraps a PromptLibraryManager so validated captions can be enriched with
    commercial-grade landmark / brand / scene hashtags, and enforces the
    per-platform caption constraints (length limits, no '#' in the caption
    body, 5-10 hashtags).
    """

    def __init__(self, prompt_library: Optional["PromptLibraryManager"] = None):
        """
        Args:
            prompt_library: optional shared PromptLibraryManager instance;
                a new one is created automatically when omitted.
        """
        # Empty by default; add lowercase words to enable content filtering.
        self.profanity_filter = set()

        # Hard caption-length limits (characters) per target platform.
        self.max_lengths = {
            'instagram': 2200,
            'tiktok': 100,
            'xiaohongshu': 500
        }

        # Reuse the caller's prompt library when given to avoid re-loading.
        self.prompt_library = prompt_library if prompt_library is not None else PromptLibraryManager()

        # Keyword map used for lightweight landmark inference.
        self.landmark_keywords = self._init_landmark_keywords()

        print("✓ OutputProcessingManager (with integrated PromptLibraryManager) initialized")

    def _init_landmark_keywords(self) -> Dict[str, List[str]]:
        """Build the landmark -> indicative-keywords map.

        Used to guess a probable landmark from detected object classes and
        scene labels.
        """
        return {
            'Big Ben': ['clock tower', 'tower', 'bridge', 'palace', 'gothic'],
            'Eiffel Tower': ['tower', 'iron', 'landmark', 'lattice'],
            'Statue of Liberty': ['statue', 'monument', 'harbor', 'torch'],
            'Golden Gate Bridge': ['bridge', 'suspension', 'orange', 'bay'],
            'Sydney Opera House': ['opera', 'building', 'harbor', 'shell'],
            'Taj Mahal': ['palace', 'dome', 'monument', 'marble'],
            'Colosseum': ['arena', 'amphitheater', 'ruins', 'ancient'],
            'Pyramids of Giza': ['pyramid', 'desert', 'ancient', 'monument'],
            'Burj Khalifa': ['skyscraper', 'tower', 'building', 'tall'],
            'Tokyo Tower': ['tower', 'lattice', 'red'],
            'Taipei 101': ['skyscraper', 'tower', 'building'],
            # Extend with more landmarks as needed.
        }

    def detect_landmark(self, detections: List[Dict], scene_info: Dict) -> Optional[str]:
        """Guess a probable landmark from detection results.

        Args:
            detections: YOLO detection dicts (reads 'class_name').
            scene_info: scene-analysis dict (reads scene_info['urban']['top']).

        Returns:
            The best-matching landmark name, or None when fewer than two
            keywords match (single-keyword hits are too ambiguous).
        """
        detected_objects = [d.get('class_name', '').lower() for d in detections]

        # Add the top urban scene label as an extra clue, when present.
        scene_keywords = []
        urban_scene = scene_info.get('urban', {}).get('top', '')
        if urban_scene:
            scene_keywords.append(urban_scene.lower())

        all_keywords = detected_objects + scene_keywords

        # Score each landmark by how many detected items match its keywords.
        scores = {}
        for landmark, keywords in self.landmark_keywords.items():
            match_count = sum(1 for obj in all_keywords
                              if any(kw in obj for kw in keywords))
            if match_count > 0:
                scores[landmark] = match_count

        # Require at least 2 matches before committing to a landmark.
        if scores:
            best_landmark = max(scores.items(), key=lambda x: x[1])
            if best_landmark[1] >= 2:
                return best_landmark[0]

        return None

    def generate_smart_hashtags(self, detections: List[Dict], scene_info: Dict,
                                brands: List, platform: str, language: str) -> List[str]:
        """Generate hashtags combining brand, landmark, and scene signals.

        Args:
            detections: detected object dicts.
            scene_info: scene-analysis result.
            brands: detected brands, as names or (name, confidence) tuples.
            platform: target platform name.
            language: 'zh', 'en', or 'zh-en'.

        Returns:
            Up to 10 hashtags, de-duplicated, ordered
            landmark > brand > scene > composition > platform.
        """
        hashtags = []

        # 1. Landmark tags (highest priority).
        detected_landmark = self.detect_landmark(detections, scene_info)
        if detected_landmark:
            landmark_tags = self.prompt_library.landmark_prompts.get_hashtags(
                detected_landmark, language
            )
            hashtags.extend(landmark_tags[:5])  # cap landmark tags at 5

        # 2. Brand tags (high priority).
        if brands:
            for brand in brands[:3]:  # at most 3 brands
                brand_name = brand[0] if isinstance(brand, tuple) else brand
                brand_tags = self.prompt_library.brand_prompts.get_hashtags(
                    brand_name, language
                )
                hashtags.extend(brand_tags[:3])  # at most 3 tags per brand

        # 3. Scene tags (medium priority).
        scene_category = self._detect_scene_category(scene_info, detections)
        if scene_category:
            scene_tags = self.prompt_library.scene_prompts.get_hashtags(
                scene_category, language
            )
            hashtags.extend(scene_tags[:4])

        # 4. Composition-specific tags.
        hashtags.extend(self._get_composition_hashtags(scene_info, language))

        # 5. Platform-specific tags.
        hashtags.extend(self._get_platform_hashtags(platform, language))

        # De-duplicate while preserving priority order; drop empty tags.
        seen = set()
        unique_hashtags = []
        for tag in hashtags:
            if tag and tag not in seen:
                seen.add(tag)
                unique_hashtags.append(tag)

        return unique_hashtags[:10]

    def _detect_scene_category(self, scene_info: Dict, detections: List[Dict]) -> Optional[str]:
        """Classify the scene as 'food', 'nature', 'urban', or 'indoor'.

        Object classes are checked first; the urban scene label is a fallback;
        'urban' is the final default.
        """
        object_classes = [d.get('class_name', '').lower() for d in detections]

        # Food scene.
        food_keywords = ['sandwich', 'pizza', 'cake', 'food', 'plate', 'bowl', 'cup', 'bottle']
        if any(kw in obj for kw in food_keywords for obj in object_classes):
            return 'food'

        # Nature scene.
        nature_keywords = ['tree', 'mountain', 'water', 'sky', 'beach', 'ocean']
        if any(kw in obj for kw in nature_keywords for obj in object_classes):
            return 'nature'

        # Urban scene, from the scene classifier's top label.
        urban_scene = scene_info.get('urban', {}).get('top', '')
        if urban_scene and ('canyon' in urban_scene or 'street' in urban_scene or 'building' in urban_scene):
            return 'urban'

        # Indoor scene.
        indoor_keywords = ['chair', 'table', 'couch', 'bed', 'desk']
        if any(kw in obj for kw in indoor_keywords for obj in object_classes):
            return 'indoor'

        return 'urban'  # default to urban

    def _get_composition_hashtags(self, scene_info: Dict, language: str) -> List[str]:
        """Generate tags for the detected composition type plus a generic
        photography tag, in the requested language (or both when bilingual)."""
        hashtags = []

        composition = scene_info.get('urban', {}).get('top', '')

        # Urban canyon composition.
        if 'canyon' in composition or 'skyscraper' in composition:
            if language == 'zh':
                hashtags.extend(['城市峽谷', '城市風景'])
            elif language == 'en':
                hashtags.extend(['UrbanCanyon', 'Cityscape'])
            else:  # bilingual
                hashtags.extend(['城市峽谷', 'UrbanCanyon'])

        # Generic photography tag.
        if language == 'zh':
            hashtags.append('攝影日常')
        elif language == 'en':
            hashtags.append('Photography')
        else:
            hashtags.extend(['攝影日常', 'Photography'])

        return hashtags

    def _get_platform_hashtags(self, platform: str, language: str) -> List[str]:
        """Generate platform-specific tags in the requested language."""
        hashtags = []

        if platform == 'instagram':
            if language == 'zh':
                hashtags.append('IG日常')
            elif language == 'en':
                hashtags.append('InstaDaily')
            else:
                hashtags.extend(['IG日常', 'InstaDaily'])

        elif platform == 'tiktok':
            if language == 'zh':
                hashtags.append('抖音')
            elif language == 'en':
                hashtags.append('TikTok')
            else:
                hashtags.extend(['抖音', 'TikTok'])

        elif platform == 'xiaohongshu':
            hashtags.extend(['小紅書', '分享日常'])

        return hashtags

    def validate_output(self, output: Dict, platform: str,
                        detections: List[Dict] = None, scene_info: Dict = None,
                        brands: List = None, language: str = 'en') -> Tuple[bool, str]:
        """Validate and normalise one generated caption dict (mutates it).

        Checks structure, truncates over-length captions, filters profanity,
        cleans hashtags, tops hashtags up to at least 5 when detection context
        is supplied, and strips '#' tokens out of the caption body.

        Args:
            output: caption dict ('caption', 'hashtags', 'tone', 'platform').
            platform: platform name (selects the length limit).
            detections: detection results (enables hashtag top-up).
            scene_info: scene analysis (enables hashtag top-up).
            brands: detected brands (enables hashtag top-up).
            language: hashtag language.

        Returns:
            (passed, message) tuple.
        """
        # 1. Structure check.
        required_fields = ['caption', 'hashtags', 'tone', 'platform']
        if not all(field in output for field in required_fields):
            return False, "Missing required fields"

        # 2. Length check - truncate rather than reject.
        max_length = self.max_lengths.get(platform, 2200)
        if len(output['caption']) > max_length:
            output['caption'] = output['caption'][:max_length-3] + '...'

        # 3. Content filter.
        if self._contains_profanity(output['caption']):
            return False, "Contains inappropriate content"

        # 4. Hashtag cleanup.
        output['hashtags'] = self._validate_hashtags(output['hashtags'])

        # 5. Hashtag count check with automatic top-up (commercial-grade).
        min_hashtags = 5  # minimum required hashtag count
        if len(output['hashtags']) < min_hashtags:
            # Only possible when detection context was supplied.
            if detections is not None and scene_info is not None:
                before = len(output['hashtags'])
                additional_tags = self.generate_smart_hashtags(
                    detections, scene_info, brands or [], platform, language
                )
                # Top up without duplicating, capped at 10.
                for tag in additional_tags:
                    if tag not in output['hashtags'] and len(output['hashtags']) < 10:
                        output['hashtags'].append(tag)

                # BUGFIX: the first placeholder previously printed the
                # post-top-up count instead of the pre-top-up count.
                print(f" [AUTO-補充] 標籤數量不足 ({before} < {min_hashtags}),已自動補充至 {len(output['hashtags'])} 個")

        # 6. The caption body must not contain hashtag tokens.
        if '#' in output['caption']:
            output['caption'] = re.sub(r'#\w+', '', output['caption']).strip()

        return True, "Validation passed"

    def _contains_profanity(self, text: str) -> bool:
        """Return True when *text* contains a filtered word (case-insensitive)."""
        text_lower = text.lower()
        return any(word in text_lower for word in self.profanity_filter)

    def _validate_hashtags(self, hashtags: List[str]) -> List[str]:
        """Validate and clean hashtags.

        Strips leading '#', removes characters outside word chars / CJK,
        drops empties and duplicates.

        Args:
            hashtags: raw hashtag list.

        Returns:
            Cleaned hashtag list (at most 10).
        """
        cleaned = []
        for tag in hashtags:
            # Strip the '#' prefix, if present.
            tag = tag.lstrip('#')

            # Keep word characters and CJK ideographs only.
            tag = re.sub(r'[^\w\u4e00-\u9fff]', '', tag)

            # Keep non-empty, first-seen tags only.
            if tag and tag not in cleaned:
                cleaned.append(tag)

        return cleaned[:10]  # at most 10

    def format_for_platform(self, caption: Dict, platform: str) -> str:
        """Render a caption dict as platform-ready text.

        Xiaohongshu keeps hashtags right after the caption; Instagram/TikTok
        put them on a separate line.

        Args:
            caption: caption dict ('caption', 'hashtags').
            platform: platform name.

        Returns:
            Formatted string ready for posting.
        """
        formatted = f"{caption['caption']}\n\n"

        if platform == 'xiaohongshu':
            formatted += ' '.join([f"#{tag}" for tag in caption['hashtags']])
        else:
            formatted += '\n' + ' '.join([f"#{tag}" for tag in caption['hashtags']])

        return formatted

print("✓ OutputProcessingManager (V3 with PromptLibraryManager integration) defined")
pixcribe_pipeline.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import time
3
+ import traceback
4
+ from PIL import Image
5
+ from typing import Dict
6
+
7
+ from image_processor_manager import ImageProcessorManager
8
+ from yolo_detection_manager import YOLODetectionManager
9
+ from saliency_detection_manager import SaliencyDetectionManager
10
+ from openclip_semantic_manager import OpenCLIPSemanticManager
11
+ from lighting_analysis_manager import LightingAnalysisManager
12
+ from ocr_engine_manager import OCREngineManager
13
+ from prompt_library_manager import PromptLibraryManager
14
+ from brand_recognition_manager import BrandRecognitionManager
15
+ from brand_visualization_manager import BrandVisualizationManager
16
+ from brand_verification_manager import BrandVerificationManager
17
+ from scene_compatibility_manager import SceneCompatibilityManager
18
+ from caption_generation_manager import CaptionGenerationManager
19
+ from detection_fusion_manager import DetectionFusionManager
20
+ from output_processing_manager import OutputProcessingManager
21
+
22
class PixcribePipeline:
    """Main Facade coordinating all components (V2 with multi-language support).

    Owns every detection/analysis manager and runs the 13-step end-to-end
    flow in process_image().
    """

    def __init__(self, yolo_variant='l', vlm_model_name='Qwen/Qwen2.5-VL-7B-Instruct'):
        """
        Args:
            yolo_variant: 'm', 'l' (default), or 'x'
            vlm_model_name: Vision-Language Model name (default: Qwen2.5-VL-7B-Instruct)
                Can be changed to 'Qwen/Qwen3-VL-8B-Instruct' for latest model
        """
        print("="*60)
        print("Initializing Pixcribe Pipeline V2...")
        print("="*60)

        start_time = time.time()

        # Perception and analysis managers.
        self.image_processor = ImageProcessorManager()
        self.yolo_detector = YOLODetectionManager(variant=yolo_variant)
        self.saliency_detector = SaliencyDetectionManager()
        self.clip_semantic = OpenCLIPSemanticManager()
        self.lighting_analyzer = LightingAnalysisManager()
        self.ocr_engine = OCREngineManager()

        # Centralized prompt management, shared by several managers below.
        self.prompt_library = PromptLibraryManager()

        self.brand_recognizer = BrandRecognitionManager(
            self.clip_semantic, self.ocr_engine, self.prompt_library
        )

        # Draws brand bounding boxes for the UI.
        self.brand_visualizer = BrandVisualizationManager()

        self.caption_generator = CaptionGenerationManager(model_name=vlm_model_name)

        # VLM-based brand verification reuses the caption generator's model.
        self.brand_verifier = BrandVerificationManager(self.caption_generator)

        # Filters out brands implausible for the detected scene.
        self.scene_compatibility = SceneCompatibilityManager(self.prompt_library)

        self.fusion_manager = DetectionFusionManager(self.clip_semantic)

        # Shares the prompt library for smart hashtag generation.
        self.output_processor = OutputProcessingManager(self.prompt_library)

        elapsed = time.time() - start_time
        print("="*60)
        print(f"✓ Pipeline initialized successfully (Time: {elapsed:.2f}s)")
        print("="*60)

    def _build_brand_entries(self, brand_triples):
        """Convert (name, confidence, bbox) triples into visualization dicts.

        Looks up each brand's category in the prompt library, falling back to
        'default' for unknown brands. Centralizes logic that was previously
        duplicated after every stage that rewrites the brand list (initial
        detection, compatibility check, VLM voting).
        """
        entries = []
        for brand_name, confidence, bbox in brand_triples:
            brand_info = self.prompt_library.get_brand_prompts(brand_name)
            category = brand_info.get('category', 'default') if brand_info else 'default'
            entries.append({
                'name': brand_name,
                'confidence': confidence,
                'bbox': bbox,
                'category': category
            })
        return entries

    def process_image(self, image, platform='instagram', yolo_variant='l', language='zh') -> Dict:
        """End-to-end image processing pipeline.

        Args:
            image: PIL Image or path
            platform: 'instagram', 'tiktok', or 'xiaohongshu'
            yolo_variant: 'm', 'l' (default), or 'x'
                NOTE(review): currently informational only — the detector
                variant is fixed at construction time.
            language: 'zh' (Traditional Chinese), 'en' (English), 'zh-en' (Bilingual)

        Returns:
            Processing results dictionary with brand visualizations.

        Raises:
            Exception: re-raised after logging so the caller can surface it.
        """
        print(f"\nProcessing image (Platform: {platform}, Language: {language})...")
        start_time = time.time()

        try:
            # Step 1: Preprocessing
            print("[1/13] Preprocessing image...")
            processed_img = self.image_processor.load_image(image)
            yolo_input = self.image_processor.preprocess_for_yolo(processed_img)

            # Step 2: Object detection
            print("[2/13] YOLO object detection...")
            yolo_results = self.yolo_detector.detect(yolo_input)
            print(f"  Detected {len(yolo_results)} objects")

            # Step 3: Saliency detection
            print("[3/13] Saliency detection...")
            salient_regions = self.saliency_detector.detect_salient_regions(processed_img)
            print(f"  Found {len(salient_regions)} salient regions")

            # Step 4: salient regions not covered by YOLO detections
            print("[4/13] Identifying unknown objects...")
            unknown_regions = self.saliency_detector.extract_unknown_regions(
                salient_regions, yolo_results
            )
            print(f"  Found {len(unknown_regions)} unknown regions")

            # Step 5: Brand recognition (with bounding boxes)
            print("[5/13] Brand recognition...")
            brands = []
            brand_detections = []  # For visualization

            # Method 1: check YOLO-detected brand-relevant objects.
            brand_relevant = self.yolo_detector.filter_brand_relevant_objects(yolo_results)
            if brand_relevant:
                print(f"  Checking {len(brand_relevant)} YOLO brand-relevant objects...")
                for det in brand_relevant[:5]:  # check top 5 brand-relevant objects
                    region = processed_img.crop(det['bbox'])
                    brand_result = self.brand_recognizer.recognize_brand(
                        region, processed_img, region_bbox=det['bbox']
                    )

                    if brand_result:
                        top_triples = brand_result[:2]  # top 2 brands per region
                        brands.extend((name, conf) for name, conf, _ in top_triples)
                        brand_detections.extend(self._build_brand_entries(top_triples))

            # Method 2: full-image brand scan — always runs, regardless of
            # whether YOLO found brand-relevant objects (commercial need).
            print("  Performing intelligent full-image brand scan...")
            full_image_brands = self.brand_recognizer.scan_full_image_for_brands(
                processed_img,
                exclude_bboxes=[bd['bbox'] for bd in brand_detections if bd.get('bbox')],
                saliency_regions=salient_regions  # guides scan-region selection
            )

            if full_image_brands:
                print(f"  Full-image scan found {len(full_image_brands)} additional brands")
                # Keep only brands not already detected (dedupe by name).
                new_triples = [(name, conf, bbox) for name, conf, bbox in full_image_brands
                               if not any(bd['name'] == name for bd in brand_detections)]
                brands.extend((name, conf) for name, conf, _ in new_triples)
                brand_detections.extend(self._build_brand_entries(new_triples))

            print(f"  Identified {len(brands)} brand instances (before verification)")

            # Step 6: CLIP scene understanding (needed by the compatibility check)
            print("[6/13] Scene understanding (CLIP)...")
            scene_analysis = self.clip_semantic.analyze_scene(processed_img)
            print(f"  Scene: {scene_analysis.get('urban', {}).get('top', 'unknown')}")

            # Step 7: scene compatibility filter
            if brands:
                print("[7/13] Checking scene compatibility...")
                brands_with_bbox = [(b[0], b[1], brand_detections[i]['bbox'])
                                    for i, b in enumerate(brands)]
                compatible_brands = self.scene_compatibility.batch_check_compatibility(
                    brands_with_bbox, scene_analysis
                )
                print(f"  {len(compatible_brands)} brands passed compatibility check")

                # Keep only brands that survived the compatibility check.
                if compatible_brands:
                    brands = [(b[0], b[1]) for b in compatible_brands]
                    brand_detections = self._build_brand_entries(compatible_brands)
                else:
                    brands = []
                    brand_detections = []

            # Step 8: VLM brand verification + three-way voting
            if brand_detections:
                print("[8/13] VLM brand verification...")
                vlm_verification = self.brand_verifier.verify_brands(
                    processed_img, [(bd['name'], bd['confidence'], bd['bbox'])
                                    for bd in brand_detections]
                )
                print(f"  VLM verified {len(vlm_verification.get('verified_brands', []))} brands")

                # Three-way voting: OpenCLIP + OCR + VLM.
                ocr_brands = {}
                for brand_name, conf in brands:
                    if brand_name not in ocr_brands:
                        ocr_brands[brand_name] = (0.5, conf)  # approximate text/ocr split

                final_brands = self.brand_verifier.three_way_voting(
                    [(bd['name'], bd['confidence'], bd['bbox']) for bd in brand_detections],
                    ocr_brands,
                    vlm_verification
                )
                print(f"  Final verified brands: {len(final_brands)}")

                # Keep only the voting survivors.
                if final_brands:
                    brands = [(b[0], b[1]) for b in final_brands]
                    brand_detections = self._build_brand_entries(final_brands)
                else:
                    brands = []
                    brand_detections = []

            # Draw brand boxes on a copy for the UI; keep the original intact.
            if brand_detections:
                visualized_image = self.brand_visualizer.draw_brand_detections(
                    processed_img.copy(), brand_detections
                )
            else:
                visualized_image = processed_img

            # Step 9: CV-based lighting analysis
            print("[9/13] Analyzing lighting conditions...")
            cv_lighting = self.lighting_analyzer.analyze_lighting(processed_img)
            print(f"  CV Lighting: {cv_lighting['lighting_type']} (confidence: {cv_lighting['confidence']:.2f})")
            print(f"  Details: brightness={cv_lighting['cv_features']['brightness']:.1f}, "
                  f"temp_ratio={cv_lighting['cv_features']['color_temp']:.2f}, "
                  f"contrast={cv_lighting['cv_features']['contrast']:.1f}")

            # Step 10: additional scene analysis details (log only)
            print("[10/13] Additional scene analysis...")
            print(f"  CLIP Lighting: {scene_analysis.get('lighting', {}).get('top', 'unknown')}")
            print(f"  Mood: {scene_analysis.get('mood', {}).get('top', 'unknown')}")

            # Step 11: fuse detections with lighting analysis
            print("[11/13] Fusing detection results...")
            fused_results = self.fusion_manager.fuse_detections(
                yolo_results, unknown_regions, scene_analysis, processed_img, cv_lighting
            )
            fused_results['brands'] = brands
            fused_results['scene_analysis'] = scene_analysis

            fused_lighting = fused_results['scene_analysis']['lighting']['top']
            print(f"  Fused Lighting: {fused_lighting}")

            # Step 12: caption generation with language support
            print("[12/13] Generating captions...")
            captions = self.caption_generator.generate_captions(
                fused_results, processed_img, platform, language
            )

            # Step 13: output processing with smart hashtags
            print("[13/13] Output processing...")
            validated_captions = []
            for caption in captions:
                # Only generate hashtags when the VLM produced too few —
                # never override VLM hashtags (they follow language rules).
                if not caption.get('hashtags') or len(caption.get('hashtags', [])) < 3:
                    print(f"  [DEBUG] Caption has {len(caption.get('hashtags', []))} hashtags, generating smart hashtags...")
                    caption['hashtags'] = self.output_processor.generate_smart_hashtags(
                        fused_results['detections'],
                        scene_analysis,
                        brands,
                        platform,
                        language
                    )
                else:
                    print(f"  [DEBUG] Caption has {len(caption['hashtags'])} VLM-generated hashtags")

                # Pass full context so validate_output can auto-top-up hashtags.
                is_valid, msg = self.output_processor.validate_output(
                    caption, platform,
                    detections=fused_results['detections'],
                    scene_info=scene_analysis,
                    brands=brands,
                    language=language
                )
                if is_valid:
                    validated_captions.append(caption)
                else:
                    print(f"  [DEBUG] Caption validation failed: {msg}")

            elapsed = time.time() - start_time
            print(f"\n✓ Processing complete (Total time: {elapsed:.2f}s)")
            print(f"  Generated {len(validated_captions)} caption variations")

            return {
                'captions': validated_captions,
                'detections': fused_results['detections'],
                'brands': brands,
                'brand_detections': brand_detections,  # For UI display
                'visualized_image': visualized_image,  # Image with brand boxes
                'scene': scene_analysis,
                'composition': fused_results.get('composition', {}),
                'lighting': cv_lighting,
                'processing_time': elapsed
            }

        except Exception as e:
            print(f"\n✗ Processing error: {str(e)}")
            traceback.print_exc()
            # Re-raise so the caller (app layer) can surface the error.
            raise

print("✓ PixcribePipeline (V2 with VLM Verification, Scene Compatibility, and Adaptive Weights) defined")
prompt_library_manager.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Dict, List, Optional
3
+ from landmark_prompts import LandmarkPrompts
4
+ from brand_prompts import BrandPrompts
5
+ from scene_prompts import ScenePrompts
6
+ from universal_object_prompts import UniversalObjectPrompts
7
+
8
class PromptLibraryManager:
    """Facade over all prompt sub-modules.

    Exposes a single interface for brand, landmark, scene, and universal
    object prompts, plus combined hashtag generation and location search.
    """

    def __init__(self):
        """Load every prompt sub-module and report library statistics."""
        print("Initializing Prompt Library Manager (Facade)...")

        # Load all sub-modules.
        self.brand_prompts = BrandPrompts()
        self.landmark_prompts = LandmarkPrompts()
        self.scene_prompts = ScenePrompts()
        self.object_prompts = UniversalObjectPrompts()

        # Summary statistics for the startup log.
        total_brands = self._count_brands()
        total_landmarks = len(self.landmark_prompts.landmarks)
        total_scenes = len(self.scene_prompts.scene_vocabularies)
        total_objects = len(self.object_prompts.object_vocabularies)

        print(f"✓ Prompt Library Manager initialized:")
        print(f"  - {total_brands} brands across {len(self.brand_prompts.brand_prompts)} categories")
        print(f"  - {total_landmarks} world landmarks")
        print(f"  - {total_scenes} scene categories")
        print(f"  - {total_objects} universal object categories")

    def _count_brands(self) -> int:
        """Total number of brands across every category."""
        return sum(len(category) for category in self.brand_prompts.brand_prompts.values())

    # ===== Brand methods =====

    def get_brand_prompts(self, brand_name: str) -> Optional[Dict]:
        """Return the full prompt record for *brand_name*."""
        return self.brand_prompts.get_prompts(brand_name)

    def get_brand_category(self, brand_name: str) -> str:
        """Return the category a brand belongs to."""
        return self.brand_prompts.get_brand_category(brand_name)

    def get_all_brands(self) -> Dict:
        """Return every brand as one flattened dict."""
        return self.brand_prompts.get_all_brands()

    def get_brands_by_category(self, category: str) -> Dict:
        """Return all brands within a single category."""
        return self.brand_prompts.get_brands_by_category(category)

    def search_brand_by_alias(self, alias: str) -> Optional[str]:
        """Resolve a brand alias to its canonical brand name."""
        return self.brand_prompts.search_brand_by_alias(alias)

    # ===== Landmark methods =====

    def get_landmark_prompts(self, landmark_name: str) -> Optional[Dict]:
        """Return the full prompt record for *landmark_name*."""
        return self.landmark_prompts.get_prompts(landmark_name)

    def get_all_landmarks(self) -> Dict:
        """Return every landmark record."""
        return self.landmark_prompts.get_all_landmarks()

    def search_landmark_by_location(self, city: str = None, country: str = None) -> List[str]:
        """Return names of landmarks matching a city and/or country."""
        return self.landmark_prompts.search_by_location(city, country)

    def get_landmark_visual_prompts(self, landmark_name: str, context: str = 'iconic_view') -> List[str]:
        """Return visual-description prompts for a landmark in a given context."""
        return self.landmark_prompts.get_visual_prompts(landmark_name, context)

    # ===== Scene methods =====

    def get_scene_prompts(self, scene_category: str, subcategory: str = None) -> List[str]:
        """Return scene prompts, optionally narrowed to a subcategory."""
        return self.scene_prompts.get_prompts(scene_category, subcategory)

    def get_all_scene_categories(self) -> List[str]:
        """Return every scene category name."""
        return self.scene_prompts.get_all_categories()

    def get_scene_subcategories(self, scene_category: str) -> List[str]:
        """Return the subcategories of a scene category."""
        return self.scene_prompts.get_subcategories(scene_category)

    # ===== Universal object methods =====

    def get_object_prompts(self, category: str, subcategory: str = None) -> List[str]:
        """Return universal-object prompts (e.g. 'animals' / 'dogs')."""
        return self.object_prompts.get_prompts(category, subcategory)

    def get_all_object_categories(self) -> List[str]:
        """Return every universal-object category name."""
        return self.object_prompts.get_all_categories()

    def get_object_subcategories(self, category: str) -> List[str]:
        """Return the subcategories of an object category."""
        return self.object_prompts.get_subcategories(category)

    def detect_object_category(self, detected_objects: List[str]) -> Optional[str]:
        """Infer the dominant object category from detected object names."""
        return self.object_prompts.detect_object_category(detected_objects)

    # ===== Smart hashtag generation =====

    def get_hashtags_for_content(self, detected_items: Dict, language: str = 'zh') -> List[str]:
        """Combine landmark, brand, and scene hashtags for detected content.

        Args:
            detected_items: detection summary dict, e.g.
                {
                    'landmarks': ['Big Ben', ...],
                    'brands': ['Apple', ...],
                    'scene_category': 'urban',
                    'scene_subcategory': 'city_canyon'
                }
            language: 'zh', 'en', or 'zh-en'.

        Returns:
            Up to 10 hashtags, de-duplicated, ordered landmark > brand > scene.
        """
        collected = []

        # 1. Landmark tags (highest priority).
        for landmark in detected_items.get('landmarks', []):
            collected += self.landmark_prompts.get_hashtags(landmark, language)

        # 2. Brand tags (high priority).
        for brand in detected_items.get('brands', []):
            collected += self.brand_prompts.get_hashtags(brand, language)

        # 3. Scene tags (medium priority).
        scene_category = detected_items.get('scene_category')
        if scene_category:
            collected += self.scene_prompts.get_hashtags(scene_category, language)

        # dict.fromkeys keeps first occurrence, preserving priority order.
        return list(dict.fromkeys(collected))[:10]

    # ===== Search functions =====

    def search_by_location(self, city: str = None, country: str = None) -> Dict:
        """Search all location-bound content for a city and/or country.

        Brands are not indexed by location yet, so the 'brands' list is
        always empty (kept for future extension).
        """
        return {
            'landmarks': self.landmark_prompts.search_by_location(city, country),
            'brands': []
        }

    def detect_landmark_from_image_context(self, detected_objects: List[str],
                                           scene_analysis: Dict) -> Optional[str]:
        """Guess a landmark from detected objects via simple keyword matching.

        Args:
            detected_objects: detected object names.
            scene_analysis: scene-analysis dict (currently unused; kept for
                interface stability).

        Returns:
            A landmark name when at least two of its keywords match, else None.
        """
        # Keyword map from landmark to indicative object terms.
        landmark_keywords = {
            'Big Ben': ['clock tower', 'tower', 'bridge', 'river'],
            'Eiffel Tower': ['tower', 'iron structure', 'landmark'],
            'Statue of Liberty': ['statue', 'monument', 'island', 'harbor'],
            'Sydney Opera House': ['building', 'harbor', 'architecture'],
            'Taj Mahal': ['building', 'monument', 'dome'],
            'Pyramids of Giza': ['pyramid', 'desert', 'monument'],
            # Extend with more landmarks as needed.
        }

        for landmark, keywords in landmark_keywords.items():
            hits = sum(any(kw in obj.lower() for kw in keywords)
                       for obj in detected_objects)
            if hits >= 2:  # demand two keyword hits to avoid false positives
                return landmark

        return None

print("✓ PromptLibraryManager (Facade) defined")
saliency_detection_manager.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import numpy as np
4
+ from PIL import Image
5
+ import cv2
6
+ from typing import List, Dict
7
+ import torchvision.transforms as transforms
8
+
9
class SaliencyDetectionManager:
    """Visual saliency detection via an Otsu-threshold contour heuristic.

    NOTE(review): despite the original "U2-Net" labels, the deep model loaded
    here is torchvision's DeepLabV3-ResNet50, and it (together with
    `self.transform`) is currently NOT used by detect_salient_regions(),
    which relies on a pure-OpenCV pipeline. Kept loaded for a future
    deep-saliency path — confirm before removing.
    """

    def __init__(self):
        # The deep backbone is optional: loading failures fall back to the
        # OpenCV-only path, so this never aborts initialization.
        # Fix: the old message claimed "U2-Net" which is not what is loaded.
        print("Loading saliency backbone (DeepLabV3-ResNet50)...")
        try:
            from torchvision.models.segmentation import deeplabv3_resnet50
            self.model = deeplabv3_resnet50(pretrained=True)
            self.model.eval()
            if torch.cuda.is_available():
                self.model = self.model.cuda()
        except Exception as e:
            print(f"Warning: Cannot load deep learning model, using fallback: {e}")
            self.model = None

        # Tunables for the contour-proposal stage.
        self.threshold = 0.5      # reserved; not used by the current heuristic
        self.min_area = 1600      # discard contours smaller than ~40x40 px
        self.min_saliency = 0.6   # reserved; not used by the current heuristic

        # Preprocessing for the (currently unused) deep backbone.
        self.transform = transforms.Compose([
            transforms.Resize((320, 320)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        print("✓ SaliencyDetectionManager initialized")

    def detect_salient_regions(self, image: "Image.Image") -> List[Dict]:
        """Propose up to 10 salient regions using Otsu threshold + contours.

        Args:
            image: input PIL image (any mode; normalized to RGB internally).

        Returns:
            List of dicts with 'bbox' [x1, y1, x2, y2] (floats), 'area'
            (contour area in px), 'saliency_score' (area fraction of the
            image, capped at 1.0) and 'image' (cropped PIL region),
            sorted by saliency_score descending.
        """
        # Fix: np.array(image) made cvtColor crash on grayscale/RGBA inputs;
        # normalize to 3-channel RGB before the color-space conversion.
        img_array = np.array(image.convert('RGB'))
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

        # Otsu picks the binarization threshold automatically (0 is ignored).
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        regions = []
        height, width = img_array.shape[:2]

        for contour in contours:
            area = cv2.contourArea(contour)
            if area < self.min_area:
                continue

            x, y, w, h = cv2.boundingRect(contour)
            bbox = [float(x), float(y), float(x + w), float(y + h)]
            region_img = image.crop(bbox)

            regions.append({
                'bbox': bbox,
                'area': area,
                # Larger regions are treated as more salient by this heuristic.
                'saliency_score': min(area / (width * height), 1.0),
                'image': region_img
            })

        regions.sort(key=lambda r: r['saliency_score'], reverse=True)
        return regions[:10]

    def extract_unknown_regions(self, salient_regions: List[Dict], yolo_detections: List[Dict]) -> List[Dict]:
        """Return salient regions not covered by any YOLO detection.

        A region is considered "unknown" when its best IoU against every
        YOLO box is below 0.3.

        Args:
            salient_regions: dicts with a 'bbox' key (from detect_salient_regions).
            yolo_detections: dicts with a 'bbox' key.
        """
        unknown_regions = []

        for region in salient_regions:
            max_iou = 0.0
            for det in yolo_detections:
                iou = self._calculate_iou(region['bbox'], det['bbox'])
                max_iou = max(max_iou, iou)

            if max_iou < 0.3:
                unknown_regions.append(region)

        return unknown_regions

    def _calculate_iou(self, box1: List[float], box2: List[float]) -> float:
        """Return IoU of two [x1, y1, x2, y2] boxes (0.0 when disjoint/degenerate)."""
        x1_min, y1_min, x1_max, y1_max = box1
        x2_min, y2_min, x2_max, y2_max = box2

        inter_xmin = max(x1_min, x2_min)
        inter_ymin = max(y1_min, y2_min)
        inter_xmax = min(x1_max, x2_max)
        inter_ymax = min(y1_max, y2_max)

        if inter_xmax < inter_xmin or inter_ymax < inter_ymin:
            return 0.0

        inter_area = (inter_xmax - inter_xmin) * (inter_ymax - inter_ymin)
        box1_area = (x1_max - x1_min) * (y1_max - y1_min)
        box2_area = (x2_max - x2_min) * (y2_max - y2_min)
        union_area = box1_area + box2_area - inter_area

        # Guard against zero-area union (degenerate boxes).
        return inter_area / union_area if union_area > 0 else 0.0

print("✓ SaliencyDetectionManager defined")
scene_compatibility_manager.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Dict, List
3
+ from prompt_library_manager import PromptLibraryManager
4
+
5
class SceneCompatibilityManager:
    """Weight brand detections by how plausible the surrounding scene is,
    suppressing false positives (e.g. a luxury-car logo in a food closeup)."""

    def __init__(self, prompt_library: PromptLibraryManager = None):
        """
        Args:
            prompt_library: PromptLibraryManager supplying brand metadata;
                a fresh instance is created when omitted.
        """
        self.prompt_library = prompt_library if prompt_library is not None else PromptLibraryManager()

        # Keyword lists used to bucket OpenCLIP scene labels into scene types.
        self.scene_keywords = {
            'food_closeup': ['food', 'meal', 'dish', 'plate', 'restaurant', 'dining', 'cuisine'],
            'nature_landscape': ['mountain', 'forest', 'beach', 'ocean', 'lake', 'sky', 'sunset', 'outdoor'],
            'industrial': ['factory', 'warehouse', 'industrial', 'machinery', 'construction'],
            'sports': ['gym', 'fitness', 'running', 'sports', 'athletic', 'exercise'],
            'fashion': ['fashion', 'outfit', 'style', 'wearing', 'model'],
            'luxury_retail': ['store', 'boutique', 'shop', 'retail', 'display'],
            'office': ['office', 'desk', 'computer', 'workspace', 'business'],
            'home': ['home', 'room', 'interior', 'living', 'bedroom'],
            'lifestyle': ['lifestyle', 'casual', 'everyday', 'daily'],
            'tech_review': ['unboxing', 'review', 'tech', 'device', 'gadget'],
            'formal_event': ['event', 'party', 'formal', 'ceremony', 'celebration'],
            'outdoor': ['outdoor', 'park', 'street', 'outside'],
            'travel': ['travel', 'trip', 'luggage', 'airport', 'vacation'],
            'street': ['street', 'road', 'urban', 'city'],
            'parking': ['parking', 'car park', 'garage'],
            'showroom': ['showroom', 'exhibition', 'display'],
            'closeup': ['closeup', 'detail', 'macro', 'close-up']
        }

        print("✓ Scene Compatibility Manager initialized")

    def classify_scene(self, scene_analysis: Dict) -> str:
        """Map OpenCLIP scene analysis onto one coarse scene type.

        Args:
            scene_analysis: results from OpenCLIPSemanticManager; the 'urban',
                'lighting', 'mood' and 'composition' entries are inspected
                for their 'top' label.

        Returns:
            The scene type with the most keyword hits, or 'general' when
            nothing matches.
        """
        tally: Dict[str, int] = {}

        for analysis_key in ('urban', 'lighting', 'mood', 'composition'):
            if analysis_key not in scene_analysis or 'top' not in scene_analysis[analysis_key]:
                continue
            label = scene_analysis[analysis_key]['top'].lower()

            for scene_type, keywords in self.scene_keywords.items():
                hits = sum(1 for keyword in keywords if keyword in label)
                if hits:
                    tally[scene_type] = tally.get(scene_type, 0) + hits

        if not tally:
            return 'general'
        return max(tally.items(), key=lambda item: item[1])[0]

    def check_compatibility(self, brand_name: str, scene_type: str) -> float:
        """Score how plausible a brand sighting is in the given scene.

        Args:
            brand_name: name of the brand.
            scene_type: scene type key (e.g. 'food_closeup', 'fashion').

        Returns:
            1.0 when the scene is typical for the brand, 0.3 when it is
            listed as incompatible, 0.7 otherwise (including unknown brands).
        """
        brand_info = self.prompt_library.get_brand_prompts(brand_name)
        if not brand_info:
            return 0.7

        if scene_type in brand_info.get('typical_scenes', []):
            return 1.0
        if scene_type in brand_info.get('incompatible_scenes', []):
            return 0.3
        return 0.7

    def batch_check_compatibility(self, detected_brands: List[tuple],
                                  scene_analysis: Dict) -> List[tuple]:
        """Re-weight several brand detections by scene compatibility.

        Args:
            detected_brands: (brand_name, confidence, bbox) tuples.
            scene_analysis: scene analysis results (see classify_scene).

        Returns:
            (brand_name, adjusted_confidence, bbox) tuples whose adjusted
            confidence exceeds 0.25, sorted by adjusted confidence descending.
        """
        scene_type = self.classify_scene(scene_analysis)

        kept = []
        for brand_name, confidence, bbox in detected_brands:
            weighted = confidence * self.check_compatibility(brand_name, scene_type)
            if weighted > 0.25:
                kept.append((brand_name, weighted, bbox))

        kept.sort(key=lambda entry: entry[1], reverse=True)
        return kept

print("✓ SceneCompatibilityManager defined")
scene_prompts.py ADDED
@@ -0,0 +1,433 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Dict, List
3
+
4
+ class ScenePrompts:
5
+ """
6
+ 場景描述 Prompt 庫
7
+ 提供多元化場景類型的詳細視覺描述
8
+ 涵蓋:城市、自然、室內、食物、人物、產品等場景
9
+ """
10
+
11
    def __init__(self):
        """Build the in-memory scene vocabulary and hashtag libraries.

        Populates:
            scene_vocabularies: category -> subcategory -> list of CLIP-style
                prompt sentences. The 'lighting' and 'mood' categories map
                directly to a flat list (no subcategory level).
            scene_hashtags: category -> {'zh': [...], 'en': [...]} tag sets
                (values intentionally bilingual; do not translate).
        """

        self.scene_vocabularies = {
            # ===== Urban scenes =====
            'urban': {
                'city_canyon': [
                    'urban canyon with towering skyscrapers lining both sides of street creating vertical corridor',
                    'metropolitan corridor formed by tall buildings with strong vertical emphasis and symmetrical composition',
                    'downtown street flanked by modern high-rise architecture creating canyon effect',
                    'city street with tall buildings on both sides creating narrow vertical perspective'
                ],
                'street_level': [
                    'bustling city street with pedestrians and vehicles in urban environment',
                    'urban sidewalk scene with street furniture storefronts and mixed activity',
                    'downtown pedestrian area with commercial buildings and urban infrastructure',
                    'street view with urban architecture shops and people walking'
                ],
                'skyline': [
                    'city skyline with skyscrapers silhouetted against sky',
                    'urban panorama showing downtown high-rise buildings and city sprawl',
                    'metropolitan skyline view from elevated vantage point',
                    'cityscape with distinctive tall buildings defining horizon line'
                ],
                'plaza': [
                    'urban plaza with open public space and surrounding architecture',
                    'city square with pedestrians monuments and commercial buildings',
                    'downtown plaza featuring fountains sculptures and gathering spaces',
                    'public square with mixed use of recreational and commercial activities'
                ]
            },

            # ===== Nature scenes =====
            'nature': {
                'mountain': [
                    'majestic mountain range with snow-capped peaks against blue sky',
                    'alpine landscape with rocky summits and glacial valleys',
                    'mountain vista with layered ridges fading into distance creating depth',
                    'dramatic mountain scenery with rugged peaks and alpine vegetation',
                    'mountainous terrain with steep slopes and varied elevation'
                ],
                'beach': [
                    'serene beach with turquoise water and white sand shore',
                    'coastal scene with gentle waves lapping at sandy beach',
                    'tropical beach with clear water and palm tree shadows',
                    'beach landscape with ocean horizon and coastal features',
                    'seaside view with beach sand water and sky meeting at horizon'
                ],
                'forest': [
                    'lush forest with dense canopy and dappled sunlight filtering through trees',
                    'woodland scene with tall trees and undergrowth vegetation',
                    'forest interior with tree trunks and leafy canopy overhead',
                    'dense forest landscape with natural vegetation and organic forms',
                    'wooded area with trees creating natural shade and green environment'
                ],
                'lake': [
                    'tranquil lake with still water reflecting surrounding landscape',
                    'mountain lake with clear water and scenic backdrop',
                    'lakeside view with calm water and shoreline vegetation',
                    'peaceful lake scene with water sky and natural surroundings',
                    'alpine lake with pristine water and mountain reflections'
                ],
                'desert': [
                    'desert landscape with sand dunes and arid terrain',
                    'sandy desert with undulating dunes and clear sky',
                    'arid desert scene with sparse vegetation and sandy ground',
                    'desert vista with sand formations and minimal vegetation',
                    'dry desert landscape with sand rock and desert plants'
                ],
                'waterfall': [
                    'cascading waterfall with flowing water over rocks',
                    'waterfall scene with water spray and lush surrounding vegetation',
                    'natural waterfall with water rushing down cliff face',
                    'scenic waterfall with water pool and natural setting',
                    'tiered waterfall with multiple cascades and mist'
                ]
            },

            # ===== Indoor scenes =====
            'indoor': {
                'cafe': [
                    'cozy cafe interior with warm ambient lighting and wooden furniture',
                    'modern coffee shop with industrial decor and minimalist design',
                    'rustic cafe setting with vintage decorations and soft lighting',
                    'contemporary cafe space with comfortable seating and artistic elements',
                    'intimate coffee shop with warm atmosphere and inviting ambiance'
                ],
                'restaurant': [
                    'upscale restaurant interior with elegant table settings and refined decor',
                    'casual dining space with comfortable seating and welcoming atmosphere',
                    'fine dining restaurant with sophisticated lighting and premium furnishings',
                    'restaurant setting with tables chairs and ambient lighting',
                    'dining establishment with culinary presentation and service area'
                ],
                'office': [
                    'modern office space with desks computers and professional workspace',
                    'contemporary work environment with ergonomic furniture and technology',
                    'office interior with cubicles meeting areas and work stations',
                    'professional office setting with business equipment and organized layout',
                    'corporate workspace with clean lines and functional design'
                ],
                'home_living': [
                    'cozy living room with sofa comfortable seating and home decor',
                    'modern home interior with minimalist furniture and clean aesthetic',
                    'warm living space with personal touches and inviting atmosphere',
                    'residential interior with family room features and casual comfort',
                    'home living area with relaxation space and domestic furnishings'
                ],
                'bedroom': [
                    'peaceful bedroom with bed nightstands and soft lighting',
                    'modern bedroom interior with minimalist design and calm atmosphere',
                    'cozy sleeping space with comfortable bedding and personal decor',
                    'bedroom setting with rest area and private sanctuary feel',
                    'sleeping quarters with bed furniture and restful ambiance'
                ],
                'museum': [
                    'museum interior with exhibited artworks and gallery lighting',
                    'cultural institution space with display cases and visitor areas',
                    'art gallery with paintings sculptures and exhibition design',
                    'museum hall with artifacts and informational displays',
                    'exhibition space with curated collections and viewing areas'
                ]
            },

            # ===== Food scenes =====
            'food': {
                'plated_dish': [
                    'gourmet plated dish with artistic presentation and fine dining aesthetics',
                    'restaurant plate with carefully arranged food components and garnishes',
                    'culinary creation with vibrant colors and professional plating',
                    'plated meal with balanced composition and appetizing appearance',
                    'food presentation with attention to visual detail and portion control',
                    'elegant dinner plate with sophisticated garnish and culinary artistry',
                    'fusion cuisine dish with innovative presentation and colorful elements',
                    'fine dining entree with sauce art and premium ingredients',
                    'contemporary plated food with geometric arrangement and edible flowers',
                    "chef's special with meticulous plating and restaurant-quality finish"
                ],
                'street_food': [
                    'casual street food on wooden table or food truck setting',
                    'authentic street cuisine with rustic presentation and local character',
                    'food stall offering with simple plating and traditional preparation',
                    'street vendor food with casual serving style and cultural authenticity',
                    'local street eats with informal presentation and fresh ingredients',
                    'food truck meal with paper packaging and urban backdrop',
                    'market stall food with traditional cooking methods and local flavors',
                    'outdoor food stand offering with casual atmosphere and quick service',
                    'street-side cuisine with vibrant colors and authentic preparation',
                    'hawker food with cultural heritage and honest presentation'
                ],
                'dessert': [
                    'elaborate dessert with decorative elements and sweet presentation',
                    'pastry or cake with artistic decoration and enticing appearance',
                    'sweet course with layered construction and visual appeal',
                    'dessert plate with confectionery artistry and color contrast',
                    'bakery creation with detailed finishing and appetizing styling',
                    'chocolate dessert with glossy ganache and elegant garnish',
                    'fruit tart with colorful berries and glazed finish',
                    'layered cake slice with frosting art and textured decoration',
                    'ice cream sundae with toppings drizzle and attractive presentation',
                    'patisserie item with delicate decoration and refined sweetness'
                ],
                'ingredients': [
                    'fresh ingredients closeup shot with natural textures and vibrant colors',
                    'raw food components with organic forms and market-fresh appearance',
                    'culinary ingredients arranged with attention to color and composition',
                    'fresh produce with natural beauty and wholesome qualities',
                    'cooking ingredients with variety of textures and natural appeal',
                    'farmers market vegetables with rich colors and organic shapes',
                    'herb and spice arrangement with aromatic qualities and rustic charm',
                    'seafood display with ice and fresh-from-ocean appearance',
                    'butcher quality meat with marbling and premium cut presentation',
                    'artisan bread and grains with wholesome texture and natural crust'
                ],
                'beverage': [
                    'artisan beverage with careful presentation and appealing pour',
                    'drink in glassware with garnish and professional service style',
                    'coffee or tea with latte art and aesthetic serving',
                    'refreshing beverage with ice garnish and attractive glass',
                    'drink presentation with attention to color and visual interest',
                    'craft cocktail with creative garnish and sophisticated glassware',
                    'specialty coffee with foam art and ceramic cup presentation',
                    'fresh juice with fruit garnish and vibrant natural color',
                    'tea service with elegant teapot and traditional ceremony aesthetic',
                    'smoothie bowl with fruit toppings and colorful healthy presentation'
                ],
                'breakfast': [
                    'morning breakfast spread with eggs toast and fresh coffee',
                    'continental breakfast with pastries croissants and fruit arrangement',
                    'healthy breakfast bowl with granola yogurt and berries',
                    'pancake stack with maple syrup butter and powdered sugar',
                    'avocado toast with poached egg and microgreens on rustic plate',
                    'breakfast plate with bacon eggs and golden hash browns',
                    'brunch setting with mimosas fresh flowers and elegant tableware',
                    'oatmeal bowl with nuts fruits and honey drizzle',
                    'smoothie and acai bowl with tropical fruits and seeds',
                    'breakfast sandwich with melted cheese and morning sunlight'
                ],
                'baked_goods': [
                    'fresh baked bread with golden crust and flour dusting',
                    'artisan pastries with flaky layers and butter sheen',
                    'homemade cookies with chocolate chips and rustic appearance',
                    'sourdough loaf with scoring pattern and crusty exterior',
                    'cinnamon rolls with cream cheese frosting and swirls',
                    'French baguette with crispy crust and airy crumb',
                    'croissants with laminated layers and golden brown color',
                    'muffins with crumb topping and fresh from oven warmth',
                    'bagels with sesame seeds and chewy texture',
                    'focaccia bread with herbs olive oil and dimpled surface'
                ]
            },

            # ===== People scenes =====
            'people': {
                'portrait': [
                    'portrait photograph with shallow depth of field and subject focus',
                    'headshot with clean background and flattering lighting on face',
                    'personal portrait with emotional expression and eye contact',
                    'portrait composition with subject as primary visual element',
                    'close-up portrait with facial features and personality captured'
                ],
                'candid': [
                    'candid street photography moment with natural unposed action',
                    'spontaneous capture of people in authentic situations and activities',
                    'documentary-style photograph of real-life moments and interactions',
                    'natural human behavior captured without staged positioning',
                    'unscripted moment showing genuine emotion and movement'
                ],
                'group': [
                    'group photo with multiple people in organized composition',
                    'gathering of people with social interaction and shared activity',
                    'team or family portrait with coordinated positioning',
                    'group setting with people engaged in collective experience',
                    'multiple subjects arranged in harmonious group composition'
                ],
                'activity': [
                    'people engaged in specific activity or recreational pursuit',
                    'action photograph showing physical movement and dynamic energy',
                    'sports or fitness activity with athletic performance captured',
                    'people participating in hobby or leisure activity',
                    'human subjects in motion demonstrating skill or exercise'
                ]
            },

            # ===== Product scenes =====
            'product': {
                'studio_shot': [
                    'minimalist product photography on white background with clean lighting',
                    'commercial product shot with professional lighting and sharp detail',
                    'studio product photograph with controlled environment and even illumination',
                    'catalog-style product image with neutral background and clear presentation',
                    'product on white backdrop with shadow control and highlight management'
                ],
                'lifestyle': [
                    'lifestyle product shot in natural setting with contextual environment',
                    'product in use showing real-world application and human interaction',
                    'environmental product photography with lifestyle context and atmosphere',
                    'product placed in authentic setting with relatable situation',
                    'contextual product image showing everyday use and practical application'
                ],
                'flatlay': [
                    'overhead flatlay composition with products arranged on surface',
                    "bird's eye view of items arranged in artistic layout",
                    'top-down product styling with complementary objects and props',
                    'flatlay arrangement with balanced composition and visual harmony',
                    'aerial view of products styled with decorative elements'
                ]
            },

            # ===== Architecture scenes =====
            'architecture': {
                'modern': [
                    'contemporary architecture with glass steel and minimalist design',
                    'modern building with clean lines geometric forms and innovative structure',
                    'architectural design featuring current aesthetic and building technology',
                    'present-day construction with progressive design and materials',
                    'modern structure with sleek surfaces and contemporary styling'
                ],
                'historic': [
                    'historic architecture with traditional design and aged materials',
                    'heritage building with classical elements and period styling',
                    'old structure with architectural significance and historical character',
                    'traditional building with cultural importance and time-worn beauty',
                    'antique architecture showing craftsmanship of past eras'
                ],
                'interior': [
                    'architectural interior space with designed environment and spatial quality',
                    'building interior showing layout flow and functional design',
                    'indoor architectural space with lighting surfaces and volumes',
                    'interior architecture with structural elements and finish materials',
                    'designed space interior with architectural features and spatial composition'
                ],
                'detail': [
                    'architectural detail closeup showing construction method and materials',
                    'building element with decorative or functional architectural feature',
                    'structural detail revealing craftsmanship and design specifics',
                    'architectural component with unique design characteristic',
                    'close view of building feature showing texture pattern or ornamentation'
                ]
            },

            # ===== Lighting descriptions (flat list: no subcategories) =====
            'lighting': [
                'soft diffused light creating even illumination without harsh shadows',
                'natural daylight with bright ambient illumination and true colors',
                'overcast atmosphere with diffused skylight and muted shadows',
                'warm ambient light with golden tones and cozy feeling',
                'evening light with low angle sun and long shadows',
                'bright sunlight with strong contrast and crisp shadows',
                'studio lighting with controlled illumination and professional quality',
                'indoor natural light from windows creating gentle directional lighting',
                'warm artificial lighting with incandescent glow and amber tones',
                'cool artificial lighting with fluorescent or LED quality',
                'soft indoor lighting with diffused sources and minimal shadows',
                'dramatic lighting with strong contrast and defined shadows'
            ],

            # ===== Mood descriptions (flat list: no subcategories) =====
            'mood': [
                'calm and contemplative atmosphere with serene peaceful quality',
                'bustling and energetic environment with dynamic active feeling',
                'dramatic and imposing presence with powerful visual impact',
                'cozy and intimate setting with warm welcoming ambiance',
                'minimalist and clean aesthetic with simple uncluttered feel',
                'vibrant and colorful scene with rich saturated hues',
                'moody and atmospheric environment with evocative lighting',
                'elegant and sophisticated setting with refined tasteful quality',
                'rustic and natural atmosphere with organic earthy character',
                'modern and sleek environment with contemporary styling'
            ]
        }

        # Scene category -> hashtag mapping. The zh/en string values are
        # runtime data consumed by get_hashtags(); keep them as-is.
        self.scene_hashtags = {
            'urban': {
                'zh': ['城市', '都市', '城市風景', '街拍', '建築'],
                'en': ['Urban', 'Cityscape', 'StreetPhotography', 'Architecture', 'City']
            },
            'nature': {
                'zh': ['自然', '風景', '戶外', '大自然', '風景攝影'],
                'en': ['Nature', 'Landscape', 'Outdoor', 'Scenery', 'NaturePhotography']
            },
            'indoor': {
                'zh': ['室內', '室內設計', '空間', '居家'],
                'en': ['Indoor', 'InteriorDesign', 'Interior', 'Home']
            },
            'food': {
                'zh': ['美食', '食物', '料理', '美食攝影', '餐廳'],
                'en': ['Food', 'Foodie', 'FoodPhotography', 'Cuisine', 'Dining']
            },
            'people': {
                'zh': ['人像', '人物', '肖像', '街拍'],
                'en': ['Portrait', 'People', 'PortraitPhotography', 'Candid']
            },
            'product': {
                'zh': ['產品', '商品', '產品攝影', '商業攝影'],
                'en': ['Product', 'ProductPhotography', 'Commercial', 'Flatlay']
            },
            'architecture': {
                'zh': ['建築', '建築攝影', '建築設計', '空間'],
                'en': ['Architecture', 'ArchitecturalPhotography', 'Building', 'Design']
            }
        }

        print(f"✓ Scene Prompts initialized with {len(self.scene_vocabularies)} scene categories")
376
+
377
+ def get_prompts(self, scene_category: str, subcategory: str = None) -> List[str]:
378
+ """
379
+ 取得場景 prompts
380
+
381
+ Args:
382
+ scene_category: 場景類別 (如 'urban', 'nature')
383
+ subcategory: 子類別 (如 'city_canyon', 'mountain')
384
+
385
+ Returns:
386
+ Prompt 列表
387
+ """
388
+ category_prompts = self.scene_vocabularies.get(scene_category, {})
389
+
390
+ if subcategory:
391
+ return category_prompts.get(subcategory, [])
392
+ else:
393
+ # 返回該類別的所有 prompts
394
+ all_prompts = []
395
+ for prompts in category_prompts.values():
396
+ if isinstance(prompts, list):
397
+ all_prompts.extend(prompts)
398
+ return all_prompts
399
+
400
+ def get_all_categories(self) -> List[str]:
401
+ """取得所有場景類別"""
402
+ return list(self.scene_vocabularies.keys())
403
+
404
+ def get_subcategories(self, scene_category: str) -> List[str]:
405
+ """取得特定類別的所有子類別"""
406
+ category = self.scene_vocabularies.get(scene_category, {})
407
+ return list(category.keys()) if isinstance(category, dict) else []
408
+
409
+ def get_hashtags(self, scene_category: str, language: str = 'zh') -> List[str]:
410
+ """
411
+ 取得場景的 hashtags
412
+
413
+ Args:
414
+ scene_category: 場景類別
415
+ language: 語言 ('zh', 'en', 或 'both')
416
+
417
+ Returns:
418
+ Hashtag 列表
419
+ """
420
+ hashtags = self.scene_hashtags.get(scene_category, {})
421
+
422
+ if language == 'zh':
423
+ return hashtags.get('zh', [])
424
+ elif language == 'en':
425
+ return hashtags.get('en', [])
426
+ elif language == 'both' or language == 'zh-en':
427
+ zh_tags = hashtags.get('zh', [])
428
+ en_tags = hashtags.get('en', [])
429
+ return zh_tags + en_tags
430
+ else:
431
+ return hashtags.get('zh', [])
432
+
433
+ print("✓ ScenePrompts defined")
ui_manager.py ADDED
@@ -0,0 +1,681 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from typing import Dict, List
3
+
4
+ class UIManager:
5
+ """Manages all UI components and styling for Pixcribe"""
6
+
7
    def __init__(self):
        # Build the full stylesheet once at construction; consumers read it
        # from `custom_css` without regenerating it per request.
        self.custom_css = self._get_custom_css()
9
+
10
    def _get_custom_css(self) -> str:
        """Return the complete custom stylesheet for the app (light theme).

        Covers, in order: global reset/base, header, two-column layout,
        card containers, the upload drop area, section titles, form/radio
        controls, the generate button, caption result cards with their copy
        buttons, the footer, image display, the mobile breakpoint, and a
        loading shimmer animation.

        NOTE(review): presumably passed to Gradio as ``gr.Blocks(css=...)``
        via ``self.custom_css`` — confirm at the interface build site.
        """
        # The CSS below is returned verbatim; class names must stay in sync
        # with the HTML emitted elsewhere in this class (e.g. .caption-card,
        # .copy-button in format_captions_with_copy).
        return """
        /* ==================== Global Reset & Base ==================== */
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        .gradio-container {
            background: linear-gradient(135deg, #F8F9FA 0%, #E9ECEF 100%) !important;
            font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif !important;
            padding: 0 !important;
            max-width: 100% !important;
            min-height: 100vh !important;
        }

        /* Main content wrapper - Generous padding to prevent edge clipping */
        .contain {
            max-width: 1600px !important;
            margin: 0 auto !important;
            padding: 64px 96px 96px 96px !important;
        }

        /* ==================== Header ==================== */
        .app-header {
            text-align: center;
            margin-bottom: 72px;
            animation: fadeInDown 0.8s ease-out;
            padding: 0 32px;
        }

        @keyframes fadeInDown {
            from {
                opacity: 0;
                transform: translateY(-30px);
            }
            to {
                opacity: 1;
                transform: translateY(0);
            }
        }

        .app-title {
            font-size: 72px;
            font-weight: 800;
            background: linear-gradient(135deg, #2C3E50 0%, #34495E 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
            margin-bottom: 24px;
            letter-spacing: -0.05em;
            line-height: 1.1;
        }

        .app-subtitle {
            font-size: 26px;
            font-weight: 400;
            color: #6C757D;
            margin-bottom: 0;
            letter-spacing: 0.01em;
        }

        /* ==================== Layout ==================== */
        .main-row {
            gap: 48px !important;
            margin-bottom: 48px !important;
        }

        /* Left column elegant container */
        .main-row > .column:first-child {
            background: linear-gradient(135deg, rgba(255, 255, 255, 0.8) 0%, rgba(252, 253, 254, 0.6) 100%) !important;
            border-radius: 28px !important;
            padding: 40px !important;
            border: 1px solid rgba(52, 152, 219, 0.08) !important;
            box-shadow: 0 4px 20px rgba(0, 0, 0, 0.04) !important;
        }

        /* Right column elegant container */
        .main-row > .column:last-child {
            background: linear-gradient(135deg, rgba(255, 255, 255, 0.8) 0%, rgba(252, 253, 254, 0.6) 100%) !important;
            border-radius: 28px !important;
            padding: 40px !important;
            border: 1px solid rgba(52, 152, 219, 0.08) !important;
            box-shadow: 0 4px 20px rgba(0, 0, 0, 0.04) !important;
        }

        /* ==================== Premium Cards - Light & Spacious ==================== */
        .upload-card {
            background: rgba(255, 255, 255, 0.95) !important;
            border-radius: 32px !important;
            box-shadow:
                0 4px 16px rgba(0, 0, 0, 0.06),
                0 2px 4px rgba(0, 0, 0, 0.03),
                0 1px 2px rgba(0, 0, 0, 0.02) !important;
            border: 1px solid rgba(0, 0, 0, 0.05) !important;
            padding: 48px !important;
            margin-bottom: 32px !important;
            transition: all 0.4s cubic-bezier(0.25, 0.46, 0.45, 0.94) !important;
            overflow: visible !important;
        }

        .results-card {
            background: transparent !important;
            border-radius: 0 !important;
            box-shadow: none !important;
            border: none !important;
            padding: 0 !important;
            margin-bottom: 32px !important;
            overflow: visible !important;
        }

        /* Caption Results Container - Elegant Design */
        .caption-results-container {
            background: linear-gradient(135deg, rgba(255, 255, 255, 0.85) 0%, rgba(252, 253, 254, 0.7) 100%) !important;
            border-radius: 28px !important;
            padding: 44px !important;
            border: 1px solid rgba(52, 152, 219, 0.1) !important;
            box-shadow:
                0 4px 20px rgba(0, 0, 0, 0.04),
                0 2px 8px rgba(52, 152, 219, 0.03) !important;
            margin-bottom: 40px !important;
            overflow: visible !important;
        }

        .upload-card:hover {
            box-shadow:
                0 8px 32px rgba(0, 0, 0, 0.10),
                0 4px 8px rgba(0, 0, 0, 0.06) !important;
            transform: translateY(-6px);
            border-color: rgba(52, 152, 219, 0.3) !important;
        }

        /* ==================== Upload Area ==================== */
        .upload-area {
            border: 3px dashed rgba(52, 152, 219, 0.35) !important;
            border-radius: 28px !important;
            background: linear-gradient(135deg, rgba(52, 152, 219, 0.03) 0%, rgba(52, 152, 219, 0.06) 100%) !important;
            padding: 96px 40px !important;
            text-align: center !important;
            transition: all 0.3s ease !important;
            min-height: 360px !important;
        }

        .upload-area:hover {
            border-color: #3498DB !important;
            background: linear-gradient(135deg, rgba(52, 152, 219, 0.06) 0%, rgba(52, 152, 219, 0.12) 100%) !important;
            transform: scale(1.02);
        }

        /* ==================== Section Titles - Consistent Spacing ==================== */
        .section-title {
            font-size: 28px !important;
            font-weight: 700 !important;
            color: #2C3E50 !important;
            margin-bottom: 20px !important;
            letter-spacing: -0.02em !important;
            padding-bottom: 0 !important;
            border-bottom: none !important;
            text-align: left !important;
            margin-top: 0 !important;
        }

        .section-title-left {
            font-size: 28px !important;
            font-weight: 700 !important;
            color: #2C3E50 !important;
            margin-bottom: 20px !important;
            margin-top: 0 !important;
            letter-spacing: -0.02em !important;
            text-align: left !important;
            border-bottom: none !important;
            padding-bottom: 0 !important;
        }

        /* ==================== Form Elements - Generous Padding ==================== */
        .settings-row {
            gap: 24px !important;
            margin-bottom: 28px !important;
        }

        .radio-group {
            background: rgba(248, 249, 250, 0.5) !important;
            border-radius: 20px !important;
            padding: 24px 28px !important;
            border: none !important;
            margin-bottom: 24px !important;
            border: 1px solid rgba(0, 0, 0, 0.04) !important;
        }

        .radio-group:last-child {
            margin-bottom: 0 !important;
        }

        /* Inline radio groups for side-by-side layout */
        .radio-group-inline {
            background: linear-gradient(135deg, rgba(255, 255, 255, 0.7) 0%, rgba(248, 249, 250, 0.5) 100%) !important;
            border-radius: 16px !important;
            padding: 20px !important;
            border: 1px solid rgba(52, 152, 219, 0.1) !important;
            margin-bottom: 0 !important;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.03) !important;
            transition: all 0.3s ease !important;
        }

        .radio-group-inline:hover {
            box-shadow: 0 4px 16px rgba(52, 152, 219, 0.08) !important;
            border-color: rgba(52, 152, 219, 0.2) !important;
        }

        .radio-group label {
            color: #6C757D !important;
            font-weight: 600 !important;
            font-size: 14px !important;
            margin-bottom: 16px !important;
            letter-spacing: 0.08em !important;
            text-transform: uppercase !important;
            display: block !important;
            text-align: left !important;
        }

        /* Radio group title (the actual input label) */
        .radio-group > label:first-child {
            color: #2C3E50 !important;
            font-weight: 700 !important;
            font-size: 19px !important;
            margin-bottom: 16px !important;
            letter-spacing: -0.02em !important;
            text-transform: none !important;
        }

        /* Inline radio group title - BIGGER and BOLD */
        .radio-group-inline > label:first-child {
            color: #2C3E50 !important;
            font-weight: 700 !important;
            font-size: 18px !important;
            margin-bottom: 14px !important;
            letter-spacing: -0.01em !important;
            text-transform: none !important;
            display: block !important;
        }

        .radio-group input[type="radio"] {
            accent-color: #3498DB !important;
            width: 22px !important;
            height: 22px !important;
            margin-right: 14px !important;
        }

        /* Radio option labels */
        .radio-group > div > label {
            color: #495057 !important;
            font-weight: 500 !important;
            font-size: 17px !important;
            letter-spacing: -0.01em !important;
            text-transform: none !important;
            padding: 14px 20px !important;
            border-radius: 14px !important;
            transition: all 0.2s ease !important;
            cursor: pointer !important;
            display: flex !important;
            align-items: center !important;
        }

        /* Inline radio option labels - BIGGER */
        .radio-group-inline > div > label {
            color: #495057 !important;
            font-weight: 500 !important;
            font-size: 16px !important;
            letter-spacing: -0.01em !important;
            text-transform: none !important;
            padding: 12px 16px !important;
            border-radius: 10px !important;
            transition: all 0.2s ease !important;
            cursor: pointer !important;
            display: flex !important;
            align-items: center !important;
            background: rgba(255, 255, 255, 0.6) !important;
            margin-bottom: 8px !important;
            border: 1px solid rgba(0, 0, 0, 0.04) !important;
        }

        .radio-group > div > label:hover {
            background: rgba(52, 152, 219, 0.08) !important;
        }

        .radio-group-inline > div > label:hover {
            background: rgba(52, 152, 219, 0.12) !important;
            transform: translateX(4px);
        }

        /* ==================== Button ==================== */
        .generate-button {
            background: linear-gradient(135deg, #3498DB 0%, #2980B9 100%) !important;
            color: white !important;
            border: none !important;
            border-radius: 20px !important;
            padding: 24px 64px !important;
            font-size: 19px !important;
            font-weight: 700 !important;
            cursor: pointer !important;
            box-shadow:
                0 6px 24px rgba(52, 152, 219, 0.35),
                0 3px 6px rgba(52, 152, 219, 0.25) !important;
            transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
            letter-spacing: -0.02em !important;
            width: 100% !important;
            margin-top: 24px !important;
        }

        .generate-button:hover {
            transform: translateY(-6px) scale(1.02) !important;
            box-shadow:
                0 16px 48px rgba(52, 152, 219, 0.45),
                0 6px 12px rgba(52, 152, 219, 0.35) !important;
        }

        .generate-button:active {
            transform: translateY(-3px) scale(1.01) !important;
        }

        /* ==================== Caption Cards - Light & Elegant ==================== */
        .caption-card {
            background: linear-gradient(135deg, rgba(255, 255, 255, 0.98) 0%, rgba(248, 249, 250, 0.95) 100%);
            backdrop-filter: blur(20px);
            border: 1px solid rgba(0, 0, 0, 0.06);
            border-radius: 28px;
            padding: 32px 36px;
            margin-bottom: 28px;
            transition: all 0.4s cubic-bezier(0.25, 0.46, 0.45, 0.94);
            box-shadow:
                0 4px 16px rgba(0, 0, 0, 0.05),
                0 2px 4px rgba(0, 0, 0, 0.03);
            position: relative;
        }

        .caption-card:hover {
            box-shadow:
                0 8px 32px rgba(0, 0, 0, 0.10),
                0 4px 8px rgba(0, 0, 0, 0.06);
            transform: translateY(-6px);
            border-color: rgba(52, 152, 219, 0.3);
        }

        .caption-header {
            font-size: 15px;
            font-weight: 700;
            color: #6C757D;
            text-transform: uppercase;
            letter-spacing: 0.14em;
            margin-bottom: 20px;
        }

        .caption-text {
            font-size: 21px;
            font-weight: 400;
            color: #2C3E50;
            line-height: 1.8;
            margin-bottom: 24px;
            letter-spacing: -0.01em;
        }

        .caption-hashtags {
            font-size: 18px;
            font-weight: 600;
            color: #3498DB;
            margin-bottom: 0;
            word-wrap: break-word;
            line-height: 1.75;
        }

        /* Copy Button */
        .copy-button {
            position: absolute;
            top: 28px;
            right: 28px;
            background: rgba(52, 152, 219, 0.10);
            border: 1px solid rgba(52, 152, 219, 0.25);
            border-radius: 14px;
            padding: 12px 20px;
            font-size: 15px;
            font-weight: 600;
            color: #3498DB;
            cursor: pointer;
            transition: all 0.2s ease;
            display: flex;
            align-items: center;
            gap: 8px;
        }

        .copy-button:hover {
            background: rgba(52, 152, 219, 0.18);
            border-color: #3498DB;
            transform: translateY(-2px);
            box-shadow: 0 4px 12px rgba(52, 152, 219, 0.25);
        }

        .copy-button:active {
            transform: translateY(0);
        }

        .copy-button.copied {
            background: rgba(39, 174, 96, 0.15);
            border-color: #27AE60;
            color: #27AE60;
        }

        /* ==================== Footer ==================== */
        .app-footer {
            text-align: center;
            margin-top: 96px;
            padding-top: 64px;
            border-top: 3px solid rgba(0, 0, 0, 0.08);
            animation: fadeInUp 0.8s ease-out 0.3s backwards;
        }

        @keyframes fadeInUp {
            from {
                opacity: 0;
                transform: translateY(30px);
            }
            to {
                opacity: 1;
                transform: translateY(0);
            }
        }

        .footer-text {
            font-size: 17px;
            color: #6C757D;
            line-height: 2.0;
            letter-spacing: -0.01em;
            font-weight: 500;
        }

        .footer-models {
            font-size: 15px;
            color: #ADB5BD;
            margin-top: 20px;
            font-weight: 600;
            letter-spacing: 0.03em;
        }

        /* ==================== Image Display ==================== */
        .image-container {
            border-radius: 28px !important;
            overflow: hidden !important;
            box-shadow:
                0 6px 24px rgba(0, 0, 0, 0.10),
                0 3px 6px rgba(0, 0, 0, 0.06) !important;
        }

        .image-container img {
            border-radius: 28px !important;
            box-shadow:
                0 6px 24px rgba(0, 0, 0, 0.12),
                0 3px 6px rgba(0, 0, 0, 0.08) !important;
        }

        /* ==================== Responsive Design ==================== */
        @media (max-width: 768px) {
            .contain {
                padding: 48px 32px 64px 32px !important;
            }

            .app-title {
                font-size: 52px;
            }

            .app-subtitle {
                font-size: 20px;
            }

            .upload-card, .options-card, .results-card {
                padding: 40px !important;
            }

            .upload-area {
                padding: 64px 32px !important;
                min-height: 280px !important;
            }

            .caption-card {
                padding: 28px;
            }

            .section-title {
                font-size: 30px !important;
            }

            .copy-button {
                top: 20px;
                right: 20px;
                padding: 10px 16px;
                font-size: 14px;
            }
        }

        /* ==================== Loading Animation ==================== */
        @keyframes shimmer {
            0% {
                background-position: -1000px 0;
            }
            100% {
                background-position: 1000px 0;
            }
        }

        .loading {
            animation: shimmer 2s infinite;
            background: linear-gradient(to right, #f8f9fa 4%, #e9ecef 25%, #f8f9fa 36%);
            background-size: 1000px 100%;
        }
        """
525
+
526
+ def create_header(self):
527
+ """Create application header"""
528
+ return gr.HTML("""
529
+ <div class="app-header">
530
+ <h1 class="app-title">✨ Pixcribe</h1>
531
+ <p class="app-subtitle">AI-Powered Social Media Caption Generator</p>
532
+ </div>
533
+ """)
534
+
535
+ def create_info_banner(self):
536
+ """Create informational banner about model loading and processing times"""
537
+ return gr.HTML("""
538
+ <div style="
539
+ background: linear-gradient(135deg, #E8F4F8 0%, #D4E9F2 100%);
540
+ border-left: 4px solid #3498DB;
541
+ border-radius: 16px;
542
+ padding: 24px 32px;
543
+ margin: 0 auto 48px auto;
544
+ max-width: 1200px;
545
+ box-shadow: 0 4px 16px rgba(52, 152, 219, 0.12);
546
+ ">
547
+ <div style="display: flex; align-items: start; gap: 20px;">
548
+ <div style="font-size: 32px; line-height: 1; margin-top: 4px;">⏱️</div>
549
+ <div style="flex: 1;">
550
+ <h3 style="
551
+ margin: 0 0 12px 0;
552
+ font-size: 20px;
553
+ font-weight: 700;
554
+ color: #2C3E50;
555
+ letter-spacing: -0.02em;
556
+ ">
557
+ Please Note: Processing Time
558
+ </h3>
559
+ <p style="
560
+ margin: 0 0 12px 0;
561
+ font-size: 15px;
562
+ line-height: 1.6;
563
+ color: #5D6D7E;
564
+ ">
565
+ <strong style="color: #2980B9;">Initial setup and model loading may take a while</strong> as multiple AI models
566
+ are initialized and cached. This includes YOLOv11 object detection, OpenCLIP semantic analysis,
567
+ Qwen2.5-VL caption generation, and other advanced models.
568
+ </p>
569
+ <p style="
570
+ margin: 0;
571
+ font-size: 15px;
572
+ line-height: 1.6;
573
+ color: #5D6D7E;
574
+ ">
575
+ ✨ <strong style="color: #27AE60;">Processing time varies depending on system resources.</strong>
576
+ Thank you for your patience while we generate high-quality captions!
577
+ </p>
578
+ </div>
579
+ </div>
580
+ </div>
581
+ """)
582
+
583
+ def create_footer(self):
584
+ """Create application footer"""
585
+ return gr.HTML("""
586
+ <div class="app-footer">
587
+ <p class="footer-text">
588
+ Powered by advanced AI models
589
+ </p>
590
+ <p class="footer-models">
591
+ YOLOv11 · OpenCLIP ViT-H/14 · Qwen2.5-VL-7B · EasyOCR · Places365 · U2-Net
592
+ </p>
593
+ <p class="footer-text" style="margin-top: 32px;">
594
+ © 2025 Pixcribe · Built for creators
595
+ </p>
596
+ </div>
597
+ """)
598
+
599
+ def format_captions_with_copy(self, captions: List[Dict]) -> str:
600
+ """Format captions as HTML with copy functionality"""
601
+ if not captions:
602
+ return "<p style='color: #6C757D; padding: 24px;'>No captions generated</p>"
603
+
604
+ captions_html = ""
605
+ for i, cap in enumerate(captions):
606
+ caption_text = cap.get('caption', '')
607
+ hashtags = cap.get('hashtags', [])
608
+ tone = cap.get('tone', 'unknown').title()
609
+
610
+ # Create unique ID for each caption
611
+ caption_id = f"caption_{i}"
612
+
613
+ # Full text to copy (caption + hashtags)
614
+ full_text = f"{caption_text}\n\n{' '.join([f'#{tag}' for tag in hashtags])}"
615
+
616
+ captions_html += f"""
617
+ <div class="caption-card" id="{caption_id}">
618
+ <button class="copy-button" onclick="copyCaption{i}()" id="copy-btn-{i}">
619
+ 📋 Copy
620
+ </button>
621
+ <div class="caption-header">Caption {i+1} · {tone}</div>
622
+ <div class="caption-text">{caption_text}</div>
623
+ <div class="caption-hashtags">
624
+ {' '.join([f'#{tag}' for tag in hashtags])}
625
+ </div>
626
+ <textarea id="caption-text-{i}" style="position: absolute; left: -9999px;">{full_text}</textarea>
627
+ </div>
628
+
629
+ <script>
630
+ function copyCaption{i}() {{
631
+ const text = document.getElementById('caption-text-{i}').value;
632
+ const btn = document.getElementById('copy-btn-{i}');
633
+
634
+ // Try modern clipboard API first
635
+ if (navigator.clipboard && navigator.clipboard.writeText) {{
636
+ navigator.clipboard.writeText(text).then(() => {{
637
+ btn.innerHTML = '✓ Copied!';
638
+ btn.classList.add('copied');
639
+ setTimeout(() => {{
640
+ btn.innerHTML = '📋 Copy';
641
+ btn.classList.remove('copied');
642
+ }}, 2000);
643
+ }}).catch(() => {{
644
+ // Fallback to old method
645
+ fallbackCopy{i}();
646
+ }});
647
+ }} else {{
648
+ // Fallback for older browsers
649
+ fallbackCopy{i}();
650
+ }}
651
+ }}
652
+
653
+ function fallbackCopy{i}() {{
654
+ const textarea = document.getElementById('caption-text-{i}');
655
+ const btn = document.getElementById('copy-btn-{i}');
656
+ textarea.style.position = 'static';
657
+ textarea.style.opacity = '0';
658
+ textarea.select();
659
+ try {{
660
+ document.execCommand('copy');
661
+ btn.innerHTML = '✓ Copied!';
662
+ btn.classList.add('copied');
663
+ setTimeout(() => {{
664
+ btn.innerHTML = '📋 Copy';
665
+ btn.classList.remove('copied');
666
+ }}, 2000);
667
+ }} catch (err) {{
668
+ btn.innerHTML = '✗ Failed';
669
+ setTimeout(() => {{
670
+ btn.innerHTML = '📋 Copy';
671
+ }}, 2000);
672
+ }}
673
+ textarea.style.position = 'absolute';
674
+ textarea.style.opacity = '1';
675
+ }}
676
+ </script>
677
+ """
678
+
679
+ return captions_html
680
+
681
+ print("✓ UIManager defined")
universal_object_prompts.py ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Dict, List
3
+
4
+ class UniversalObjectPrompts:
5
+ """
6
+ 通用物品描述 Prompt 庫
7
+ 涵蓋日常物品、動物、交通工具、電子產品等
8
+ 確保系統能夠描述各種類型的圖片
9
+ """
10
+
11
+ def __init__(self):
12
+ """初始化通用物品詞彙庫"""
13
+
14
+ self.object_vocabularies = {
15
+ # ===== 動物 Animals =====
16
+ 'animals': {
17
+ 'dogs': [
18
+ 'friendly dog with expressive eyes and playful demeanor',
19
+ 'canine companion with soft fur and loyal presence',
20
+ 'domestic dog breed with distinct markings and alert posture',
21
+ 'pet dog in outdoor setting with natural behavior',
22
+ 'puppy with cute features and energetic personality',
23
+ 'large breed dog with muscular build and protective stance',
24
+ 'small lap dog with fluffy coat and adorable expression',
25
+ 'working dog demonstrating intelligence and trained skills',
26
+ 'mixed breed dog with unique features and charming character',
27
+ 'dog portrait with focused gaze and photogenic qualities'
28
+ ],
29
+ 'cats': [
30
+ 'elegant cat with graceful posture and alert expression',
31
+ 'feline companion with soft fur and independent character',
32
+ 'domestic cat with distinctive markings and curious nature',
33
+ 'cat resting in comfortable position with relaxed demeanor',
34
+ 'kitten with playful energy and adorable tiny features',
35
+ 'long-haired cat with fluffy coat and majestic appearance',
36
+ 'short-haired cat with sleek coat and athletic build',
37
+ 'cat portrait with piercing eyes and photogenic pose',
38
+ 'tabby cat with striped pattern and charming personality',
39
+ 'cat in natural sunlight with warm ambient lighting'
40
+ ],
41
+ 'birds': [
42
+ 'colorful bird with vibrant plumage and natural beauty',
43
+ 'bird in flight with spread wings and dynamic motion',
44
+ 'perched bird with detailed feather texture and alert posture',
45
+ 'exotic bird species with distinctive beak and eye markings',
46
+ 'songbird with delicate features and graceful appearance',
47
+ 'bird of prey with powerful build and intense gaze',
48
+ 'waterfowl with sleek feathers and aquatic adaptation',
49
+ 'tropical bird with brilliant colors and exotic appeal',
50
+ 'bird feeding or foraging showing natural behavior',
51
+ 'bird silhouette against sky with artistic composition'
52
+ ],
53
+ 'wildlife': [
54
+ 'wildlife creature in natural habitat showing authentic behavior',
55
+ 'wild animal with powerful build and majestic presence',
56
+ 'forest wildlife with camouflage coloring and alert senses',
57
+ 'marine wildlife with aquatic adaptation and fluid movement',
58
+ 'safari animal with distinctive features and exotic appeal',
59
+ 'small mammal with cute features and curious expression',
60
+ 'endangered species with conservation importance and beauty',
61
+ 'nocturnal animal with adapted eyes and nighttime behavior',
62
+ 'wildlife portrait with environmental context and natural light',
63
+ 'animal in motion demonstrating speed agility or power'
64
+ ]
65
+ },
66
+
67
+ # ===== 交通工具 Vehicles =====
68
+ 'vehicles': {
69
+ 'cars': [
70
+ 'modern automobile with sleek design and aerodynamic lines',
71
+ 'luxury car with premium finish and sophisticated styling',
72
+ 'sports car with aggressive stance and performance aesthetics',
73
+ 'classic car with vintage charm and timeless design',
74
+ 'electric vehicle with futuristic design and eco-friendly appeal',
75
+ 'SUV with robust build and commanding presence',
76
+ 'sedan with elegant profile and comfortable proportions',
77
+ 'convertible with open top and free-spirited character',
78
+ 'vintage automobile with chrome details and nostalgic beauty',
79
+ 'race car with aerodynamic body and competition livery'
80
+ ],
81
+ 'motorcycles': [
82
+ 'motorcycle with powerful engine and dynamic design',
83
+ 'cruiser bike with low profile and relaxed riding position',
84
+ 'sport bike with aggressive fairings and racing aesthetics',
85
+ 'vintage motorcycle with classic styling and heritage appeal',
86
+ 'custom bike with unique modifications and personal touches',
87
+ 'touring motorcycle with comfort features and long-distance capability',
88
+ 'dirt bike with off-road tires and rugged construction',
89
+ 'scooter with practical design and urban mobility',
90
+ 'cafe racer with minimalist design and retro styling',
91
+ 'adventure motorcycle with all-terrain capability and robust build'
92
+ ],
93
+ 'bicycles': [
94
+ 'road bike with lightweight frame and racing geometry',
95
+ 'mountain bike with suspension and off-road tires',
96
+ 'vintage bicycle with classic design and nostalgic charm',
97
+ 'urban commuter bike with practical features and city-ready design',
98
+ 'electric bicycle with motor assist and modern technology',
99
+ 'BMX bike with compact frame and trick-ready build',
100
+ 'touring bicycle with panniers and long-distance setup',
101
+ 'folding bike with space-saving design and portability',
102
+ 'fixed gear bike with minimalist aesthetic and urban style',
103
+ 'cruiser bicycle with comfortable seat and relaxed riding position'
104
+ ],
105
+ 'public_transport': [
106
+ 'city bus with public transit livery and urban setting',
107
+ 'train at station with platform and passenger environment',
108
+ 'subway car with interior lighting and metro system',
109
+ 'tram on city streets with overhead wires and urban backdrop',
110
+ 'ferry boat with water transportation and maritime setting',
111
+ 'taxi cab with distinctive markings and urban context',
112
+ 'double-decker bus with iconic design and city character',
113
+ 'monorail with elevated track and futuristic appearance',
114
+ 'light rail vehicle with modern design and efficient transit',
115
+ 'cable car with hillside location and scenic views'
116
+ ]
117
+ },
118
+
119
+ # ===== 電子產品 Electronics =====
120
+ 'electronics': {
121
+ 'smartphones': [
122
+ 'modern smartphone with edge-to-edge display and sleek design',
123
+ 'mobile phone with premium materials and minimalist aesthetic',
124
+ 'smartphone showing screen interface with app icons and features',
125
+ 'phone with camera system and advanced photography capabilities',
126
+ 'mobile device with protective case and personal accessories',
127
+ 'smartphone in hand demonstrating use and scale',
128
+ 'phone with wireless charging and modern conveniences',
129
+ 'mobile phone with notification screen and communication features',
130
+ 'smartphone capturing photo showing photography in action',
131
+ 'device with headphones and mobile entertainment setup'
132
+ ],
133
+ 'laptops': [
134
+ 'laptop computer with open screen and modern workspace',
135
+ 'portable computer with sleek design and professional appearance',
136
+ 'laptop showing desktop interface and productivity software',
137
+ 'computer with external peripherals and complete workstation',
138
+ 'thin and light laptop with premium build and portability',
139
+ 'gaming laptop with powerful specs and aggressive styling',
140
+ 'laptop in cafe setting with remote work environment',
141
+ 'computer with split screen showing multitasking capability',
142
+ 'laptop with coding environment and developer workflow',
143
+ 'portable computer with creative software and design work'
144
+ ],
145
+ 'cameras': [
146
+ 'professional camera with interchangeable lens and manual controls',
147
+ 'DSLR camera with telephoto lens and photography equipment',
148
+ 'mirrorless camera with compact design and modern features',
149
+ 'vintage film camera with classic design and analog charm',
150
+ 'action camera with rugged housing and adventure ready build',
151
+ 'instant camera with retro aesthetic and print functionality',
152
+ 'camera on tripod with stable shooting setup',
153
+ 'photography gear with lenses filters and accessories',
154
+ "camera with viewfinder showing photographer's perspective",
155
+ 'compact camera with point-and-shoot simplicity'
156
+ ],
157
+ 'wearables': [
158
+ 'smartwatch with digital display and fitness tracking features',
159
+ 'fitness tracker with health monitoring and activity data',
160
+ 'wireless earbuds with charging case and modern design',
161
+ 'smart glasses with augmented reality and tech integration',
162
+ 'VR headset with immersive technology and gaming capability',
163
+ 'smart ring with minimalist design and health sensors',
164
+ 'activity band with water resistance and sport features',
165
+ 'wireless headphones with noise cancellation and premium audio',
166
+ 'smart jewelry with notification features and elegant styling',
167
+ 'wearable device with heart rate monitor and workout tracking'
168
+ ]
169
+ },
170
+
171
+ # ===== 家居用品 Home Items =====
172
+ 'home_items': {
173
+ 'furniture': [
174
+ 'modern sofa with clean lines and comfortable upholstery',
175
+ 'wooden dining table with natural grain and family seating',
176
+ 'contemporary chair with ergonomic design and stylish form',
177
+ 'bookshelf with organized volumes and decorative objects',
178
+ 'bed with plush bedding and inviting sleep environment',
179
+ 'desk with workspace organization and productive setup',
180
+ 'coffee table with minimalist design and functional surface',
181
+ 'cabinet with storage solutions and practical organization',
182
+ 'armchair with cozy cushioning and reading nook appeal',
183
+ 'sideboard with display area and dining room elegance'
184
+ ],
185
+ 'decor': [
186
+ 'wall art with framed artwork and gallery wall aesthetic',
187
+ 'decorative plant with lush foliage and natural greenery',
188
+ 'vase with fresh flowers and elegant arrangement',
189
+ 'candles with ambient lighting and cozy atmosphere',
190
+ 'throw pillows with colorful patterns and comfort layers',
191
+ 'mirror with decorative frame and space-enhancing reflection',
192
+ 'rug with textile pattern and floor covering warmth',
193
+ 'sculpture with artistic form and decorative presence',
194
+ 'decorative bowls with artisan craft and functional beauty',
195
+ 'wall clock with timepiece function and design statement'
196
+ ],
197
+ 'kitchenware': [
198
+ 'ceramic plates with elegant design and table setting ready',
199
+ 'glassware with crystal clarity and beverage service',
200
+ 'cookware with non-stick surface and culinary preparation',
201
+ 'cutting board with natural wood and food prep surface',
202
+ 'kitchen utensils with stainless steel and cooking tools',
203
+ 'coffee maker with brewing capability and morning ritual',
204
+ 'mixing bowls with nesting design and baking essentials',
205
+ 'serving platters with presentation surface and entertaining ready',
206
+ 'storage containers with organization and food preservation',
207
+ 'tea kettle with stovetop heating and beverage preparation'
208
+ ]
209
+ },
210
+
211
+ # ===== 服飾配件 Fashion Accessories =====
212
+ 'fashion_accessories': {
213
+ 'shoes': [
214
+ 'leather shoes with polished finish and formal elegance',
215
+ 'sneakers with athletic design and casual comfort',
216
+ 'high heels with sophisticated style and fashion statement',
217
+ 'boots with rugged construction and seasonal appropriateness',
218
+ 'sandals with open design and warm weather comfort',
219
+ 'loafers with slip-on convenience and smart casual style',
220
+ 'running shoes with performance features and sport technology',
221
+ 'dress shoes with refined appearance and occasion ready',
222
+ 'canvas shoes with casual aesthetic and everyday wearability',
223
+ 'designer footwear with luxury branding and premium materials'
224
+ ],
225
+ 'bags': [
226
+ 'leather handbag with structured form and quality craftsmanship',
227
+ 'backpack with practical compartments and daily carry capability',
228
+ 'tote bag with spacious interior and versatile use',
229
+ 'clutch with compact elegance and evening sophistication',
230
+ 'messenger bag with crossbody strap and urban function',
231
+ 'duffel bag with travel capacity and gym ready design',
232
+ 'satchel with classic styling and professional appearance',
233
+ 'wallet with organized card slots and essential storage',
234
+ 'shoulder bag with adjustable strap and comfortable carry',
235
+ 'luxury bag with designer branding and premium construction'
236
+ ],
237
+ 'jewelry': [
238
+ 'necklace with pendant design and elegant neckline accent',
239
+ 'earrings with gemstone sparkle and facial framing beauty',
240
+ 'ring with precious metal and symbolic significance',
241
+ 'bracelet with linked design and wrist adornment',
242
+ 'watch with timepiece function and wrist jewelry appeal',
243
+ 'brooch with decorative pin and vintage charm',
244
+ 'anklet with delicate chain and summer accessory style',
245
+ 'cufflinks with formal accent and menswear detail',
246
+ 'charm bracelet with personal tokens and memory collection',
247
+ 'statement jewelry with bold design and fashion impact'
248
+ ],
249
+ 'eyewear': [
250
+ 'sunglasses with UV protection and stylish frames',
251
+ 'eyeglasses with prescription lenses and daily wear design',
252
+ 'aviator sunglasses with classic pilot styling and metal frame',
253
+ 'cat-eye glasses with vintage inspired shape and feminine flair',
254
+ 'sport sunglasses with wraparound design and performance features',
255
+ 'reading glasses with magnification and close-work utility',
256
+ 'designer eyewear with luxury branding and premium materials',
257
+ 'mirrored sunglasses with reflective lenses and modern edge',
258
+ 'oversized sunglasses with dramatic proportions and fashion statement',
259
+ 'safety glasses with protective function and durable construction'
260
+ ]
261
+ },
262
+
263
+ # ===== 運動器材 Sports Equipment =====
264
+ 'sports_equipment': {
265
+ 'fitness': [
266
+ 'dumbbells with weight plates and strength training equipment',
267
+ 'yoga mat with non-slip surface and exercise foundation',
268
+ 'resistance bands with elastic tension and portable workout',
269
+ 'kettlebell with cast iron construction and functional training',
270
+ 'foam roller with massage texture and recovery tool',
271
+ 'exercise ball with inflatable design and core workout',
272
+ 'jump rope with cardio training and coordination exercise',
273
+ 'weight bench with adjustable positions and lifting support',
274
+ 'pull-up bar with doorframe mounting and bodyweight exercise',
275
+ 'treadmill with running surface and cardio machine'
276
+ ],
277
+ 'outdoor_sports': [
278
+ 'tennis racket with string tension and court sport equipment',
279
+ 'basketball with leather or composite cover and game ready',
280
+ 'soccer ball with classic panel design and field sport',
281
+ 'golf clubs with metal woods and iron set',
282
+ 'baseball glove with leather construction and catching mitt',
283
+ 'skateboard with deck grip tape and wheel assembly',
284
+ 'surfboard with wax coating and wave riding design',
285
+ 'snowboard with bindings and mountain sport equipment',
286
+ 'hiking boots with ankle support and trail ready tread',
287
+ 'camping tent with weatherproof fabric and outdoor shelter'
288
+ ]
289
+ },
290
+
291
+ # ===== 樂器 Musical Instruments =====
292
+ 'musical_instruments': {
293
+ 'string': [
294
+ 'acoustic guitar with wooden body and string instrument charm',
295
+ 'electric guitar with solid body and amplified rock sound',
296
+ 'violin with curved body and classical string beauty',
297
+ 'cello with rich tone and orchestral presence',
298
+ 'bass guitar with deep sound and rhythm section foundation',
299
+ 'ukulele with small size and tropical string instrument',
300
+ 'harp with multiple strings and angelic sound quality',
301
+ 'banjo with circular body and folk music character',
302
+ 'mandolin with paired strings and bright tone',
303
+ 'sitar with resonating strings and world music heritage'
304
+ ],
305
+ 'keyboard': [
306
+ 'piano with ivory keys and classical instrument elegance',
307
+ 'keyboard synthesizer with electronic sound and modern music',
308
+ 'organ with multiple manuals and church music tradition',
309
+ 'electric piano with vintage tone and stage performance',
310
+ 'digital piano with weighted keys and home practice',
311
+ 'accordion with bellows and folk dance music',
312
+ 'MIDI controller with production capability and studio tool',
313
+ 'harpsichord with baroque styling and historical instrument',
314
+ 'melodica with breath control and portable keyboard',
315
+ 'keytar with shoulder strap and performance showmanship'
316
+ ],
317
+ 'percussion': [
318
+ 'drum kit with multiple pieces and rhythmic foundation',
319
+ 'djembe with hand drumming and African rhythm',
320
+ 'conga drums with Latin percussion and tropical beat',
321
+ 'tambourine with jingles and shaker instrument',
322
+ 'bongos with paired drums and Latin music style',
323
+ 'xylophone with mallet playing and melodic percussion',
324
+ 'cymbals with crash sound and orchestral accent',
325
+ 'maracas with rattle sound and Latin rhythm shaker',
326
+ 'cajón with box drum and flamenco percussion',
327
+ 'timpani with kettle drum and orchestral thunder'
328
+ ]
329
+ },
330
+
331
+ # ===== 辦公用品 Office Supplies =====
332
+ 'office_supplies': {
333
+ 'stationery': [
334
+ 'pen with smooth writing and everyday writing tool',
335
+ 'notebook with lined pages and note-taking essential',
336
+ 'pencil with graphite lead and sketching tool',
337
+ 'markers with vibrant colors and highlighting capability',
338
+ 'sticky notes with adhesive backing and reminder function',
339
+ 'paper clips with metal construction and document organization',
340
+ 'stapler with binding function and paper fastening',
341
+ 'tape dispenser with adhesive roll and office essential',
342
+ 'ruler with measurement markings and straight edge',
343
+ 'scissors with sharp blades and cutting tool'
344
+ ],
345
+ 'desk_items': [
346
+ 'desk lamp with adjustable arm and task lighting',
347
+ 'organizer with compartments and clutter management',
348
+ 'mouse pad with smooth surface and wrist support',
349
+ 'desk calendar with date tracking and planning tool',
350
+ 'pen holder with upright storage and writing implement organization',
351
+ 'file folders with document sorting and category organization',
352
+ 'desk mat with large surface and workspace protection',
353
+ 'paper tray with stacking design and document storage',
354
+ 'business card holder with professional presentation',
355
+ 'cable organizer with cord management and tidy workspace'
356
+ ]
357
+ },
358
+
359
+ # ===== 玩具與遊戲 Toys and Games =====
360
+ 'toys_games': {
361
+ 'toys': [
362
+ 'stuffed animal with soft plush and cuddly companion',
363
+ 'action figure with articulated joints and character play',
364
+ 'doll with detailed features and imaginative play',
365
+ 'building blocks with interlocking pieces and creative construction',
366
+ 'toy car with rolling wheels and miniature vehicle play',
367
+ 'puzzle with interlocking pieces and problem-solving challenge',
368
+ 'board game with playing pieces and family entertainment',
369
+ 'card game with illustrated cards and strategy play',
370
+ 'remote control toy with wireless operation and interactive play',
371
+ 'educational toy with learning elements and developmental benefits'
372
+ ],
373
+ 'collectibles': [
374
+ 'figurine with detailed sculpting and display collectible',
375
+ 'vinyl toy with designer art and limited edition appeal',
376
+ 'model kit with assembly parts and hobbyist construction',
377
+ 'trading cards with collectible series and rarity value',
378
+ 'die-cast model with metal construction and scale replica',
379
+ 'statue with artistic detail and collector showcase',
380
+ 'pop culture figure with character likeness and fandom appeal',
381
+ 'vintage toy with nostalgic value and retro charm',
382
+ 'limited edition collectible with numbered series and exclusivity',
383
+ 'display case with protective housing and collection showcase'
384
+ ]
385
+ }
386
+ }
387
+
388
+ print(f"✓ Universal Object Prompts initialized with {len(self.object_vocabularies)} major categories")
389
+
390
+ def get_prompts(self, category: str, subcategory: str = None) -> List[str]:
391
+ """
392
+ 取得物品 prompts
393
+
394
+ Args:
395
+ category: 物品類別 (如 'animals', 'vehicles')
396
+ subcategory: 子類別 (如 'dogs', 'cats')
397
+
398
+ Returns:
399
+ Prompt 列表
400
+ """
401
+ category_prompts = self.object_vocabularies.get(category, {})
402
+
403
+ if subcategory:
404
+ return category_prompts.get(subcategory, [])
405
+ else:
406
+ # 返回該類別的所有 prompts
407
+ all_prompts = []
408
+ for prompts in category_prompts.values():
409
+ if isinstance(prompts, list):
410
+ all_prompts.extend(prompts)
411
+ return all_prompts
412
+
413
+ def get_all_categories(self) -> List[str]:
414
+ """取得所有物品類別"""
415
+ return list(self.object_vocabularies.keys())
416
+
417
+ def get_subcategories(self, category: str) -> List[str]:
418
+ """取得特定類別的所有子類別"""
419
+ category_data = self.object_vocabularies.get(category, {})
420
+ return list(category_data.keys()) if isinstance(category_data, dict) else []
421
+
422
+ def detect_object_category(self, detected_objects: List[str]) -> str:
423
+ """
424
+ 根據檢測到的物體推測主要類別
425
+
426
+ Args:
427
+ detected_objects: YOLO 檢測到的物體列表
428
+
429
+ Returns:
430
+ 推測的類別名稱
431
+ """
432
+ object_str = ' '.join(detected_objects).lower()
433
+
434
+ # 動物關鍵字
435
+ if any(kw in object_str for kw in ['dog', 'cat', 'bird', 'animal', 'pet']):
436
+ return 'animals'
437
+
438
+ # 交通工具關鍵字
439
+ if any(kw in object_str for kw in ['car', 'bike', 'motorcycle', 'bus', 'train', 'vehicle']):
440
+ return 'vehicles'
441
+
442
+ # 電子產品關鍵字
443
+ if any(kw in object_str for kw in ['phone', 'laptop', 'camera', 'computer', 'tablet']):
444
+ return 'electronics'
445
+
446
+ # 家居用品關鍵字
447
+ if any(kw in object_str for kw in ['chair', 'table', 'bed', 'couch', 'furniture']):
448
+ return 'home_items'
449
+
450
+ # 服飾配件關鍵字
451
+ if any(kw in object_str for kw in ['shoe', 'bag', 'handbag', 'backpack', 'watch']):
452
+ return 'fashion_accessories'
453
+
454
+ # 運動器材關鍵字
455
+ if any(kw in object_str for kw in ['ball', 'racket', 'equipment', 'fitness']):
456
+ return 'sports_equipment'
457
+
458
+ # 樂器關鍵字
459
+ if any(kw in object_str for kw in ['guitar', 'piano', 'drum', 'instrument']):
460
+ return 'musical_instruments'
461
+
462
+ return None # 無法辨識
463
+
464
+ print("✓ UniversalObjectPrompts defined")
yolo_detection_manager.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from ultralytics import YOLO
3
+ import numpy as np
4
+ from typing import List, Dict
5
+ from PIL import Image
6
+
7
class YOLODetectionManager:
    """Object detection via Ultralytics YOLOv11.

    Runs inference and annotates each detection with a flag marking
    classes considered brand/product relevant.
    """

    def __init__(self, variant='m'):
        """Load the YOLOv11 checkpoint for the given size variant.

        Args:
            variant: Model size suffix such as 'n', 's', 'm', 'l', 'x'.
        """
        print(f"Loading YOLOv11{variant} model...")
        self.model = YOLO(f'yolo11{variant}.pt')
        self.variant = variant
        self.conf_threshold = 0.25    # minimum box confidence kept
        self.iou_threshold = 0.45     # NMS IoU threshold
        self.max_detections = 100     # cap on boxes per image

        # Brand-relevant classes, stored as a frozenset so the per-box
        # membership test in detect() is O(1) instead of an O(n) list scan.
        # NOTE(review): 'watch', 'shoe', 'sneaker', 'boot' are not COCO-80
        # class names — with default weights they presumably never match;
        # confirm against the model's `names` mapping.
        self.brand_relevant_classes = frozenset({
            'handbag', 'bottle', 'cell phone', 'laptop',
            'backpack', 'tie', 'suitcase', 'cup', 'watch',
            'shoe', 'sneaker', 'boot',
        })

        print(f"✓ YOLOv11{variant} loaded")

    def detect(self, image: np.ndarray) -> List[Dict]:
        """Detect objects in an image.

        Args:
            image: Image array in a format accepted by ultralytics
                (assumes HxWxC uint8 — TODO confirm the caller's channel
                order matches the model's expectation).

        Returns:
            One dict per box with keys: 'class_id', 'class_name',
            'bbox' ([x1, y1, x2, y2] floats), 'confidence',
            'is_brand_relevant', and 'source' (always 'yolo').
        """
        results = self.model.predict(
            image,
            conf=self.conf_threshold,
            iou=self.iou_threshold,
            max_det=self.max_detections,
            verbose=False
        )

        detections = []
        for result in results:
            for box in result.boxes:
                class_id = int(box.cls[0])
                class_name = result.names[class_id]
                detections.append({
                    'class_id': class_id,
                    'class_name': class_name,
                    # Move tensor to CPU before converting to a plain list.
                    'bbox': box.xyxy[0].cpu().numpy().tolist(),
                    'confidence': float(box.conf[0]),
                    'is_brand_relevant': class_name.lower() in self.brand_relevant_classes,
                    'source': 'yolo',
                })

        return detections

    def filter_brand_relevant_objects(self, detections: List[Dict]) -> List[Dict]:
        """Return only detections flagged as brand relevant."""
        return [det for det in detections if det['is_brand_relevant']]
62
+
63
+ print("✓ YOLODetectionManager defined")