# Spaces: dseditor / PPTCreator (Running)
# File size: 30,493 Bytes
# 09c9f6f

# ppt_analyzer.py
import os
import json
import tempfile
from io import BytesIO
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
import google.generativeai as genai
from slide_themes import SlideThemeManager

class PPTAnalyzer:
    def __init__(self, gemini_model=None, pexels_headers=None, image_styles=None):
        """Store the collaborators and the image-style vocabulary.

        Args:
            gemini_model: Object used for AI keyword generation; when falsy,
                keyword generation uses the rule-based fallback instead.
            pexels_headers: HTTP headers sent with Pexels API requests; when
                falsy, no image search is performed.
            image_styles: Mapping of style name to extra search terms. Any
                falsy value (``None`` or an empty mapping) selects the
                built-in table below.
        """
        builtin_styles = {
            "professional": "business professional corporate clean",
            "creative": "creative artistic colorful vibrant",
            "minimalist": "minimal clean simple white space",
            "modern": "modern contemporary sleek design",
            "natural": "natural outdoor organic environment",
            "technology": "technology digital modern tech innovation"
        }

        self.gemini_model = gemini_model
        self.pexels_headers = pexels_headers
        self.theme_manager = SlideThemeManager()
        if image_styles:
            self.image_styles = image_styles
        else:
            self.image_styles = builtin_styles
    
    def analyze_ppt_file(self, ppt_file_path):
        """分析上傳的PPT文件"""
        try:
            prs = Presentation(ppt_file_path)
            slides_info = []
            
            for i, slide in enumerate(prs.slides):
                slide_info = {
                    "slide_number": i + 1,
                    "title": "",
                    "content": [],
                    "has_table": False,
                    "has_chart": False,
                    "has_image": False,
                    "layout_type": slide.slide_layout.name if hasattr(slide.slide_layout, 'name') else "Unknown"
                }
                
                # 提取文字內容和檢測對象類型
                for shape in slide.shapes:
                    # 檢測表格
                    if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
                        slide_info["has_table"] = True
                    
                    # 檢測圖表
                    elif shape.shape_type == MSO_SHAPE_TYPE.CHART:
                        slide_info["has_chart"] = True
                    
                    # 檢測圖片
                    elif shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                        slide_info["has_image"] = True
                    
                    # 提取文字內容
                    elif hasattr(shape, "text_frame") and shape.text_frame:
                        text_content = shape.text_frame.text.strip()
                        if text_content:
                            # 判斷是否為標題（通常是第一個有內容的文字框或字體較大）
                            if not slide_info["title"] and len(text_content) < 100:
                                slide_info["title"] = text_content
                            else:
                                # 分割多行內容
                                lines = [line.strip() for line in text_content.split('\n') if line.strip()]
                                slide_info["content"].extend(lines)
                
                # 如果沒有找到標題，使用第一行內容作為標題
                if not slide_info["title"] and slide_info["content"]:
                    slide_info["title"] = slide_info["content"].pop(0)
                
                slides_info.append(slide_info)
            
            return {
                "total_slides": len(slides_info),
                "slides": slides_info,
                "original_size": {
                    "width": prs.slide_width,
                    "height": prs.slide_height
                }
            }
            
        except Exception as e:
            print(f"分析PPT文件錯誤: {e}")
            return None
    
    def generate_image_keywords_with_ai(self, slide_info):
        """使用AI分析投影片內容生成圖片搜尋關鍵字"""
        if not self.gemini_model:
            print("Gemini模型不可用，使用回退關鍵字")
            return self.generate_fallback_keywords(slide_info)
        
        # 構建分析提示
        title = slide_info.get("title", "")
        content = slide_info.get("content", [])
        content_text = " ".join(content[:3])  # 只取前3行內容避免太長
        
        print(f"AI分析輸入 - 標題: {title}, 內容: {content_text}")
        
        prompt = f"""

        請分析以下投影片內容，生成適合的英文圖片搜尋關鍵字：

        

        標題：{title}

        內容：{content_text}

        

        要求：

        1. 先理解中文內容的核心概念

        2. 將核心概念轉換為相應的英文關鍵字

        3. 生成3-5個英文關鍵字，用空格分隔

        4. 關鍵字要與內容主題相關，具體明確

        5. 避免過於抽象的詞彙

        6. 適合用於圖片搜尋

        7. 只回傳關鍵字，不要其他說明

        

        例如：

        - 如果內容是關於"商業會議"，回傳：business meeting office professional

        - 如果內容是關於"技術創新"，回傳：technology innovation digital development

        - 如果內容是關於"數據分析"，回傳：data analysis statistics chart

        """
        
        try:
            response = self.gemini_model.generate_content(prompt)
            keywords = response.text.strip()
            print(f"AI生成的原始關鍵字: {keywords}")
            
            # 清理回應，只保留英文字母和空格
            keywords = ''.join(c if c.isalnum() or c.isspace() else ' ' for c in keywords)
            keywords = ' '.join(keywords.split())  # 移除多餘空格
            
            # 如果關鍵字太短或為空，使用回退方案
            if len(keywords.strip()) < 3:
                print("AI生成的關鍵字太短，使用回退方案")
                return self.generate_fallback_keywords(slide_info)
            
            final_keywords = keywords[:100]  # 限制長度
            print(f"最終關鍵字: {final_keywords}")
            return final_keywords
            
        except Exception as e:
            print(f"AI分析錯誤: {e}")
            return self.generate_fallback_keywords(slide_info)
    
    def generate_fallback_keywords(self, slide_info):
        """當AI不可用時的回退關鍵字生成"""
        title = slide_info.get("title", "").lower()
        content = " ".join(slide_info.get("content", [])).lower()
        
        print(f"回退關鍵字生成 - 標題: {title}, 內容: {content[:100]}...")
        
        # 基於關鍵詞映射生成搜尋詞（中英文混合）
        keyword_mapping = {
            # 英文關鍵字
            "business": "business professional meeting",
            "technology": "technology innovation digital",
            "data": "data analysis statistics chart",
            "marketing": "marketing strategy advertising",
            "finance": "finance money investment",
            "education": "education learning school",
            "health": "health medical healthcare",
            "environment": "environment nature green",
            "team": "team collaboration teamwork",
            "strategy": "strategy planning business",
            "innovation": "innovation creative technology",
            "growth": "growth success achievement",
            "research": "research study academic",
            "development": "development progress building",
            "management": "management leadership office",
            "analysis": "analysis review examination",
            "solution": "solution problem solving",
            "project": "project work planning",
            "system": "system network infrastructure",
            "process": "process workflow method",
            "quality": "quality standard excellence",
            "performance": "performance improvement results",
            "customer": "customer service client",
            "market": "market industry commercial",
            "product": "product design manufacturing",
            "service": "service support assistance",
            # 中文關鍵字
            "商業": "business professional meeting",
            "企業": "business corporate company",
            "科技": "technology innovation digital",
            "技術": "technology digital development",
            "數據": "data analysis statistics",
            "資料": "data information analytics",
            "分析": "analysis research examination",
            "行銷": "marketing advertising strategy",
            "市場": "market industry commercial",
            "金融": "finance money investment",
            "財務": "finance accounting money",
            "教育": "education learning school",
            "學習": "learning study education",
            "健康": "health medical wellness",
            "醫療": "medical healthcare health",
            "環境": "environment nature sustainability",
            "環保": "environment green sustainability",
            "團隊": "team collaboration teamwork",
            "策略": "strategy planning business",
            "創新": "innovation creative development",
            "成長": "growth success achievement",
            "研究": "research study academic",
            "開發": "development programming building",
            "管理": "management leadership office",
            "解決": "solution problem solving",
            "專案": "project work planning",
            "系統": "system network infrastructure",
            "流程": "process workflow method",
            "品質": "quality standard excellence",
            "效能": "performance improvement results",
            "客戶": "customer service client",
            "產品": "product design manufacturing",
            "服務": "service support assistance",
            "會議": "meeting conference business",
            "報告": "report presentation business",
            "簡報": "presentation business professional"
        }
        
        found_keywords = []
        text_to_search = f"{title} {content}"
        
        for key, value in keyword_mapping.items():
            if key in text_to_search:
                found_keywords.append(value)
                print(f"找到關鍵字映射: {key} -> {value}")
        
        if found_keywords:
            result = " ".join(found_keywords[:2])  # 最多使用2組關鍵字
        else:
            result = "business presentation professional meeting"
        
        print(f"回退關鍵字結果: {result}")
        return result
    
    def apply_theme_to_presentation(self, original_ppt_path, theme_name, image_style, analysis_result):
        """將主題套用到現有簡報"""
        try:
            # 載入原始簡報
            prs = Presentation(original_ppt_path)
            theme = self.theme_manager.get_theme(theme_name)
            
            # 設定新的16:9尺寸
            prs.slide_width = self.theme_manager.slide_width
            prs.slide_height = self.theme_manager.slide_height
            
            processed_slides = []
            
            for i, slide_info in enumerate(analysis_result["slides"]):
                if i >= len(prs.slides):
                    break
                
                slide = prs.slides[i]
                
                # 應用背景和裝飾
                self.theme_manager.setup_slide_background_and_layout(slide, theme)
                
                # 重新格式化所有文字
                self.reformat_slide_text(slide, theme)
                
                # 決定是否添加圖片
                should_add_image = not (slide_info["has_table"] or slide_info["has_chart"])
                
                if should_add_image and self.pexels_headers:
                    # 生成圖片搜尋關鍵字
                    keywords = self.generate_image_keywords_with_ai(slide_info)
                    
                    # 搜尋和添加圖片
                    image_added = self.add_image_to_existing_slide(slide, keywords, image_style, theme)
                    slide_info["image_added"] = image_added
                    slide_info["search_keywords"] = keywords
                else:
                    slide_info["image_added"] = False
                    slide_info["skip_reason"] = "含有表格或圖表"
                
                processed_slides.append(slide_info)
            
            return prs, processed_slides
            
        except Exception as e:
            print(f"套用主題錯誤: {e}")
            return None, []
    
    def reformat_slide_text(self, slide, theme):
        """重新格式化投影片中的所有文字"""
        try:
            for shape in slide.shapes:
                if hasattr(shape, "text_frame") and shape.text_frame:
                    # 判斷是否為標題（通常在上方且文字較少）
                    is_title = (shape.top < Inches(2) and 
                               len(shape.text_frame.text) < 100 and
                               shape.text_frame.text.strip())
                    
                    for paragraph in shape.text_frame.paragraphs:
                        if paragraph.text.strip():
                            if is_title:
                                # 格式化為標題
                                paragraph.font.name = self.theme_manager.get_font_name()
                                paragraph.font.size = Pt(36)
                                paragraph.font.color.rgb = theme["title_color"]
                                paragraph.font.bold = True
                                paragraph.alignment = PP_ALIGN.LEFT
                            else:
                                # 格式化為內容
                                paragraph.font.name = self.theme_manager.get_font_name()
                                paragraph.font.size = Pt(24)
                                paragraph.font.color.rgb = theme["text_color"]
                                paragraph.space_before = Pt(8)
                                paragraph.space_after = Pt(8)
                                paragraph.line_spacing = 1.3
        except Exception as e:
            print(f"重新格式化文字錯誤: {e}")
    
    def add_image_to_existing_slide(self, slide, keywords, image_style, theme):
        """為現有投影片添加圖片"""
        try:
            print(f"開始為投影片添加圖片，關鍵字: {keywords}")
            
            # 搜尋圖片
            photos = self.search_pexels_with_style(keywords, image_style)
            if not photos:
                print(f"未找到相關圖片，關鍵字: {keywords}")
                return False
            
            print(f"找到 {len(photos)} 張圖片")
            
            # 選擇最佳圖片
            image_url = self.select_best_image(photos)
            if not image_url:
                print("無法選擇最佳圖片")
                return False
            
            print(f"選中圖片URL: {image_url}")
            
            # 下載圖片
            image_path = self.download_image(image_url)
            if not image_path:
                print("圖片下載失敗")
                return False
            
            print(f"圖片下載成功: {image_path}")
            
            # 計算可用空間並添加圖片
            available_area = self.calculate_available_space(slide)
            if available_area:
                print(f"找到可用空間: {available_area}")
                self.add_image_to_available_space(slide, image_path, available_area)
                print("圖片添加成功")
                return True
            else:
                print("未找到可用空間，嘗試背景圖片模式")
                # 如果找不到理想空間，將圖片作為背景放置，但在文字底下
                self.add_background_image_to_slide(slide, image_path)
                return True
            
        except Exception as e:
            print(f"添加圖片錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
            return False
    
    def calculate_available_space(self, slide):
        """計算投影片中的可用空間"""
        try:
            slide_width = self.theme_manager.slide_width.inches
            slide_height = self.theme_manager.slide_height.inches
            
            print(f"投影片尺寸: {slide_width} x {slide_height}")
            
            # 收集所有現有形狀的位置
            occupied_areas = []
            shape_count = 0
            for shape in slide.shapes:
                if hasattr(shape, 'left') and hasattr(shape, 'top'):
                    area = {
                        'left': shape.left.inches,
                        'top': shape.top.inches,
                        'right': shape.left.inches + shape.width.inches,
                        'bottom': shape.top.inches + shape.height.inches
                    }
                    occupied_areas.append(area)
                    shape_count += 1
                    print(f"形狀 {shape_count}: {area}")
            
            # 定義可能的圖片位置區域（更大的尺寸）
            possible_areas = [
                # 右側區域 - 更大
                {'left': slide_width * 0.5, 'top': slide_height * 0.1, 
                 'width': slide_width * 0.45, 'height': slide_height * 0.8},
                # 下方區域 - 更大
                {'left': slide_width * 0.05, 'top': slide_height * 0.55, 
                 'width': slide_width * 0.9, 'height': slide_height * 0.4},
                # 左側區域 - 更大
                {'left': slide_width * 0.05, 'top': slide_height * 0.1, 
                 'width': slide_width * 0.45, 'height': slide_height * 0.8},
                # 中央下方區域 - 更大
                {'left': slide_width * 0.2, 'top': slide_height * 0.65, 
                 'width': slide_width * 0.6, 'height': slide_height * 0.3},
                # 右上區域 - 更大
                {'left': slide_width * 0.6, 'top': slide_height * 0.05, 
                 'width': slide_width * 0.35, 'height': slide_height * 0.5}
            ]
            
            print(f"檢查 {len(possible_areas)} 個可能區域")
            
            # 找到最大的可用區域
            for i, area in enumerate(possible_areas):
                print(f"檢查區域 {i+1}: {area}")
                if self.is_area_available(area, occupied_areas):
                    print(f"區域 {i+1} 可用")
                    return area
                else:
                    print(f"區域 {i+1} 被占用")
            
            print("所有預定義區域都被占用")
            return None
            
        except Exception as e:
            print(f"計算可用空間錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
            return None
    
    def add_background_image_to_slide(self, slide, image_path):
        """將圖片作為背景添加到投影片，確保在文字底下"""
        try:
            print(f"添加背景圖片: {image_path}")
            
            # 計算較大的圖片尺寸，覆蓋更多區域
            slide_width = self.theme_manager.slide_width.inches
            slide_height = self.theme_manager.slide_height.inches
            
            # 使用更大的圖片尺寸，稍微偏移以不完全覆蓋標題
            img_left = slide_width * 0.1  # 10% 邊距
            img_top = slide_height * 0.2   # 20% 邊距，避開標題
            img_width = slide_width * 0.8  # 80% 寬度
            img_height = slide_height * 0.7 # 70% 高度
            
            # 計算圖片比例並調整尺寸
            from PIL import Image as PILImage
            with PILImage.open(image_path) as img:
                img_width_px, img_height_px = img.size
                img_ratio = img_width_px / img_height_px
                
                # 調整尺寸以保持比例
                if img_ratio > (img_width / img_height):
                    # 圖片較寬，以寬度為準
                    actual_width = img_width
                    actual_height = img_width / img_ratio
                    actual_top = img_top + (img_height - actual_height) / 2
                    actual_left = img_left
                else:
                    # 圖片較高，以高度為準
                    actual_height = img_height
                    actual_width = img_height * img_ratio
                    actual_left = img_left + (img_width - actual_width) / 2
                    actual_top = img_top
                
                print(f"背景圖片尺寸: left={actual_left:.2f}, top={actual_top:.2f}, width={actual_width:.2f}, height={actual_height:.2f}")
                
                # 添加圖片
                picture = slide.shapes.add_picture(
                    image_path,
                    Inches(actual_left),
                    Inches(actual_top),
                    Inches(actual_width),
                    Inches(actual_height)
                )
                
                # 將圖片移到最底層（在所有文字和形狀之下）
                picture.element.getparent().remove(picture.element)
                slide.shapes._spTree.insert(2, picture.element)
                
                print("背景圖片添加成功並移至底層")
                
        except Exception as e:
            print(f"添加背景圖片錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
    
    def is_area_available(self, area, occupied_areas):
        """檢查區域是否可用（允許少量重疊）"""
        area_right = area['left'] + area['width']
        area_bottom = area['top'] + area['height']
        
        # 計算重疊程度的閾值（允許10%的重疊）
        overlap_threshold = 0.1
        
        for occupied in occupied_areas:
            # 計算重疊區域
            overlap_left = max(area['left'], occupied['left'])
            overlap_top = max(area['top'], occupied['top'])
            overlap_right = min(area_right, occupied['right'])
            overlap_bottom = min(area_bottom, occupied['bottom'])
            
            # 如果有重疊
            if overlap_left < overlap_right and overlap_top < overlap_bottom:
                overlap_width = overlap_right - overlap_left
                overlap_height = overlap_bottom - overlap_top
                overlap_area = overlap_width * overlap_height
                
                # 計算相對於目標區域的重疊比例
                target_area = area['width'] * area['height']
                overlap_ratio = overlap_area / target_area
                
                print(f"重疊比例: {overlap_ratio:.2f}")
                
                # 如果重疊超過閾值，則認為不可用
                if overlap_ratio > overlap_threshold:
                    return False
        
        return True
    
    def add_image_to_available_space(self, slide, image_path, area):
        """在可用空間添加圖片"""
        try:
            print(f"準備在區域添加圖片: {area}")
            print(f"圖片路徑: {image_path}")
            
            left = Inches(area['left'])
            top = Inches(area['top'])
            width = Inches(area['width'])
            height = Inches(area['height'])
            
            print(f"目標位置: left={left.inches}, top={top.inches}, width={width.inches}, height={height.inches}")
            
            # 計算圖片比例並調整尺寸
            from PIL import Image as PILImage
            with PILImage.open(image_path) as img:
                img_width, img_height = img.size
                img_ratio = img_width / img_height
                area_ratio = area['width'] / area['height']
                
                print(f"圖片原始尺寸: {img_width} x {img_height}, 比例: {img_ratio:.2f}")
                print(f"目標區域比例: {area_ratio:.2f}")
                
                if img_ratio > area_ratio:
                    # 圖片較寬，以寬度為準
                    actual_width = width
                    actual_height = Inches(width.inches / img_ratio)
                    actual_top = Inches(top.inches + (height.inches - actual_height.inches) / 2)
                    actual_left = left
                else:
                    # 圖片較高，以高度為準
                    actual_height = height
                    actual_width = Inches(height.inches * img_ratio)
                    actual_left = Inches(left.inches + (width.inches - actual_width.inches) / 2)
                    actual_top = top
                
                print(f"最終尺寸: left={actual_left.inches:.2f}, top={actual_top.inches:.2f}, width={actual_width.inches:.2f}, height={actual_height.inches:.2f}")
                
                # 添加圖片
                picture = slide.shapes.add_picture(image_path, actual_left, actual_top, 
                                                 actual_width, actual_height)
                print(f"圖片添加成功，picture對象: {picture}")
                
        except Exception as e:
            print(f"在可用空間添加圖片錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
    
    def search_pexels_with_style(self, keywords, image_style, per_page=10):
        """搜尋Pexels圖片"""
        if not self.pexels_headers:
            return None
        
        import requests
        
        # 組合關鍵字
        style_modifier = self.image_styles.get(image_style, "")
        enhanced_keywords = f"{keywords} {style_modifier}"
        
        url = "https://api.pexels.com/v1/search"
        params = {
            "query": enhanced_keywords,
            "per_page": per_page,
            "orientation": "landscape",
            "size": "medium"
        }
        
        try:
            response = requests.get(url, headers=self.pexels_headers, params=params)
            if response.status_code == 200:
                data = response.json()
                return data["photos"] if data["photos"] else None
            return None
        except Exception as e:
            print(f"Pexels API 錯誤: {e}")
            return None
    
    def select_best_image(self, photos):
        """選擇最佳圖片"""
        if not photos:
            return None
        
        # 選擇解析度較高的圖片
        best_photo = photos[0]
        for photo in photos[:3]:
            if photo["width"] * photo["height"] > best_photo["width"] * best_photo["height"]:
                best_photo = photo
        
        return best_photo["src"]["medium"]
    
    def download_image(self, image_url):
        """下載圖片"""
        if not image_url:
            return None
        
        import requests
        from PIL import Image
        
        try:
            response = requests.get(image_url)
            if response.status_code == 200:
                temp_dir = tempfile.mkdtemp()
                image_path = os.path.join(temp_dir, "slide_image.jpg")
                
                # 處理圖片
                image = Image.open(BytesIO(response.content))
                
                # 調整圖片大小
                max_size = (800, 600)
                image.thumbnail(max_size, Image.Resampling.LANCZOS)
                
                # 轉換並儲存
                if image.mode in ("RGBA", "P"):
                    image = image.convert("RGB")
                image.save(image_path, "JPEG", quality=85)
                
                return image_path
            return None
        except Exception as e:
            print(f"圖片下載錯誤: {e}")
            return None
    
    def save_processed_presentation(self, prs, filename):
        """儲存處理後的簡報"""
        try:
            temp_dir = tempfile.mkdtemp()
            filepath = os.path.join(temp_dir, filename)
            prs.save(filepath)
            return filepath
        except Exception as e:
            print(f"儲存簡報錯誤: {e}")
            return None
    
    def generate_analysis_report(self, analysis_result, processed_slides):
        """生成分析報告"""
        report = f"📊 簡報分析報告\n"
        report += f"總投影片數：{analysis_result['total_slides']}\n\n"
        
        for i, slide_info in enumerate(processed_slides, 1):
            report += f"{i}. {slide_info.get('title', f'投影片 {i}')}\n"
            
            # 內容類型
            content_types = []
            if slide_info.get('has_table'):
                content_types.append("表格")
            if slide_info.get('has_chart'):
                content_types.append("圖表")
            if slide_info.get('has_image'):
                content_types.append("原有圖片")
            
            if content_types:
                report += f"   包含：{', '.join(content_types)}\n"
            
            # 圖片處理結果
            if slide_info.get('image_added'):
                report += f"   ✅ 已添加圖片 (關鍵字: {slide_info.get('search_keywords', 'N/A')})\n"
            elif slide_info.get('skip_reason'):
                report += f"   ⏭️  跳過添加圖片 ({slide_info['skip_reason']})\n"
            else:
                report += f"   ❌ 未能添加圖片\n"
            
            report += "\n"
        
        return report