# Source: PPTCreator / ppt_analyzer.py (uploaded by dseditor; commit 09c9f6f "CommitForLocalAndCloud", verified)
# ppt_analyzer.py
import os
import json
import tempfile
from io import BytesIO
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
import google.generativeai as genai
from slide_themes import SlideThemeManager
class PPTAnalyzer:
    """Analyze an existing PowerPoint deck, restyle it and add stock photos.

    Typical flow: ``analyze_ppt_file`` -> ``apply_theme_to_presentation`` ->
    ``save_processed_presentation`` -> ``generate_analysis_report``.

    Attributes:
        gemini_model: Object exposing ``generate_content(prompt)`` whose result
            has a ``.text`` attribute, or ``None`` to use keyword fallback only.
        pexels_headers: HTTP headers sent to the Pexels API, or ``None`` to
            disable image search.
        theme_manager: ``SlideThemeManager`` providing themes, slide size and
            the font name.
        image_styles: Mapping of style name -> extra search terms appended to
            every Pexels query.
    """

    # Seconds to wait for the Pexels API / image host before giving up, so a
    # stalled connection cannot hang the whole conversion.
    REQUEST_TIMEOUT = 15

    # Style name -> extra query terms; used when the caller supplies none.
    DEFAULT_IMAGE_STYLES = {
        "professional": "business professional corporate clean",
        "creative": "creative artistic colorful vibrant",
        "minimalist": "minimal clean simple white space",
        "modern": "modern contemporary sleek design",
        "natural": "natural outdoor organic environment",
        "technology": "technology digital modern tech innovation"
    }

    # Query used when no fallback keyword matches the slide text.
    _FALLBACK_DEFAULT = "business presentation professional meeting"

    # Substring (English or Chinese) -> English search terms.  Dict order is
    # the match priority used by ``generate_fallback_keywords``.
    _FALLBACK_KEYWORDS = {
        # English triggers
        "business": "business professional meeting",
        "technology": "technology innovation digital",
        "data": "data analysis statistics chart",
        "marketing": "marketing strategy advertising",
        "finance": "finance money investment",
        "education": "education learning school",
        "health": "health medical healthcare",
        "environment": "environment nature green",
        "team": "team collaboration teamwork",
        "strategy": "strategy planning business",
        "innovation": "innovation creative technology",
        "growth": "growth success achievement",
        "research": "research study academic",
        "development": "development progress building",
        "management": "management leadership office",
        "analysis": "analysis review examination",
        "solution": "solution problem solving",
        "project": "project work planning",
        "system": "system network infrastructure",
        "process": "process workflow method",
        "quality": "quality standard excellence",
        "performance": "performance improvement results",
        "customer": "customer service client",
        "market": "market industry commercial",
        "product": "product design manufacturing",
        "service": "service support assistance",
        # Chinese triggers
        "商業": "business professional meeting",
        "企業": "business corporate company",
        "科技": "technology innovation digital",
        "技術": "technology digital development",
        "數據": "data analysis statistics",
        "資料": "data information analytics",
        "分析": "analysis research examination",
        "行銷": "marketing advertising strategy",
        "市場": "market industry commercial",
        "金融": "finance money investment",
        "財務": "finance accounting money",
        "教育": "education learning school",
        "學習": "learning study education",
        "健康": "health medical wellness",
        "醫療": "medical healthcare health",
        "環境": "environment nature sustainability",
        "環保": "environment green sustainability",
        "團隊": "team collaboration teamwork",
        "策略": "strategy planning business",
        "創新": "innovation creative development",
        "成長": "growth success achievement",
        "研究": "research study academic",
        "開發": "development programming building",
        "管理": "management leadership office",
        "解決": "solution problem solving",
        "專案": "project work planning",
        "系統": "system network infrastructure",
        "流程": "process workflow method",
        "品質": "quality standard excellence",
        "效能": "performance improvement results",
        "客戶": "customer service client",
        "產品": "product design manufacturing",
        "服務": "service support assistance",
        "會議": "meeting conference business",
        "報告": "report presentation business",
        "簡報": "presentation business professional"
    }

    # Candidate picture regions as (left, top, width, height) fractions of the
    # slide, in preference order: right side, bottom band, left side,
    # bottom centre, top right.
    _CANDIDATE_AREAS = (
        (0.5, 0.1, 0.45, 0.8),
        (0.05, 0.55, 0.9, 0.4),
        (0.05, 0.1, 0.45, 0.8),
        (0.2, 0.65, 0.6, 0.3),
        (0.6, 0.05, 0.35, 0.5),
    )

    def __init__(self, gemini_model=None, pexels_headers=None, image_styles=None):
        """Store the optional AI model / Pexels credentials and build a theme manager.

        Args:
            gemini_model: Optional model used to derive image keywords.
            pexels_headers: Optional request headers for the Pexels API.
            image_styles: Optional style-name -> query-terms mapping; a copy of
                ``DEFAULT_IMAGE_STYLES`` is used when empty or ``None``.
        """
        self.gemini_model = gemini_model
        self.pexels_headers = pexels_headers
        self.theme_manager = SlideThemeManager()
        # Copy so per-instance edits never leak into the class-level default.
        self.image_styles = image_styles or dict(self.DEFAULT_IMAGE_STYLES)

    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------
    def analyze_ppt_file(self, ppt_file_path):
        """Extract per-slide titles, text lines and content flags from a deck.

        Args:
            ppt_file_path: Path (or file-like object) accepted by
                ``pptx.Presentation``.

        Returns:
            A dict with ``total_slides``, ``slides`` (one info dict per slide)
            and ``original_size`` (``width`` / ``height`` as stored in the
            file), or ``None`` if the file could not be analyzed.
        """
        try:
            prs = Presentation(ppt_file_path)
            slides_info = [
                self._describe_slide(number, slide)
                for number, slide in enumerate(prs.slides, start=1)
            ]
            return {
                "total_slides": len(slides_info),
                "slides": slides_info,
                "original_size": {
                    "width": prs.slide_width,
                    "height": prs.slide_height
                }
            }
        except Exception as e:
            print(f"分析PPT文件錯誤: {e}")
            return None

    def _describe_slide(self, slide_number, slide):
        """Build the info dict (title, content lines, table/chart/image flags) for one slide."""
        layout = slide.slide_layout
        slide_info = {
            "slide_number": slide_number,
            "title": "",
            "content": [],
            "has_table": False,
            "has_chart": False,
            "has_image": False,
            "layout_type": layout.name if hasattr(layout, 'name') else "Unknown"
        }

        for shape in slide.shapes:
            try:
                shape_type = shape.shape_type
            except NotImplementedError:
                # Some shapes do not report a type; one such shape must not
                # abort the analysis of the whole deck.
                shape_type = None

            # ``has_table`` / ``has_chart`` also catch tables and charts that
            # live inside placeholders, whose shape_type is PLACEHOLDER.
            if getattr(shape, "has_table", False) or shape_type == MSO_SHAPE_TYPE.TABLE:
                slide_info["has_table"] = True
            elif getattr(shape, "has_chart", False) or shape_type == MSO_SHAPE_TYPE.CHART:
                slide_info["has_chart"] = True
            elif shape_type == MSO_SHAPE_TYPE.PICTURE:
                slide_info["has_image"] = True
            elif hasattr(shape, "text_frame") and shape.text_frame:
                text_content = shape.text_frame.text.strip()
                if not text_content:
                    continue
                # Heuristic: the first short text box on the slide is the title.
                if not slide_info["title"] and len(text_content) < 100:
                    slide_info["title"] = text_content
                else:
                    slide_info["content"].extend(
                        line.strip() for line in text_content.split('\n') if line.strip()
                    )

        # No short text box found: promote the first content line to title.
        if not slide_info["title"] and slide_info["content"]:
            slide_info["title"] = slide_info["content"].pop(0)
        return slide_info

    # ------------------------------------------------------------------
    # Keyword generation
    # ------------------------------------------------------------------
    def generate_image_keywords_with_ai(self, slide_info):
        """Ask the Gemini model for English image-search keywords for a slide.

        Falls back to ``generate_fallback_keywords`` when no model is
        configured, when the model call fails, or when the cleaned answer is
        shorter than three characters.

        Args:
            slide_info: Slide dict with ``title`` and ``content`` keys.

        Returns:
            A space-separated keyword string of at most 100 characters that
            contains only ASCII letters and digits.
        """
        if not self.gemini_model:
            print("Gemini模型不可用,使用回退關鍵字")
            return self.generate_fallback_keywords(slide_info)

        title = slide_info.get("title", "")
        content = slide_info.get("content", [])
        # Only the first three lines, to keep the prompt short.
        content_text = " ".join(content[:3])
        print(f"AI分析輸入 - 標題: {title}, 內容: {content_text}")

        prompt = (
            "\n"
            "請分析以下投影片內容,生成適合的英文圖片搜尋關鍵字:\n"
            f"標題:{title}\n"
            f"內容:{content_text}\n"
            "要求:\n"
            "1. 先理解中文內容的核心概念\n"
            "2. 將核心概念轉換為相應的英文關鍵字\n"
            "3. 生成3-5個英文關鍵字,用空格分隔\n"
            "4. 關鍵字要與內容主題相關,具體明確\n"
            "5. 避免過於抽象的詞彙\n"
            "6. 適合用於圖片搜尋\n"
            "7. 只回傳關鍵字,不要其他說明\n"
            "例如:\n"
            '- 如果內容是關於"商業會議",回傳:business meeting office professional\n'
            '- 如果內容是關於"技術創新",回傳:technology innovation digital development\n'
            '- 如果內容是關於"數據分析",回傳:data analysis statistics chart\n'
        )

        try:
            response = self.gemini_model.generate_content(prompt)
            keywords = response.text.strip()
            print(f"AI生成的原始關鍵字: {keywords}")

            final_keywords = self._clean_keywords(keywords)
            if len(final_keywords) < 3:
                print("AI生成的關鍵字太短,使用回退方案")
                return self.generate_fallback_keywords(slide_info)

            print(f"最終關鍵字: {final_keywords}")
            return final_keywords
        except Exception as e:
            print(f"AI分析錯誤: {e}")
            return self.generate_fallback_keywords(slide_info)

    @staticmethod
    def _clean_keywords(raw, max_length=100):
        """Reduce a model answer to ASCII alphanumeric words, at most ``max_length`` chars."""
        # str.isalnum() alone is True for CJK characters, which would leak
        # Chinese text into the English-only search query.
        ascii_only = "".join(c if c.isascii() and c.isalnum() else " " for c in raw)
        cleaned = " ".join(ascii_only.split())
        if len(cleaned) > max_length:
            truncated = cleaned[:max_length]
            # Drop a word that was cut in half by the length limit.
            if cleaned[max_length] != " " and " " in truncated:
                truncated = truncated.rsplit(" ", 1)[0]
            cleaned = truncated.rstrip()
        return cleaned

    def generate_fallback_keywords(self, slide_info):
        """Derive search keywords from a static substring table (no AI needed).

        The slide title and content are lower-cased and scanned for every key
        of ``_FALLBACK_KEYWORDS``; the first two distinct matching keyword
        groups are merged with repeated words removed.

        Args:
            slide_info: Slide dict with ``title`` and ``content`` keys.

        Returns:
            A space-separated keyword string; a generic business query when
            nothing matches.
        """
        title = (slide_info.get("title") or "").lower()
        content = " ".join(slide_info.get("content") or []).lower()
        print(f"回退關鍵字生成 - 標題: {title}, 內容: {content[:100]}...")

        text_to_search = f"{title} {content}"
        groups = []
        for key, value in self._FALLBACK_KEYWORDS.items():
            if key in text_to_search:
                print(f"找到關鍵字映射: {key} -> {value}")
                # English and Chinese triggers may map to the same group.
                if value not in groups:
                    groups.append(value)

        if groups:
            # At most two groups; dict.fromkeys de-duplicates words in order.
            words = " ".join(groups[:2]).split()
            result = " ".join(dict.fromkeys(words))
        else:
            result = self._FALLBACK_DEFAULT
        print(f"回退關鍵字結果: {result}")
        return result

    # ------------------------------------------------------------------
    # Theming
    # ------------------------------------------------------------------
    def apply_theme_to_presentation(self, original_ppt_path, theme_name, image_style, analysis_result):
        """Re-theme an existing deck and add a stock photo to eligible slides.

        The slide size is set to the theme manager's dimensions; existing
        shapes are not rescaled.  Each slide dict from ``analysis_result`` is
        updated in place with ``image_added`` plus either ``search_keywords``
        or ``skip_reason``.

        Args:
            original_ppt_path: Path of the deck to load.
            theme_name: Theme identifier understood by the theme manager.
            image_style: Key into ``self.image_styles``.
            analysis_result: Result of ``analyze_ppt_file`` for the same deck.

        Returns:
            ``(presentation, processed_slides)``; ``(None, [])`` on failure.
        """
        try:
            prs = Presentation(original_ppt_path)
            theme = self.theme_manager.get_theme(theme_name)

            prs.slide_width = self.theme_manager.slide_width
            prs.slide_height = self.theme_manager.slide_height

            processed_slides = []
            # zip stops at the shorter sequence, so extra analysis entries
            # (or extra slides) are ignored.
            for slide, slide_info in zip(prs.slides, analysis_result["slides"]):
                self.theme_manager.setup_slide_background_and_layout(slide, theme)
                self.reformat_slide_text(slide, theme)

                if slide_info["has_table"] or slide_info["has_chart"]:
                    slide_info["image_added"] = False
                    slide_info["skip_reason"] = "含有表格或圖表"
                elif not self.pexels_headers:
                    # Previously reported as "contains table or chart",
                    # which was misleading.
                    slide_info["image_added"] = False
                    slide_info["skip_reason"] = "未設定Pexels API,無法搜尋圖片"
                else:
                    keywords = self.generate_image_keywords_with_ai(slide_info)
                    slide_info["image_added"] = self.add_image_to_existing_slide(
                        slide, keywords, image_style, theme
                    )
                    slide_info["search_keywords"] = keywords
                    # Clear a stale reason left by an earlier run.
                    slide_info.pop("skip_reason", None)

                processed_slides.append(slide_info)

            return prs, processed_slides
        except Exception as e:
            print(f"套用主題錯誤: {e}")
            return None, []

    def reformat_slide_text(self, slide, theme):
        """Apply the theme's title / body typography to every text shape.

        A shape counts as a title when it starts within the top two inches of
        the slide and holds fewer than 100 characters of non-blank text.

        Args:
            slide: python-pptx slide to modify in place.
            theme: Mapping providing ``title_color`` and ``text_color``.
        """
        try:
            font_name = self.theme_manager.get_font_name()
            for shape in slide.shapes:
                if not (hasattr(shape, "text_frame") and shape.text_frame):
                    continue

                text = shape.text_frame.text
                top = shape.top
                # ``top`` can be None; comparing None with a length would
                # raise and stop the formatting of all remaining shapes.
                is_title = bool(
                    top is not None
                    and top < Inches(2)
                    and len(text) < 100
                    and text.strip()
                )

                for paragraph in shape.text_frame.paragraphs:
                    if not paragraph.text.strip():
                        continue
                    paragraph.font.name = font_name
                    if is_title:
                        paragraph.font.size = Pt(36)
                        paragraph.font.color.rgb = theme["title_color"]
                        paragraph.font.bold = True
                        paragraph.alignment = PP_ALIGN.LEFT
                    else:
                        paragraph.font.size = Pt(24)
                        paragraph.font.color.rgb = theme["text_color"]
                        paragraph.space_before = Pt(8)
                        paragraph.space_after = Pt(8)
                        paragraph.line_spacing = 1.3
        except Exception as e:
            print(f"重新格式化文字錯誤: {e}")

    # ------------------------------------------------------------------
    # Image placement
    # ------------------------------------------------------------------
    def add_image_to_existing_slide(self, slide, keywords, image_style, theme):
        """Search, download and place one stock photo on ``slide``.

        The picture goes into the first free candidate region; if none is
        free it is placed behind the existing shapes instead.  The downloaded
        temporary file is removed afterwards.

        Args:
            slide: python-pptx slide to modify.
            keywords: Search terms for Pexels.
            image_style: Key into ``self.image_styles``.
            theme: Unused here; kept for interface compatibility.

        Returns:
            ``True`` if a picture was inserted, ``False`` otherwise.
        """
        image_path = None
        try:
            print(f"開始為投影片添加圖片,關鍵字: {keywords}")

            photos = self.search_pexels_with_style(keywords, image_style)
            if not photos:
                print(f"未找到相關圖片,關鍵字: {keywords}")
                return False
            print(f"找到 {len(photos)} 張圖片")

            image_url = self.select_best_image(photos)
            if not image_url:
                print("無法選擇最佳圖片")
                return False
            print(f"選中圖片URL: {image_url}")

            image_path = self.download_image(image_url)
            if not image_path:
                print("圖片下載失敗")
                return False
            print(f"圖片下載成功: {image_path}")

            available_area = self.calculate_available_space(slide)
            if available_area:
                print(f"找到可用空間: {available_area}")
                outcome = self.add_image_to_available_space(slide, image_path, available_area)
            else:
                print("未找到可用空間,嘗試背景圖片模式")
                outcome = self.add_background_image_to_slide(slide, image_path)

            # Only an explicit False means failure, so an override that still
            # returns None (the historical contract) counts as success.
            added = outcome is not False
            if added and available_area:
                print("圖片添加成功")
            return added
        except Exception as e:
            print(f"添加圖片錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
            return False
        finally:
            if image_path:
                self._discard_temp_image(image_path)

    @staticmethod
    def _discard_temp_image(image_path):
        """Best-effort removal of a downloaded image and its (then empty) temp folder."""
        try:
            os.remove(image_path)
            # rmdir refuses non-empty directories, so unrelated files are safe.
            os.rmdir(os.path.dirname(image_path))
        except OSError:
            # Cleanup must never turn a successful insert into a failure.
            pass

    def calculate_available_space(self, slide):
        """Return the first candidate region not blocked by existing shapes.

        Args:
            slide: python-pptx slide whose shapes are inspected.

        Returns:
            A dict with ``left``, ``top``, ``width``, ``height`` in inches, or
            ``None`` when every candidate region is occupied or an error
            occurs.
        """
        try:
            slide_width = self.theme_manager.slide_width.inches
            slide_height = self.theme_manager.slide_height.inches
            print(f"投影片尺寸: {slide_width} x {slide_height}")

            occupied_areas = []
            for shape in slide.shapes:
                geometry = [getattr(shape, name, None)
                            for name in ('left', 'top', 'width', 'height')]
                # Shapes without explicit geometry report None; skip them
                # instead of failing the whole computation.
                if any(value is None for value in geometry):
                    continue
                left, top, width, height = (value.inches for value in geometry)
                area = {
                    'left': left,
                    'top': top,
                    'right': left + width,
                    'bottom': top + height
                }
                occupied_areas.append(area)
                print(f"形狀 {len(occupied_areas)}: {area}")

            possible_areas = [
                {'left': slide_width * left_f, 'top': slide_height * top_f,
                 'width': slide_width * width_f, 'height': slide_height * height_f}
                for left_f, top_f, width_f, height_f in self._CANDIDATE_AREAS
            ]
            print(f"檢查 {len(possible_areas)} 個可能區域")

            # Candidates are ordered by preference; the first free one wins.
            for i, area in enumerate(possible_areas):
                print(f"檢查區域 {i+1}: {area}")
                if self.is_area_available(area, occupied_areas):
                    print(f"區域 {i+1} 可用")
                    return area
                print(f"區域 {i+1} 被占用")

            print("所有預定義區域都被占用")
            return None
        except Exception as e:
            print(f"計算可用空間錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
            return None

    @staticmethod
    def _fit_within(img_ratio, left, top, width, height):
        """Scale an image of aspect ``img_ratio`` (w/h) to fit a box, centred.

        Returns:
            ``(left, top, width, height)`` of the fitted image, in the same
            unit as the inputs.
        """
        if img_ratio > (width / height):
            # Image is relatively wider than the box: width is the limit.
            fitted_height = width / img_ratio
            return left, top + (height - fitted_height) / 2, width, fitted_height
        # Image is relatively taller than the box: height is the limit.
        fitted_width = height * img_ratio
        return left + (width - fitted_width) / 2, top, fitted_width, height

    def add_background_image_to_slide(self, slide, image_path):
        """Insert the image behind all other shapes, below the title band.

        The picture is fitted into a box inset 10% from the left, 20% from
        the top, spanning 80% of the width and 70% of the height.

        Args:
            slide: python-pptx slide to modify.
            image_path: Path of the image file to insert.

        Returns:
            ``True`` on success, ``False`` if the picture could not be added.
        """
        try:
            print(f"添加背景圖片: {image_path}")

            slide_width = self.theme_manager.slide_width.inches
            slide_height = self.theme_manager.slide_height.inches

            from PIL import Image as PILImage
            with PILImage.open(image_path) as img:
                img_width_px, img_height_px = img.size
            img_ratio = img_width_px / img_height_px

            actual_left, actual_top, actual_width, actual_height = self._fit_within(
                img_ratio,
                slide_width * 0.1,
                slide_height * 0.2,
                slide_width * 0.8,
                slide_height * 0.7,
            )
            print(f"背景圖片尺寸: left={actual_left:.2f}, top={actual_top:.2f}, width={actual_width:.2f}, height={actual_height:.2f}")

            picture = slide.shapes.add_picture(
                image_path,
                Inches(actual_left),
                Inches(actual_top),
                Inches(actual_width),
                Inches(actual_height)
            )

            # Re-insert at index 2 of the shape tree (right after the two
            # group-property children) so the picture sits below everything.
            picture.element.getparent().remove(picture.element)
            slide.shapes._spTree.insert(2, picture.element)
            print("背景圖片添加成功並移至底層")
            return True
        except Exception as e:
            print(f"添加背景圖片錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
            return False

    def is_area_available(self, area, occupied_areas):
        """Tell whether ``area`` is free enough to hold a picture.

        Args:
            area: Dict with ``left``, ``top``, ``width``, ``height``.
            occupied_areas: Dicts with ``left``, ``top``, ``right``, ``bottom``.

        Returns:
            ``False`` if the area is empty (zero size) or any single occupied
            rectangle covers more than 10% of it; ``True`` otherwise.
        """
        target_area = area['width'] * area['height']
        if target_area <= 0:
            # A degenerate region cannot hold a picture (and would divide by zero).
            return False

        area_right = area['left'] + area['width']
        area_bottom = area['top'] + area['height']
        overlap_threshold = 0.1

        for occupied in occupied_areas:
            overlap_width = min(area_right, occupied['right']) - max(area['left'], occupied['left'])
            overlap_height = min(area_bottom, occupied['bottom']) - max(area['top'], occupied['top'])
            if overlap_width <= 0 or overlap_height <= 0:
                continue  # rectangles do not intersect

            overlap_ratio = (overlap_width * overlap_height) / target_area
            print(f"重疊比例: {overlap_ratio:.2f}")
            if overlap_ratio > overlap_threshold:
                return False
        return True

    def add_image_to_available_space(self, slide, image_path, area):
        """Insert the image into ``area``, keeping its aspect ratio and centring it.

        Args:
            slide: python-pptx slide to modify.
            image_path: Path of the image file to insert.
            area: Dict with ``left``, ``top``, ``width``, ``height`` in inches.

        Returns:
            ``True`` on success, ``False`` if the picture could not be added.
        """
        try:
            print(f"準備在區域添加圖片: {area}")
            print(f"圖片路徑: {image_path}")

            left = Inches(area['left'])
            top = Inches(area['top'])
            width = Inches(area['width'])
            height = Inches(area['height'])
            print(f"目標位置: left={left.inches}, top={top.inches}, width={width.inches}, height={height.inches}")

            from PIL import Image as PILImage
            with PILImage.open(image_path) as img:
                img_width, img_height = img.size
            img_ratio = img_width / img_height
            area_ratio = area['width'] / area['height']
            print(f"圖片原始尺寸: {img_width} x {img_height}, 比例: {img_ratio:.2f}")
            print(f"目標區域比例: {area_ratio:.2f}")

            fit_left, fit_top, fit_width, fit_height = self._fit_within(
                img_ratio, area['left'], area['top'], area['width'], area['height']
            )
            print(f"最終尺寸: left={fit_left:.2f}, top={fit_top:.2f}, width={fit_width:.2f}, height={fit_height:.2f}")

            picture = slide.shapes.add_picture(
                image_path,
                Inches(fit_left),
                Inches(fit_top),
                Inches(fit_width),
                Inches(fit_height)
            )
            print(f"圖片添加成功,picture對象: {picture}")
            return True
        except Exception as e:
            print(f"在可用空間添加圖片錯誤: {e}")
            import traceback
            print(f"詳細錯誤: {traceback.format_exc()}")
            return False

    # ------------------------------------------------------------------
    # Pexels access
    # ------------------------------------------------------------------
    def search_pexels_with_style(self, keywords, image_style, per_page=10):
        """Query the Pexels search API for landscape photos.

        Args:
            keywords: Base search terms.
            image_style: Key into ``self.image_styles``; unknown keys add nothing.
            per_page: Number of results requested.

        Returns:
            A non-empty list of photo dicts, or ``None`` when search is
            disabled, the request fails or nothing was found.
        """
        if not self.pexels_headers:
            return None

        import requests

        style_modifier = self.image_styles.get(image_style, "")
        params = {
            # strip() avoids a trailing blank when the style is unknown.
            "query": f"{keywords} {style_modifier}".strip(),
            "per_page": per_page,
            "orientation": "landscape",
            "size": "medium"
        }

        try:
            response = requests.get(
                "https://api.pexels.com/v1/search",
                headers=self.pexels_headers,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                print(f"Pexels API 錯誤: HTTP {response.status_code}")
                return None
            return response.json().get("photos") or None
        except Exception as e:
            print(f"Pexels API 錯誤: {e}")
            return None

    def select_best_image(self, photos):
        """Pick the largest (by pixel count) of the first three results.

        Args:
            photos: Photo dicts with ``width``, ``height`` and ``src.medium``.

        Returns:
            The ``medium`` rendition URL of the chosen photo, or ``None`` for
            an empty list.
        """
        if not photos:
            return None
        # max() keeps the earliest entry on ties, i.e. the most relevant hit.
        best_photo = max(photos[:3], key=lambda photo: photo["width"] * photo["height"])
        return best_photo["src"]["medium"]

    def download_image(self, image_url):
        """Download an image, shrink it to at most 800x600 and save it as JPEG.

        Args:
            image_url: URL of the image to fetch.

        Returns:
            Path of the saved file inside a fresh temporary directory (the
            caller is responsible for removing it), or ``None`` on failure.
        """
        if not image_url:
            return None

        import shutil
        import requests
        from PIL import Image

        temp_dir = None
        try:
            response = requests.get(image_url, timeout=self.REQUEST_TIMEOUT)
            if response.status_code != 200:
                return None

            image = Image.open(BytesIO(response.content))
            # JPEG cannot store alpha or palette data; convert before
            # resizing so the resampling filter works on real colour values.
            if image.mode != "RGB":
                image = image.convert("RGB")
            image.thumbnail((800, 600), Image.Resampling.LANCZOS)

            # Create the directory only once the image decoded successfully.
            temp_dir = tempfile.mkdtemp()
            image_path = os.path.join(temp_dir, "slide_image.jpg")
            image.save(image_path, "JPEG", quality=85)
            return image_path
        except Exception as e:
            print(f"圖片下載錯誤: {e}")
            if temp_dir:
                shutil.rmtree(temp_dir, ignore_errors=True)
            return None

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    def save_processed_presentation(self, prs, filename):
        """Save the presentation into a new temporary directory.

        Args:
            prs: Object with a ``save(path)`` method (a python-pptx presentation).
            filename: Desired file name; any directory part is discarded so
                the file always lands inside the temporary directory.

        Returns:
            Full path of the saved file, or ``None`` on failure.
        """
        try:
            safe_name = os.path.basename(filename) or "presentation.pptx"
            filepath = os.path.join(tempfile.mkdtemp(), safe_name)
            prs.save(filepath)
            return filepath
        except Exception as e:
            print(f"儲存簡報錯誤: {e}")
            return None

    def generate_analysis_report(self, analysis_result, processed_slides):
        """Render a human-readable summary of what was done to each slide.

        Args:
            analysis_result: Dict containing ``total_slides``.
            processed_slides: Slide dicts returned by
                ``apply_theme_to_presentation``.

        Returns:
            The report as a single string.
        """
        parts = [
            "📊 簡報分析報告\n",
            f"總投影片數:{analysis_result['total_slides']}\n\n",
        ]

        for i, slide_info in enumerate(processed_slides, 1):
            # ``title`` is normally present but may be empty, so a plain
            # .get() default would never kick in.
            title = slide_info.get('title') or f'投影片 {i}'
            parts.append(f"{i}. {title}\n")

            content_types = [
                label
                for flag, label in (('has_table', "表格"),
                                    ('has_chart', "圖表"),
                                    ('has_image', "原有圖片"))
                if slide_info.get(flag)
            ]
            if content_types:
                parts.append(f" 包含:{', '.join(content_types)}\n")

            if slide_info.get('image_added'):
                parts.append(f" ✅ 已添加圖片 (關鍵字: {slide_info.get('search_keywords', 'N/A')})\n")
            elif slide_info.get('skip_reason'):
                parts.append(f" ⏭️ 跳過添加圖片 ({slide_info['skip_reason']})\n")
            else:
                parts.append(" ❌ 未能添加圖片\n")
            parts.append("\n")

        return "".join(parts)