Spaces:
Running
Running
# ppt_analyzer.py
import json
import os
import shutil
import tempfile
import traceback
from io import BytesIO

import google.generativeai as genai
from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE_TYPE
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt

from slide_themes import SlideThemeManager
class PPTAnalyzer:
    """Analyze an uploaded PowerPoint file, restyle it and add stock photos.

    Typical flow: ``analyze_ppt_file`` -> ``apply_theme_to_presentation`` ->
    ``save_processed_presentation`` / ``generate_analysis_report``.

    Diagnostics are written with ``print`` and most methods are best-effort:
    they report the error and return a "failed" value (``None`` / ``False``)
    instead of raising.
    """

    # Upper bound on the length of the query string sent to the image search.
    _MAX_KEYWORD_LENGTH = 100
    # Seconds to wait for any single HTTP request before giving up.
    _HTTP_TIMEOUT = 15
    # Query used when no mapping entry matches the slide text.
    _DEFAULT_KEYWORDS = "business presentation professional meeting"
    # Substring -> search phrase. Insertion order is the matching priority.
    _KEYWORD_MAPPING = {
        # English keywords
        "business": "business professional meeting",
        "technology": "technology innovation digital",
        "data": "data analysis statistics chart",
        "marketing": "marketing strategy advertising",
        "finance": "finance money investment",
        "education": "education learning school",
        "health": "health medical healthcare",
        "environment": "environment nature green",
        "team": "team collaboration teamwork",
        "strategy": "strategy planning business",
        "innovation": "innovation creative technology",
        "growth": "growth success achievement",
        "research": "research study academic",
        "development": "development progress building",
        "management": "management leadership office",
        "analysis": "analysis review examination",
        "solution": "solution problem solving",
        "project": "project work planning",
        "system": "system network infrastructure",
        "process": "process workflow method",
        "quality": "quality standard excellence",
        "performance": "performance improvement results",
        "customer": "customer service client",
        "market": "market industry commercial",
        "product": "product design manufacturing",
        "service": "service support assistance",
        # Chinese keywords
        "商業": "business professional meeting",
        "企業": "business corporate company",
        "科技": "technology innovation digital",
        "技術": "technology digital development",
        "數據": "data analysis statistics",
        "資料": "data information analytics",
        "分析": "analysis research examination",
        "行銷": "marketing advertising strategy",
        "市場": "market industry commercial",
        "金融": "finance money investment",
        "財務": "finance accounting money",
        "教育": "education learning school",
        "學習": "learning study education",
        "健康": "health medical wellness",
        "醫療": "medical healthcare health",
        "環境": "environment nature sustainability",
        "環保": "environment green sustainability",
        "團隊": "team collaboration teamwork",
        "策略": "strategy planning business",
        "創新": "innovation creative development",
        "成長": "growth success achievement",
        "研究": "research study academic",
        "開發": "development programming building",
        "管理": "management leadership office",
        "解決": "solution problem solving",
        "專案": "project work planning",
        "系統": "system network infrastructure",
        "流程": "process workflow method",
        "品質": "quality standard excellence",
        "效能": "performance improvement results",
        "客戶": "customer service client",
        "產品": "product design manufacturing",
        "服務": "service support assistance",
        "會議": "meeting conference business",
        "報告": "report presentation business",
        "簡報": "presentation business professional",
    }

    def __init__(self, gemini_model=None, pexels_headers=None, image_styles=None):
        """Store the collaborators used by the analyzer.

        Args:
            gemini_model: Object exposing ``generate_content(prompt)`` whose
                result has a ``text`` attribute; ``None`` disables AI keywords.
            pexels_headers: HTTP headers for the Pexels API; a falsy value
                disables image search.
            image_styles: Optional mapping of style name -> extra search terms.
        """
        self.gemini_model = gemini_model
        self.pexels_headers = pexels_headers
        self.theme_manager = SlideThemeManager()
        self.image_styles = image_styles or {
            "professional": "business professional corporate clean",
            "creative": "creative artistic colorful vibrant",
            "minimalist": "minimal clean simple white space",
            "modern": "modern contemporary sleek design",
            "natural": "natural outdoor organic environment",
            "technology": "technology digital modern tech innovation",
        }

    # ------------------------------------------------------------------
    # Analysis
    # ------------------------------------------------------------------
    def analyze_ppt_file(self, ppt_file_path):
        """Extract per-slide titles, text and object flags from a PPT file.

        Returns:
            A dict with ``total_slides``, ``slides`` (list of per-slide dicts)
            and ``original_size``; ``None`` if the file cannot be processed.
        """
        try:
            prs = Presentation(ppt_file_path)
            slides_info = [
                self._analyze_slide(number, slide)
                for number, slide in enumerate(prs.slides, start=1)
            ]
            return {
                "total_slides": len(slides_info),
                "slides": slides_info,
                "original_size": {
                    "width": prs.slide_width,
                    "height": prs.slide_height,
                },
            }
        except Exception as e:
            print(f"分析PPT文件錯誤: {e}")
            return None

    def _analyze_slide(self, slide_number, slide):
        """Build the info dict for one slide (title, content lines, flags)."""
        slide_info = {
            "slide_number": slide_number,
            "title": "",
            "content": [],
            "has_table": False,
            "has_chart": False,
            "has_image": False,
            "layout_type": getattr(slide.slide_layout, "name", "Unknown"),
        }

        for shape in slide.shapes:
            shape_type = shape.shape_type
            if shape_type == MSO_SHAPE_TYPE.TABLE:
                slide_info["has_table"] = True
            elif shape_type == MSO_SHAPE_TYPE.CHART:
                slide_info["has_chart"] = True
            elif shape_type == MSO_SHAPE_TYPE.PICTURE:
                slide_info["has_image"] = True
            # has_text_frame is checked instead of touching ``text_frame``
            # directly so that shapes without text are only inspected.
            elif getattr(shape, "has_text_frame", False):
                text_content = shape.text_frame.text.strip()
                if not text_content:
                    continue
                # Heuristic: the first short text box is taken as the title.
                if not slide_info["title"] and len(text_content) < 100:
                    slide_info["title"] = text_content
                else:
                    slide_info["content"].extend(
                        line.strip()
                        for line in text_content.split("\n")
                        if line.strip()
                    )

        # No short text box found: promote the first content line to title.
        if not slide_info["title"] and slide_info["content"]:
            slide_info["title"] = slide_info["content"].pop(0)
        return slide_info

    # ------------------------------------------------------------------
    # Keyword generation
    # ------------------------------------------------------------------
    def generate_image_keywords_with_ai(self, slide_info):
        """Ask the Gemini model for English image-search keywords.

        Falls back to ``generate_fallback_keywords`` when no model is
        configured, the call fails, or the cleaned answer is too short.

        Returns:
            A space-separated string of ASCII keywords, at most
            ``_MAX_KEYWORD_LENGTH`` characters, never cut in mid-word unless a
            single word exceeds the limit.
        """
        if not self.gemini_model:
            print("Gemini模型不可用,使用回退關鍵字")
            return self.generate_fallback_keywords(slide_info)

        title = slide_info.get("title", "")
        content = slide_info.get("content", [])
        # Only the first three lines are sent to keep the prompt short.
        content_text = " ".join(content[:3])
        print(f"AI分析輸入 - 標題: {title}, 內容: {content_text}")

        prompt = f"""
        請分析以下投影片內容,生成適合的英文圖片搜尋關鍵字:
        標題:{title}
        內容:{content_text}
        要求:
        1. 先理解中文內容的核心概念
        2. 將核心概念轉換為相應的英文關鍵字
        3. 生成3-5個英文關鍵字,用空格分隔
        4. 關鍵字要與內容主題相關,具體明確
        5. 避免過於抽象的詞彙
        6. 適合用於圖片搜尋
        7. 只回傳關鍵字,不要其他說明
        例如:
        - 如果內容是關於"商業會議",回傳:business meeting office professional
        - 如果內容是關於"技術創新",回傳:technology innovation digital development
        - 如果內容是關於"數據分析",回傳:data analysis statistics chart
        """
        try:
            response = self.gemini_model.generate_content(prompt)
            keywords = response.text.strip()
            print(f"AI生成的原始關鍵字: {keywords}")

            # Keep ASCII letters/digits and whitespace only. ``isalnum`` alone
            # is also true for CJK characters, which must not reach the
            # English-language image search.
            keywords = "".join(
                c if (c.isascii() and c.isalnum()) or c.isspace() else " "
                for c in keywords
            )
            keywords = " ".join(keywords.split())

            if len(keywords) < 3:
                print("AI生成的關鍵字太短,使用回退方案")
                return self.generate_fallback_keywords(slide_info)

            final_keywords = self._truncate_on_word(
                keywords, self._MAX_KEYWORD_LENGTH
            )
            print(f"最終關鍵字: {final_keywords}")
            return final_keywords
        except Exception as e:
            print(f"AI分析錯誤: {e}")
            return self.generate_fallback_keywords(slide_info)

    @staticmethod
    def _truncate_on_word(text, limit):
        """Shorten single-spaced *text* to *limit* chars on a word boundary."""
        if len(text) <= limit:
            return text
        # Look one character past the limit so a word that ends exactly at
        # the limit is kept rather than dropped.
        head = text[:limit + 1]
        if " " in head:
            return head.rsplit(" ", 1)[0]
        return text[:limit]

    def generate_fallback_keywords(self, slide_info):
        """Derive search keywords from a static substring mapping.

        The lower-cased title and content are scanned for each mapping key;
        the phrases of the first two matches are joined. A generic business
        query is returned when nothing matches.
        """
        title = (slide_info.get("title") or "").lower()
        content = " ".join(slide_info.get("content") or []).lower()
        print(f"回退關鍵字生成 - 標題: {title}, 內容: {content[:100]}...")

        text_to_search = f"{title} {content}"
        found_keywords = []
        for key, value in self._KEYWORD_MAPPING.items():
            if key in text_to_search:
                found_keywords.append(value)
                print(f"找到關鍵字映射: {key} -> {value}")

        if found_keywords:
            # At most two phrases are used to keep the query focused.
            result = " ".join(found_keywords[:2])
        else:
            result = self._DEFAULT_KEYWORDS
        print(f"回退關鍵字結果: {result}")
        return result

    # ------------------------------------------------------------------
    # Theming
    # ------------------------------------------------------------------
    def apply_theme_to_presentation(self, original_ppt_path, theme_name,
                                    image_style, analysis_result):
        """Apply a theme to an existing presentation and add images.

        Args:
            original_ppt_path: Path (or file-like) of the source presentation.
            theme_name: Name passed to the theme manager's ``get_theme``.
            image_style: Key into ``self.image_styles``.
            analysis_result: Result of ``analyze_ppt_file`` for the same file.

        Returns:
            ``(presentation, processed_slides)`` where ``processed_slides`` are
            copies of the analysis entries annotated with ``image_added`` and
            either ``search_keywords`` or ``skip_reason``. ``(None, [])`` on
            error. ``analysis_result`` itself is not modified.
        """
        try:
            prs = Presentation(original_ppt_path)
            theme = self.theme_manager.get_theme(theme_name)

            # Switch to the theme manager's slide size.
            # NOTE(review): existing shapes are not rescaled here — confirm
            # the theme manager handles layouts of a different aspect ratio.
            prs.slide_width = self.theme_manager.slide_width
            prs.slide_height = self.theme_manager.slide_height

            processed_slides = []
            # zip stops at the shorter sequence, so extra analysis entries
            # (or extra slides) are ignored.
            for slide, analyzed in zip(prs.slides, analysis_result["slides"]):
                # Work on a copy and drop annotations from a previous run so
                # stale keys cannot mislabel the report.
                slide_info = dict(analyzed)
                for stale_key in ("image_added", "search_keywords", "skip_reason"):
                    slide_info.pop(stale_key, None)

                self.theme_manager.setup_slide_background_and_layout(slide, theme)
                self.reformat_slide_text(slide, theme)

                if slide_info.get("has_table") or slide_info.get("has_chart"):
                    # Tables and charts need the room; do not add a picture.
                    slide_info["image_added"] = False
                    slide_info["skip_reason"] = "含有表格或圖表"
                elif not self.pexels_headers:
                    slide_info["image_added"] = False
                    slide_info["skip_reason"] = "未設定圖片搜尋服務"
                else:
                    keywords = self.generate_image_keywords_with_ai(slide_info)
                    slide_info["image_added"] = self.add_image_to_existing_slide(
                        slide, keywords, image_style, theme
                    )
                    slide_info["search_keywords"] = keywords

                processed_slides.append(slide_info)
            return prs, processed_slides
        except Exception as e:
            print(f"套用主題錯誤: {e}")
            return None, []

    def reformat_slide_text(self, slide, theme):
        """Restyle every non-empty paragraph on *slide* with the theme fonts.

        A shape is treated as a title when it sits in the top two inches and
        holds fewer than 100 characters of non-blank text. Errors are printed
        and swallowed.
        """
        try:
            font_name = self.theme_manager.get_font_name()
            title_zone = Inches(2)
            for shape in slide.shapes:
                if not getattr(shape, "has_text_frame", False):
                    continue
                text_frame = shape.text_frame
                text = text_frame.text
                top = shape.top
                # ``top`` can be None when no position is stored on the shape.
                is_title = (
                    top is not None
                    and top < title_zone
                    and len(text) < 100
                    and bool(text.strip())
                )
                for paragraph in text_frame.paragraphs:
                    if not paragraph.text.strip():
                        continue
                    if is_title:
                        self._apply_font(paragraph, font_name, Pt(36),
                                         theme["title_color"], bold=True)
                        paragraph.alignment = PP_ALIGN.LEFT
                    else:
                        self._apply_font(paragraph, font_name, Pt(24),
                                         theme["text_color"])
                        paragraph.space_before = Pt(8)
                        paragraph.space_after = Pt(8)
                        paragraph.line_spacing = 1.3
        except Exception as e:
            print(f"重新格式化文字錯誤: {e}")

    @staticmethod
    def _apply_font(paragraph, name, size, color, bold=None):
        """Set font properties on the paragraph default and on each run.

        Run-level character properties override the paragraph defaults, so
        both levels are written to make the new style actually visible.
        """
        fonts = [paragraph.font]
        fonts.extend(run.font for run in paragraph.runs)
        for font in fonts:
            font.name = name
            font.size = size
            font.color.rgb = color
            if bold is not None:
                font.bold = bold

    # ------------------------------------------------------------------
    # Image placement
    # ------------------------------------------------------------------
    def add_image_to_existing_slide(self, slide, keywords, image_style, theme):
        """Search, download and place one image on *slide*.

        ``theme`` is accepted for interface compatibility and is not used.

        Returns:
            ``True`` only if a picture was actually added to the slide.
        """
        image_path = None
        try:
            print(f"開始為投影片添加圖片,關鍵字: {keywords}")
            photos = self.search_pexels_with_style(keywords, image_style)
            if not photos:
                print(f"未找到相關圖片,關鍵字: {keywords}")
                return False
            print(f"找到 {len(photos)} 張圖片")

            image_url = self.select_best_image(photos)
            if not image_url:
                print("無法選擇最佳圖片")
                return False
            print(f"選中圖片URL: {image_url}")

            image_path = self.download_image(image_url)
            if not image_path:
                print("圖片下載失敗")
                return False
            print(f"圖片下載成功: {image_path}")

            available_area = self.calculate_available_space(slide)
            if available_area:
                print(f"找到可用空間: {available_area}")
                added = self.add_image_to_available_space(
                    slide, image_path, available_area
                )
                if added:
                    print("圖片添加成功")
            else:
                # No free region: put the picture behind the existing shapes.
                print("未找到可用空間,嘗試背景圖片模式")
                added = self.add_background_image_to_slide(slide, image_path)
            return bool(added)
        except Exception as e:
            print(f"添加圖片錯誤: {e}")
            print(f"詳細錯誤: {traceback.format_exc()}")
            return False
        finally:
            if image_path:
                self._discard_temp_image(image_path)

    @staticmethod
    def _discard_temp_image(image_path):
        """Best-effort removal of a downloaded image and its empty directory."""
        try:
            os.remove(image_path)
            # rmdir only succeeds on an empty directory, so nothing else in
            # that directory can be lost.
            os.rmdir(os.path.dirname(image_path))
        except OSError:
            # Cleanup failure must not turn a successful insert into an error.
            pass

    def calculate_available_space(self, slide):
        """Return the first predefined region not occupied by existing shapes.

        Candidate regions are tried in priority order (right, bottom, left,
        bottom-centre, top-right); all values are in inches.

        Returns:
            A dict with ``left``/``top``/``width``/``height`` or ``None`` when
            every candidate overlaps existing shapes too much or on error.
        """
        try:
            slide_width = self.theme_manager.slide_width.inches
            slide_height = self.theme_manager.slide_height.inches
            print(f"投影片尺寸: {slide_width} x {slide_height}")

            occupied_areas = []
            for shape in slide.shapes:
                geometry = [getattr(shape, attr, None)
                            for attr in ("left", "top", "width", "height")]
                # Shapes without a stored position/size cannot be measured.
                if any(value is None for value in geometry):
                    continue
                left, top, width, height = (value.inches for value in geometry)
                area = {
                    "left": left,
                    "top": top,
                    "right": left + width,
                    "bottom": top + height,
                }
                occupied_areas.append(area)
                print(f"形狀 {len(occupied_areas)}: {area}")

            possible_areas = [
                # Right side
                {"left": slide_width * 0.5, "top": slide_height * 0.1,
                 "width": slide_width * 0.45, "height": slide_height * 0.8},
                # Bottom strip
                {"left": slide_width * 0.05, "top": slide_height * 0.55,
                 "width": slide_width * 0.9, "height": slide_height * 0.4},
                # Left side
                {"left": slide_width * 0.05, "top": slide_height * 0.1,
                 "width": slide_width * 0.45, "height": slide_height * 0.8},
                # Bottom centre
                {"left": slide_width * 0.2, "top": slide_height * 0.65,
                 "width": slide_width * 0.6, "height": slide_height * 0.3},
                # Top right
                {"left": slide_width * 0.6, "top": slide_height * 0.05,
                 "width": slide_width * 0.35, "height": slide_height * 0.5},
            ]
            print(f"檢查 {len(possible_areas)} 個可能區域")

            for index, area in enumerate(possible_areas, start=1):
                print(f"檢查區域 {index}: {area}")
                if self.is_area_available(area, occupied_areas):
                    print(f"區域 {index} 可用")
                    return area
                print(f"區域 {index} 被占用")

            print("所有預定義區域都被占用")
            return None
        except Exception as e:
            print(f"計算可用空間錯誤: {e}")
            print(f"詳細錯誤: {traceback.format_exc()}")
            return None

    @staticmethod
    def _fit_image_to_box(image_path, left, top, width, height):
        """Scale the image to fit the box, keeping its aspect ratio, centred.

        All box values and the returned ``(left, top, width, height)`` tuple
        are in inches.
        """
        from PIL import Image as PILImage

        with PILImage.open(image_path) as img:
            px_width, px_height = img.size
        img_ratio = px_width / px_height
        print(f"圖片原始尺寸: {px_width} x {px_height}, 比例: {img_ratio:.2f}")

        if img_ratio > width / height:
            # Image is relatively wider: width is the limiting dimension.
            fit_width = width
            fit_height = width / img_ratio
            return left, top + (height - fit_height) / 2, fit_width, fit_height
        # Image is relatively taller: height is the limiting dimension.
        fit_height = height
        fit_width = height * img_ratio
        return left + (width - fit_width) / 2, top, fit_width, fit_height

    def add_background_image_to_slide(self, slide, image_path):
        """Add the image behind all existing shapes of *slide*.

        The picture is fitted into a box covering 80% x 70% of the slide,
        offset downwards to leave the title region mostly clear.

        Returns:
            ``True`` if the picture was added, ``False`` on error.
        """
        try:
            print(f"添加背景圖片: {image_path}")
            slide_width = self.theme_manager.slide_width.inches
            slide_height = self.theme_manager.slide_height.inches

            left, top, width, height = self._fit_image_to_box(
                image_path,
                slide_width * 0.1,   # 10% side margin
                slide_height * 0.2,  # 20% top margin, below the title
                slide_width * 0.8,
                slide_height * 0.7,
            )
            print(f"背景圖片尺寸: left={left:.2f}, top={top:.2f}, "
                  f"width={width:.2f}, height={height:.2f}")

            picture = slide.shapes.add_picture(
                image_path, Inches(left), Inches(top), Inches(width), Inches(height)
            )
            # Move the picture to the bottom of the z-order: index 2 is the
            # first slot after the two group-property children of the tree.
            picture.element.getparent().remove(picture.element)
            slide.shapes._spTree.insert(2, picture.element)
            print("背景圖片添加成功並移至底層")
            return True
        except Exception as e:
            print(f"添加背景圖片錯誤: {e}")
            print(f"詳細錯誤: {traceback.format_exc()}")
            return False

    def is_area_available(self, area, occupied_areas):
        """Tell whether *area* is free enough to hold an image.

        Each occupied rectangle may cover at most 10% of *area*; the check is
        per rectangle, not cumulative.

        Args:
            area: Dict with ``left``, ``top``, ``width``, ``height``.
            occupied_areas: Iterable of dicts with ``left``, ``top``,
                ``right``, ``bottom``.
        """
        area_right = area["left"] + area["width"]
        area_bottom = area["top"] + area["height"]
        overlap_threshold = 0.1  # tolerated share of the target area

        for occupied in occupied_areas:
            overlap_width = (min(area_right, occupied["right"])
                             - max(area["left"], occupied["left"]))
            overlap_height = (min(area_bottom, occupied["bottom"])
                              - max(area["top"], occupied["top"]))
            if overlap_width <= 0 or overlap_height <= 0:
                continue  # rectangles do not intersect

            # A positive intersection implies the target area is positive,
            # so the division is safe.
            target_area = area["width"] * area["height"]
            overlap_ratio = (overlap_width * overlap_height) / target_area
            print(f"重疊比例: {overlap_ratio:.2f}")
            if overlap_ratio > overlap_threshold:
                return False
        return True

    def add_image_to_available_space(self, slide, image_path, area):
        """Add the image centred inside *area*, preserving its aspect ratio.

        Args:
            area: Dict with ``left``, ``top``, ``width``, ``height`` in inches.

        Returns:
            ``True`` if the picture was added, ``False`` on error.
        """
        try:
            print(f"準備在區域添加圖片: {area}")
            print(f"圖片路徑: {image_path}")
            left, top, width, height = self._fit_image_to_box(
                image_path, area["left"], area["top"], area["width"], area["height"]
            )
            print(f"最終尺寸: left={left:.2f}, top={top:.2f}, "
                  f"width={width:.2f}, height={height:.2f}")

            picture = slide.shapes.add_picture(
                image_path, Inches(left), Inches(top), Inches(width), Inches(height)
            )
            print(f"圖片添加成功,picture對象: {picture}")
            return True
        except Exception as e:
            print(f"在可用空間添加圖片錯誤: {e}")
            print(f"詳細錯誤: {traceback.format_exc()}")
            return False

    # ------------------------------------------------------------------
    # Pexels access
    # ------------------------------------------------------------------
    def search_pexels_with_style(self, keywords, image_style, per_page=10):
        """Query the Pexels search API for landscape photos.

        Returns:
            The non-empty ``photos`` list of the response, otherwise ``None``
            (no credentials, non-200 status, empty result, or error).
        """
        if not self.pexels_headers:
            return None

        import requests

        style_modifier = self.image_styles.get(image_style, "")
        params = {
            "query": f"{keywords} {style_modifier}".strip(),
            "per_page": per_page,
            "orientation": "landscape",
            "size": "medium",
        }
        try:
            # A timeout keeps an unresponsive server from blocking the run.
            response = requests.get(
                "https://api.pexels.com/v1/search",
                headers=self.pexels_headers,
                params=params,
                timeout=self._HTTP_TIMEOUT,
            )
            if response.status_code != 200:
                return None
            return response.json().get("photos") or None
        except Exception as e:
            print(f"Pexels API 錯誤: {e}")
            return None

    def select_best_image(self, photos):
        """Return the ``medium`` URL of the largest of the first three photos.

        "Largest" means greatest ``width * height``; the earliest photo wins
        ties. Returns ``None`` for an empty list.
        """
        if not photos:
            return None
        best_photo = max(photos[:3], key=lambda p: p["width"] * p["height"])
        return best_photo["src"]["medium"]

    def download_image(self, image_url):
        """Download an image, shrink it to at most 800x600 and save as JPEG.

        Returns:
            Path of the JPEG inside a fresh temporary directory, or ``None``
            on any failure (the temporary directory is removed in that case).
        """
        if not image_url:
            return None

        import requests
        from PIL import Image

        temp_dir = None
        try:
            response = requests.get(image_url, timeout=self._HTTP_TIMEOUT)
            if response.status_code != 200:
                return None

            # Pillow >= 9.1 exposes filters on Image.Resampling; older
            # releases have them directly on the Image module.
            resample = getattr(Image, "Resampling", Image).LANCZOS
            with Image.open(BytesIO(response.content)) as image:
                # JPEG cannot store alpha/palette modes; normalise first so
                # the resize also runs on true-colour data.
                if image.mode not in ("RGB", "L"):
                    image = image.convert("RGB")
                image.thumbnail((800, 600), resample)

                temp_dir = tempfile.mkdtemp()
                image_path = os.path.join(temp_dir, "slide_image.jpg")
                image.save(image_path, "JPEG", quality=85)
            return image_path
        except Exception as e:
            print(f"圖片下載錯誤: {e}")
            if temp_dir:
                shutil.rmtree(temp_dir, ignore_errors=True)
            return None

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    def save_processed_presentation(self, prs, filename):
        """Save *prs* into a new temporary directory.

        Only the final path component of *filename* is used, so the file can
        never be written outside the temporary directory.

        Returns:
            The full path of the saved file, or ``None`` on error. The caller
            owns the returned file and its directory.
        """
        try:
            safe_name = os.path.basename(str(filename).replace("\\", "/"))
            if not safe_name:
                safe_name = "presentation.pptx"
            filepath = os.path.join(tempfile.mkdtemp(), safe_name)
            prs.save(filepath)
            return filepath
        except Exception as e:
            print(f"儲存簡報錯誤: {e}")
            return None

    def generate_analysis_report(self, analysis_result, processed_slides):
        """Build a human-readable text report of the processing outcome.

        Args:
            analysis_result: Dict containing ``total_slides``.
            processed_slides: Slide dicts as returned by
                ``apply_theme_to_presentation``.
        """
        parts = [
            "📊 簡報分析報告\n",
            f"總投影片數:{analysis_result['total_slides']}\n\n",
        ]
        for i, slide_info in enumerate(processed_slides, 1):
            # An empty title falls back to a generic "slide N" heading.
            heading = slide_info.get("title") or f"投影片 {i}"
            parts.append(f"{i}. {heading}\n")

            content_types = [
                label
                for key, label in (
                    ("has_table", "表格"),
                    ("has_chart", "圖表"),
                    ("has_image", "原有圖片"),
                )
                if slide_info.get(key)
            ]
            if content_types:
                parts.append(f" 包含:{', '.join(content_types)}\n")

            if slide_info.get("image_added"):
                keywords = slide_info.get("search_keywords", "N/A")
                parts.append(f" ✅ 已添加圖片 (關鍵字: {keywords})\n")
            elif slide_info.get("skip_reason"):
                parts.append(f" ⏭️ 跳過添加圖片 ({slide_info['skip_reason']})\n")
            else:
                parts.append(" ❌ 未能添加圖片\n")
            parts.append("\n")
        return "".join(parts)