Spaces:

dseditor
/

PPTCreator

Running

App Files Files Community

PPTCreator / ppt_analyzer.py

dseditor

CommitForLocalAndCloud

09c9f6f verified about 1 month ago

raw

history blame contribute delete

30.5 kB

	# ppt_analyzer.py
	import os
	import json
	import tempfile
	from io import BytesIO
	from pptx import Presentation
	from pptx.util import Inches, Pt
	from pptx.enum.shapes import MSO_SHAPE_TYPE
	from pptx.enum.text import PP_ALIGN
	from pptx.dml.color import RGBColor
	import google.generativeai as genai
	from slide_themes import SlideThemeManager

	class PPTAnalyzer:
	    """Analyze an existing .pptx deck, restyle it and decorate it with stock photos.

	    The analyzer reads slide text and object types with python-pptx, derives
	    image-search keywords (through a Gemini model when one is supplied,
	    otherwise from a static keyword table), queries the Pexels search API and
	    places the downloaded picture on every slide that has no table or chart.

	    All public methods are best-effort: failures are printed and reported
	    through the return value (``None`` / ``False``) instead of being raised.
	    """

	    # Style name -> extra terms appended to the Pexels query.
	    DEFAULT_IMAGE_STYLES = {
	        "professional": "business professional corporate clean",
	        "creative": "creative artistic colorful vibrant",
	        "minimalist": "minimal clean simple white space",
	        "modern": "modern contemporary sleek design",
	        "natural": "natural outdoor organic environment",
	        "technology": "technology digital modern tech innovation",
	    }

	    # Substring looked for in the slide text -> English search terms.
	    # Insertion order matters: only the first two matches are used.
	    FALLBACK_KEYWORD_MAP = {
	        # English triggers
	        "business": "business professional meeting",
	        "technology": "technology innovation digital",
	        "data": "data analysis statistics chart",
	        "marketing": "marketing strategy advertising",
	        "finance": "finance money investment",
	        "education": "education learning school",
	        "health": "health medical healthcare",
	        "environment": "environment nature green",
	        "team": "team collaboration teamwork",
	        "strategy": "strategy planning business",
	        "innovation": "innovation creative technology",
	        "growth": "growth success achievement",
	        "research": "research study academic",
	        "development": "development progress building",
	        "management": "management leadership office",
	        "analysis": "analysis review examination",
	        "solution": "solution problem solving",
	        "project": "project work planning",
	        "system": "system network infrastructure",
	        "process": "process workflow method",
	        "quality": "quality standard excellence",
	        "performance": "performance improvement results",
	        "customer": "customer service client",
	        "market": "market industry commercial",
	        "product": "product design manufacturing",
	        "service": "service support assistance",
	        # Chinese triggers
	        "商業": "business professional meeting",
	        "企業": "business corporate company",
	        "科技": "technology innovation digital",
	        "技術": "technology digital development",
	        "數據": "data analysis statistics",
	        "資料": "data information analytics",
	        "分析": "analysis research examination",
	        "行銷": "marketing advertising strategy",
	        "市場": "market industry commercial",
	        "金融": "finance money investment",
	        "財務": "finance accounting money",
	        "教育": "education learning school",
	        "學習": "learning study education",
	        "健康": "health medical wellness",
	        "醫療": "medical healthcare health",
	        "環境": "environment nature sustainability",
	        "環保": "environment green sustainability",
	        "團隊": "team collaboration teamwork",
	        "策略": "strategy planning business",
	        "創新": "innovation creative development",
	        "成長": "growth success achievement",
	        "研究": "research study academic",
	        "開發": "development programming building",
	        "管理": "management leadership office",
	        "解決": "solution problem solving",
	        "專案": "project work planning",
	        "系統": "system network infrastructure",
	        "流程": "process workflow method",
	        "品質": "quality standard excellence",
	        "效能": "performance improvement results",
	        "客戶": "customer service client",
	        "產品": "product design manufacturing",
	        "服務": "service support assistance",
	        "會議": "meeting conference business",
	        "報告": "report presentation business",
	        "簡報": "presentation business professional",
	    }

	    # Query used when no entry of FALLBACK_KEYWORD_MAP matches.
	    FALLBACK_DEFAULT_KEYWORDS = "business presentation professional meeting"

	    # Upper bound on the length of an AI-generated search query.
	    MAX_KEYWORD_LENGTH = 100

	    # Seconds before an HTTP request to Pexels is abandoned.
	    REQUEST_TIMEOUT = 15

	    def __init__(self, gemini_model=None, pexels_headers=None, image_styles=None):
	        """Store the collaborators used by the analyzer.

	        Args:
	            gemini_model: Object exposing ``generate_content(prompt)`` whose
	                result has a ``text`` attribute. When falsy, keywords come
	                from the static fallback table.
	            pexels_headers: HTTP headers sent with Pexels requests. When
	                falsy, no image search is attempted.
	            image_styles: Optional mapping of style name to extra query
	                terms. Defaults to a copy of ``DEFAULT_IMAGE_STYLES``.
	        """
	        self.gemini_model = gemini_model
	        self.pexels_headers = pexels_headers
	        self.theme_manager = SlideThemeManager()
	        # Copy so that per-instance edits never leak into the class default.
	        self.image_styles = image_styles or dict(self.DEFAULT_IMAGE_STYLES)

	    def analyze_ppt_file(self, ppt_file_path):
	        """Extract a per-slide summary from a .pptx file.

	        Args:
	            ppt_file_path: Path (or file-like object) accepted by
	                ``pptx.Presentation``.

	        Returns:
	            A dict with ``total_slides``, ``slides`` (one summary dict per
	            slide, see ``_describe_slide``) and ``original_size`` (the
	            presentation's ``slide_width`` / ``slide_height``), or ``None``
	            when the file cannot be analyzed.
	        """
	        try:
	            prs = Presentation(ppt_file_path)
	            slides_info = [
	                self._describe_slide(number, slide)
	                for number, slide in enumerate(prs.slides, start=1)
	            ]
	            return {
	                "total_slides": len(slides_info),
	                "slides": slides_info,
	                "original_size": {
	                    "width": prs.slide_width,
	                    "height": prs.slide_height,
	                },
	            }
	        except Exception as e:
	            print(f"分析PPT文件錯誤: {e}")
	            return None

	    def _describe_slide(self, slide_number, slide):
	        """Build the summary dict (title, content lines, object flags) for one slide."""
	        layout = slide.slide_layout
	        slide_info = {
	            "slide_number": slide_number,
	            "title": "",
	            "content": [],
	            "has_table": False,
	            "has_chart": False,
	            "has_image": False,
	            "layout_type": layout.name if hasattr(layout, 'name') else "Unknown",
	        }

	        for shape in slide.shapes:
	            if shape.shape_type == MSO_SHAPE_TYPE.TABLE:
	                slide_info["has_table"] = True
	            elif shape.shape_type == MSO_SHAPE_TYPE.CHART:
	                slide_info["has_chart"] = True
	            elif shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
	                slide_info["has_image"] = True
	            elif hasattr(shape, "text_frame") and shape.text_frame:
	                text_content = shape.text_frame.text.strip()
	                if not text_content:
	                    continue
	                # Heuristic: the first short text found is taken as the title.
	                if not slide_info["title"] and len(text_content) < 100:
	                    slide_info["title"] = text_content
	                else:
	                    slide_info["content"].extend(
	                        line.strip()
	                        for line in text_content.split('\n')
	                        if line.strip()
	                    )

	        # No short text qualified: promote the first content line to title.
	        if not slide_info["title"] and slide_info["content"]:
	            slide_info["title"] = slide_info["content"].pop(0)

	        return slide_info

	    def generate_image_keywords_with_ai(self, slide_info):
	        """Ask the Gemini model for English image-search keywords for a slide.

	        Falls back to ``generate_fallback_keywords`` when no model is
	        configured, when the call fails, or when the cleaned reply is shorter
	        than three characters.

	        Args:
	            slide_info: Slide summary dict; ``title`` and ``content`` are read.

	        Returns:
	            A space-separated keyword string of at most
	            ``MAX_KEYWORD_LENGTH`` characters.
	        """
	        if not self.gemini_model:
	            print("Gemini模型不可用，使用回退關鍵字")
	            return self.generate_fallback_keywords(slide_info)

	        title = slide_info.get("title", "")
	        content = slide_info.get("content", [])
	        content_text = " ".join(content[:3])  # first 3 lines keep the prompt short

	        print(f"AI分析輸入 - 標題: {title}, 內容: {content_text}")

	        prompt = f"""
	請分析以下投影片內容，生成適合的英文圖片搜尋關鍵字：

	標題：{title}
	內容：{content_text}

	要求：
	1. 先理解中文內容的核心概念
	2. 將核心概念轉換為相應的英文關鍵字
	3. 生成3-5個英文關鍵字，用空格分隔
	4. 關鍵字要與內容主題相關，具體明確
	5. 避免過於抽象的詞彙
	6. 適合用於圖片搜尋
	7. 只回傳關鍵字，不要其他說明

	例如：
	- 如果內容是關於"商業會議"，回傳：business meeting office professional
	- 如果內容是關於"技術創新"，回傳：technology innovation digital development
	- 如果內容是關於"數據分析"，回傳：data analysis statistics chart
	"""

	        try:
	            response = self.gemini_model.generate_content(prompt)
	            keywords = response.text.strip()
	            print(f"AI生成的原始關鍵字: {keywords}")

	            # Keep ASCII letters/digits and whitespace only. str.isalnum()
	            # alone is also true for CJK characters, which would let a
	            # non-English reply through to the image search.
	            keywords = ''.join(
	                c if (c.isascii() and c.isalnum()) or c.isspace() else ' '
	                for c in keywords
	            )
	            keywords = ' '.join(keywords.split())  # collapse runs of whitespace

	            if len(keywords) < 3:
	                print("AI生成的關鍵字太短，使用回退方案")
	                return self.generate_fallback_keywords(slide_info)

	            final_keywords = self._truncate_keywords(keywords, self.MAX_KEYWORD_LENGTH)
	            print(f"最終關鍵字: {final_keywords}")
	            return final_keywords

	        except Exception as e:
	            print(f"AI分析錯誤: {e}")
	            return self.generate_fallback_keywords(slide_info)

	    @staticmethod
	    def _truncate_keywords(keywords, limit):
	        """Cut a space-separated string to ``limit`` chars without splitting a word."""
	        if len(keywords) <= limit:
	            return keywords
	        head = keywords[:limit]
	        # The cut landed inside a word: drop the partial word if another remains.
	        if keywords[limit] != " " and " " in head:
	            head = head.rsplit(" ", 1)[0]
	        return head.rstrip()

	    def generate_fallback_keywords(self, slide_info):
	        """Derive search keywords from ``FALLBACK_KEYWORD_MAP`` without AI.

	        The lower-cased title and content are scanned for each trigger as a
	        plain substring. The terms of the first two matches (in table order)
	        are joined, with repeated words removed.

	        Args:
	            slide_info: Slide summary dict; ``title`` and ``content`` are
	                read and may be missing or ``None``.

	        Returns:
	            A space-separated keyword string; ``FALLBACK_DEFAULT_KEYWORDS``
	            when nothing matches.
	        """
	        title = (slide_info.get("title") or "").lower()
	        content = " ".join(slide_info.get("content") or []).lower()

	        print(f"回退關鍵字生成 - 標題: {title}, 內容: {content[:100]}...")

	        text_to_search = f"{title} {content}"
	        found_keywords = []
	        for key, value in self.FALLBACK_KEYWORD_MAP.items():
	            if key in text_to_search:
	                found_keywords.append(value)
	                print(f"找到關鍵字映射: {key} -> {value}")

	        if found_keywords:
	            # Use at most two groups; dict.fromkeys drops repeated words
	            # while preserving their first-seen order.
	            words = " ".join(found_keywords[:2]).split()
	            result = " ".join(dict.fromkeys(words))
	        else:
	            result = self.FALLBACK_DEFAULT_KEYWORDS

	        print(f"回退關鍵字結果: {result}")
	        return result

	    def apply_theme_to_presentation(self, original_ppt_path, theme_name, image_style, analysis_result):
	        """Restyle an existing presentation and add images where appropriate.

	        Each entry of ``analysis_result["slides"]`` is updated in place with
	        ``image_added`` and either ``search_keywords`` or ``skip_reason``.

	        Args:
	            original_ppt_path: Path of the presentation to load.
	            theme_name: Name passed to ``theme_manager.get_theme``.
	            image_style: Key of ``self.image_styles`` used for the search.
	            analysis_result: Dict produced by ``analyze_ppt_file``.

	        Returns:
	            ``(presentation, processed_slides)`` on success, ``(None, [])``
	            when anything fails.
	        """
	        try:
	            prs = Presentation(original_ppt_path)
	            theme = self.theme_manager.get_theme(theme_name)

	            # Resize to the theme manager's slide size (the original comment
	            # describes it as 16:9).
	            prs.slide_width = self.theme_manager.slide_width
	            prs.slide_height = self.theme_manager.slide_height

	            processed_slides = []

	            # zip stops at the shorter sequence, so analysis entries without
	            # a matching slide are ignored.
	            for slide, slide_info in zip(prs.slides, analysis_result["slides"]):
	                self.theme_manager.setup_slide_background_and_layout(slide, theme)
	                self.reformat_slide_text(slide, theme)

	                # The dicts are reused when a theme is applied again; clear
	                # what a previous run may have recorded.
	                slide_info.pop("skip_reason", None)
	                slide_info.pop("search_keywords", None)

	                if slide_info["has_table"] or slide_info["has_chart"]:
	                    slide_info["image_added"] = False
	                    slide_info["skip_reason"] = "含有表格或圖表"
	                elif not self.pexels_headers:
	                    # Report the real cause instead of blaming a table/chart.
	                    slide_info["image_added"] = False
	                    slide_info["skip_reason"] = "未設定 Pexels API，無法搜尋圖片"
	                else:
	                    keywords = self.generate_image_keywords_with_ai(slide_info)
	                    slide_info["image_added"] = self.add_image_to_existing_slide(
	                        slide, keywords, image_style, theme
	                    )
	                    slide_info["search_keywords"] = keywords

	                processed_slides.append(slide_info)

	            return prs, processed_slides

	        except Exception as e:
	            print(f"套用主題錯誤: {e}")
	            return None, []

	    def reformat_slide_text(self, slide, theme):
	        """Apply the theme's fonts and colours to every text paragraph of a slide.

	        A shape counts as a title when it sits in the top two inches and holds
	        fewer than 100 characters of non-blank text. Errors are handled per
	        shape so one problematic shape does not stop the others from being
	        formatted.

	        Args:
	            slide: python-pptx slide to modify in place.
	            theme: Mapping providing ``title_color`` and ``text_color``.
	        """
	        for shape in slide.shapes:
	            try:
	                if not (hasattr(shape, "text_frame") and shape.text_frame):
	                    continue

	                text = shape.text_frame.text
	                top = shape.top
	                # ``top`` may be None when the shape carries no explicit
	                # position; such a shape is formatted as body text.
	                is_title = bool(
	                    top is not None
	                    and top < Inches(2)
	                    and len(text) < 100
	                    and text.strip()
	                )

	                for paragraph in shape.text_frame.paragraphs:
	                    if not paragraph.text.strip():
	                        continue
	                    font = paragraph.font
	                    font.name = self.theme_manager.get_font_name()
	                    if is_title:
	                        font.size = Pt(36)
	                        font.color.rgb = theme["title_color"]
	                        font.bold = True
	                        paragraph.alignment = PP_ALIGN.LEFT
	                    else:
	                        font.size = Pt(24)
	                        font.color.rgb = theme["text_color"]
	                        paragraph.space_before = Pt(8)
	                        paragraph.space_after = Pt(8)
	                        paragraph.line_spacing = 1.3
	            except Exception as e:
	                print(f"重新格式化文字錯誤: {e}")

	    def add_image_to_existing_slide(self, slide, keywords, image_style, theme):
	        """Search, download and place one image on a slide.

	        The picture goes into the first free candidate area; when none is
	        free it is placed behind the existing shapes instead. The downloaded
	        temporary file is removed afterwards.

	        Args:
	            slide: python-pptx slide to modify in place.
	            keywords: Search query for Pexels.
	            image_style: Key of ``self.image_styles``.
	            theme: Unused; kept for interface compatibility.

	        Returns:
	            ``True`` only when a picture was actually added to the slide.
	        """
	        image_path = None
	        try:
	            print(f"開始為投影片添加圖片，關鍵字: {keywords}")

	            photos = self.search_pexels_with_style(keywords, image_style)
	            if not photos:
	                print(f"未找到相關圖片，關鍵字: {keywords}")
	                return False

	            print(f"找到 {len(photos)} 張圖片")

	            image_url = self.select_best_image(photos)
	            if not image_url:
	                print("無法選擇最佳圖片")
	                return False

	            print(f"選中圖片URL: {image_url}")

	            image_path = self.download_image(image_url)
	            if not image_path:
	                print("圖片下載失敗")
	                return False

	            print(f"圖片下載成功: {image_path}")

	            available_area = self.calculate_available_space(slide)
	            if available_area:
	                print(f"找到可用空間: {available_area}")
	                placed = bool(
	                    self.add_image_to_available_space(slide, image_path, available_area)
	                )
	                if placed:
	                    print("圖片添加成功")
	                return placed

	            print("未找到可用空間，嘗試背景圖片模式")
	            # No free area: put the picture underneath the existing shapes.
	            return bool(self.add_background_image_to_slide(slide, image_path))

	        except Exception as e:
	            print(f"添加圖片錯誤: {e}")
	            import traceback
	            print(f"詳細錯誤: {traceback.format_exc()}")
	            return False
	        finally:
	            if image_path:
	                self._discard_temp_image(image_path)

	    def _discard_temp_image(self, image_path):
	        """Best-effort removal of a downloaded image and its (empty) temp directory."""
	        try:
	            os.remove(image_path)
	            # rmdir only succeeds on an empty directory, so unrelated files
	            # are never touched.
	            os.rmdir(os.path.dirname(image_path))
	        except OSError as e:
	            print(f"清理暫存圖片失敗: {e}")

	    def calculate_available_space(self, slide):
	        """Pick the first predefined candidate area not occupied by existing shapes.

	        Candidates are tried in a fixed priority order (right, bottom, left,
	        centre-bottom, top-right); the first one accepted by
	        ``is_area_available`` is returned.

	        Args:
	            slide: python-pptx slide whose shapes are inspected.

	        Returns:
	            A dict with ``left``, ``top``, ``width``, ``height`` in inches,
	            or ``None`` when every candidate is occupied or an error occurs.
	        """
	        try:
	            slide_width = self.theme_manager.slide_width.inches
	            slide_height = self.theme_manager.slide_height.inches

	            print(f"投影片尺寸: {slide_width} x {slide_height}")

	            occupied_areas = []
	            for shape in slide.shapes:
	                if not (hasattr(shape, 'left') and hasattr(shape, 'top')):
	                    continue
	                left, top = shape.left, shape.top
	                width, height = shape.width, shape.height
	                # A shape without explicit geometry cannot be measured;
	                # skip it rather than abort the whole search.
	                if left is None or top is None or width is None or height is None:
	                    continue
	                area = {
	                    'left': left.inches,
	                    'top': top.inches,
	                    'right': left.inches + width.inches,
	                    'bottom': top.inches + height.inches,
	                }
	                occupied_areas.append(area)
	                print(f"形狀 {len(occupied_areas)}: {area}")

	            possible_areas = [
	                # right side
	                {'left': slide_width * 0.5, 'top': slide_height * 0.1,
	                 'width': slide_width * 0.45, 'height': slide_height * 0.8},
	                # bottom strip
	                {'left': slide_width * 0.05, 'top': slide_height * 0.55,
	                 'width': slide_width * 0.9, 'height': slide_height * 0.4},
	                # left side
	                {'left': slide_width * 0.05, 'top': slide_height * 0.1,
	                 'width': slide_width * 0.45, 'height': slide_height * 0.8},
	                # centre bottom
	                {'left': slide_width * 0.2, 'top': slide_height * 0.65,
	                 'width': slide_width * 0.6, 'height': slide_height * 0.3},
	                # top right
	                {'left': slide_width * 0.6, 'top': slide_height * 0.05,
	                 'width': slide_width * 0.35, 'height': slide_height * 0.5},
	            ]

	            print(f"檢查 {len(possible_areas)} 個可能區域")

	            for i, area in enumerate(possible_areas):
	                print(f"檢查區域 {i+1}: {area}")
	                if self.is_area_available(area, occupied_areas):
	                    print(f"區域 {i+1} 可用")
	                    return area
	                print(f"區域 {i+1} 被占用")

	            print("所有預定義區域都被占用")
	            return None

	        except Exception as e:
	            print(f"計算可用空間錯誤: {e}")
	            import traceback
	            print(f"詳細錯誤: {traceback.format_exc()}")
	            return None

	    @staticmethod
	    def _fit_within(img_ratio, left, top, width, height):
	        """Centre a box of aspect ratio ``img_ratio`` (w/h) inside the given box.

	        Returns:
	            ``(left, top, width, height)`` of the fitted box, in the same
	            unit as the arguments.
	        """
	        if img_ratio > width / height:
	            # Image is relatively wider than the box: width is the limit.
	            fitted_height = width / img_ratio
	            return left, top + (height - fitted_height) / 2, width, fitted_height
	        # Image is relatively taller: height is the limit.
	        fitted_width = height * img_ratio
	        return left + (width - fitted_width) / 2, top, fitted_width, height

	    @staticmethod
	    def _image_aspect_ratio(image_path):
	        """Return width / height of the image file, read with Pillow."""
	        from PIL import Image as PILImage
	        with PILImage.open(image_path) as img:
	            width_px, height_px = img.size
	        return width_px / height_px

	    def add_background_image_to_slide(self, slide, image_path):
	        """Place the image behind the slide's existing shapes.

	        The picture is fitted into a box starting 10% from the left and 20%
	        from the top, spanning 80% x 70% of the slide, then moved to the
	        bottom of the shape tree.

	        Args:
	            slide: python-pptx slide to modify in place.
	            image_path: Path of the image file to embed.

	        Returns:
	            ``True`` when the picture was added, ``False`` on error.
	        """
	        try:
	            print(f"添加背景圖片: {image_path}")

	            slide_width = self.theme_manager.slide_width.inches
	            slide_height = self.theme_manager.slide_height.inches

	            actual_left, actual_top, actual_width, actual_height = self._fit_within(
	                self._image_aspect_ratio(image_path),
	                slide_width * 0.1,   # 10% left margin
	                slide_height * 0.2,  # 20% top margin, keeps clear of the title
	                slide_width * 0.8,
	                slide_height * 0.7,
	            )

	            print(f"背景圖片尺寸: left={actual_left:.2f}, top={actual_top:.2f}, width={actual_width:.2f}, height={actual_height:.2f}")

	            picture = slide.shapes.add_picture(
	                image_path,
	                Inches(actual_left),
	                Inches(actual_top),
	                Inches(actual_width),
	                Inches(actual_height),
	            )

	            # Re-insert the picture at index 2 of the shape tree, i.e. right
	            # after its two leading property elements, so it is drawn first.
	            # NOTE(review): this also puts it below any background shapes the
	            # theme manager added - confirm that is intended.
	            picture.element.getparent().remove(picture.element)
	            slide.shapes._spTree.insert(2, picture.element)

	            print("背景圖片添加成功並移至底層")
	            return True

	        except Exception as e:
	            print(f"添加背景圖片錯誤: {e}")
	            import traceback
	            print(f"詳細錯誤: {traceback.format_exc()}")
	            return False

	    def is_area_available(self, area, occupied_areas):
	        """Tell whether a candidate area is free enough for a picture.

	        Each occupied rectangle is checked on its own: the area is rejected
	        as soon as one rectangle covers more than 10% of it. Overlaps are
	        not accumulated across rectangles.

	        Args:
	            area: Dict with ``left``, ``top``, ``width``, ``height``.
	            occupied_areas: Iterable of dicts with ``left``, ``top``,
	                ``right``, ``bottom`` in the same unit.

	        Returns:
	            ``True`` when no single rectangle exceeds the threshold.
	        """
	        overlap_threshold = 0.1
	        area_left, area_top = area['left'], area['top']
	        area_right = area_left + area['width']
	        area_bottom = area_top + area['height']
	        target_area = area['width'] * area['height']

	        for occupied in occupied_areas:
	            overlap_width = min(area_right, occupied['right']) - max(area_left, occupied['left'])
	            overlap_height = min(area_bottom, occupied['bottom']) - max(area_top, occupied['top'])
	            if overlap_width <= 0 or overlap_height <= 0:
	                continue  # rectangles do not intersect

	            overlap_ratio = (overlap_width * overlap_height) / target_area
	            print(f"重疊比例: {overlap_ratio:.2f}")

	            if overlap_ratio > overlap_threshold:
	                return False

	        return True

	    def add_image_to_available_space(self, slide, image_path, area):
	        """Add the image to the slide, fitted and centred inside ``area``.

	        Args:
	            slide: python-pptx slide to modify in place.
	            image_path: Path of the image file to embed.
	            area: Dict with ``left``, ``top``, ``width``, ``height`` in inches.

	        Returns:
	            ``True`` when the picture was added, ``False`` on error.
	        """
	        try:
	            print(f"準備在區域添加圖片: {area}")
	            print(f"圖片路徑: {image_path}")

	            left = Inches(area['left'])
	            top = Inches(area['top'])
	            width = Inches(area['width'])
	            height = Inches(area['height'])

	            print(f"目標位置: left={left.inches}, top={top.inches}, width={width.inches}, height={height.inches}")

	            from PIL import Image as PILImage
	            with PILImage.open(image_path) as img:
	                img_width, img_height = img.size
	            img_ratio = img_width / img_height
	            area_ratio = area['width'] / area['height']

	            print(f"圖片原始尺寸: {img_width} x {img_height}, 比例: {img_ratio:.2f}")
	            print(f"目標區域比例: {area_ratio:.2f}")

	            fitted = self._fit_within(
	                img_ratio, left.inches, top.inches, width.inches, height.inches
	            )
	            actual_left, actual_top, actual_width, actual_height = (
	                Inches(value) for value in fitted
	            )

	            print(f"最終尺寸: left={actual_left.inches:.2f}, top={actual_top.inches:.2f}, width={actual_width.inches:.2f}, height={actual_height.inches:.2f}")

	            picture = slide.shapes.add_picture(image_path, actual_left, actual_top,
	                                               actual_width, actual_height)
	            print(f"圖片添加成功，picture對象: {picture}")
	            return True

	        except Exception as e:
	            print(f"在可用空間添加圖片錯誤: {e}")
	            import traceback
	            print(f"詳細錯誤: {traceback.format_exc()}")
	            return False

	    def search_pexels_with_style(self, keywords, image_style, per_page=10):
	        """Query the Pexels search endpoint for landscape photos.

	        Args:
	            keywords: Base search terms.
	            image_style: Key of ``self.image_styles``; unknown keys add nothing.
	            per_page: Number of results requested.

	        Returns:
	            The non-empty ``photos`` list of the response, otherwise ``None``
	            (no headers configured, non-200 status, empty result, or error).
	        """
	        if not self.pexels_headers:
	            return None

	        import requests

	        style_modifier = self.image_styles.get(image_style, "")
	        # strip() avoids a dangling space when the style adds no terms.
	        enhanced_keywords = f"{keywords} {style_modifier}".strip()

	        url = "https://api.pexels.com/v1/search"
	        params = {
	            "query": enhanced_keywords,
	            "per_page": per_page,
	            "orientation": "landscape",
	            "size": "medium",
	        }

	        try:
	            # Without a timeout a stalled connection would block forever.
	            response = requests.get(
	                url,
	                headers=self.pexels_headers,
	                params=params,
	                timeout=self.REQUEST_TIMEOUT,
	            )
	            if response.status_code != 200:
	                print(f"Pexels API 錯誤: HTTP {response.status_code}")
	                return None
	            return response.json().get("photos") or None
	        except Exception as e:
	            print(f"Pexels API 錯誤: {e}")
	            return None

	    def select_best_image(self, photos):
	        """Return the ``medium`` URL of the largest of the first three photos.

	        Size is ``width * height`` as reported by the API; ties keep the
	        earlier photo.

	        Args:
	            photos: List of Pexels photo dicts.

	        Returns:
	            The chosen URL, or ``None`` for an empty list.
	        """
	        if not photos:
	            return None

	        best_photo = max(photos[:3], key=lambda photo: photo["width"] * photo["height"])
	        return best_photo["src"]["medium"]

	    def download_image(self, image_url):
	        """Download an image, shrink it to fit 800x600 and save it as a JPEG.

	        Args:
	            image_url: URL to fetch; falsy values short-circuit to ``None``.

	        Returns:
	            Path of ``slide_image.jpg`` inside a fresh temporary directory,
	            or ``None`` on a non-200 status or any error. The caller owns
	            the file and its directory.
	        """
	        if not image_url:
	            return None

	        import shutil
	        import requests
	        from PIL import Image

	        temp_dir = None
	        try:
	            response = requests.get(image_url, timeout=self.REQUEST_TIMEOUT)
	            if response.status_code != 200:
	                return None

	            image = Image.open(BytesIO(response.content))

	            # JPEG cannot store alpha/palette/high-depth modes (RGBA, P, LA,
	            # I, ...); normalise everything to RGB before resizing.
	            if image.mode != "RGB":
	                image = image.convert("RGB")
	            image.thumbnail((800, 600), Image.Resampling.LANCZOS)

	            temp_dir = tempfile.mkdtemp()
	            image_path = os.path.join(temp_dir, "slide_image.jpg")
	            image.save(image_path, "JPEG", quality=85)
	            return image_path
	        except Exception as e:
	            print(f"圖片下載錯誤: {e}")
	            if temp_dir:
	                # Do not leave an empty/partial directory behind.
	                shutil.rmtree(temp_dir, ignore_errors=True)
	            return None

	    def save_processed_presentation(self, prs, filename):
	        """Save the presentation under ``filename`` in a new temporary directory.

	        Args:
	            prs: python-pptx presentation to save.
	            filename: File name to use inside the temporary directory.

	        Returns:
	            The full path of the saved file, or ``None`` when saving fails
	            (the temporary directory is removed in that case).
	        """
	        import shutil

	        temp_dir = None
	        try:
	            temp_dir = tempfile.mkdtemp()
	            filepath = os.path.join(temp_dir, filename)
	            prs.save(filepath)
	            return filepath
	        except Exception as e:
	            print(f"儲存簡報錯誤: {e}")
	            if temp_dir:
	                shutil.rmtree(temp_dir, ignore_errors=True)
	            return None

	    def generate_analysis_report(self, analysis_result, processed_slides):
	        """Render a plain-text report of what was done to each slide.

	        Args:
	            analysis_result: Dict providing ``total_slides``.
	            processed_slides: Slide summary dicts as returned by
	                ``apply_theme_to_presentation``.

	        Returns:
	            The report as a single string.
	        """
	        parts = [
	            "📊 簡報分析報告\n",
	            f"總投影片數：{analysis_result['total_slides']}\n\n",
	        ]

	        for i, slide_info in enumerate(processed_slides, 1):
	            # ``title`` is always present but may be empty, so a .get()
	            # default would never be used; fall back on falsy values.
	            title = slide_info.get('title') or f'投影片 {i}'
	            parts.append(f"{i}. {title}\n")

	            content_types = []
	            if slide_info.get('has_table'):
	                content_types.append("表格")
	            if slide_info.get('has_chart'):
	                content_types.append("圖表")
	            if slide_info.get('has_image'):
	                content_types.append("原有圖片")

	            if content_types:
	                parts.append(f" 包含：{', '.join(content_types)}\n")

	            if slide_info.get('image_added'):
	                parts.append(f" ✅ 已添加圖片 (關鍵字: {slide_info.get('search_keywords', 'N/A')})\n")
	            elif slide_info.get('skip_reason'):
	                parts.append(f" ⏭️ 跳過添加圖片 ({slide_info['skip_reason']})\n")
	            else:
	                parts.append(" ❌ 未能添加圖片\n")

	            parts.append("\n")

	        return "".join(parts)