# Spaces: Running
"""
NotebookLM Watermark Remover - Gradio App

Removes the NotebookLM watermark and offers the cleaned pages either as a
zipped set of PNG images or as a PowerPoint presentation.
"""
| import gradio as gr | |
| import fitz # PyMuPDF | |
| from pathlib import Path | |
| import tempfile | |
| import shutil | |
| import zipfile | |
| from PIL import Image | |
| import numpy as np | |
| from skimage.restoration import inpaint | |
| from pptx import Presentation | |
| from pptx.util import Inches | |
| import io | |
class NotebookLMWatermarkRemover:
    """Remove the NotebookLM logo from rendered page images and package them.

    The logo rectangle is stored for a 2867x1600 reference image and is
    scaled linearly to the actual image size before inpainting.
    """

    def __init__(self):
        """Store the reference resolution and the logo rectangle measured on it."""
        # Fixed logo position, measured on a 2867x1600 image.
        self.base_width = 2867
        self.base_height = 1600
        self.logo_coords = (1530, 1595, 2620, 2860)  # r1, r2, c1, c2

    def pdf_to_pngs(self, pdf_path: str, output_dir: Path, dpi: int = 150) -> list:
        """
        Render every page of a PDF to a PNG file.

        Args:
            pdf_path: Path of the PDF file to render.
            output_dir: Directory that receives the PNG files (created if missing).
            dpi: Rendering resolution.

        Returns:
            List of the written PNG paths, in page order.
        """
        output_dir = Path(output_dir)
        # A zoom factor of 1.0 corresponds to 72 dpi, so scale relative to that.
        zoom = dpi / 72
        mat = fitz.Matrix(zoom, zoom)
        png_files = []
        # The context manager closes the document even if rendering fails.
        with fitz.open(pdf_path) as pdf_doc:
            output_dir.mkdir(parents=True, exist_ok=True)
            for page_num, page in enumerate(pdf_doc, 1):
                pix = page.get_pixmap(matrix=mat, alpha=False)
                output_path = output_dir / f"page_{page_num:04d}.png"
                pix.save(str(output_path))
                png_files.append(output_path)
        # No `yield` here: the method must stay a plain function so that
        # callers really receive the list promised by the signature.
        return png_files

    def _scaled_logo_box(self, image_width: int, image_height: int):
        """Return the logo box (r1, r2, c1, c2) scaled to the image, or None.

        None means the image's aspect does not match the reference resolution
        (scale factors for width and height differ by more than 0.01).
        """
        ratio = image_width / self.base_width
        if abs(ratio - (image_height / self.base_height)) > 0.01:
            return None
        return tuple(int(coord * ratio) for coord in self.logo_coords)

    def remove_watermark(self, image_path: str, output_path: str) -> bool:
        """
        Inpaint the NotebookLM logo region of one image.

        Images whose proportions do not match the reference resolution are
        saved to ``output_path`` unchanged.

        Args:
            image_path: Path of the input image.
            output_path: Path the result is written to.

        Returns:
            True on success (including the unchanged-copy case), False if any
            step raised; the error is printed.
        """
        try:
            with Image.open(image_path) as source:
                box = self._scaled_logo_box(*source.size)
                if box is None:
                    # Probably not a NotebookLM image: pass it through as is.
                    source.save(output_path)
                    return True
                # Palette and other exotic modes do not hold real intensities;
                # normalise them to RGB before doing arithmetic on the pixels.
                if source.mode in ("L", "RGB", "RGBA"):
                    image_array = np.array(source)
                else:
                    image_array = np.array(source.convert("RGB"))

            r1, r2, c1, c2 = box
            # shape[:2] is (rows, cols) for both 2-D and 3-D arrays.
            mask = np.zeros(image_array.shape[:2], dtype=bool)
            mask[r1:r2, c1:c2] = True

            channel_axis = -1 if image_array.ndim == 3 else None
            image_result = inpaint.inpaint_biharmonic(
                image_array, mask, channel_axis=channel_axis
            )

            # Round (not truncate) and clip so float round-trip error cannot
            # shift a pixel by one level or wrap around in uint8.
            restored = np.clip(np.rint(image_result * 255), 0, 255).astype("uint8")
            Image.fromarray(restored).save(output_path)
            return True
        except Exception as e:
            # Best-effort by design: callers decide what to do with a failure.
            print(f"移除浮水印失敗: {e}")
            return False

    def create_zip(self, png_files: list, zip_path: str):
        """
        Pack PNG files into a ZIP archive, stored flat under their file names.

        Args:
            png_files: PNG paths (``str`` or ``Path``).
            zip_path: Path of the ZIP archive to write.
        """
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for png_file in png_files:
                png_file = Path(png_file)
                zipf.write(png_file, png_file.name)

    def create_ppt(self, png_files: list, ppt_path: str):
        """
        Build a presentation with one full-slide picture per PNG file.

        Args:
            png_files: PNG paths (``str`` or ``Path``), in slide order.
            ppt_path: Path of the presentation to write.
        """
        prs = Presentation()
        prs.slide_width = Inches(10)  # 16:9
        prs.slide_height = Inches(5.625)
        blank_slide_layout = prs.slide_layouts[6]
        for png_file in png_files:
            slide = prs.slides.add_slide(blank_slide_layout)
            slide.shapes.add_picture(
                str(png_file),
                0, 0,
                width=prs.slide_width,
                height=prs.slide_height,
            )
        prs.save(ppt_path)
# Module-level instance shared by the Gradio handlers below.
remover = NotebookLMWatermarkRemover()
def process_pdf_to_png(pdf_file, progress=gr.Progress()):
    """
    Handle a PDF upload: render to PNG, remove watermarks, pack into a ZIP.

    Args:
        pdf_file: The uploaded PDF, either a path string or an object whose
            ``name`` attribute is the path; None when nothing was uploaded.
        progress: Gradio progress tracker.

    Returns:
        (status message, ZIP file path); the path is None on failure.
    """
    if pdf_file is None:
        return "請上傳 PDF 檔案", None

    # The File input is declared with type="filepath", so a plain string
    # arrives here; file-like upload objects expose the path as `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    temp_dir = None
    try:
        # Working directory; it must outlive this call on success because
        # the returned ZIP path points into it.
        temp_dir = Path(tempfile.mkdtemp())
        cleaned_dir = temp_dir / "cleaned"
        cleaned_dir.mkdir(parents=True, exist_ok=True)

        # Step 1: PDF -> PNG
        progress(0, desc="正在轉換 PDF 為 PNG...")
        zoom = 150 / 72
        mat = fitz.Matrix(zoom, zoom)
        png_files = []
        # The context manager closes the document even if rendering fails.
        with fitz.open(pdf_path) as pdf_doc:
            page_count = len(pdf_doc)
            for page_num, page in enumerate(pdf_doc, 1):
                progress((page_num / page_count) * 0.5, desc=f"轉換 PDF: {page_num}/{page_count}")
                pix = page.get_pixmap(matrix=mat, alpha=False)
                output_path = cleaned_dir / f"page_{page_num:04d}.png"
                pix.save(str(output_path))
                png_files.append(output_path)

        # Step 2: remove watermarks
        progress(0.5, desc="正在移除浮水印...")
        total = len(png_files)
        cleaned_files = []
        failed = 0
        for idx, png_file in enumerate(png_files, 1):
            progress(0.5 + (idx / total) * 0.4, desc=f"移除浮水印: {idx}/{total}")
            cleaned_path = cleaned_dir / f"cleaned_{png_file.name}"
            if remover.remove_watermark(str(png_file), str(cleaned_path)):
                # Replace the rendered page with its cleaned version.
                shutil.move(str(cleaned_path), str(png_file))
                cleaned_files.append(png_file)
            else:
                failed += 1

        # Step 3: pack into a ZIP
        progress(0.9, desc="正在打包 ZIP...")
        zip_path = temp_dir / "notebooklm_cleaned.zip"
        remover.create_zip(cleaned_files, str(zip_path))

        progress(1.0, desc="完成!")
        status = f"✓ 成功處理 {len(cleaned_files)} 張圖片"
        if failed:
            # Pages that could not be cleaned are left out of the archive;
            # say so instead of dropping them silently.
            status += f" ({failed} 張處理失敗已略過)"
        return status, str(zip_path)
    except Exception as e:
        # Nothing is returned to the user on failure, so drop the work files.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return f"✗ 處理失敗: {str(e)}", None
def process_pdf_to_ppt(pdf_file, progress=gr.Progress()):
    """
    Handle a PDF upload: render to PNG, remove watermarks, build a PPT.

    Args:
        pdf_file: The uploaded PDF, either a path string or an object whose
            ``name`` attribute is the path; None when nothing was uploaded.
        progress: Gradio progress tracker.

    Returns:
        (status message, PPT file path); the path is None on failure.
    """
    if pdf_file is None:
        return "請上傳 PDF 檔案", None

    # The File input is declared with type="filepath", so a plain string
    # arrives here; file-like upload objects expose the path as `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    temp_dir = None
    try:
        # Working directory; it must outlive this call on success because
        # the returned PPT path points into it.
        temp_dir = Path(tempfile.mkdtemp())
        png_dir = temp_dir / "pngs"
        png_dir.mkdir(parents=True, exist_ok=True)

        # Step 1: PDF -> PNG
        progress(0, desc="正在轉換 PDF 為 PNG...")
        zoom = 150 / 72
        mat = fitz.Matrix(zoom, zoom)
        png_files = []
        # The context manager closes the document even if rendering fails.
        with fitz.open(pdf_path) as pdf_doc:
            page_count = len(pdf_doc)
            for page_num, page in enumerate(pdf_doc, 1):
                progress((page_num / page_count) * 0.4, desc=f"轉換 PDF: {page_num}/{page_count}")
                pix = page.get_pixmap(matrix=mat, alpha=False)
                output_path = png_dir / f"page_{page_num:04d}.png"
                pix.save(str(output_path))
                png_files.append(output_path)

        # Step 2: remove watermarks; a page that fails keeps its original render.
        progress(0.4, desc="正在移除浮水印...")
        total = len(png_files)
        for idx, png_file in enumerate(png_files, 1):
            progress(0.4 + (idx / total) * 0.4, desc=f"移除浮水印: {idx}/{total}")
            temp_path = png_dir / f"temp_{png_file.name}"
            if remover.remove_watermark(str(png_file), str(temp_path)):
                shutil.move(str(temp_path), str(png_file))

        # Step 3: build the presentation, one full-slide picture per page.
        progress(0.8, desc="正在生成 PPT...")
        ppt_path = temp_dir / "notebooklm_cleaned.pptx"
        prs = Presentation()
        prs.slide_width = Inches(10)
        prs.slide_height = Inches(5.625)
        blank_slide_layout = prs.slide_layouts[6]
        for idx, png_file in enumerate(png_files, 1):
            progress(0.8 + (idx / total) * 0.2, desc=f"生成 PPT: {idx}/{total}")
            slide = prs.slides.add_slide(blank_slide_layout)
            slide.shapes.add_picture(
                str(png_file),
                0, 0,
                width=prs.slide_width,
                height=prs.slide_height,
            )
        prs.save(str(ppt_path))

        progress(1.0, desc="完成!")
        return f"✓ 成功生成 {len(png_files)} 頁 PPT", str(ppt_path)
    except Exception as e:
        # Nothing is returned to the user on failure, so drop the work files.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return f"✗ 處理失敗: {str(e)}", None
# Build the Gradio interface: a header, one tab per output format, a footer,
# and the click handlers that connect each tab to its processing function.
# Markdown bodies are kept flush-left so the rendered text is unchanged.
with gr.Blocks(title="NotebookLM 浮水印移除工具", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
# 🎨 NotebookLM 浮水印移除工具
自動移除 NotebookLM 生成的 PDF 右下角浮水印,並提供兩種下載方式:
- **PNG 打包下載**: 將所有頁面轉為 PNG 並打包成 ZIP
- **PPT 格式下載**: 將所有頁面轉為 PowerPoint 簡報
> 💡 提示: 此工具僅適用於 NotebookLM 生成的 PDF (解析度 2867x1600)
""")

    with gr.Tabs():
        # Tab 1: zipped PNG download
        with gr.Tab("📦 PNG 打包下載"):
            gr.Markdown("### 上傳 PDF,去除浮水印後下載 PNG 圖片包")
            with gr.Row():
                # Left column: upload and trigger
                with gr.Column():
                    pdf_input_png = gr.File(
                        label="上傳 PDF 檔案",
                        file_types=[".pdf"],
                        type="filepath",
                    )
                    process_png_btn = gr.Button("🚀 開始處理", variant="primary", size="lg")
                # Right column: status and result
                with gr.Column():
                    status_png = gr.Textbox(
                        label="處理狀態",
                        placeholder="等待處理...",
                        interactive=False,
                    )
                    zip_output = gr.File(label="下載 ZIP 檔案", interactive=False)
            gr.Markdown("""
#### 📝 說明
1. 上傳 NotebookLM 生成的 PDF 檔案
2. 點擊「開始處理」按鈕
3. 等待處理完成 (約 30-60 秒)
4. 下載包含所有去除浮水印後的 PNG 圖片的 ZIP 檔案
""")

        # Tab 2: PowerPoint download
        with gr.Tab("📊 PPT 格式下載"):
            gr.Markdown("### 上傳 PDF,去除浮水印後轉換為 PowerPoint")
            with gr.Row():
                # Left column: upload and trigger
                with gr.Column():
                    pdf_input_ppt = gr.File(
                        label="上傳 PDF 檔案",
                        file_types=[".pdf"],
                        type="filepath",
                    )
                    process_ppt_btn = gr.Button("🚀 開始處理", variant="primary", size="lg")
                # Right column: status and result
                with gr.Column():
                    status_ppt = gr.Textbox(
                        label="處理狀態",
                        placeholder="等待處理...",
                        interactive=False,
                    )
                    ppt_output = gr.File(label="下載 PPT 檔案", interactive=False)
            gr.Markdown("""
#### 📝 說明
1. 上傳 NotebookLM 生成的 PDF 檔案
2. 點擊「開始處理」按鈕
3. 等待處理完成 (約 30-60 秒)
4. 下載去除浮水印後的 PowerPoint 簡報檔案
> 💡 提示: PPT 中每頁都是一張完整的圖片,保持原始版面
""")

    # Footer information
    gr.Markdown("""
---
### ℹ️ 技術資訊
- **浮水印移除**: 使用雙調和插值法 (Biharmonic Inpainting)
- **支援格式**: PDF 輸入,PNG/PPT 輸出
- **解析度**: 150 DPI (2867x1600)
- **處理時間**: 約 2-5 秒/頁
### ⚠️ 注意事項
- 僅適用於 NotebookLM 生成的標準格式 PDF
- 浮水印位置固定在右下角
- 處理大型 PDF 可能需要較長時間
### 🔗 相關連結
- [GitHub Repository](https://github.com/your-repo)
- [Report Issues](https://github.com/your-repo/issues)
""")

    # Event bindings: each button feeds its tab's upload to the matching
    # handler and shows the returned (status, file) pair.
    process_png_btn.click(
        fn=process_pdf_to_png,
        inputs=[pdf_input_png],
        outputs=[status_png, zip_output],
    )
    process_ppt_btn.click(
        fn=process_pdf_to_ppt,
        inputs=[pdf_input_ppt],
        outputs=[status_ppt, ppt_output],
    )

# Start the web server only when executed as a script.
if __name__ == "__main__":
    app.launch()