| | |
| | """ |
| | Playwright初始化脚本 |
| | 确保浏览器正确安装和配置 |
| | 针对Hugging Face Space环境优化 |
| | """ |
| |
|
| | import subprocess |
| | import sys |
| | import os |
| | from pathlib import Path |
| |
|
def setup_environment():
    """Prepare cache directories and environment variables for Playwright.

    The cache location depends on the working directory: when it is ``/app``
    or lies beneath it, ``<cwd>/.cache`` is used; otherwise ``~/.cache`` is
    used. The cache directory plus its ``ms-playwright`` and ``tmp``
    subdirectories are created if missing, and the process environment
    (browser path, XDG cache, HOME and the temp-dir variables) is pointed at
    them. Each variable that is set is printed.

    Returns:
        str: Path of the cache directory that was selected.
    """
    current_dir = Path.cwd()

    # Compare path components rather than the raw string so that sibling
    # directories such as /application are not mistaken for /app.
    app_root = Path("/app")
    if current_dir == app_root or app_root in current_dir.parents:
        cache_dir = current_dir / ".cache"
    else:
        cache_dir = Path.home() / ".cache"

    playwright_cache = cache_dir / "ms-playwright"
    tmp_dir = cache_dir / "tmp"

    # Create every directory before exporting variables that point at it.
    for directory in (cache_dir, playwright_cache, tmp_dir):
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)

    env_vars = {
        "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
        "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
        "XDG_CACHE_HOME": str(cache_dir),
        # HOME becomes the directory that contains the cache directory.
        "HOME": str(cache_dir.parent),
        "TMPDIR": str(tmp_dir),
        "TMP": str(tmp_dir),
        "TEMP": str(tmp_dir),
    }

    for key, value in env_vars.items():
        os.environ[key] = value
        print(f"🔧 设置环境变量: {key}={value}")

    print(f"📁 缓存目录: {cache_dir}")
    print(f"🎭 Playwright缓存: {playwright_cache}")
    print(f"📂 当前工作目录: {current_dir}")

    return str(cache_dir)
| |
|
def check_playwright_installation():
    """Report whether Playwright can be imported and can launch Chromium.

    Imports the Playwright sync API, launches a headless Chromium browser and
    closes it again, printing progress after each step. Any exception raised
    along the way is caught and printed together with its type name.

    Returns:
        bool: True when every step succeeded, False when any step raised.
    """
    passed = False
    try:
        print("🔍 开始检查 Playwright 安装...")

        # Imported here so that a missing package is reported as a failed
        # check instead of breaking the import of this module.
        from playwright.sync_api import sync_playwright
        print("✅ Playwright 模块导入成功")

        print("🌐 尝试启动 Chromium 浏览器...")
        with sync_playwright() as playwright:
            chromium = playwright.chromium.launch(headless=True)
            print("✅ 浏览器启动成功")
            chromium.close()
            print("✅ 浏览器关闭成功")

        print("✅ Playwright 浏览器检查通过!")
        passed = True
    except Exception as error:
        print(f"❌ Playwright 浏览器检查失败: {error}")
        print(f"🔍 错误类型: {type(error).__name__}")
    return passed
| |
|
def install_browsers():
    """Install the Chromium browser by running ``python -m playwright install``.

    Prints the relevant environment variables, runs the install command with
    the current interpreter and a copy of the current environment, and echoes
    the command's return code, stdout and stderr.

    Returns:
        bool: True when the command exited with status 0; False when it exited
        with a non-zero status or when running it raised an exception.
    """
    try:
        print("🔄 正在安装 Playwright 浏览器...")

        print("🔍 当前环境变量:")
        for key in ("PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"):
            print(f" {key}: {os.environ.get(key, 'NOT SET')}")

        cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
        print(f"🚀 执行命令: {' '.join(cmd)}")

        # errors="replace" keeps undecodable bytes in the installer output
        # from raising UnicodeDecodeError, which would otherwise be reported
        # as a failed install even if the command itself succeeded.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",
            env=os.environ.copy(),
        )

        print(f"📤 命令返回码: {result.returncode}")
        if result.stdout:
            print(f"📜 标准输出:\n{result.stdout}")
        if result.stderr:
            print(f"⚠️ 错误输出:\n{result.stderr}")

        # run() is called without check=True, so a failing command is
        # signalled through returncode rather than CalledProcessError.
        if result.returncode != 0:
            print("❌ 安装命令执行失败")
            return False

        print("✅ Playwright 浏览器安装完成!")
        return True

    except Exception as e:
        # Best-effort boundary: e.g. the interpreter could not be spawned.
        print(f"❌ 安装过程中出现异常: {e}")
        return False
| |
|
def main():
    """Run the Playwright initialization sequence.

    Sets up the environment, checks the Playwright installation and, if that
    check fails, installs the browsers and checks once more. A summary line
    is printed whenever the sequence ends in failure.

    Returns:
        bool: True when a check passed, False otherwise.
    """
    print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")

    # Called for its side effects on the environment; the returned cache
    # path is not needed here.
    setup_environment()

    print("🔍 第一次检查...")
    if check_playwright_installation():
        return True

    print("🔧 检测到问题,正在重新安装浏览器...")
    if install_browsers():
        print("🔍 安装后重新检查...")
        if check_playwright_installation():
            return True

    # Reached when the install failed or the post-install check failed.
    print("😞 所有尝试都失败了")
    return False
| |
|
if __name__ == "__main__":
    # Script entry point: report the outcome and exit with status 1 on failure.
    if main():
        print("🎉 Playwright 初始化成功!")
    else:
        print("❌ Playwright 初始化失败!")
        print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
        print("🔍 调试信息:")
        # os.getuid is not available on every platform.
        current_user = os.getuid() if hasattr(os, 'getuid') else 'N/A'
        print(f" - 当前用户: {current_user}")
        print(f" - 工作目录: {Path.cwd()}")
        print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
        sys.exit(1)