# Standard library
import asyncio
import random

# Third-party
import playwright_stealth
from bs4 import BeautifulSoup
from firebase_admin import firestore
from playwright.async_api import async_playwright

# Project-local
from database import db


async def start_scrape_generator(target_url):
    """Scrape an anime listing page and sync every title found to Firestore.

    This is an async generator. It launches headless Chromium, loads
    ``target_url``, parses the rendered HTML with BeautifulSoup and upserts
    one document per ``.listupd .bsx a`` anchor into the ``streaming``
    collection, yielding a human-readable status string at each step.

    Args:
        target_url: Page to scrape. If it equals ``https://anichin.cafe``
            (trailing slashes ignored) it is replaced by the site's
            ``/anime/?order=update`` listing.

    Yields:
        str: Progress messages. Failures while navigating, parsing or
        writing are reported as a final ``Error`` message rather than
        raised.

    Raises:
        Exception: Errors from launching the browser, creating the
            context/page or applying stealth propagate to the caller; the
            browser is still closed in that case.
    """
    # NOTE(review): the leading symbols in the status strings below look like
    # emoji that were mis-decoded somewhere upstream. They are kept as found;
    # restore them from the original file if it is available.
    async with async_playwright() as p:
        yield "π Menjalankan Browser Stealth (Chromium)..."

        browser = await p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-setuid-sandbox"],
        )
        # Everything after launch runs under try/finally so the browser is
        # closed even when context/page creation or stealth setup fails.
        try:
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
            )
            page = await context.new_page()

            if not await _apply_stealth(page):
                # Previously a sync helper was called on the async page here,
                # which did nothing; make the degraded mode visible instead.
                yield "Peringatan: API async playwright_stealth tidak ditemukan, lanjut tanpa stealth."

            try:
                if target_url.rstrip('/') == "https://anichin.cafe":
                    target_url = "https://anichin.cafe/anime/?order=update"
                    yield "βοΈ Mode Crawler: Menuju daftar anime..."

                yield f"π Membuka: {target_url}"

                await page.goto(target_url, wait_until="networkidle", timeout=90000)
                # Fixed extra delay after network idle, before reading the DOM.
                await asyncio.sleep(5)

                content = await page.content()
                soup = BeautifulSoup(content, 'html.parser')

                items = soup.select('.listupd .bsx a')
                if items:
                    yield f"π Ditemukan {len(items)} judul. Mulai sinkronisasi..."
                    loop = asyncio.get_running_loop()
                    for item in items:
                        link = item.get('href')
                        if not link:
                            # An anchor without a target has nothing to store;
                            # skip it instead of aborting the whole run.
                            continue

                        title_node = item.select_one('.tt')
                        title = title_node.text.strip() if title_node is not None else ""
                        if not title:
                            # Also covers an empty `.tt`, which would otherwise
                            # produce an empty (invalid) document ID.
                            title = "Judul"
                        yield f"π¬ Memproses: {title}"

                        # The Firestore client call is blocking; run it in the
                        # default executor so the event loop (and the progress
                        # stream) is not stalled during each write.
                        await loop.run_in_executor(None, _upsert_title, title, link)
                    # This literal was split across two lines in the source;
                    # it is rejoined here as a single-line string.
                    yield "β SEMUA JUDUL BERHASIL DISINKRONISASI!"
                else:
                    yield "β Gagal bypass Cloudflare atau Selector salah. IP HF lo kemungkinan kena limit."

            except Exception as e:
                # Top-level boundary of the scrape: report the failure through
                # the status stream instead of raising into the consumer.
                yield f"β Error: {e}"
        finally:
            await browser.close()


async def _apply_stealth(page):
    """Apply playwright-stealth evasions to an async page; return False if no async API exists."""
    # playwright-stealth 1.x exposes a module-level coroutine function.
    legacy_async = getattr(playwright_stealth, "stealth_async", None)
    if legacy_async is not None:
        await legacy_async(page)
        return True

    # playwright-stealth 2.x replaced it with the Stealth class.
    stealth_cls = getattr(playwright_stealth, "Stealth", None)
    if stealth_cls is not None:
        await stealth_cls().apply_stealth_async(page)
        return True

    return False


def _upsert_title(title, link):
    """Create or merge the ``streaming`` document for one title (blocking Firestore call)."""
    # '/' is replaced because it cannot appear inside a Firestore document ID.
    doc_id = title.replace(' ', '_').replace('/', '-')
    db.collection('streaming').document(doc_id).set(
        {
            "title": title,
            "url": link,
            "updated_at": firestore.SERVER_TIMESTAMP,
        },
        merge=True,
    )