File size: 1,777 Bytes
94a7d52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import asyncio
from playwright.async_api import async_playwright
from playwright_stealth.stealth import Stealth
from bs4 import BeautifulSoup

async def main(
    url: str = "https://www.fragrantica.com.br/perfume/Natura/Frescor-de-Cacau-25963.html",
) -> None:
    """Load a perfume page in headless Chromium and print one of its divs.

    The page is opened through a playwright-stealth wrapped Playwright
    instance. Once the perfume title heading is present, a screenshot is
    saved to ``success_screenshot.png``, the rendered HTML is parsed with
    BeautifulSoup, and the first ``<div class="grid-x grid-margin-x">`` is
    pretty-printed to stdout.

    Failures during navigation or scraping are reported on stdout instead of
    being raised, and a best-effort ``error_screenshot.png`` is captured for
    debugging. Errors raised while launching the browser or opening the page
    still propagate. The browser is closed before returning in either case.

    Args:
        url: Page to scrape. Defaults to the perfume page this script was
            written for, so calling ``main()`` with no arguments behaves as
            before.
    """
    async with Stealth().use_async(async_playwright()) as p:
        browser = await p.chromium.launch(headless=True)

        # Create the page from the stealth-wrapped Playwright instance.
        page = await browser.new_page()

        try:
            print("Navigating to page with corrected stealth logic...")
            # Playwright timeouts are in milliseconds (120000 ms = 2 minutes).
            await page.goto(url, timeout=120000)

            print("Waiting for Cloudflare check/content load...")
            # The title heading is used as the signal that the real page,
            # rather than an interstitial, has finished rendering.
            main_content_selector = 'h1[itemprop="name"]'
            await page.wait_for_selector(main_content_selector, timeout=60000)
            print("✅ Cloudflare passed! Main content is visible.")

            await page.screenshot(path='success_screenshot.png')

            html_content = await page.content()
            soup = BeautifulSoup(html_content, 'html.parser')

            # NOTE: a multi-word ``class_`` string is matched against the
            # exact value of the class attribute, so a div carrying extra
            # classes (or the same classes in another order) will not match.
            target_div = soup.find('div', class_='grid-x grid-margin-x')
            if target_div is not None:
                div_string = target_div.prettify()
                print("\n--- Targeted Div HTML Content ---")
                print(div_string)
            else:
                print("❌ Could not find the <div class=\"grid-x grid-margin-x\"> tag.")

        except Exception as e:
            print(f"An error occurred: {e}")
            # The debug screenshot is best-effort: if the page or browser is
            # already gone, a failing screenshot must not replace the error
            # that was just reported with a new, unrelated traceback.
            try:
                await page.screenshot(path='error_screenshot.png')
            except Exception as screenshot_error:
                print(f"Could not save 'error_screenshot.png': {screenshot_error}")
            else:
                print("Saved 'error_screenshot.png' for debugging.")
        finally:
            await browser.close()
            print("\nBrowser closed.")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())