# Residue from the file-viewer page this script was copied from:
#   Spaces: Running
#   File size: 1,777 Bytes
#   94a7d52
import asyncio
from playwright.async_api import async_playwright
from playwright_stealth.stealth import Stealth
from bs4 import BeautifulSoup
async def main():
    """Fetch one Fragrantica perfume page and print its main grid ``<div>``.

    Launches headless Chromium through a stealth-wrapped Playwright instance,
    navigates to the hard-coded URL, waits for the product heading to appear,
    saves ``success_screenshot.png``, then parses the rendered HTML with
    BeautifulSoup and prints the first ``<div class="grid-x grid-margin-x">``.

    Any exception raised while navigating or parsing is reported on stdout
    rather than propagated; a best-effort ``error_screenshot.png`` is attempted
    for debugging. The browser is always closed before returning.

    Returns:
        None. All results are emitted via ``print`` and screenshot files.
    """
    url = "https://www.fragrantica.com.br/perfume/Natura/Frescor-de-Cacau-25963.html"
    async with Stealth().use_async(async_playwright()) as p:
        browser = await p.chromium.launch(headless=True)
        # Create the page from the stealthy context
        page = await browser.new_page()
        try:
            print("Navigating to page with corrected stealth logic...")
            await page.goto(url, timeout=120000)

            print("Waiting for Cloudflare check/content load...")
            # The product heading is used as the signal that the real page
            # (not an interstitial) has rendered.
            main_content_selector = 'h1[itemprop="name"]'
            await page.wait_for_selector(main_content_selector, timeout=60000)
            print("✅ Cloudflare passed! Main content is visible.")
            await page.screenshot(path='success_screenshot.png')

            html_content = await page.content()
            soup = BeautifulSoup(html_content, 'html.parser')
            # NOTE: a multi-word ``class_`` string matches only when the tag's
            # class attribute is exactly this string, in this order.
            target_div = soup.find('div', class_='grid-x grid-margin-x')
            if target_div:
                div_string = target_div.prettify()
                print("\n--- Targeted Div HTML Content ---")
                print(div_string)
            else:
                print("❌ Could not find the <div class=\"grid-x grid-margin-x\"> tag.")
        except Exception as e:
            print(f"An error occurred: {e}")
            # The screenshot is only a debugging aid. If the page or browser is
            # what failed, taking it can raise too; that must not escape the
            # handler and mask the error already reported above.
            try:
                await page.screenshot(path='error_screenshot.png')
            except Exception as screenshot_error:
                print(f"Could not save 'error_screenshot.png': {screenshot_error}")
            else:
                print("Saved 'error_screenshot.png' for debugging.")
        finally:
            await browser.close()
            print("\nBrowser closed.")
if __name__ == "__main__":
    # Run the scraper only when executed as a script, not when imported.
    asyncio.run(main())