Spaces:
Running
Running
import asyncio | |
from playwright.async_api import async_playwright | |
from playwright_stealth.stealth import Stealth | |
from bs4 import BeautifulSoup | |
async def main(
    url: str = "https://www.fragrantica.com.br/perfume/Natura/Frescor-de-Cacau-25963.html",
) -> None:
    """Fetch a page in headless Chromium and print one of its grid divs.

    The page is opened through the playwright-stealth wrapper. Once the
    ``h1[itemprop="name"]`` heading appears (used here as the signal that
    the Cloudflare check has passed), a screenshot is saved to
    ``success_screenshot.png`` and the first
    ``<div class="grid-x grid-margin-x">`` is pretty-printed to stdout.

    Errors raised while navigating or parsing are reported on stdout
    rather than re-raised; a best-effort ``error_screenshot.png`` is
    written for debugging.

    Args:
        url: Address of the page to load. Defaults to the perfume page the
            script was originally written for.
    """
    async with Stealth().use_async(async_playwright()) as p:
        browser = await p.chromium.launch(headless=True)
        try:
            # Open the page on the browser launched via the Stealth-wrapped
            # Playwright instance. This sits inside the outer try so the
            # browser is still closed if opening the page fails.
            page = await browser.new_page()
            try:
                print("Navigating to page with corrected stealth logic...")
                # Playwright timeouts are expressed in milliseconds.
                await page.goto(url, timeout=120000)

                print("Waiting for Cloudflare check/content load...")
                main_content_selector = 'h1[itemprop="name"]'
                await page.wait_for_selector(main_content_selector, timeout=60000)
                print("β Cloudflare passed! Main content is visible.")
                await page.screenshot(path='success_screenshot.png')

                html_content = await page.content()
                soup = BeautifulSoup(html_content, 'html.parser')
                # NOTE: a multi-class string passed to class_ matches the
                # exact value of the class attribute (same classes, same
                # order), not "any div that has both classes".
                target_div = soup.find('div', class_='grid-x grid-margin-x')
                if target_div:
                    div_string = target_div.prettify()
                    print("\n--- Targeted Div HTML Content ---")
                    print(div_string)
                else:
                    print("β Could not find the <div class=\"grid-x grid-margin-x\"> tag.")
            except Exception as e:
                print(f"An error occurred: {e}")
                # The debug screenshot is best-effort: if the page or the
                # browser is already gone, taking it raises as well, and
                # that must not escape the handler as a second error.
                try:
                    await page.screenshot(path='error_screenshot.png')
                except Exception as screenshot_error:
                    print(f"Could not save 'error_screenshot.png': {screenshot_error}")
                else:
                    print("Saved 'error_screenshot.png' for debugging.")
        finally:
            await browser.close()
            print("\nBrowser closed.")
# Script entry point: start the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())