nat-ad / scrape.py
ibombonato's picture
Add Mercado Livre support (#4)
94a7d52 verified
import asyncio
from playwright.async_api import async_playwright
from playwright_stealth.stealth import Stealth
from bs4 import BeautifulSoup
async def main(
    url: str = "https://www.fragrantica.com.br/perfume/Natura/Frescor-de-Cacau-25963.html",
) -> None:
    """Load a perfume page in a stealth-wrapped headless Chromium and print one div.

    The page is opened through ``Stealth().use_async(async_playwright())``.
    Once the product title (``h1[itemprop="name"]``) becomes available, a
    screenshot is saved, the rendered HTML is parsed with BeautifulSoup and
    the first ``<div class="grid-x grid-margin-x">`` is pretty-printed to
    stdout.

    Args:
        url: Address of the page to scrape. Defaults to the Fragrantica page
            this script was originally written for.

    Side effects:
        Writes ``success_screenshot.png`` on success or, best effort,
        ``error_screenshot.png`` on failure (both relative paths), and
        prints progress messages to stdout.

    Errors raised while opening, loading or parsing the page are caught and
    reported on stdout rather than re-raised. Errors from starting Playwright
    or launching the browser still propagate.
    """
    async with Stealth().use_async(async_playwright()) as p:
        browser = await p.chromium.launch(headless=True)
        # Stays None until the page exists, so the error handler knows
        # whether there is anything to take a screenshot of.
        page = None
        try:
            # Create the page from the stealthy context. Done inside the try
            # so that a failure here still goes through browser.close().
            page = await browser.new_page()

            print("Navigating to page with corrected stealth logic...")
            await page.goto(url, timeout=120000)

            print("Waiting for Cloudflare check/content load...")
            # The product title is used as the signal that the real page
            # (rather than an interstitial challenge) has been rendered.
            main_content_selector = 'h1[itemprop="name"]'
            await page.wait_for_selector(main_content_selector, timeout=60000)
            print("✅ Cloudflare passed! Main content is visible.")
            await page.screenshot(path='success_screenshot.png')

            html_content = await page.content()
            soup = BeautifulSoup(html_content, 'html.parser')
            target_div = soup.find('div', class_='grid-x grid-margin-x')
            if target_div:
                div_string = target_div.prettify()
                print("\n--- Targeted Div HTML Content ---")
                print(div_string)
            else:
                print("❌ Could not find the <div class=\"grid-x grid-margin-x\"> tag.")
        except Exception as e:
            print(f"An error occurred: {e}")
            if page is not None:
                # The debugging screenshot is best effort: if the page is in
                # a state where it cannot be captured, report that instead of
                # letting a second exception hide the original error.
                try:
                    await page.screenshot(path='error_screenshot.png')
                except Exception as screenshot_error:
                    print(f"Could not save 'error_screenshot.png': {screenshot_error}")
                else:
                    print("Saved 'error_screenshot.png' for debugging.")
        finally:
            await browser.close()
            print("\nBrowser closed.")
if __name__ == "__main__":
    # Script entry point: build the scraping coroutine and run it to
    # completion on a fresh event loop.
    scrape_job = main()
    asyncio.run(scrape_job)