File size: 1,341 Bytes
ec1f53c 0ebcd8e ec1f53c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import os
from scrapingbee import ScrapingBeeClient
from logger import setup_logger
import json
# Logger for this module, obtained from setup_logger under the name "scraper".
logger = setup_logger("scraper")

# Shared ScrapingBee client. The API key is read from the SCRAPINGBEE_API_KEY
# environment variable; an empty string is passed when the variable is unset.
client = ScrapingBeeClient(
    api_key=os.environ.get('SCRAPINGBEE_API_KEY', ''),
)
def scrape_url(url: str) -> str:
    """Scrape a page through ScrapingBee and return its extracted text.

    The request is sent with the stealth proxy enabled, a US country code,
    and an AI query asking for the main text content of the page.

    Failures are reported through the return value rather than raised: any
    ``Exception`` from the request is logged and converted into a message.

    Args:
        url: The URL to scrape.

    Returns:
        On HTTP 200, the response text, or a placeholder message when the
        text is empty. Otherwise a human-readable error message that
        includes the status code or the exception text.
    """
    # Reject a missing/empty URL up front instead of spending an API call on it.
    if not url:
        logger.error("Cannot scrape: no URL provided")
        return "Error scraping the URL: no URL provided"

    try:
        logger.info(f"Scraping URL: {url}")
        response = client.get(
            url,
            params={
                'stealth_proxy': True,
                'country_code': 'us',
                'ai_query': 'Extract the main text content from this page',
            },
        )

        # Anything other than 200 is treated as a failed download.
        if response.status_code != 200:
            logger.error(f"Failed to scrape URL: {url}, Status: {response.status_code}")
            return f"Failed to download the URL. Status code: {response.status_code}"

        logger.info(f"Successfully scraped URL: {url}")
        # An empty body on a 200 still yields a readable message for the caller.
        return response.text or "No content could be extracted from the URL"
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception(f"Error scraping URL: {url}")
        return f"Error scraping the URL: {str(e)}"