File size: 1,341 Bytes
ec1f53c
 
 
 
0ebcd8e
ec1f53c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
from scrapingbee import ScrapingBeeClient
from logger import setup_logger
import json

# Logger for this module, created by the project's setup_logger helper.
logger = setup_logger("scraper")

# Initialize the ScrapingBee client with API key
# The key is read from the SCRAPINGBEE_API_KEY environment variable. A missing
# key still falls back to '' so that importing this module never fails, but the
# condition is reported here instead of surfacing only as failed scrape calls.
if not os.getenv('SCRAPINGBEE_API_KEY'):
    logger.warning("SCRAPINGBEE_API_KEY is not set; ScrapingBee requests are expected to fail")
client = ScrapingBeeClient(api_key=os.getenv('SCRAPINGBEE_API_KEY', ''))

def scrape_url(url: str) -> str:
    """
    Scrape content from URL using ScrapingBee with AI extraction

    The request goes through the module-level ScrapingBee ``client`` with the
    stealth proxy enabled, a US country code and an AI query asking for the
    page's main text. Exceptions raised while scraping are caught and logged;
    every outcome, success or failure, is reported through the returned string.

    Args:
        url: The URL to scrape

    Returns:
        str: Extracted text content or error message
    """
    # Reject missing or blank URLs up front: such a request cannot succeed,
    # so there is no point spending an API call on it.
    if not isinstance(url, str) or not url.strip():
        return "Error scraping the URL: no URL was provided"

    try:
        logger.info(f"Scraping URL: {url}")
        # NOTE(review): no timeout is passed here; whether this call can block
        # indefinitely depends on the client's defaults - confirm.
        response = client.get(
            url,
            params={
                'stealth_proxy': True,
                'country_code': 'us',
                'ai_query': 'Extract the main text content from this page',
            },
        )

        # Anything other than HTTP 200 is treated as a failed download.
        if response.status_code != 200:
            logger.error(f"Failed to scrape URL: {url}, Status: {response.status_code}")
            return f"Failed to download the URL. Status code: {response.status_code}"

        logger.info(f"Successfully scraped URL: {url}")
        # A 200 response with an empty body is reported as "no content".
        return response.text or "No content could be extracted from the URL"

    except Exception as e:
        # Broad catch is deliberate: callers get an error string, not an
        # exception. logger.exception records the traceback with the message.
        logger.exception(f"Error scraping URL: {url}")
        return f"Error scraping the URL: {str(e)}"