File size: 690 Bytes
b5cde6a
1349210
 
b5cde6a
1349210
 
b5cde6a
1349210
 
 
 
b5cde6a
 
 
 
 
 
 
 
 
 
 
 
1349210
b5cde6a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# scraper.py
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def scrape_url(url):
    """Fetch the text content and absolute image URLs of a web page.

    Args:
        url: Address of the page to download. It is also used as the base
            for resolving relative image paths.

    Returns:
        A ``(text, images)`` tuple. ``text`` holds the page's text nodes,
        each stripped and joined with newlines. ``images`` is a list of
        absolute URLs built from the ``src`` attribute of every ``<img>``
        tag that has a non-blank one. If the download or parsing fails,
        ``text`` is an ``"[Error scraping ...]"`` message and ``images`` is
        empty; no exception is propagated to the caller.
    """
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()

        # When the Content-Type header carries no charset, requests decodes
        # text responses as ISO-8859-1, which garbles UTF-8 pages that only
        # declare their encoding in a <meta> tag. Hand the raw bytes to
        # BeautifulSoup so it can detect the encoding from the document, and
        # pass the header charset along only when the server declared one.
        content_type = res.headers.get('content-type', '')
        declared = res.encoding if 'charset' in content_type.lower() else None
        soup = BeautifulSoup(res.content, 'html.parser', from_encoding=declared)

        # get text
        text = soup.get_text(separator='\n', strip=True)

        # get image URLs (absolute)
        images = []
        for img in soup.find_all('img'):
            # Surrounding whitespace is not part of the URL; a blank src
            # would otherwise resolve to the page URL itself.
            src = (img.get('src') or '').strip()
            if src:
                images.append(urljoin(url, src))

        return text, images
    except Exception as e:
        # Deliberate best-effort boundary: callers receive an error string
        # in place of the text rather than having to handle exceptions.
        return f"[Error scraping {url}: {e}]", []