Spaces:
Runtime error
Runtime error
File size: 690 Bytes
b5cde6a 1349210 b5cde6a 1349210 b5cde6a 1349210 b5cde6a 1349210 b5cde6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# scraper.py
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
def scrape_url(url):
    """Fetch the visible text and absolute image URLs of a web page.

    Args:
        url: Address of the page to download. It is also the base against
            which relative image paths are resolved.

    Returns:
        A ``(text, images)`` tuple. ``text`` is the page text with each
        fragment stripped and joined by newlines; ``images`` holds the
        absolute URL of every ``<img>`` tag that has a non-blank ``src``.
        If any ``Exception`` is raised while fetching or parsing, the tuple
        ``("[Error scraping <url>: <reason>]", [])`` is returned instead of
        propagating the error.
    """
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()

        # Parse the raw bytes rather than ``res.text``: without a charset in
        # the Content-Type header, requests assumes ISO-8859-1 for text
        # responses, which garbles UTF-8 pages that only declare their
        # encoding in a <meta> tag. A charset the server did declare is
        # still honoured by handing it to the parser explicitly.
        content_type = res.headers.get('Content-Type', '')
        declared = res.encoding if 'charset' in content_type.lower() else None
        soup = BeautifulSoup(res.content, 'html.parser', from_encoding=declared)

        # get text
        text = soup.get_text(separator='\n', strip=True)

        # get image URLs (absolute)
        images = []
        for img in soup.find_all('img'):
            # Surrounding whitespace is not part of the URL; a blank src
            # would otherwise resolve to the page URL itself.
            src = (img.get('src') or '').strip()
            if src:
                images.append(urljoin(url, src))

        return text, images
    except Exception as e:
        # Deliberate best-effort boundary: callers receive a printable
        # error message and an empty image list rather than an exception.
        return f"[Error scraping {url}: {e}]", []