Spaces:
Sleeping
Sleeping
File size: 711 Bytes
193bf75 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from bs4 import BeautifulSoup
import requests
from requests.exceptions import HTTPError
class WebScrapingService:
    """Fetch a web page and return the visible text of its paragraphs."""

    # Elements removed from the parsed document before paragraphs are
    # collected, so that <p> tags nested inside them do not contribute text.
    _UNWANTED_TAGS = ('footer', 'script', 'style', 'noscript')

    def scrape_text_from_url(self, url, timeout=10):
        """Return the text of all ``<p>`` elements at *url*, space-joined.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up; passed
                straight to ``requests.get``. Without a timeout a stalled
                server would block the caller indefinitely.

        Returns:
            The paragraph texts joined by single spaces, with leading and
            trailing whitespace stripped. Empty string if the page contains
            no ``<p>`` elements.

        Raises:
            HTTPError: If the request fails, the server answers with a
                4xx/5xx status, or the page cannot be processed. The
                underlying exception is available as ``__cause__``.
        """
        try:
            response = requests.get(url, timeout=timeout)
            # Reject error responses instead of scraping the error page body.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            for tag in self._UNWANTED_TAGS:
                for el in soup.find_all(tag):
                    el.extract()
            text = ' '.join(p.text for p in soup.find_all('p'))
            return text.strip()
        except HTTPError:
            # Already the advertised type; re-raise untouched so the attached
            # response object stays available to the caller.
            raise
        except Exception as e:
            # Callers only expect HTTPError from this method, so every other
            # failure is wrapped, keeping the original as the cause.
            raise HTTPError(e) from e