from bs4 import BeautifulSoup
import requests
from requests.exceptions import HTTPError


class WebScrapingService:
    """Fetch a web page and extract the text of its paragraph elements."""

    def __init__(self):
        """Create the service; it holds no state."""

    def scrape_text_from_url(self, url: str, *, timeout: float = 10.0) -> str:
        """Return the text of every ``<p>`` element at ``url``, space-joined.

        ``footer``, ``script``, ``style`` and ``noscript`` elements are
        removed from the parsed document before paragraphs are collected,
        so paragraphs nested inside them are not included.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up.
                Passed straight to ``requests.get``.

        Returns:
            The paragraph texts joined by single spaces, with leading and
            trailing whitespace stripped. Empty string when the page has
            no ``<p>`` elements.

        Raises:
            HTTPError: If the request fails, the server answers with a
                4xx/5xx status, or the response cannot be parsed. Failures
                that are not already an ``HTTPError`` are wrapped in one,
                with the original exception attached as ``__cause__``.
        """
        unwanted_elements = ('footer', 'script', 'style', 'noscript')
        try:
            # Without a timeout a stalled server would block the caller forever.
            response = requests.get(url, timeout=timeout)
            # Do not scrape the body of an error page as if it were content.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            for tag in unwanted_elements:
                for el in soup.find_all(tag):
                    el.extract()

            text = ' '.join(p.text for p in soup.find_all('p'))
            return text.strip()
        except HTTPError:
            # Already the advertised exception type; keep its response info.
            raise
        except Exception as e:
            # Callers only expect HTTPError from this method; keep the cause
            # chained so the underlying failure is still visible.
            raise HTTPError(e) from e