# File size: 711 Bytes
# 193bf75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from bs4 import BeautifulSoup
import requests
from requests.exceptions import HTTPError

class WebScrapingService:
    """Fetch a web page and return the text of its paragraph elements."""

    # Elements removed from the parsed document before paragraphs are
    # collected, so that any <p> nested inside them is not included.
    UNWANTED_ELEMENTS = ('footer', 'script', 'style', 'noscript')

    # Seconds to wait for the server before giving up. Without a timeout,
    # requests will block indefinitely on an unresponsive host.
    DEFAULT_TIMEOUT = 10

    def __init__(self):
        pass

    def scrape_text_from_url(self, url, timeout=DEFAULT_TIMEOUT):
        """Download ``url`` and return the text of its ``<p>`` elements.

        The response body is parsed with the ``html.parser`` backend, every
        element listed in ``UNWANTED_ELEMENTS`` is removed, and the text of
        the remaining ``<p>`` elements is joined with single spaces.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server; forwarded to
                ``requests.get``. Defaults to ``DEFAULT_TIMEOUT``.

        Returns:
            The paragraph text with leading and trailing whitespace
            stripped. Empty when the page contains no ``<p>`` elements.

        Raises:
            HTTPError: If the server answers with a 4xx/5xx status, or if
                fetching or parsing fails for any other reason. In the
                latter case the underlying exception is available as
                ``__cause__``.
        """
        try:
            response = requests.get(url, timeout=timeout)
            # Fail on error statuses instead of scraping the error page.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            for tag in self.UNWANTED_ELEMENTS:
                for el in soup.find_all(tag):
                    el.extract()

            text = ' '.join(p.text for p in soup.find_all('p'))

            return text.strip()
        except HTTPError:
            # Already the type callers expect; re-raise untouched so the
            # attached response and traceback are preserved.
            raise
        except Exception as e:
            # Callers rely on HTTPError for every failure of this method;
            # chain the cause so the real error is not lost.
            raise HTTPError(e) from e