File size: 990 Bytes
4531c67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import requests
from bs4 import BeautifulSoup
from langchain.tools import tool

class ScraperTool():
    # Groups the web-scraping tool exposed to a LangChain agent.

    # NOTE: the one-line docstring of ``scrape`` is kept verbatim on purpose.
    # LangChain's ``@tool`` decorator uses the function docstring as the tool
    # description given to the model, so editing it changes runtime behavior.
    #
    # Contract of ``scrape``:
    #   url     -- address of the page to fetch.
    #   returns -- the text of the element whose id is ``insertArticle``
    #              (whitespace-stripped, fragments joined by single spaces),
    #              or a short human-readable message when the page cannot be
    #              fetched or the element is missing. A string is always
    #              returned so the calling agent gets a usable observation.
    @tool("Scraper Tool")
    def scrape(url: str) -> str:
        "Useful tool to scrap a website content, use to learn more about a given url."

        # Browser-like User-Agent string sent with the request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        }

        try:
            # A timeout prevents an unresponsive server from hanging the tool.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException as exc:
            # DNS failures, refused connections, timeouts, invalid URLs, ...
            return f"Failed to retrieve the webpage: {exc}"

        # Only a plain 200 response is treated as success.
        if response.status_code != 200:
            return "Failed to retrieve the webpage"

        # Parse the HTML content of the page and locate the article container.
        soup = BeautifulSoup(response.text, 'html.parser')
        article = soup.find(id='insertArticle')

        if not article:
            # Previously this path fell through to ``return text`` with
            # ``text`` unassigned and raised UnboundLocalError.
            return "Article with specified ID not found."

        # Extract the visible text of the article.
        return article.get_text(separator=' ', strip=True)