Spaces:
Sleeping
Sleeping
File size: 990 Bytes
4531c67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import requests
from bs4 import BeautifulSoup
from langchain.tools import tool
class ScraperTool():
    # Namespace class for the scraping tool. `scrape` takes no `self`: it is
    # wrapped by the `@tool` decorator at class-definition time and is meant
    # to be referenced as `ScraperTool.scrape`.

    @tool("Scraper Tool")
    def scrape(url: str):
        "Useful tool to scrap a website content, use to learn more about a given url."
        # NOTE: the docstring above is intentionally left verbatim. LangChain's
        # `@tool` decorator uses the function docstring as the tool description
        # presented to the agent, so editing it changes runtime behavior.
        #
        # Behavior:
        #   * Fetches `url` with a browser-like User-Agent.
        #   * On HTTP 200, returns the text of the element whose id is
        #     'insertArticle' (whitespace-stripped, joined with single spaces).
        #   * If that element is missing, or the page cannot be retrieved
        #     (non-200 status or a network error), prints a diagnostic and
        #     returns the same message as a string, so the caller always gets
        #     a usable value instead of an exception or `None`.
        request_timeout_seconds = 30  # avoid hanging forever on a stalled server

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

        try:
            response = requests.get(
                url, headers=headers, timeout=request_timeout_seconds
            )
        except requests.RequestException:
            # Connection errors, timeouts, invalid URLs, etc. are reported the
            # same way as an unsuccessful HTTP status.
            message = "Failed to retrieve the webpage"
            print(message)
            return message

        # Check if the request was successful
        if response.status_code != 200:
            message = "Failed to retrieve the webpage"
            print(message)
            return message

        # Parse the HTML content of the page
        soup = BeautifulSoup(response.text, 'html.parser')
        article = soup.find(id='insertArticle')
        if not article:
            # Previously this path fell through to `return text` with `text`
            # never assigned, raising UnboundLocalError.
            message = "Article with specified ID not found."
            print(message)
            return message

        # Extract the text from the article
        return article.get_text(separator=' ', strip=True)
|