import json
import os

import requests
from crewai import Agent, Task
from langchain.tools import tool
from unstructured.partition.html import partition_html
class BrowserTools():
    """Helpers that fetch a web page and condense it with crewAI agents."""

    # Maximum number of characters of page text handed to one summary task.
    CHUNK_SIZE = 8000
    # Seconds to wait on the Browserless request; without a timeout,
    # `requests` would block indefinitely on an unresponsive service.
    REQUEST_TIMEOUT = 60

    # NOTE(review): `tool` is imported from langchain.tools at the top of the
    # file but is not applied to this function -- confirm whether it was meant
    # to be registered as a LangChain tool.
    @staticmethod
    def scrape_and_summarize_website(website: str) -> str:
        """Useful to scrape and summarize a website content

        The page is fetched through the Browserless ``/content`` endpoint,
        split into elements with ``partition_html``, cut into pieces of at
        most ``CHUNK_SIZE`` characters, and each piece is summarized by its
        own ``Agent``/``Task`` pair.

        Args:
            website: URL of the page to fetch; sent as the ``url`` field of
                the JSON request body.

        Returns:
            The per-chunk summaries joined by blank lines. An empty string
            when the fetched page yields no text.

        Raises:
            KeyError: If the ``BROWSERLESS_API_KEY`` environment variable is
                not set.
            requests.RequestException: If the request fails, times out, or
                Browserless answers with an HTTP error status.
        """
        # Read the token first so a missing key fails before any network I/O.
        token = os.environ['BROWSERLESS_API_KEY']
        url = f"https://chrome.browserless.io/content?token={token}"
        payload = json.dumps({"url": website})
        headers = {'cache-control': 'no-cache', 'content-type': 'application/json'}
        response = requests.post(
            url,
            headers=headers,
            data=payload,
            timeout=BrowserTools.REQUEST_TIMEOUT,
        )
        # Do not summarize an error page as if it were the site's content.
        response.raise_for_status()

        elements = partition_html(text=response.text)
        page_text = "\n\n".join(str(el) for el in elements)

        # Fixed-size character slices; the last chunk may be shorter.
        size = BrowserTools.CHUNK_SIZE
        chunks = [page_text[i:i + size] for i in range(0, len(page_text), size)]

        summaries = []
        for chunk in chunks:
            # A new Agent is built for every chunk, as in the original code.
            agent = Agent(
                role='Principal Researcher',
                goal='Do amazing research and summaries based on the content you are working with',
                backstory="You're a Principal Researcher at a big company and you need to do research about a given topic.",
                allow_delegation=False,
            )
            task = Task(
                agent=agent,
                description=f'Analyze and summarize the content below, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}',
            )
            summaries.append(task.execute())
        return "\n\n".join(summaries)