FinancialStrategist / crewai /tools /browser_tools.py
eaglelandsonce's picture
Upload 22 files
025a7d5
raw history blame
No virus
1.53 kB
import json
import os
import requests
from crewai import Agent, Task
from langchain.tools import tool
from unstructured.partition.html import partition_html
class BrowserTools():
@tool("Scrape website content")
def scrape_and_summarize_website(website):
"""Useful to scrape and summarize a website content"""
url = f"https://chrome.browserless.io/content?token={os.environ['BROWSERLESS_API_KEY']}"
payload = json.dumps({"url": website})
headers = {'cache-control': 'no-cache', 'content-type': 'application/json'}
response = requests.request("POST", url, headers=headers, data=payload)
elements = partition_html(text=response.text)
content = "\n\n".join([str(el) for el in elements])
content = [content[i:i + 8000] for i in range(0, len(content), 8000)]
summaries = []
for chunk in content:
agent = Agent(
role='Principal Researcher',
goal=
'Do amazing research and summaries based on the content you are working with',
backstory=
"You're a Principal Researcher at a big company and you need to do research about a given topic.",
allow_delegation=False)
task = Task(
agent=agent,
description=
f'Analyze and summarize the content below, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}'
)
summary = task.execute()
summaries.append(summary)
return "\n\n".join(summaries)