PrabhakarVenkat's picture
Upload 21 files
3208dbf verified
raw
history blame
1.57 kB
import json
import os
import requests
from crewai import Agent, Task
from langchain.tools import tool
from unstructured.partition.html import partition_html
class BrowserTools():
@tool("Scrape website content")
def scrape_and_summarize_website(website):
"""Useful to scrape and summarize a website content"""
url = f"https://chrome.browserless.io/content?token={os.environ['BROWSERLESS_API_KEY']}"
payload = json.dumps({"url": website})
headers = {'cache-control': 'no-cache', 'content-type': 'application/json'}
response = requests.request("POST", url, headers=headers, data=payload)
elements = partition_html(text=response.text)
content = "\n\n".join([str(el) for el in elements])
content = [content[i:i + 8000] for i in range(0, len(content), 8000)]
summaries = []
for chunk in content:
agent = Agent(
role='Principal Researcher',
goal=
'Do amazing research and summaries based on the content you are working with',
backstory=
"You're a Principal Researcher at a big company and you need to do research about a given topic.",
allow_delegation=False)
task = Task(
agent=agent,
description=
f'Analyze and summarize the content below, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}'
)
summary = task.execute()
summaries.append(summary)
return "\n\n".join(summaries)