Spaces:
Sleeping
Sleeping
File size: 1,528 Bytes
025a7d5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import json
import os
import requests
from crewai import Agent, Task
from langchain.tools import tool
from unstructured.partition.html import partition_html
class BrowserTools():
@tool("Scrape website content")
def scrape_and_summarize_website(website):
"""Useful to scrape and summarize a website content"""
url = f"https://chrome.browserless.io/content?token={os.environ['BROWSERLESS_API_KEY']}"
payload = json.dumps({"url": website})
headers = {'cache-control': 'no-cache', 'content-type': 'application/json'}
response = requests.request("POST", url, headers=headers, data=payload)
elements = partition_html(text=response.text)
content = "\n\n".join([str(el) for el in elements])
content = [content[i:i + 8000] for i in range(0, len(content), 8000)]
summaries = []
for chunk in content:
agent = Agent(
role='Principal Researcher',
goal=
'Do amazing research and summaries based on the content you are working with',
backstory=
"You're a Principal Researcher at a big company and you need to do research about a given topic.",
allow_delegation=False)
task = Task(
agent=agent,
description=
f'Analyze and summarize the content below, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}'
)
summary = task.execute()
summaries.append(summary)
return "\n\n".join(summaries)
|