gpt-4-turbo-chatbot / main-scraper.py
mhanagan's picture
Update main-scraper.py
10076f7 verified
raw
history blame
3.25 kB
import requests
from bs4 import BeautifulSoup
import openai
import json
import os
import gradio as gr
# Read the key from the environment so the secret never has to be committed
# alongside the code; falls back to an empty string when the variable is unset.
api_key = os.environ.get("OPENAI_API_KEY", "")  # Or replace "" with your key
openai.api_key = api_key
def scrape_webpage(url):
    """
    Fetch a webpage and return its text content with blank lines removed.

    Args:
        url: Address of the page. "https://" is prepended when the value
            does not already start with "https://" or "http://".

    Returns:
        The text extracted from the page, each line stripped and joined with
        newlines, or the string "Failed to retrieve the website content."
        when the request fails or the response status is not 200.
    """
    url = url.strip()
    if not url.startswith(("https://", "http://")):
        url = "https://" + url

    try:
        # Without a timeout, requests may block forever on an unresponsive host.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Report network failures the same way as a non-200 response.
        return "Failed to retrieve the website content."

    if response.status_code != 200:
        return "Failed to retrieve the website content."

    soup = BeautifulSoup(response.content, "html.parser")
    stripped_lines = (line.strip() for line in soup.get_text().splitlines())
    return "\n".join(line for line in stripped_lines if line)
def chat_completion_request(model, messages, tools):
    """
    Forward a chat completion call to the OpenAI client and return its result.

    The three arguments are passed through unchanged as the `model`,
    `messages` and `tools` keyword arguments of
    `openai.chat.completions.create`.
    """
    request_kwargs = {"model": model, "messages": messages, "tools": tools}
    return openai.chat.completions.create(**request_kwargs)
def chat_with_tools(model, messages, tools):
    """
    Run one chat turn, executing any tool (function) calls the model requests.

    The model is queried once. If it answers directly, that answer is
    returned. If it requests tool calls, every call is answered (scraping the
    page for `scrape_webpage`, an "unsupported" notice otherwise), the
    exchange is appended to `messages` in place, and the model is queried a
    second time so it can answer using the tool results.

    Args:
        model: Model name passed to the chat completion request.
        messages: List of chat message dicts; extended in place when tools
            are called.
        tools: Tool definitions passed to the chat completion request.

    Returns:
        A dict with "content" (the reply text) and "internet_search" (True
        when a webpage was scraped), or None when any step raised.
    """
    try:
        response = chat_completion_request(model, messages, tools)
        reply = response.choices[0].message
        tool_calls = reply.tool_calls
        if not tool_calls:
            return {"content": reply.content, "internet_search": False}

        # Chat Completions expects the assistant message carrying the tool
        # calls to precede the results, and every tool_call_id to be answered.
        # Stage the new messages locally so a failure part-way through does
        # not leave `messages` in a state the API would reject.
        staged = [
            {
                "role": "assistant",
                "content": reply.content,
                "tool_calls": [
                    {
                        "id": call.id,
                        "type": "function",
                        "function": {
                            "name": call.function.name,
                            "arguments": call.function.arguments,
                        },
                    }
                    for call in tool_calls
                ],
            }
        ]
        scraped = False
        for call in tool_calls:
            if call.function.name == "scrape_webpage":
                url_to_scrape = json.loads(call.function.arguments)["url"]
                result = scrape_webpage(url_to_scrape)
                scraped = True
            else:
                result = f"Unsupported tool: {call.function.name}"
            staged.append(
                {"role": "tool", "tool_call_id": call.id, "content": result}
            )
        messages.extend(staged)

        response_with_data = chat_completion_request(model, messages, tools)
        return {
            "content": response_with_data.choices[0].message.content,
            "internet_search": scraped,
        }
    except Exception as e:
        # Best-effort: report the problem and signal failure with None.
        print(f"An error occurred: {e}")
        return None
# JSON-schema description of the single argument accepted by scrape_webpage.
_scrape_webpage_parameters = {
    "type": "object",
    "properties": {
        "url": {
            "type": "string",
            "description": "The URL of the webpage to scrape, it can have the http, https protocol or none and just have the domain.",
        },
    },
    "required": ["url"],
}

# Tool definitions offered to the model; only the webpage scraper is exposed.
tools = [
    {
        "type": "function",
        "function": {
            "name": "scrape_webpage",
            "description": "Scrape the content of the specified webpage.",
            "parameters": _scrape_webpage_parameters,
        },
    },
]
# Demo conversation: a system prompt steering the model toward the scraping
# tool, followed by a user request that mentions a URL.
messages = [
    {
        "role": "system",
        "content": (
            "You're an intelligent assistant. "
            "When an URL is mentioned, use the function tool to scrape the content of the webpage."
        ),
    },
    {
        "role": "user",
        "content": "Can you scrape the content of this website for me: https://www.rhino.so and give me a summary of what they do?",
    },
]
model = "gpt-4-turbo"

# Only run the demo when executed as a script, so that importing this module
# does not trigger the chat request.
if __name__ == "__main__":
    print(chat_with_tools(model, messages, tools))