Spaces:
Sleeping
Sleeping
import os | |
import requests | |
from dotenv import load_dotenv | |
from bs4 import BeautifulSoup | |
from openai import OpenAI | |
import gradio as gr | |
# Load settings from a local .env file. override=True asks python-dotenv to
# let values from the file replace ones already set in the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key and print a hint for the most common setup mistakes.
# The whitespace test deliberately runs BEFORE the prefix test: a key with
# leading spaces or tabs also fails startswith("sk-proj-"), so checking the
# prefix first would blame the wrong thing and hide the real problem.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

# Shared client used by summarize_website. It is built with no explicit
# arguments; presumably it picks the key up from the environment loaded
# above (confirm against the openai client documentation).
openai = OpenAI()
# HTTP headers sent with every page fetch. The User-Agent mimics a desktop
# Chrome browser, presumably so sites that reject script-like clients still
# answer.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}
class Website:
    """A fetched web page reduced to its title and readable text.

    Attributes set by ``__init__``:
        url: the address that was requested.
        title: text of the page's ``<title>``, or ``"No title found"``.
        text: text of the page with script, style, img and input elements
            removed, one fragment per line.
    """

    def __init__(self, url, timeout=10):
        """Download *url* and extract its title and text.

        Args:
            url: address of the page to fetch.
            timeout: seconds to wait on the server before giving up. Without
                a timeout a stalled server would block the caller forever.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be absent, and its .string is None when the tag is
        # empty or wraps nested markup; use the placeholder in both cases so
        # the prompt never contains the literal text "None".
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        # html.parser does not invent a <body>, so fragments and non-HTML
        # responses have none; fall back to the whole document rather than
        # failing on a None body.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)
# System-role instruction sent with every summarization request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)
def user_prompt(website):
    """Compose the user-role prompt for one page.

    Args:
        website: object exposing ``title`` and ``text`` attributes.

    Returns:
        A single string: a line naming the page title, the summarization
        instructions, then the page text.
    """
    intro = f'you are looking at the website whose title is {website.title}'
    instructions = (
        '\nThe contents of this website is as follows; '
        'please provide a short summary of this website in markdown. '
        'If it includes news or announcements, then summarize these too.\n\n'
    )
    page_text = f'the content of the website are {website.text}'
    return "".join((intro, instructions, page_text))
def message(web):
    """Build the chat message list used to summarize *web*.

    Args:
        web: page object accepted by ``user_prompt``.

    Returns:
        A two-element list of role/content dicts: the system instruction
        first, then the user prompt generated for this page.
    """
    system_turn = {"role": "system", "content": system_prompt}
    user_turn = {"role": "user", "content": user_prompt(web)}
    return [system_turn, user_turn]
def summarize_website(website_url):
    """Return a markdown summary of the page at *website_url*.

    This is the callback behind the UI, so it never raises: every failure is
    turned into a message string that can be shown to the user.

    Args:
        website_url: address typed by the user; surrounding whitespace is
            ignored.

    Returns:
        The model's summary text, a prompt to enter a URL when the input is
        empty, or an ``"An error occurred: ..."`` message on failure.
    """
    # Normalize first so an empty or whitespace-only box gets a clear hint
    # instead of a low-level "invalid URL" error from the HTTP layer.
    url = str(website_url or "").strip()
    if not url:
        return "Please enter a website URL."

    try:
        web = Website(url)
        response = openai.chat.completions.create(
            model='gpt-4o-mini',
            messages=message(web)
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberately broad: this is the top-level UI boundary, and network,
        # parsing and API errors should all surface as text, not a crash.
        return f"An error occurred: {e}"
# Gradio front end: a single-line URL box in, rendered markdown out.
_url_input = gr.Textbox(lines=1, placeholder="Enter website URL here...")

iface = gr.Interface(
    title="Website Summarizer",
    description="Enter a URL and get a summary of the website content.",
    fn=summarize_website,
    inputs=_url_input,
    outputs="markdown",
)

# Start the web server only when this file is run as a script, not when it
# is imported.
if __name__ == "__main__":
    iface.launch()