File size: 2,712 Bytes
d8c8769
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from openai import OpenAI
import gradio as gr

# Load variables from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key and print a hint for the user. None of these branches
# aborts start-up; they only report what looks wrong.
# The whitespace check runs BEFORE the prefix check: a key with leading
# spaces/tabs would otherwise be misreported as having the wrong prefix.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

# Shared client used by summarize_website. It is built with no explicit
# arguments - presumably it picks up OPENAI_API_KEY from the environment;
# confirm against the openai package documentation.
openai = OpenAI()

# Request headers passed to requests.get when a page is fetched: a desktop
# Chrome-on-Windows User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title and visible text.

    Attributes are set in ``__init__``:
        url:   the URL that was requested.
        title: text of the page's ``<title>``, or "No title found".
        text:  newline-separated text of the page with script, style, img
               and input elements removed.
    """

    def __init__(self, url, timeout=30):
        """Download ``url`` and extract its title and text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to ``requests.get`` as its timeout, so a
                server that never answers cannot block the caller forever.

        Raises:
            requests.RequestException: propagated from ``requests.get``
                (invalid URL, connection failure, timeout, ...).
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``soup.title`` is None when there is no <title>; ``.string`` is
        # None when the tag is empty or has more than one child.
        title_tag = soup.title
        if title_tag is not None and title_tag.string is not None:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        # html.parser does not invent a <body>, so fragments and plain-text
        # responses have none; fall back to the whole document in that case
        # instead of failing on ``None``.
        root = soup.body if soup.body is not None else soup

        # Drop elements that carry no readable content before extracting text.
        for irrelevant in root.find_all(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

# System-role instructions sent with every summarization request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

def user_prompt(website):
    """Build the user-role prompt asking for a summary of ``website``.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        str: A line naming the page title, a line of instructions, a blank
        line, then the page text.
    """
    # Built from adjacent literals rather than backslash continuations inside
    # one literal: the latter pulled the source indentation into the prompt
    # as runs of stray spaces between sentences.
    instructions = (
        "The contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too."
    )
    return (
        f"you are looking at the website whose title is {website.title}\n"
        f"{instructions}\n\n"
        f"the content of the website are {website.text}"
    )

def message(web):
    """Return the two chat messages for one summarization request.

    The first entry carries the module-level system prompt; the second
    carries the user prompt built from ``web`` by ``user_prompt``.
    """
    system_turn = dict(role="system", content=system_prompt)
    user_turn = dict(role="user", content=user_prompt(web))
    return [system_turn, user_turn]

def summarize_website(website_url):
    """Fetch ``website_url`` and return a model-written markdown summary.

    Args:
        website_url: URL typed by the user. Surrounding whitespace is
            ignored; ``None`` is treated like an empty string.

    Returns:
        str: The summary text from the first completion choice. If the input
        is empty, a prompt asking for a URL. If fetching or summarizing
        fails, a string starting with "An error occurred: ".
    """
    # Reject blank input up front instead of letting it surface as a
    # low-level "invalid URL" error from the HTTP layer.
    url = (website_url or "").strip()
    if not url:
        return "Please enter a website URL."

    # This is the UI boundary: any failure is turned into a message the
    # interface can display rather than propagating as a traceback.
    try:
        web = Website(url)
        response = openai.chat.completions.create(
            model='gpt-4o-mini',
            messages=message(web)
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"An error occurred: {e}"

# Gradio front end: a one-line text box feeds summarize_website and the
# returned string is shown through the "markdown" output component.
iface = gr.Interface(
    title="Website Summarizer",
    description="Enter a URL and get a summary of the website content.",
    fn=summarize_website,
    inputs=gr.Textbox(placeholder="Enter website URL here...", lines=1),
    outputs="markdown",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()