Spaces:

jackculpan
/

chatwebpage.com

Running

App Files Files Community

chatwebpage.com / conversation.py

jackculpan

mode

18e7ac6 almost 3 years ago

raw

history blame contribute delete

5.46 kB

	import os
	import openai
	import gradio as gr
	import requests
	from bs4 import BeautifulSoup
	import urllib.parse
	from selenium import webdriver
	from webdriver_manager.chrome import ChromeDriverManager

	# Load environment variables from a local .env file when python-dotenv is
	# available. Only the import is guarded, so an error raised by
	# load_dotenv() itself is not silently swallowed.
	try:
	    from dotenv import load_dotenv
	except ImportError:
	    pass  # In production, python-dotenv may not be installed
	else:
	    load_dotenv()

	# NOTE(review): the variable read here is spelled OPEN_API_KEY (not
	# OPENAI_API_KEY) -- confirm this matches the deployment's secret name.
	openai.api_key = os.getenv("OPEN_API_KEY")

	class Conversation:
	    """Chat session that answers questions about one scraped web page.

	    ``get_data`` scrapes a URL and replaces ``self.messages`` with system
	    prompts describing the page. ``ask_chatbot`` sends the running message
	    list to the OpenAI chat completion API. ``user`` and ``bot`` take and
	    return a chat history made of ``[user_message, bot_message]`` pairs
	    (presumably wired up as chat UI callbacks elsewhere -- not visible here).
	    """

	    INVALID_URL_MESSAGE = "Invalid URL, please try again."
	    RETRY_MESSAGE = "Please try again"
	    # Seconds to wait for the plain HTTP fetch before giving up.
	    REQUEST_TIMEOUT_SECONDS = 30
	    # When the joined paragraph text is longer than the threshold, only its
	    # first PARAGRAPH_TRUNCATE_LENGTH characters are sent to the model.
	    PARAGRAPH_TRUNCATE_THRESHOLD = 7500
	    PARAGRAPH_TRUNCATE_LENGTH = 3000

	    def __init__(self):
	        # Chat messages in OpenAI format: dicts with 'role' and 'content'.
	        self.messages = []

	    def to_valid_url(self, input_string):
	        """Normalize user input into an absolute http(s) URL.

	        Surrounding whitespace is stripped and ``https://`` is prepended
	        when the input carries no scheme (e.g. ``example.com``).

	        Args:
	            input_string: Raw URL text entered by the user.

	        Returns:
	            The normalized URL string.

	        Raises:
	            ValueError: If the input is empty, contains whitespace, has no
	                host, or uses a scheme other than http/https.
	        """
	        print("url: ", input_string)
	        url = input_string.strip()
	        if not url or any(ch.isspace() for ch in url):
	            raise ValueError(self.INVALID_URL_MESSAGE)

	        if url.startswith("//"):
	            # Scheme-relative form: "//host/path".
	            url = "https:" + url
	        else:
	            # A real scheme's "://" comes before any path, query or
	            # fragment; a "://" later on (e.g. inside a query string) does
	            # not count as a scheme.
	            marker = url.find("://")
	            has_scheme = marker > 0 and not any(
	                ch in "/?#" for ch in url[:marker]
	            )
	            if not has_scheme:
	                url = "https://" + url

	        try:
	            parsed_url = urllib.parse.urlparse(url)
	            hostname = parsed_url.hostname
	        except ValueError as err:
	            raise ValueError(self.INVALID_URL_MESSAGE) from err

	        # The URL is user-supplied and is later fetched server-side, so only
	        # web schemes with an actual host are accepted.
	        if parsed_url.scheme not in ("http", "https") or not hostname:
	            raise ValueError(self.INVALID_URL_MESSAGE)
	        return parsed_url.geturl()

	    @staticmethod
	    def _extract_html_content(url):
	        """Fetch ``url`` with a plain HTTP GET and return the response text."""
	        response = requests.get(
	            url, timeout=Conversation.REQUEST_TIMEOUT_SECONDS
	        )
	        return response.text

	    @staticmethod
	    def _extract_js_content(url):
	        """Render ``url`` in headless Chrome and return the page source."""
	        options = webdriver.ChromeOptions()
	        options.add_argument('--headless')
	        # NOTE(review): passing the driver path positionally is kept from the
	        # original; newer Selenium releases may require a Service object --
	        # confirm against the pinned Selenium version.
	        driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
	        try:
	            driver.get(url)
	            return driver.page_source
	        finally:
	            # Always shut the browser down, even when loading the page fails.
	            driver.quit()

	    @classmethod
	    def _smart_scraper(cls, url):
	        """Return the page HTML, falling back to Selenium when it looks incomplete.

	        The plain HTTP response is used when it contains a ``<body`` tag;
	        otherwise the page is rendered with Selenium.
	        """
	        html_content = cls._extract_html_content(url)
	        if html_content and "<body" in html_content.lower():
	            return html_content
	        # If incomplete, use Selenium to render JavaScript
	        print("Using Selenium for JavaScript rendering...")
	        return cls._extract_js_content(url)

	    def get_data(self, old_url):
	        """Scrape ``old_url`` and reset the conversation with page context.

	        Only h1, h2 and paragraph text is sent to the model.

	        Args:
	            old_url: Raw URL text entered by the user.

	        Returns:
	            The new ``self.messages`` list (system prompts only).

	        Raises:
	            ValueError: If the URL is invalid or the page has no ``<body>``.
	        """
	        url = self.to_valid_url(old_url)
	        html = self._smart_scraper(url)
	        body = BeautifulSoup(html, 'html.parser').body
	        if body is None:
	            raise ValueError(self.RETRY_MESSAGE)

	        headings_1 = [e.text for e in body.find_all('h1')]
	        headings_2 = [e.text for e in body.find_all('h2')]
	        paragraphs = [e.text for e in body.find_all('p')]

	        # Long pages are cut down to a single truncated string; short pages
	        # keep the list of paragraphs (both forms are interpolated below).
	        joined_paragraphs = ' '.join(paragraphs)
	        if len(joined_paragraphs) > self.PARAGRAPH_TRUNCATE_THRESHOLD:
	            paragraphs = joined_paragraphs[:self.PARAGRAPH_TRUNCATE_LENGTH]

	        self.messages = [
	            {'role': 'system', 'content': "You are a helpful assistant that must answer questions about a website."},
	            {'role': 'system', 'content': f"here are the h1s - {headings_1}"},
	            {'role': 'system', 'content': f"here are the h2s - {headings_2}"},
	            {'role': 'system', 'content': f"here are the paragraphs - {paragraphs}"},
	        ]
	        return self.messages

	    def ask_chatbot(self, input):
	        """Send a user message to the chat model and return its reply.

	        The user message and the reply are added to ``self.messages`` only
	        after a successful response, so a failed request leaves the history
	        unchanged.

	        Args:
	            input: The user's message text. (The name shadows the builtin
	                but is kept for compatibility with keyword callers.)

	        Returns:
	            The assistant's reply text.

	        Raises:
	            ValueError: If ``input`` is empty, the API rejects the request,
	                or the reply is empty.
	        """
	        if not input:
	            raise ValueError(self.RETRY_MESSAGE)

	        user_message = {"role": "user", "content": input}
	        try:
	            chat = openai.ChatCompletion.create(
	                model="gpt-3.5-turbo",
	                messages=self.messages + [user_message],
	            )
	        except openai.error.InvalidRequestError as err:
	            raise ValueError("The website is too large to understand. Please try a different site.") from err

	        reply = chat.choices[0].message.content
	        if not reply:
	            raise ValueError(self.RETRY_MESSAGE)

	        # Mutate in place so references to the list handed out by get_data
	        # keep seeing the full conversation.
	        self.messages.extend(
	            [user_message, {"role": "assistant", "content": reply}]
	        )
	        return reply

	    def user(self, user_message, history):
	        """Append the user's message to the history with a pending reply.

	        Returns:
	            A tuple of an empty string and a new history list whose last
	            entry is ``[user_message, None]``.
	        """
	        return "", history + [[user_message, None]]

	    def bot(self, history):
	        """Fill in the reply for the last history entry.

	        The last entry's user message is sent to ``ask_chatbot``; if that
	        raises ``ValueError`` the reply is "Please try again".

	        Returns:
	            The same ``history`` list, with its last entry updated in place.
	        """
	        user_message = history[-1][0]
	        try:
	            bot_message = self.ask_chatbot(user_message)
	        except ValueError:
	            bot_message = "Please try again"
	        history[-1][1] = bot_message
	        return history