Spaces:

drift-ai
/

faq-website

Runtime error

faq-website / scrape_website.py

remove get child pages

4f7d130 over 2 years ago

688 Bytes

	import requests
	from bs4 import BeautifulSoup


	def process_webpage(url: str, timeout: float = 10.0) -> str:
	    """Download a web page and return the text of its main content tags.

	    The page is fetched with an HTTP GET and parsed with BeautifulSoup's
	    ``html.parser``. Text is collected from ``<p>``, ``<h1>``-``<h6>`` and
	    ``<li>`` elements. The result is grouped by tag type in that order
	    (all paragraphs first, then each heading level, then list items), not
	    in document order. Each element's text is followed by a single space.

	    The collected text is also printed to standard output before being
	    returned.

	    Args:
	        url: Address of the page to fetch.
	        timeout: Seconds to wait for the server before giving up. Passed
	            straight through to ``requests.get``.

	    Returns:
	        The concatenated text, or an empty string when the page contains
	        none of the tags listed above.

	    Raises:
	        requests.RequestException: If the request fails or times out, or
	            if the server answers with a 4xx/5xx status
	            (``requests.HTTPError``).
	    """
	    # Without a timeout, requests waits indefinitely on a stalled server.
	    response = requests.get(url, timeout=timeout)
	    # Fail loudly instead of scraping the body of an error page.
	    response.raise_for_status()

	    # Hand BeautifulSoup the raw bytes so it can detect the encoding itself.
	    soup = BeautifulSoup(response.content, "html.parser")

	    # Gather element texts tag type by tag type, then join once at the end
	    # rather than growing a string with repeated concatenation.
	    pieces = []
	    for tag in ("p", "h1", "h2", "h3", "h4", "h5", "h6", "li"):
	        pieces.extend(element.get_text() for element in soup.find_all(tag))
	    text_content = "".join(f"{piece} " for piece in pieces)

	    print(text_content)
	    return text_content


	if __name__ == "__main__":
	    # Manual run: scrape the Drift landing page and print its text.
	    landing_page = "https://www.meet-drift.ai/"
	    process_webpage(url=landing_page)