import gradio as gr
from googlesearch import search
from bs4 import BeautifulSoup
import requests
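# Note: the search() call below uses the num_results keyword, which matches the
# googlesearch-python package (assumed here, alongside gradio, beautifulsoup4,
# and requests) rather than the older google package.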
def google_search(query, num_results=5):
    # Return an iterable of result URLs for the query.
    search_results = search(query, num_results=num_results)
    return search_results
def scrape_text_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Remove elements that rarely contain readable text (customize as needed)
        unwanted_elements = ['footer', 'script', 'style', 'noscript']
        for tag in unwanted_elements:
            for el in soup.find_all(tag):
                el.extract()
        # Extract text from the remaining paragraphs
        text = ' '.join(p.text for p in soup.find_all('p'))
        return text.strip()  # Strip leading and trailing whitespace
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None
def get_google_data(search_term):
    # Search for the term, scrape each result page, and join the extracted text.
    whole_result = ''
    search_results = google_search(search_term)
    for result in search_results:
        text = scrape_text_from_url(result)
        if text:
            whole_result += text + '\n\n'
    return whole_result.strip()
# Simple text-in / text-out UI; share=True also exposes a temporary public link.
iface = gr.Interface(fn=get_google_data, inputs="text", outputs="text")
iface.launch(share=True)