Questo / app.py
shubhendu-ghosh-DS
added search results and scrape them
74c6795
raw
history blame
1.26 kB
import gradio as gr
from googlesearch import search
from bs4 import BeautifulSoup
import requests
def google_search(query, num_results=5):
    """Look up *query* via ``googlesearch.search``.

    Args:
        query: The search phrase to submit.
        num_results: How many results to request (forwarded unchanged to
            ``search``).

    Returns:
        Whatever ``search`` returns, passed through unmodified.
    """
    return search(query, num_results=num_results)
def scrape_text_from_url(url, timeout=10):
    """Download *url* and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a host that
            never answers.

    Returns:
        The paragraph texts joined by single spaces, with leading and
        trailing whitespace stripped, or ``None`` when the page could not
        be fetched or parsed (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures so the boilerplate of an
        # error page is not mistaken for real content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove specific elements (customize as needed)
        unwanted_elements = ['footer', 'script', 'style', 'noscript']
        for el in soup.find_all(unwanted_elements):
            el.decompose()

        # Extract text from remaining paragraphs
        text = ' '.join(p.text for p in soup.find_all('p'))
        return text.strip()  # Strip leading and trailing whitespaces
    except Exception as e:
        # Deliberately broad: scraping is best-effort, and one bad page must
        # not abort processing of the remaining search results.
        print(f"Error scraping {url}: {e}")
        return None
def get_google_data(search_term):
    """Search for *search_term* and return the scraped text of the results.

    Each URL produced by ``google_search`` is passed to
    ``scrape_text_from_url``; pages that yield no text (``None`` or an empty
    string) are skipped.

    Args:
        search_term: The query entered by the user.

    Returns:
        The text of all successfully scraped pages, separated by a blank
        line so that the end of one page does not run into the start of the
        next. Returns an empty string for a blank query or when nothing
        could be scraped.
    """
    # A blank query has nothing to search for; skip the network round-trip.
    if not search_term or not search_term.strip():
        return ''

    page_texts = []
    for url in google_search(search_term):
        text = scrape_text_from_url(url)
        if text:
            page_texts.append(text)
    return '\n\n'.join(page_texts)
# Expose get_google_data through a one-textbox-in, one-textbox-out Gradio UI.
iface = gr.Interface(
    fn=get_google_data,
    inputs="text",
    outputs="text",
)

# share=True asks Gradio to create a public share link in addition to the
# local server.
iface.launch(share=True)