import gradio as gr
from googlesearch import search
from bs4 import BeautifulSoup
import requests
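# Note: the search() call below uses the num_results keyword, which matches the
# googlesearch-python package (assumed here, alongside gradio, beautifulsoup4,
# and requests) rather than the older google package.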
def google_search(query, num_results=5):
    # Return an iterable of result URLs for the query.
    search_results = search(query, num_results=num_results)
    return search_results
def scrape_text_from_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Remove elements that rarely contain readable text (customize as needed)
        unwanted_elements = ['footer', 'script', 'style', 'noscript']
        for tag in unwanted_elements:
            for el in soup.find_all(tag):
                el.extract()
        # Extract text from the remaining paragraphs
        text = ' '.join(p.text for p in soup.find_all('p'))
        return text.strip()  # Strip leading and trailing whitespace
    except Exception as e:
        print(f"Error scraping {url}: {e}")
        return None
def get_google_data(search_term):
    # Search for the term, scrape each result page, and join the extracted text.
    whole_result = ''
    search_results = google_search(search_term)
    for result in search_results:
        text = scrape_text_from_url(result)
        if text:
            whole_result += text + '\n\n'
    return whole_result.strip()
# Simple text-in / text-out UI; share=True also exposes a temporary public link.
iface = gr.Interface(fn=get_google_data, inputs="text", outputs="text")
iface.launch(share=True)