# Hugging Face Space (status shown when this page was captured: Runtime error)
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
import html | |
# Define the web scraping function | |
def scrape_website(url, timeout=10):
    """Download a web page and return its text with whitespace collapsed.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a host that
            never answers, which would hang the calling UI handler.

    Returns:
        All text extracted from the page's HTML as one string, with every run
        of whitespace replaced by a single space and HTML entities unescaped.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    # Send a GET request to the website, bounded by the timeout
    response = requests.get(url, timeout=timeout)

    # Parse the raw response bytes using BeautifulSoup
    soup = BeautifulSoup(response.content, "html.parser")

    # Extract all text from the HTML
    text = soup.get_text()

    # Collapse whitespace runs, then decode any entity references that remain
    cleaned_text = re.sub(r"\s+", " ", text)
    return html.unescape(cleaned_text)
# Create a Gradio interface | |
# Build the UI: one text box for the URL in, one text box for the scraped
# text out. Sample inputs are passed via `examples` (a list); `gr.Interface`
# has no `example` keyword, and passing one raises TypeError at startup.
iface = gr.Interface(
    fn=scrape_website,
    inputs="text",
    outputs="text",
    title="Web Scraping",
    description="Enter a website URL to scrape its text",
    examples=["https://www.example.com"],
)

# Start the Gradio server
iface.launch()