# Source: Hugging Face Space "webscrapping", file app.py
# Added by Adit712 in commit be257a0 ("Create app.py"), 817 bytes.
import gradio as gr
import requests
from bs4 import BeautifulSoup
import re
import html
# Define the web scraping function
def scrape_website(url, timeout=10):
    """Fetch a web page and return its text content as a single line.

    Args:
        url: Address of the page to scrape. Must start with ``http://`` or
            ``https://`` (case-insensitive); surrounding whitespace is
            ignored.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Returns:
        The text extracted from the page, with every run of whitespace
        collapsed to one space and leading/trailing whitespace removed.

    Raises:
        ValueError: If ``url`` is empty or does not use http(s).
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status (4xx/5xx).
    """
    # Validate before touching the network: the URL comes straight from the
    # UI text box, so it may be empty or use an unsupported scheme.
    url = (url or "").strip()
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError(
            "Please enter a full URL starting with http:// or https://"
        )

    # Without a timeout an unresponsive host would block the app forever.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping the error page as content.
    response.raise_for_status()

    # Hand BeautifulSoup the raw bytes so it can detect the page encoding.
    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text()

    # Collapse runs of whitespace (newlines, tabs, spaces) into one space.
    cleaned_text = re.sub(r"\s+", " ", text)
    # NOTE(review): the parser already decodes HTML entities, so this is a
    # second decoding pass; it is kept to preserve the existing output.
    cleaned_text = html.unescape(cleaned_text)
    return cleaned_text.strip()
# Create a Gradio interface
# Single text box in, single text box out, wired to scrape_website.
iface = gr.Interface(
    fn=scrape_website,
    inputs="text",
    outputs="text",
    title="Web Scraping",
    description="Enter a website URL to scrape its text",
    # The keyword is `examples` (plural): a list of samples, each sample
    # being a list with one value per input component.
    examples=[["https://www.example.com"]],
)
# Start the web server for the interface.
iface.launch()