Commit
•
e8da71f
1
Parent(s):
5b8666a
Create 10_ETScrapper.py
Browse files- pages/10_ETScrapper.py +32 -0
pages/10_ETScrapper.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
from bs4 import BeautifulSoup
|
4 |
+
import io
|
5 |
+
|
6 |
+
# Streamlit page: fetch a user-supplied URL, show the page's visible text,
# and offer that text as a downloadable .txt file.
st.title('Web Scraper')

url = st.text_input('Enter the URL of the site to scrape:', '')

if st.button('Scrape'):
    # Strip whitespace so an entry made only of spaces is rejected here
    # rather than being passed on to requests.
    target_url = url.strip()
    if not target_url:
        st.error('Please enter a valid URL.')
    else:
        try:
            # requests applies no timeout by default; bound the wait so an
            # unresponsive host cannot hang the app.
            response = requests.get(target_url, timeout=10)
            # Treat HTTP 4xx/5xx as failures instead of scraping an error page.
            response.raise_for_status()
        except requests.RequestException as exc:
            # Covers malformed URLs (e.g. missing scheme), connection errors,
            # timeouts and HTTP error statuses.
            st.error(f'Could not fetch the page: {exc}')
        else:
            soup = BeautifulSoup(response.content, 'html.parser')

            # Extract the content
            content = soup.get_text()

            # Display the content
            st.text_area('Scraped Content', content, height=300)

            # Provide a download link. The text is passed directly as a str,
            # which st.download_button accepts; no intermediate buffer needed.
            st.download_button(
                label="Download Extracted Text",
                data=content,
                file_name='extracted_content.txt',
                mime='text/plain',
            )
|