File size: 1,912 Bytes
72b4c46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import streamlit as st
import requests
from bs4 import BeautifulSoup
import urllib3

def simple_web_scraper(url, scrape_option, timeout=10.0):
    """Fetch a web page and show its text lines or its links in a Streamlit table.

    Args:
        url: Address of the page to download. Leading and trailing
            whitespace is ignored.
        scrape_option: ``'data'`` to list every non-blank line of the page
            text, or ``'links'`` to list the ``href`` of every anchor tag
            that has one.
        timeout: Timeout in seconds handed to ``urllib3.PoolManager`` so an
            unresponsive host cannot block the app indefinitely.

    Returns:
        None. Results and error messages are written to the Streamlit page
        with ``st.table`` / ``st.write``.
    """
    url = (url or '').strip()
    if not url:
        # Nothing to fetch: tell the user instead of surfacing a urllib3 error.
        st.write('Please enter a URL to scrape.')
        return

    if scrape_option not in ('data', 'links'):
        # Reject a bad option before spending a network round-trip on it.
        st.write('Invalid scrape option. Please choose "data" or "links".')
        return

    try:
        # The context manager clears the pooled connections once the request
        # is finished, so repeated calls do not accumulate open pools.
        with urllib3.PoolManager(timeout=timeout) as http:
            response = http.request('GET', url)
            status = response.status
            body = response.data

        # Anything other than 200 is reported as an error status.
        if status != 200:
            st.write(f'Error: {status}')
            return

        soup = BeautifulSoup(body, 'html.parser')

        if scrape_option == 'data':
            # One table row per non-blank line of the page's text content.
            stripped_lines = (line.strip() for line in soup.get_text().split('\n'))
            table_data = [{'Data': text} for text in stripped_lines if text]
        else:
            # href=True skips anchors without a target, which would otherwise
            # appear as "None" rows in the table.
            table_data = [
                {'Links': anchor['href']}
                for anchor in soup.find_all('a', href=True)
            ]

        st.table(table_data)

    except Exception as e:
        # UI boundary: report any failure on the page rather than crash the app.
        st.write(f'An error occurred: {e}')

# Streamlit UI: page title, the two input widgets, then the trigger button.
st.title("Web Scraping Tool")

# Target address typed in by the user.
website_url = st.text_input("Enter the URL to scrape:")

# The option values are the exact strings simple_web_scraper() dispatches on.
available_options = ['data', 'links']
scrape_option = st.selectbox("Select what to scrape:", available_options)

# Run the scraper with the current inputs when the button reports a click.
scrape_clicked = st.button("Scrape")
if scrape_clicked:
    simple_web_scraper(website_url, scrape_option)