Makima57 committed on
Commit
0d9b82c
1 Parent(s): 4022207

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +38 -15
app.py CHANGED
@@ -1,8 +1,10 @@
1
 
2
  # app.py
3
- import streamlit as st
4
- from googlesearch import search
5
- import requests
 
 
6
 
7
  # Function to perform Google search and return the first link
8
  def google_search(query):
@@ -14,7 +16,7 @@ def google_search(query):
14
  return first_link
15
  except Exception as e:
16
  st.error(f"An error occurred: {e}")
17
- return None
18
 
19
  # Function to fetch webpage content
20
  def fetch_webpage_content(url):
@@ -24,37 +26,58 @@ def fetch_webpage_content(url):
24
  return response.text
25
  except Exception as e:
26
  st.error(f"Failed to fetch the webpage content: {e}")
27
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # Streamlit app UI
30
- st.title("Search Link Finder")
31
 
32
  # Input field for search query
33
- query = st.text_input("Enter search query", "")
34
 
35
  # Button to trigger search
36
  if st.button("Search"):
37
  if query:
38
  first_link = google_search(query)
39
  if first_link:
40
- st.success(f"First link: [Click here]({first_link})")
41
-
42
  # Fetch webpage content
43
- webpage_content = fetch_webpage_content(first_link)
44
-
45
  if webpage_content:
 
 
 
 
46
  # Download button for the webpage content
47
  st.download_button(
48
  label="Download Webpage Content",
49
  data=webpage_content,
50
  file_name="webpage_content.html",
51
  mime="text/html"
52
- )
53
  else:
54
- st.warning("No results found")
55
  else:
56
  st.error("Please enter a query")
57
 
58
-
59
-
60
 
 
1
 
2
  # app.py
3
+ #update 1
4
+ import streamlit as st
5
+ from googlesearch import search
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
 
9
  # Function to perform Google search and return the first link
10
  def google_search(query):
 
16
  return first_link
17
  except Exception as e:
18
  st.error(f"An error occurred: {e}")
19
+ return None
20
 
21
  # Function to fetch webpage content
22
  def fetch_webpage_content(url):
 
26
  return response.text
27
  except Exception as e:
28
  st.error(f"Failed to fetch the webpage content: {e}")
29
+ return None
30
+
31
+ # Function to scrape text from webpage content using Beautiful Soup
32
+ def scrape_text(webpage_content):
33
+ try:
34
+ soup = BeautifulSoup(webpage_content, 'html.parser')
35
+ # Remove all script and style elements
36
+ for script in soup(["script", "style"]):
37
+ script.decompose()
38
+ # Get the text from the BeautifulSoup object
39
+ text = soup.get_text()
40
+ # Break the text into lines and remove leading and trailing space on each
41
+ lines = (line.strip() for line in text.splitlines())
42
+ # Break multi-headlines into a line each
43
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
44
+ # Drop blank lines
45
+ text = '
46
+ '.join(chunk for chunk in chunks if chunk)
47
+ return text
48
+ except Exception as e:
49
+ st.error(f"Failed to scrape text from webpage content: {e}")
50
+ return None
51
 
52
  # Streamlit app UI
53
+ st.title("Search Link Finder")
54
 
55
  # Input field for search query
56
+ query = st.text_input("Enter search query", "")
57
 
58
  # Button to trigger search
59
  if st.button("Search"):
60
  if query:
61
  first_link = google_search(query)
62
  if first_link:
63
+ st.success(f"First link: [Click here]({first_link})")
 
64
  # Fetch webpage content
65
+ webpage_content = fetch_webpage_content(first_link)
 
66
  if webpage_content:
67
+ # Scrape text from webpage content
68
+ scraped_text = scrape_text(webpage_content)
69
+ if scraped_text:
70
+ st.write(scraped_text)
71
  # Download button for the webpage content
72
  st.download_button(
73
  label="Download Webpage Content",
74
  data=webpage_content,
75
  file_name="webpage_content.html",
76
  mime="text/html"
77
+ )
78
  else:
79
+ st.warning("No results found")
80
  else:
81
  st.error("Please enter a query")
82
 
 
 
83