Makima57 committed on
Commit
73a33e7
1 Parent(s): 1640937

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +38 -50
app.py CHANGED
@@ -1,71 +1,59 @@
1
 
2
- # app.py
3
- # app.py
4
  # app.py
5
  import streamlit as st
 
6
  import requests
7
- from bs4 import BeautifulSoup
8
- import re
9
 
10
- def get_first_link(query):
 
11
  try:
12
- # Use a basic Google search URL (may not work indefinitely)
13
- url = f"https://www.google.com/search?q={query.replace(' ', '+')}"
14
- headers = {
15
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
16
- }
17
- response = requests.get(url, headers=headers)
18
- response.raise_for_status()
19
-
20
- # Parse the search results page
21
- soup = BeautifulSoup(response.text, 'html.parser')
22
-
23
- # Find the first link in the search results
24
- link_elements = soup.find_all('a', href=re.compile(r"\/url\?q="))
25
- if link_elements:
26
- first_link = re.search(r'/url\?q=(.*?)&', link_elements[0]['href']).group(1)
27
- return first_link
28
- else:
29
- st.error("No links found in the search results.")
30
- return None
31
  except Exception as e:
32
- st.error(f"Error fetching search results: {e}")
33
  return None
34
 
35
- def download_webpage_content(url):
 
36
  try:
37
  response = requests.get(url)
38
- response.raise_for_status()
39
- soup = BeautifulSoup(response.text, 'html.parser')
40
- return soup.prettify()
41
  except Exception as e:
42
- st.error(f"Error fetching webpage content: {e}")
43
  return None
44
 
45
- st.title("Webpage Content Downloader")
 
46
 
47
- query = st.text_input("Enter your search query:")
 
48
 
49
- if st.button("Fetch First Link and Download Content"):
 
50
  if query:
51
- with st.spinner("Fetching the first link..."):
52
- first_link = get_first_link(query)
53
- if first_link:
54
- st.success(f"First Link Found: {first_link}")
55
- with st.spinner("Downloading webpage content..."):
56
- webpage_content = download_webpage_content(first_link)
57
- if webpage_content:
58
- st.success("Content Downloaded!")
59
- st.download_button(
60
- label="Download Webpage Content",
61
- data=webpage_content,
62
- file_name="webpage_content.html",
63
- mime="text/html"
64
- )
65
- else:
66
- st.error("No links found for the query.")
 
67
  else:
68
- st.error("Please enter a query.")
 
69
 
70
 
71
 
 
1
 
 
 
2
  # app.py
3
  import streamlit as st
4
+ from googlesearch import search
5
  import requests
 
 
6
 
7
+ # Function to perform Google search and return the first link
8
+ def google_search(query):
9
  try:
10
+ # Perform the search and get an iterator of results
11
+ search_results = search(query, num_results=10) # Get up to 10 results
12
+ first_link = next(search_results, None) # Get the first result
13
+ return first_link
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  except Exception as e:
15
+ st.error(f"An error occurred: {e}")
16
  return None
17
 
18
+ # Function to fetch webpage content
19
+ def fetch_webpage_content(url):
20
  try:
21
  response = requests.get(url)
22
+ response.raise_for_status() # Check if the request was successful
23
+ return response.text
 
24
  except Exception as e:
25
+ st.error(f"Failed to fetch the webpage content: {e}")
26
  return None
27
 
28
+ # Streamlit app UI
29
+ st.title("Search Link Finder")
30
 
31
+ # Input field for search query
32
+ query = st.text_input("Enter search query", "")
33
 
34
+ # Button to trigger search
35
+ if st.button("Search"):
36
  if query:
37
+ first_link = google_search(query)
38
+ if first_link:
39
+ st.success(f"First link: [Click here]({first_link})")
40
+
41
+ # Fetch webpage content
42
+ webpage_content = fetch_webpage_content(first_link)
43
+
44
+ if webpage_content:
45
+ # Download button for the webpage content
46
+ st.download_button(
47
+ label="Download Webpage Content",
48
+ data=webpage_content,
49
+ file_name="webpage_content.html",
50
+ mime="text/html"
51
+ )
52
+ else:
53
+ st.warning("No results found")
54
  else:
55
+ st.error("Please enter a query")
56
+
57
 
58
 
59