Spaces:

Makima57
/

query-app

Sleeping

App Files Files Community

Makima57 committed on Sep 23

Commit

0d9b82c

•

1 Parent(s): 4022207

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +38 -15

app.py CHANGED Viewed

@@ -1,8 +1,10 @@
 # app.py
-import streamlit as st
-from googlesearch import search
-import requests
 # Function to perform Google search and return the first link
 def google_search(query):
@@ -14,7 +16,7 @@ def google_search(query):
         return first_link
     except Exception as e:
         st.error(f"An error occurred: {e}")
-        return None
 # Function to fetch webpage content
 def fetch_webpage_content(url):
@@ -24,37 +26,58 @@ def fetch_webpage_content(url):
         return response.text
     except Exception as e:
         st.error(f"Failed to fetch the webpage content: {e}")
-        return None
 # Streamlit app UI
-st.title("Search Link Finder")
 # Input field for search query
-query = st.text_input("Enter search query", "")
 # Button to trigger search
 if st.button("Search"):
     if query:
         first_link = google_search(query)
         if first_link:
-            st.success(f"First link: [Click here]({first_link})")
             # Fetch webpage content
-            webpage_content = fetch_webpage_content(first_link)
             if webpage_content:
                 # Download button for the webpage content
                 st.download_button(
                     label="Download Webpage Content",
                     data=webpage_content,
                     file_name="webpage_content.html",
                     mime="text/html"
-                )
         else:
-            st.warning("No results found")
     else:
         st.error("Please enter a query")

 # app.py
+#update 1
+import streamlit as st
+from googlesearch import search
+import requests
+from bs4 import BeautifulSoup
 # Function to perform Google search and return the first link
 def google_search(query):
         return first_link
     except Exception as e:
         st.error(f"An error occurred: {e}")
+        return None
 # Function to fetch webpage content
 def fetch_webpage_content(url):
         return response.text
     except Exception as e:
         st.error(f"Failed to fetch the webpage content: {e}")
+        return None
+# Function to scrape text from webpage content using Beautiful Soup
+def scrape_text(webpage_content):
+    try:
+        soup = BeautifulSoup(webpage_content, 'html.parser')
+        # Remove all script and style elements
+        for script in soup(["script", "style"]):
+            script.decompose()
+        # Get the text from the BeautifulSoup object
+        text = soup.get_text()
+        # Break the text into lines and remove leading and trailing space on each
+        lines = (line.strip() for line in text.splitlines())
+        # Break multi-headlines into a line each
+        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+        # Drop blank lines
+        text = '\n'.join(chunk for chunk in chunks if chunk)
+        return text
+    except Exception as e:
+        st.error(f"Failed to scrape text from webpage content: {e}")
+        return None
 # Streamlit app UI
+st.title("Search Link Finder")
 # Input field for search query
+query = st.text_input("Enter search query", "")
 # Button to trigger search
 if st.button("Search"):
     if query:
         first_link = google_search(query)
         if first_link:
+            st.success(f"First link: [Click here]({first_link})")
             # Fetch webpage content
+            webpage_content = fetch_webpage_content(first_link)
             if webpage_content:
+                # Scrape text from webpage content
+                scraped_text = scrape_text(webpage_content)
+                if scraped_text:
+                    st.write(scraped_text)
                 # Download button for the webpage content
                 st.download_button(
                     label="Download Webpage Content",
                     data=webpage_content,
                     file_name="webpage_content.html",
                     mime="text/html"
+                )
         else:
+            st.warning("No results found")
     else:
         st.error("Please enter a query")