import streamlit as st
from googlesearch import search
import requests
from bs4 import BeautifulSoup

import chunk  # Local helper module (chunk.py) providing chunk_text and save_and_download_chunked_data


# Perform a Google search and return the first two result links
def google_search(query):
    try:
        search_results = search(query, num_results=2)  # Generator of result URLs
        # Pull the first two results and drop any missing entries
        first_two_links = [
            link
            for link in (next(search_results, None), next(search_results, None))
            if link
        ]
        return first_two_links
    except Exception as e:
        st.error(f"An error occurred: {e}")
        return None


# Fetch the raw HTML of a webpage
def fetch_webpage_content(url):
    try:
        response = requests.get(url, timeout=10)  # Timeout so a slow site cannot hang the app
        response.raise_for_status()  # Raise on HTTP error status codes
        return response.text
    except Exception as e:
        st.error(f"Failed to fetch the webpage content: {e}")
        return None


# Extract readable text from webpage content using BeautifulSoup
def scrape_text(webpage_content):
    try:
        soup = BeautifulSoup(webpage_content, "html.parser")
        for script in soup(["script", "style"]):
            script.decompose()  # Remove non-content elements
        text = soup.get_text()  # Get raw text
        lines = (line.strip() for line in text.splitlines())  # Strip each line
        # Split on double spaces so multi-word phrases stay together
        phrases = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = "\n".join(phrase for phrase in phrases if phrase)  # Drop empty fragments
        return text
    except Exception as e:
        st.error(f"Failed to scrape text from webpage content: {e}")
        return None


# Streamlit app UI
st.title("Search and Chunk Webpage Content")

# Input field for search query
query = st.text_input("Enter search query", "")

# Button to trigger search
if st.button("Search"):
    if query:
        first_two_links = google_search(query)  # Get first two links
        if first_two_links:
            for i, link in enumerate(first_two_links, 1):
                st.success(f"Link {i}: [Click here]({link})")  # Display the link

                # Fetch webpage content
                webpage_content = fetch_webpage_content(link)
                if webpage_content:
                    # Scrape text from webpage content
                    scraped_text = scrape_text(webpage_content)
                    if scraped_text:  # Ensure scraped_text is not empty
                        st.write(f"Scraped Content for Link {i}:")
                        st.text(scraped_text[:500])  # Preview the first 500 characters

                        # Chunk the scraped text using chunk.py
                        chunked_text = chunk.chunk_text(scraped_text)
                        if chunked_text:  # Ensure chunked_text is not empty
                            st.write(f"Chunked Data for Link {i}:")
                            for chunk_part in chunked_text:
                                st.write(chunk_part)  # Display each chunk

                            # Save and download chunked data using chunk.py
                            chunk.save_and_download_chunked_data(
                                chunked_text, file_name=f"chunked_data_link_{i}.txt"
                            )
                        else:
                            st.warning("No chunked data available")
                    else:
                        st.warning("No content scraped from this link")
        else:
            st.warning("No results found")
    else:
        st.error("Please enter a query")
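

# ======================================================================
# chunk.py — reference sketch (assumption). The script above imports a
# local chunk module whose contents are not shown here. This is a
# minimal, hypothetical implementation consistent with how it is called:
# chunk_text splits text into fixed-size word chunks, and
# save_and_download_chunked_data writes the chunks to a file and offers
# a Streamlit download button. The real chunk.py may differ; adjust the
# chunk_size default and file handling to match your actual module.
# ======================================================================

import streamlit as st


def chunk_text(text, chunk_size=200):
    """Split text into chunks of roughly chunk_size words each."""
    words = text.split()
    return [
        " ".join(words[i:i + chunk_size])
        for i in range(0, len(words), chunk_size)
    ]


def save_and_download_chunked_data(chunks, file_name="chunked_data.txt"):
    """Save chunks to disk and expose them via a Streamlit download button."""
    data = "\n\n".join(chunks)
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(data)
    # Distinct file_name values keep the auto-generated widget keys unique
    # when this is called once per link inside the loop above.
    st.download_button(
        label=f"Download {file_name}",
        data=data,
        file_name=file_name,
        mime="text/plain",
    )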