# ========================
# 📄 streamlit_app.py
# LangChain + Gemini 1.5 Flash without FAISS
# ========================
import streamlit as st
from PyPDF2 import PdfReader
from docx import Document as DocxDocument  # aliased so it does not clash with LangChain's Document below
from bs4 import BeautifulSoup
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain_core.documents import Document

# ========================
# 1️⃣ Configuration and Setup
# ========================
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

if not GOOGLE_API_KEY:
    st.error("Missing GOOGLE_API_KEY in environment variables.")
    st.stop()

# ========================
# 2️⃣ File Size Limits
# ========================
MAX_TOTAL_SIZE_MB = 5
MAX_FILE_SIZE_MB = 2


def validate_file_sizes(uploaded_files):
    """Reject uploads that exceed the per-file or total size limits."""
    total_size = 0
    for file in uploaded_files:
        size_mb = file.size / (1024 * 1024)
        if size_mb > MAX_FILE_SIZE_MB:
            st.warning(f"{file.name} is too large ({size_mb:.2f} MB). Limit is {MAX_FILE_SIZE_MB} MB per file.")
            return False
        total_size += size_mb
    if total_size > MAX_TOTAL_SIZE_MB:
        st.warning(f"Total size of all files is {total_size:.2f} MB. Limit is {MAX_TOTAL_SIZE_MB} MB total.")
        return False
    return True


# ========================
# 3️⃣ Text Extraction
# ========================
def get_pdf_text(pdf_docs):
    """Concatenate the extractable text from every page of every uploaded PDF."""
    text = ""
    for pdf in pdf_docs:
        reader = PdfReader(pdf)
        for page in reader.pages:
            content = page.extract_text()
            if content:
                text += content
    return text


def get_docx_text(docx_file):
    """Return the paragraph text of a DOCX file, one paragraph per line."""
    doc = DocxDocument(docx_file)
    return "\n".join([para.text for para in doc.paragraphs])


def get_html_text(html_file):
    """Strip markup from an HTML file and return its visible text."""
    content = html_file.read()
    soup = BeautifulSoup(content, "html.parser")
    return soup.get_text()


# ========================
# 4️⃣ LangChain Q&A Chain
# ========================
def get_conversational_chain():
    """Build a 'stuff' QA chain that answers strictly from the provided context."""
    prompt_template = """
    Answer the question as detailed as possible from the provided context.
    If the answer is not available, say "answer is not available in the context."

    Context:
    {context}

    Question:
    {question}

    Answer:
    """
    model = ChatGoogleGenerativeAI(
        model="gemini-1.5-flash",
        temperature=0.3,
        google_api_key=GOOGLE_API_KEY
    )
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
    return chain


# ========================
# 5️⃣ Streamlit App
# ========================
def main():
    st.set_page_config(page_title="Gemini Q&A Without FAISS")
    st.header("📄 Chat with Uploaded Documents (FAISS-Free Gemini Q&A)")

    # Upload and extract
    with st.sidebar:
        st.title("Upload Files")
        uploaded_files = st.file_uploader(
            "Upload PDF, DOCX, or HTML files (Max 2MB/file, 5MB total)",
            accept_multiple_files=True,
            type=['pdf', 'docx', 'html']
        )

        full_text = ""
        if st.button("Submit & Extract"):
            if not uploaded_files:
                st.warning("Please upload at least one file.")
                return
            if not validate_file_sizes(uploaded_files):
                return
            with st.spinner("Extracting file content..."):
                for file in uploaded_files:
                    if file.name.endswith(".pdf"):
                        full_text += get_pdf_text([file])
                    elif file.name.endswith(".docx"):
                        full_text += get_docx_text(file)
                    elif file.name.endswith(".html"):
                        full_text += get_html_text(file)
                    else:
                        st.warning(f"Unsupported file type: {file.name}")
                st.session_state["context_text"] = full_text[:3000]  # Limit for Gemini token safety
                st.success("Text extracted. You can now ask questions.")

    # Ask questions
    if "context_text" in st.session_state:
        user_question = st.text_input("Ask a question based on the uploaded document:")
        if user_question:
            with st.spinner("Thinking..."):
                try:
                    chain = get_conversational_chain()

                    # ✅ Wrap the extracted context text in a Document object
                    doc = Document(page_content=st.session_state["context_text"])

                    # ✅ Pass it using the correct input key
                    response = chain(
                        {
                            "input_documents": [doc],
                            "question": user_question
                        },
                        return_only_outputs=True
                    )
                    st.markdown(f"**Gemini says:**\n\n{response['output_text']}")
                except Exception as e:
                    st.error(f"Error from Gemini: {e}")


if __name__ == "__main__":
    main()
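
# ------------------------------------------------------------------
# Usage sketch (not part of the app): one way to install and run it.
# Assumes the script is saved as streamlit_app.py (per the header above)
# and that a .env file next to it defines GOOGLE_API_KEY.
# The package names below are the standard PyPI distributions for the
# imports used in this file; pin versions to suit your environment.
#
#   pip install streamlit PyPDF2 python-docx beautifulsoup4 python-dotenv \
#       langchain langchain-core langchain-google-genai
#   echo "GOOGLE_API_KEY=<your-key>" > .env
#   streamlit run streamlit_app.py
# ------------------------------------------------------------------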