import streamlit as st
import fitz  # PyMuPDF
import os
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFaceHub
from langchain.prompts import ChatPromptTemplate  # Use correct import

api_token = os.environ.get("HF_TOKEN", None)


# Simple document class compatible with LangChain's text splitter
class Document:
    def __init__(self, page_content):
        self.page_content = page_content
        self.metadata = {}  # The splitter expects a metadata attribute


# Function to extract text from a PDF uploaded through Streamlit
def extract_text_from_pdf(pdf_file):
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text


# Function to split the PDF text into chunks and embed them in a FAISS vector store
def pdf_to_vector_store(pdf_text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documents = [Document(page_content=pdf_text)]
    print("Documents before splitting:", documents)
    split_docs = text_splitter.split_documents(documents)
    print("Documents after splitting:", split_docs)
    if len(split_docs) > 0:
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        db = FAISS.from_documents(split_docs, embeddings)
        return db
    return None


# Streamlit app
st.title("Chat with PDF using LLAMA Model")

# File uploader
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Extract text from the uploaded PDF
    pdf_text = extract_text_from_pdf(uploaded_file)

    # Display extracted text (or handle it as needed)
    st.write("Extracted Text from PDF:")
    st.write(pdf_text[:100])  # Display first 100 characters for brevity

    # Embed the PDF text in the vector store
    st.write("Embedding PDF text into the vector store...")
    db = pdf_to_vector_store(pdf_text)
    if db:
        st.write("FAISS and embeddings setup completed.")
    else:
        st.write("Failed to set up FAISS and embeddings.")

    # If embedding was successful, proceed to Q&A
    if db:
        st.write("You can now ask questions about the PDF.")

        # Text input for the user's question
        user_question = st.text_input("Enter your question:")

        if user_question:
            # Function to answer questions using the Hugging Face model and the vector store
            def answer_question(query, db):
                # Retrieve the top 5 most relevant document chunks by similarity search
                docs = db.similarity_search(query, k=5)

                # Extract text from the retrieved documents
                context = " ".join([doc.page_content for doc in docs])

                # Construct the prompt
                prompt_template = ChatPromptTemplate.from_template(
                    """
                    Answer the following question based only on the context from the
                    vector store provided below. Think step by step before providing
                    a detailed answer.
                    {context}
                    Question: {input}
                    """
                )
                prompt = prompt_template.format(context=context, input=query)

                # Define model parameters
                model_id = "google/flan-t5-large"  # Use a smaller model
                temperature = 0.7
                max_new_tokens = 300
                top_k = 450

                # Initialize the HuggingFaceHub model and pass the generation parameters
                llm = HuggingFaceHub(
                    repo_id=model_id,
                    huggingfacehub_api_token=api_token,
                    model_kwargs={
                        "temperature": temperature,
                        "max_new_tokens": max_new_tokens,
                        "top_k": top_k,
                    },
                )

                # Get the response
                response = llm(prompt)
                return response

            # Get the answer
            answer = answer_question(user_question, db)
            st.write("Answer from the model:")
            st.write(answer)

# Note: Ensure you handle large PDFs appropriately to avoid performance issues
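
# --- Optional: cache the vector store across Streamlit reruns ---
# Streamlit re-runs this whole script on every interaction, so the code above
# re-extracts and re-embeds the PDF each time the user types a question. Below
# is a minimal sketch of one way to avoid that by keeping the FAISS index in
# st.session_state. The helper name and the session keys "pdf_name" and
# "pdf_db" are hypothetical choices for this example, not part of any API.

def get_or_build_vector_store(uploaded_file):
    """Re-extract and re-embed the PDF only when a different file is uploaded."""
    if st.session_state.get("pdf_name") != uploaded_file.name:
        text = extract_text_from_pdf(uploaded_file)
        st.session_state["pdf_db"] = pdf_to_vector_store(text)
        st.session_state["pdf_name"] = uploaded_file.name
    return st.session_state["pdf_db"]

# Usage sketch: inside the `if uploaded_file is not None:` block, replace the
# direct calls to extract_text_from_pdf and pdf_to_vector_store with
#     db = get_or_build_vector_store(uploaded_file)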