import streamlit as st
import os
from huggingface_hub import InferenceClient
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.prompts import ChatPromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings

# Set up Hugging Face model and token
model = "mistralai/Mixtral-8x7B-Instruct-v0.1"  # You can change to a model of your choice from Hugging Face
access_token = os.getenv("HF_TOKEN")  # Your Hugging Face API token
client = InferenceClient(model=model, token=access_token)

# Template for response generation
template = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""

# Directory to store uploaded PDFs
pdfs_directory = '../pdfs'
os.makedirs(pdfs_directory, exist_ok=True)

# Initialize the embedding model
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")  # You can choose any model from Hugging Face

# Initialize the vector store for document indexing
vector_store = InMemoryVectorStore(embedding=embedding)

# Function to save an uploaded PDF file to disk
def upload_pdf(file):
    with open(os.path.join(pdfs_directory, file.name), "wb") as f:
        f.write(file.getbuffer())

# Function to load PDF content
def load_pdf(file_path):
    loader = PDFPlumberLoader(file_path)
    documents = loader.load()
    return documents

# Function to split text into manageable chunks
def split_text(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True
    )
    return text_splitter.split_documents(documents)

# Function to index documents in the vector store
def index_docs(documents):
    vector_store.add_documents(documents)

# Function to retrieve relevant documents based on a query
def retrieve_docs(query):
    return vector_store.similarity_search(query)

# Function to generate an answer from the retrieved documents using text generation
def answer_question(question, documents):
    context = "\n\n".join([doc.page_content for doc in documents])
    prompt = ChatPromptTemplate.from_template(template)

    # Format the prompt with the user's question and context
    question_with_context = prompt.format(question=question, context=context)

    # Use the Hugging Face InferenceClient's text_generation method
    generate_kwargs = {
        "temperature": 0.7,      # Control the creativity of the generated response
        "max_new_tokens": 150,   # Limit the length of the output
        "top_p": 0.9             # Control diversity via nucleus sampling
    }

    # Generate the response using the text generation method
    response = client.text_generation(question_with_context, **generate_kwargs)

    # Print the response to inspect its structure
    print(f"Response: {response}")

    # If the response is a string, we can return it directly
    if isinstance(response, str):
        return response
    else:
        # If it's a dictionary, extract the generated text
        return response.get("generated_text", "No answer generated.")

# Streamlit file uploader for PDFs
uploaded_file = st.file_uploader(
    "Upload PDF",
    type="pdf",
    accept_multiple_files=False
)

if uploaded_file:
    # Upload, load, split, and index the document
    upload_pdf(uploaded_file)
    documents = load_pdf(os.path.join(pdfs_directory, uploaded_file.name))
    chunked_documents = split_text(documents)
    index_docs(chunked_documents)

    # User input for a question
    question = st.chat_input()

    if question:
        st.chat_message("user").write(question)
        related_documents = retrieve_docs(question)
        answer = answer_question(question, related_documents)
        st.chat_message("assistant").write(answer)