import os
import pickle
import streamlit as st
from streamlit_extras.add_vertical_space import add_vertical_space
from PyPDF2 import PdfReader
from openai.embeddings_utils import get_embedding
import openai
#from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback

# Load environment variables
#load_dotenv()
#
# Retrieve OpenAI API key
#openai_api_key = os.getenv("OPENAI_API_KEY")
#if openai_api_key is None:
#    raise ValueError("The OPENAI_API_KEY environment variable is not set")
#
# Set the OpenAI API key for the OpenAI library
#openai.api_key = openai_api_key

def extract_text_from_pdf(pdf):
    """Return the text of every page in *pdf*, concatenated in page order.

    Args:
        pdf: Whatever ``PdfReader`` accepts; the app passes the object
            returned by Streamlit's file uploader straight through.

    Returns:
        A single string with the page texts joined without any separator.
        The result is empty when the document has no pages or no page
        yields any text.
    """
    pdf_reader = PdfReader(pdf)
    # Join once instead of growing a string with += for every page. The
    # `or ""` guard keeps a page that yields no text (e.g. a scanned image)
    # from breaking the concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

def get_embeddings(text_list):
    """Embed each text in *text_list* and return the results in input order.

    Every item is handed on its own to ``get_embedding``; the returned list
    has one entry per input item and is empty when *text_list* is empty.
    """
    vectors = []
    for item in text_list:
        vectors.append(get_embedding(item))
    return vectors

def main():
    """Run the Streamlit "Chat with PDF" page.

    Flow:
        1. Let the user upload a PDF and extract its text.
        2. Split the text into overlapping chunks.
        3. Load a pickled vector store named after the uploaded file from
           the working directory, or build one from the chunks and save it.
        4. When the user enters a question, fetch the 3 most similar chunks
           and pass them with the question to a "stuff" QA chain, then show
           the answer.

    Returns:
        None. All output goes to the Streamlit page; the OpenAI callback
        object is additionally printed to stdout.
    """
    st.header("Chat with PDF 💬")

    # Upload a PDF file
    pdf = st.file_uploader("Upload your PDF file", type='pdf')
    if pdf is None:
        # Nothing uploaded yet; wait for the next rerun.
        return

    # Extract text from the PDF
    text = extract_text_from_pdf(pdf)

    # Split text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text=text)

    if not chunks:
        # A PDF without extractable text (e.g. scanned pages) produces no
        # chunks, and a vector index cannot be built from an empty list.
        st.warning("No extractable text was found in this PDF.")
        return

    st.write("PDF content successfully extracted.")

    # Create or load embeddings. The cache file is named after the uploaded
    # file with its directory part and extension removed.
    # NOTE(review): the cache is keyed by file name only, so a different PDF
    # uploaded under the same name reuses the previously stored embeddings.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    st.write(f'Processing: {store_name}')
    store_path = f"{store_name}.pkl"

    if os.path.exists(store_path):
        # SECURITY NOTE: pickle.load can execute arbitrary code contained in
        # the file. This is acceptable only while the .pkl files in the
        # working directory are written exclusively by this app.
        with open(store_path, "rb") as f:
            VectorStore = pickle.load(f)
        st.write('Embeddings loaded from the disk')
    else:
        # NOTE(review): the explicit API-key setup at the top of the file is
        # commented out, so the key is presumably picked up from the
        # environment by the OpenAI client — confirm for your deployment.
        embeddings = OpenAIEmbeddings()
        VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
        with open(store_path, "wb") as f:
            pickle.dump(VectorStore, f)
        st.write('Embeddings created and saved to disk')

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file:")
    if not query:
        return

    # Retrieve the chunks most similar to the question to use as context.
    docs = VectorStore.similarity_search(query=query, k=3)

    llm = OpenAI(model_name="gpt-3.5-turbo")
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=query)
        # Printed to the server console, not to the page.
        print(cb)
    st.write(response)

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()