import os
import sys

import openai
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import (
    DirectoryLoader,
    Docx2txtLoader,
    PyPDFLoader,
    TextLoader,
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter

# Chroma needs a newer sqlite than many system Pythons ship; substitute the
# bundled pysqlite3 BEFORE anything imports sqlite3 (must precede the Chroma import).
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

from langchain.vectorstores import Chroma
import gradio as gr

# The hosting environment exposes the key as OPENAPIKEY, but the OpenAI/
# LangChain clients read OPENAI_API_KEY, so mirror it across.  Guard against
# a missing variable: assigning None into os.environ raises an opaque
# TypeError, so fail with an actionable message instead.
_openai_key = os.getenv("OPENAPIKEY")
if _openai_key is None:
    raise RuntimeError(
        "Environment variable OPENAPIKEY is not set; "
        "it is required to configure the OpenAI API key."
    )
os.environ["OPENAI_API_KEY"] = _openai_key

# Load every supported document under ./multiple_docs into a flat list of
# LangChain Documents.  PyPDFLoader / Docx2txtLoader / TextLoader come from
# the imports at the top of the file.
docs = []

_DOCS_DIR = "multiple_docs"
for fname in os.listdir(_DOCS_DIR):
    path = os.path.join(_DOCS_DIR, fname)
    # Match extensions case-insensitively so ".PDF" / ".Docx" are not skipped.
    ext = os.path.splitext(fname)[1].lower()
    if ext == ".pdf":
        docs.extend(PyPDFLoader(path).load())
    elif ext in (".docx", ".doc"):
        docs.extend(Docx2txtLoader(path).load())
    elif ext == ".txt":
        docs.extend(TextLoader(path).load())

# Chunk the loaded documents (1000 chars, 10-char overlap) and embed the
# chunks into a Chroma store persisted under ./db.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    docs,
    embedding=embeddings,
    persist_directory="./db",
)
vectorstore.persist()

# Conversational RAG chain: GPT-3.5 answers using the 6 chunks closest to
# the question, and the source documents are returned alongside the answer.
llm = ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo')
retriever = vectorstore.as_retriever(search_kwargs={'k': 6})
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    return_source_documents=True,
    verbose=False,
)

# Gradio chat UI.  NOTE: the history the callback receives comes from the
# chatbot component itself, so only one module-level placeholder is kept
# (the original initialized `chat_history` twice).
chat_history = []

with gr.Blocks() as demo:
    # Seed the chat with a greeting; avatar_images is [user, bot].
    chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")],avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(query, chat_history):
        """Answer `query` with the RAG chain and append the turn to history.

        Returns a pair: an update that clears the textbox, and the updated
        chat history for the chatbot component.
        """
        # The chain expects history as a list of (user, assistant) tuples;
        # Gradio may hand us lists, so normalize.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]

        result = chain({"question": query, "chat_history": chat_history_tuples})

        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)

# --- Alternative implementation (local HuggingFace models instead of OpenAI),
# --- kept commented out for reference; not executed. ---
# import os
# import sys
# from langchain.chains import ConversationalRetrievalChain
# from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# import gradio as gr
# from transformers import pipeline
# from sentence_transformers import SentenceTransformer

# __import__('pysqlite3')
# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# docs = []

# for f in os.listdir("multiple_docs"):
#     if f.endswith(".pdf"):
#         pdf_path = "./multiple_docs/" + f
#         loader = PyPDFLoader(pdf_path)
#         docs.extend(loader.load())
#     elif f.endswith('.docx') or f.endswith('.doc'):
#         doc_path = "./multiple_docs/" + f
#         loader = Docx2txtLoader(doc_path)
#         docs.extend(loader.load())
#     elif f.endswith('.txt'):
#         text_path = "./multiple_docs/" + f
#         loader = TextLoader(text_path)
#         docs.extend(loader.load())

# splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
# docs = splitter.split_documents(docs)

# # Extract the content from documents and create embeddings
# embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# texts = [doc.page_content for doc in docs]
# embeddings = embedding_model.encode(texts).tolist()  # Convert numpy arrays to lists

# # Create a Chroma vector store and add documents and their embeddings
# vectorstore = Chroma(persist_directory="./db", embedding_function=embedding_model.encode)
# vectorstore.add_texts(texts=texts, metadatas=[{"id": i} for i in range(len(texts))], embeddings=embeddings)
# vectorstore.persist()

# # Load the Hugging Face model for text generation
# generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")

# class HuggingFaceLLMWrapper:
#     def __init__(self, generator):
#         self.generator = generator
    
#     def __call__(self, prompt, max_length=512):
#         result = self.generator(prompt, max_length=max_length, num_return_sequences=1)
#         return result[0]['generated_text']

# llm = HuggingFaceLLMWrapper(generator)

# chain = ConversationalRetrievalChain.from_llm(
#     llm,
#     retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
#     return_source_documents=True,
#     verbose=False
# )

# chat_history = []

# with gr.Blocks() as demo:
#     chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")], avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
#     msg = gr.Textbox()
#     clear = gr.Button("Clear")
#     chat_history = []

#     def user(query, chat_history):
#         # Convert chat history to list of tuples
#         chat_history_tuples = []
#         for message in chat_history:
#             chat_history_tuples.append((message[0], message[1]))

#         # Get result from QA chain
#         result = chain({"question": query, "chat_history": chat_history_tuples})

#         # Append user message and response to chat history
#         chat_history.append((query, result["answer"]))

#         return gr.update(value=""), chat_history

#     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
#     clear.click(lambda: None, None, chatbot, queue=False)

# demo.launch(debug=True)