import gradio as gr
import time
import os
import shutil
import streamlit as st
openai_api = st.secrets["OPENAI_API_KEY"]

doc_store_path = os.path.join(os.path.dirname(__file__), "doc_dir")
if not os.path.isdir(doc_store_path):
    os.makedirs(doc_store_path)

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex,Settings
from llama_index.core.node_parser import SentenceSplitter,SemanticSplitterNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.llms.openai import OpenAI as OpenAIsum
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.storage import StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.storage.chat_store import SimpleChatStore
from llama_index.core.memory import ChatMemoryBuffer,ChatSummaryMemoryBuffer

import json
import chromadb
import tiktoken


chat_store = SimpleChatStore()
# chat_memory = ChatMemoryBuffer.from_defaults(
#     token_limit=3000,
#     chat_store=chat_store,
#     chat_store_key="user1",
# )


sum_llm = OpenAIsum(api_key=openai_api, model="gpt-3.5-turbo", max_tokens=256)
chat_summary_memory = ChatSummaryMemoryBuffer.from_defaults(
    token_limit=256,
    chat_store=chat_store,
    chat_store_key="user1",
    llm = sum_llm,
    tokenizer_fn = tiktoken.encoding_for_model("gpt-3.5-turbo").encode
)


chat_store = SimpleChatStore.from_persist_path(
    persist_path="chat_store.json"
)



# documents = SimpleDirectoryReader("./data").load_data()
db = chromadb.PersistentClient(path="./chroma_db")

chroma_collection = db.get_or_create_collection("quickstart")

vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

Settings.llm = OpenAI(model="gpt-3.5-turbo",api_key=openai_api,)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

vector_index = VectorStoreIndex.from_vector_store(vector_store, storage_context=storage_context,)
query_engine = vector_index.as_chat_engine(chat_memory=chat_summary_memory,storage_context=storage_context,use_async=True,similarity_top_k=2)

current_refs = ""

def metadata_from_doc(vec_index: VectorStoreIndex) -> dict:
    qe = vec_index.as_chat_engine()
    # f_prompt = """
    # Given the text excerpts, analyze and provide the document's title and creation date in a structured JSON format. Here are a few examples:
    
    # In this format:

    # {
    # "creation_date": "YYYY-MM-DD",
    # "title": "Title of the Document"
    # }

    # Text: 'An analysis of historical events. Written by Alex Johnson on 5 March 2019.'
    # Output: { "title": "An analysis of historical events", "creation_date": "2019-03-05" }

    # Text: 'Exploring the depths of the ocean. This comprehensive guide was authored by Dr. Emily White, published on 10-July 2021.'
    # Output: { "title": "Exploring the depths of the ocean", "creation_date": "2021-07-10" }

    # Text: 'The history of the Roman Empire.'
    # Output: { "title": "The history of the Roman Empire", "creation_date": "Unknown" }


    # Now, analyze the context from the provided document and generate json object.
    # """
    f_prompt ="""give me a only the data when this document was written and title of this document? in json format parameter (created_date,title),
        example context: 'An analysis of historical events. Written by Alex Johnson on 5 March 2019.'
        example output: { "title": "An analysis of historical events", "creation_date": "2019-03-05" }
    now analyse the context make sure to return output only in json format object only.
    """
    res = qe.query(f_prompt)
    parsed = json.loads(res.response)
    return parsed

def filter_unsaved(file_paths:list):
    for i in file_paths:
        if os.path.isfile(os.path.join(doc_store_path,os.path.basename(i))):
            file_paths.remove(i)
            print("File already exist : {}".format(i))
        else:
            shutil.copy2(i,doc_store_path)
    return file_paths

def add_doc(file_paths:list):
    print(file_paths)
    file_paths = filter_unsaved(file_paths)
    print(file_paths)
    if len(file_paths) == 0:
        return
    docs = SimpleDirectoryReader(input_files=file_paths).load_data()
    splitter = SemanticSplitterNodeParser(buffer_size=1, breakpoint_percentile_threshold=95, embed_model=Settings.embed_model,chunk_size=256)
    nodes = splitter.get_nodes_from_documents(docs)
    vector_index2 = VectorStoreIndex(nodes)
    for i in range (5):
        try:
            meta = metadata_from_doc(vector_index2)
            break
        except:
            meta = {
                "title": "Unknown",
                "creation_date": "Unknown"
            }
            continue
    
    print(meta)
    for i in range(len(nodes)):
        nodes[i].metadata.update(meta)
    vector_index.insert_nodes(nodes)
    




CSS ="""
.contain { display: flex; flex-direction: column; }
.gradio-container { height: 100vh !important; }
#component-0 { height: 100%; }
#chatbot { flex-grow: 1; overflow: auto;}
"""


def new_chat(chatbot:gr.Chatbot,textbox):
    query_engine.reset()
    return gr.update(value=""),[],"",gr.File(visible=False),gr.File(visible=False)


def chat(history, input):
    response = query_engine.chat(str(input))
    global current_refs
    files = []
    current_refs = ""
    for node in response.source_nodes:
        try:
            current_refs += f"{str(node.metadata['title'])},"
        except:
            current_refs += ""
        try:
            current_refs += f"Pg - {str(node.metadata['page_label'])},"
        except:
            current_refs += "Pg - ,"
        try:
            current_refs += f"File - {str(node.metadata['file_name'])} \n,"
        except:
            current_refs += "File - ,\n"
        
        try:
            files.append({'path':node.metadata['file_path'],'show':True,})
        except:
            files.append({'path':None,'show':False,})

    if len(files) < 2:
        for _ in range(2-len(files)):
            files.append({'path':None,'show':False,})
        
    return gr.update(value=""),history + [(input, response.response)],current_refs,gr.update(visible=files[0]['show'],value=files[0]['path']),gr.update(visible=files[1]['show'],value=files[1]['path'])

def file_upload(file,chatbot):
    print(file)
    add_doc(file)
    return gr.update(value="ChatDoc"),chatbot

with gr.Blocks(fill_height=True, css=CSS) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            title = gr.Label(value="chatdoc", label="ChatDoc")
            files = gr.UploadButton(
                    "📁 Upload PDF or doc files", file_types=[
                        '.pdf',
                        '.doc'
                    ],
                    file_count="multiple")
            references = gr.Textbox(label="References",interactive=False)
            file_down1 = gr.File(visible=False)
            file_down2 = gr.File(visible=False) 
            

        
        with gr.Column(scale=9,):
            chatbot = gr.Chatbot(
                elem_id="chatbot",
                bubble_full_width=False,
                label="ChatDoc",
                avatar_images=["https://www.freeiconspng.com/thumbs/person-icon-blue/person-icon-blue-25.png","https://cdn-icons-png.flaticon.com/512/8943/8943377.png"],
            )
            with gr.Row():
                textbox = gr.Textbox(label="Type your message", scale=10)
                clear = gr.Button(value="New Chat", size="sm", scale=1)
                clear.click(new_chat,[],[textbox, chatbot,references,file_down1,file_down2])
                textbox.submit(chat, [chatbot, textbox], [textbox, chatbot,references,file_down1,file_down2])
                

        files.upload(file_upload,[files,chatbot],[title,chatbot])
                
    
demo.launch(share=True)