from llama_index import (
    SimpleDirectoryReader,
    Prompt,
    LLMPredictor,
    VectorStoreIndex,
    PromptHelper,
    ServiceContext,
    load_index_from_storage,
    StorageContext,
)
from langchain.chat_models import ChatOpenAI
#from llama_index.llms.message import Message, Role
import os
import openai
import tiktoken
import gradio as gr

#If you have the OpenAI API key as an environment variable, enable the line below
#openai.api_key = os.getenv("OPENAI_API_KEY")
#If you have the OpenAI API key as a string, set it in the two lines below
openai.api_key = ""
os.environ["OPENAI_API_KEY"] = ''

#Set system prompt
#messages = [
#    {"role": "system", "content": "This Chatbot is helpful, accurate, and provides all relevant information from the Treasury Financial Manual (TFM) when responding. This Chatbot provides accurate citations from the Treasury Financial Manual in the following format for all responses: Volume>Part>Chapter>Sub Chapter"},
#    {"role": "user", "content": "where should public money be stored?"},
#    {"role": "assistant", "content": "Public money should be stored in Treasury accounts as outlined in the Treasury Financial Manual (TFM). Specifically, Section 2025 of the TFM provides guidance on requesting a Treasury account. Entities should submit requests to the Fiscal Service, Budget Reporting Branch, Budget and Appropriations Analysis Section (BAAS) at baasgroup@fiscal.treasury.gov. It is recommended to submit these requests as early in the month as possible to allow for timely establishment and coordination. This information was located in Volume I>Part 1>Chapter 2000>Section 2045, Making Deposits"}
#]

#Conversation state
messages = []
Chat_message = []
chat_history = []
custom_chat_history = []

#Prompt helper and token settings
max_input_size = 4096
num_outputs = 512
chunk_size_limit = 1000
chunk_overlap_ratio = .1
prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio, chunk_size_limit)
store = './storage'
max_response_tokens = 1000
token_limit = 4097

#Question-answering prompt template used by the query/chat engines
template = (
    "This Chatbot is helpful, accurate, and will use the context below for answering all questions. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this information, please answer the question by providing a detailed summary and provide accurate citations for all referenced areas from the Treasury Financial Manual in the following format, Volume>Part>Chapter>Section, at the end of each response. The sections are listed above the associated content. {query_str}\n"
)
qa_template = Prompt(template)

#Create-index prompt value
#createIndex = input('Do you wish to recreate the index (Y/N)? ')
#Set LLM predictor values
#llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
#Set service context
#service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)

#Loads the OpenAI key supplied through the interface
def load_api_key(api_key):
    os.environ["OPENAI_API_KEY"] = str(api_key)
    openai.api_key = str(api_key)

#Identifies the current number of tokens used for the conversation
def num_tokens_from_messages(messages, model="gpt-3.5-turbo"):
    encoding = tiktoken.encoding_for_model(model)
    num_tokens = 0
    for message in messages:
        num_tokens += 4  # every message follows {role/name}\n{content}\n
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":  # if there's a name, the role is omitted
                num_tokens += -1  # role is always required and always 1 token
    num_tokens += 2  # every reply is primed with assistant
    print(num_tokens)
    return num_tokens

#Constructs the index from the docs subfolder and saves it to the storage subfolder
def construct_index():
    print('Constructing index')
    # load in the documents from the docs subfolder
    docs = SimpleDirectoryReader('docs').load_data()
    # build a service context locally, since the module-level one is commented out
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index = VectorStoreIndex.from_documents(docs, service_context=service_context)
    index.set_index_id('vector_index')
    # stores the json index files in the storage subfolder
    index.storage_context.persist(persist_dir=store)
    return index

#Resets the conversation once the token limit is reached
def generate_restart(prompt):
    messages.clear()
    messages.append({"role": "system", "content": "Tell the user that this conversation has been reset due to the discussion size reaching maximum size, and to please start by asking a new question."})
    storage_context = StorageContext.from_defaults(persist_dir=store)
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    #index = load_index_from_storage(storage_context)
    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=store),
        service_context=service_context,
    )
    #query_engine = index.as_query_engine(text_qa_template=qa_template)
    chat_engine = index.as_chat_engine(text_qa_template=qa_template)
    string_message = str(messages)
    #response = query_engine.query(string_message)
    response = chat_engine.chat(string_message)
    messages.clear()
    messages.append({"role": "system", "content": "This Chatbot is helpful, accurate, and provides all relevant information from the Treasury Financial Manual (TFM) when responding. This Chatbot always provides accurate citations from the TFM."})
    messages.append({"role": "user", "content": ""})
    messages.append({"role": "assistant", "content": ""})
    print("restart initiated")
    print(messages)
    return response.response

#Generates the ChatGPT call
def generate_response(prompt):
    messages.append({"role": "user", "content": prompt})
    storage_context = StorageContext.from_defaults(persist_dir=store)
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
    service_context = ServiceContext.from_defaults(llm=ChatOpenAI(temperature=0., model_name="gpt-3.5-turbo"))
    #service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=store),
        service_context=service_context,
    )
    #user_message = Message(
    #    content=prompt,
    #    role=Role.User,
    #    timestamp=datetime.datetime.now(),
    #)
    #query_engine = index.as_query_engine()
    #chat_engine = CondenseQuestionChatEngine.from_defaults(query_engine=query_engine, condense_question_prompt=qa_template, chat_history=custom_chat_history, verbose=True)
    chat_engine = index.as_chat_engine(verbose=True, chat_history=chat_history, text_qa_template=qa_template, chat_mode='condense_question')
    string_message = str(messages)
    #response = query_engine.query(string_message)
    response = chat_engine.chat(prompt, chat_history)
    string_response = str(response)
    #custom_chat_history.append({"role": "Assistant", "content": string_response})
    #setattr(custom_chat_history, 'role', 'Assistant')
    #setattr(prompt, 'role', 'User')
    #history_message = Message(
    #    content="response",
    #    role=Role.SYSTEM,
    #    timestamp=datetime.datetime.now(),
    #)
    messages.append({"role": "assistant", "content": string_response})
    num_tokens_from_messages(messages)
    print(messages)
    print("below is history")
    print(chat_history)
    return ('TFM Chatbot: ' + response.response)

def generate_response_backup(prompt):
    messages.append({"role": "user", "content": prompt})
    storage_context = StorageContext.from_defaults(persist_dir=store)
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.1, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
    service_context = ServiceContext.from_defaults(llm=ChatOpenAI(temperature=0., model_name="gpt-3.5-turbo"))
    #service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir=store),
        service_context=service_context,
    )
    #query_engine = index.as_query_engine(text_qa_template=qa_template)
    #chat_engine = index.as_chat_engine(chat_mode='react', verbose=True, text_qa_template=qa_template)
    chat_engine = index.as_chat_engine(verbose=True, text_qa_template=qa_template, chat_history=custom_chat_history)
    string_message = str(messages)
    #response = query_engine.query(string_message)
    response = chat_engine.chat(prompt, custom_chat_history)
    string_response = str(response)
    custom_chat_history.append({"role": "Assistant", "content": string_response})
    #Optional[List[ChatMessage]] = None) -> Union[Response, StreamingResponse]
    messages.append({"role": "assistant", "content": string_response})
    #messages.clear()
    #string_message = ''
    num_tokens_from_messages(messages)
    print(messages)
    print("below is history")
    print(chat_history)
    print("below is type")
    print(type(custom_chat_history))
    type(chat_history)
    return ('TFM Chatbot: ' + response.response)

#Called by the send button on the interface and tracks the history of the conversation
def my_chatbot_backup(input, history):
    history = history or []
    if num_tokens_from_messages(messages) < (int(token_limit) - int(max_response_tokens)):
        output = generate_response(input)
        history.append((input, output))
        return history, history
    else:
        history.clear()
        output = generate_restart(input)
        history.append((input, output))
        return history, history

def my_chatbot(input, history):
    history = history or []
    if num_tokens_from_messages(messages) < (int(token_limit) - int(max_response_tokens)):
        output = generate_response(input)
        history.append((input, output))
        return history, history
    else:
        history.clear()
        output = generate_restart(input)
        history.append((input, output))
        return history, history

def index_chatbot(input_text):
    # load the stored index once and cache it on the function
    if not hasattr(index_chatbot, 'index'):
        storage_context = StorageContext.from_defaults(persist_dir=store)
        index_chatbot.index = load_index_from_storage(storage_context)
    query_engine = index_chatbot.index.as_query_engine(text_qa_template=qa_template)
    response = query_engine.query(input_text)
    return response.response

with gr.Blocks() as demo:
    gr.Markdown("""

CGFS TFM Chatbot 2.1

""")
    api_key = gr.Textbox(type='password', label="Enter the API key")
    chatbot = gr.Chatbot()
    state = gr.State()
    text = gr.Textbox(label="Input", info="", lines=2, placeholder="Hello. Ask me a question about the Treasury Financial Manual. Please approach each question as if it is a new question; my memory is limited in this model.")
    submit = gr.Button("SEND")
    submit.click(load_api_key, [api_key])
    submit.click(my_chatbot, inputs=[text, state], outputs=[chatbot, state])

#if createIndex in ["Y", "y"]:
#    construct_index()

demo.launch(share=False)