# Import libraries
from pathlib import Path
import sys
import os
import io
import time

import openai
import gradio as gr
from PyPDF2 import PdfReader
from azure.storage.filedatalake import DataLakeServiceClient
from adlfs import AzureBlobFileSystem

import llama_index
from llama_index import (
    SimpleDirectoryReader,
    GPTListIndex,
    readers,
    LLMPredictor,
    PromptHelper,
    ServiceContext,
    GPTVectorStoreIndex,
    StorageContext,
    load_index_from_storage,
    download_loader,
    GPTRAKEKeywordTableIndex,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.node_parser import SimpleNodeParser
from llama_index.optimization.optimizer import SentenceEmbeddingOptimizer
from llama_index.readers import Document
from langchain import OpenAI
from langchain.chat_models import ChatOpenAI

# Blob storage parameters
account_name = 'apeazdlkini07s'
account_key = os.environ['account_key']
file_system_name = "gpt"

service_client = DataLakeServiceClient(
    account_url=f"https://{account_name}.dfs.core.windows.net",
    credential=account_key,
)
file_system_client = service_client.get_file_system_client(file_system_name)

AZURE_ACCOUNT_NAME = account_name
AZURE_ACCOUNT_KEY = account_key
assert AZURE_ACCOUNT_NAME is not None and AZURE_ACCOUNT_NAME != ""

fs = AzureBlobFileSystem(account_name=AZURE_ACCOUNT_NAME, account_key=AZURE_ACCOUNT_KEY)

# Retrieve the names of the documents whose indexes are already stored
path_list = fs.ls('gpt/storage_demo')
documents_list = [Path(path).name[:-4] for path in path_list]  # strip the '.pdf' extension


def construct_index(doc):
    ## Define the prompt helper
    # Maximum input size (tokens)
    max_input_size = 1800
    # Number of output tokens (about 300 words)
    num_output = 400
    # Chunk size limit (about 450 words, roughly one page)
    chunk_size_limit = 600
    # Chunk overlap ratio
    chunk_overlap_ratio = 0.5

    # PromptHelper's signature changed across llama_index releases
    # (max_chunk_overlap was superseded by chunk_overlap_ratio), so keyword
    # arguments are used here; adjust to match the installed version.
    prompt_helper = PromptHelper(
        max_input_size,
        num_output,
        chunk_overlap_ratio=chunk_overlap_ratio,
        chunk_size_limit=chunk_size_limit,
    )

    ## Define the LLM predictor
    llm_predictor = LLMPredictor(
        llm=ChatOpenAI(temperature=0.4, model_name="gpt-4-32k", max_tokens=num_output)
    )

    ## Define the service context
    service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, prompt_helper=prompt_helper
    )

    ## Build the index (persisting it is handled by the caller)
    index = GPTVectorStoreIndex.from_documents(doc, service_context=service_context)
    return index


def extract_text(file):
    # Open the PDF file in binary mode
    with open(file.name, 'rb') as f:
        pdf_reader = PdfReader(f)
        # Concatenate the text extracted from every page
        text = ''
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text


def extract_name(file):
    return os.path.basename(file.name)


def ask_ai_upload(doc, question):
    file_name = extract_name(doc)
    try:
        # Rebuild the storage context and load the existing index
        storage_context = StorageContext.from_defaults(
            persist_dir=f'gpt/storage_demo/{file_name}', fs=fs
        )
        index = load_index_from_storage(storage_context)
    except Exception:
        # No stored index yet: build one from the uploaded document
        text = extract_text(doc)
        index = construct_index([Document(text)])
        # Save the index to Azure blob storage
        index.storage_context.persist(f'gpt/storage_demo/{file_name}', fs=fs)
        # Rebuild the storage context and reload the index
        storage_context = StorageContext.from_defaults(
            persist_dir=f'gpt/storage_demo/{file_name}', fs=fs
        )
        index = load_index_from_storage(storage_context)

    # Define the query and the querying method
    query_engine = index.as_query_engine(
        optimizer=SentenceEmbeddingOptimizer(percentile_cutoff=0.8),
        similarity_top_k=7,
    )
    query = ('Answer the question truthfully based on the text provided. '
             'Use bullet points. Write a step-by-step explanation and generate '
             'an answer as detailed and precise as possible. The task is: ' + str(question))
    response = query_engine.query(query)
    return response.response


def respond_document_upload(message, chat_history, doc):
    bot_message = ask_ai_upload(doc, message)
    chat_history.append((message, bot_message))
    time.sleep(2)
    return "", chat_history


def ask_ai_choose(doc, question):
    # Rebuild the storage context and load the stored index
    name_doc = str(doc) + '.pdf'
    storage_context = StorageContext.from_defaults(
        persist_dir=f'gpt/storage_demo/{name_doc}', fs=fs
    )
    index = load_index_from_storage(storage_context)

    # Define the query and the querying method
    query_engine = index.as_query_engine(
        optimizer=SentenceEmbeddingOptimizer(percentile_cutoff=0.8),
        similarity_top_k=7,
    )
    query = ('Answer the question truthfully based on the text provided. '
             'Use bullet points. Write a step-by-step explanation and generate '
             'an answer as detailed and precise as possible. The task is: ' + str(question))
    response = query_engine.query(query)
    return response.response


def respond_document_choose(message, chat_history, doc):
    bot_message = ask_ai_choose(doc, message)
    chat_history.append((message, bot_message))
    time.sleep(2)
    return "", chat_history


# Configure the Gradio platform
header = """

# My Ardian Chatbot

Please formulate clear and precise questions, and add contextual information when possible; this helps the tool produce the most relevant response. Adopt an iterative approach, asking for more details or explanations when necessary.

""" footnote = "

⚠ The chatbot has no memory: it does not remember what it previously generated.

" theme = gr.themes.Base( primary_hue="red", secondary_hue="gray", font=['FuturaTOT', '='] ) with gr.Blocks(theme=theme) as demo: gr.Markdown(header) with gr.Tab("Upload a document & ask a question 📥"): upload_file = gr.inputs.File(label="Upload your PDF document") output = gr.Textbox(label='Output', visible=False) chatbot = gr.Chatbot() question = gr.Textbox(label='Question', info="Please write your question here.") clear = gr.Button("Clear") question.submit(respond_document_upload, [question, chatbot, upload_file], [question, chatbot]) clear.click(lambda: None, None, chatbot, queue=False) with gr.Tab("Choose a document & ask a question 📚"): list_button = gr.Dropdown(documents_list, multiselect=False, label="Document", info="Please select the report you want to ask questions on.") chatbot = gr.Chatbot() question = gr.Textbox(label='Question', info="Please write your question here.") clear = gr.Button("Clear") question.submit(respond_document_choose, [question, chatbot, list_button], [question, chatbot]) clear.click(lambda: None, None, chatbot, queue=False) demo.launch(auth=(os.environ['username'],os.environ['password']))