# # Load in packages

# +
import os

# Need to overwrite the version of Gradio present in Hugging Face Spaces, as it doesn't have like buttons/avatars (Oct 2023).
# Run the upgrade before gradio is imported so that the newer version is the one that gets loaded.
#os.system("pip uninstall -y gradio")
os.system("pip install gradio==3.50.0")

from datetime import datetime
from typing import TypeVar

import pandas as pd
import gradio as gr
from transformers import pipeline, AutoTokenizer
#from langchain.embeddings import HuggingFaceEmbeddings#, HuggingFaceInstructEmbeddings
#from langchain.vectorstores import FAISS

# Alternative model sources
import ctransformers

import chatfuncs.chatfuncs as chatf

PandasDataFrame = TypeVar('pd.core.frame.DataFrame')

# Disable cuda devices if necessary
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


def create_hf_model(model_name):
    """Load a Hugging Face summarisation pipeline and its tokenizer for the given checkpoint."""
    tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = chatf.context_length)

    summariser = pipeline("summarization", model=model_name, tokenizer=tokenizer) # philschmid/bart-large-cnn-samsum

    #from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
    # if torch_device == "cuda":
    #     if "flan" in model_name:
    #         model = AutoModelForSeq2SeqLM.from_pretrained(model_name, device_map="auto")
    #     else:
    #         model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
    # else:
    #     if "flan" in model_name:
    #         model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    #     else:
    #         model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

    return summariser, tokenizer, model_name


def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
    """Load the chosen model and store it, with its tokenizer, on the chatfuncs module."""
    print("Loading model:", model_type)

    # Default values inside the function
    if gpu_config is None:
        gpu_config = chatf.gpu_config
    if cpu_config is None:
        cpu_config = chatf.cpu_config
    if torch_device is None:
        torch_device = chatf.torch_device

    if model_type == "Mistral Open Orca (larger, slow)":
        hf_checkpoint = 'TheBloke/MistralLite-7B-GGUF'

        if torch_device == "cuda":
            gpu_config.update_gpu(gpu_layers)
        else:
            gpu_config.update_gpu(gpu_layers)
            cpu_config.update_gpu(gpu_layers)

        print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")

        print(vars(gpu_config))
        print(vars(cpu_config))

        #try:
        #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
        #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
        #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config), hf=True) # **asdict(CtransRunConfig_cpu())
        #except:
        #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_gpu())
        #model = ctransformers.AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
        #model = ctransformers.AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config), hf=True) # **asdict(CtransRunConfig_cpu())

        #tokenizer = ctransformers.AutoTokenizer.from_pretrained(model)
        #summariser = pipeline("text-generation", model=model, tokenizer=tokenizer)

        # ctransformers loading for this model is not currently wired up, so leave empty placeholders
        model = []
        tokenizer = []
        summariser = []

    if model_type == "flan-t5-large-stacked-samsum":
        # Huggingface chat model
        hf_checkpoint = 'stacked-summaries/flan-t5-large-stacked-samsum-1024' #'declare-lab/flan-alpaca-base'

        summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)

    if model_type == "flan-t5-small-stacked-samsum":
        # Huggingface chat model
        hf_checkpoint = 'stacked-summaries/flan-t5-small-stacked-samsum-1024' #'philschmid/flan-t5-small-stacked-samsum' #'declare-lab/flan-alpaca-base'

        summariser, tokenizer, model_type = create_hf_model(model_name = hf_checkpoint)

    chatf.model = summariser
    chatf.tokenizer = tokenizer
    chatf.model_type = model_type

    load_confirmation = "Finished loading model: " + model_type

    print(load_confirmation)
    return model_type, load_confirmation, model_type


# Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
#model_type = "Mistral Open Orca (larger, slow)"
#load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

model_type = "flan-t5-large-stacked-samsum"
load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

model_type = "flan-t5-small-stacked-samsum"
load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)

today = datetime.now().strftime("%d%m%Y")
today_rev = datetime.now().strftime("%Y%m%d")


def summarise_text(text, text_df, length_slider, in_colnames, model_type):
    """Summarise pasted text or a column of an uploaded CSV with the currently loaded model."""
    if text_df is None:
        # No file provided - wrap the pasted text in a single-row dataframe
        in_colnames = "text"
        in_colnames_list_first = in_colnames

        in_text_df = pd.DataFrame({in_colnames_list_first: [text]})

    else:
        in_text_df = pd.read_csv(text_df.name, delimiter = ",", low_memory=False, encoding='cp1252')
        # The first cell of the single-row column-name dataframe holds the name of the column to summarise
        in_colnames_list_first = in_colnames.tolist()[0][0]

    if model_type != "Mistral Open Orca (larger, slow)":
        summarised_text = chatf.model(list(in_text_df[in_colnames_list_first]), max_length=length_slider)

    if model_type == "Mistral Open Orca (larger, slow)":
        length = str(length_slider)

        prompt = """<|im_start|>system
You are an AI assistant that follows instruction extremely well. Help as much as you can.
<|im_start|>user
Summarise the following text in less than {length} words.
Text: {text}
Answer:<|im_end|>"""

        formatted_string = prompt.format(length=length, text=text)

        print(formatted_string)

        #summarised_text = chatf.model(formatted_string, max_new_tokens=length_slider)
        summarised_text = "Mistral Open Orca summaries currently not working. Sorry!"

    if text_df is None:
        if model_type != "Mistral Open Orca (larger, slow)":
            summarised_text_out = summarised_text[0]['summary_text']

        if model_type == "Mistral Open Orca (larger, slow)":
            summarised_text_out = summarised_text

    else:
        summarised_text_out = [d['summary_text'] for d in summarised_text]

    output_name = "summarise_output_" + today_rev + ".csv"
    output_df = pd.DataFrame({"Original text": in_text_df[in_colnames_list_first],
                              "Summarised text": summarised_text_out})

    summarised_text_out_str = str(output_df["Summarised text"][0])

    output_df.to_csv(output_name, index = None)

    return summarised_text_out_str, output_name


# ## Gradio app - summarise
block = gr.Blocks(theme = gr.themes.Base())

with block:

    model_type_state = gr.State(model_type)

    gr.Markdown(
    """
    # Text summariser
    Enter open text below to get a summary.
    You can copy and paste text directly, or upload a file and specify the column that you want to summarise.
    Note that summarisation with Mistral Open Orca is still in development and does not currently work.
    """)

    with gr.Tab("Summariser"):
        current_model = gr.Textbox(label="Current model", value=model_type, scale = 3)

        with gr.Accordion("Paste open text", open = False):
            in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)

        with gr.Accordion("Summarise open text from a file", open = False):
            in_text_df = gr.File(label="Input text from file")
            in_colnames = gr.Dataframe(label="Write the column name for the open text to summarise",
                                       type="numpy", row_count=(1, "fixed"), col_count=(1, "fixed"),
                                       headers=["Open text column name"]) #, "Address column name 2", "Address column name 3", "Address column name 4"])

        with gr.Row():
            summarise_btn = gr.Button("Summarise")
            length_slider = gr.Slider(minimum = 30, maximum = 200, value = 100, step = 10, label = "Maximum length of summary")

        with gr.Row():
            output_single_text = gr.Textbox(label="Output example (first example in dataset)")
            output_file = gr.File(label="Output file")

    with gr.Tab("Advanced features"):
        #out_passages = gr.Slider(minimum=1, value = 2, maximum=10, step=1, label="Choose number of passages to retrieve from the document. Numbers greater than 2 may lead to increased hallucinations or input text being truncated.")
        #temp_slide = gr.Slider(minimum=0.1, value = 0.1, maximum=1, step=0.1, label="Choose temperature setting for response generation.")
        with gr.Row():
            model_choice = gr.Radio(label="Choose a summariser model", value="flan-t5-small-stacked-samsum", choices = ["flan-t5-small-stacked-samsum", "flan-t5-large-stacked-samsum", "Mistral Open Orca (larger, slow)"])
            change_model_button = gr.Button(value="Load model", scale=0)

        with gr.Accordion("Choose number of model layers to send to GPU (WARNING: please don't modify unless you are sure you have a GPU).", open = False):
            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU.", value=0, minimum=0, maximum=5, step = 1, visible=True)

        load_text = gr.Text(label="Load status")

    # Wire the buttons up to the model-loading and summarisation functions
    change_model_button.click(fn=load_model, inputs=[model_choice, gpu_layer_choice], outputs=[model_type_state, load_text, current_model])

    summarise_btn.click(fn=summarise_text, inputs=[in_text, in_text_df, length_slider, in_colnames, model_type_state],
                        outputs=[output_single_text, output_file], api_name="summarise_single_text")

block.queue(concurrency_count=1).launch()
# -
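# Illustrative sketch: how summarise_text could be called directly for a quick test once one of the
# flan-t5 models above has been loaded. The sample text and parameter values are assumptions for
# demonstration only; passing text_df=None makes the function wrap the string in a single-row dataframe.
#example_summary, example_csv = summarise_text(
#    text="The cat sat on the mat all afternoon. It was a very comfortable mat, and the cat only moved when it was time for dinner.",
#    text_df=None,
#    length_slider=50,
#    in_colnames=None,
#    model_type="flan-t5-small-stacked-samsum",
#)
#print(example_summary)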