import json
import logging
import os

import gradio as gr
import soundfile as sf
import torch
from pyChatGPT import ChatGPT
from speechbrain.pretrained import HIFIGAN
from speechbrain.pretrained import Tacotron2
from transformers import pipeline


# ChatGPT session token, read from the "SessionToken" environment variable.
# It is None when the variable is not set.
session_token = os.environ.get("SessionToken")

# ChatGPT client used by get_response_from_chatbot(). Construction is guarded
# so that a missing or rejected token does not stop the app from starting;
# with api left as None every request falls back to the apology message.
try:
    api = ChatGPT(session_token)
except Exception:
    logging.getLogger(__name__).exception("Could not create the ChatGPT client")
    api = None

# Device handed to the transformers pipeline: GPU ordinal 0 when CUDA is
# available, otherwise the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# SpeechBrain's documented examples pass the device in run_opts as a string
# ("cuda:0" / "cpu"), so give it that form instead of the integer ordinal.
speechbrain_device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Initialise STT (Whisper)
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device=device,
)

# Initialise TTS (tacotron2) and Vocoder (HiFIGAN)
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
    overrides={"max_decoder_steps": 2000},
    run_opts={"device": speechbrain_device},
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
)

def get_response_from_chatbot(text: str, reset_conversation: bool) -> str:
    """Send *text* to the module-level ChatGPT client and return its reply.

    Args:
        text: The prompt to send.
        reset_conversation: When truthy, refresh the client's auth and reset
            its conversation before sending the prompt.

    Returns:
        The ``"message"`` entry of the client's response, or a fixed apology
        string when the request fails for any reason (including the client
        being unavailable).
    """
    try:
        if reset_conversation:
            api.refresh_auth()
            api.reset_conversation()
        return api.send_message(text)["message"]
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate, and record the
        # traceback so the real cause is not hidden behind the generic reply.
        logging.getLogger(__name__).exception("ChatGPT request failed")
        return "Sorry, the chatGPT queue is full. Please try again later."


def chat(input_audio, chat_history, reset_conversation):
    """Run one voice turn: transcribe, ask the chatbot, synthesise the reply.

    Args:
        input_audio: Path of the recorded audio file, or a falsy value
            (None / empty string) when nothing was recorded.
        chat_history: JSON-encoded list of (message, response) pairs from the
            previous turns; empty (or None) when there is no history yet.
        reset_conversation: When truthy, the previous history is discarded and
            the chatbot is asked to start a new conversation.

    Returns:
        A 3-tuple ``(pairs, history_json, audio_path)``: the conversation as a
        list of pairs, the same list JSON-encoded, and the path of the
        synthesised reply. When no audio was supplied the existing
        conversation is returned unchanged and ``audio_path`` is None.
    """
    # Nothing recorded: leave the conversation as it is rather than letting
    # the speech recogniser fail on a missing input.
    if not input_audio:
        previous_turns = json.loads(chat_history) if chat_history else []
        return previous_turns, chat_history, None

    # speech -> text (Whisper)
    message = pipe(input_audio)["text"]

    # text -> response (chatGPT)
    response = get_response_from_chatbot(message, reset_conversation)

    # response -> speech (tacotron2 produces the mel output, HiFiGAN the waveform)
    mel_output, _, _ = tacotron2.encode_text(response)
    wav = hifi_gan.decode_batch(mel_output)
    # 22050 is the sample rate written to the file; presumably the output rate
    # of the LJSpeech-trained models -- confirm if the models are swapped.
    sf.write("out.wav", wav.squeeze().cpu().numpy(), 22050)

    # Rebuild the running conversation, dropping the old turns on reset.
    out_chat = []
    if chat_history and not reset_conversation:
        out_chat = json.loads(chat_history)
    out_chat.append((message, response))

    return out_chat, json.dumps(out_chat), "out.wav"


# JavaScript source (kept as a Python string) that is passed as the `_js`
# handler of the start button's click event. When run in the browser it:
#   * locates the <gradio-app> root (its shadowRoot when there is one) and,
#     only the first time, stores it in window['gradioEl'];
#   * hides the element with id "page_1" and shows the one with id "page_2";
#   * sets the height of "chat_row", "chat_bot" and "chat_bot1" to the client
#     height minus 300px and stretches the first child of "prompt_row";
#   * defines window.checkChange, which clones any children newly added to
#     "chat_bot" into "chat_bot1" and copies a status text across, and polls
#     it every 500 ms via setInterval.
# The helpers isMobile() and setNativeValue() are defined but not called
# inside this snippet.
# NOTE: the text below is runtime data; any edit inside the quotes changes
# what the browser executes.
start_work= """async() => {
    function isMobile() {
        try {
            document.createEvent("TouchEvent"); return true;
        } catch(e) {
            return false; 
        }
    }
	function getClientHeight()
	{
	  var clientHeight=0;
	  if(document.body.clientHeight&&document.documentElement.clientHeight) {
		var clientHeight = (document.body.clientHeight<document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
	  } else {
		var clientHeight = (document.body.clientHeight>document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
	  }
	  return clientHeight;
	}
 
    function setNativeValue(element, value) {
      const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
      const prototype = Object.getPrototypeOf(element);
      const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
      
      if (valueSetter && valueSetter !== prototypeValueSetter) {
            prototypeValueSetter.call(element, value);
      } else {
            valueSetter.call(element, value);
      }
    }
    var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
    if (!gradioEl) {
        gradioEl = document.querySelector('body > gradio-app');
    }
    
    if (typeof window['gradioEl'] === 'undefined') {
        window['gradioEl'] = gradioEl;
       
        const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
        const page2 = window['gradioEl'].querySelectorAll('#page_2')[0]; 
    
        page1.style.display = "none";
        page2.style.display = "block"; 
        window['div_count'] = 0;
        window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
        window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];   
        chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0]; 
        prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0]; 
        window['chat_bot1'].children[1].textContent = '';
        
        clientHeight = getClientHeight();
        new_height = (clientHeight-300) + 'px';
        chat_row.style.height = new_height;
        window['chat_bot'].style.height = new_height;
        window['chat_bot'].children[2].style.height = new_height;
        window['chat_bot1'].style.height = new_height;
        window['chat_bot1'].children[2].style.height = new_height;
        prompt_row.children[0].style.flex = 'auto';
        prompt_row.children[0].style.width = '100%';
        
        window['checkChange'] = function checkChange() {
            try {
                if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
                    new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
                    for (var i = 0; i < new_len; i++) { 
                        new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
                        window['chat_bot1'].children[2].children[0].appendChild(new_div);
                    }
                    window['div_count'] = chat_bot.children[2].children[0].children.length;
                }
                if (window['chat_bot'].children[0].children.length > 1) {
                     window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
                } else {
                    window['chat_bot1'].children[1].textContent = '';
                }
              
            } catch(e) {
            }        
        }
        window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);         
    }
   
    return false;
}"""


# Static HTML shown under the page title. Each snippet is assembled from
# adjacent string literals only to keep the source lines manageable; the
# resulting markup is exactly what is rendered.
_ABOUT_HTML = (
    "<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en'>Whisper</a>"
    " to convert the input speech to transcribed text,"
    " <a href='https://chat.openai.com/chat'>chatGPT</a> to generate responses,"
    " and <a href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech'>tacotron2</a>"
    " to convert the response to output speech. </p>"
)
_DUPLICATE_HTML = (
    "<p>You can duplicate this space and use your own session token: "
    "<a style='display:inline-block' href='https://huggingface.co/spaces/yizhangliu/chatGPT?duplicate=true'>"
    "<img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10'"
    " alt='Duplicate Space'></a></p>"
)
_TOKEN_HELP_HTML = (
    "<p> Instruction on how to get session token can be seen in video "
    "<a style='display:inline-block' href='https://www.youtube.com/watch?v=TdNSj_qgdFk'>"
    "<font style='color:blue;weight:bold;'>here</font></a>."
    " Add your session token by going to settings and add under secrets. </p>"
)

with gr.Blocks(title="Talk to chatGPT") as demo:
    gr.Markdown("## Talk to chatGPT ##")
    gr.HTML(_ABOUT_HTML)
    gr.HTML(_DUPLICATE_HTML)
    gr.HTML(_TOKEN_HELP_HTML)

    # Page 1: a single start button whose click runs only the start_work
    # JavaScript (no Python callback, no inputs or outputs).
    with gr.Group(elem_id="page_1", visible=True) as page_1:
        with gr.Box():
            with gr.Row():
                start_button = gr.Button(
                    "Let's talk to chatGPT! 🗣",
                    elem_id="start-btn",
                    visible=True,
                )
                start_button.click(
                    fn=None,
                    inputs=[],
                    outputs=[],
                    _js=start_work,
                )

    # Page 2: the conversation view, created hidden.
    with gr.Group(elem_id="page_2", visible=False) as page_2:
        with gr.Row(elem_id="chat_row"):
            # The hidden chatbot is the one wired as an output of chat();
            # the visible "chat_bot1" is not an output of any Python callback.
            chatbot = gr.Chatbot(
                elem_id="chat_bot",
                visible=False,
            ).style(color_map=("green", "blue"))
            chatbot1 = gr.Chatbot(
                elem_id="chat_bot1",
            ).style(color_map=("green", "blue"))

        with gr.Row():
            prompt_input_audio = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record Audio Input",
            )
            prompt_output_audio = gr.Audio()

        reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)

        with gr.Row(elem_id="prompt_row"):
            # Hidden textbox that carries the JSON-encoded history between calls.
            chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
            submit_btn = gr.Button(
                value="Send to chatGPT",
                elem_id="submit-btn",
            ).style(
                margin=True,
                rounded=(True, True, True, True),
                width=100,
            )

        # One click = one voice turn handled by chat().
        submit_btn.click(
            fn=chat,
            inputs=[prompt_input_audio, chat_history, reset_conversation],
            outputs=[chatbot, chat_history, prompt_output_audio],
        )

demo.launch(debug=True)