import gradio as gr
import random
import time
import requests
import soundfile as sf
from pydub import AudioSegment
import os

# Text-to-speech pipeline used by the chat handlers: FastPitch produces a
# spectrogram from text, and the HiFi-GAN vocoder turns it into audio.
from nemo.collections.tts.models import FastPitchModel, HifiGanModel

spec_generator = FastPitchModel.from_pretrained(model_name="nvidia/tts_en_fastpitch")
model = HifiGanModel.from_pretrained(model_name="nvidia/tts_hifigan")

# NOTE: the bare string literal below is not executed. It holds a disabled
# Coqui XTTS setup (an alternative TTS backend), kept for reference only.
"""
os.environ["COQUI_TOS_AGREED"] = "1"
from TTS.api import TTS
import torch
from TTS.api import TTS

# Get device
device = "cuda" if torch.cuda.is_available() else "cpu"

# List available 🐸TTS models
print(TTS().list_models())

# Init TTS
xtts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)  """

# HTTP endpoint the bot handler posts the running transcript to.
url = 'https://neuralleap-sdr-v2-api.hf.space/get_sdr_response/'

# Conversation state shared by the handlers below. They rebind these names via
# `global`; no `global` statement is needed here at module scope.
# NOTE: `id` shadows the builtin of the same name throughout this module.
chat_chain = "SDR : hi is this (customer name)?"  # running transcript
id = 0  # number of turns handled so far in the current chat


with gr.Blocks() as demo:
    # Chat panel, pre-seeded with the SDR's opening line.
    chatbot = gr.Chatbot(
        [[None, "Hi is this (customer name)?"]],
        avatar_images=(
            "https://cdnl.iconscout.com/lottie/premium/thumb/user-profile-5568736-4644453.gif",
            "https://cdn.dribbble.com/users/77598/screenshots/16399264/media/d86ceb1ad552398787fb76f343080aa6.gif",
        ),
        height=400,
        show_label=False,
        show_copy_button=True,
        show_share_button=True,
        likeable=True,
        layout="panel",
    )

    # Message box and voice-speed slider share one row.
    with gr.Row():
        msg = gr.Textbox()
        slider = gr.Slider(1, 3, value=1.2, label="Speed", info="voice speed")

    clear = gr.Button("Clear")
    audio = gr.Audio(autoplay=True)

    # Run the opening script through the TTS pipeline once at startup.
    # NOTE(review): nothing visible in this file reads `audio_tts` afterwards;
    # this may only serve as a model warm-up — confirm before removing.
    starting_text = " This is [SDR’s name] with Neural Leap. I saw you schedule a call with us for [Insert day and time] to learn more about our AI engineering services. Does that ring a bell?"
    parsed = spec_generator.parse(starting_text)
    spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
    audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)
    

    def new_chat():
        """Reset the shared conversation state and start a fresh chat.

        Rebinds the module-level transcript and turn counter to their initial
        values and logs a separator line to the console.

        Returns:
            A ``(chat_history, textbox_value)`` pair: a history containing only
            the SDR greeting, and an empty string for the message box.
        """
        global chat_chain, id
        chat_chain, id = "SDR : hi is this (customer name)?", 0
        print("\n==================new chat started==================")
        greeting_only = [[None, "Hi is this (customer name)?"]]
        return greeting_only, ""

    def user(user_message, history):
        """Return the chat history extended with the user's new message.

        The new entry is a ``[user_message, None]`` pair; the bot half is left
        empty. The input history is not modified.
        """
        pending_turn = [user_message, None]
        return history + [pending_turn]

    def bot(history, msg, slider):
        """Stream the SDR reply for the latest user turn, then speak it.

        Generator handler: every yield is a ``(chat_history, textbox_value,
        audio)`` triple.

        The running transcript (``chat_chain``) and turn counter (``id``) are
        module-level globals, so they are shared by every caller of this
        handler.

        Args:
            history: Chat history as a list of ``[user, bot]`` pairs. The bot
                slot of the last pair is filled in place as text arrives.
            msg: The prospect's message; appended to the running transcript.
            slider: Speed slider value. It is only logged here and does not
                affect the generated audio.

        Yields:
            While streaming: the growing history, an empty textbox value and
            an autoplay audio component. Finally: the history, an empty
            textbox value and the path of the synthesized WAV file.
        """
        # Local import keeps this block self-contained.
        import codecs

        global chat_chain, id
        print(float(slider))
        id = id + 1
        chat_chain = chat_chain + "\nProspect: " + msg + "\n\n"
        params = {
            "userText": chat_chain,
            "idf": str(id),
        }

        # Iterating a response yields fixed-size byte chunks, so a multi-byte
        # UTF-8 character (e.g. the curly apostrophe) can be split across two
        # chunks. An incremental decoder buffers the partial bytes instead of
        # raising UnicodeDecodeError.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        full_text = ""

        # The context manager releases the connection even if the stream is
        # abandoned midway.
        # NOTE(review): no timeout is set on this request; consider adding one.
        with requests.post(url, params=params, stream=True) as response:
            history[-1][1] = ""
            for chunk in response:
                processed_chunk = decoder.decode(chunk)
                if not processed_chunk:
                    # Only part of a character has arrived so far.
                    continue
                history[-1][1] += processed_chunk
                full_text += processed_chunk
                if id == 1:
                    # First reply of a chat: brief pause between updates.
                    time.sleep(0.01)
                else:
                    print(processed_chunk)
                yield history, "", gr.Audio(autoplay=True)

            # Flush whatever the decoder is still holding at end of stream.
            tail = decoder.decode(b"", final=True)
            if tail:
                history[-1][1] += tail
                full_text += tail

        # Drops the final character before storing the reply; presumably a
        # trailing terminator sent by the API — TODO confirm.
        full_text = full_text[:-1]
        chat_chain = chat_chain + "SDR : " + full_text
        print(chat_chain)

        # Remove template placeholders / speaker tags that should not be spoken.
        spoken_text = full_text.replace("[SDR’s name] with", "").replace("SDR:", "")
        if not spoken_text.strip():
            # Nothing to say: skip the TTS models rather than feed them blank text.
            yield history, "", gr.Audio(autoplay=True)
            return

        parsed = spec_generator.parse(spoken_text)
        spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
        audio_tts = model.convert_spectrogram_to_audio(spec=spectrogram)
        # Write the first (presumably only) item of the batch to speech.wav.
        # 22050 is presumably the models' output sample rate — confirm.
        sf.write("speech.wav", audio_tts.to('cpu').detach().numpy()[0], 22050)
        yield history, "", "speech.wav"

    # On submit: append the user's turn immediately (unqueued), then stream
    # the bot reply into the chat, empty the textbox and update the player.
    user_turn_added = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[chatbot],
        queue=False,
    )
    user_turn_added.then(
        fn=bot,
        inputs=[chatbot, msg, slider],
        outputs=[chatbot, msg, audio],
    )

    # "Clear" restarts the conversation and empties the textbox.
    # The slider has no change handler; its value is only passed to `bot`.
    clear.click(fn=new_chat, outputs=[chatbot, msg])
    
# Turn on the request queue, then start serving the app.
demo.queue().launch()