"""Polish ASR BIGOS workspace.

A Gradio voicebot playground: record speech, transcribe it with a local
Whisper model, forward the transcript to ChatGPT, and (optionally)
synthesize the reply to speech.
"""

import gradio as gr
import whisper
import numpy as np
import openai


def greet(name):
    """Return a toy greeting for *name* (demo helper, not wired to the UI)."""
    return "Hello " + name + "!!"


# Stylesheet that themes the Blocks UI (must exist next to this script).
with open('app.css', 'r') as f:
    css_file = f.read()

markdown = """
# Polish ASR BIGOS workspace
"""


def whisper_model_change(radio_whisper_model):
    """Load and return the Whisper model named by the radio selection."""
    whisper_model = whisper.load_model(radio_whisper_model)
    return whisper_model


def prompt_gpt(input_text):
    """Send *input_text* to ChatGPT (gpt-3.5-turbo) and return the reply text.

    An empty/None transcript is not appended, so the request then carries
    only the system prompt.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    if input_text:
        messages.append(
            {"role": "user", "content": input_text},
        )
    chat_completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
    )
    reply = chat_completion.choices[0].message.content
    return reply


def process_pipeline(audio):
    """Full demo pipeline: ASR -> ChatGPT -> TTS. Returns the mp3 file path.

    BUG FIX: the original called ``transcribe(audio)`` with one argument,
    but ``transcribe`` requires four; pass the same defaults the UI state
    starts with ("en" / "base", model loaded lazily).
    """
    asr_out = transcribe(audio, "en", None, "base")
    gpt_out = prompt_gpt(asr_out)
    tts_out = synthesize_speech(gpt_out)
    return tts_out


def transcribe(audio, language, whisper_model, whisper_model_type):
    """Transcribe the audio file at path *audio* with a local Whisper model.

    Lazily initializes the model from *whisper_model_type* when
    *whisper_model* is falsy (e.g. the gr.State has not been set yet).
    Returns the decoded transcript text.
    """
    if not whisper_model:
        whisper_model = init_whisper_model(whisper_model_type)
    print(f"Transcribing {audio} for language {language} and model {whisper_model_type}")
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio)
    # fp16=False keeps decoding CPU-friendly (no half-precision warnings).
    options = whisper.DecodingOptions(language=language, without_timestamps=True, fp16=False)
    result = whisper.decode(whisper_model, mel, options)
    result_text = result.text
    return result_text


def init_whisper_model(whisper_model_type):
    """Download/load and return the Whisper model of the given size."""
    print("Initializing whisper model")
    print(whisper_model_type)
    whisper_model = whisper.load_model(whisper_model_type)
    return whisper_model


def synthesize_speech(text, language="en"):
    """Synthesize *text* to speech and return the path of the saved mp3.

    BUG FIX: the original body referenced undefined names ``out_result``
    and ``lang``; use the *text* argument and a backward-compatible
    *language* parameter instead.
    NOTE(review): ``gTTS`` is called but never imported anywhere in this
    file — confirm the ``gtts`` dependency is installed and add
    ``from gtts import gTTS`` before wiring this into the UI.
    """
    audioobj = gTTS(text=text, lang=language, slow=False)
    audioobj.save("Temp.mp3")
    return "Temp.mp3"


block = gr.Blocks(css=css_file)

with block:
    # Session state: decoding language, selected model size, loaded model.
    language = gr.State("en")
    whisper_model_type = gr.State("base")
    whisper_model = gr.State()

    # State handling callbacks -------------------------------------------
    def change_language(choice):
        """Map the radio label to a Whisper language code ("pl"/"en")."""
        if choice == "Polish":
            language = "pl"
            print("Switching to Polish")
            print("language")
            print(language)
        elif choice == "English":
            language = "en"
            print("Switching to English")
            print("language")
            print(language)
        return language

    def change_whisper_model(choice):
        """Load the newly selected Whisper model and update both state vars."""
        whisper_model_type = choice
        print("Switching Whisper model")
        print(whisper_model_type)
        whisper_model = init_whisper_model(whisper_model_type)
        return [whisper_model_type, whisper_model]

    gr.Markdown(markdown)
    with gr.Tabs():
        with gr.TabItem('Voicebot playground'):
            with gr.Box():
                # NOTE(review): the original HTML literal was garbled in the
                # source; reconstructed as a minimal label for the key field.
                gr.HTML("API Key:")
                # API key textbox (password-style via the #pw CSS rule).
                api_key = gr.Textbox(label="", elem_id="pw")
                radio_lang = gr.Radio(
                    ["Polish", "English"],
                    label="Language",
                    info="If none selected, English is used",
                )
                radio_whisper_model = gr.Radio(
                    ["tiny", "base", "small", "medium", "large"],
                    label="Whisper ASR model (local)",
                    info="Larger models are better, but slower. Default - base",
                )
                mic_recording = gr.Audio(source="microphone", type="filepath", label='Record your voice')
                out_asr = gr.Textbox(placeholder="ASR output", lines=5, max_lines=10, show_label=False)
                out_gpt = gr.Textbox(placeholder="ChatGPT output", lines=10, max_lines=25, show_label=False)
                button_transcribe = gr.Button("Transcribe")
                button_prompt_gpt = gr.Button("Prompt ChatGPT")

                # Wire UI events to the callbacks above.
                button_transcribe.click(
                    transcribe,
                    inputs=[mic_recording, language, whisper_model, whisper_model_type],
                    outputs=out_asr,
                )
                button_prompt_gpt.click(prompt_gpt, inputs=out_asr, outputs=out_gpt)
                radio_lang.change(fn=change_language, inputs=radio_lang, outputs=language)
                radio_whisper_model.change(
                    fn=change_whisper_model,
                    inputs=radio_whisper_model,
                    outputs=[whisper_model_type, whisper_model],
                )

block.launch()