import hmac
import os

import gradio as gr
from gradio_client import Client

# Password checked by authenticate(). It is read from the APP_PASSWORD
# environment variable so the secret can be set per deployment without editing
# source; when the variable is unset or empty, the legacy built-in value is
# used so existing deployments keep working.
PASSWORD = os.environ.get("APP_PASSWORD") or "071295"

def get_speech(text, voice):
    """Request synthesized speech for ``text`` from the WhisperSpeech Space.

    Args:
        text: Text for the 'Enter multilingual text' textbox.
        voice: File path for the optional 'Upload or Record Speaker Audio'
            input.

    Returns:
        Whatever the ``/whisper_speech_demo`` endpoint returns; the value is
        also printed to stdout.
    """
    endpoint = "/whisper_speech_demo"
    speaker_url = ""  # 'alternatively, you can paste in an audio file URL:' textbox, left blank
    tempo = 14        # 'Tempo (in characters per second)' slider; the endpoint accepts 10 to 15

    whisper_client = Client("https://collabora-whisperspeech.hf.space/")
    speech = whisper_client.predict(text, voice, speaker_url, tempo, api_name=endpoint)
    print(speech)
    return speech

def get_dreamtalk(image_in, speech):
    """Ask the DreamTalk Space to animate a portrait with the given speech.

    Args:
        image_in: File path for the 'Image' input.
        speech: File path for the 'Audio input' input.

    Returns:
        The ``'video'`` entry of the ``/infer`` response; the full response is
        also printed to stdout.
    """
    # Value for the 'emotional style' dropdown.
    emotional_style = "M030_front_neutral_level1_001.mat"

    dreamtalk_client = Client("https://fffiloni-dreamtalk.hf.space/")
    response = dreamtalk_client.predict(speech, image_in, emotional_style, api_name="/infer")
    print(response)
    return response['video']

def pipe(text, voice, image_in):
    """Run the full pipeline: text + voice -> speech -> talking-portrait video.

    Args:
        text: Text to be spoken.
        voice: File path of the speaker audio passed to get_speech().
        image_in: File path of the portrait image passed to get_dreamtalk().

    Returns:
        The video value returned by get_dreamtalk().

    Raises:
        gr.Error: If the DreamTalk step fails for any reason. The original
            exception is chained as ``__cause__`` so it stays visible in logs.
    """
    speech = get_speech(text, voice)
    try:
        return get_dreamtalk(image_in, speech)
    except Exception as err:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not swallowed and turned into a UI error.
        raise gr.Error('An error occurred while loading DreamTalk: Image may not contain any face') from err

def authenticate(password):
    """Check the submitted password and toggle which screen is visible.

    Args:
        password: Value typed into the password box.

    Returns:
        A 3-tuple of ``gr.update`` objects, in the order
        (main column, error message, login column). On success the main column
        is shown and the other two are hidden; on failure the main column stays
        hidden, the error message shows "Invalid password" and the login column
        stays visible.
    """
    # Compare as UTF-8 bytes with a constant-time comparison:
    # hmac.compare_digest raises TypeError for non-ASCII str arguments, and a
    # plain == can leak how much of the password matched via timing.
    accepted = isinstance(password, str) and hmac.compare_digest(
        password.encode("utf-8"), PASSWORD.encode("utf-8")
    )
    if accepted:
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
    return gr.update(visible=False), gr.update(visible=True, value="Invalid password"), gr.update(visible=True)

# UI definition: a login column that is visible at start-up and a main
# generator column that stays hidden until authenticate() reveals it.
with gr.Blocks() as demo:
    # --- Login screen (visible=True at start-up) ---
    with gr.Column(visible=True) as login_column:
        gr.HTML("""
        <h2 style="text-align: center;">
        Welcome to Chân Dung Biết Nói
        </h2>
        """)
        with gr.Row():
            logo = gr.Image(label="Logo", type="filepath", value="./logo.png", height=200, width=200)
            introduction = gr.Textbox(label="Introduction", value="This is a demo application where you can input text and voice to generate talking portraits.", interactive=False)
        password_input = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login")
        # Hidden at start-up; authenticate() makes it visible with "Invalid password" on failure.
        error_message = gr.Textbox(label="", visible=False)

    # --- Main screen (visible=False until authenticate() succeeds) ---
    with gr.Column(visible=False) as main_column:
        gr.HTML("""
        <h2 style="text-align: center;">
        Chân Dung Biết Nói
        </h2>
        <p style="text-align: center;"></p>
        """)
        with gr.Row():
            # Left: portrait image to animate, pre-filled with ./maian.PNG.
            with gr.Column():
                image_in = gr.Image(label="Portrait IN", type="filepath", value="./maian.PNG")
            # Middle: speaker audio, text to speak and the submit button.
            with gr.Column():
                voice = gr.Audio(type="filepath", label="Tải lên hoặc ghi âm trực tiếp (nên là voice cloning)")
                text = gr.Textbox(label="text")
                submit_btn = gr.Button('Submit')
            # Right: resulting video.
            with gr.Column():
                video_o = gr.Video(label="Video result")

    # authenticate() returns three updates, applied in this order:
    # main_column, error_message, login_column.
    login_btn.click(
        fn=authenticate,
        inputs=password_input,
        outputs=[main_column, error_message, login_column]
    )

    # pipe(text, voice, image_in) produces the value shown in video_o.
    # NOTE(review): concurrency_limit=3 presumably caps simultaneous runs of
    # this event at 3 -- confirm against the installed Gradio version.
    submit_btn.click(
        fn=pipe,
        inputs=[text, voice, image_in],
        outputs=[video_o],
        concurrency_limit=3
    )

# Enable the request queue (max_size=10) and start the app at import time.
# NOTE(review): show_api=False presumably only hides the API docs page; the
# login above just toggles component visibility -- confirm whether the event
# endpoints remain callable without logging in.
demo.queue(max_size=10).launch(show_error=True, show_api=False)