File size: 3,284 Bytes
7572555
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1242299
 
 
6c305c0
 
99acf19
7572555
 
 
 
 
 
b5e0088
e217c8c
b5e0088
 
 
 
20200ff
b5e0088
60f71bf
7572555
 
 
0974d7a
1e7779c
84ddd8e
1e7779c
7572555
 
6b8c3fc
7572555
 
 
 
 
 
 
2c65526
 
7572555
2c65526
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import gradio as gr
from gradio_client import Client

def get_speech(text, voice):
    """Synthesize speech for ``text`` via the hosted WhisperSpeech Space.

    Args:
        text: Text to speak; sent to the remote 'Enter multilingual text'
            textbox input.
        voice: Filepath of a speaker audio sample; sent to the remote
            'Upload or Record Speaker Audio (optional)' audio input.

    Returns:
        Whatever the remote ``/whisper_speech_demo`` endpoint returns. The
        value is also printed to stdout before being returned.
    """
    whisper_client = Client("https://collabora-whisperspeech.hf.space/")

    # The endpoint also accepts an audio file URL as an alternative to an
    # uploaded sample; this app always sends an empty string for it.
    audio_url = ""
    # 'Tempo (in characters per second)' slider; the remote UI documents a
    # numeric range of 10 to 15.
    tempo = 14

    speech = whisper_client.predict(
        text,
        voice,
        audio_url,
        tempo,
        api_name="/whisper_speech_demo",
    )
    print(speech)
    return speech

def get_dreamtalk(image_in, speech):
    """Animate a portrait with a speech track via the hosted DreamTalk Space.

    Args:
        image_in: Filepath of the portrait; sent to the remote 'Image' input.
        speech: Filepath of the audio; sent to the remote 'Audio input' input.

    Returns:
        The ``'video'`` entry of the result returned by the remote ``/infer``
        endpoint. The full result is printed to stdout first.
    """
    dreamtalk_client = Client("https://fffiloni-dreamtalk.hf.space/")

    # Value for the remote 'emotional style' dropdown. The Space offers
    # M030 / W009 / W011 variants of angry, contempt, disgusted, fear, happy,
    # neutral, sad and surprised; this app always uses the neutral M030 style.
    emotional_style = "M030_front_neutral_level1_001.mat"

    output = dreamtalk_client.predict(
        speech,
        image_in,
        emotional_style,
        api_name="/infer",
    )
    print(output)
    return output['video']

def pipe(text, voice, image_in):
    """Run the full text -> speech -> talking-portrait pipeline.

    Speech is produced first with ``get_speech``; that result is then passed
    together with the portrait to ``get_dreamtalk``.

    Args:
        text: Text to be spoken; forwarded to ``get_speech``.
        voice: Speaker audio sample; forwarded to ``get_speech``.
        image_in: Portrait image; forwarded to ``get_dreamtalk``.

    Returns:
        The value returned by ``get_dreamtalk``.

    Raises:
        gr.Error: If ``get_dreamtalk`` raises an ordinary exception. The
            original exception is attached as the cause. Failures inside
            ``get_speech`` are not wrapped and propagate unchanged.
    """
    speech = get_speech(text, voice)

    try:
        video = get_dreamtalk(image_in, speech)
    except Exception as err:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not converted into a UI error,
        # and chain the cause so the real failure appears in the traceback.
        raise gr.Error(
            'An error occurred while loading: Image may not contain any face - try again'
        ) from err

    return video

# Page layout: a title banner above a three-column row
# (portrait input | voice + text + submit | resulting video).
with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""
         <h1 style="text-align: center;">
        Talking Image 
        </h1>
        <p style="text-align: center;"></p>
        
        <h3 style="text-align: center;">
        Clone your voice and make your photos speak. 
        </h3>
        <p style="text-align: center;"></p>
        """)
        with gr.Row():
            # Portrait to animate, pre-filled with a bundled sample image.
            with gr.Column():
                image_in = gr.Image(
                    label="Portrait IN",
                    type="filepath",
                    value="./creatus.jpg",
                )
            # Voice sample, text to speak, and the trigger button.
            with gr.Column():
                voice = gr.Audio(
                    type="filepath",
                    label="Upload or Record Speaker audio (Optional voice cloning)",
                )
                text = gr.Textbox(label="text")
                submit_btn = gr.Button('Submit')
            # Generated video.
            with gr.Column():
                video_o = gr.Video(label="Video result")

    # Wire the button to the pipeline; the input order matches
    # pipe(text, voice, image_in).
    submit_btn.click(
        fn=pipe,
        inputs=[text, voice, image_in],
        outputs=[video_o],
        concurrency_limit=3,
    )

# Enable the request queue (max_size=10), then start the app.
demo.queue(max_size=10)
demo.launch(show_error=True, show_api=False)