File size: 4,975 Bytes
cff9535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2299694
cff9535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a22eb82
 
cff9535
a22eb82
cff9535
 
 
 
 
a22eb82
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import os, sys
import tempfile
import gradio as gr
from modules.text2speech import text2speech 
from modules.gfpgan_inference import gfpgan
from modules.sadtalker_test import SadTalker  

def get_driven_audio(audio):
    """Return driving audio, generating it with ``text2speech`` if needed.

    Args:
        audio: Either the path of an existing audio file, or any other
            string, which is handed to ``text2speech`` as its first argument
            (presumably the text to synthesize -- confirm against
            ``modules.text2speech``).

    Returns:
        ``audio`` unchanged when it names an existing file. Otherwise a
        2-tuple containing the value returned by ``text2speech`` twice.
        NOTE(review): the two branches return different shapes; this is kept
        as-is for backward compatibility with existing callers.
    """
    if os.path.isfile(audio):
        return audio

    # Reserve a unique ``.wav`` path, then close our own handle before the
    # path is handed on. A NamedTemporaryFile that is still open cannot be
    # reopened by name on Windows, and closing explicitly avoids relying on
    # garbage collection to release the descriptor. ``delete=False`` keeps
    # the file on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        save_path = tmp_file.name

    gen_audio = text2speech(audio, save_path)
    return gen_audio, gen_audio

def get_source_image(image):
    """Hand the given source image back to the caller unchanged."""
    return image

def sadtalker_demo(result_dir):
    """Build the SadTalker Gradio interface.

    Creates one ``SadTalker`` instance and wires its ``test`` method to both
    the "Generate" button and the bundled examples.

    Args:
        result_dir: Value stored in a hidden textbox and passed to
            ``SadTalker.test`` as the last input (presumably the directory
            where results are written -- confirm against ``SadTalker.test``).

    Returns:
        The constructed ``gr.Blocks`` object; the caller is responsible for
        launching it.
    """
    sad_talker = SadTalker()

    # Header markup. Every opened tag is closed here: the original string had
    # a stray ``</span>`` and left the GitHub ``<a>`` element unterminated.
    header_html = (
        "<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </h2> "
        "<a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; "
        "<a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a>  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; "
        "<a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </a> </div>"
    )

    # Each example row supplies: source image, driving audio, still mode,
    # enhancer. The hidden result-dir textbox gets no example value.
    examples = [
        [
            'examples/source_image/art_10.png',
            'examples/driven_audio/deyu.wav',
            True,
            False,
        ],
        [
            'examples/source_image/art_1.png',
            'examples/driven_audio/chinese_poem1.wav',
            True,
            False,
        ],
        [
            'examples/source_image/art_13.png',
            'examples/driven_audio/fayu.wav',
            True,
            False,
        ],
        [
            'examples/source_image/art_5.png',
            'examples/driven_audio/chinese_news.wav',
            True,
            False,
        ],
    ]

    with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
        gr.Markdown(header_html)

        with gr.Row().style(equal_height=False):
            # Left column: the two user-supplied inputs.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_source_image"):
                    with gr.TabItem('Upload image'):
                        with gr.Row():
                            source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)

                with gr.Tabs(elem_id="sadtalker_driven_audio"):
                    with gr.TabItem('Upload audio'):
                        with gr.Column(variant='panel'):
                            driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")

            # Right column: settings, the trigger button and the outputs.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_checkbox"):
                    with gr.TabItem('Settings'):
                        with gr.Column(variant='panel'):
                            is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion)")
                            enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')

                # NOTE(review): the elem_id spelling ("genearted") is kept
                # verbatim because external CSS/JS may target it.
                with gr.Tabs(elem_id="sadtalker_genearted"):
                        gen_video = gr.Video(label="Generated video", format="mp4").style(height=256,width=256)
                        gen_text = gr.Textbox(visible=False)

        with gr.Row():
            # One hidden component carries result_dir for both the examples
            # and the button, instead of creating two identical textboxes.
            result_dir_box = gr.Textbox(value=result_dir, visible=False)
            generation_inputs = [
                source_image,
                driven_audio,
                is_still_mode,
                enhancer,
                result_dir_box,
            ]
            generation_outputs = [gen_video, gen_text]

            gr.Examples(examples=examples,
                        inputs=generation_inputs,
                        outputs=generation_outputs,
                        fn=sad_talker.test,
                        # Example outputs are only cached when the SYSTEM
                        # environment variable equals 'spaces'.
                        cache_examples=os.getenv('SYSTEM') == 'spaces')

        submit.click(
            fn=sad_talker.test,
            inputs=generation_inputs,
            outputs=generation_outputs,
        )

    return sadtalker_interface
 

if __name__ == "__main__":
    # Results go under results/sadtalker, relative to the directory part of
    # the path this script was invoked with (sys.argv[0]).
    script_dir = os.path.dirname(sys.argv[0])
    demo = sadtalker_demo(os.path.join(script_dir, 'results', 'sadtalker'))
    demo.launch()