File size: 7,018 Bytes
cff9535 416263d 95ba447 0ce42bd cff9535 0ce42bd 416263d cff9535 0ce42bd 95ba447 416263d cff9535 416263d cff9535 416263d 2299694 ed7bb0b 416263d cff9535 416263d cff9535 d7e9ac0 cff9535 d7e9ac0 416263d 95ba447 416263d cff9535 9beb764 0ce42bd 416263d 9beb764 a86a2b8 416263d cff9535 416263d 0ce42bd cff9535 a86a2b8 416263d 0ce42bd a86a2b8 416263d 0ce42bd a86a2b8 416263d 0ce42bd 416263d 0ce42bd cff9535 0ce42bd cff9535 416263d cff9535 ed7bb0b cff9535 0ce42bd cff9535 416263d cff9535 a22eb82 cff9535 a22eb82 416263d ed7bb0b a22eb82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import os, sys
import tempfile
import gradio as gr
from src.gradio_demo import SadTalker
# from src.utils.text2speech import TTSTalker
from huggingface_hub import snapshot_download
def get_source_image(image):
    """Return *image* unchanged.

    Identity pass-through: whatever object is supplied (a file path, an
    array, ``None``) is handed back as-is.
    """
    return image
def download_model(repo_id='vinthony/SadTalker', local_dir='./checkpoints'):
    """Fetch the model files from the Hugging Face Hub into a local folder.

    Args:
        repo_id: Hub repository to snapshot. Defaults to the SadTalker
            checkpoint repository used by this demo.
        local_dir: Directory the snapshot is placed in. Defaults to
            ``./checkpoints``.

    The call is delegated to ``huggingface_hub.snapshot_download``; any
    error it raises (network, missing repo, ...) propagates to the caller.
    """
    # local_dir_use_symlinks=True is kept exactly as before so the on-disk
    # layout produced by the hub client does not change.
    snapshot_download(
        repo_id=repo_id,
        local_dir=local_dir,
        local_dir_use_symlinks=True,
    )
def sadtalker_demo():
    """Build the SadTalker Gradio interface and return it (not yet launched).

    Steps:
      1. Download the checkpoints via ``download_model()``.
      2. Create a ``SadTalker`` instance with ``lazy_load=True``.
      3. Lay out the UI: source image + driving audio on the left, settings,
         the "Generate" button and the resulting video on the right, and a
         row of ready-made examples underneath.
      4. Wire both the examples and the button to ``sad_talker.test``.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for calling
        ``queue()`` / ``launch()`` on it.
    """
    download_model()

    sad_talker = SadTalker(lazy_load=True)
    # tts_talker = TTSTalker()

    # Page header. Every opened tag is closed: the original markup carried a
    # stray </span> and never closed the final <a> before </div>.
    header_html = (
        "<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </h2> "
        "<a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> "
        "<a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> "
        "<a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </a> </div>"
    )

    # Hint about duplicating the Space / running locally on a GPU.
    notice_md = (
        "\n"
        '''<b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> '''
        '''<br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> '''
    )

    # One row per example, in the same order as the generation inputs:
    # [source image path, driving audio path, preprocess mode,
    #  still-mode flag, face-enhancer flag].
    examples = [
        ['examples/source_image/full_body_1.png', 'examples/driven_audio/bus_chinese.wav', 'crop', True, False],
        ['examples/source_image/full_body_2.png', 'examples/driven_audio/japanese.wav', 'crop', False, False],
        ['examples/source_image/full3.png', 'examples/driven_audio/deyu.wav', 'crop', False, True],
        ['examples/source_image/full4.jpeg', 'examples/driven_audio/eluosi.wav', 'full', False, True],
        ['examples/source_image/full4.jpeg', 'examples/driven_audio/imagine.wav', 'full', True, True],
        ['examples/source_image/full_body_1.png', 'examples/driven_audio/bus_chinese.wav', 'full', True, False],
        ['examples/source_image/art_13.png', 'examples/driven_audio/fayu.wav', 'resize', True, False],
        ['examples/source_image/art_5.png', 'examples/driven_audio/chinese_news.wav', 'resize', False, False],
        ['examples/source_image/art_5.png', 'examples/driven_audio/RD_Radio31_000.wav', 'resize', True, True],
    ]

    with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
        gr.Markdown(header_html)
        gr.Markdown(notice_md)

        with gr.Row().style(equal_height=False):
            # Left column: the two user-supplied inputs.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_source_image"):
                    with gr.TabItem('Upload image'):
                        with gr.Row():
                            source_image = gr.Image(
                                label="Source image",
                                source="upload",
                                type="filepath",
                            ).style(height=256, width=256)

                with gr.Tabs(elem_id="sadtalker_driven_audio"):
                    with gr.TabItem('Upload or Generating from TTS'):
                        with gr.Column(variant='panel'):
                            driven_audio = gr.Audio(
                                label="Input audio(.wav/.mp3)",
                                source="upload",
                                type="filepath",
                            )

                        # Text-to-speech input is currently disabled; kept for reference.
                        # with gr.Column(variant='panel'):
                        #     input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can generate the audio from text using @Coqui.ai TTS.")
                        #     tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
                        #     tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])

            # Right column: generation settings, trigger button and output.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_checkbox"):
                    with gr.TabItem('Settings'):
                        with gr.Column(variant='panel'):
                            preprocess_type = gr.Radio(
                                ['crop', 'resize', 'full'],
                                value='crop',
                                label='preprocess',
                                info="How to handle input image?",
                            )
                            is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works with preprocess `full`)")
                            enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')

                # NOTE: the elem_id spelling is left untouched on purpose; it is
                # a runtime identifier that external CSS/JS may reference.
                with gr.Tabs(elem_id="sadtalker_genearted"):
                    gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)

        # The examples and the button feed the very same components to the
        # same callback, so the list is built once and shared.
        generation_inputs = [
            source_image,
            driven_audio,
            preprocess_type,
            is_still_mode,
            enhancer,
        ]

        with gr.Row():
            gr.Examples(
                examples=examples,
                inputs=generation_inputs,
                outputs=[gen_video],
                fn=sad_talker.test,
                # Example outputs are cached only when the SYSTEM environment
                # variable equals 'spaces' (presumably set on Hugging Face
                # Spaces - confirm against the deployment environment).
                cache_examples=os.getenv('SYSTEM') == 'spaces',
            )

        submit.click(
            fn=sad_talker.test,
            inputs=generation_inputs,
            outputs=[gen_video],
        )

    return sadtalker_interface
if __name__ == "__main__":
    # Only runs when the file is executed as a script, not when imported.
    demo = sadtalker_demo()
    # Requests go through Gradio's queue, with max_size=10 as its limit.
    demo.queue(max_size=10)
    demo.launch(debug=True)
|