Spaces:

lazhrach
/

AvatarTest

Runtime error

App Files

lazhrach committed on Mar 8

Commit

bd79dbb

•

1 Parent(s): 83cb595

Remove 'checkpoints' directory from .gitignore

Browse files

Files changed (1) hide show

app.py +26 -26

app.py CHANGED Viewed

@@ -49,8 +49,8 @@ def ref_video_fn(path_of_ref_video):
         return gr.update(value=False)
 def download_model():
-     REPO_ID = 'vinthony/SadTalker-V002rc'
-     snapshot_download(REPO_ID)
 def sadtalker_demo():
@@ -62,21 +62,21 @@ def sadtalker_demo():
         with gr.Row():
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_source_image"):
-                    with gr.TabItem('Source image'):
                         with gr.Row():
-                            source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image")
                 with gr.Tabs(elem_id="sadtalker_driven_audio"):
-                    with gr.TabItem('Driving Methods'):
                         with gr.Row():
-                            model_choice = gr.Dropdown(choices=list(models.keys()), value="TeraTTS/natasha-g2p-vits", label="Choose TTS model")
                         with gr.Row():
-                            length_scale = gr.Slider(minimum=0.1, maximum=2.0, label="Length scale (increase length of sound) Default: 1.2", value=1.2)
                         with gr.Row():
-                            input_text = gr.Textbox(label="Enter text")
-                        with gr.Row():
-                            driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
                             driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
                             with gr.Column(visible=False):
@@ -84,44 +84,44 @@ def sadtalker_demo():
                                 length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
                                 use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
                         with gr.Row():
-                            play_button = gr.Button('Text To Speech', variant='primary')
                             play_button.click(
                                 fn=text_to_speech, inputs=[model_choice, length_scale, input_text], outputs=[driven_audio]
                             )
                         with gr.Row():
-                            ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref")
                             with gr.Column():
-                                use_ref_video = gr.Checkbox(label="Use Reference Video")
-                                ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Reference Video',info="How to borrow from reference Video?((fully transfer, aka, video driving mode))")
                             ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video]) # todo
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_checkbox"):
-                    with gr.TabItem('Settings'):
                         with gr.Column(variant='panel'):
                             with gr.Row():
-                                pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Pose style", value=0) #
-                                exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="expression scale", value=1) #
-                                blink_every = gr.Checkbox(label="use eye blink", value=True)
                             with gr.Row():
-                                size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
-                                preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
                             with gr.Row():
-                                is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
-                                facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='facerender', info="which face render?")
                             with gr.Row():
-                                batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=1)
-                                enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
-                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
                 with gr.Tabs(elem_id="sadtalker_genearted"):
-                        gen_video = gr.Video(label="Generated video", format="mp4")
         submit.click(
                 fn=sad_talker.test,

         return gr.update(value=False)
 def download_model():
+    REPO_ID = 'vinthony/SadTalker-V002rc'
+    snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
 def sadtalker_demo():
         with gr.Row():
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_source_image"):
+                    with gr.TabItem('Исходное изображение'):
                         with gr.Row():
+                            source_image = gr.Image(label="Аватарка", source="upload", type="filepath", elem_id="img2img_image")
                 with gr.Tabs(elem_id="sadtalker_driven_audio"):
+                    with gr.TabItem('Генерация или загрузка аудио'):
                         with gr.Row():
+                            model_choice = gr.Dropdown(choices=list(models.keys()), value="TeraTTS/natasha-g2p-vits", label="Выберите модель TTS для синтеза речи:")
+                        with gr.Row(visible=False):
+                            length_scale = gr.Slider(minimum=0.1, maximum=2.0, label="Length scale (увеличить длину звучания) По умолчанию: 1.2", value=1.2)
                         with gr.Row():
+                            input_text = gr.Textbox(label="Введите текст для синтеза речи:")
                         with gr.Row():
+                            driven_audio = gr.Audio(label="Аудиофайл", source="upload", type="filepath")
                             driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
                             with gr.Column(visible=False):
                                 length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
                                 use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
                         with gr.Row():
+                            play_button = gr.Button('Синтез речи', variant='primary')
                             play_button.click(
                                 fn=text_to_speech, inputs=[model_choice, length_scale, input_text], outputs=[driven_audio]
                             )
                         with gr.Row():
+                            ref_video = gr.Video(label="Видео для генерации", source="upload", type="filepath", elem_id="vidref")
                             with gr.Column():
+                                use_ref_video = gr.Checkbox(label="Использовать видео для генерации")
+                                ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Видео для эталона',info="Как использовать эталонное видео?((video driving mode))")
                             ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video]) # todo
             with gr.Column(variant='panel'):
                 with gr.Tabs(elem_id="sadtalker_checkbox"):
+                    with gr.TabItem('Настройки генерации видео'):
                         with gr.Column(variant='panel'):
                             with gr.Row():
+                                pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Стиль", value=0) #
+                                exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="Степень выразительности", value=1) #
+                                blink_every = gr.Checkbox(label="Моргание", value=True)
                             with gr.Row():
+                                size_of_image = gr.Radio([256, 512], value=256, label='Разрешение модели лица', info="256/512?") #
+                                preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='Предварительная обработка', info="Как обрабатывать входное изображение?")
                             with gr.Row():
+                                is_still_mode = gr.Checkbox(label="Режим неподвижности (меньше движений головы, работает с full режимом)")
+                                facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='Рендер лица', info="Какой использовать?")
                             with gr.Row():
+                                batch_size = gr.Slider(label="Размер пакета при генерации", step=1, maximum=10, value=1)
+                                enhancer = gr.Checkbox(label="GFPGAN как пост-обработчик лица")
+                            submit = gr.Button('Генерировать', elem_id="sadtalker_generate", variant='primary')
                 with gr.Tabs(elem_id="sadtalker_genearted"):
+                        gen_video = gr.Video(label="Сгенерированное изображение", format="mp4")
         submit.click(
                 fn=sad_talker.test,