Update app.py
app.py (CHANGED)
@@ -2,16 +2,34 @@ import os
 import shutil
 from huggingface_hub import snapshot_download
 import gradio as gr
-
-from
+import numpy as np
+from PIL import Image
+import soundfile as sf
 import argparse
 import uuid
 
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+from scripts.inference import inference_process
+
 is_shared_ui = True if "fudan-generative-ai/hallo" in os.environ['SPACE_ID'] else False
 
 if not is_shared_ui:
     hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
 
+def check_image_square(image_path):
+    image = Image.open(image_path)
+    if image.width != image.height:
+        raise gr.Error("The uploaded image is not square. Please upload a square image.")
+    return image_path
+
+def convert_audio_to_wav(audio_path):
+    if not audio_path.endswith('.wav'):
+        audio_data, samplerate = sf.read(audio_path)
+        wav_path = audio_path.rsplit('.', 1)[0] + '.wav'
+        sf.write(wav_path, audio_data, samplerate)
+        return wav_path
+    return audio_path
+
 def run_inference(source_image, driving_audio, pose_weight, face_weight, lip_weight, face_expand_ratio, progress=gr.Progress(track_tqdm=True)):
     if is_shared_ui:
         raise gr.Error("This Space only works in duplicated instances")
@@ -33,23 +51,26 @@ def run_inference(source_image, driving_audio, pose_weight, face_weight, lip_wei
     inference_process(args)
     return f'output-{unique_id}.mp4'
 
-with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8' ) as demo:
+with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8') as demo:
     gr.Markdown(
         """
-        # Talking Head Generation
+        # Talking Head Generation :🗣️📢
         Upload a face image and driving audio, and adjust the weights to generate a talking head video.
+
+        > **Note:**
+        > - The face should be the main focus, making up 50%-70% of the image.
+        > - The face should be facing forward, with a rotation angle of less than 30° (no side profiles).
+        > - To make it work, duplicate the Space and run it on your own profile using a private GPU.
+        > - An L4 costs US$0.80/h.
         """
     )
 
     with gr.Row():
         with gr.Column():
-            avatar_face = gr.Image(type="filepath", label="Face", elem_id="face-input")
-            driving_audio = gr.Audio(type="filepath", label="Driving Audio", elem_id="audio-input")
-
-
+            avatar_face = gr.Image(type="filepath", label="Face", elem_id="face-input").change(check_image_square, avatar_face)
+            driving_audio = gr.Audio(type="filepath", label="Driving Audio", elem_id="audio-input").change(convert_audio_to_wav, driving_audio)
 
         with gr.Column():
-            output_video = gr.Video(label="Your Talking Head", elem_id="output-video")
             with gr.Accordion("Advanced Settings", open=False):
                 pose_weight = gr.Slider(minimum=0.0, value=1.5, label="Pose Weight")
                 face_weight = gr.Slider(minimum=0.0, value=1.0, label="Face Weight")
@@ -57,6 +78,7 @@ with gr.Blocks(theme='freddyaboulton/dracula_revamped@0.3.8' ) as demo:
                 face_expand_ratio = gr.Slider(minimum=0.0, value=1.2, label="Face Expand Ratio")
 
             generate = gr.Button("Generate", elem_id="generate-button")
+            output_video = gr.Video(label="Your Talking Head", elem_id="output-video")
 
     generate.click(
         fn=run_inference,