Update app.py
Browse files
app.py
CHANGED
@@ -8,8 +8,12 @@ import argparse
|
|
8 |
import uuid
|
9 |
|
10 |
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
|
|
|
11 |
|
12 |
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
|
|
|
13 |
unique_id = uuid.uuid4()
|
14 |
|
15 |
args = argparse.Namespace(
|
@@ -27,13 +31,71 @@ def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=T
|
|
27 |
inference_process(args)
|
28 |
return f'output-{unique_id}.mp4'
|
29 |
|
30 |
-
iface = gr.Interface(
|
31 |
-
title="Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation",
|
32 |
-
description="Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab",
|
33 |
-
fn=run_inference,
|
34 |
-
inputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],
|
35 |
-
cache_examples=False,
|
36 |
-
outputs="video"
|
37 |
-
)
|
38 |
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
import uuid
|
9 |
|
10 |
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
|
11 |
+
# True only on the original shared Space; SPACE_ID may be unset elsewhere (e.g. local runs), so default to "".
is_shared_ui = "multimodalart/hallo" in os.environ.get("SPACE_ID", "")
|
12 |
|
13 |
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
|
14 |
+
if is_shared_ui:
|
15 |
+
raise gr.Error("This Space only works in duplicated instances")
|
16 |
+
|
17 |
unique_id = uuid.uuid4()
|
18 |
|
19 |
args = argparse.Namespace(
|
|
|
31 |
inference_process(args)
|
32 |
return f'output-{unique_id}.mp4'
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
+
css = '''
|
36 |
+
div#warning-ready {
|
37 |
+
background-color: #ecfdf5;
|
38 |
+
padding: 0 16px 16px;
|
39 |
+
margin: 20px 0;
|
40 |
+
color: #030303!important;
|
41 |
+
}
|
42 |
+
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
|
43 |
+
color: #057857!important;
|
44 |
+
}
|
45 |
+
div#warning-duplicate {
|
46 |
+
background-color: #ebf5ff;
|
47 |
+
padding: 0 16px 16px;
|
48 |
+
margin: 20px 0;
|
49 |
+
color: #030303!important;
|
50 |
+
}
|
51 |
+
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
|
52 |
+
color: #0f4592!important;
|
53 |
+
}
|
54 |
+
div#warning-duplicate strong {
|
55 |
+
color: #0f4592;
|
56 |
+
}
|
57 |
+
p.actions {
|
58 |
+
display: flex;
|
59 |
+
align-items: center;
|
60 |
+
margin: 20px 0;
|
61 |
+
}
|
62 |
+
div#warning-duplicate .actions a {
|
63 |
+
display: inline-block;
|
64 |
+
margin-right: 10px;
|
65 |
+
}
|
66 |
+
'''
|
67 |
+
with gr.Blocks(css=css) as demo:
|
68 |
+
if is_shared_ui:
|
69 |
+
top_description = gr.HTML(f'''
|
70 |
+
<div class="gr-prose">
|
71 |
+
<h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
|
72 |
+
Attention: this Space need to be duplicated to work</h2>
|
73 |
+
<p class="main-message custom-color">
|
74 |
+
To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
|
75 |
+
An L4 costs <strong>US$0.80/h</strong>, so it should cost ~US$0.08 to generate 10s of talking head avatar.
|
76 |
+
</p>
|
77 |
+
<p class="actions custom-color">
|
78 |
+
<a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true">
|
79 |
+
<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
|
80 |
+
</a>
|
81 |
+
to start generate your talking head
|
82 |
+
</p>
|
83 |
+
</div>
|
84 |
+
''', elem_id="warning-duplicate")
|
85 |
+
gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
|
86 |
+
gr.Markdown("Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab")
|
87 |
+
with gr.Row():
|
88 |
+
with gr.Column():
|
89 |
+
avatar_face = gr.Image(type="filepath", label="Face")
|
90 |
+
driving_audio = gr.Audio(type="filepath", label="Driving audio")
|
91 |
+
generate = gr.Button("Generate")
|
92 |
+
with gr.Column():
|
93 |
+
output_video = gr.Video(label="Your talking head")
|
94 |
+
|
95 |
+
generate.click(
|
96 |
+
fn=run_inference,
|
97 |
+
inputs=[avatar_face, driving_audio],  # face image and driving audio components, in run_inference's argument order
|
98 |
+
outputs=output_video
|
99 |
+
)
|
100 |
+
|
101 |
+
demo.launch()
|