File size: 4,460 Bytes
63f899c
 
 
 
59d9186
63f899c
 
59d9186
63f899c
a6075c0
bd786ec
c39b894
 
63f899c
8c79f6a
a6075c0
 
 
59d9186
 
63f899c
59d9186
751c5b7
 
59d9186
63f899c
 
 
 
59d9186
63f899c
 
 
59d9186
63f899c
 
a6075c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baa1646
 
 
 
a6075c0
bd786ec
a6075c0
 
 
 
 
 
 
 
e4e4cf1
a6075c0
 
 
 
 
 
 
 
 
 
e4e4cf1
a6075c0
 
 
 
 
 
 
 
 
 
c39b894
a6075c0
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import os
import shutil
from huggingface_hub import snapshot_download
import gradio as gr
os.chdir(os.path.dirname(os.path.abspath(__file__)))
from scripts.inference import inference_process
import argparse
import uuid

# True when this code runs inside the public "multimodalart/hallo" Space, where
# inference is disabled and users are asked to duplicate the Space instead.
# SPACE_ID is read with a default so that running the script where the variable
# is not set (e.g. locally) does not crash with KeyError at import time; in that
# case the app behaves like a private, fully functional instance.
is_shared_ui = "multimodalart/hallo" in os.environ.get("SPACE_ID", "")

if not is_shared_ui:
    # Fetch the pretrained weights into ./pretrained_models only when inference
    # can actually run; the shared demo never reaches inference_process.
    hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")

def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Generate a talking-head video from a face image and an audio clip.

    Args:
        source_image: Path to the face image (the UI passes a filepath).
        driving_audio: Path to the driving audio (the UI passes a filepath).
        progress: Gradio progress tracker. It is not referenced in the body;
            it is declared with ``track_tqdm=True`` so Gradio can surface
            tqdm progress from the underlying pipeline.

    Returns:
        The path of the generated ``output-<uuid>.mp4`` file, relative to the
        current working directory.

    Raises:
        gr.Error: If running in the shared (non-duplicated) Space, or if
            either input is missing.
    """
    if is_shared_ui:
        raise gr.Error("This Space only works in duplicated instances")

    # The components hand over None when nothing was uploaded; fail with a
    # readable message instead of passing None into the inference pipeline.
    if not source_image or not driving_audio:
        raise gr.Error("Please provide both a face image and a driving audio file")

    # A random UUID keeps concurrent requests from overwriting each other's output.
    output_path = f'output-{uuid.uuid4()}.mp4'

    # inference_process expects an argparse-style namespace, so mimic the CLI
    # arguments here with fixed settings.
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,
    )

    inference_process(args)
    return output_path


# Custom stylesheet passed to gr.Blocks(css=...) below.
# It styles two banner containers by element id: "warning-duplicate" (used by
# the "duplicate this Space" notice in the UI) and "warning-ready" (no element
# with that id is created in the visible part of this file), plus the
# "actions" paragraph and a dark-mode override for the duplicate banner.
css = '''
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
.dark #warning-duplicate {
    background-color: #0c0c0c !important;
    border: 1px solid white !important;
}
'''

# Build the Gradio UI. Components are declared inside the Blocks context in the
# order they should appear, so statement order here is significant.
with gr.Blocks(css=css) as demo:
    # In the shared Space, show a banner asking the user to duplicate the
    # Space; the duplicate link is built from the SPACE_ID environment variable.
    # NOTE(review): the banner copy has grammar slips ("need to be duplicated",
    # "to start generate") - left untouched here because it is runtime text.
    if is_shared_ui:
        top_description = gr.HTML(f'''
            <div class="gr-prose">
                <h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
                Attention: this Space need to be duplicated to work</h2>
                <p class="main-message custom-color">
                    To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
                    An L4 costs <strong>US$0.80/h</strong>
                </p>
                <p class="actions custom-color">
                    <a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true">
                        <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
                    </a>
                    to start generate your talking head
                </p>
            </div>
        ''', elem_id="warning-duplicate")
    # Title and short usage note shown in every mode.
    gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
    gr.Markdown("Generate talking head avatars driven from audio. **5 seconds of audio takes >10 minutes to generate on an L4** - duplicate the space for private use or try for free on Google Colab")
    # Two-column layout: inputs and the trigger button on the left, result on the right.
    with gr.Row():
        with gr.Column():
            # type="filepath" makes both inputs reach run_inference as file paths.
            avatar_face = gr.Image(type="filepath", label="Face")
            driving_audio = gr.Audio(type="filepath", label="Driving audio")
            generate = gr.Button("Generate")
        with gr.Column():
            output_video = gr.Video(label="Your talking head")

    # Clicking "Generate" calls run_inference with the two inputs and shows
    # the returned video path in the output player.
    generate.click(
        fn=run_inference,
        inputs=[avatar_face, driving_audio],
        outputs=output_video
    )
    
# Start the app; this runs at module import because there is no __main__ guard.
demo.launch()