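"""Gradio demo for "Free-View Expressive Talking Head Video Editing" (ICASSP 2023).

Pick an example video and audio track, choose pose, emotion, and blinking
attributes, and generate an edited talking-head video.
"""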
import os
import glob

from natsort import natsorted
import gradio as gr

from inference_util import init_model, infenrece  # sic: name as exported by inference_util
from attributtes_utils import input_pose, input_emotion, input_blink  # sic: module name as in this repo

model = init_model()  # load the editing model once at startup so all requests reuse it


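# Run one editing pass: take the selected example video and audio, apply the
# chosen pose/emotion/blink attributes, and return the edited video for display.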
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    # Map the UI selections onto the attribute encodings the model expects.
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)
    blink = input_blink(blink_select)

    print("input_vid:", input_vid)
    result = infenrece(
        model,
        os.path.join("./assets/videos/", input_vid),
        os.path.join("./assets/audios/", audio_path),
        pose,
        emotion,
        blink,
    )
    print("result:", result)
    print("Finished!")

    return result


# Collect the bundled example videos.
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

# Extract a 16 kHz mono PCM WAV track from each example video that does not
# already have one (requires ffmpeg on the PATH).
os.makedirs("./assets/audios/", exist_ok=True)
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    if not os.path.exists(os.path.join("./assets/audios/", audio)):
        os.system(f"ffmpeg -y -loglevel error -i ./assets/videos/{video} -vn -acodec pcm_s16le -ar 16000 -ac 1 ./assets/audios/{audio}")
available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]


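# Build the Gradio UI: previews and attribute controls on the left,
# the generated video and the submit button on the right.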
with gr.Blocks() as demo:
    gr.HTML(
        """
            <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
                Free-View Expressive Talking Head Video Editing
            </h1>
            <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
                <a style="text-align: center; display:inline-block"
                    href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                    <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                    alt="Project Page">
                </a>
                <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                    <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
                </a>
            </p>
            """
    )
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # select and preview video from a list of examples
                video_preview = gr.Video(label="Video Preview", elem_id="video-preview", height=360, value="./assets/videos/sample1.mp4")
                video_input = gr.Dropdown(available_videos, label="Input Video", value="sample1.mp4")
                audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", value="./assets/audios/sample2.wav")
                audio_input = gr.Dropdown(available_audios, label="Input Audio", value="sample2.wav")
                pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front")
                emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral")
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")
            with gr.Column():
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)

                submit_btn = gr.Button("Generate video")

        inputs = [video_input, audio_input, pose_select, emotion_select, blink_select]
        outputs = [video_out]

        video_preview_output = [video_preview]
        audio_preview_output = [audio_preview]

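    # Swap the preview players when a new example is chosen, then wire the
    # submit button to the inference routine.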
    video_input.select(lambda x: "./assets/videos/" + x, video_input, video_preview_output)
    audio_input.select(lambda x: "./assets/audios/" + x, audio_input, audio_preview_output)
    submit_btn.click(process, inputs, outputs)

demo.queue(max_size=10).launch()  # cap the request queue at 10, then start the server