Spaces:

sky24h
/

Free-View_Expressive_Talking_Head_Video_Editing

Running on Zero

Free-View_Expressive_Talking_Head_Video_Editing

File size: 4,056 Bytes

2b34e02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eca9dba
2b34e02
 
 
 
 
 
eae1cca
2b34e02
 
 
 
cb78db8
41e07ce
 
cb78db8
2b34e02
41e07ce
2b34e02
41e07ce
2b34e02
eae1cca
 
2b34e02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eae1cca

import glob
import os
import subprocess

import gradio as gr
from natsort import natsorted

from inference_util import init_model, infenrece
from attributtes_utils import input_pose, input_emotion, input_blink

# Built once at import time and shared by every call to process() below.
# NOTE(review): presumably this loads network weights; confirm in inference_util.
model = init_model()


def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    """Run one generation request coming from the "Generate video" button.

    Args:
        input_vid: File name of the source video, relative to ./assets/videos/.
        audio_path: File name of the driving audio, relative to ./assets/audios/.
        pose_select: Pose choice from the UI, converted with input_pose().
        emotion_select: Emotion choice from the UI, converted with input_emotion().
        blink_select: Blink choice from the UI, converted with input_blink().

    Returns:
        Whatever infenrece() returns; it is handed to the output video
        component, so presumably a path to the rendered video (confirm in
        inference_util).
    """
    # Translate the UI choices into the control values the model consumes.
    pose_value = input_pose(pose_select)
    emotion_value = input_emotion(emotion_select)
    blink_value = input_blink(blink_select)

    print("input_vid: ", input_vid)

    # The dropdowns supply bare file names; anchor them in the asset folders.
    video_file = os.path.join("./assets/videos/", input_vid)
    audio_file = os.path.join("./assets/audios/", audio_path)
    result = infenrece(model, video_file, audio_file, pose_value, emotion_value, blink_value)
    print("result: ", result)

    print("finished !")

    return result


# Example clips shipped with the app, as bare file names in natural sort order.
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]


def _extract_audio(video_name):
    """Create the WAV counterpart of an example video if it is missing.

    The audio is written to ./assets/audios/ under the video's stem with a
    ".wav" extension, as 16 kHz mono 16-bit PCM. Extraction is best effort:
    a failing or missing ffmpeg is reported on stdout and does not stop the
    app from starting.

    Args:
        video_name: File name (not path) of a video inside ./assets/videos/.
    """
    # splitext swaps only the trailing extension; str.replace would also
    # rewrite a ".mp4" that happens to occur in the middle of the name.
    audio_name = os.path.splitext(video_name)[0] + ".wav"
    audio_file = os.path.join("./assets/audios/", audio_name)
    if os.path.exists(audio_file):
        return

    # An argument list (no shell) passes file names with spaces or shell
    # metacharacters to ffmpeg verbatim instead of letting a shell parse them.
    command = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", os.path.join("./assets/videos/", video_name),
        "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
        audio_file,
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # Raised e.g. when the ffmpeg executable is not installed.
        print(f"could not run ffmpeg for {video_name}: {err}")
        return
    if completed.returncode != 0:
        print(f"ffmpeg exited with code {completed.returncode} for {video_name}")


# prepare audio
# ffmpeg does not create a missing output directory, so make sure it exists.
os.makedirs("./assets/audios/", exist_ok=True)
for video in available_videos:
    _extract_audio(video)
available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]


# Page banner: title plus badge links to the project page and to the
# "duplicate this Space" action. The markup is passed to gr.HTML verbatim.
_HEADER_HTML = """
            <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
                Free-View Expressive Talking Head Video Editing
            </h1>
            <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
                <a style="text-align: center; display:inline-block"
                    href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                    <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                    alt="Project Page">
                </a>
                <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                    <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
                </a>
            </p>
            """

with gr.Blocks() as demo:
    gr.HTML(value=_HEADER_HTML)

    with gr.Column(elem_id="col-container"):
        with gr.Row():
            # Left column: source selection with previews, then the controls.
            with gr.Column():
                video_preview = gr.Video(
                    label="Video Preview",
                    elem_id="video-preview",
                    height=360,
                    value="./assets/videos/sample1.mp4",
                )
                video_input = gr.Dropdown(
                    choices=available_videos,
                    label="Input Video",
                    value="sample1.mp4",
                )
                # The audio defaults to a different sample than the video.
                audio_preview = gr.Audio(
                    label="Audio Preview",
                    elem_id="audio-preview",
                    height=360,
                    value="./assets/audios/sample2.wav",
                )
                audio_input = gr.Dropdown(
                    choices=available_audios,
                    label="Input Audio",
                    value="sample2.wav",
                )
                pose_select = gr.Radio(
                    choices=["front", "left_right_shaking"],
                    label="Pose",
                    value="front",
                )
                emotion_select = gr.Radio(
                    choices=["neutral", "happy", "angry", "surprised"],
                    label="Emotion",
                    value="neutral",
                )
                blink_select = gr.Radio(
                    choices=["yes", "no"],
                    label="Blink",
                    value="yes",
                )

            # Right column: generated result and the trigger button.
            with gr.Column():
                video_out = gr.Video(
                    label="Video Output",
                    elem_id="video-output",
                    height=360,
                )
                submit_btn = gr.Button(value="Generate video")

        # Component lists wired into the event handlers below; the order of
        # `inputs` matches the parameter order of process().
        inputs = [video_input, audio_input, pose_select, emotion_select, blink_select]
        outputs = [video_out]

        video_preview_output = [video_preview]
        audio_preview_output = [audio_preview]

    # Selecting a dropdown entry loads the matching asset into its preview.
    video_input.select(
        fn=lambda name: "./assets/videos/" + name,
        inputs=video_input,
        outputs=video_preview_output,
    )
    audio_input.select(
        fn=lambda name: "./assets/audios/" + name,
        inputs=audio_input,
        outputs=audio_preview_output,
    )
    submit_btn.click(fn=process, inputs=inputs, outputs=outputs)

# Enable the request queue with at most 10 pending requests, then serve the app.
demo.queue(max_size=10).launch()