# Hugging Face Spaces demo (runs on the ZeroGPU runtime).
import glob
import os
import subprocess

import gradio as gr
from natsort import natsorted

from attributtes_utils import input_blink, input_emotion, input_pose
from inference_util import infenrece, init_model
# Load the talking-head editing model once at import time so every
# request served by this process shares the same instance.
model = init_model()
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    """Render an edited talking-head video for the selected inputs.

    Args:
        input_vid: basename of a video under ``./assets/videos/``.
        audio_path: basename of an audio file under ``./assets/audios/``.
        pose_select: pose radio choice (e.g. "front").
        emotion_select: emotion radio choice (e.g. "neutral").
        blink_select: blink radio choice ("yes" / "no").

    Returns:
        Whatever ``infenrece`` returns — presumably a path to the output
        video that Gradio can display (TODO confirm against inference_util).
    """
    # Map the raw UI selections onto the model's attribute inputs.
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)
    blink = input_blink(blink_select)
    print("input_vid: ", input_vid)

    video_file = os.path.join("./assets/videos/", input_vid)
    audio_file = os.path.join("./assets/audios/", audio_path)
    # NOTE(review): "infenrece" is the (misspelled) name exported by
    # inference_util — keep it as-is to match the project API.
    result = infenrece(model, video_file, audio_file, pose, emotion, blink)

    print("result: ", result)
    print("finished !")
    return result  # , gr.Group.update(visible=True)
# Collect the example videos shipped with the demo (basenames, natural order).
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

# Prepare audio: extract a 16 kHz mono PCM WAV track for any example video
# that does not already have one cached under ./assets/audios/.
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    if not os.path.exists(os.path.join("./assets/audios/", audio)):
        # Argument-list form (no shell) is robust against spaces/quotes in
        # file names and avoids shell-injection issues that the previous
        # f-string `os.system(...)` call had.
        subprocess.run(
            [
                "ffmpeg", "-y", "-loglevel", "error",
                "-i", os.path.join("./assets/videos/", video),
                "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
                os.path.join("./assets/audios/", audio),
            ],
            check=False,  # best-effort, matching the original os.system behavior
        )

available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]
with gr.Blocks() as demo:
    # Page header: title plus project-page / duplicate-space badges.
    gr.HTML(
        """
        <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
            Free-View Expressive Talking Head Video Editing
        </h1>
        <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
            <a style="text-align: center; display:inline-block"
                href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                    alt="Project Page">
            </a>
            <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
            </a>
        </p>
        """
    )

    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # Input side: example video/audio pickers with live previews,
                # plus the editable attributes (pose / emotion / blink).
                video_preview = gr.Video(label="Video Preview", elem_id="video-preview", height=360, value="./assets/videos/sample1.mp4")
                video_input = gr.Dropdown(available_videos, label="Input Video", value="sample1.mp4")
                audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", height=360, value="./assets/audios/sample2.wav")
                audio_input = gr.Dropdown(available_audios, label="Input Audio", value="sample2.wav")
                pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front")
                emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral")
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")
            with gr.Column():
                # Output side: the rendered result.
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
        submit_btn = gr.Button("Generate video")

    # Keep the preview players in sync with the dropdown selections.
    video_input.select(lambda name: "./assets/videos/" + name, video_input, [video_preview])
    audio_input.select(lambda name: "./assets/audios/" + name, audio_input, [audio_preview])

    submit_btn.click(
        process,
        [video_input, audio_input, pose_select, emotion_select, blink_select],
        [video_out],
    )

demo.queue(max_size=10).launch()