# sky24h's picture
# Add a description.
# aa8730f verified
import glob
import os
import subprocess

import spaces
from natsort import natsorted
import gradio as gr

from inference_util import init_model, infenrece
from attributtes_utils import input_pose, input_emotion, input_blink
# Build the model once at import time; `process` passes this same instance to
# every `infenrece` call instead of re-creating it per request.
model = init_model()
@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    """Run inference for the selected example video and audio.

    Args:
        input_vid: File name of the source video inside ``./assets/videos/``.
        audio_path: File name of the driving audio inside ``./assets/audios/``.
        pose_select: Pose option, converted with ``input_pose``.
        emotion_select: Emotion option, converted with ``input_emotion``.
        blink_select: Blink option, converted with ``input_blink``.

    Returns:
        The value returned by ``infenrece``, handed back unchanged.

    Raises:
        gr.Error: If the video or the audio selection is empty.
    """
    # An empty selection would otherwise surface as a bare TypeError from
    # os.path.join; report it as a readable UI error instead.
    if not input_vid or not audio_path:
        raise gr.Error("Please select both an input video and an input audio.")

    # Keep only the file name so a crafted value such as "../../secret" cannot
    # point outside the asset directories.
    video_file = os.path.join("./assets/videos/", os.path.basename(input_vid))
    audio_file = os.path.join("./assets/audios/", os.path.basename(audio_path))

    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)
    blink = input_blink(blink_select)

    print("input_vid: ", input_vid)
    result = infenrece(model, video_file, audio_file, pose, emotion, blink)
    print("result: ", result)
    print("finished !")
    return result
# Example videos offered in the UI, as bare file names in natural sort order.
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

# prepare audio: every example video gets a 16 kHz mono 16-bit PCM WAV of its
# own sound track, extracted once and reused on later start-ups.
os.makedirs("./assets/audios/", exist_ok=True)
for video in available_videos:
    # splitext swaps only the real extension, whatever its case, whereas
    # str.replace would also rewrite ".mp4" occurring inside the name.
    audio = os.path.splitext(video)[0] + ".wav"
    audio_file = os.path.join("./assets/audios/", audio)
    if os.path.exists(audio_file):
        continue

    # Argument list instead of a shell string: file names containing spaces or
    # shell metacharacters are passed to ffmpeg verbatim.
    command = [
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", os.path.join("./assets/videos/", video),
        "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
        audio_file,
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # ffmpeg could not be started at all; the remaining files would fail
        # the same way, so stop trying but keep the app starting up.
        print(f"warning: could not run ffmpeg: {err}")
        break
    if completed.returncode != 0:
        # Extraction stays best-effort, but no longer fails silently.
        print(f"warning: ffmpeg exited with code {completed.returncode} for {video}")

# Audio tracks offered in the UI (pre-existing plus freshly extracted ones).
available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]
# Page banner: title, project-page / duplicate-space badges and usage hints.
_HEADER_HTML = """
<h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
Free-View Expressive Talking Head Video Editing
</h1>
<p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
<a style="text-align: center; display:inline-block"
href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
alt="Project Page">
</a>
<a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
</a>
</p>
<p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
If you wish to use your custom input files, please duplicate this space or clone it to your local environment.</p>
<p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
Alternatively, you can check our official <a href="https://github.com/sky24h/Free-View_Expressive_Talking_Head_Video_Editing">repository</a> on GitHub.
</p>
"""

with gr.Blocks() as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Column(elem_id="col-container"):
        with gr.Row():
            # Left column: pick the example inputs and the editing attributes.
            with gr.Column():
                video_preview = gr.Video(
                    label="Video Preview",
                    elem_id="video-preview",
                    value="./assets/videos/sample1.mp4",
                )
                video_input = gr.Dropdown(
                    available_videos,
                    label="Input Video",
                    value="sample1.mp4",
                )
                audio_preview = gr.Audio(
                    label="Audio Preview",
                    elem_id="audio-preview",
                    value="./assets/audios/sample2.wav",
                )
                audio_input = gr.Dropdown(
                    available_audios,
                    label="Input Audio",
                    value="sample2.wav",
                )
                pose_select = gr.Radio(
                    ["front", "left_right_shaking"],
                    label="Pose",
                    value="front",
                )
                emotion_select = gr.Radio(
                    ["neutral", "happy", "angry", "surprised"],
                    label="Emotion",
                    value="neutral",
                )
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")

            # Right column: generated result and the trigger button.
            with gr.Column():
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
                submit_btn = gr.Button("Generate video")

        # Choosing a different file in a dropdown reloads the matching preview.
        video_input.select(
            lambda name: "./assets/videos/" + name,
            video_input,
            [video_preview],
        )
        audio_input.select(
            lambda name: "./assets/audios/" + name,
            audio_input,
            [audio_preview],
        )

        # The button runs inference on the current selections and shows the
        # returned video in the output player.
        submit_btn.click(
            process,
            [video_input, audio_input, pose_select, emotion_select, blink_select],
            [video_out],
        )

# Allow at most 10 queued requests, then start serving.
demo.queue(max_size=10)
demo.launch()