|
import glob
import os
import subprocess

import spaces
from natsort import natsorted
import gradio as gr

from inference_util import init_model, infenrece
from attributtes_utils import input_pose, input_emotion, input_blink
|
|
|
# Built a single time at import so that every call to process() reuses the
# same model object instead of initialising a new one per request.
model = init_model()


@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    """Run inference for one sample video / audio pair.

    Args:
        input_vid: File name of the source video; resolved against
            ``./assets/videos/``.
        audio_path: File name of the driving audio; resolved against
            ``./assets/audios/``.
        pose_select: Pose option, converted with ``input_pose``.
        emotion_select: Emotion option, converted with ``input_emotion``.
        blink_select: Blink option, converted with ``input_blink``.

    Returns:
        The value returned by ``infenrece``, passed through unchanged.
    """
    # Convert the three UI selections first, in the same order as before.
    attributes = (
        input_pose(pose_select),
        input_emotion(emotion_select),
        input_blink(blink_select),
    )

    print("input_vid: ", input_vid)
    video_file = os.path.join("./assets/videos/", input_vid)
    audio_file = os.path.join("./assets/audios/", audio_path)
    output = infenrece(model, video_file, audio_file, *attributes)
    print("result: ", output)

    print("finished !")

    return output
|
|
|
|
|
# Sample videos shipped with the app, as bare file names in natural sort order.
available_videos = [
    os.path.basename(path) for path in natsorted(glob.glob("./assets/videos/*.mp4"))
]

# Make sure every sample video has a matching WAV file in ./assets/audios/.
# Missing ones are extracted with ffmpeg as 16 kHz, mono, 16-bit PCM.
for video in available_videos:
    # splitext swaps only the final extension; str.replace would also rewrite
    # any ".mp4" that appears earlier in the file name.
    audio = os.path.splitext(video)[0] + ".wav"
    audio_file = os.path.join("./assets/audios/", audio)
    if os.path.exists(audio_file):
        continue

    # Pass an argument list (no shell) so file names containing spaces or
    # shell metacharacters reach ffmpeg verbatim instead of being interpreted.
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-loglevel",
        "error",
        "-i",
        os.path.join("./assets/videos/", video),
        "-vn",  # audio only
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        "-ac",
        "1",
        audio_file,
    ]
    try:
        returncode = subprocess.run(ffmpeg_cmd, check=False).returncode
    except OSError as error:
        # ffmpeg could not be started (e.g. it is not installed). Extraction
        # stays best-effort, as it was with os.system, so startup continues.
        print(f"could not run ffmpeg for {video}: {error}")
        continue
    if returncode != 0:
        # Report the failure instead of ignoring it silently.
        print(f"ffmpeg exited with code {returncode} while extracting audio from {video}")

# Audio files offered in the UI, including any that were just extracted.
available_audios = [
    os.path.basename(path) for path in natsorted(glob.glob("./assets/audios/*.wav"))
]
|
|
|
|
|
# Static page header: title, project / duplicate-space badges and usage notes.
_HEADER_HTML = """
        <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
            Free-View Expressive Talking Head Video Editing
        </h1>
        <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
            <a style="text-align: center; display:inline-block"
                href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                alt="Project Page">
            </a>
            <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
            </a>
        </p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            If you wish to use your custom input files, please duplicate this space or clone it to your local environment.</p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            Alternatively, you can check our official <a href="https://github.com/sky24h/Free-View_Expressive_Talking_Head_Video_Editing">repository</a> on GitHub.
        </p>
        """


def _video_preview_path(x):
    """Map a video file name picked in the dropdown to its path for the preview."""
    return "./assets/videos/" + x


def _audio_preview_path(x):
    """Map an audio file name picked in the dropdown to its path for the preview."""
    return "./assets/audios/" + x


with gr.Blocks() as demo:
    gr.HTML(_HEADER_HTML)
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # Left column: choose and preview the inputs, then pick the
                # pose / emotion / blink options.
                video_preview = gr.Video(
                    label="Video Preview",
                    elem_id="video-preview",
                    value="./assets/videos/sample1.mp4",
                )
                video_input = gr.Dropdown(
                    choices=available_videos,
                    label="Input Video",
                    value="sample1.mp4",
                )
                audio_preview = gr.Audio(
                    label="Audio Preview",
                    elem_id="audio-preview",
                    value="./assets/audios/sample2.wav",
                )
                audio_input = gr.Dropdown(
                    choices=available_audios,
                    label="Input Audio",
                    value="sample2.wav",
                )
                pose_select = gr.Radio(
                    choices=["front", "left_right_shaking"],
                    label="Pose",
                    value="front",
                )
                emotion_select = gr.Radio(
                    choices=["neutral", "happy", "angry", "surprised"],
                    label="Emotion",
                    value="neutral",
                )
                blink_select = gr.Radio(
                    choices=["yes", "no"],
                    label="Blink",
                    value="yes",
                )

            with gr.Column():
                # Right column: generated video and the trigger button.
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
                submit_btn = gr.Button(value="Generate video")

        inputs = [video_input, audio_input, pose_select, emotion_select, blink_select]
        outputs = [video_out]

        video_preview_output = [video_preview]
        audio_preview_output = [audio_preview]

        # Selecting a file in a dropdown refreshes the matching preview player.
        video_input.select(
            fn=_video_preview_path,
            inputs=video_input,
            outputs=video_preview_output,
        )
        audio_input.select(
            fn=_audio_preview_path,
            inputs=audio_input,
            outputs=audio_preview_output,
        )
        # The button hands the five selections to process() and shows its result.
        submit_btn.click(fn=process, inputs=inputs, outputs=outputs)

# Enable the request queue (at most 10 queued requests), then start the app.
demo.queue(max_size=10)
demo.launch()
|
|