# Hugging Face Spaces demo (runs on the ZeroGPU runtime).
import glob
import os
import subprocess

import gradio as gr
from natsort import natsorted

from attributtes_utils import input_blink, input_emotion, input_pose
from inference_util import infenrece, init_model
# Load the talking-head editing model once at import time so every
# request served by this process shares the same instance.
model = init_model()
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    """Render an edited talking-head video for the selected inputs.

    Args:
        input_vid: basename of a video under ``./assets/videos/``.
        audio_path: basename of an audio file under ``./assets/audios/``.
        pose_select: pose radio choice (e.g. "front").
        emotion_select: emotion radio choice (e.g. "neutral").
        blink_select: blink radio choice ("yes" / "no").

    Returns:
        Whatever ``infenrece`` returns — presumably a path to the output
        video that Gradio can display (TODO confirm against inference_util).
    """
    # Map the raw UI selections onto the model's attribute inputs.
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)
    blink = input_blink(blink_select)
    print("input_vid: ", input_vid)

    video_file = os.path.join("./assets/videos/", input_vid)
    audio_file = os.path.join("./assets/audios/", audio_path)
    # NOTE(review): "infenrece" is the (misspelled) name exported by
    # inference_util — keep it as-is to match the project API.
    result = infenrece(model, video_file, audio_file, pose, emotion, blink)

    print("result: ", result)
    print("finished !")
    return result  # , gr.Group.update(visible=True)
# Collect the example videos shipped with the demo (basenames, natural order).
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

# Prepare audio: extract a 16 kHz mono PCM WAV track for any example video
# that does not already have one cached under ./assets/audios/.
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    if not os.path.exists(os.path.join("./assets/audios/", audio)):
        # Argument-list form (no shell) is robust against spaces/quotes in
        # file names and avoids shell-injection issues that the previous
        # f-string `os.system(...)` call had.
        subprocess.run(
            [
                "ffmpeg", "-y", "-loglevel", "error",
                "-i", os.path.join("./assets/videos/", video),
                "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
                os.path.join("./assets/audios/", audio),
            ],
            check=False,  # best-effort, matching the original os.system behavior
        )

available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]
with gr.Blocks() as demo:
    # Page header: title plus project-page / duplicate-space badges.
    gr.HTML(
        """
        <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
            Free-View Expressive Talking Head Video Editing
        </h1>
        <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
            <a style="text-align: center; display:inline-block"
                href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                    alt="Project Page">
            </a>
            <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
            </a>
        </p>
        """
    )

    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # Input side: example video/audio pickers with live previews,
                # plus the editable attributes (pose / emotion / blink).
                video_preview = gr.Video(label="Video Preview", elem_id="video-preview", height=360, value="./assets/videos/sample1.mp4")
                video_input = gr.Dropdown(available_videos, label="Input Video", value="sample1.mp4")
                audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", height=360, value="./assets/audios/sample2.wav")
                audio_input = gr.Dropdown(available_audios, label="Input Audio", value="sample2.wav")
                pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front")
                emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral")
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")
            with gr.Column():
                # Output side: the rendered result.
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
        submit_btn = gr.Button("Generate video")

    # Keep the preview players in sync with the dropdown selections.
    video_input.select(lambda name: "./assets/videos/" + name, video_input, [video_preview])
    audio_input.select(lambda name: "./assets/audios/" + name, audio_input, [audio_preview])

    submit_btn.click(
        process,
        [video_input, audio_input, pose_select, emotion_select, blink_select],
        [video_out],
    )

demo.queue(max_size=10).launch()