|
import os |
|
import gradio as gr |
|
from sd_model_cfg import model_dict |
|
from app import process, process0, process1, process2, get_frame_count, cfg_to_input |
|
|
|
# Markdown rendered at the top of the UI. Typo fixes: 'propogate' ->
# 'propagate', 'photorealstic' -> 'photorealistic', 'unuse' -> 'disable',
# 'before run' -> 'before running'.
DESCRIPTION = '''
## Rerender A Video
### This space provides the function of key frame translation. Full code for full video translation will be released upon the publication of the paper.
### To avoid overload, we set limitations to the maximum frame number (8) and the maximum frame resolution (512x768).
### The running time of a video of size 512x640 is about 1 minute per keyframe under T4 GPU.
### How to use:
1. **Run 1st Key Frame**: only translate the first frame, so you can adjust the prompts/models/parameters to find your ideal output appearance before running the whole video.
2. **Run Key Frames**: translate all the key frames based on the settings of the first frame
3. **Run All**: **Run 1st Key Frame** and **Run Key Frames**
4. **Run Propagation**: propagate the key frames to other frames for full video translation. This part will be released upon the publication of the paper.
### Tips:
1. This method cannot handle large or quick motions where the optical flow is hard to estimate. **Videos with stable motions are preferred**.
2. Pixel-aware fusion may not work for large or quick motions.
3. Try different color-aware AdaIN settings and even disable it to avoid color jittering.
4. `revAnimated_v11` model for non-photorealistic style, `realisticVisionV20_v20` model for photorealistic style.
5. To use your own SD/LoRA model, you may clone the space and specify your model with [sd_model_cfg.py](https://huggingface.co/spaces/Anonymous-sub/Rerender/blob/main/sd_model_cfg.py).
6. This method is based on the original SD model. You may need to [convert](https://github.com/huggingface/diffusers/blob/main/scripts/convert_diffusers_to_original_stable_diffusion.py) Diffuser/Automatic1111 models to the original one.

**This code is for research purpose and non-commercial use only.**

<a href="https://huggingface.co/spaces/Anonymous-sub/Rerender?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for no queue on your own hardware.</p>
'''

# Effectively "unlimited" upper bound on key frames; the real cap is derived
# from the video length and the sampling interval in the slider callbacks.
MAX_KEYFRAME = 100000000
|
|
|
# Build the Gradio UI. The left column holds all inputs; callbacks and the
# output column are attached further below in this `with block:` body.
block = gr.Blocks().queue()
with block:
    with gr.Row():
        gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            # --- Primary inputs ---
            input_path = gr.Video(label='Input Video',
                                  source='upload',
                                  format='mp4',
                                  visible=True)
            prompt = gr.Textbox(label='Prompt')
            seed = gr.Slider(label='Seed',
                             minimum=0,
                             maximum=2147483647,
                             step=1,
                             value=0,
                             randomize=True)
            run_button = gr.Button(value='Run All')
            with gr.Row():
                run_button1 = gr.Button(value='Run 1st Key Frame')
                run_button2 = gr.Button(value='Run Key Frames')
                run_button3 = gr.Button(value='Run Propagation')
            with gr.Accordion('Advanced options for the 1st frame translation',
                              open=False):
                image_resolution = gr.Slider(
                    # Fixed label typo: 'rsolution' -> 'resolution'.
                    label='Frame resolution',
                    minimum=256,
                    maximum=512,
                    value=512,
                    step=64,
                    info='To avoid overload, maximum 512')
                # Fixed label typo: 'ControNet' -> 'ControlNet'.
                control_strength = gr.Slider(label='ControlNet strength',
                                             minimum=0.0,
                                             maximum=2.0,
                                             value=1.0,
                                             step=0.01)
                x0_strength = gr.Slider(
                    label='Denoising strength',
                    minimum=0.00,
                    maximum=1.05,
                    value=0.75,
                    step=0.05,
                    # Added the missing space between the two sentences
                    # (implicit string concatenation produced "input.1.05:").
                    info=('0: fully recover the input. '
                          '1.05: fully rerender the input.'))
                color_preserve = gr.Checkbox(
                    label='Preserve color',
                    value=True,
                    info='Keep the color of the input video')
|
with gr.Row(): |
|
left_crop = gr.Slider(label='Left crop length', |
|
minimum=0, |
|
maximum=512, |
|
value=0, |
|
step=1) |
|
right_crop = gr.Slider(label='Right crop length', |
|
minimum=0, |
|
maximum=512, |
|
value=0, |
|
step=1) |
|
with gr.Row(): |
|
top_crop = gr.Slider(label='Top crop length', |
|
minimum=0, |
|
maximum=512, |
|
value=0, |
|
step=1) |
|
bottom_crop = gr.Slider(label='Bottom crop length', |
|
minimum=0, |
|
maximum=512, |
|
value=0, |
|
step=1) |
|
with gr.Row(): |
|
control_type = gr.Dropdown(['HED', 'canny'], |
|
label='Control type', |
|
value='HED') |
|
low_threshold = gr.Slider(label='Canny low threshold', |
|
minimum=1, |
|
maximum=255, |
|
value=100, |
|
step=1) |
|
high_threshold = gr.Slider(label='Canny high threshold', |
|
minimum=1, |
|
maximum=255, |
|
value=200, |
|
step=1) |
|
ddim_steps = gr.Slider(label='Steps', |
|
minimum=1, |
|
maximum=20, |
|
value=20, |
|
step=1, |
|
info='To avoid overload, maximum 20') |
|
scale = gr.Slider(label='CFG scale', |
|
minimum=0.1, |
|
maximum=30.0, |
|
value=7.5, |
|
step=0.1) |
|
sd_model_list = list(model_dict.keys()) |
|
sd_model = gr.Dropdown(sd_model_list, |
|
label='Base model', |
|
value='Stable Diffusion 1.5') |
|
a_prompt = gr.Textbox(label='Added prompt', |
|
value='best quality, extremely detailed') |
|
n_prompt = gr.Textbox( |
|
label='Negative prompt', |
|
value=('longbody, lowres, bad anatomy, bad hands, ' |
|
'missing fingers, extra digit, fewer digits, ' |
|
'cropped, worst quality, low quality')) |
|
with gr.Accordion('Advanced options for the key fame translation', |
|
open=False): |
|
interval = gr.Slider( |
|
label='Key frame frequency (K)', |
|
minimum=1, |
|
maximum=1, |
|
value=1, |
|
step=1, |
|
info='Uniformly sample the key frames every K frames') |
|
keyframe_count = gr.Slider( |
|
label='Number of key frames', |
|
minimum=1, |
|
maximum=1, |
|
value=1, |
|
step=1, |
|
info='To avoid overload, maximum 8 key frames') |
|
|
|
use_constraints = gr.CheckboxGroup( |
|
[ |
|
'shape-aware fusion', 'pixel-aware fusion', |
|
'color-aware AdaIN' |
|
], |
|
label='Select the cross-frame contraints to be used', |
|
value=[ |
|
'shape-aware fusion', 'pixel-aware fusion', |
|
'color-aware AdaIN' |
|
]), |
|
with gr.Row(): |
|
cross_start = gr.Slider( |
|
label='Cross-frame attention start', |
|
minimum=0, |
|
maximum=1, |
|
value=0, |
|
step=0.05) |
|
cross_end = gr.Slider(label='Cross-frame attention end', |
|
minimum=0, |
|
maximum=1, |
|
value=1, |
|
step=0.05) |
|
style_update_freq = gr.Slider( |
|
label='Cross-frame attention update frequency', |
|
minimum=1, |
|
maximum=100, |
|
value=1, |
|
step=1, |
|
info= |
|
('Update the key and value for ' |
|
'cross-frame attention every N key frames (recommend N*K>=10)' |
|
)) |
|
with gr.Row(): |
|
warp_start = gr.Slider(label='Shape-aware fusion start', |
|
minimum=0, |
|
maximum=1, |
|
value=0, |
|
step=0.05) |
|
warp_end = gr.Slider(label='Shape-aware fusion end', |
|
minimum=0, |
|
maximum=1, |
|
value=0.1, |
|
step=0.05) |
|
with gr.Row(): |
|
mask_start = gr.Slider(label='Pixel-aware fusion start', |
|
minimum=0, |
|
maximum=1, |
|
value=0.5, |
|
step=0.05) |
|
mask_end = gr.Slider(label='Pixel-aware fusion end', |
|
minimum=0, |
|
maximum=1, |
|
value=0.8, |
|
step=0.05) |
|
with gr.Row(): |
|
ada_start = gr.Slider(label='Color-aware AdaIN start', |
|
minimum=0, |
|
maximum=1, |
|
value=0.8, |
|
step=0.05) |
|
ada_end = gr.Slider(label='Color-aware AdaIN end', |
|
minimum=0, |
|
maximum=1, |
|
value=1, |
|
step=0.05) |
|
mask_strength = gr.Slider(label='Pixel-aware fusion stength', |
|
minimum=0, |
|
maximum=1, |
|
value=0.5, |
|
step=0.01) |
|
inner_strength = gr.Slider( |
|
label='Pixel-aware fusion detail level', |
|
minimum=0.5, |
|
maximum=1, |
|
value=0.9, |
|
step=0.01, |
|
info='Use a low value to prevent artifacts') |
|
smooth_boundary = gr.Checkbox( |
|
label='Smooth fusion boundary', |
|
value=True, |
|
info='Select to prevent artifacts at boundary') |
|
|
|
with gr.Accordion('Example configs', open=True): |
|
config_dir = 'config' |
|
config_list = os.listdir(config_dir) |
|
args_list = [] |
|
for config in config_list: |
|
try: |
|
config_path = os.path.join(config_dir, config) |
|
args = cfg_to_input(config_path) |
|
args_list.append(args) |
|
except FileNotFoundError: |
|
|
|
pass |
|
|
|
ips = [ |
|
prompt, image_resolution, control_strength, color_preserve, |
|
left_crop, right_crop, top_crop, bottom_crop, control_type, |
|
low_threshold, high_threshold, ddim_steps, scale, seed, |
|
sd_model, a_prompt, n_prompt, interval, keyframe_count, |
|
x0_strength, use_constraints[0], cross_start, cross_end, |
|
style_update_freq, warp_start, warp_end, mask_start, |
|
mask_end, ada_start, ada_end, mask_strength, |
|
inner_strength, smooth_boundary |
|
] |
|
|
|
with gr.Column(): |
|
result_image = gr.Image(label='Output first frame', |
|
type='numpy', |
|
interactive=False) |
|
result_keyframe = gr.Video(label='Output key frame video', |
|
format='mp4', |
|
interactive=False) |
|
with gr.Row(): |
|
gr.Examples(examples=args_list, |
|
inputs=[input_path, *ips], |
|
fn=process0, |
|
outputs=[result_image, result_keyframe], |
|
cache_examples=True) |
|
|
|
def input_uploaded(path): |
|
frame_count = get_frame_count(path) |
|
if frame_count <= 2: |
|
raise gr.Error('The input video is too short!' |
|
'Please input another video.') |
|
|
|
default_interval = min(10, frame_count - 2) |
|
max_keyframe = min((frame_count - 2) // default_interval, MAX_KEYFRAME) |
|
|
|
global video_frame_count |
|
video_frame_count = frame_count |
|
global global_video_path |
|
global_video_path = path |
|
|
|
return gr.Slider.update(value=default_interval, |
|
maximum=MAX_KEYFRAME), gr.Slider.update( |
|
value=max_keyframe, maximum=max_keyframe) |
|
|
|
def input_changed(path): |
|
frame_count = get_frame_count(path) |
|
if frame_count <= 2: |
|
return gr.Slider.update(maximum=1), gr.Slider.update(maximum=1) |
|
|
|
default_interval = min(10, frame_count - 2) |
|
max_keyframe = min((frame_count - 2) // default_interval, MAX_KEYFRAME) |
|
|
|
global video_frame_count |
|
video_frame_count = frame_count |
|
global global_video_path |
|
global_video_path = path |
|
|
|
return gr.Slider.update(maximum=max_keyframe), \ |
|
gr.Slider.update(maximum=max_keyframe) |
|
|
|
def interval_changed(interval): |
|
global video_frame_count |
|
if video_frame_count is None: |
|
return gr.Slider.update() |
|
|
|
max_keyframe = (video_frame_count - 2) // interval |
|
|
|
return gr.Slider.update(value=max_keyframe, maximum=max_keyframe) |
|
|
|
    # Keep the interval / key-frame-count sliders consistent with the
    # currently loaded video.
    input_path.change(input_changed, input_path, [interval, keyframe_count])
    input_path.upload(input_uploaded, input_path, [interval, keyframe_count])
    interval.change(interval_changed, interval, keyframe_count)

    # Action buttons: full pipeline, first frame only, key frames only.
    run_button.click(fn=process,
                     inputs=ips,
                     outputs=[result_image, result_keyframe])
    run_button1.click(fn=process1, inputs=ips, outputs=[result_image])
    run_button2.click(fn=process2, inputs=ips, outputs=[result_keyframe])
|
|
|
    def process3():
        # Placeholder for the unreleased propagation step: always surfaces
        # a "coming soon" error in the UI.
        raise gr.Error(
            "Coming Soon. Full code for full video translation will be "
            "released upon the publication of the paper.")

    run_button3.click(fn=process3, outputs=[result_keyframe])

# Serve with a single worker and a bounded request queue to avoid overload,
# listening on all interfaces (required for the hosted space).
block.queue(concurrency_count=1, max_size=20)
block.launch(server_name='0.0.0.0')
|
|