# FateZero / inference_fatezero.py
# Author: chenyangqi — "try rebuild with 50 ddim steps" (commit 976ac7c)
from FateZero.test_fatezero import *
import copy
import gradio as gr
class merge_config_then_run():
    """Gradio-facing wrapper around FateZero video editing.

    On construction it eagerly loads the Stable Diffusion v1-4 components
    (tokenizer, text encoder, VAE, pseudo-3D UNet) from a local checkpoint
    so they are shared across requests; ``run`` then merges the user's
    inputs into the default edit config and invokes ``test`` from
    ``FateZero.test_fatezero``.
    """

    def __init__(self) -> None:
        # Locally cached Stable Diffusion v1-4 checkpoint directory.
        pretrained_model_path = 'FateZero/ckpt/stable-diffusion-v1-4'
        self.tokenizer = None
        self.text_encoder = None
        self.vae = None
        self.unet = None
        # Load weights once at startup instead of per-request.
        cache_ckpt = True
        if cache_ckpt:
            # Slow tokenizer is required downstream (use_fast=False).
            self.tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_path,
                subfolder="tokenizer",
                use_fast=False,
            )
            # Text encoder and VAE come from the same checkpoint folders.
            self.text_encoder = CLIPTextModel.from_pretrained(
                pretrained_model_path,
                subfolder="text_encoder",
            )
            self.vae = AutoencoderKL.from_pretrained(
                pretrained_model_path,
                subfolder="vae",
            )
            # Inflate the 2D UNet into a pseudo-3D (video) UNet.
            # lora=160 and the sparse-causal-attention settings mirror the
            # low-resource FateZero configuration.
            model_config = {
                "lora": 160,
                "SparseCausalAttention_index": ['mid'],
                "least_sc_channel": 640,
            }
            self.unet = UNetPseudo3DConditionModel.from_2d_model(
                os.path.join(pretrained_model_path, "unet"),
                model_config=model_config,
            )

    def run(
        self,
        model_id,
        data_path,
        source_prompt,
        target_prompt,
        cross_replace_steps,
        self_replace_steps,
        enhance_words,
        enhance_words_value,
        num_steps,
        guidance_scale,
        user_input_video=None,
        # Temporal and spatial crop of the video
        start_sample_frame=0,
        n_sample_frame=8,
        stride=1,
        left_crop=0,
        right_crop=0,
        top_crop=0,
        bottom_crop=0,
    ):
        """Merge the UI inputs into the default edit config and run editing.

        Args:
            model_id: requested diffusion checkpoint id (currently logged
                only; the preloaded local checkpoint is used).
            data_path: path of a provided example video frame folder.
            source_prompt: prompt describing the source video (inversion).
            target_prompt: prompt describing the desired edited video.
            cross_replace_steps: prompt-to-prompt cross-attention replace ratio.
            self_replace_steps: prompt-to-prompt self-attention replace ratio.
            enhance_words: space-separated words to amplify in the target prompt.
            enhance_words_value: amplification value applied to each word.
            num_steps: number of DDIM inference steps.
            guidance_scale: classifier-free guidance scale.
            user_input_video: optional user-uploaded video (path string or a
                file-like object with a ``.name``); overrides ``data_path``.
            start_sample_frame / n_sample_frame / stride: temporal crop.
            left_crop / right_crop / top_crop / bottom_crop: spatial crop.

        Returns:
            Path to the rendered ``.mp4`` result.

        Raises:
            gr.Error: if neither an uploaded video nor a provided example
                video path is available.
        """
        default_edit_config = 'FateZero/config/low_resource_teaser/jeep_watercolor_ddim_10_steps.yaml'
        Omegadict_default_edit_config = OmegaConf.load(default_edit_config)

        dataset_time_string = get_time_string()
        # Work on a deep copy so the loaded default config stays pristine.
        config_now = copy.deepcopy(Omegadict_default_edit_config)
        print(f"config_now['pretrained_model_path'] = model_id {model_id}")

        # Source video and inversion prompt.
        config_now['train_dataset']['prompt'] = source_prompt
        config_now['train_dataset']['path'] = data_path

        # Spatial crop offsets (pixels removed from each side).
        offset_dict = {
            "left": left_crop,
            "right": right_crop,
            "top": top_crop,
            "bottom": bottom_crop,
        }
        # Temporal sampling window plus the spatial offsets above.
        ImageSequenceDataset_dict = {
            "start_sample_frame": start_sample_frame,
            "n_sample_frame": n_sample_frame,
            "sampling_rate": stride,
            "offset": offset_dict,
        }
        config_now['train_dataset'].update(ImageSequenceDataset_dict)

        # BUG FIX: the error must fire when *neither* input is available.
        # The original condition (`user_input_video and data_path is None`)
        # raised precisely when the user *had* uploaded a video.
        if user_input_video is None and data_path is None:
            raise gr.Error('You need to upload a video or choose a provided video')
        # An uploaded video takes precedence over the example path.
        if user_input_video is not None:
            if isinstance(user_input_video, str):
                config_now['train_dataset']['path'] = user_input_video
            elif hasattr(user_input_video, 'name') and user_input_video.name is not None:
                config_now['train_dataset']['path'] = user_input_video.name
        config_now['validation_sample_logger_config']['prompts'] = [target_prompt]

        # FateZero prompt-to-prompt attention-control configuration.
        p2p_config_now = copy.deepcopy(config_now['validation_sample_logger_config']['p2p_config'][0])
        p2p_config_now['cross_replace_steps']['default_'] = cross_replace_steps
        p2p_config_now['self_replace_steps'] = self_replace_steps
        # One amplification value per enhanced word.
        p2p_config_now['eq_params']['words'] = enhance_words.split(" ")
        p2p_config_now['eq_params']['values'] = [enhance_words_value, ] * len(p2p_config_now['eq_params']['words'])
        config_now['validation_sample_logger_config']['p2p_config'][0] = copy.deepcopy(p2p_config_now)

        # DDIM sampling configuration.
        config_now['validation_sample_logger_config']['guidance_scale'] = guidance_scale
        config_now['validation_sample_logger_config']['num_inference_steps'] = num_steps

        # Mirror the config path under 'result/' and time-stamp the run dir.
        logdir = default_edit_config.replace('config', 'result').replace('.yml', '').replace('.yaml', '') + f'_{dataset_time_string}'
        config_now['logdir'] = logdir
        print(f'Saving at {logdir}')

        # Run editing with the preloaded model components.
        save_path = test(tokenizer=self.tokenizer,
                         text_encoder=self.text_encoder,
                         vae=self.vae,
                         unet=self.unet,
                         config=default_edit_config, **config_now)
        # test() returns a gif path; the matching mp4 lives alongside it.
        mp4_path = save_path.replace('_0.gif', '_0_0_0.mp4')
        return mp4_path