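# FollowYourPose / inference_followyourpose.py
# Hugging Face file-viewer header: uploaded by mayuema, commit d2dd1cd ("first release"), 3.69 kB.
# Viewer labels at capture time: raw / history / blame / No virus.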
from FollowYourPose.test_followyourpose import *
import copy
import gradio as gr
from transformers import AutoTokenizer, CLIPTextModel
def get_time_string() -> str:
    """Return the current time as a compact ``YYMMDD-HHMMSS`` stamp.

    The value comes from ``datetime.datetime.now()`` with no timezone
    argument. The year is written as its offset from 2000, zero-padded to
    two digits; every other field is zero-padded to two digits as well.
    """
    now = datetime.datetime.now()
    date_part = f"{(now.year - 2000):02d}{now.month:02d}{now.day:02d}"
    clock_part = f"{now.hour:02d}{now.minute:02d}{now.second:02d}"
    return f"{date_part}-{clock_part}"
class merge_config_then_run():
    """Merge user inputs into the pose-sampling config and run inference.

    Every call to :meth:`run` loads
    ``FollowYourPose/configs/pose_sample.yaml``, overrides its
    ``validation_data`` and ``skeleton_path`` entries with the call
    arguments, and passes the merged config to ``test``.
    """

    def __init__(self) -> None:
        # Placeholders only: no tokenizer or model weights are loaded here.
        self.tokenizer = None
        self.text_encoder = None
        self.vae = None
        self.unet = None

    def run(
        self,
        data_path,
        target_prompt,
        num_steps,
        guidance_scale,
        user_input_video=None,
        start_sample_frame=0,
        n_sample_frame=8,
        stride=1,
        left_crop=0,
        right_crop=0,
        top_crop=0,
        bottom_crop=0,
    ):
        """Run one sampling job and return the path of the resulting mp4.

        Args:
            data_path: Stored as the config's ``skeleton_path``.
            target_prompt: Becomes the single entry of
                ``validation_data.prompts``.
            num_steps: Stored as ``validation_data.num_inference_steps``.
            guidance_scale: Stored as ``validation_data.guidance_scale``.
            user_input_video: Optional uploaded video, either a path string
                or an object exposing a non-``None`` ``name`` attribute.
                When usable it overrides ``validation_data.path``.
            start_sample_frame: Stored as
                ``validation_data.start_sample_frame``.
            n_sample_frame: Stored as ``validation_data.n_sample_frame``.
            stride: Stored as ``validation_data.sampling_rate``.
            left_crop: Stored as ``validation_data.offset.left``.
            right_crop: Stored as ``validation_data.offset.right``.
            top_crop: Stored as ``validation_data.offset.top``.
            bottom_crop: Stored as ``validation_data.offset.bottom``.

        Returns:
            The value returned by ``test`` with ``'_0.gif'`` replaced by
            ``'_0_0_0.mp4'`` (presumably ``test`` returns the path of a
            saved GIF -- confirm against ``test_followyourpose``).

        Raises:
            gr.Error: If both ``user_input_video`` and ``data_path`` are
                ``None``.
        """
        # Fail fast, before reading the config file, when there is no input
        # at all. The previous condition (`user_input_video and data_path is
        # None`) rejected a valid upload and let the "nothing given" case
        # through, contradicting its own error message.
        if user_input_video is None and data_path is None:
            raise gr.Error('You need to upload a video or choose a provided video')

        # The config is loaded fresh on every call, so it can be modified
        # in place without a defensive copy.
        config_now = OmegaConf.load('FollowYourPose/configs/pose_sample.yaml')

        validation_data = config_now['validation_data']
        validation_data.update(
            {
                "start_sample_frame": start_sample_frame,
                "n_sample_frame": n_sample_frame,
                "sampling_rate": stride,
                "offset": {
                    "left": left_crop,
                    "right": right_crop,
                    "top": top_crop,
                    "bottom": bottom_crop,
                },
            }
        )

        # An upload may arrive as a plain path or as a file-like wrapper
        # carrying the path in `.name`; any other shape leaves the config's
        # own path untouched.
        if user_input_video is not None:
            if isinstance(user_input_video, str):
                config_now['validation_data']['path'] = user_input_video
            elif getattr(user_input_video, 'name', None) is not None:
                config_now['validation_data']['path'] = user_input_video.name

        config_now['validation_data']['prompts'] = [target_prompt]
        # DDIM sampling settings.
        config_now['validation_data']['guidance_scale'] = guidance_scale
        config_now['validation_data']['num_inference_steps'] = num_steps
        config_now['skeleton_path'] = data_path

        save_path = test(**config_now)
        return save_path.replace('_0.gif', '_0_0_0.mp4')