# CUDA_VISIBLE_DEVICES=0 python test_fatezero.py --config config/teaser/jeep_posche.yaml
pretrained_model_path: "./ckpt/jeep_tuned_200"
train_dataset:
  path: "data/teaser_car-turn"
  prompt: "a silver jeep driving down a curvy road in the countryside,"
  n_sample_frame: 8
  sampling_rate: 1
  stride: 80
  offset:
    left: 0
    right: 0
    top: 0
    bottom: 0
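
# A minimal sketch (an illustrative assumption, not code from this repo) of how
# the sampling fields above select frame indices for one clip:
#   indices = [begin + i * sampling_rate for i in range(n_sample_frame)]
#   # sampling_rate=1, n_sample_frame=8 -> frames begin .. begin+7
#   # stride=80 presumably shifts `begin` between consecutive clips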
validation_sample_logger_config:
  use_train_latents: true
  use_inversion_attention: true
  guidance_scale: 7.5
  prompts: [
    a silver jeep driving down a curvy road in the countryside,
    a Porsche car driving down a curvy road in the countryside,
  ]
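  # Note (our reading of the config, not a comment from the original file): the
  # first prompt describes the source video and is used for inversion; later
  # prompts are edit targets. Each numbered p2p_config entry below configures
  # the edit for the prompt with the same index.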
  p2p_config:
    0:
      # Whether to directly copy the cross-attention from the source.
      # True: directly copy, better for object replacement.
      # False: keep the source attention, better for style editing.
      is_replace_controller: False

      # Semantic-layout preservation: more steps replace more cross-attention,
      # preserving more of the source semantic layout.
      cross_replace_steps:
        default_: 0.8

      # Source background-structure preservation, in [0, 1].
      # e.g., 0.6 replaces the self-attention of the first 60% of steps.
      self_replace_steps: 0.9

      # Amplify the cross-attention of the target words; larger values move the
      # result closer to the target. Useful in style editing.
      eq_params:
        words: ["watercolor", "painting"]
        values: [10, 10]
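      # Hypothetical variant for this clip's jeep -> Porsche edit (an
      # illustrative assumption, not part of the original config): amplify the
      # target word itself rather than style words.
      # eq_params:
      #   words: ["Porsche"]
      #   values: [2]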
      # Target structure-divergence hyperparameters.
      # If you change the shape of the object, it is better to use all three
      # lines below; otherwise they are not needed.
      # Without the following three lines, all self-attention is replaced.
      # Useful in shape editing.
      blend_words: [['jeep',], ["car",]]
      masked_self_attention: True
      # masked_latents: False  # Directly copying the latents does not perform well in our case.
      # Preserve the source structure of blend_words, in [0, 1].
      # bend_th -> [1.0, 1.0]: mask -> 0, use inversion-time attention; the structure stays similar to the input.
      # bend_th -> [0.0, 0.0]: mask -> 1, use more edited self-attention; more generated shape, less source attention.
      bend_th: [0.3, 0.3]
    1:
      cross_replace_steps:
        default_: 0.5
      self_replace_steps: 0.5
      use_inversion_attention: True
      is_replace_controller: True
      # For local editing. If the edit is not yet local, use only the source
      # object on both sides, e.g. blend_words: [['cat',], ['cat',]].
      blend_words: [['silver', 'jeep'], ["Porsche", 'car']]
      masked_self_attention: True
      bend_th: [0.3, 0.3]
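    # Hypothetical third edit (an illustrative assumption, not part of the
    # original config): append a style target to `prompts` above, e.g.
    #   watercolor painting of a silver jeep driving down a curvy road in the countryside,
    # and pair it with an entry of the same index here:
    # 2:
    #   cross_replace_steps:
    #     default_: 0.8
    #   self_replace_steps: 0.8
    #   is_replace_controller: False
    #   eq_params:
    #     words: ["watercolor"]
    #     values: [10]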
  clip_length: "${..train_dataset.n_sample_frame}"
  sample_seeds: [0]
  num_inference_steps: 50
  prompt2prompt_edit: True
model_config:
  # LoRA rank; presumably this should match the rank used when tuning the
  # checkpoint in pretrained_model_path.
  lora: 160
test_pipeline_config:
  target: video_diffusion.pipelines.p2pDDIMSpatioTemporalPipeline.p2pDDIMSpatioTemporalPipeline
  num_inference_steps: "${..validation_sample_logger_config.num_inference_steps}"
epsilon: 1e-5
train_steps: 10
seed: 0
learning_rate: 1e-5
train_temporal_conv: False
guidance_scale: "${validation_sample_logger_config.guidance_scale}"
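
# A minimal sketch of loading and resolving this file (assuming the repo reads
# its configs with OmegaConf, which the ${...} interpolations above suggest):
#
#   from omegaconf import OmegaConf
#   cfg = OmegaConf.load("config/teaser/jeep_posche.yaml")
#   OmegaConf.resolve(cfg)  # resolves ${..train_dataset.n_sample_frame} etc.
#   assert cfg.validation_sample_logger_config.clip_length == 8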