# FateZero/config/style/lily_monet.yaml
pretrained_model_path: "./ckpt/stable-diffusion-v1-4"
train_dataset:
path: "data/style/red_water_lily_opening"
prompt: "a pink water lily"
start_sample_frame: 1
n_sample_frame: 8
# n_sample_frame: 22
sampling_rate: 20
stride: 8000
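  # A worked example of the sampling above, assuming frames are taken at
  # start_sample_frame + i * sampling_rate (our reading of the data loader,
  # not verified): with start_sample_frame: 1, sampling_rate: 20 and
  # n_sample_frame: 8, the clip uses source frames 1, 21, 41, 61, 81, 101,
  # 121, 141.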
# offset:
# left: 300
# right: 0
# top: 0
# bottom: 0
validation_sample_logger_config:
use_train_latents: True
use_inversion_attention: True
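  # Per the FateZero method, attention maps saved during DDIM inversion of the
  # source clip are fused back in at editing time (use_inversion_attention),
  # and sampling can start from the inverted training latents
  # (use_train_latents). This gloss is our reading of the paper, not a
  # definitive description of the flags.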
guidance_scale: 7.5
  prompts: [
    "a pink water lily",
    "Claude Monet painting of a pink water lily",
  ]
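  # Our understanding of the prompt-to-prompt setup below: the integer keys of
  # p2p_config index the prompts list above, so entry 0 configures the source
  # (reconstruction) prompt and entry 1 the Monet-style edit.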
p2p_config:
0:
      # Whether to directly copy the cross-attention from the source.
      # True: directly copy, better for object replacement.
      # False: keep the source attention, better for style transfer.
is_replace_controller: False
      # Fraction of denoising steps whose cross-attention is replaced, for semantic preservation and replacement.
cross_replace_steps:
default_: 0.7
      # Source background structure preservation, in [0, 1].
      # e.g., 0.6 replaces the self-attention in the first 60% of the steps.
self_replace_steps: 0.7
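      # Worked example: with num_inference_steps: 50 (set below),
      # self_replace_steps: 0.7 replaces the self-attention in the first
      # 0.7 * 50 = 35 denoising steps.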
      # Amplify the cross-attention of the target words; a larger value pulls the result closer to the target.
eq_params:
words: ["silver", "sculpture"]
values: [2,2]
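      # Each entry in words is paired with the value at the same index, so here
      # "silver" and "sculpture" are both amplified by 2. Note that neither
      # word occurs in the prompts above, so this block probably has no effect
      # here (our reading, not verified).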
      # Target structure-divergence hyperparameters.
      # If you change the shape of the object, it is better to use all three lines below; otherwise they are not needed.
      # Without the following three lines, all self-attention is replaced.
blend_words: [['cat',], ["cat",]]
masked_self_attention: True
      # masked_latents: False  # performance was not good in our case; needs further debugging
bend_th: [2, 2]
      # Preserve the source structure of blend_words, in [0, 1].
      # The default, bend_th: [2, 2], preserves all source self-attention.
      # bend_th: [0.0, 0.0] drives the mask to 1: more att_replace (generated attention) is used, less source attention.
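      # Hypothetical alternative (not from the original config), shown only to
      # illustrate the knob; lower thresholds let more attention be replaced:
      # bend_th: [0.3, 0.3]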
1:
is_replace_controller: False
cross_replace_steps:
default_: 0.5
self_replace_steps: 0.5
eq_params:
words: ["Monet"]
values: [10]
clip_length: "${..train_dataset.n_sample_frame}"
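  # "${..train_dataset.n_sample_frame}" is an OmegaConf relative interpolation:
  # ".." walks up to the config root, so clip_length resolves to
  # train_dataset.n_sample_frame (8 with the settings above).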
sample_seeds: [0]
val_all_frames: False
num_inference_steps: 50
prompt2prompt_edit: True
model_config:
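  # lora appears to be the rank of the LoRA layers used for finetuning; this is
  # an assumption from the key name, so check the FateZero training code.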
lora: 160
# temporal_downsample_time: 4
SparseCausalAttention_index: ['mid']
least_sc_channel: 1280
# least_sc_channel: 100000
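  # Our reading of the keys above (assumptions, verify against the
  # video_diffusion model code): SparseCausalAttention_index selects which
  # U-Net blocks ('mid' here) use sparse causal attention across frames, and
  # least_sc_channel restricts it to blocks with at least that many channels.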
test_pipeline_config:
target: video_diffusion.pipelines.p2pDDIMSpatioTemporalPipeline.p2pDDIMSpatioTemporalPipeline
  num_inference_steps: "${..validation_sample_logger_config.num_inference_steps}"
epsilon: 1e-5
train_steps: 10
seed: 0
learning_rate: 1e-5
train_temporal_conv: False
guidance_scale: "${validation_sample_logger_config.guidance_scale}"
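# A typical way to run this config (following the command pattern in the
# FateZero README):
#   python3 test_fatezero.py --config config/style/lily_monet.yaml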