V3D / scripts /pub /configs /V3D_512.yaml
heheyas
init
cfb7702
raw
history blame
4.68 kB
model:
base_learning_rate: 1.0e-04
target: sgm.models.video_diffusion.DiffusionEngine
params:
ckpt_path: ckpts/V3D_512.ckpt
scale_factor: 0.18215
disable_first_stage_autocast: true
input_key: latents
log_keys: []
scheduler_config:
target: sgm.lr_scheduler.LambdaLinearScheduler
params:
warm_up_steps:
- 1
cycle_lengths:
- 10000000000000
f_start:
- 1.0e-06
f_max:
- 1.0
f_min:
- 1.0
denoiser_config:
target: sgm.modules.diffusionmodules.denoiser.Denoiser
params:
scaling_config:
target: sgm.modules.diffusionmodules.denoiser_scaling.VScalingWithEDMcNoise
network_config:
target: sgm.modules.diffusionmodules.video_model.VideoUNet
params:
adm_in_channels: 768
num_classes: sequential
use_checkpoint: true
in_channels: 8
out_channels: 4
model_channels: 320
attention_resolutions:
- 4
- 2
- 1
num_res_blocks: 2
channel_mult:
- 1
- 2
- 4
- 4
num_head_channels: 64
use_linear_in_transformer: true
transformer_depth: 1
context_dim: 1024
spatial_transformer_attn_type: softmax-xformers
extra_ff_mix_layer: true
use_spatial_context: true
merge_strategy: learned_with_images
video_kernel_size:
- 3
- 1
- 1
conditioner_config:
target: sgm.modules.GeneralConditioner
params:
emb_models:
- is_trainable: false
ucg_rate: 0.2
input_key: cond_frames_without_noise
target: sgm.modules.encoders.modules.IdentityEncoder
- input_key: fps_id
is_trainable: true
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256
- input_key: motion_bucket_id
is_trainable: true
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256
- input_key: cond_frames
is_trainable: false
ucg_rate: 0.2
target: sgm.modules.encoders.modules.IdentityEncoder
- input_key: cond_aug
is_trainable: true
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256
first_stage_config:
target: sgm.models.autoencoder.AutoencodingEngine
params:
loss_config:
target: torch.nn.Identity
regularizer_config:
target: sgm.modules.autoencoding.regularizers.DiagonalGaussianRegularizer
encoder_config:
target: sgm.modules.diffusionmodules.model.Encoder
params:
attn_type: vanilla
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
decoder_config:
target: sgm.modules.autoencoding.temporal_ae.VideoDecoder
params:
attn_type: vanilla
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
video_kernel_size:
- 3
- 1
- 1
sampler_config:
target: sgm.modules.diffusionmodules.sampling.EulerEDMSampler
params:
num_steps: 30
discretization_config:
target: sgm.modules.diffusionmodules.discretizer.EDMDiscretization
params:
sigma_max: 700.0
guider_config:
target: sgm.modules.diffusionmodules.guiders.LinearPredictionGuider
params:
max_scale: 3.5
min_scale: 3.5
num_frames: 18
loss_fn_config:
target: sgm.modules.diffusionmodules.loss.StandardDiffusionLoss
params:
batch2model_keys:
- num_video_frames
- image_only_indicator
loss_weighting_config:
target: sgm.modules.diffusionmodules.loss_weighting.EDMWeighting
params:
sigma_data: 1.0
sigma_sampler_config:
target: sgm.modules.diffusionmodules.sigma_sampling.EDMSampling
params:
p_mean: 1.5
p_std: 2.0