# HReynaud's picture
# Upload folder using huggingface_hub
# cfb9037 verified
# Shared values that other sections pull in through `${globals.<key>}`
# interpolation. Both keys have to be nested under `globals`; left at the top
# level, `globals` is null and the lookup has nothing to resolve against.
globals:
  # Not referenced anywhere in the visible part of this file.
  target_fps: 32
  # Referenced by the `num_frames` setting of the unet section.
  target_nframes: 64
# Denoising network. `_class_name` names the class to build; the remaining
# keys are presumably forwarded to it as constructor arguments -- confirm
# against the code that loads this file.
unet:
  _class_name: UNetSpatioTemporalConditionModel
  addition_time_embed_dim: 1
  # The per-stage lists below (block_out_channels, down_block_types,
  # num_attention_heads, up_block_types) each have four entries; keep them the
  # same length when editing.
  block_out_channels:
    - 128
    - 256
    - 256
    - 512
  cross_attention_dim: 1
  down_block_types:
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - DownBlockSpatioTemporal
  # NOTE(review): in_channels is twice out_channels -- presumably the noisy
  # input is concatenated with a conditioning input of the same width; confirm.
  in_channels: 8
  layers_per_block: 2
  num_attention_heads:
    - 8
    - 16
    - 16
    - 32
  # Interpolated from globals.target_nframes, so the frame count is defined in
  # one place only.
  num_frames: ${globals.target_nframes}
  out_channels: 4
  projection_class_embeddings_input_dim: 1
  sample_size: 14
  transformer_layers_per_block: 1
  # Same block kinds as down_block_types, listed in the opposite order.
  up_block_types:
    - UpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
# Noise scheduler. `_class_name` names the class to build; the remaining keys
# are presumably forwarded to it as constructor arguments -- confirm against
# the code that loads this file.
noise_scheduler:
  _class_name: DDPMScheduler
  num_train_timesteps: 1000
  beta_start: 0.0001
  beta_end: 0.02
  # Options: linear, scaled_linear, or squaredcos_cap_v2.
  beta_schedule: linear
  # Options: fixed_small, fixed_small_log, fixed_large, fixed_large_log,
  # learned, or learned_range.
  variance_type: fixed_small
  clip_sample: true
  # Library default is 1.
  clip_sample_range: 4.0
  # Options: epsilon, sample, v_prediction.
  prediction_type: v_prediction
  # Do not touch.
  thresholding: false
  # The next two keys are unused while thresholding is false.
  dynamic_thresholding_ratio: 0.995
  sample_max_value: 1.0
  timestep_spacing: leading
  # 0 means no shift. NOTE(review): per the diffusers docs this offset is added
  # to the inference timesteps under "leading" spacing, so it is only a no-op
  # because it is 0 -- confirm before changing it.
  steps_offset: 0