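# NOTE(review): the text "Spaces: Sleeping Sleeping" preceded the config in
# this capture; it looks like web-page status text rather than configuration.
# Confirm and remove.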
# Values shared across the config. Other sections reference them through
# ${globals.<key>} interpolation (unet.num_frames reads target_nframes).
globals:
  # Presumably frames per second of the target clips - TODO confirm units.
  target_fps: 32
  # Frame count per clip; reused as the UNet's num_frames.
  target_nframes: 64
# Settings for the model class named in _class_name (presumably forwarded to
# its constructor - confirm against the code that loads this file).
# block_out_channels, down_block_types, num_attention_heads and
# up_block_types each list four entries, presumably one per block level, so
# keep their lengths in sync when editing.
unet:
  _class_name: UNetSpatioTemporalConditionModel
  addition_time_embed_dim: 1
  block_out_channels:
    - 128
    - 256
    - 256
    - 512
  cross_attention_dim: 1
  down_block_types:
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - CrossAttnDownBlockSpatioTemporal
    - DownBlockSpatioTemporal
  in_channels: 8
  layers_per_block: 2
  num_attention_heads:
    - 8
    - 16
    - 16
    - 32
  # Interpolated from globals.target_nframes so the frame count is defined
  # in one place only.
  num_frames: ${globals.target_nframes}
  out_channels: 4
  projection_class_embeddings_input_dim: 1
  sample_size: 14
  transformer_layers_per_block: 1
  up_block_types:
    - UpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
    - CrossAttnUpBlockSpatioTemporal
# Settings for the scheduler class named in _class_name (presumably forwarded
# to its constructor - confirm against the code that loads this file).
noise_scheduler:
  _class_name: DDPMScheduler
  num_train_timesteps: 1000
  beta_start: 0.0001
  beta_end: 0.02
  # Options: linear, scaled_linear, or squaredcos_cap_v2
  beta_schedule: linear
  # Options: fixed_small, fixed_small_log, fixed_large, fixed_large_log,
  # learned or learned_range
  variance_type: fixed_small
  clip_sample: true
  # default 1
  clip_sample_range: 4.0
  # Options: epsilon, sample, v_prediction
  prediction_type: v_prediction
  # do not touch
  thresholding: false
  # unused
  dynamic_thresholding_ratio: 0.995
  # unused
  sample_max_value: 1.0
  # NOTE(review): the original comment here was empty; the accepted values
  # are not listed in this file - see the scheduler's documentation.
  timestep_spacing: "leading"
  # unused
  steps_offset: 0