---
# Model / training configuration.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been lost; confirm it against the code that loads this file.

# One mapping per UNet stage (unet1 .. unet4).
unets:
  unet1:
    dim: 64
    num_resnet_blocks: 2
    dim_mults:
      - 1
      - 2
      - 4
    max_text_len: 1
    layer_attns: false
    # One flag per dim_mults entry (three entries each).
    layer_cross_attns:
      - false
      - false
      - true
    cond_images_channels: 3
  unet2:
    dim: 64
    num_resnet_blocks: 2
    dim_mults:
      - 1
      - 2
      - 4
    max_text_len: 1
    layer_attns: false
    layer_cross_attns:
      - false
      - false
      - true
    memory_efficient: true
    cond_images_channels: 3
  unet3:
    dim: 64
    num_resnet_blocks: 2
    dim_mults:
      - 1
      - 2
      - 4
    max_text_len: 1
    layer_attns: false
    layer_cross_attns:
      - false
      - false
      - true
    memory_efficient: true
    cond_images_channels: 3
  unet4:
    dim: 64
    num_resnet_blocks: 2
    dim_mults:
      - 1
      - 2
      - 4
    max_text_len: 1
    layer_attns: false
    # Scalar here, unlike the per-level lists used by unet1 .. unet3.
    layer_cross_attns: false
    cond_images_channels: 3
    attend_at_middle: false

# Every list in this section has four entries, presumably one per UNet
# stage in order (unet1 .. unet4) -- TODO confirm against the consumer.
imagen:
  condition_on_text: true
  image_sizes:
    - 56
    - 56
    - 56
    - 112
  text_embed_dim: 1
  num_sample_steps:
    - 32
    - 32
    - 32
    - 64
  # null entries are YAML nulls, not the string "null".
  random_crop_sizes:
    - null
    - null
    - null
    - 56
  temporal_downsample_factor:
    - 4
    - 2
    - 1
    - 1
  lowres_sample_noise_level: 0.2
  sigma_min: 0.002
  sigma_max: 80
  sigma_data: 0.25
  rho: 7
  P_mean: -1.2
  P_std: 1.2
  S_churn:
    - 40
    - 80
    - 160
    - 160
  S_tmin: 0.05
  S_tmax: 50
  S_noise: 1.003

trainer:
  split_batches: false
  lr: 0.0005
  dl_tuple_output_keywords_names:
    - images
    - text_embeds
    - cond_images

dataset:
  # Placeholder path; replace with the local dataset location.
  data_path: /path/to/EchoNet-Dynamic
  deactivate_cache: false
  fps: 32
  duration: 2.0
  grayscale: false

dataloader:
  batch_size: 8
  num_workers: 8

wandb:
  project: EchoDiffusion

checkpoint:
  # Placeholder path; replace with the local checkpoint directory.
  path: /path/to/checkpoints
  # NOTE(review): the next three keys are assumed to belong to `checkpoint`
  # (a top-level batch_size would sit oddly beside dataloader.batch_size);
  # verify against the loader.
  batch_size: 4
  cond_scale: 5.0
  save_every_x_it: 5000