File size: 1,603 Bytes
1675e22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
{
"_class_name": "CausalVAEModel",
"_diffusers_version": "0.27.2",
"_name_or_path": "../results/pretrained_488_tail",
"attn_resolutions": [],
"decoder_attention": "AttnBlock3DFix",
"decoder_conv_in": "CausalConv3d",
"decoder_conv_out": "CausalConv3d",
"decoder_mid_resnet": "ResnetBlock3D",
"decoder_resnet_blocks": [
"ResnetBlock3D",
"ResnetBlock3D",
"ResnetBlock3D",
"ResnetBlock3D"
],
"decoder_spatial_upsample": [
"",
"SpatialUpsample2x",
"SpatialUpsample2x",
"SpatialUpsample2x"
],
"decoder_temporal_upsample": [
"",
"",
"TimeUpsampleRes2x",
"TimeUpsampleRes2x"
],
"double_z": true,
"dropout": 0.0,
"embed_dim": 4,
"encoder_attention": "AttnBlock3DFix",
"encoder_conv_in": "Conv2d",
"encoder_conv_out": "CausalConv3d",
"encoder_mid_resnet": "ResnetBlock3D",
"encoder_resnet_blocks": [
"ResnetBlock2D",
"ResnetBlock2D",
"ResnetBlock3D",
"ResnetBlock3D"
],
"encoder_spatial_downsample": [
"Downsample",
"Downsample",
"Downsample",
""
],
"encoder_temporal_downsample": [
"",
"TimeDownsampleRes2x",
"TimeDownsampleRes2x",
""
],
"hidden_size": 128,
"hidden_size_mult": [
1,
2,
4,
4
],
"in_channels": 3,
"loss_params": {
"disc_start": 2001,
"disc_weight": 0.5,
"kl_weight": 1e-06,
"logvar_init": 0.0
},
"loss_type": "opensora.models.ae.videobase.losses.LPIPSWithDiscriminator3D",
"lr": 1e-05,
"num_res_blocks": 2,
"out_channels": 3,
"q_conv": "CausalConv3d",
"resolution": 256,
"z_channels": 4
}
|