MedCoDi-M / configs /model /openai_unet.yaml
dmolino's picture
Upload 276 files
168a510 verified
raw
history blame
2.29 kB
# Base entry for the `openai_unet` model type.
# It is referenced as `super_cfg` by `openai_unet_dual_context`.
# NOTE(review): the `_sd` suffix and the 4-in / 4-out channel counts suggest a
# Stable-Diffusion-style latent UNet -- confirm against the class registered
# under `openai_unet`.
openai_unet_sd:
  type: openai_unet
  args:
    image_size: null  # not used
    in_channels: 4
    out_channels: 4
    model_channels: 320
    attention_resolutions: [4, 2, 1]
    # Both lists below have four entries; presumably one per resolution
    # level -- TODO confirm.
    num_res_blocks: [2, 2, 2, 2]
    channel_mult: [1, 2, 4, 4]
    num_heads: 8
    use_spatial_transformer: true
    transformer_depth: 1
    context_dim: 768
    use_checkpoint: true
    legacy: false
# Variant entry that names `openai_unet_sd` as its `super_cfg` and overrides
# only `type`; no `args` are declared here.
# NOTE(review): presumably the config loader copies the super entry (including
# its `args`) before applying the keys below -- confirm in the loader.
openai_unet_dual_context:
  super_cfg: openai_unet_sd
  type: openai_unet_dual_context
########################
# Code cleaned version #
########################
# Audio UNet entry, referenced by `openai_unet_codi` as `unet_audio_cfg`.
# Uses the same `type` as `openai_unet_2d` but with 8 input/output channels,
# 192 model channels, checkpointing disabled, and no `use_video_architecture`
# key.
openai_unet_2d_audio:
  type: openai_unet_2d
  args:
    input_channels: 8
    model_channels: 192
    output_channels: 8
    # The four-entry lists presumably hold one value per resolution
    # level -- TODO confirm.
    num_noattn_blocks: [2, 2, 2, 2]
    channel_mult: [1, 2, 4, 4]
    with_attn: [true, true, true, false]
    # Connector settings: the two `*_connector` lists have three entries,
    # matching the three `true` flags in `with_connector`.
    channel_mult_connector: [1, 2, 4]
    num_noattn_blocks_connector: [1, 1, 1]
    with_connector: [true, true, true, false]
    connector_output_channel: 1280
    num_heads: 8
    context_dim: 768
    use_checkpoint: false
# 2D UNet entry, referenced by `openai_unet_codi` as `unet_image_cfg`.
# It is the only entry in this file that sets `use_video_architecture`.
openai_unet_2d:
  type: openai_unet_2d
  args:
    input_channels: 4
    model_channels: 320
    output_channels: 4
    # The four-entry lists presumably hold one value per resolution
    # level -- TODO confirm.
    num_noattn_blocks: [2, 2, 2, 2]
    channel_mult: [1, 2, 4, 4]
    with_attn: [true, true, true, false]
    # Connector settings: the two `*_connector` lists have three entries,
    # matching the three `true` flags in `with_connector`.
    channel_mult_connector: [1, 2, 4]
    num_noattn_blocks_connector: [1, 1, 1]
    with_connector: [true, true, true, false]
    connector_output_channel: 1280
    num_heads: 8
    context_dim: 768
    use_checkpoint: true
    use_video_architecture: true
# Entry for the `openai_unet_0dmd` model type, referenced by
# `openai_unet_codi` as `unet_text_cfg`. Input and output channels are both
# 768, the same value as `context_dim`.
# NOTE(review): unlike the `openai_unet_2d` entries, this one declares
# `second_dim` / `second_dim_connector` and has no `channel_mult_connector`;
# confirm the omission is intentional for this model type.
openai_unet_0dmd:
  type: openai_unet_0dmd
  args:
    input_channels: 768
    model_channels: 320
    output_channels: 768
    # The four-entry lists presumably hold one value per level -- TODO confirm.
    num_noattn_blocks: [2, 2, 2, 2]
    channel_mult: [1, 2, 4, 4]
    second_dim: [4, 4, 4, 4]
    with_attn: [true, true, true, false]
    # Connector settings: the two `*_connector` lists have three entries,
    # matching the three `true` flags in `with_connector`.
    num_noattn_blocks_connector: [1, 1, 1]
    second_dim_connector: [4, 4, 4]
    with_connector: [true, true, true, false]
    connector_output_channel: 1280
    num_heads: 8
    context_dim: 768
    use_checkpoint: true
# Composite entry: points at three UNet entries defined in this file (image,
# text, audio) and lists the model types to enable.
# NOTE(review): `MODEL(<name>)` is presumably expanded by the config loader
# into the named entry -- confirm in the loader.
openai_unet_codi:
  type: openai_unet_codi
  args:
    unet_image_cfg: MODEL(openai_unet_2d)
    unet_text_cfg: MODEL(openai_unet_0dmd)
    unet_audio_cfg: MODEL(openai_unet_2d_audio)
    # Alternative selections, kept for reference:
    # model_type: ['video', 'image']
    # model_type: ['text']
    model_type:
      - audio
      - image
      - video
      - text