{"name": "diffusion_model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": [null, 320], "dtype": "float32", "sparse": false, "ragged": false, "name": "input_2"}, "name": "input_2", "inbound_nodes": []}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 1280, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_2", 0, 0, {}]]]}, {"class_name": "InputLayer", "config": {"batch_input_shape": [null, 32, 32, 4], "dtype": "float32", "sparse": false, "ragged": false, "name": "input_3"}, "name": "input_3", "inbound_nodes": []}, {"class_name": "Activation", "config": {"name": "activation", "trainable": true, "dtype": "float32", "activation": "swish"}, "name": "activation", "inbound_nodes": [[["dense", 0, 0, {}]]]}, {"class_name": "PaddedConv2D", "config": {"name": "padded_conv2d", "trainable": true, "dtype": "float32", "kernel_size": 3, "padding": 1, "filters": 320}, "name": "padded_conv2d", "inbound_nodes": [[["input_3", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 1280, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_1", "inbound_nodes": [[["activation", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block", "trainable": true, "dtype": "float32", "output_dim": 320}, "name": "res_block", "inbound_nodes": [[["padded_conv2d", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "InputLayer", "config": {"batch_input_shape": [null, 77, 768], "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 40}, "name": "spatial_transformer", "inbound_nodes": [[["res_block", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_1", "trainable": true, "dtype": "float32", "output_dim": 320}, "name": "res_block_1", "inbound_nodes": [[["spatial_transformer", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_1", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 40}, "name": "spatial_transformer_1", "inbound_nodes": [[["res_block_1", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "PaddedConv2D", "config": {"name": "padded_conv2d_9", "trainable": true, "dtype": "float32", "strides": 2, "padding": 1, "filters": 320, "kernel_size": 3}, "name": "padded_conv2d_9", "inbound_nodes": [[["spatial_transformer_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_2", "trainable": true, "dtype": "float32", "output_dim": 640}, "name": "res_block_2", "inbound_nodes": [[["padded_conv2d_9", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_2", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 80}, "name": "spatial_transformer_2", "inbound_nodes": [[["res_block_2", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_3", "trainable": true, "dtype": "float32", "output_dim": 640}, "name": "res_block_3", "inbound_nodes": [[["spatial_transformer_2", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_3", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 80}, "name": "spatial_transformer_3", "inbound_nodes": [[["res_block_3", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "PaddedConv2D", "config": {"name": "padded_conv2d_18", "trainable": true, "dtype": "float32", "strides": 2, "padding": 1, "filters": 640, "kernel_size": 3}, "name": "padded_conv2d_18", "inbound_nodes": [[["spatial_transformer_3", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_4", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_4", "inbound_nodes": [[["padded_conv2d_18", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_4", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 160}, "name": "spatial_transformer_4", "inbound_nodes": [[["res_block_4", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_5", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_5", "inbound_nodes": [[["spatial_transformer_4", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_5", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 160}, "name": "spatial_transformer_5", "inbound_nodes": [[["res_block_5", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "PaddedConv2D", "config": {"name": "padded_conv2d_27", "trainable": true, "dtype": "float32", "strides": 2, "padding": 1, "filters": 1280, "kernel_size": 3}, "name": "padded_conv2d_27", "inbound_nodes": [[["spatial_transformer_5", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_6", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_6", "inbound_nodes": [[["padded_conv2d_27", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_7", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_7", "inbound_nodes": [[["res_block_6", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_8", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_8", "inbound_nodes": [[["res_block_7", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_6", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 160}, "name": "spatial_transformer_6", "inbound_nodes": [[["res_block_8", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_9", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_9", "inbound_nodes": [[["spatial_transformer_6", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate", "inbound_nodes": [[["res_block_9", 0, 0, {}], ["res_block_7", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_10", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_10", "inbound_nodes": [[["concatenate", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_1", "inbound_nodes": [[["res_block_10", 0, 0, {}], ["res_block_6", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_11", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_11", "inbound_nodes": [[["concatenate_1", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_2", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_2", "inbound_nodes": [[["res_block_11", 0, 0, {}], ["padded_conv2d_27", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_12", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_12", "inbound_nodes": [[["concatenate_2", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "Upsample", "config": {"name": "upsample", "trainable": true, "dtype": "float32", "channels": 1280}, "name": "upsample", "inbound_nodes": [[["res_block_12", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_3", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_3", "inbound_nodes": [[["upsample", 0, 0, {}], ["spatial_transformer_5", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_13", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_13", "inbound_nodes": [[["concatenate_3", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_7", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 160}, "name": "spatial_transformer_7", "inbound_nodes": [[["res_block_13", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_4", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_4", "inbound_nodes": [[["spatial_transformer_7", 0, 0, {}], ["spatial_transformer_4", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_14", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_14", "inbound_nodes": [[["concatenate_4", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_8", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 160}, "name": "spatial_transformer_8", "inbound_nodes": [[["res_block_14", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_5", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_5", "inbound_nodes": [[["spatial_transformer_8", 0, 0, {}], ["padded_conv2d_18", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_15", "trainable": true, "dtype": "float32", "output_dim": 1280}, "name": "res_block_15", "inbound_nodes": [[["concatenate_5", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_9", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 160}, "name": "spatial_transformer_9", "inbound_nodes": [[["res_block_15", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Upsample", "config": {"name": "upsample_1", "trainable": true, "dtype": "float32", "channels": 1280}, "name": "upsample_1", "inbound_nodes": [[["spatial_transformer_9", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_6", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_6", "inbound_nodes": [[["upsample_1", 0, 0, {}], ["spatial_transformer_3", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_16", "trainable": true, "dtype": "float32", "output_dim": 640}, "name": "res_block_16", "inbound_nodes": [[["concatenate_6", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_10", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 80}, "name": "spatial_transformer_10", "inbound_nodes": [[["res_block_16", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_7", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_7", "inbound_nodes": [[["spatial_transformer_10", 0, 0, {}], ["spatial_transformer_2", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_17", "trainable": true, "dtype": "float32", "output_dim": 640}, "name": "res_block_17", "inbound_nodes": [[["concatenate_7", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_11", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 80}, "name": "spatial_transformer_11", "inbound_nodes": [[["res_block_17", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_8", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_8", "inbound_nodes": [[["spatial_transformer_11", 0, 0, {}], ["padded_conv2d_9", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_18", "trainable": true, "dtype": "float32", "output_dim": 640}, "name": "res_block_18", "inbound_nodes": [[["concatenate_8", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_12", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 80}, "name": "spatial_transformer_12", "inbound_nodes": [[["res_block_18", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Upsample", "config": {"name": "upsample_2", "trainable": true, "dtype": "float32", "channels": 640}, "name": "upsample_2", "inbound_nodes": [[["spatial_transformer_12", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_9", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_9", "inbound_nodes": [[["upsample_2", 0, 0, {}], ["spatial_transformer_1", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_19", "trainable": true, "dtype": "float32", "output_dim": 320}, "name": "res_block_19", "inbound_nodes": [[["concatenate_9", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_13", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 40}, "name": "spatial_transformer_13", "inbound_nodes": [[["res_block_19", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_10", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_10", "inbound_nodes": [[["spatial_transformer_13", 0, 0, {}], ["spatial_transformer", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_20", "trainable": true, "dtype": "float32", "output_dim": 320}, "name": "res_block_20", "inbound_nodes": [[["concatenate_10", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_14", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 40}, "name": "spatial_transformer_14", "inbound_nodes": [[["res_block_20", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "Concatenate", "config": {"name": "concatenate_11", "trainable": true, "dtype": "float32", "axis": -1}, "name": "concatenate_11", "inbound_nodes": [[["spatial_transformer_14", 0, 0, {}], ["padded_conv2d", 0, 0, {}]]]}, {"class_name": "ResBlock", "config": {"name": "res_block_21", "trainable": true, "dtype": "float32", "output_dim": 320}, "name": "res_block_21", "inbound_nodes": [[["concatenate_11", 0, 0, {}], ["dense_1", 0, 0, {}]]]}, {"class_name": "SpatialTransformer", "config": {"name": "spatial_transformer_15", "trainable": true, "dtype": "float32", "fully_connected": false, "num_heads": 8, "head_size": 40}, "name": "spatial_transformer_15", "inbound_nodes": [[["res_block_21", 0, 0, {}], ["input_1", 0, 0, {}]]]}, {"class_name": "GroupNormalization", "config": {"name": "group_normalization_60", "trainable": true, "dtype": "float32", "epsilon": 1e-05}, "name": "group_normalization_60", "inbound_nodes": [[["spatial_transformer_15", 0, 0, {}]]]}, {"class_name": "Activation", "config": {"name": "activation_67", "trainable": true, "dtype": "float32", "activation": "swish"}, "name": "activation_67", "inbound_nodes": [[["group_normalization_60", 0, 0, {}]]]}, {"class_name": "PaddedConv2D", "config": {"name": "padded_conv2d_83", "trainable": true, "dtype": "float32", "kernel_size": 3, "padding": 1, "filters": 4}, "name": "padded_conv2d_83", "inbound_nodes": [[["activation_67", 0, 0, {}]]]}], "input_layers": [["input_3", 0, 0], ["input_2", 0, 0], ["input_1", 0, 0]], "output_layers": [["padded_conv2d_83", 0, 0]]}