katielink committed
Commit 509db6f
Parent: f227b8a

Initial release
LICENSE ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
README.md ADDED
@@ -0,0 +1,176 @@
---
tags:
- monai
- medical
library_name: monai
license: apache-2.0
---
# Model Overview
A pre-trained 2D latent diffusion generative model for axial slices of BraTS MRIs.

This model is trained on BraTS 2016 and 2017 data from [Medical Decathlon](http://medicaldecathlon.com/), using the latent diffusion model [1].

![model workflow](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm3d_network.png)

This model is a generator for creating images like the Flair MRIs in the BraTS 2016 and 2017 data. It was trained as a 2D latent diffusion model and accepts Gaussian random noise as input to produce an image output. The `train_autoencoder.json` file describes the training process of the variational autoencoder with GAN loss. The `train_diffusion.json` file describes the training process of the 2D latent diffusion model.

In this bundle, the autoencoder uses a perceptual loss based on ResNet50 with pre-trained weights (the network is frozen and is not trained in the bundle). By default, the `pretrained` parameter is set to `False` in `train_autoencoder.json`; to reproduce the intended training, this default must be changed. There are two ways to use pre-trained weights:
1. If `pretrained` is set to `True`, ImageNet pre-trained weights from [torchvision](https://pytorch.org/vision/stable/_modules/torchvision/models/resnet.html#ResNet50_Weights) will be used. Note that these weights are for non-commercial use only.
2. If `pretrained` is set to `True` and the `perceptual_loss_model_weights_path` parameter is specified, weights are loaded from a local path. This is how this bundle was trained; its pre-trained weights come from internal data.

Please note that each user is responsible for checking the data source of the pre-trained models, the applicable licenses, and determining whether they are suitable for the intended use.

#### Example synthetic image
An example result from inference is shown below:
![Example synthetic image](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_example_generation_v2.png)

**This is a demonstration network meant to show the training process for this sort of model with MONAI. To achieve better performance, users should train on a larger dataset such as [BraTS 2021](https://www.synapse.org/#!Synapse:syn25829067/wiki/610865).**

## MONAI Generative Model Dependencies
[MONAI generative models](https://github.com/Project-MONAI/GenerativeModels) can be installed with:
```
pip install lpips==0.1.4
pip install git+https://github.com/Project-MONAI/GenerativeModels.git@0.2.1
```

## Data
The training data is BraTS 2016 and 2017 from the Medical Segmentation Decathlon. Users can find more details on the dataset (`Task01_BrainTumour`) at http://medicaldecathlon.com/.

- Target: Image Generation
- Task: Synthesis
- Modality: MRI
- Size: 388 3D MRI volumes (1 channel used)
- Training data size: 38800 2D MRI axial slices (1 channel used)

## Training Configuration
If you have a GPU with less than 32 GB of memory, you may need to decrease the batch size when training. To do so, modify the `"train_batch_size_img"` and `"train_batch_size_slice"` parameters in the `configs/train_autoencoder.json` and `configs/train_diffusion.json` configuration files.
- `"train_batch_size_img"` is the number of 3D volumes loaded in each batch.
- `"train_batch_size_slice"` is the number of 2D axial slices extracted from each volume. The effective 2D batch size is the product of the two.
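As a sanity check, the effective number of 2D slices per batch can be computed from these two parameters (a minimal sketch; the parameter values below are illustrative, not the bundle defaults):

```python
# Effective 2D batch size = volumes per batch x slices sampled per volume.
train_batch_size_img = 2      # illustrative value, not the bundle default
train_batch_size_slice = 50   # illustrative value, not the bundle default

effective_batch_size = train_batch_size_img * train_batch_size_slice
print(effective_batch_size)  # -> 100
```

When reducing memory usage, lowering either parameter shrinks the effective batch by the same factor.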
### Training Configuration of Autoencoder
The autoencoder was trained using the following configuration:

- GPU: at least 32 GB GPU memory
- Actual Model Input: 240 x 240
- AMP: False
- Optimizer: Adam
- Learning Rate: 5e-5
- Loss: L1 loss, perceptual loss, KL divergence loss, adversarial loss, GAN BCE loss

#### Input
1 channel 2D MRI Flair axial patches

#### Output
- 1 channel 2D MRI reconstructed patches
- 1 channel mean of latent features
- 1 channel standard deviation of latent features

### Training Configuration of Diffusion Model
The latent diffusion model was trained using the following configuration:

- GPU: at least 32 GB GPU memory
- Actual Model Input: 64 x 64
- AMP: False
- Optimizer: Adam
- Learning Rate: 5e-5
- Loss: MSE loss

#### Training Input
- 1 channel noisy latent features
- a long int that indicates the time step

#### Training Output
1 channel predicted added noise

#### Inference Input
1 channel noise

#### Inference Output
1 channel denoised latent features
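For reference, the inference-time noise input matches the single-channel 64 x 64 latent shape configured in `configs/inference.json`. A minimal NumPy sketch of sampling such an input:

```python
import numpy as np

# Gaussian noise with shape (batch, latent_channels, height, width),
# matching this bundle's 1-channel 64 x 64 latent space.
latent_channels, height, width = 1, 64, 64
noise = np.random.randn(1, latent_channels, height, width).astype(np.float32)
print(noise.shape)  # -> (1, 1, 64, 64)
```

The bundle itself builds the equivalent tensor with `torch.randn` in the inference config.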
### Memory Consumption Warning

If you face memory issues with data loading, you can lower the caching rate `cache_rate` in the configurations within the range [0, 1] to reduce the system RAM requirements.
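In MONAI's cached datasets, `cache_rate` is the fraction of items kept in system RAM, so the number of cached items is roughly `cache_rate * len(dataset)`. A minimal sketch of that relationship, using an illustrative dataset size:

```python
# Approximate number of items held in RAM for a given cache_rate.
dataset_len = 388   # illustrative: the number of 3D volumes in this bundle's dataset
cache_rate = 0.5    # fraction of the dataset cached in system RAM

cached_items = int(dataset_len * cache_rate)
print(cached_items)  # -> 194
```

Setting `cache_rate` to 0 disables caching entirely, trading RAM for per-epoch loading time.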
## Performance

#### Training Loss
![A graph showing the autoencoder training curve](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_train_autoencoder_loss_v3.png)

![A graph showing the latent diffusion training curve](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_train_diffusion_loss_v3.png)

## MONAI Bundle Commands
In addition to the Pythonic APIs, a few command line interfaces (CLI) are provided to interact with the bundle. The CLI supports flexible use cases, such as overriding configs at runtime and predefining arguments in a file.

For more detailed usage instructions, visit the [MONAI Bundle Configuration Page](https://docs.monai.io/en/latest/config_syntax.html).

### Execute Autoencoder Training

#### Execute Autoencoder Training on single GPU
```
python -m monai.bundle run --config_file configs/train_autoencoder.json
```

Please note that if the default dataset path in the bundle config files has not been changed to the actual location (the directory that contains `Task01_BrainTumour`), you can override it with `--dataset_dir`:

```
python -m monai.bundle run --config_file configs/train_autoencoder.json --dataset_dir <actual dataset path>
```

#### Override the `train` config to execute multi-GPU training for Autoencoder
To train with multiple GPUs, use the following command, which requires scaling up the learning rate according to the number of GPUs.

```
torchrun --standalone --nnodes=1 --nproc_per_node=8 -m monai.bundle run --config_file "['configs/train_autoencoder.json','configs/multi_gpu_train_autoencoder.json']" --lr 4e-4
```
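The `--lr 4e-4` above follows the common linear scaling rule for data-parallel training: the effective batch grows with the number of GPUs, so the learning rate is scaled by the same factor (a sketch, using the single-GPU learning rate of 5e-5 from the training configuration above):

```python
# Linear learning-rate scaling for multi-GPU data-parallel training.
base_lr = 5e-5   # single-GPU learning rate from the bundle config
num_gpus = 8     # matches --nproc_per_node=8

scaled_lr = base_lr * num_gpus
print(scaled_lr)  # -> 0.0004
```

If you train with a different number of GPUs, adjust `--lr` accordingly.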
#### Check the Autoencoder Training result
The following command generates a reconstructed image from a random input image.
We can visualize it to see whether the autoencoder is trained correctly.
```
python -m monai.bundle run --config_file configs/inference_autoencoder.json
```

An example of a reconstructed image from inference is shown below. If the autoencoder is trained correctly, the reconstructed image should look similar to the original image.

![Example reconstructed image](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_recon_example.png)

### Execute Latent Diffusion Model Training

#### Execute Latent Diffusion Model Training on single GPU
After training the autoencoder, run the following command to train the latent diffusion model. This command will print out the scale factor of the latent feature space. If your autoencoder is well trained, this value should be close to 1.0.

```
python -m monai.bundle run --config_file "['configs/train_autoencoder.json','configs/train_diffusion.json']"
```
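In latent diffusion models, the scale factor is commonly computed as the reciprocal of the standard deviation of the encoded latents, so roughly unit-variance latents (a sign of a well-trained autoencoder) yield a value near 1.0. A minimal NumPy sketch of that relationship, using synthetic latents as a stand-in for real encoder outputs:

```python
import numpy as np

# The LDM scale factor is typically 1 / std(latents): it rescales the
# latent space to unit variance before diffusion training.
rng = np.random.default_rng(0)
# Synthetic, roughly unit-variance "latents" standing in for encoder outputs.
latents = rng.normal(loc=0.0, scale=1.0, size=(16, 1, 64, 64))

scale_factor = 1.0 / latents.std()
print(f"{scale_factor:.2f}")  # a value close to 1.00
```

A scale factor far from 1.0 suggests the autoencoder's latent distribution is poorly normalized.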
#### Override the `train` config to execute multi-GPU training for Latent Diffusion Model
To train with multiple GPUs, use the following command, which requires scaling up the learning rate according to the number of GPUs.

```
torchrun --standalone --nnodes=1 --nproc_per_node=8 -m monai.bundle run --config_file "['configs/train_autoencoder.json','configs/train_diffusion.json','configs/multi_gpu_train_autoencoder.json','configs/multi_gpu_train_diffusion.json']" --lr 4e-4
```

### Execute inference
The following command generates a synthetic image from randomly sampled noise.
```
python -m monai.bundle run --config_file configs/inference.json
```

# References
[1] Rombach, Robin, et al. "High-resolution image synthesis with latent diffusion models." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2022. https://openaccess.thecvf.com/content/CVPR2022/papers/Rombach_High-Resolution_Image_Synthesis_With_Latent_Diffusion_Models_CVPR_2022_paper.pdf

# License
Copyright (c) MONAI Consortium

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
configs/inference.json ADDED
@@ -0,0 +1,108 @@
{
    "imports": [
        "$import torch",
        "$from datetime import datetime",
        "$from pathlib import Path",
        "$from PIL import Image",
        "$from scripts.utils import visualize_2d_image"
    ],
    "bundle_root": ".",
    "model_dir": "$@bundle_root + '/models'",
    "output_dir": "$@bundle_root + '/output'",
    "create_output_dir": "$Path(@output_dir).mkdir(exist_ok=True)",
    "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
    "output_postfix": "$datetime.now().strftime('sample_%Y%m%d_%H%M%S')",
    "channel": 0,
    "spatial_dims": 2,
    "image_channels": 1,
    "latent_channels": 1,
    "latent_shape": [
        "@latent_channels",
        64,
        64
    ],
    "autoencoder_def": {
        "_target_": "generative.networks.nets.AutoencoderKL",
        "spatial_dims": "@spatial_dims",
        "in_channels": "@image_channels",
        "out_channels": "@image_channels",
        "latent_channels": "@latent_channels",
        "num_channels": [
            64,
            128,
            256
        ],
        "num_res_blocks": 2,
        "norm_num_groups": 32,
        "norm_eps": 1e-06,
        "attention_levels": [
            false,
            false,
            false
        ],
        "with_encoder_nonlocal_attn": true,
        "with_decoder_nonlocal_attn": true
    },
    "network_def": {
        "_target_": "generative.networks.nets.DiffusionModelUNet",
        "spatial_dims": "@spatial_dims",
        "in_channels": "@latent_channels",
        "out_channels": "@latent_channels",
        "num_channels": [
            32,
            64,
            128,
            256
        ],
        "attention_levels": [
            false,
            true,
            true,
            true
        ],
        "num_head_channels": [
            0,
            32,
            32,
            32
        ],
        "num_res_blocks": 2
    },
    "load_autoencoder_path": "$@bundle_root + '/models/model_autoencoder.pt'",
    "load_autoencoder": "$@autoencoder_def.load_state_dict(torch.load(@load_autoencoder_path))",
    "autoencoder": "$@autoencoder_def.to(@device)",
    "load_diffusion_path": "$@model_dir + '/model.pt'",
    "load_diffusion": "$@network_def.load_state_dict(torch.load(@load_diffusion_path))",
    "diffusion": "$@network_def.to(@device)",
    "noise_scheduler": {
        "_target_": "generative.networks.schedulers.DDIMScheduler",
        "_requires_": [
            "@load_diffusion",
            "@load_autoencoder"
        ],
        "num_train_timesteps": 1000,
        "beta_start": 0.0015,
        "beta_end": 0.0195,
        "beta_schedule": "scaled_linear",
        "clip_sample": false
    },
    "noise": "$torch.randn([1]+@latent_shape).to(@device)",
    "set_timesteps": "$@noise_scheduler.set_timesteps(num_inference_steps=50)",
    "inferer": {
        "_target_": "scripts.ldm_sampler.LDMSampler",
        "_requires_": "@set_timesteps"
    },
    "sample": "$@inferer.sampling_fn(@noise, @autoencoder, @diffusion, @noise_scheduler)",
    "saver": {
        "_target_": "SaveImage",
        "_requires_": "@create_output_dir",
        "output_dir": "@output_dir",
        "output_postfix": "@output_postfix"
    },
    "generated_image": "$@sample",
    "generated_image_np": "$@generated_image[0,0].cpu().numpy().transpose(1, 0)[::-1, ::-1]",
    "img_pil": "$Image.fromarray(visualize_2d_image(@generated_image_np), 'RGB')",
    "run": [
        "$@img_pil.save(@output_dir+'/synimg_'+@output_postfix+'.png')"
    ]
}
configs/inference_autoencoder.json ADDED
@@ -0,0 +1,156 @@
{
    "imports": [
        "$import torch",
        "$from datetime import datetime",
        "$from pathlib import Path",
        "$from PIL import Image",
        "$from scripts.utils import visualize_2d_image"
    ],
    "bundle_root": ".",
    "model_dir": "$@bundle_root + '/models'",
    "dataset_dir": "@bundle_root",
    "output_dir": "$@bundle_root + '/output'",
    "create_output_dir": "$Path(@output_dir).mkdir(exist_ok=True)",
    "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
    "output_postfix": "$datetime.now().strftime('%Y%m%d_%H%M%S')",
    "channel": 0,
    "spatial_dims": 2,
    "image_channels": 1,
    "latent_channels": 1,
    "infer_patch_size": [
        240,
        240
    ],
    "infer_batch_size_img": 1,
    "infer_batch_size_slice": 1,
    "autoencoder_def": {
        "_target_": "generative.networks.nets.AutoencoderKL",
        "spatial_dims": "@spatial_dims",
        "in_channels": "@image_channels",
        "out_channels": "@image_channels",
        "latent_channels": "@latent_channels",
        "num_channels": [
            64,
            128,
            256
        ],
        "num_res_blocks": 2,
        "norm_num_groups": 32,
        "norm_eps": 1e-06,
        "attention_levels": [
            false,
            false,
            false
        ],
        "with_encoder_nonlocal_attn": true,
        "with_decoder_nonlocal_attn": true
    },
    "load_autoencoder_path": "$@bundle_root + '/models/model_autoencoder.pt'",
    "load_autoencoder": "$@autoencoder_def.load_state_dict(torch.load(@load_autoencoder_path))",
    "autoencoder": "$@autoencoder_def.to(@device)",
    "preprocessing_transforms": [
        {
            "_target_": "LoadImaged",
            "keys": "image"
        },
        {
            "_target_": "EnsureChannelFirstd",
            "keys": "image"
        },
        {
            "_target_": "Lambdad",
            "keys": "image",
            "func": "$lambda x: x[@channel, :, :, :]"
        },
        {
            "_target_": "AddChanneld",
            "keys": "image"
        },
        {
            "_target_": "EnsureTyped",
            "keys": "image"
        },
        {
            "_target_": "Orientationd",
            "keys": "image",
            "axcodes": "RAS"
        },
        {
            "_target_": "CenterSpatialCropd",
            "keys": "image",
            "roi_size": "$[@infer_patch_size[0], @infer_patch_size[1], 20]"
        },
        {
            "_target_": "ScaleIntensityRangePercentilesd",
            "keys": "image",
            "lower": 0,
            "upper": 100,
            "b_min": 0,
            "b_max": 1
        }
    ],
    "crop_transforms": [
        {
            "_target_": "DivisiblePadd",
            "keys": "image",
            "k": [
                4,
                4,
                1
            ]
        },
        {
            "_target_": "RandSpatialCropSamplesd",
            "keys": "image",
            "random_size": false,
            "roi_size": "$[@infer_patch_size[0], @infer_patch_size[1], 1]",
            "num_samples": "@infer_batch_size_slice"
        },
        {
            "_target_": "SqueezeDimd",
            "keys": "image",
            "dim": 3
        }
    ],
    "final_transforms": [
        {
            "_target_": "ScaleIntensityRangePercentilesd",
            "keys": "image",
            "lower": 0,
            "upper": 100,
            "b_min": 0,
            "b_max": 1
        }
    ],
    "preprocessing": {
        "_target_": "Compose",
        "transforms": "$@preprocessing_transforms + @crop_transforms + @final_transforms"
    },
    "dataset": {
        "_target_": "monai.apps.DecathlonDataset",
        "root_dir": "@dataset_dir",
        "task": "Task01_BrainTumour",
        "section": "validation",
        "cache_rate": 0.0,
        "num_workers": 8,
        "download": false,
        "transform": "@preprocessing"
    },
    "dataloader": {
        "_target_": "DataLoader",
        "dataset": "@dataset",
        "batch_size": 1,
        "shuffle": true,
        "num_workers": 0
    },
    "recon_img_pil": "$Image.fromarray(visualize_2d_image(@recon_img), 'RGB')",
    "orig_img_pil": "$Image.fromarray(visualize_2d_image(@input_img[0,0,...]), 'RGB')",
    "input_img": "$monai.utils.first(@dataloader)['image'].to(@device)",
    "recon_img": "$@autoencoder(@input_img)[0][0,0,...]",
    "run": [
        "$@create_output_dir",
        "$@load_autoencoder",
        "$@orig_img_pil.save(@output_dir+'/orig_img_'+@output_postfix+'.png')",
        "$@recon_img_pil.save(@output_dir+'/recon_img_'+@output_postfix+'.png')"
    ]
}
configs/logging.conf ADDED
@@ -0,0 +1,21 @@
[loggers]
keys=root

[handlers]
keys=consoleHandler

[formatters]
keys=fullFormatter

[logger_root]
level=INFO
handlers=consoleHandler

[handler_consoleHandler]
class=StreamHandler
level=INFO
formatter=fullFormatter
args=(sys.stdout,)

[formatter_fullFormatter]
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
configs/metadata.json ADDED
@@ -0,0 +1,103 @@
+ {
+     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_generator_ldm_20230507.json",
+     "version": "1.0.0",
+     "changelog": {
+         "1.0.0": "Initial release"
+     },
+     "monai_version": "1.2.0rc5",
+     "pytorch_version": "1.13.1",
+     "numpy_version": "1.22.2",
+     "optional_packages_version": {
+         "nibabel": "5.1.0",
+         "lpips": "0.1.4"
+     },
+     "name": "BraTS MRI axial slices latent diffusion generation",
+     "task": "BraTS MRI axial slices synthesis",
+     "description": "A generative model for creating 2D brain MRI axial slices from Gaussian noise based on BraTS dataset",
+     "authors": "MONAI team",
+     "copyright": "Copyright (c) MONAI Consortium",
+     "data_source": "http://medicaldecathlon.com/",
+     "data_type": "nibabel",
+     "image_classes": "Flair brain MRI axial slices with 1x1 mm voxel size",
+     "eval_metrics": {},
+     "intended_use": "This is a research tool/prototype and not to be used clinically",
+     "references": [],
+     "autoencoder_data_format": {
+         "inputs": {
+             "image": {
+                 "type": "image",
+                 "format": "image",
+                 "num_channels": 1,
+                 "spatial_shape": [
+                     240,
+                     240
+                 ],
+                 "dtype": "float32",
+                 "value_range": [
+                     0,
+                     1
+                 ],
+                 "is_patch_data": true
+             }
+         },
+         "outputs": {
+             "pred": {
+                 "type": "image",
+                 "format": "image",
+                 "num_channels": 1,
+                 "spatial_shape": [
+                     240,
+                     240
+                 ],
+                 "dtype": "float32",
+                 "value_range": [
+                     0,
+                     1
+                 ],
+                 "is_patch_data": true,
+                 "channel_def": {
+                     "0": "image"
+                 }
+             }
+         }
+     },
+     "generator_data_format": {
+         "inputs": {
+             "latent": {
+                 "type": "noise",
+                 "format": "image",
+                 "num_channels": 1,
+                 "spatial_shape": [
+                     64,
+                     64
+                 ],
+                 "dtype": "float32",
+                 "value_range": [
+                     0,
+                     1
+                 ],
+                 "is_patch_data": true
+             }
+         },
+         "outputs": {
+             "pred": {
+                 "type": "feature",
+                 "format": "image",
+                 "num_channels": 1,
+                 "spatial_shape": [
+                     64,
+                     64
+                 ],
+                 "dtype": "float32",
+                 "value_range": [
+                     0,
+                     1
+                 ],
+                 "is_patch_data": true,
+                 "channel_def": {
+                     "0": "image"
+                 }
+             }
+         }
+     }
+ }
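Downstream tooling often sanity-checks this metadata before loading the bundle. A minimal stdlib sketch (the required-key list is an illustrative subset, not the full schema) could look like:

```python
import json

# Trimmed copy of configs/metadata.json, for illustration only.
meta = json.loads("""
{
    "version": "1.0.0",
    "changelog": {"1.0.0": "Initial release"},
    "monai_version": "1.2.0rc5",
    "autoencoder_data_format": {"inputs": {"image": {"spatial_shape": [240, 240]}}},
    "generator_data_format": {"inputs": {"latent": {"spatial_shape": [64, 64]}}}
}
""")

required = ["version", "changelog", "monai_version"]
missing = [k for k in required if k not in meta]
assert not missing, f"metadata is missing keys: {missing}"

# The current version must appear in the changelog.
assert meta["version"] in meta["changelog"]

# The generator consumes 64x64 latents; the autoencoder consumes 240x240 images.
latent_shape = meta["generator_data_format"]["inputs"]["latent"]["spatial_shape"]
image_shape = meta["autoencoder_data_format"]["inputs"]["image"]["spatial_shape"]
```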
configs/multi_gpu_train_autoencoder.json ADDED
@@ -0,0 +1,42 @@
+ {
+     "device": "$torch.device(f'cuda:{dist.get_rank()}')",
+     "gnetwork": {
+         "_target_": "torch.nn.parallel.DistributedDataParallel",
+         "module": "$@autoencoder_def.to(@device)",
+         "device_ids": [
+             "@device"
+         ],
+         "find_unused_parameters": true
+     },
+     "dnetwork": {
+         "_target_": "torch.nn.parallel.DistributedDataParallel",
+         "module": "$@discriminator_def.to(@device)",
+         "device_ids": [
+             "@device"
+         ],
+         "find_unused_parameters": true
+     },
+     "train#sampler": {
+         "_target_": "DistributedSampler",
+         "dataset": "@train#dataset",
+         "even_divisible": true,
+         "shuffle": true
+     },
+     "train#dataloader#sampler": "@train#sampler",
+     "train#dataloader#shuffle": false,
+     "train#trainer#train_handlers": "$@train#handlers[: -2 if dist.get_rank() > 0 else None]",
+     "initialize": [
+         "$import torch.distributed as dist",
+         "$dist.is_initialized() or dist.init_process_group(backend='nccl')",
+         "$torch.cuda.set_device(@device)",
+         "$monai.utils.set_determinism(seed=123)",
+         "$import logging",
+         "$@train#trainer.logger.setLevel(logging.WARNING if dist.get_rank() > 0 else logging.INFO)"
+     ],
+     "run": [
+         "$@train#trainer.run()"
+     ],
+     "finalize": [
+         "$dist.is_initialized() and dist.destroy_process_group()"
+     ]
+ }
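The `"train#trainer#train_handlers"` override above keeps every handler on rank 0 but drops the last two (the stats and TensorBoard loggers defined in `train_autoencoder.json`) on all other ranks, so only rank 0 writes logs. The slice expression behaves like plain Python slicing:

```python
# Handler list order mirrors train_autoencoder.json: saver first, loggers last.
handlers = ["CheckpointSaver", "StatsHandler", "TensorBoardStatsHandler"]

def handlers_for_rank(rank: int) -> list:
    # Rank 0: handlers[:None] -> all handlers.
    # Rank > 0: handlers[:-2] -> everything except the two logging handlers.
    return handlers[: -2 if rank > 0 else None]

print(handlers_for_rank(0))  # ['CheckpointSaver', 'StatsHandler', 'TensorBoardStatsHandler']
print(handlers_for_rank(1))  # ['CheckpointSaver']
```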
configs/multi_gpu_train_diffusion.json ADDED
@@ -0,0 +1,16 @@
+ {
+     "diffusion": {
+         "_target_": "torch.nn.parallel.DistributedDataParallel",
+         "module": "$@network_def.to(@device)",
+         "device_ids": [
+             "@device"
+         ],
+         "find_unused_parameters": true
+     },
+     "run": [
+         "@load_autoencoder",
+         "$@autoencoder.eval()",
+         "$print('scale factor:',@scale_factor)",
+         "$@train#trainer.run()"
+     ]
+ }
configs/train_autoencoder.json ADDED
@@ -0,0 +1,227 @@
+ {
+     "imports": [
+         "$import functools",
+         "$import glob",
+         "$import scripts"
+     ],
+     "bundle_root": ".",
+     "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
+     "ckpt_dir": "$@bundle_root + '/models'",
+     "tf_dir": "$@bundle_root + '/eval'",
+     "dataset_dir": "@bundle_root",
+     "pretrained": false,
+     "perceptual_loss_model_weights_path": null,
+     "train_batch_size_img": 1,
+     "train_batch_size_slice": 26,
+     "lr": 5e-05,
+     "train_patch_size": [
+         240,
+         240
+     ],
+     "channel": 0,
+     "spatial_dims": 2,
+     "image_channels": 1,
+     "latent_channels": 1,
+     "discriminator_def": {
+         "_target_": "generative.networks.nets.PatchDiscriminator",
+         "spatial_dims": "@spatial_dims",
+         "num_layers_d": 3,
+         "num_channels": 32,
+         "in_channels": 1,
+         "out_channels": 1,
+         "norm": "INSTANCE"
+     },
+     "autoencoder_def": {
+         "_target_": "generative.networks.nets.AutoencoderKL",
+         "spatial_dims": "@spatial_dims",
+         "in_channels": "@image_channels",
+         "out_channels": "@image_channels",
+         "latent_channels": "@latent_channels",
+         "num_channels": [
+             64,
+             128,
+             256
+         ],
+         "num_res_blocks": 2,
+         "norm_num_groups": 32,
+         "norm_eps": 1e-06,
+         "attention_levels": [
+             false,
+             false,
+             false
+         ],
+         "with_encoder_nonlocal_attn": true,
+         "with_decoder_nonlocal_attn": true
+     },
+     "perceptual_loss_def": {
+         "_target_": "generative.losses.PerceptualLoss",
+         "spatial_dims": "@spatial_dims",
+         "network_type": "resnet50",
+         "pretrained": "@pretrained",
+         "pretrained_path": "@perceptual_loss_model_weights_path",
+         "pretrained_state_dict_key": "state_dict"
+     },
+     "dnetwork": "$@discriminator_def.to(@device)",
+     "gnetwork": "$@autoencoder_def.to(@device)",
+     "loss_perceptual": "$@perceptual_loss_def.to(@device)",
+     "doptimizer": {
+         "_target_": "torch.optim.Adam",
+         "params": "$@dnetwork.parameters()",
+         "lr": "@lr"
+     },
+     "goptimizer": {
+         "_target_": "torch.optim.Adam",
+         "params": "$@gnetwork.parameters()",
+         "lr": "@lr"
+     },
+     "preprocessing_transforms": [
+         {
+             "_target_": "LoadImaged",
+             "keys": "image"
+         },
+         {
+             "_target_": "EnsureChannelFirstd",
+             "keys": "image"
+         },
+         {
+             "_target_": "Lambdad",
+             "keys": "image",
+             "func": "$lambda x: x[@channel, :, :, :]"
+         },
+         {
+             "_target_": "AddChanneld",
+             "keys": "image"
+         },
+         {
+             "_target_": "EnsureTyped",
+             "keys": "image"
+         },
+         {
+             "_target_": "Orientationd",
+             "keys": "image",
+             "axcodes": "RAS"
+         },
+         {
+             "_target_": "CenterSpatialCropd",
+             "keys": "image",
+             "roi_size": "$[@train_patch_size[0], @train_patch_size[1], 100]"
+         },
+         {
+             "_target_": "ScaleIntensityRangePercentilesd",
+             "keys": "image",
+             "lower": 0,
+             "upper": 100,
+             "b_min": 0,
+             "b_max": 1
+         }
+     ],
+     "train": {
+         "crop_transforms": [
+             {
+                 "_target_": "DivisiblePadd",
+                 "keys": "image",
+                 "k": [
+                     4,
+                     4,
+                     1
+                 ]
+             },
+             {
+                 "_target_": "RandSpatialCropSamplesd",
+                 "keys": "image",
+                 "random_size": false,
+                 "roi_size": "$[@train_patch_size[0], @train_patch_size[1], 1]",
+                 "num_samples": "@train_batch_size_slice"
+             },
+             {
+                 "_target_": "SqueezeDimd",
+                 "keys": "image",
+                 "dim": 3
+             },
+             {
+                 "_target_": "RandFlipd",
+                 "keys": [
+                     "image"
+                 ],
+                 "prob": 0.5,
+                 "spatial_axis": 0
+             },
+             {
+                 "_target_": "RandFlipd",
+                 "keys": [
+                     "image"
+                 ],
+                 "prob": 0.5,
+                 "spatial_axis": 1
+             }
+         ],
+         "preprocessing": {
+             "_target_": "Compose",
+             "transforms": "$@preprocessing_transforms + @train#crop_transforms"
+         },
+         "dataset": {
+             "_target_": "monai.apps.DecathlonDataset",
+             "root_dir": "@dataset_dir",
+             "task": "Task01_BrainTumour",
+             "section": "training",
+             "cache_rate": 1.0,
+             "num_workers": 8,
+             "download": false,
+             "transform": "@train#preprocessing"
+         },
+         "dataloader": {
+             "_target_": "DataLoader",
+             "dataset": "@train#dataset",
+             "batch_size": "@train_batch_size_img",
+             "shuffle": true,
+             "num_workers": 0
+         },
+         "handlers": [
+             {
+                 "_target_": "CheckpointSaver",
+                 "save_dir": "@ckpt_dir",
+                 "save_dict": {
+                     "model": "@gnetwork"
+                 },
+                 "save_interval": 0,
+                 "save_final": true,
+                 "epoch_level": true,
+                 "final_filename": "model_autoencoder.pt"
+             },
+             {
+                 "_target_": "StatsHandler",
+                 "tag_name": "train_loss",
+                 "output_transform": "$lambda x: monai.handlers.from_engine(['g_loss'], first=True)(x)[0]"
+             },
+             {
+                 "_target_": "TensorBoardStatsHandler",
+                 "log_dir": "@tf_dir",
+                 "tag_name": "train_loss",
+                 "output_transform": "$lambda x: monai.handlers.from_engine(['g_loss'], first=True)(x)[0]"
+             }
+         ],
+         "trainer": {
+             "_target_": "scripts.ldm_trainer.VaeGanTrainer",
+             "device": "@device",
+             "max_epochs": 1500,
+             "train_data_loader": "@train#dataloader",
+             "g_network": "@gnetwork",
+             "g_optimizer": "@goptimizer",
+             "g_loss_function": "$functools.partial(scripts.losses.generator_loss, disc_net=@dnetwork, loss_perceptual=@loss_perceptual)",
+             "d_network": "@dnetwork",
+             "d_optimizer": "@doptimizer",
+             "d_loss_function": "$functools.partial(scripts.losses.discriminator_loss, disc_net=@dnetwork)",
+             "d_train_steps": 1,
+             "g_update_latents": true,
+             "latent_shape": "@latent_channels",
+             "key_train_metric": "$None",
+             "train_handlers": "@train#handlers"
+         }
+     },
+     "initialize": [
+         "$monai.utils.set_determinism(seed=0)"
+     ],
+     "run": [
+         "$@train#trainer.run()"
+     ]
+ }
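A quick sanity check on the shapes implied by this config. Assuming `AutoencoderKL` halves the spatial resolution between consecutive entries of `num_channels` (an assumption about the architecture, but consistent with the 64x64 latents declared in `configs/metadata.json`), three levels give a 4x downsampling:

```python
num_channels = [64, 128, 256]          # autoencoder_def in train_autoencoder.json
downsample = 2 ** (len(num_channels) - 1)

autoencoder_patch = 240                # train_patch_size in train_autoencoder.json
diffusion_patch = 256                  # train_patch_size in train_diffusion.json

print(downsample)                       # 4
print(autoencoder_patch // downsample)  # 60
print(diffusion_patch // downsample)    # 64 -> matches the 64x64 latent_shape
```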
configs/train_diffusion.json ADDED
@@ -0,0 +1,174 @@
+ {
+     "ckpt_dir": "$@bundle_root + '/models'",
+     "train_batch_size_img": 2,
+     "train_batch_size_slice": 50,
+     "lr": 5e-05,
+     "train_patch_size": [
+         256,
+         256
+     ],
+     "latent_shape": [
+         "@latent_channels",
+         64,
+         64
+     ],
+     "load_autoencoder_path": "$@bundle_root + '/models/model_autoencoder.pt'",
+     "load_autoencoder": "$@autoencoder_def.load_state_dict(torch.load(@load_autoencoder_path))",
+     "autoencoder": "$@autoencoder_def.to(@device)",
+     "network_def": {
+         "_target_": "generative.networks.nets.DiffusionModelUNet",
+         "spatial_dims": "@spatial_dims",
+         "in_channels": "@latent_channels",
+         "out_channels": "@latent_channels",
+         "num_channels": [
+             32,
+             64,
+             128,
+             256
+         ],
+         "attention_levels": [
+             false,
+             true,
+             true,
+             true
+         ],
+         "num_head_channels": [
+             0,
+             32,
+             32,
+             32
+         ],
+         "num_res_blocks": 2
+     },
+     "diffusion": "$@network_def.to(@device)",
+     "optimizer": {
+         "_target_": "torch.optim.Adam",
+         "params": "$@diffusion.parameters()",
+         "lr": "@lr"
+     },
+     "lr_scheduler": {
+         "_target_": "torch.optim.lr_scheduler.MultiStepLR",
+         "optimizer": "@optimizer",
+         "milestones": [
+             1000
+         ],
+         "gamma": 0.1
+     },
+     "scale_factor": "$scripts.utils.compute_scale_factor(@autoencoder,@train#dataloader,@device)",
+     "noise_scheduler": {
+         "_target_": "generative.networks.schedulers.DDPMScheduler",
+         "_requires_": [
+             "@load_autoencoder"
+         ],
+         "beta_schedule": "scaled_linear",
+         "num_train_timesteps": 1000,
+         "beta_start": 0.0015,
+         "beta_end": 0.0195
+     },
+     "inferer": {
+         "_target_": "generative.inferers.LatentDiffusionInferer",
+         "scheduler": "@noise_scheduler",
+         "scale_factor": "@scale_factor"
+     },
+     "loss": {
+         "_target_": "torch.nn.MSELoss"
+     },
+     "train": {
+         "crop_transforms": [
+             {
+                 "_target_": "DivisiblePadd",
+                 "keys": "image",
+                 "k": [
+                     32,
+                     32,
+                     1
+                 ]
+             },
+             {
+                 "_target_": "RandSpatialCropSamplesd",
+                 "keys": "image",
+                 "random_size": false,
+                 "roi_size": "$[@train_patch_size[0], @train_patch_size[1], 1]",
+                 "num_samples": "@train_batch_size_slice"
+             },
+             {
+                 "_target_": "SqueezeDimd",
+                 "keys": "image",
+                 "dim": 3
+             }
+         ],
+         "preprocessing": {
+             "_target_": "Compose",
+             "transforms": "$@preprocessing_transforms + @train#crop_transforms"
+         },
+         "dataset": {
+             "_target_": "monai.apps.DecathlonDataset",
+             "root_dir": "@dataset_dir",
+             "task": "Task01_BrainTumour",
+             "section": "training",
+             "cache_rate": 1.0,
+             "num_workers": 8,
+             "download": "@download_brats",
+             "transform": "@train#preprocessing"
+         },
+         "dataloader": {
+             "_target_": "DataLoader",
+             "dataset": "@train#dataset",
+             "batch_size": "@train_batch_size_img",
+             "shuffle": true,
+             "num_workers": 0
+         },
+         "handlers": [
+             {
+                 "_target_": "LrScheduleHandler",
+                 "lr_scheduler": "@lr_scheduler",
+                 "print_lr": true
+             },
+             {
+                 "_target_": "CheckpointSaver",
+                 "save_dir": "@ckpt_dir",
+                 "save_dict": {
+                     "model": "@diffusion"
+                 },
+                 "save_interval": 0,
+                 "save_final": true,
+                 "epoch_level": true,
+                 "final_filename": "model.pt"
+             },
+             {
+                 "_target_": "StatsHandler",
+                 "tag_name": "train_diffusion_loss",
+                 "output_transform": "$lambda x: monai.handlers.from_engine(['loss'], first=True)(x)"
+             },
+             {
+                 "_target_": "TensorBoardStatsHandler",
+                 "log_dir": "@tf_dir",
+                 "tag_name": "train_diffusion_loss",
+                 "output_transform": "$lambda x: monai.handlers.from_engine(['loss'], first=True)(x)"
+             }
+         ],
+         "trainer": {
+             "_target_": "scripts.ldm_trainer.LDMTrainer",
+             "device": "@device",
+             "max_epochs": 1000,
+             "train_data_loader": "@train#dataloader",
+             "network": "@diffusion",
+             "autoencoder_model": "@autoencoder",
+             "optimizer": "@optimizer",
+             "loss_function": "@loss",
+             "latent_shape": "@latent_shape",
+             "inferer": "@inferer",
+             "key_train_metric": "$None",
+             "train_handlers": "@train#handlers"
+         }
+     },
+     "initialize": [
+         "$monai.utils.set_determinism(seed=0)"
+     ],
+     "run": [
+         "@load_autoencoder",
+         "$@autoencoder.eval()",
+         "$print('scale factor:',@scale_factor)",
+         "$@train#trainer.run()"
+     ]
+ }
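The `scaled_linear` schedule configured above interpolates linearly in the square root of beta and then squares the result (this matches the common diffusers/MONAI Generative definition; treat the exact formula as an assumption about the library internals):

```python
import math

def scaled_linear_betas(beta_start: float, beta_end: float, num_timesteps: int) -> list:
    # Interpolate sqrt(beta) linearly over the timesteps, then square.
    s0, s1 = math.sqrt(beta_start), math.sqrt(beta_end)
    return [(s0 + (s1 - s0) * t / (num_timesteps - 1)) ** 2 for t in range(num_timesteps)]

# Values from the noise_scheduler section above.
betas = scaled_linear_betas(0.0015, 0.0195, 1000)
```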
docs/README.md ADDED
@@ -0,0 +1,169 @@
+ # Model Overview
+ A pre-trained 2D latent diffusion generative model for axial slices of BraTS MRI.
+
+ This model is trained on BraTS 2016 and 2017 data from [Medical Decathlon](http://medicaldecathlon.com/), using the latent diffusion model [1].
+
+ ![model workflow](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm3d_network.png)
+
+ This model is a generator that creates images resembling the Flair MRIs in the BraTS 2016 and 2017 data. It was trained as a 2D latent diffusion model and accepts Gaussian random noise as input to produce an image output. The `train_autoencoder.json` file describes the training process of the variational autoencoder with GAN loss. The `train_diffusion.json` file describes the training process of the 2D latent diffusion model.
+
+ In this bundle, the autoencoder uses a perceptual loss based on ResNet50 with pre-trained weights (the network is frozen and is not trained in the bundle). By default, the `pretrained` parameter is set to `False` in `train_autoencoder.json`. To ensure correct training, this default must be changed. There are two ways to use pretrained weights:
+ 1. If `pretrained` is set to `True`, ImageNet pretrained weights from [torchvision](https://pytorch.org/vision/stable/_modules/torchvision/models/resnet.html#ResNet50_Weights) are used. Note that these weights are for non-commercial use only.
+ 2. If `pretrained` is set to `True` and the `perceptual_loss_model_weights_path` parameter is specified, weights are loaded from a local path. This is how this bundle was trained; its pre-trained weights come from internal data.
+
+ Please note that each user is responsible for checking the data source of the pre-trained models, the applicable licenses, and determining whether they are suitable for the intended use.
+
+ #### Example synthetic image
+ An example result from inference is shown below:
+ ![Example synthetic image](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_example_generation_v2.png)
+
+ **This is a demonstration network meant to show the training process for this kind of network with MONAI. To achieve better performance, users should train on a larger dataset such as [BraTS 2021](https://www.synapse.org/#!Synapse:syn25829067/wiki/610865).**
+
+ ## MONAI Generative Model Dependencies
+ [MONAI generative models](https://github.com/Project-MONAI/GenerativeModels) can be installed with:
+ ```
+ pip install lpips==0.1.4
+ pip install git+https://github.com/Project-MONAI/GenerativeModels.git@0.2.1
+ ```
+
+ ## Data
+ The training data is BraTS 2016 and 2017 from the Medical Segmentation Decathlon. Users can find more details on the dataset (`Task01_BrainTumour`) at http://medicaldecathlon.com/.
+
+ - Target: Image Generation
+ - Task: Synthesis
+ - Modality: MRI
+ - Size: 388 3D MRI volumes (1 channel used)
+ - Training data size: 38800 2D MRI axial slices (1 channel used)
+
+ ## Training Configuration
+ If you have a GPU with less than 32G of memory, you may need to decrease the batch size when training. To do so, modify the `"train_batch_size_img"` and `"train_batch_size_slice"` parameters in the `configs/train_autoencoder.json` and `configs/train_diffusion.json` configuration files.
+ - `"train_batch_size_img"` is the number of 3D volumes loaded in each batch.
+ - `"train_batch_size_slice"` is the number of 2D axial slices extracted from each volume. The actual batch size is the product of the two.
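With the default config values, the effective 2D batch sizes work out as:

```python
# Effective 2D batch = volumes per batch * slices sampled per volume.
autoencoder_batch = 1 * 26   # train_autoencoder.json defaults
diffusion_batch = 2 * 50     # train_diffusion.json defaults
print(autoencoder_batch, diffusion_batch)  # 26 100
```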
+
+ ### Training Configuration of Autoencoder
+ The autoencoder was trained using the following configuration:
+
+ - GPU: at least 32GB GPU memory
+ - Actual Model Input: 240 x 240
+ - AMP: False
+ - Optimizer: Adam
+ - Learning Rate: 5e-5
+ - Loss: L1 loss, perceptual loss, KL divergence loss, adversarial loss, GAN BCE loss
+
+ #### Input
+ 1 channel 2D MRI Flair axial patches
+
+ #### Output
+ - 1 channel 2D MRI reconstructed patches
+ - 1 channel mean of latent features
+ - 1 channel standard deviation of latent features
+
+ ### Training Configuration of Diffusion Model
+ The latent diffusion model was trained using the following configuration:
+
+ - GPU: at least 32GB GPU memory
+ - Actual Model Input: 64 x 64
+ - AMP: False
+ - Optimizer: Adam
+ - Learning Rate: 5e-5
+ - Loss: MSE loss
+
+ #### Training Input
+ - 1 channel noisy latent features
+ - a long int that indicates the time step
+
+ #### Training Output
+ 1 channel predicted added noise
+
+ #### Inference Input
+ 1 channel noise
+
+ #### Inference Output
+ 1 channel denoised latent features
+
+ ### Memory Consumption Warning
+
+ If you face memory issues with data loading, you can lower the caching rate `cache_rate` in the configurations (within the range [0, 1]) to reduce the system RAM requirements.
+
+ ## Performance
+
+ #### Training Loss
+ ![A graph showing the autoencoder training curve](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_train_autoencoder_loss_v3.png)
+
+ ![A graph showing the latent diffusion training curve](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_train_diffusion_loss_v3.png)
+
+ ## MONAI Bundle Commands
+ In addition to the Pythonic APIs, a few command line interfaces (CLI) are provided to interact with the bundle. The CLI supports flexible use cases, such as overriding configs at runtime and predefining arguments in a file.
+
+ For more detailed usage instructions, visit the [MONAI Bundle Configuration Page](https://docs.monai.io/en/latest/config_syntax.html).
+
+ ### Execute Autoencoder Training
+
+ #### Execute Autoencoder Training on single GPU
+ ```
+ python -m monai.bundle run --config_file configs/train_autoencoder.json
+ ```
+
+ Please note that if the default dataset path in the bundle config files has not been modified to the actual path (it should be the directory that contains `Task01_BrainTumour`), you can override it with `--dataset_dir`:
+
+ ```
+ python -m monai.bundle run --config_file configs/train_autoencoder.json --dataset_dir <actual dataset path>
+ ```
+
+ #### Override the `train` config to execute multi-GPU training for Autoencoder
+ To train with multiple GPUs, use the following command, which requires scaling up the learning rate according to the number of GPUs.
+
+ ```
+ torchrun --standalone --nnodes=1 --nproc_per_node=8 -m monai.bundle run --config_file "['configs/train_autoencoder.json','configs/multi_gpu_train_autoencoder.json']" --lr 4e-4
+ ```
+
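The `--lr 4e-4` override above follows the linear scaling heuristic (multiply the base learning rate by the number of processes; a rule of thumb, not a guarantee of optimality):

```python
base_lr = 5e-05   # "lr" in train_autoencoder.json
num_gpus = 8      # --nproc_per_node above
scaled_lr = base_lr * num_gpus
print(scaled_lr)  # 0.0004
```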
+ #### Check the Autoencoder Training result
+ The following command generates a reconstructed image from a random input image.
+ We can visualize it to check whether the autoencoder is trained correctly.
+ ```
+ python -m monai.bundle run --config_file configs/inference_autoencoder.json
+ ```
+
+ An example of a reconstructed image from inference is shown below. If the autoencoder is trained correctly, the reconstructed image should look similar to the original image.
+
+ ![Example reconstructed image](https://developer.download.nvidia.com/assets/Clara/Images/monai_brain_image_gen_ldm2d_recon_example.png)
+
+ ### Execute Latent Diffusion Model Training
+
+ #### Execute Latent Diffusion Model Training on single GPU
+ After training the autoencoder, run the following command to train the latent diffusion model. This command prints the scale factor of the latent feature space. If your autoencoder is well trained, this value should be close to 1.0.
+
+ ```
+ python -m monai.bundle run --config_file "['configs/train_autoencoder.json','configs/train_diffusion.json']"
+ ```
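The bundle computes the scale factor with `scripts.utils.compute_scale_factor`; the usual latent-diffusion convention (an assumption here about that helper) is the reciprocal of the standard deviation of a batch of encoded latents, so roughly unit-variance latents give a value near 1.0:

```python
import random

random.seed(0)
# Stand-in for encoded latents; the real code would encode one training batch.
latents = [random.gauss(0.0, 1.0) for _ in range(10_000)]
mean = sum(latents) / len(latents)
std = (sum((z - mean) ** 2 for z in latents) / len(latents)) ** 0.5
scale_factor = 1.0 / std  # close to 1.0 for unit-variance latents
```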
+
+ #### Override the `train` config to execute multi-GPU training for Latent Diffusion Model
+ To train with multiple GPUs, use the following command, which requires scaling up the learning rate according to the number of GPUs.
+
+ ```
+ torchrun --standalone --nnodes=1 --nproc_per_node=8 -m monai.bundle run --config_file "['configs/train_autoencoder.json','configs/train_diffusion.json','configs/multi_gpu_train_autoencoder.json','configs/multi_gpu_train_diffusion.json']" --lr 4e-4
+ ```
+
+ ### Execute inference
+ The following command generates a synthetic image from randomly sampled noise.
+ ```
+ python -m monai.bundle run --config_file configs/inference.json
+ ```
+
+ # References
+ [1] Rombach, Robin, et al. "High-resolution image synthesis with latent diffusion models." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2022. https://openaccess.thecvf.com/content/CVPR2022/papers/Rombach_High-Resolution_Image_Synthesis_With_Latent_Diffusion_Models_CVPR_2022_paper.pdf
+
+ # License
+ Copyright (c) MONAI Consortium
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
docs/data_license.txt ADDED
@@ -0,0 +1,49 @@
+ Third Party Licenses
+ -----------------------------------------------------------------------
+
+ /*********************************************************************/
+ i. Multimodal Brain Tumor Segmentation Challenge 2018
+ https://www.med.upenn.edu/sbia/brats2018/data.html
+ /*********************************************************************/
+
+ Data Usage Agreement / Citations
+
+ You are free to use and/or refer to the BraTS datasets in your own
+ research, provided that you always cite the following two manuscripts:
+
+ [1] Menze BH, Jakab A, Bauer S, Kalpathy-Cramer J, Farahani K, Kirby
+ J, Burren Y, Porz N, Slotboom J, Wiest R, Lanczi L, Gerstner E, Weber
+ MA, Arbel T, Avants BB, Ayache N, Buendia P, Collins DL, Cordier N,
+ Corso JJ, Criminisi A, Das T, Delingette H, Demiralp Ç, Durst CR,
+ Dojat M, Doyle S, Festa J, Forbes F, Geremia E, Glocker B, Golland P,
+ Guo X, Hamamci A, Iftekharuddin KM, Jena R, John NM, Konukoglu E,
+ Lashkari D, Mariz JA, Meier R, Pereira S, Precup D, Price SJ, Raviv
+ TR, Reza SM, Ryan M, Sarikaya D, Schwartz L, Shin HC, Shotton J,
+ Silva CA, Sousa N, Subbanna NK, Szekely G, Taylor TJ, Thomas OM,
+ Tustison NJ, Unal G, Vasseur F, Wintermark M, Ye DH, Zhao L, Zhao B,
+ Zikic D, Prastawa M, Reyes M, Van Leemput K. "The Multimodal Brain
+ Tumor Image Segmentation Benchmark (BRATS)", IEEE Transactions on
+ Medical Imaging 34(10), 1993-2024 (2015) DOI:
+ 10.1109/TMI.2014.2377694
+
+ [2] Bakas S, Akbari H, Sotiras A, Bilello M, Rozycki M, Kirby JS,
+ Freymann JB, Farahani K, Davatzikos C. "Advancing The Cancer Genome
+ Atlas glioma MRI collections with expert segmentation labels and
+ radiomic features", Nature Scientific Data, 4:170117 (2017) DOI:
+ 10.1038/sdata.2017.117
+
+ In addition, if there are no restrictions imposed from the
+ journal/conference you submit your paper about citing "Data
+ Citations", please be specific and also cite the following:
+
+ [3] Bakas S, Akbari H, Sotiras A, Bilello M, Rozycki M, Kirby J,
+ Freymann J, Farahani K, Davatzikos C. "Segmentation Labels and
+ Radiomic Features for the Pre-operative Scans of the TCGA-GBM
+ collection", The Cancer Imaging Archive, 2017. DOI:
+ 10.7937/K9/TCIA.2017.KLXWJJ1Q
+
+ [4] Bakas S, Akbari H, Sotiras A, Bilello M, Rozycki M, Kirby J,
+ Freymann J, Farahani K, Davatzikos C. "Segmentation Labels and
+ Radiomic Features for the Pre-operative Scans of the TCGA-LGG
+ collection", The Cancer Imaging Archive, 2017. DOI:
+ 10.7937/K9/TCIA.2017.GJQ7R0EF
models/model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff03d51a63541e4795869d7edc9176ccea8df91e1afdcd0fedb7600b6b6c54d1
+ size 63696253
models/model_autoencoder.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b90968ce8a5eb8e71de1c6bf0cbe79e5dc6104fe289a2058ddd62ea18ce78d69
+ size 49200645
scripts/__init__.py ADDED
@@ -0,0 +1,12 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from . import ldm_sampler, ldm_trainer, losses, utils
scripts/ldm_sampler.py ADDED
@@ -0,0 +1,60 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import annotations
+
+ import torch
+ import torch.nn as nn
+ from monai.utils import optional_import
+ from torch.cuda.amp import autocast
+
+ tqdm, has_tqdm = optional_import("tqdm", name="tqdm")
+
+
+ class LDMSampler:
+     def __init__(self) -> None:
+         super().__init__()
+
+     @torch.no_grad()
+     def sampling_fn(
+         self,
+         input_noise: torch.Tensor,
+         autoencoder_model: nn.Module,
+         diffusion_model: nn.Module,
+         scheduler: nn.Module,
+         conditioning: torch.Tensor | None = None,
+     ) -> torch.Tensor:
+         if has_tqdm:
+             progress_bar = tqdm(scheduler.timesteps)
+         else:
+             progress_bar = iter(scheduler.timesteps)
+
+         image = input_noise
+         if conditioning is not None:
+             cond_concat = conditioning.squeeze(1).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
+             cond_concat = cond_concat.expand(list(cond_concat.shape[0:2]) + list(input_noise.shape[2:]))
+
+         for t in progress_bar:
+             with torch.no_grad():
+                 if conditioning is not None:
+                     input_t = torch.cat((image, cond_concat), dim=1)
+                 else:
+                     input_t = image
+                 model_output = diffusion_model(
+                     input_t, timesteps=torch.Tensor((t,)).to(input_noise.device).long(), context=conditioning
+                 )
+                 image, _ = scheduler.step(model_output, t, image)
+
+         with torch.no_grad():
+             with autocast():
+                 sample = autoencoder_model.decode_stage_2_outputs(image)
+
+         return sample
scripts/ldm_trainer.py ADDED
@@ -0,0 +1,380 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence
+
+ import torch
+ from monai.config import IgniteInfo
+ from monai.engines.utils import IterationEvents, default_metric_cmp_fn, default_prepare_batch
+ from monai.inferers import Inferer, SimpleInferer
+ from monai.transforms import Transform
+ from monai.utils import min_version, optional_import
+ from monai.utils.enums import CommonKeys, GanKeys
+ from torch.optim.optimizer import Optimizer
+ from torch.utils.data import DataLoader
+
+ if TYPE_CHECKING:
+     from ignite.engine import Engine, EventEnum
+     from ignite.metrics import Metric
+ else:
+     Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine")
+     Metric, _ = optional_import("ignite.metrics", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Metric")
+     EventEnum, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "EventEnum")
+ from monai.engines.trainer import SupervisedTrainer, Trainer
+
+
+ class VaeGanTrainer(Trainer):
+     """
+     Generative adversarial network training based on Goodfellow et al. 2014 https://arxiv.org/abs/1406.2661,
+     inherits from ``Trainer`` and ``Workflow``.
+     Training Loop: for each batch of data size `m`
+         1. Generate `m` fakes from random latent codes.
+         2. Update discriminator with these fakes and current batch reals, repeated d_train_steps times.
+         3. If g_update_latents, generate `m` fakes from new random latent codes.
+         4. Update generator with these fakes using discriminator feedback.
+     Args:
+         device: an object representing the device on which to run.
+         max_epochs: the total epoch number for engine to run.
+         train_data_loader: core Ignite engines use `DataLoader` for the training-loop batchdata.
+         g_network: generator (G) network architecture.
+         g_optimizer: G optimizer function.
+         g_loss_function: G loss function for optimizer.
+         d_network: discriminator (D) network architecture.
+         d_optimizer: D optimizer function.
+         d_loss_function: D loss function for optimizer.
+         epoch_length: number of iterations for one epoch, default to `len(train_data_loader)`.
+         g_inferer: inference method to execute G model forward. Defaults to ``SimpleInferer()``.
+         d_inferer: inference method to execute D model forward. Defaults to ``SimpleInferer()``.
+         d_train_steps: number of times to update D with the real data minibatch. Defaults to ``1``.
+         latent_shape: size of G input latent code. Defaults to ``64``.
+         non_blocking: if True and this copy is between CPU and GPU, the copy may occur asynchronously
+             with respect to the host. For other cases, this argument has no effect.
+         d_prepare_batch: callback function to prepare batchdata for the D inferer.
+             Defaults to returning ``GanKeys.REALS`` in the batchdata dict. For more details please refer to:
+             https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html.
+         g_prepare_batch: callback function to create a batch of latent input for the G inferer.
+             Defaults to returning random latents. For more details please refer to:
+             https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html.
+         g_update_latents: calculate G loss with new latent codes. Defaults to ``True``.
+         iteration_update: the callable function for every iteration, expected to accept `engine`
+             and `engine.state.batch` as inputs; the returned data will be stored in `engine.state.output`.
+             If not provided, use `self._iteration()` instead. For more details please refer to:
+             https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html.
+         postprocessing: execute additional transformation for the model output data.
+             Typically, several Tensor-based transforms composed by `Compose`.
+         key_train_metric: compute the metric when every iteration completed, and save the average value to
+             engine.state.metrics when the epoch completed. key_train_metric is the main metric to compare and
+             save the checkpoint into files.
+         additional_metrics: more Ignite metrics that also attach to the Ignite Engine.
+         metric_cmp_fn: function to compare the current key metric with the previous best key metric value;
+             it must accept 2 args (current_metric, previous_best) and return a bool result: if `True`, will update
+             `best_metric` and `best_metric_epoch` with the current metric and epoch, default to `greater than`.
+         train_handlers: every handler is a set of Ignite Event-Handlers that must have an `attach` function, like:
+             CheckpointHandler, StatsHandler, etc.
+         decollate: whether to decollate the batch-first data to a list of data after model computation;
+             `decollate=True` is recommended when `postprocessing` uses components from `monai.transforms`.
+             Default to `True`.
+         optim_set_to_none: when calling `optimizer.zero_grad()`, instead of setting to zero, set the grads to None.
+             More details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
+         to_kwargs: dict of other args for the `prepare_batch` API when converting the input data, except for
+             `device` and `non_blocking`.
+         amp_kwargs: dict of the args for the `torch.cuda.amp.autocast()` API, for more details:
+             https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+     """
+
+     def __init__(
+         self,
+         device: str | torch.device,
+         max_epochs: int,
+         train_data_loader: DataLoader,
+         g_network: torch.nn.Module,
+         g_optimizer: Optimizer,
+         g_loss_function: Callable,
+         d_network: torch.nn.Module,
+         d_optimizer: Optimizer,
+         d_loss_function: Callable,
+         epoch_length: int | None = None,
+         g_inferer: Inferer | None = None,
+         d_inferer: Inferer | None = None,
+         d_train_steps: int = 1,
+         latent_shape: int = 64,
+         non_blocking: bool = False,
+         d_prepare_batch: Callable = default_prepare_batch,
+         g_prepare_batch: Callable = default_prepare_batch,
+         g_update_latents: bool = True,
+         iteration_update: Callable[[Engine, Any], Any] | None = None,
+         postprocessing: Transform | None = None,
+         key_train_metric: dict[str, Metric] | None = None,
+         additional_metrics: dict[str, Metric] | None = None,
+         metric_cmp_fn: Callable = default_metric_cmp_fn,
+         train_handlers: Sequence | None = None,
+         decollate: bool = True,
+         optim_set_to_none: bool = False,
+         to_kwargs: dict | None = None,
+         amp_kwargs: dict | None = None,
+     ):
+         if not isinstance(train_data_loader, DataLoader):
+             raise ValueError("train_data_loader must be PyTorch DataLoader.")
+
+         # set up Ignite engine and environments
+         super().__init__(
+             device=device,
+             max_epochs=max_epochs,
+             data_loader=train_data_loader,
+             epoch_length=epoch_length,
+             non_blocking=non_blocking,
+             prepare_batch=d_prepare_batch,
+             iteration_update=iteration_update,
+             key_metric=key_train_metric,
+             additional_metrics=additional_metrics,
+             metric_cmp_fn=metric_cmp_fn,
+             handlers=train_handlers,
+             postprocessing=postprocessing,
+             decollate=decollate,
+             to_kwargs=to_kwargs,
+             amp_kwargs=amp_kwargs,
+         )
+         self.g_network = g_network
+         self.g_optimizer = g_optimizer
+         self.g_loss_function = g_loss_function
+         self.g_inferer = SimpleInferer() if g_inferer is None else g_inferer
+         self.d_network = d_network
+         self.d_optimizer = d_optimizer
+         self.d_loss_function = d_loss_function
+         self.d_inferer = SimpleInferer() if d_inferer is None else d_inferer
+         self.d_train_steps = d_train_steps
+         self.latent_shape = latent_shape
+         self.g_prepare_batch = g_prepare_batch
+         self.g_update_latents = g_update_latents
+         self.optim_set_to_none = optim_set_to_none
+
+     def _iteration(
+         self, engine: VaeGanTrainer, batchdata: dict | Sequence
+     ) -> dict[str, torch.Tensor | int | float | bool]:
+         """
+         Callback function for Adversarial Training processing logic of 1 iteration in Ignite Engine.
+         Args:
+             engine: `VaeGanTrainer` to execute operation for an iteration.
+             batchdata: input data for this iteration, usually can be dictionary or tuple of Tensor data.
+         Raises:
+             ValueError: must provide batch data for current iteration.
+         """
+         if batchdata is None:
+             raise ValueError("must provide batch data for current iteration.")
+
+         d_input = engine.prepare_batch(batchdata, engine.state.device, engine.non_blocking, **engine.to_kwargs)[0]
+         g_input = d_input
+         g_output, z_mu, z_sigma = engine.g_inferer(g_input, engine.g_network)
+
+         # Train Generator
+         engine.g_optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
+         g_loss = engine.g_loss_function(g_output, g_input, z_mu, z_sigma)
+         g_loss.backward()
+         engine.g_optimizer.step()
+
+         # Train Discriminator
+         d_total_loss = torch.zeros(1)
+         for _ in range(engine.d_train_steps):
+             engine.d_optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
+             dloss = engine.d_loss_function(g_output, d_input)
+             dloss.backward()
+             engine.d_optimizer.step()
+             d_total_loss += dloss.item()
+
+         return {
+             GanKeys.REALS: d_input,
+             GanKeys.FAKES: g_output,
+             GanKeys.LATENTS: g_input,
+             GanKeys.GLOSS: g_loss.item(),
+             GanKeys.DLOSS: d_total_loss.item(),
+         }
+
+
+ class LDMTrainer(SupervisedTrainer):
+     """
+     Standard supervised training method with image and label, inherits from ``Trainer`` and ``Workflow``.
+     Args:
+         device: an object representing the device on which to run.
+         max_epochs: the total epoch number for trainer to run.
+         train_data_loader: the Ignite engine uses data_loader to run; must be Iterable or torch.DataLoader.
+         network: network to train in the trainer, should be a regular PyTorch `torch.nn.Module`.
+         optimizer: the optimizer associated to the network, should be a regular PyTorch optimizer from `torch.optim`
+             or its subclass.
+         loss_function: the loss function associated to the optimizer, should be a regular PyTorch loss
+             which inherits from `torch.nn.modules.loss`.
+         epoch_length: number of iterations for one epoch, default to `len(train_data_loader)`.
+         non_blocking: if True and this copy is between CPU and GPU, the copy may occur asynchronously
+             with respect to the host. For other cases, this argument has no effect.
+         prepare_batch: function to parse expected data (usually `image`, `label` and other network args)
+             from `engine.state.batch` for every iteration, for more details please refer to:
+             https://pytorch.org/ignite/generated/ignite.engine.create_supervised_trainer.html.
+         iteration_update: the callable function for every iteration, expected to accept `engine`
+             and `engine.state.batch` as inputs; the returned data will be stored in `engine.state.output`.
+             If not provided, use `self._iteration()` instead. For more details please refer to:
+             https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html.
+         inferer: inference method that executes the model forward on the input data, like: SlidingWindow, etc.
+         postprocessing: execute additional transformation for the model output data.
+             Typically, several Tensor-based transforms composed by `Compose`.
+         key_train_metric: compute the metric when every iteration completed, and save the average value to
+             engine.state.metrics when the epoch completed. key_train_metric is the main metric to compare and
+             save the checkpoint into files.
+         additional_metrics: more Ignite metrics that also attach to the Ignite Engine.
+         metric_cmp_fn: function to compare the current key metric with the previous best key metric value;
+             it must accept 2 args (current_metric, previous_best) and return a bool result: if `True`, will update
+             `best_metric` and `best_metric_epoch` with the current metric and epoch, default to `greater than`.
+         train_handlers: every handler is a set of Ignite Event-Handlers that must have an `attach` function, like:
+             CheckpointHandler, StatsHandler, etc.
+         amp: whether to enable auto-mixed-precision training, default is False.
+         event_names: additional custom Ignite events that will register to the engine.
+             New events can be a list of str or `ignite.engine.events.EventEnum`.
+         event_to_attr: a dictionary to map an event to a state attribute, then add to `engine.state`.
+             For more details, check: https://pytorch.org/ignite/generated/ignite.engine.engine.Engine.html
+             #ignite.engine.engine.Engine.register_events.
+         decollate: whether to decollate the batch-first data to a list of data after model computation;
+             `decollate=True` is recommended when `postprocessing` uses components from `monai.transforms`.
+             Default to `True`.
+         optim_set_to_none: when calling `optimizer.zero_grad()`, instead of setting to zero, set the grads to None.
+             More details: https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html.
+         to_kwargs: dict of other args for the `prepare_batch` API when converting the input data, except for
+             `device` and `non_blocking`.
+         amp_kwargs: dict of the args for the `torch.cuda.amp.autocast()` API, for more details:
+             https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.autocast.
+     """
+
+     def __init__(
+         self,
+         device: str | torch.device,
+         max_epochs: int,
+         train_data_loader: Iterable | DataLoader,
+         network: torch.nn.Module,
+         autoencoder_model: torch.nn.Module,
+         optimizer: Optimizer,
+         loss_function: Callable,
+         latent_shape: Sequence,
+         inferer: Inferer,
+         epoch_length: int | None = None,
+         non_blocking: bool = False,
+         prepare_batch: Callable = default_prepare_batch,
+         iteration_update: Callable[[Engine, Any], Any] | None = None,
+         postprocessing: Transform | None = None,
+         key_train_metric: dict[str, Metric] | None = None,
+         additional_metrics: dict[str, Metric] | None = None,
+         metric_cmp_fn: Callable = default_metric_cmp_fn,
+         train_handlers: Sequence | None = None,
+         amp: bool = False,
+         event_names: list[str | EventEnum | type[EventEnum]] | None = None,
+         event_to_attr: dict | None = None,
+         decollate: bool = True,
+         optim_set_to_none: bool = False,
+         to_kwargs: dict | None = None,
+         amp_kwargs: dict | None = None,
+     ) -> None:
+         super().__init__(
+             device=device,
+             max_epochs=max_epochs,
+             train_data_loader=train_data_loader,
+             network=network,
+             optimizer=optimizer,
+             loss_function=loss_function,
+             inferer=inferer,
+             optim_set_to_none=optim_set_to_none,
+             epoch_length=epoch_length,
+             non_blocking=non_blocking,
+             prepare_batch=prepare_batch,
+             iteration_update=iteration_update,
+             postprocessing=postprocessing,
+             key_train_metric=key_train_metric,
+             additional_metrics=additional_metrics,
+             metric_cmp_fn=metric_cmp_fn,
+             train_handlers=train_handlers,
+             amp=amp,
+             event_names=event_names,
+             event_to_attr=event_to_attr,
+             decollate=decollate,
+             to_kwargs=to_kwargs,
+             amp_kwargs=amp_kwargs,
+         )
+
+         self.latent_shape = latent_shape
+         self.autoencoder_model = autoencoder_model
+
+     def _iteration(self, engine: LDMTrainer, batchdata: dict[str, torch.Tensor]) -> dict:
+         """
+         Callback function for the Supervised Training processing logic of 1 iteration in Ignite Engine.
+         Return below items in a dictionary:
+             - IMAGE: image Tensor data for model input, already moved to device.
+             - LABEL: label Tensor data corresponding to the image, already moved to device.
+             - PRED: prediction result of model.
+             - LOSS: loss value computed by loss function.
+         Args:
+             engine: `SupervisedTrainer` to execute operation for an iteration.
+             batchdata: input data for this iteration, usually can be dictionary or tuple of Tensor data.
+         Raises:
+             ValueError: When ``batchdata`` is None.
+         """
+         if batchdata is None:
+             raise ValueError("Must provide batch data for current iteration.")
+         batch = engine.prepare_batch(batchdata, engine.state.device, engine.non_blocking, **engine.to_kwargs)
+         if len(batch) == 2:
+             images, labels = batch
+             args: tuple = ()
+             kwargs: dict = {}
+         else:
+             images, labels, args, kwargs = batch
+         # put iteration outputs into engine.state
+         engine.state.output = {CommonKeys.IMAGE: images}
+
+         # generate noise
+         noise_shape = [images.shape[0]] + list(self.latent_shape)
+         noise = torch.randn(noise_shape, dtype=images.dtype).to(images.device)
+         engine.state.output = {"noise": noise}
+
+         # Create timesteps
+         timesteps = torch.randint(
+             0, engine.inferer.scheduler.num_train_timesteps, (images.shape[0],), device=images.device
+         ).long()
+
+         def _compute_pred_loss():
+             # predicted noise
+             engine.state.output[CommonKeys.PRED] = engine.inferer(
+                 inputs=images,
+                 autoencoder_model=self.autoencoder_model,
+                 diffusion_model=engine.network,
+                 noise=noise,
+                 timesteps=timesteps,
+             )
+             engine.fire_event(IterationEvents.FORWARD_COMPLETED)
+             # compute loss
+             engine.state.output[CommonKeys.LOSS] = engine.loss_function(
+                 engine.state.output[CommonKeys.PRED], noise
+             ).mean()
+             engine.fire_event(IterationEvents.LOSS_COMPLETED)
+
+         engine.network.train()
+         engine.optimizer.zero_grad(set_to_none=engine.optim_set_to_none)
+
+         if engine.amp and engine.scaler is not None:
+             with torch.cuda.amp.autocast(**engine.amp_kwargs):
+                 _compute_pred_loss()
+             engine.scaler.scale(engine.state.output[CommonKeys.LOSS]).backward()
+             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
+             engine.scaler.step(engine.optimizer)
+             engine.scaler.update()
+         else:
+             _compute_pred_loss()
+             engine.state.output[CommonKeys.LOSS].backward()
+             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
+             engine.optimizer.step()
+         engine.fire_event(IterationEvents.MODEL_COMPLETED)
+
+         return engine.state.output
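In `LDMTrainer._iteration`, each batch gets fresh Gaussian noise shaped like the latent and a uniformly random integer timestep per sample; both feed the diffusion loss. A self-contained sketch of just that sampling step, with hypothetical shapes (batch of 2, latent shape `(3, 8, 8)`) and an assumed `num_train_timesteps` of 1000 standing in for the scheduler setting:

```python
import torch

# Hypothetical inputs, not from the bundle config:
images = torch.randn(2, 1, 32, 32)   # a batch of 2 single-channel images
latent_shape = (3, 8, 8)             # assumed autoencoder latent shape
num_train_timesteps = 1000           # assumed scheduler setting

# one noise sample per image, shaped like the latent
noise_shape = [images.shape[0]] + list(latent_shape)
noise = torch.randn(noise_shape, dtype=images.dtype)

# one random timestep per image, uniform over [0, num_train_timesteps)
timesteps = torch.randint(0, num_train_timesteps, (images.shape[0],)).long()

print(noise.shape)      # torch.Size([2, 3, 8, 8])
print(timesteps.shape)  # torch.Size([2])
```

Sampling a different timestep per batch element (rather than one per batch) is the standard way to cover the whole diffusion trajectory within each optimizer step.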
scripts/losses.py ADDED
@@ -0,0 +1,52 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import torch
+ from generative.losses import PatchAdversarialLoss
+
+ intensity_loss = torch.nn.L1Loss()
+ adv_loss = PatchAdversarialLoss(criterion="least_squares")
+
+ adv_weight = 0.5
+ perceptual_weight = 1.0
+ # kl_weight: important hyper-parameter.
+ # If too large, the decoder cannot reconstruct good results from the latent space.
+ # If too small, the latent space will not be regularized enough for the diffusion model.
+ kl_weight = 1e-6
+
+
+ def compute_kl_loss(z_mu, z_sigma):
+     kl_loss = 0.5 * torch.sum(
+         z_mu.pow(2) + z_sigma.pow(2) - torch.log(z_sigma.pow(2)) - 1, dim=list(range(1, len(z_sigma.shape)))
+     )
+     return torch.sum(kl_loss) / kl_loss.shape[0]
+
+
+ def generator_loss(gen_images, real_images, z_mu, z_sigma, disc_net, loss_perceptual):
+     recons_loss = intensity_loss(gen_images, real_images)
+     kl_loss = compute_kl_loss(z_mu, z_sigma)
+     p_loss = loss_perceptual(gen_images.float(), real_images.float())
+     loss_g = recons_loss + kl_weight * kl_loss + perceptual_weight * p_loss
+
+     logits_fake = disc_net(gen_images)[-1]
+     generator_loss = adv_loss(logits_fake, target_is_real=True, for_discriminator=False)
+     loss_g = loss_g + adv_weight * generator_loss
+
+     return loss_g
+
+
+ def discriminator_loss(gen_images, real_images, disc_net):
+     logits_fake = disc_net(gen_images.contiguous().detach())[-1]
+     loss_d_fake = adv_loss(logits_fake, target_is_real=False, for_discriminator=True)
+     logits_real = disc_net(real_images.contiguous().detach())[-1]
+     loss_d_real = adv_loss(logits_real, target_is_real=True, for_discriminator=True)
+     discriminator_loss = (loss_d_fake + loss_d_real) * 0.5
+     loss_d = adv_weight * discriminator_loss
+     return loss_d
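`compute_kl_loss` implements the closed-form KL divergence between the encoder posterior N(mu, sigma^2) and a standard normal prior, 0.5 * (mu^2 + sigma^2 - log sigma^2 - 1) per element, summed over non-batch dims and averaged over the batch. A minimal sanity check of that formula with synthetic latents (the zero-mean, unit-sigma case should incur no penalty):

```python
import torch

# Synthetic latent statistics: mu = 0, sigma = 1 everywhere.
z_mu = torch.zeros(2, 3, 4, 4)
z_sigma = torch.ones(2, 3, 4, 4)

# Same computation as compute_kl_loss above.
kl = 0.5 * torch.sum(
    z_mu.pow(2) + z_sigma.pow(2) - torch.log(z_sigma.pow(2)) - 1,
    dim=list(range(1, len(z_sigma.shape))),
)
kl_mean = torch.sum(kl) / kl.shape[0]
print(kl_mean.item())  # 0.0 — a standard-normal latent matches the prior exactly
```

Any deviation of `z_mu` from 0 or `z_sigma` from 1 makes the term strictly positive, which is what the small `kl_weight` gently penalizes during autoencoder training.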
scripts/utils.py ADDED
@@ -0,0 +1,50 @@
+ # Copyright (c) MONAI Consortium
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ # http://www.apache.org/licenses/LICENSE-2.0
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import numpy as np
+ import torch
+ from monai.utils import first
+ from monai.utils.type_conversion import convert_to_numpy
+
+
+ def compute_scale_factor(autoencoder, train_loader, device):
+     with torch.no_grad():
+         check_data = first(train_loader)
+         z = autoencoder.encode_stage_2_inputs(check_data["image"].to(device))
+     scale_factor = 1 / torch.std(z)
+     return scale_factor.item()
+
+
+ def normalize_image_to_uint8(image):
+     """
+     Normalize image to uint8.
+     Args:
+         image: numpy array
+     """
+     draw_img = image
+     if np.amin(draw_img) < 0:
+         draw_img[draw_img < 0] = 0
+     if np.amax(draw_img) > 0.1:
+         draw_img /= np.amax(draw_img)
+     draw_img = (255 * draw_img).astype(np.uint8)
+     return draw_img
+
+
+ def visualize_2d_image(image):
+     """
+     Prepare a 2D image for visualization.
+     Args:
+         image: image numpy array, sized (H, W)
+     """
+     image = convert_to_numpy(image)
+     # draw image
+     draw_img = normalize_image_to_uint8(image)
+     draw_img = np.stack([draw_img, draw_img, draw_img], axis=-1)
+     return draw_img
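`normalize_image_to_uint8` clips negatives, rescales by the maximum (when it exceeds 0.1), and casts to uint8. A standalone sketch of those same steps on a tiny hypothetical float image, to make the value mapping concrete:

```python
import numpy as np

# Hypothetical input with a negative value and max > 0.1,
# run through the same steps as normalize_image_to_uint8 above.
image = np.array([[-0.5, 0.0], [0.5, 1.0]], dtype=np.float32)

draw = image.copy()
draw[draw < 0] = 0           # clip negatives to zero
if np.amax(draw) > 0.1:
    draw /= np.amax(draw)    # rescale so the maximum maps to 1.0
draw = (255 * draw).astype(np.uint8)
print(draw)  # [[  0   0]
             #  [127 255]]
```

Note the `astype(np.uint8)` cast truncates (255 * 0.5 = 127.5 becomes 127); `visualize_2d_image` then stacks the result into three identical channels for RGB display.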