| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import unittest |
|
|
| from diffusers import AutoencoderKLLTX2Video |
|
|
| from ...testing_utils import ( |
| enable_full_determinism, |
| floats_tensor, |
| torch_device, |
| ) |
| from ..test_modeling_common import ModelTesterMixin |
| from .testing_utils import AutoencoderTesterMixin |
|
|
|
|
# Invoked once at import time, before the test class below is defined.
# NOTE(review): presumably switches the backend into deterministic mode so the
# numeric comparisons in the shared test mixins are reproducible — confirm
# against `testing_utils.enable_full_determinism`.
enable_full_determinism()
|
|
|
|
class AutoencoderKLLTX2VideoTests(ModelTesterMixin, AutoencoderTesterMixin, unittest.TestCase):
    """Run the shared model/autoencoder test mixins against `AutoencoderKLLTX2Video`.

    The class only supplies the model configuration and a dummy input; the
    actual test methods are inherited from `ModelTesterMixin` and
    `AutoencoderTesterMixin`, except for the overrides and skips below.
    """

    model_class = AutoencoderKLLTX2Video
    # Key under which `dummy_input` stores the input tensor.
    main_input_name = "sample"
    # NOTE(review): presumably the tolerance used by the inherited numeric
    # comparisons — confirm against the mixins.
    base_precision = 1e-2

    def get_autoencoder_kl_ltx_video_config(self):
        """Return the constructor keyword arguments for the model under test."""
        return {
            "in_channels": 3,
            "out_channels": 3,
            "latent_channels": 8,
            "block_out_channels": (8, 8, 8, 8),
            "decoder_block_out_channels": (16, 32, 64),
            "layers_per_block": (1, 1, 1, 1, 1),
            "decoder_layers_per_block": (1, 1, 1, 1),
            "spatio_temporal_scaling": (True, True, True, True),
            "decoder_spatio_temporal_scaling": (True, True, True),
            "decoder_inject_noise": (False, False, False, False),
            "downsample_type": ("spatial", "temporal", "spatiotemporal", "spatiotemporal"),
            "upsample_residual": (True, True, True),
            "upsample_factor": (2, 2, 2),
            "timestep_conditioning": False,
            "patch_size": 1,
            "patch_size_t": 1,
            "encoder_causal": True,
            "decoder_causal": False,
            "encoder_spatial_padding_mode": "zeros",
            "decoder_spatial_padding_mode": "zeros",
        }

    @property
    def dummy_input(self):
        """Build the forward-pass inputs: ``{"sample": tensor}`` on `torch_device`.

        The tensor is laid out as (batch, channels, frames, *spatial sizes),
        i.e. (2, 3, 9, 16, 16).
        """
        batch_size = 2
        num_frames = 9
        num_channels = 3
        sizes = (16, 16)

        image = floats_tensor((batch_size, num_channels, num_frames) + sizes).to(torch_device)

        input_dict = {"sample": image}
        return input_dict

    @property
    def input_shape(self):
        """Shape of one `dummy_input` sample without the batch dimension."""
        return (3, 9, 16, 16)

    @property
    def output_shape(self):
        """Expected per-sample output shape; same values as `input_shape`."""
        return (3, 9, 16, 16)

    def prepare_init_args_and_inputs_for_common(self):
        """Return ``(init_dict, inputs_dict)``: the model config and the dummy input."""
        init_dict = self.get_autoencoder_kl_ltx_video_config()
        inputs_dict = self.dummy_input
        return init_dict, inputs_dict

    def test_gradient_checkpointing_is_applied(self):
        """Delegate to the inherited test with this model's expected module class names."""
        expected_set = {
            "LTX2VideoEncoder3d",
            "LTX2VideoDecoder3d",
            "LTX2VideoDownBlock3D",
            "LTX2VideoMidBlock3d",
            "LTX2VideoUpBlock3d",
        }
        super().test_gradient_checkpointing_is_applied(expected_set=expected_set)

    @unittest.skip("Unsupported test.")
    def test_outputs_equivalence(self):
        pass

    # The skip reason names the class actually under test (`model_class`), so
    # the test report is not misleading.
    @unittest.skip("AutoencoderKLLTX2Video does not support `norm_num_groups` because it does not use GroupNorm.")
    def test_forward_with_norm_groups(self):
        pass
|
|