from dataclasses import dataclass
from typing import List, Union

import numpy as np
import torch

from diffusers.utils import (
    BaseOutput,
    OptionalDependencyNotAvailable,
    is_torch_available,
    is_transformers_available,
)


@dataclass
class TextToVideoPipelineOutput(BaseOutput):
    """
    Output class for text-to-video pipelines.

    Args:
        frames (`List[np.ndarray]` or `torch.FloatTensor`):
            List of denoised frames (essentially images) as NumPy arrays of shape
            `(height, width, num_channels)` or as a `torch` tensor. The NumPy arrays are the
            denoised images produced by the diffusion pipeline. The length of the list denotes
            the video length, i.e., the number of frames.
    """

    frames: Union[List[np.ndarray], torch.FloatTensor]


try:
    if not (is_transformers_available() and is_torch_available()):
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    from diffusers.utils.dummy_torch_and_transformers_objects import *  # noqa F403
else:
    # from .pipeline_t2v_base_latent import TextToVideoSDPipeline  # noqa: F401
    # from .pipeline_t2v_base_latent_sdxl import TextToVideoSDXLPipeline
    from .pipeline_t2v_base_pixel import TextToVideoIFPipeline
    from .pipeline_t2v_interp_pixel import TextToVideoIFInterpPipeline

    # from .pipeline_t2v_sr_latent import TextToVideoSDSuperResolutionPipeline
    from .pipeline_t2v_sr_pixel import TextToVideoIFSuperResolutionPipeline

    # from .pipeline_t2v_base_latent_controlnet import TextToVideoSDControlNetPipeline
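
# A minimal usage sketch for `TextToVideoPipelineOutput` (the frame count and resolution
# below are illustrative, not values mandated by the pipelines imported above):
#
#     import numpy as np
#
#     # Stand-in for pipeline output: 16 RGB frames of 64x64 pixels.
#     frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(16)]
#     output = TextToVideoPipelineOutput(frames=frames)
#
#     # `BaseOutput` supports both attribute and dict-style access.
#     assert output.frames is output["frames"]
#     print(len(output.frames))  # number of frames, i.e. the video length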