{
  "_class_name": "TortoiseTTSPipeline",
  "audio_candidate_model": [
    "transformers",
    "ClvpModelForConditionalGeneration"
  ],
  "audio_processor": [
    "transformers",
    "ClvpFeatureExtractor"
  ],
  "tokenizer": [
    "transformers",
    "ClvpTokenizer"
  ],
  "unet": [
    "diffusers.pipelines.tortoise_tts.pipeline_tortoise_tts",
    "TortoiseTTSDenoisingModel"
  ],
  "vocoder": [
    "transformers",
    "UnivNetModel"
  ],
  "autoregressive_random_latent_converter": [
    "diffusers.pipelines.tortoise_tts.modeling_common",
    "RandomLatentConverter"
  ],
  "diffusion_conditioning_encoder": [
    "diffusers.pipelines.tortoise_tts.modeling_diffusion",
    "DiffusionConditioningEncoder"
  ],
  "diffusion_random_latent_converter": [
    "diffusers.pipelines.tortoise_tts.modeling_common",
    "RandomLatentConverter"
  ]
}