from diffusers import AudioLDMPipeline
import torch
import gradio as gr
from transformers import pipeline

# Select GPU with half precision when available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32
print(device)

# Load the AudioLDM text-to-audio pipeline.
repo_id = "cvssp/audioldm-m-full"
pipe = AudioLDMPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)
# Optional: compile the UNet for faster inference (PyTorch 2.x).
# pipe.unet = torch.compile(pipe.unet)

# Spanish-to-English translation model; AudioLDM expects English prompts.
es_en_translator = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en")


def translate_text(text):
    return es_en_translator(text)[0].get("translation_text")


def generate_sound(text):
    print(text)
    text = translate_text(text)
    print(text)
    waveforms = pipe(
        text,
        num_inference_steps=25,
        audio_length_in_s=5,
        negative_prompt="low quality, average quality",
    ).audios
    rate = 16000  # AudioLDM generates audio at 16 kHz
    return rate, waveforms[0]


# Simple Gradio UI: Spanish prompt in, generated audio out.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Ingrese el texto:")  # "Enter the text:"
            button = gr.Button(value="Generar")  # "Generate"
        with gr.Column():
            output = gr.Audio()
    button.click(fn=generate_sound, inputs=text, outputs=output)

demo.launch()
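# A minimal usage sketch (not part of the original app): generating a clip
# directly and saving it to disk, bypassing the Gradio UI. Run it before
# demo.launch(), which blocks. Assumes scipy is installed; the prompt and
# filename below are hypothetical examples.
#
# import scipy.io.wavfile
# rate, waveform = generate_sound("un perro ladrando en la calle")
# scipy.io.wavfile.write("sample.wav", rate, waveform)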