import gradio as gr
import torch
from diffusers import AudioLDM2Pipeline

# Pick GPU + half precision when CUDA is present; otherwise fall back to
# CPU + full precision so CPU-only duplicates of the Space still run.
device, torch_dtype = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Load the diffusers pipeline with the dtype chosen above, then move it
# onto the selected device.
repo_id = "cvssp/audioldm2"
pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
pipe = pipe.to(device)
# optional, currently disabled:
# pipe.unet = torch.compile(pipe.unet)

# Shared generator on the same device as the pipeline; it is re-seeded on
# every request so results are reproducible for a given seed.
generator = torch.Generator(device=device)


def text2audio(
    text,
    negative_prompt="Low quality.",
    duration=10,
    guidance_scale=3.5,
    random_seed=45,
    n_candidates=3,
):
    """Generate audio from a text prompt and render each result as a waveform.

    Every parameter except ``text`` has a default, so the function can be
    driven by an interface that only supplies the prompt. The defaults are the
    values that were previously hard-coded inside the body; explicitly passed
    arguments are now honoured instead of being overwritten.

    Args:
        text: Prompt describing the audio to generate.
        negative_prompt: Text passed to the pipeline as ``negative_prompt``.
        duration: Passed to the pipeline as ``audio_length_in_s``.
        guidance_scale: Passed to the pipeline as ``guidance_scale``.
        random_seed: Seed applied to the module-level generator before the
            call; converted with ``int()``.
        n_candidates: Passed as ``num_waveforms_per_prompt``; a falsy value
            (``0``/``None``) falls back to 1.

    Returns:
        A list with one ``gr.make_waveform`` result per waveform returned by
        the pipeline. With the default ``n_candidates=3`` this is expected to
        be three entries, matching an interface with three outputs.

    Raises:
        gr.Error: If ``text`` is ``None``, empty, or only whitespace.
    """
    # A blank textbox may arrive as "" rather than None, so reject any
    # empty or whitespace-only prompt, not just None.
    if text is None or not str(text).strip():
        raise gr.Error("Please provide a text input.")

    # Sample rate paired with each waveform for rendering.
    # NOTE(review): presumably the pipeline's output rate - confirm against
    # the model documentation.
    sample_rate = 16000

    waveforms = pipe(
        text,
        audio_length_in_s=duration,
        guidance_scale=guidance_scale,
        num_inference_steps=20,
        negative_prompt=negative_prompt,
        num_waveforms_per_prompt=n_candidates if n_candidates else 1,
        # Re-seed the shared generator so the same seed reproduces the output.
        generator=generator.manual_seed(int(random_seed)),
    )["audios"]

    # Render whatever number of candidates came back instead of indexing
    # [0], [1], [2], which fails when fewer than three are generated.
    return [gr.make_waveform((sample_rate, waveform)) for waveform in waveforms]

# Single text box in, three audio slots out.
gradio_interface = gr.Interface(
    fn=text2audio,
    inputs="text",
    outputs=["audio"] * 3,
)

gradio_interface.launch()