"""Gradio demo: generate audio from a text prompt with the Riffusion model.

The script loads the ``riffusion/riffusion-model-v1`` Stable Diffusion
checkpoint onto the GPU, renders an image for the prompt, converts that image
to WAV bytes via ``spectro.wav_bytes_from_spectrogram_image`` and serves the
result through a Gradio interface.
"""
import gradio as gr
import torch  # required for ``torch.float16`` below; was missing (NameError)
from diffusers import StableDiffusionPipeline

from spectro import wav_bytes_from_spectrogram_image

model_id = "riffusion/riffusion-model-v1"

# Load the weights in half precision and move the pipeline to the GPU once,
# at import time, so every request reuses the same loaded model.
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
pipe = pipe.to("cuda")


def predict(prompt):
    """Generate audio for *prompt* and return the path of the written file.

    Args:
        prompt: Text passed straight to the diffusion pipeline.

    Returns:
        The string ``'output.wav'``, the file the audio bytes were written to.
    """
    # The first image produced by the pipeline is used as the spectrogram.
    spec = pipe(prompt).images[0]
    wav = wav_bytes_from_spectrogram_image(spec)

    # NOTE(review): presumably ``wav`` is a sequence whose first item is an
    # in-memory bytes buffer (it must expose ``getbuffer()``) -- confirm
    # against ``spectro``.
    # NOTE(review): the output path is fixed, so every call overwrites the
    # same file; requests handled at the same time would share it.
    with open("output.wav", "wb") as f:
        f.write(wav[0].getbuffer())
    return 'output.wav'


# NOTE(review): ``gr.outputs`` is the legacy Gradio namespace; verify it is
# still available in the installed Gradio version before upgrading.
gr.Interface(
    predict,
    inputs="text",
    outputs=gr.outputs.Audio(type='filepath'),
    title="Riffusion",
).launch(share=True, debug=True)