# audioldm2_api / app.py
import gradio as gr
import torch
from diffusers import AudioLDM2Pipeline
# make Space compatible with CPU duplicates
if torch.cuda.is_available():
    device = "cuda"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32
# load the diffusers pipeline
repo_id = "cvssp/audioldm2"
pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device)
# pipe.unet = torch.compile(pipe.unet)
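# (torch.compile can speed up repeated UNet calls, at the cost of a longer
# first-call warm-up while the graph compiles; it is left disabled here.)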
# set the generator for reproducibility
generator = torch.Generator(device)
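# the generator is re-seeded with the user-supplied seed on every call below,
# so the same prompt and seed reproduce the same audio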
def text2audio(text, negative_prompt="Low quality.", duration=10,
               guidance_scale=3.5, random_seed=45, n_candidates=3):
    # defaults correspond to the original hard-coded test values; the Gradio
    # interface below only supplies the text prompt
    if not text:
        raise gr.Error("Please provide a text input.")
    waveforms = pipe(
        text,
        audio_length_in_s=duration,
        guidance_scale=guidance_scale,
        num_inference_steps=20,
        negative_prompt=negative_prompt,
        num_waveforms_per_prompt=n_candidates if n_candidates else 1,
        generator=generator.manual_seed(int(random_seed)),
    )["audios"]
    print(waveforms)
    # AudioLDM2 generates 16 kHz audio; return (sample_rate, samples) tuples,
    # which the gr.Audio output components accept directly
    return [(16000, waveforms[0]), (16000, waveforms[1]), (16000, waveforms[2])]
gradio_interface = gr.Interface(
    fn=text2audio,
    inputs="text",
    outputs=["audio", "audio", "audio"],
)
gradio_interface.launch()