Spaces:

stardate69
/

StableAudioOpen2

Runtime error

App Files Files Community

StableAudioOpen2 / app.py

stardate69

Update app.py

880a1d9 verified 12 months ago

raw

history blame

2.14 kB

	import os

	import gradio as gr
	import soundfile as sf
	import torch
	from diffusers import StableAudioPipeline
	from huggingface_hub import login

	# Retrieve the token securely from environment variables.
	# An unset, empty, or whitespace-only value is treated as missing so the
	# problem is reported here instead of being handed on to login().
	HUGGINGFACE_TOKEN = (os.getenv("HF_TOKEN") or "").strip()
	if not HUGGINGFACE_TOKEN:
	    raise ValueError("Missing Hugging Face API token. Set 'HF_TOKEN' in Secrets.")

	# Authenticate with Hugging Face Hub
	login(HUGGINGFACE_TOKEN)

	# Pick the compute device; half precision is only selected on CUDA,
	# the CPU path stays in float32.
	if torch.cuda.is_available():
	    device, torch_dtype = "cuda", torch.float16
	else:
	    device, torch_dtype = "cpu", torch.float32

	# Load the model.
	# The token is passed explicitly through `token`; the older
	# `use_auth_token` keyword is the legacy spelling of this argument.
	pipe = StableAudioPipeline.from_pretrained(
	    "stabilityai/stable-audio-open-1.0",
	    torch_dtype=torch_dtype,
	    token=HUGGINGFACE_TOKEN,
	)
	# Move the pipeline to the selected device (GPU when available).
	pipe = pipe.to(device)

	# Audio generation function
	def generate_audio(prompt, negative_prompt, duration, seed):
	    """Generate one audio clip with the module-level ``pipe`` and save it as WAV.

	    Args:
	        prompt: Text forwarded to the pipeline as ``prompt``.
	        negative_prompt: Text forwarded to the pipeline as ``negative_prompt``.
	        duration: Forwarded to the pipeline as ``audio_end_in_s``.
	        seed: Seed for the ``torch.Generator``. Coerced with ``int()`` because
	            ``manual_seed`` only accepts integers, while a UI number input
	            may deliver a float such as ``0.0``.

	    Returns:
	        The path of the written file, always ``"output.wav"``.
	    """
	    # manual_seed() raises TypeError for non-integer seeds, so normalise first.
	    generator = torch.Generator(device).manual_seed(int(seed))
	    audio_output = pipe(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        num_inference_steps=50,
	        audio_end_in_s=duration,
	        num_waveforms_per_prompt=1,
	        generator=generator,
	    ).audios

	    # Save the generated audio: take the first (only) waveform, transpose it,
	    # and convert to a float32 NumPy array on the CPU.
	    # NOTE(review): the transpose presumably turns (channels, samples) into
	    # the (samples, channels) layout soundfile expects -- confirm.
	    waveform = audio_output[0].T.float().cpu().numpy()
	    sf.write("output.wav", waveform, pipe.vae.sampling_rate)
	    return "output.wav"

	# Gradio UI: two rows of inputs, a trigger button, and an audio player
	# that receives the file path returned by generate_audio.
	with gr.Blocks() as demo:
	    gr.Markdown("## 🎧 Stable Audio Open - Interactive Audio Generation 🎼")
	    with gr.Row():
	        prompt_input = gr.Textbox(label="Prompt", value="The sound of a hammer hitting a wooden surface.")
	        negative_input = gr.Textbox(label="Negative Prompt", value="Low quality.")
	    with gr.Row():
	        duration_input = gr.Slider(minimum=1, maximum=10, step=0.5, value=2, label="Duration (seconds)")
	        # precision=0 makes the component deliver an integer; the seed is
	        # fed to torch.Generator.manual_seed, which does not accept floats.
	        seed_input = gr.Number(label="Random Seed", value=0, precision=0)
	    generate_button = gr.Button("Generate Audio")
	    output_audio = gr.Audio(label="Generated Audio", type="filepath")

	    # Input order must match generate_audio's positional parameters.
	    generate_button.click(
	        generate_audio,
	        inputs=[prompt_input, negative_input, duration_input, seed_input],
	        outputs=output_audio,
	    )

	demo.launch()