Spaces:

ybang
/

stable-audio

Sleeping

App Files Files Community

stable-audio / app.py

ybang

Update app.py

f119108 verified 5 months ago

raw

history blame contribute delete

2.83 kB

	import functools
	import json
	import os
	import tempfile

	import gradio as gr
	import torch
	import torchaudio

	# Hugging Face repository that hosts the model config and checkpoint.
	_MODEL_REPO_ID = "stabilityai/stable-audio-open-1.0"
	# Sample rate handed to the sampler and used when writing the WAV file.
	_SAMPLE_RATE = 44100


	@functools.lru_cache(maxsize=1)
	def _load_model():
	    """Download, build and cache the model; return ``(model, device)``.

	    The result is memoised so the checkpoint is fetched and loaded once per
	    process instead of once per request. A failed load raises and is not
	    cached, so the next call retries.
	    """
	    # Imported lazily so a missing dependency surfaces as a request error
	    # rather than preventing the UI from starting.
	    from huggingface_hub import hf_hub_download
	    from stable_audio_tools.models.factory import create_model_from_config
	    from stable_audio_tools.models.utils import load_ckpt_state_dict

	    token = os.getenv("HF_TOKEN")

	    model_config_path = hf_hub_download(
	        repo_id=_MODEL_REPO_ID,
	        filename="model_config.json",
	        token=token,
	    )
	    model_ckpt_path = hf_hub_download(
	        repo_id=_MODEL_REPO_ID,
	        filename="model.safetensors",
	        token=token,
	    )

	    with open(model_config_path, encoding="utf-8") as f:
	        model_config = json.load(f)

	    model = create_model_from_config(model_config)
	    model.load_state_dict(load_ckpt_state_dict(model_ckpt_path))

	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    model = model.to(device)
	    model.eval()
	    return model, device


	def generate_audio(prompt, duration=10):
	    """Generate audio for a text prompt and return the path to a WAV file.

	    Args:
	        prompt: Text description of the sound to generate.
	        duration: Requested clip length in seconds.

	    Returns:
	        Path of a temporary ``.wav`` file holding the generated audio. The
	        file is created with ``delete=False``, so it is not removed
	        automatically.

	    Raises:
	        gr.Error: If model loading, generation or saving fails. Raising
	            (instead of returning the message) keeps an error string from
	            being handed to the audio output as if it were a file path.
	    """
	    try:
	        from stable_audio_tools.inference.generation import generate_diffusion_cond

	        model, device = _load_model()

	        conditioning = [{
	            "prompt": prompt,
	            "seconds_start": 0,
	            "seconds_total": duration,
	        }]

	        output = generate_diffusion_cond(
	            model,
	            steps=100,
	            cfg_scale=7,
	            conditioning=conditioning,
	            sample_rate=_SAMPLE_RATE,
	            sigma_min=0.3,
	            sigma_max=500,
	            sampler_type="dpmpp-3m-sde",
	            device=device,
	        )

	        # First item of the batch, moved to the CPU for saving.
	        audio = output[0].cpu()

	        # NOTE(review): no sample_size is passed above, so the sampler
	        # presumably returns its full default-length window regardless of
	        # `duration` - confirm. Trimming on the last axis (assumed to be
	        # samples) is a no-op when the output is already short enough.
	        audio = audio[..., :int(duration * _SAMPLE_RATE)].contiguous()

	        # Reserve a unique path and close the handle right away so that no
	        # descriptor leaks and the file is not open while it is written.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	            wav_path = tmp.name
	        torchaudio.save(wav_path, audio, _SAMPLE_RATE)

	        return wav_path

	    except Exception as e:
	        # Boundary with the UI: report the failure to the user.
	        raise gr.Error(f"Audio generation failed: {e}") from e

	# Build the web UI: a prompt box and a duration slider feed
	# generate_audio, and its result is shown in an audio component.
	prompt_input = gr.Textbox(
	    label="🎵 Audio Prompt",
	    placeholder="heavy boots thudding on wet sand",
	    value="heavy boots thudding on wet sand",
	)
	duration_input = gr.Slider(
	    minimum=5,
	    maximum=47,
	    value=10,
	    step=1,
	    label="⏱️ Duration (seconds)",
	)
	audio_output = gr.Audio(label="🔊 Generated Audio")

	demo = gr.Interface(
	    fn=generate_audio,
	    inputs=[prompt_input, duration_input],
	    outputs=audio_output,
	    title="🎵 Stable Audio Generator - WORKING!",
	    description="Generate real audio from text descriptions",
	)

	# Start the app as soon as the module is executed.
	demo.launch()