# stable-audio / app.py
# ybang's picture
# Update app.py
# f119108 verified
import gradio as gr
import torch
import torchaudio
import tempfile
import os
import json
# Hugging Face repository that holds the model config and weights.
_MODEL_REPO_ID = "stabilityai/stable-audio-open-1.0"
# Sample rate passed to the generator and used when writing the WAV file.
_SAMPLE_RATE = 44100
# Loaded models keyed by device string, so repeated requests reuse one model
# instead of downloading and rebuilding it on every call.
_MODEL_CACHE = {}


def _load_model(device):
    """Return the model on *device*, downloading and building it on first use."""
    cached = _MODEL_CACHE.get(device)
    if cached is not None:
        return cached

    # Imported lazily so a missing package surfaces as a request-time error
    # inside generate_audio rather than at module import.
    from huggingface_hub import hf_hub_download
    from stable_audio_tools.models.factory import create_model_from_config
    from stable_audio_tools.models.utils import load_ckpt_state_dict

    # Access token read from the environment; None when HF_TOKEN is unset.
    token = os.getenv("HF_TOKEN")

    model_config_path = hf_hub_download(
        repo_id=_MODEL_REPO_ID,
        filename="model_config.json",
        token=token,
    )
    model_ckpt_path = hf_hub_download(
        repo_id=_MODEL_REPO_ID,
        filename="model.safetensors",
        token=token,
    )

    with open(model_config_path, encoding="utf-8") as f:
        model_config = json.load(f)

    model = create_model_from_config(model_config)
    model.load_state_dict(load_ckpt_state_dict(model_ckpt_path))
    model = model.to(device)
    model.eval()

    _MODEL_CACHE[device] = model
    return model


def generate_audio(prompt, duration=10):
    """Generate audio for a text prompt and return the path of a WAV file.

    Args:
        prompt: Text description of the sound to generate.
        duration: Requested clip length in seconds; sent to the model as
            ``seconds_total`` and used to trim the saved waveform.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If any step (imports, download, model load, generation,
            saving) fails. The original exception is chained as the cause.
    """
    try:
        from stable_audio_tools.inference.generation import generate_diffusion_cond

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = _load_model(device)

        conditioning = [{
            "prompt": prompt,
            "seconds_start": 0,
            "seconds_total": duration,
        }]

        output = generate_diffusion_cond(
            model,
            steps=100,
            cfg_scale=7,
            conditioning=conditioning,
            sample_rate=_SAMPLE_RATE,
            sigma_min=0.3,
            sigma_max=500,
            sampler_type="dpmpp-3m-sde",
            device=device,
        )

        # First item of the batch, moved to CPU; no numpy round-trip is needed
        # because torchaudio.save takes a tensor.
        waveform = output[0].detach().cpu()

        # NOTE(review): no sample_size is passed above, so the generator
        # presumably returns a fixed-length buffer regardless of `duration`;
        # assumes the last axis is time. Trim to the requested length so the
        # slider actually controls the clip length - confirm against the
        # stable-audio-tools API.
        num_samples = max(1, int(duration * _SAMPLE_RATE))
        waveform = waveform[..., :num_samples]

        # Reserve a unique path and close the handle before writing, so the
        # descriptor is not leaked and the file can be reopened by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            wav_path = temp_file.name
        torchaudio.save(wav_path, waveform, _SAMPLE_RATE)
        return wav_path
    except Exception as e:
        # The output component is gr.Audio, so returning an error string would
        # be treated as audio data; raise gr.Error so Gradio reports the
        # message instead.
        raise gr.Error(f"Error: {str(e)}") from e
# Build the Gradio UI: a text prompt and a duration slider feed
# generate_audio, and the result is shown in an audio player component.
demo = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(
            label="🎵 Audio Prompt",
            placeholder="heavy boots thudding on wet sand",
            value="heavy boots thudding on wet sand",
        ),
        # Positional arguments: minimum 5, maximum 47, initial value 10.
        gr.Slider(5, 47, 10, step=1, label="⏱️ Duration (seconds)"),
    ],
    outputs=gr.Audio(label="🔊 Generated Audio"),  # This will play audio!
    title="🎵 Stable Audio Generator - WORKING!",
    description="Generate real audio from text descriptions",
)

# Start the app when this file is executed.
demo.launch()