Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| import tempfile | |
| import os | |
| import json | |
_REPO_ID = "stabilityai/stable-audio-open-1.0"

# Loaded (model, model_config) pairs keyed by device string, so the
# checkpoint is built once per process instead of once per request.
_MODEL_CACHE = {}


def _load_model(device):
    """Return ``(model, model_config)`` on *device*, loading it on first use."""
    if device not in _MODEL_CACHE:
        # Imported lazily so a missing optional dependency surfaces as a
        # request-time error rather than preventing the app from starting.
        from huggingface_hub import hf_hub_download
        from stable_audio_tools.models.factory import create_model_from_config
        from stable_audio_tools.models.utils import load_ckpt_state_dict

        token = os.getenv("HF_TOKEN")
        config_path = hf_hub_download(
            repo_id=_REPO_ID,
            filename="model_config.json",
            token=token,
        )
        ckpt_path = hf_hub_download(
            repo_id=_REPO_ID,
            filename="model.safetensors",
            token=token,
        )

        with open(config_path, encoding="utf-8") as f:
            model_config = json.load(f)

        model = create_model_from_config(model_config)
        model.load_state_dict(load_ckpt_state_dict(ckpt_path))
        model = model.to(device)
        model.eval()
        _MODEL_CACHE[device] = (model, model_config)
    return _MODEL_CACHE[device]


def generate_audio(prompt, duration=10):
    """Generate a WAV clip for a text prompt and return the file's path.

    Args:
        prompt: Text description of the sound to generate.
        duration: Requested clip length in seconds.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is not deleted automatically.

    Raises:
        gr.Error: If loading the model, generating, or saving fails. The
            original exception is chained as the cause. (The previous
            implementation returned an ``"Error: ..."`` string, which an
            audio output component would try to open as a file path.)
    """
    try:
        from stable_audio_tools.inference.generation import generate_diffusion_cond

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model, model_config = _load_model(device)
        sample_rate = model_config.get("sample_rate", 44100)

        conditioning = [{
            "prompt": prompt,
            "seconds_start": 0,
            "seconds_total": duration,
        }]

        # Use the window length the checkpoint declares when the config has
        # one; otherwise fall back to the library default as before.
        extra_kwargs = {}
        if "sample_size" in model_config:
            extra_kwargs["sample_size"] = model_config["sample_size"]

        output = generate_diffusion_cond(
            model,
            steps=100,
            cfg_scale=7,
            conditioning=conditioning,
            sample_rate=sample_rate,
            sigma_min=0.3,
            sigma_max=500,
            sampler_type="dpmpp-3m-sde",
            device=device,
            **extra_kwargs,
        )

        # First item of the batch; assumed to be (channels, samples), which
        # is the layout torchaudio.save expects -- TODO confirm.
        audio = output[0].to(torch.float32).cpu()

        # Generation fills the whole window regardless of `duration`, so cut
        # the clip down to the length the caller asked for.
        num_samples = int(duration * sample_rate)
        if 0 < num_samples < audio.shape[-1]:
            audio = audio[..., :num_samples]

        # Peak-normalise and clamp before converting to 16-bit PCM; skip the
        # division for an all-zero clip to avoid NaNs.
        peak = audio.abs().max()
        if peak > 0:
            audio = audio / peak
        audio = audio.clamp(-1, 1).mul(32767).to(torch.int16)

        # Only the unique name is needed: close the handle right away so it
        # is not leaked and torchaudio can reopen the path on any platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name
        torchaudio.save(wav_path, audio, sample_rate)
        return wav_path
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e
# Gradio UI: a text prompt and a duration slider feed generate_audio, whose
# returned file path is rendered by an audio player component.
demo = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(
            label="🎵 Audio Prompt",
            placeholder="heavy boots thudding on wet sand",
            value="heavy boots thudding on wet sand",
        ),
        gr.Slider(
            minimum=5,
            maximum=47,
            value=10,
            step=1,
            label="⏱️ Duration (seconds)",
        ),
    ],
    outputs=gr.Audio(label="🔊 Generated Audio"),
    title="🎵 Stable Audio Generator - WORKING!",
    description="Generate real audio from text descriptions",
)

# Start the server only when run as a script, so importing this module
# (e.g. from tests or tooling) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()