Spaces:

ybang
/

stable-audio

Sleeping

App Files Community

ybang committed on Jul 9

Commit

f119108

verified ·

1 Parent(s): 3578727

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -19

app.py CHANGED Viewed

@@ -1,42 +1,92 @@
 import gradio as gr
 import os
-import traceback
 def generate_audio(prompt, duration=10):
     try:
-        updates = []
-        updates.append("🔄 Starting with authentication...")
-        # Get token from environment
         token = os.getenv("HF_TOKEN")
-        if not token:
-            return "❌ No HF_TOKEN found in environment variables"
-        updates.append("✅ Token found")
-        from huggingface_hub import hf_hub_download
-        updates.append("📦 Downloading with token...")
-        model_config = hf_hub_download(
             repo_id="stabilityai/stable-audio-open-1.0",
             filename="model_config.json",
             token=token
         )
-        updates.append("✅ Model config downloaded!")
-        return "\n".join(updates) + f"\n\n🎯 Ready to generate: '{prompt}'"
     except Exception as e:
-        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"
 demo = gr.Interface(
     fn=generate_audio,
     inputs=[
-        gr.Textbox(label="Prompt", value="heavy boots thudding on wet sand"),
-        gr.Slider(5, 20, 10, label="Duration")
     ],
-    outputs=gr.Textbox(label="Status", lines=8),
-    title="🔧 Token Test"
 )
 demo.launch()

 import gradio as gr
+import torch
+import torchaudio
+import tempfile
 import os
+import json
 def generate_audio(prompt, duration=10):
     try:
+        # Import required modules
+        from stable_audio_tools.inference.generation import generate_diffusion_cond
+        from stable_audio_tools.models.utils import load_ckpt_state_dict
+        from stable_audio_tools.models.factory import create_model_from_config
+        from huggingface_hub import hf_hub_download
+        # Get token
         token = os.getenv("HF_TOKEN")
+        # Download model files
+        model_config_path = hf_hub_download(
             repo_id="stabilityai/stable-audio-open-1.0",
             filename="model_config.json",
             token=token
         )
+        model_ckpt_path = hf_hub_download(
+            repo_id="stabilityai/stable-audio-open-1.0",
+            filename="model.safetensors",
+            token=token
+        )
+        # Load model config
+        with open(model_config_path) as f:
+            model_config = json.load(f)
+        # Create and load model
+        model = create_model_from_config(model_config)
+        model.load_state_dict(load_ckpt_state_dict(model_ckpt_path))
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        model = model.to(device)
+        model.eval()
+        # Generate audio
+        conditioning = [{
+            "prompt": prompt,
+            "seconds_start": 0,
+            "seconds_total": duration
+        }]
+        # Generate the audio
+        output = generate_diffusion_cond(
+            model,
+            steps=100,
+            cfg_scale=7,
+            conditioning=conditioning,
+            sample_rate=44100,
+            sigma_min=0.3,
+            sigma_max=500,
+            sampler_type="dpmpp-3m-sde",
+            device=device
+        )
+        # Convert to numpy and save
+        audio_output = output[0].cpu().numpy()
+        # Create temporary file
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+        torchaudio.save(temp_file.name, torch.from_numpy(audio_output), 44100)
+        return temp_file.name
     except Exception as e:
+        return f"Error: {str(e)}"
+# Create interface with AUDIO output
 demo = gr.Interface(
     fn=generate_audio,
     inputs=[
+        gr.Textbox(
+            label="🎵 Audio Prompt",
+            placeholder="heavy boots thudding on wet sand",
+            value="heavy boots thudding on wet sand"
+        ),
+        gr.Slider(5, 47, 10, step=1, label="⏱️ Duration (seconds)")
     ],
+    outputs=gr.Audio(label="🔊 Generated Audio"),  # This will play audio!
+    title="🎵 Stable Audio Generator - WORKING!",
+    description="Generate real audio from text descriptions"
 )
 demo.launch()