ybang committed on
Commit
f119108
Β·
verified Β·
1 Parent(s): 3578727

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -19
app.py CHANGED
@@ -1,42 +1,92 @@
1
  import gradio as gr
 
 
 
2
  import os
3
- import traceback
4
 
5
  def generate_audio(prompt, duration=10):
6
  try:
7
- updates = []
8
- updates.append("πŸ”„ Starting with authentication...")
 
 
 
9
 
10
- # Get token from environment
11
  token = os.getenv("HF_TOKEN")
12
- if not token:
13
- return "❌ No HF_TOKEN found in environment variables"
14
-
15
- updates.append("βœ… Token found")
16
-
17
- from huggingface_hub import hf_hub_download
18
- updates.append("πŸ“¦ Downloading with token...")
19
 
20
- model_config = hf_hub_download(
 
21
  repo_id="stabilityai/stable-audio-open-1.0",
22
  filename="model_config.json",
23
  token=token
24
  )
25
- updates.append("βœ… Model config downloaded!")
26
 
27
- return "\n".join(updates) + f"\n\n🎯 Ready to generate: '{prompt}'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  except Exception as e:
30
- return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"
31
 
 
32
  demo = gr.Interface(
33
  fn=generate_audio,
34
  inputs=[
35
- gr.Textbox(label="Prompt", value="heavy boots thudding on wet sand"),
36
- gr.Slider(5, 20, 10, label="Duration")
 
 
 
 
37
  ],
38
- outputs=gr.Textbox(label="Status", lines=8),
39
- title="πŸ”§ Token Test"
 
40
  )
41
 
42
  demo.launch()
 
1
  import gradio as gr
2
+ import torch
3
+ import torchaudio
4
+ import tempfile
5
  import os
6
+ import json
7
 
8
def generate_audio(prompt, duration=10):
    """Generate an audio clip from a text prompt with Stable Audio Open 1.0.

    Parameters
    ----------
    prompt : str
        Text description of the desired sound.
    duration : int, optional
        Target clip length in seconds (default 10; the UI slider allows 5-47).

    Returns
    -------
    str
        Path to a temporary ``.wav`` file on success, or a human-readable
        ``"Error: ..."`` string on failure (the broad except keeps the UI
        from crashing on any backend problem).
    """
    try:
        # Project imports are deferred into the call so the module can still
        # be imported (and the UI shown) when stable_audio_tools is absent;
        # an ImportError falls through to the error string below.
        from stable_audio_tools.inference.generation import generate_diffusion_cond
        from stable_audio_tools.models.utils import load_ckpt_state_dict
        from stable_audio_tools.models.factory import create_model_from_config
        from huggingface_hub import hf_hub_download

        sample_rate = 44100  # fixed rate used for both generation and the wav file

        # Build the model once and reuse it across requests (the original
        # re-downloaded and re-created the model on every call). The cache
        # lives on the function object so this block is self-contained.
        cache = getattr(generate_audio, "_model_cache", None)
        if cache is None:
            token = os.getenv("HF_TOKEN")
            if not token:
                # The model repo is gated; fail fast with a clear message
                # instead of an opaque 401 from the Hub.
                return "Error: No HF_TOKEN found in environment variables"

            config_path = hf_hub_download(
                repo_id="stabilityai/stable-audio-open-1.0",
                filename="model_config.json",
                token=token,
            )
            ckpt_path = hf_hub_download(
                repo_id="stabilityai/stable-audio-open-1.0",
                filename="model.safetensors",
                token=token,
            )

            with open(config_path) as f:
                model_config = json.load(f)

            model = create_model_from_config(model_config)
            model.load_state_dict(load_ckpt_state_dict(ckpt_path))

            device = "cuda" if torch.cuda.is_available() else "cpu"
            model = model.to(device)
            model.eval()

            cache = (model, device)
            generate_audio._model_cache = cache

        model, device = cache

        # Conditioning dict format expected by stable-audio-tools.
        conditioning = [{
            "prompt": prompt,
            "seconds_start": 0,
            "seconds_total": duration,
        }]

        # Inference only: no_grad avoids building an autograd graph.
        with torch.no_grad():
            output = generate_diffusion_cond(
                model,
                steps=100,
                cfg_scale=7,
                conditioning=conditioning,
                sample_rate=sample_rate,
                sigma_min=0.3,
                sigma_max=500,
                sampler_type="dpmpp-3m-sde",
                device=device,
            )

        # Output is batched; take the first clip and move it to CPU float32.
        audio = output[0].to(torch.float32).cpu()

        # Peak-normalize and clamp so the waveform fits the [-1, 1] float
        # range torchaudio expects for wav output (raw diffusion output is
        # not guaranteed to be in range and would clip audibly).
        peak = torch.max(torch.abs(audio))
        if peak > 0:
            audio = audio / peak
        audio = torch.clamp(audio, -1.0, 1.0)

        # delete=False so the file outlives this call for gradio to serve;
        # close our handle before torchaudio writes to the path (required
        # on Windows, and avoids leaking a file descriptor).
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_file.close()
        torchaudio.save(temp_file.name, audio, sample_rate)

        return temp_file.name

    except Exception as e:
        # NOTE(review): this string feeds a gr.Audio output, which cannot
        # render text; kept as-is for interface compatibility, but consider
        # `raise gr.Error(str(e))` so the UI shows the message properly.
        return f"Error: {str(e)}"
75
 
76
# Gradio UI wiring: text prompt + duration slider in, playable audio out.
prompt_input = gr.Textbox(
    label="🎡 Audio Prompt",
    placeholder="heavy boots thudding on wet sand",
    value="heavy boots thudding on wet sand",
)
duration_input = gr.Slider(5, 47, 10, step=1, label="⏱️ Duration (seconds)")

demo = gr.Interface(
    fn=generate_audio,
    inputs=[prompt_input, duration_input],
    outputs=gr.Audio(label="πŸ”Š Generated Audio"),  # renders a playable clip
    title="🎡 Stable Audio Generator - WORKING!",
    description="Generate real audio from text descriptions",
)

demo.launch()