sanchit-gandhi (HF staff) committed
Commit 871eacf
Parent: 05b81fb
Files changed (3):
  1. app.py +6 -5
  2. packages.txt +1 -0
  3. requirements.txt +2 -1
app.py CHANGED
@@ -15,7 +15,7 @@ else:
 # load the diffusers pipeline
 repo_id = "cvssp/audioldm2"
 pipe = AudioLDM2Pipeline.from_pretrained(repo_id, torch_dtype=torch_dtype).to(device)
-pipe.unet = torch.compile(pipe.unet)
+# pipe.unet = torch.compile(pipe.unet)
 
 # set the generator for reproducibility
 generator = torch.Generator(device)
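
Note: this commit disables the torch.compile call rather than deleting it. Compilation speeds up repeated UNet forward passes but adds a long first-call warm-up, which is a poor trade for a shared demo; the commit itself does not state the reason. A minimal sketch of gating it behind an opt-in flag (the COMPILE_UNET environment variable is hypothetical, not part of the app):

    import os
    import torch
    from diffusers import AudioLDM2Pipeline

    # mirror the app's device/dtype setup
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if device == "cuda" else torch.float32
    pipe = AudioLDM2Pipeline.from_pretrained("cvssp/audioldm2", torch_dtype=torch_dtype).to(device)

    # hypothetical opt-in: torch.compile (PyTorch >= 2.0, matching requirements.txt)
    # trades a slow first call for faster subsequent UNet passes
    if os.environ.get("COMPILE_UNET") == "1":
        pipe.unet = torch.compile(pipe.unet)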
@@ -29,7 +29,7 @@ def text2audio(text, negative_prompt, duration, guidance_scale, random_seed, n_c
         text,
         audio_length_in_s=duration,
         guidance_scale=guidance_scale,
-        num_inference_steps=100,
+        num_inference_steps=200,
         negative_prompt=negative_prompt,
         num_waveforms_per_prompt=n_candidates if n_candidates else 1,
         generator=generator.manual_seed(int(random_seed)),
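
Note: doubling num_inference_steps from 100 to 200 doubles per-request denoising time in exchange for higher sample quality. A sketch of the updated call site, continuing the loading sketch above (the prompt values are illustrative, not from the app):

    # generate candidate waveforms; pipe and device come from the sketch above
    generator = torch.Generator(device)
    waveforms = pipe(
        "techno music with a strong, upbeat tempo",   # example prompt
        audio_length_in_s=10.0,
        guidance_scale=3.5,
        num_inference_steps=200,   # doubled from 100 in this commit
        negative_prompt="low quality, average quality",
        num_waveforms_per_prompt=3,
        generator=generator.manual_seed(0),
    ).audios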
@@ -141,7 +141,7 @@ with iface:
     gr.HTML(
         """
         <p>This is the demo for AudioLDM 2, powered by 🧨 Diffusers. Demo uses the checkpoint <a
-        href="https://huggingface.co/cvssp/audioldm2"> AudioLDM 2 base </a>. For faster inference without waiting in
+        href="https://huggingface.co/cvssp/audioldm2"> AudioLDM 2 base</a>. For faster inference without waiting in
         queue, you may duplicate the space and upgrade to a GPU in the settings. <br/> <a
         href="https://huggingface.co/spaces/haoheliu/audioldm2-text2audio-text2music?duplicate=true"> <img
         style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> <p/>
@@ -174,7 +174,7 @@ with iface:
         duration = gr.Slider(5, 15, value=10, step=2.5, label="Duration (seconds)")
         guidance_scale = gr.Slider(
             0,
-            6.5,
+            7,
             value=3.5,
             step=0.5,
             label="Guidance scale",
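
Note: raising the slider maximum from 6.5 to 7 gives slightly more guidance headroom; higher classifier-free guidance follows the prompt more closely at some cost to naturalness and diversity. A standalone sketch of the updated component, assuming gradio is installed:

    import gradio as gr

    # values mirror the hunk above; maximum raised from 6.5 to 7
    guidance_scale = gr.Slider(
        0,
        7,
        value=3.5,
        step=0.5,
        label="Guidance scale",
    )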
@@ -258,4 +258,5 @@ with iface:
         """
     )
 
-iface.queue(max_size=10).launch(share=True)
+iface.queue(max_size=20).launch()
+
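
Note: the launch line changes in two ways: the queue now admits up to 20 waiting requests instead of 10, and share=True is dropped, since a hosted Space already serves a public URL and a gradio.live share tunnel adds nothing there. A sketch, assuming iface is the demo's Blocks object:

    # queue up to 20 concurrent requests; no share link needed on Spaces
    iface.queue(max_size=20).launch()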
packages.txt ADDED
@@ -0,0 +1 @@
+ffmpeg
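
Note: on Spaces, packages.txt lists Debian packages installed with apt at build time; ffmpeg is the usual system-level dependency for decoding and encoding compressed audio in Python audio stacks. A quick sanity-check sketch:

    import shutil

    # ffmpeg is installed via packages.txt (apt), not pip; confirm it is on PATH
    assert shutil.which("ffmpeg") is not None, "ffmpeg binary not found"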
requirements.txt CHANGED
@@ -1,4 +1,5 @@
-torch
+--extra-index-url https://download.pytorch.org/whl/cu113
+torch>=2.0
 librosa
 transformers
 git+https://github.com/huggingface/diffusers.git
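
Note: torch>=2.0 is the version that introduces torch.compile (used in app.py, though currently commented out), and --extra-index-url adds PyTorch's CUDA 11.3 wheel index for pip to consult alongside PyPI. A minimal environment-check sketch:

    import torch

    # torch.compile only exists from PyTorch 2.0 onwards
    assert hasattr(torch, "compile"), "PyTorch >= 2.0 is required"
    # report whether a CUDA-enabled build was actually installed
    print(torch.__version__, "CUDA available:", torch.cuda.is_available())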