jytole committed on
Commit
c647bb3
1 Parent(s): 21e6702

Cast inf_steps to an integer, and changed how the audio returned by the pipe call is read to match AudioLDM's own example code: index the result with ["audios"] and return audio[0]

Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -18,15 +18,15 @@ def texttoaudio(prompt, neg_prompt, seed, inf_steps, guidance_scale):
18
  audio = pipe(
19
  prompt,
20
  negative_prompt=neg_prompt,
21
- num_inference_steps=inf_steps,
22
  guidance_scale=guidance_scale,
23
  audio_length_in_s=5.0,
24
  generator=generator.manual_seed(int(seed)),
25
- ).audios[0]
26
 
27
  # save the audio sample as a .wav file
28
  # scipy.io.wavfile.write("output.wav", rate=16000, data=audio)
29
- return (16000, audio)
30
 
31
  iface = gr.Interface(fn=texttoaudio, title="Prompt, Neg Prompt, Seed, Inf Steps, Guidance Scale", inputs=["text", "text", "number", "number", "number"], outputs="audio")
32
 
 
18
  audio = pipe(
19
  prompt,
20
  negative_prompt=neg_prompt,
21
+ num_inference_steps=int(inf_steps),
22
  guidance_scale=guidance_scale,
23
  audio_length_in_s=5.0,
24
  generator=generator.manual_seed(int(seed)),
25
+ )["audios"]
26
 
27
  # save the audio sample as a .wav file
28
  # scipy.io.wavfile.write("output.wav", rate=16000, data=audio)
29
+ return (16000, audio[0])
30
 
31
  iface = gr.Interface(fn=texttoaudio, title="Prompt, Neg Prompt, Seed, Inf Steps, Guidance Scale", inputs=["text", "text", "number", "number", "number"], outputs="audio")
32