haoheliu committed on
Commit
83dc4c8
1 Parent(s): 858c11b
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import gradio as gr
2
  import numpy as np
3
- from audioldm import text_to_audio, seed_everything, build_model
4
 
5
  audioldm = build_model()
6
 
7
- def text2audio(text, duration, guidance_scale, random_seed):
8
  # print(text, length, guidance_scale)
9
- waveform = text_to_audio(audioldm, text, random_seed, duration=duration, guidance_scale=guidance_scale, n_candidate_gen_per_text=1) # [bs, 1, samples]
10
  waveform = [(16000, wave[0]) for wave in waveform]
11
  # waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
12
  return waveform
@@ -49,14 +49,15 @@ with iface:
49
  with gr.Box():
50
  ############# Input
51
  textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1)
52
- duration = gr.Slider(2.5, 10, value=5, step=2.5)
53
- guidance_scale = gr.Slider(0, 5, value=2.5, step=0.5)
54
- seed = gr.Number(value=42)
 
55
  ############# Output
56
  outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
57
 
58
  btn = gr.Button("Submit").style(full_width=True)
59
- btn.click(text2audio, inputs=[textbox, duration, guidance_scale, seed], outputs=outputs)
60
  gr.HTML('''
61
  <hr>
62
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
@@ -66,4 +67,5 @@ with iface:
66
  ''')
67
 
68
  iface.queue(concurrency_count=2)
69
- iface.launch(debug=True, share=True)
 
 
1
  import gradio as gr
2
  import numpy as np
3
+ from audioldm import text_to_audio, build_model
4
 
5
  audioldm = build_model()
6
 
7
+ def text2audio(text, duration, guidance_scale, random_seed, n_candidates):
8
  # print(text, length, guidance_scale)
9
+ waveform = text_to_audio(audioldm, text, random_seed, duration=duration, guidance_scale=guidance_scale, n_candidate_gen_per_text=int(n_candidates)) # [bs, 1, samples]
10
  waveform = [(16000, wave[0]) for wave in waveform]
11
  # waveform = [(16000, np.random.randn(16000)), (16000, np.random.randn(16000))]
12
  return waveform
 
49
  with gr.Box():
50
  ############# Input
51
  textbox = gr.Textbox(value="A hammer is hitting a wooden surface", max_lines=1)
52
+ seed = gr.Number(value=42, label="Change this value (any integer number) will lead to a different generation result.")
53
+ duration = gr.Slider(2.5, 10, value=5, step=2.5, label="Duration (seconds)")
54
+ guidance_scale = gr.Slider(0, 5, value=2.5, step=0.5, label="Guidance scale (Large => better quality and relavancy to text; Small => better diversity)")
55
+ n_candidates = gr.Slider(1, 5, value=1, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
56
  ############# Output
57
  outputs=[gr.Audio(label="Output", type="numpy"), gr.Audio(label="Output", type="numpy")]
58
 
59
  btn = gr.Button("Submit").style(full_width=True)
60
+ btn.click(text2audio, inputs=[textbox, duration, guidance_scale, seed, n_candidates], outputs=outputs)
61
  gr.HTML('''
62
  <hr>
63
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
 
67
  ''')
68
 
69
  iface.queue(concurrency_count=2)
70
+ iface.launch(debug=True)
71
+ # iface.launch(debug=True, share=True)