haoheliu committed on
Commit
3901b16
1 Parent(s): 807c6f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -23,7 +23,7 @@ current_model_name = None
23
  # response = [(response[i], response[i+1]) for i in range(0, len(response)-1, 2)] # convert to tuples of list
24
  # return response, history
25
 
26
- def text2audio(text, duration, guidance_scale, random_seed, n_candidates, model_name):
27
  global audioldm, current_model_name
28
 
29
  if audioldm is None or model_name != current_model_name:
@@ -238,9 +238,9 @@ with iface:
238
  duration = gr.Slider(2.5, 10, value=10, step=2.5, label="Duration (seconds)")
239
  guidance_scale = gr.Slider(0, 4, value=2.5, step=0.5, label="Guidance scale (Large => better quality and relavancy to text; Small => better diversity)")
240
  n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
241
- model_name = gr.Dropdown(
242
- ["audioldm-m-text-ft", "audioldm-s-text-ft", "audioldm-m-full","audioldm-s-full-v2", "audioldm-s-full", "audioldm-l-full"], value="audioldm-m-text-ft", label="Choose the model to use. audioldm-m-text-ft and audioldm-s-text-ft are recommanded. -s- means small, -m- means medium and -l- means large",
243
- )
244
  ############# Output
245
  # outputs=gr.Audio(label="Output", type="numpy")
246
  outputs=gr.Video(label="Output", elem_id="output-video")
@@ -259,9 +259,11 @@ with iface:
259
  loading_icon = gr.HTML(loading_icon_html)
260
  share_button = gr.Button("Share to community", elem_id="share-btn")
261
 
 
 
262
  btn.click(text2audio, inputs=[
263
- textbox, duration, guidance_scale, seed, n_candidates, model_name], outputs=[outputs])
264
-
265
  share_button.click(None, [], [], _js=share_js)
266
  gr.HTML('''
267
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
@@ -280,7 +282,8 @@ with iface:
280
  ["Wooden table tapping sound followed by water pouring sound.", 5, 2.5, 45, 3, "audioldm-m-text-ft"],
281
  ],
282
  fn=text2audio,
283
- inputs=[textbox, duration, guidance_scale, seed, n_candidates, model_name],
 
284
  outputs=[outputs],
285
  cache_examples=True,
286
  )
 
23
  # response = [(response[i], response[i+1]) for i in range(0, len(response)-1, 2)] # convert to tuples of list
24
  # return response, history
25
 
26
+ def text2audio(text, duration, guidance_scale, random_seed, n_candidates, model_name="audioldm-m-text-ft"):
27
  global audioldm, current_model_name
28
 
29
  if audioldm is None or model_name != current_model_name:
 
238
  duration = gr.Slider(2.5, 10, value=10, step=2.5, label="Duration (seconds)")
239
  guidance_scale = gr.Slider(0, 4, value=2.5, step=0.5, label="Guidance scale (Large => better quality and relavancy to text; Small => better diversity)")
240
  n_candidates = gr.Slider(1, 5, value=3, step=1, label="Automatic quality control. This number control the number of candidates (e.g., generate three audios and choose the best to show you). A Larger value usually lead to better quality with heavier computation")
241
+ # model_name = gr.Dropdown(
242
+ # ["audioldm-m-text-ft", "audioldm-s-text-ft", "audioldm-m-full","audioldm-s-full-v2", "audioldm-s-full", "audioldm-l-full"], value="audioldm-m-text-ft", label="Choose the model to use. audioldm-m-text-ft and audioldm-s-text-ft are recommanded. -s- means small, -m- means medium and -l- means large",
243
+ # )
244
  ############# Output
245
  # outputs=gr.Audio(label="Output", type="numpy")
246
  outputs=gr.Video(label="Output", elem_id="output-video")
 
259
  loading_icon = gr.HTML(loading_icon_html)
260
  share_button = gr.Button("Share to community", elem_id="share-btn")
261
 
262
+ # btn.click(text2audio, inputs=[
263
+ # textbox, duration, guidance_scale, seed, n_candidates, model_name], outputs=[outputs])
264
  btn.click(text2audio, inputs=[
265
+ textbox, duration, guidance_scale, seed, n_candidates], outputs=[outputs])
266
+
267
  share_button.click(None, [], [], _js=share_js)
268
  gr.HTML('''
269
  <div class="footer" style="text-align: center; max-width: 700px; margin: 0 auto;">
 
282
  ["Wooden table tapping sound followed by water pouring sound.", 5, 2.5, 45, 3, "audioldm-m-text-ft"],
283
  ],
284
  fn=text2audio,
285
+ # inputs=[textbox, duration, guidance_scale, seed, n_candidates, model_name],
286
+ inputs=[textbox, duration, guidance_scale, seed, n_candidates],
287
  outputs=[outputs],
288
  cache_examples=True,
289
  )