Fabrice-TIERCELIN committed
Commit e206c8a • 1 Parent(s): 7b7c424
Use accordion
app.py CHANGED
@@ -47,12 +47,12 @@ class Tango:
         self.scheduler = DDPMScheduler.from_pretrained(main_config["scheduler_name"], subfolder = "scheduler")
 
     def chunks(self, lst, n):
-        """ Yield successive n-sized chunks from a list. """
+        # Yield successive n-sized chunks from a list
         for i in range(0, len(lst), n):
             yield lst[i:i + n]
 
     def generate(self, prompt, steps = 100, guidance = 3, samples = 1, disable_progress = True):
-        """ Generate audio for a single prompt string. """
+        # Generate audio for a single prompt string
         with torch.no_grad():
             latents = self.model.inference([prompt], self.scheduler, steps, guidance, samples, disable_progress = disable_progress)
             mel = self.vae.decode_first_stage(latents)
@@ -60,7 +60,7 @@ class Tango:
         return wave[0]
 
     def generate_for_batch(self, prompts, steps = 200, guidance = 3, samples = 1, batch_size = 8, disable_progress = True):
-        """ Generate audio for a list of prompt strings. """
+        # Generate audio for a list of prompt strings
         outputs = []
         for k in tqdm(range(0, len(prompts), batch_size)):
             batch = prompts[k: k + batch_size]
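The two methods above share the same slice-stepping idiom. As a quick illustration of the batching behaviour, here is a standalone sketch; fake_generate is a placeholder for the real Tango inference call, not part of the Space:

from tqdm import tqdm

def chunks(lst, n):
    # Yield successive n-sized slices; the last chunk may be shorter
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def generate_for_batch(prompts, batch_size = 8):
    # Same stride pattern as the method above; fake_generate stands in
    # for the real model call (hypothetical, not the Space's code)
    fake_generate = lambda prompt: [0.0] * 16000
    outputs = []
    for k in tqdm(range(0, len(prompts), batch_size)):
        batch = prompts[k: k + batch_size]
        outputs.extend(fake_generate(p) for p in batch)
    return outputs

print(list(chunks(["a", "b", "c", "d", "e"], 2)))  # [['a', 'b'], ['c', 'd'], ['e']]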
@@ -80,7 +80,19 @@ tango.vae.to(device_type)
 tango.stft.to(device_type)
 tango.model.to(device_type)
 
-def
+def check(
+    prompt,
+    steps,
+    guidance
+):
+    if prompt is None or prompt == "":
+        raise gr.Error("Please provide a prompt input.")
+
+def text2audio(
+    prompt,
+    steps,
+    guidance
+):
     output_wave = tango.generate(prompt, steps, guidance)
     return gr.make_waveform((16000, output_wave))
 
@@ -106,14 +118,19 @@ with gr.Blocks() as interface:
         """
     )
     input_text = gr.Textbox(label = "Prompt", value = "Snort of a horse", lines = 2, autofocus = True)
-    denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 100, maximum = 200, value = 100, step = 1, interactive = True)
-    guidance_scale = gr.Slider(label = "Guidance Scale", info = "lower=audio quality, higher=follow the prompt", minimum = 1, maximum = 10, value = 3, step = 0.1, interactive = True)
+    with gr.Accordion("Advanced options", open = False):
+        denoising_steps = gr.Slider(label = "Steps", info = "lower=faster & variant, higher=audio quality & similar", minimum = 100, maximum = 200, value = 100, step = 1, interactive = True)
+        guidance_scale = gr.Slider(label = "Guidance Scale", info = "lower=audio quality, higher=follow the prompt", minimum = 1, maximum = 10, value = 3, step = 0.1, interactive = True)
 
     submit = gr.Button("Generate 🚀", variant = "primary")
 
     output_audio = gr.Audio(label = "Generated Audio")
 
-    submit.click(fn =
+    submit.click(fn = check, inputs = [
+        input_text,
+        denoising_steps,
+        guidance_scale
+    ], outputs = [], queue = False, show_progress = False).success(fn = text2audio, inputs = [
         input_text,
         denoising_steps,
         guidance_scale
@@ -122,7 +139,7 @@ with gr.Blocks() as interface:
     ], scroll_to_output = True)
 
     gr.Examples(
-        fn =
+        fn = text2audio,
         inputs = [
             input_text,
             denoising_steps,
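The event wiring this commit introduces is easier to read in isolation. Below is a minimal self-contained sketch of the same check-then-generate pattern, not the Space's actual code: it assumes a Gradio release that supports .success() chaining, and text2audio here returns a plain (rate, samples) tuple instead of calling the Tango model or gr.make_waveform:

import numpy as np
import gradio as gr

def check(prompt, steps, guidance):
    # Raising gr.Error shows the message as a toast and aborts the
    # event chain, so the .success() handler below never fires
    if prompt is None or prompt == "":
        raise gr.Error("Please provide a prompt input.")

def text2audio(prompt, steps, guidance):
    # Stand-in for tango.generate(): one second of silence at 16 kHz
    return (16000, np.zeros(16000, dtype = np.float32))

with gr.Blocks() as interface:
    input_text = gr.Textbox(label = "Prompt", lines = 2)
    # Secondary controls collapsed by default, as in this commit
    with gr.Accordion("Advanced options", open = False):
        denoising_steps = gr.Slider(label = "Steps", minimum = 100, maximum = 200, value = 100, step = 1)
        guidance_scale = gr.Slider(label = "Guidance Scale", minimum = 1, maximum = 10, value = 3, step = 0.1)
    submit = gr.Button("Generate", variant = "primary")
    output_audio = gr.Audio(label = "Generated Audio")

    # check() runs first; text2audio() only runs if check() did not raise
    submit.click(fn = check, inputs = [input_text, denoising_steps, guidance_scale],
                 outputs = [], queue = False, show_progress = False
                 ).success(fn = text2audio,
                           inputs = [input_text, denoising_steps, guidance_scale],
                           outputs = [output_audio])

interface.launch()

Unlike .then(), which always runs, .success() fires only when the previous listener completed without raising, which is what makes the cheap check gate effective here.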