soujanyaporia committed on
Commit
c463b7e
β€’
1 Parent(s): 0c677ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -6
app.py CHANGED
@@ -10,8 +10,16 @@ from audioldm.variational_autoencoder import AutoencoderKL
10
  from gradio import Markdown
11
  import spaces
12
 
 
 
 
 
 
 
 
 
13
  class Tango:
14
- def __init__(self, name="declare-lab/tango-full-ft-audiocaps", device="cuda:0"):
15
 
16
  path = snapshot_download(repo_id=name)
17
 
@@ -70,9 +78,9 @@ class Tango:
70
  # Initialize TANGO
71
 
72
  tango = Tango(device="cpu")
73
- tango.vae.to("cuda")
74
- tango.stft.to("cuda")
75
- tango.model.to("cuda")
76
 
77
  @spaces.GPU(duration=60)
78
  def gradio_generate(prompt, steps, guidance):
@@ -102,7 +110,12 @@ def gradio_generate(prompt, steps, guidance):
102
  # Using this ChatGPT-generated description of the sound, TANGO provides superior results.
103
  # <p/>
104
  # """
105
- description_text = "Read the paper: https://arxiv.org/abs/2304.13731"
 
 
 
 
 
106
  # Gradio input and output components
107
  input_text = gr.Textbox(lines=2, label="Prompt")
108
  output_audio = gr.Audio(label="Generated Audio", type="filepath")
@@ -114,7 +127,7 @@ gr_interface = gr.Interface(
114
  fn=gradio_generate,
115
  inputs=[input_text, denoising_steps, guidance_scale],
116
  outputs=[output_audio],
117
- title="TANGO: Text to Audio using Instruction-Guided Diffusion",
118
  description=description_text,
119
  allow_flagging=False,
120
  examples=[
 
10
  from gradio import Markdown
11
  import spaces
12
 
13
+ # Automatic device detection
14
+ if torch.cuda.is_available():
15
+ device_type = "cuda"
16
+ device_selection = "cuda:0"
17
+ else:
18
+ device_type = "cpu"
19
+ device_selection = "cpu"
20
+
21
  class Tango:
22
+ def __init__(self, name="declare-lab/tango", device=device_selection):
23
 
24
  path = snapshot_download(repo_id=name)
25
 
 
78
  # Initialize TANGO
79
 
80
  tango = Tango(device="cpu")
81
+ tango.vae.to(device_type)
82
+ tango.stft.to(device_type)
83
+ tango.model.to(device_type)
84
 
85
  @spaces.GPU(duration=60)
86
  def gradio_generate(prompt, steps, guidance):
 
110
  # Using this ChatGPT-generated description of the sound, TANGO provides superior results.
111
  # <p/>
112
  # """
113
+ description_text = """
114
+ <p><a href="https://huggingface.co/spaces/declare-lab/tango2/blob/main/app.py?duplicate=true"> <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> For faster inference without waiting in queue, you may duplicate the space and upgrade to a GPU in the settings. <br/><br/>
115
+ Generate audio using Tango2 by providing a text prompt. Tango2 was built from Tango and was trained on <a href="https://huggingface.co/datasets/declare-lab/audio-alpaca">Audio-alpaca</a>
116
+ <br/><br/> This is the demo for Tango2 for text to audio generation: <a href="https://arxiv.org/abs/2404.09956">Read our paper.</a>
117
+ <p/>
118
+ """
119
  # Gradio input and output components
120
  input_text = gr.Textbox(lines=2, label="Prompt")
121
  output_audio = gr.Audio(label="Generated Audio", type="filepath")
 
127
  fn=gradio_generate,
128
  inputs=[input_text, denoising_steps, guidance_scale],
129
  outputs=[output_audio],
130
+ title="Tango 2: Aligning Diffusion-based Text-to-Audio Generations through Direct Preference Optimization",
131
  description=description_text,
132
  allow_flagging=False,
133
  examples=[