hungchiayu
committed on
Commit
•
d193c14
1
Parent(s):
d16c6dd
Update app.py
Browse files
app.py
CHANGED
@@ -6,13 +6,13 @@ from tqdm import tqdm
|
|
6 |
from huggingface_hub import snapshot_download
|
7 |
from models import AudioDiffusion, DDPMScheduler
|
8 |
from audioldm.audio.stft import TacotronSTFT
|
9 |
-
from audioldm.variational_autoencoder import AutoencoderKL
|
10 |
from pydub import AudioSegment
|
11 |
from gradio import Markdown
|
12 |
import spaces
|
13 |
|
14 |
import torch
|
15 |
-
|
16 |
from diffusers.models.unet_2d_condition import UNet2DConditionModel
|
17 |
from diffusers import DiffusionPipeline,AudioPipelineOutput
|
18 |
from transformers import CLIPTextModel, T5EncoderModel, AutoModel, T5Tokenizer, T5TokenizerFast
|
@@ -239,21 +239,21 @@ class Tango:
|
|
239 |
|
240 |
tango = Tango(device="cpu")
|
241 |
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
tango.vae.to(device_type)
|
250 |
-
tango.stft.to(device_type)
|
251 |
-
tango.model.to(device_type)
|
252 |
|
253 |
@spaces.GPU(duration=60)
|
254 |
def gradio_generate(prompt, output_format, steps, guidance):
|
255 |
-
|
256 |
-
output_wave = tango.generate(prompt, steps, guidance)
|
257 |
# output_filename = f"{prompt.replace(' ', '_')}_{steps}_{guidance}"[:250] + ".wav"
|
258 |
output_filename = "temp.wav"
|
259 |
wavio.write(output_filename, output_wave, rate=16000, sampwidth=2)
|
|
|
6 |
from huggingface_hub import snapshot_download
|
7 |
from models import AudioDiffusion, DDPMScheduler
|
8 |
from audioldm.audio.stft import TacotronSTFT
|
9 |
+
#from audioldm.variational_autoencoder import AutoencoderKL
|
10 |
from pydub import AudioSegment
|
11 |
from gradio import Markdown
|
12 |
import spaces
|
13 |
|
14 |
import torch
|
15 |
+
from diffusers.models.autoencoder_kl import AutoencoderKL
|
16 |
from diffusers.models.unet_2d_condition import UNet2DConditionModel
|
17 |
from diffusers import DiffusionPipeline,AudioPipelineOutput
|
18 |
from transformers import CLIPTextModel, T5EncoderModel, AutoModel, T5Tokenizer, T5TokenizerFast
|
|
|
239 |
|
240 |
tango = Tango(device="cpu")
|
241 |
|
242 |
+
pipe = Tango2Pipeline(vae=tango.vae,
|
243 |
+
text_encoder=tango.model.text_encoder,
|
244 |
+
tokenizer=tango.model.tokenizer,
|
245 |
+
unet=tango.model.unet,
|
246 |
+
scheduler=tango.scheduler
|
247 |
+
)
|
248 |
+
pipe.to(device_type)
|
249 |
+
#tango.vae.to(device_type)
|
250 |
+
#tango.stft.to(device_type)
|
251 |
+
#tango.model.to(device_type)
|
252 |
|
253 |
@spaces.GPU(duration=60)
|
254 |
def gradio_generate(prompt, output_format, steps, guidance):
|
255 |
+
output_wave = pipe(prompt,steps,guidance) ## Using the pipeline automatically uses flash attention for torch 2.0 and above
|
256 |
+
#output_wave = tango.generate(prompt, steps, guidance)
|
257 |
# output_filename = f"{prompt.replace(' ', '_')}_{steps}_{guidance}"[:250] + ".wav"
|
258 |
output_filename = "temp.wav"
|
259 |
wavio.write(output_filename, output_wave, rate=16000, sampwidth=2)
|