hungchiayu committed
Commit d193c14
Parent: d16c6dd

Update app.py

Files changed (1): app.py (+14 -14)
app.py CHANGED
@@ -6,13 +6,13 @@ from tqdm import tqdm
 from huggingface_hub import snapshot_download
 from models import AudioDiffusion, DDPMScheduler
 from audioldm.audio.stft import TacotronSTFT
-from audioldm.variational_autoencoder import AutoencoderKL
+#from audioldm.variational_autoencoder import AutoencoderKL
 from pydub import AudioSegment
 from gradio import Markdown
 import spaces
 
 import torch
-#from diffusers.models.autoencoder_kl import AutoencoderKL
+from diffusers.models.autoencoder_kl import AutoencoderKL
 from diffusers.models.unet_2d_condition import UNet2DConditionModel
 from diffusers import DiffusionPipeline,AudioPipelineOutput
 from transformers import CLIPTextModel, T5EncoderModel, AutoModel, T5Tokenizer, T5TokenizerFast
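
The import hunk above swaps which AutoencoderKL the Space uses: the copy bundled with the audioldm package is commented out in favor of the diffusers implementation. A minimal sketch of what the diffusers class provides, assuming a recent diffusers release (the top-level re-export below is the more stable spelling of the deep module path used in the diff; the checkpoint id is illustrative only, since app.py builds its VAE from the Tango snapshot it downloads):

import torch
from diffusers import AutoencoderKL  # stable top-level re-export

# Illustrative checkpoint, not the one this Space loads.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")

x = torch.randn(1, 3, 64, 64)  # dummy image-shaped batch
with torch.no_grad():
    latents = vae.encode(x).latent_dist.sample()  # sample from the posterior
    recon = vae.decode(latents).sample            # decoded reconstruction
print(latents.shape, recon.shape)
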
@@ -239,21 +239,21 @@ class Tango:
 
 tango = Tango(device="cpu")
 
-#pipe = Tango2Pipeline(vae=tango.vae,
-#                      text_encoder=tango.model.text_encoder,
-#                      tokenizer=tango.model.tokenizer,
-#                      unet=tango.model.unet,
-#                      scheduler=tango.scheduler
-#                      )
-#pipe.to(device)
-tango.vae.to(device_type)
-tango.stft.to(device_type)
-tango.model.to(device_type)
+pipe = Tango2Pipeline(vae=tango.vae,
+                      text_encoder=tango.model.text_encoder,
+                      tokenizer=tango.model.tokenizer,
+                      unet=tango.model.unet,
+                      scheduler=tango.scheduler
+                      )
+pipe.to(device_type)
+#tango.vae.to(device_type)
+#tango.stft.to(device_type)
+#tango.model.to(device_type)
 
 @spaces.GPU(duration=60)
 def gradio_generate(prompt, output_format, steps, guidance):
-    #output_wave = pipe(prompt,steps,guidance) ## the pipeline automatically uses flash attention on torch 2.0 and above
-    output_wave = tango.generate(prompt, steps, guidance)
+    output_wave = pipe(prompt,steps,guidance) ## the pipeline automatically uses flash attention on torch 2.0 and above
+    #output_wave = tango.generate(prompt, steps, guidance)
     # output_filename = f"{prompt.replace(' ', '_')}_{steps}_{guidance}"[:250] + ".wav"
     output_filename = "temp.wav"
     wavio.write(output_filename, output_wave, rate=16000, sampwidth=2)
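
The second hunk enables the Tango2Pipeline path the previous revision left commented out: the VAE, text encoder, tokenizer, UNet, and scheduler are bundled into one pipeline object, so a single pipe.to(device_type) replaces the three per-component .to() calls. A minimal sketch of the pattern this relies on, assuming Tango2Pipeline follows the standard diffusers custom-pipeline convention (the real class is defined elsewhere in this Space; its constructor signature is read off the diff):

from diffusers import DiffusionPipeline

class Tango2Pipeline(DiffusionPipeline):
    # Sketch only. register_modules() is what makes pipeline-level helpers
    # such as pipe.to(device) and pipe.save_pretrained() act on every
    # registered component at once.
    def __init__(self, vae, text_encoder, tokenizer, unet, scheduler):
        super().__init__()
        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            scheduler=scheduler,
        )

With the modules registered, device placement and serialization are handled uniformly, which is why the three commented-out .to() calls become redundant.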
 
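Inside gradio_generate, generation now goes through pipe(...) instead of tango.generate(...); as the inline comment notes, routing through the diffusers pipeline picks up PyTorch 2.x scaled-dot-product (flash) attention automatically. A hedged end-to-end sketch of the call and the write-out, with illustrative values (the positional prompt/steps/guidance order, the 16 kHz mono output, and the mp3 branch implied by the output_format argument are all assumptions read off the diff, which is truncated before the format handling):

import wavio
from pydub import AudioSegment

output_format = "mp3"  # presumably one of the Gradio choices, e.g. "wav" or "mp3"

# pipe is the Tango2Pipeline built above; argument order is assumed.
output_wave = pipe("a dog barking in the rain", 100, 3)  # prompt, steps, guidance
wavio.write("temp.wav", output_wave, rate=16000, sampwidth=2)

# Hypothetical mp3 branch; the actual code sits below the truncated diff.
if output_format == "mp3":
    AudioSegment.from_wav("temp.wav").export("temp.mp3", format="mp3")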