lmzjms committed on
Commit
988947c
1 Parent(s): 4f2ef2c

Update audio_foundation_models.py

Browse files
Files changed (1) hide show
  1. audio_foundation_models.py +6 -6
audio_foundation_models.py CHANGED
@@ -71,7 +71,7 @@ def initialize_model_inpaint(config, ckpt):
71
  sampler = DDIMSampler(model)
72
  return sampler
73
  def select_best_audio(prompt,wav_list):
74
- clap_model = CLAPWrapper('useful_ckpts/CLAP/CLAP_weights_2022.pth','useful_ckpts/CLAP/config.yml',use_cuda=torch.cuda.is_available())
75
  text_embeddings = clap_model.get_text_embeddings([prompt])
76
  score_list = []
77
  for data in wav_list:
@@ -132,7 +132,7 @@ class T2A:
132
  def __init__(self, device):
133
  print("Initializing Make-An-Audio to %s" % device)
134
  self.device = device
135
- self.sampler = initialize_model('configs/text-to-audio/txt2audio_args.yaml', 'useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
136
  self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
137
 
138
  @prompts(name="Generate Audio From User Input Text",
@@ -185,8 +185,8 @@ class I2A:
185
  def __init__(self, device):
186
  print("Initializing Make-An-Audio-Image to %s" % device)
187
  self.device = device
188
- self.sampler = initialize_model('text_to_audio/Make_An_Audio_img/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio_img/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
189
- self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio_img/vocoder/logs/bigv16k53w',device=device)
190
 
191
  @prompts(name="Generate Audio From The Image",
192
  description="useful for when you want to generate an audio "
@@ -345,8 +345,8 @@ class Inpaint:
345
  def __init__(self, device):
346
  print("Initializing Make-An-Audio-inpaint to %s" % device)
347
  self.device = device
348
- self.sampler = initialize_model_inpaint('text_to_audio/Make_An_Audio_inpaint/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio_inpaint/useful_ckpts/inpaint7_epoch00047.ckpt')
349
- self.vocoder = VocoderBigVGAN('./vocoder/logs/bigv16k53w',device=device)
350
  self.cmap_transform = matplotlib.cm.viridis
351
 
352
  @prompts(name="Audio Inpainting",
 
71
  sampler = DDIMSampler(model)
72
  return sampler
73
  def select_best_audio(prompt,wav_list):
74
+ clap_model = CLAPWrapper('text_to_audio/Make_An_Audio/useful_ckpts/CLAP/CLAP_weights_2022.pth','text_to_audio/Make_An_Audio/useful_ckpts/CLAP/config.yml',use_cuda=torch.cuda.is_available())
75
  text_embeddings = clap_model.get_text_embeddings([prompt])
76
  score_list = []
77
  for data in wav_list:
 
132
  def __init__(self, device):
133
  print("Initializing Make-An-Audio to %s" % device)
134
  self.device = device
135
+ self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/text-to-audio/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
136
  self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
137
 
138
  @prompts(name="Generate Audio From User Input Text",
 
185
  def __init__(self, device):
186
  print("Initializing Make-An-Audio-Image to %s" % device)
187
  self.device = device
188
+ self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
189
+ self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
190
 
191
  @prompts(name="Generate Audio From The Image",
192
  description="useful for when you want to generate an audio "
 
345
  def __init__(self, device):
346
  print("Initializing Make-An-Audio-inpaint to %s" % device)
347
  self.device = device
348
+ self.sampler = initialize_model_inpaint('text_to_audio/Make_An_Audio/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/inpaint7_epoch00047.ckpt')
349
+ self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
350
  self.cmap_transform = matplotlib.cm.viridis
351
 
352
  @prompts(name="Audio Inpainting",