deeplearning's picture
Duplicate from AIFILMS/audioldm-text-to-audio-generation
ddc593e
raw
history blame contribute delete
495 Bytes
{
"embed_dim": 512,
"audio_cfg": {
"audio_length": 1024,
"clip_samples": 480000,
"mel_bins": 64,
"sample_rate": 48000,
"window_size": 1024,
"hop_size": 480,
"fmin": 50,
"fmax": 14000,
"class_num": 527,
"model_type": "PANN",
"model_name": "Cnn6"
},
"text_cfg": {
"context_length": 77,
"vocab_size": 49408,
"width": 512,
"heads": 8,
"layers": 12
}
}