Spaces:
Sleeping
Sleeping
File size: 922 Bytes
5325fcc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# @package __global__
# Classifier-free guidance settings.
# NOTE(review): presumably `training_dropout` is the probability of dropping
# conditioning during training and `inference_coef` the guidance scale at
# inference time; confirm against the consuming code.
classifier_free_guidance:
  training_dropout: 0.3
  inference_coef: 3.0
# Per-attribute dropout, grouped by attribute kind. Both groups are
# explicitly empty mappings, so no attribute-specific dropout is set here.
attribute_dropout:
  text: {}
  wav: {}
# Fuser settings: each list names the conditioners routed through that
# fusing method. Only `cross` is populated (with `description`); the other
# methods are explicitly empty lists.
# NOTE(review): `cross` presumably means cross-attention, given the
# `cross_attention_*` options alongside it; confirm against the consumer.
fuser:
  cross_attention_pos_emb: false
  cross_attention_pos_emb_scale: 1
  sum: []
  prepend: []
  cross: [description]
  input_interpolate: []
# Conditioner definitions, keyed by conditioner name. The single
# `description` conditioner selects the `clap` model, whose parameters
# live in the sibling `clap` mapping.
# NOTE(review): the nesting below (all parameters under `clap`) is
# reconstructed from key order; confirm against the consuming schema.
conditioners:
  description:
    model: clap
    clap:
      checkpoint: //reference/clap/music_audioset_epoch_15_esc_90.14.pt
      model_arch: 'HTSAT-base'
      enable_fusion: false
      sample_rate: 48000
      max_audio_length: 10
      audio_stride: 1
      dim: 512
      attribute: description
      normalize: true
      quantize: true  # use RVQ quantization
      n_q: 12
      bins: 1024
      kmeans_iters: 50
      text_p: 0.  # probability of using text embed at train time
      cache_path: null
# Dataset settings. `joint_embed_attributes` lists `description`, the same
# attribute name the `description` conditioner refers to.
# NOTE(review): the `train` probabilities presumably control merging and
# dropping of text metadata at training time; confirm against the consumer.
dataset:
  joint_embed_attributes: [description]
  train:
    merge_text_p: 0.25
    drop_desc_p: 0.5
    drop_other_p: 0.5
|