lpepino committed on
Commit
65cc081
1 Parent(s): 303bcfc

Update config.gin

Browse files
Files changed (1) hide show
  1. config.gin +56 -74
config.gin CHANGED
@@ -1,74 +1,56 @@
1
- MASK_AMOUNT = 150
2
- MASK_GAP_SIZE = 15
3
- MASKED_LOSS_WEIGHT = 0.9
4
- MODEL_DIM = 768
5
- NUM_DECODER_HEADS = 12
6
- NUM_DECODER_LAYERS = 2
7
- NUM_ENCODEC_TARGETS = 8
8
- NUM_ENCODER_HEADS = 12
9
- NUM_ENCODER_LAYERS = 10
10
- NUM_TARGET_TOKENS = 1024
11
- NUM_TOTAL_TARGETS = 8
12
-
13
- # Parameters for EncodecMAE:
14
- # ==============================================================================
15
- EncodecMAE.decoder = @decoder/tasks.models.transformers.TransformerEncoder
16
- EncodecMAE.head = @tasks.models.encodecmae.heads.FrameLevelClassificationHead
17
- EncodecMAE.lr_scheduler = None
18
- EncodecMAE.masked_weight = %MASKED_LOSS_WEIGHT
19
- EncodecMAE.masker = @tasks.models.encodecmae.masking.TimeGapMask
20
- EncodecMAE.optimizer = @torch.optim.AdamW
21
- EncodecMAE.positional_encoder = \
22
- @tasks.models.transformers.SinusoidalPositionalEmbeddings
23
- EncodecMAE.quantizer_weights = \
24
- [0.22407463,
25
- 0.1759858,
26
- 0.14499009,
27
- 0.12150037,
28
- 0.10315603,
29
- 0.08831368,
30
- 0.07608274,
31
- 0.06589669]
32
- EncodecMAE.target_encoder = @tasks.models.encodecmae.targets.EncodecQuantizer
33
- EncodecMAE.visible_encoder = @encoder/tasks.models.transformers.TransformerEncoder
34
- EncodecMAE.wav_encoder = @tasks.models.encodecmae.encoders.EncodecEncoder
35
-
36
- # Parameters for EncodecQuantizer:
37
- # ==============================================================================
38
- EncodecQuantizer.n = %NUM_ENCODEC_TARGETS
39
-
40
- # Parameters for decoder/MultiHeadAttention:
41
- # ==============================================================================
42
- decoder/MultiHeadAttention.model_dim = %MODEL_DIM
43
- decoder/MultiHeadAttention.num_heads = %NUM_DECODER_HEADS
44
-
45
- # Parameters for encoder/MultiHeadAttention:
46
- # ==============================================================================
47
- encoder/MultiHeadAttention.model_dim = %MODEL_DIM
48
- encoder/MultiHeadAttention.num_heads = %NUM_ENCODER_HEADS
49
-
50
- # Parameters for SinusoidalPositionalEmbeddings:
51
- # ==============================================================================
52
- SinusoidalPositionalEmbeddings.embedding_dim = %MODEL_DIM
53
-
54
- # Parameters for decoder/TransformerEncoder:
55
- # ==============================================================================
56
- decoder/TransformerEncoder.attention_layer = \
57
- @decoder/tasks.models.transformers.MultiHeadAttention
58
- decoder/TransformerEncoder.compile = True
59
- decoder/TransformerEncoder.model_dim = %MODEL_DIM
60
- decoder/TransformerEncoder.num_layers = %NUM_DECODER_LAYERS
61
-
62
- # Parameters for encoder/TransformerEncoder:
63
- # ==============================================================================
64
- encoder/TransformerEncoder.attention_layer = \
65
- @encoder/tasks.models.transformers.MultiHeadAttention
66
- encoder/TransformerEncoder.compile = True
67
- encoder/TransformerEncoder.model_dim = %MODEL_DIM
68
- encoder/TransformerEncoder.num_layers = %NUM_ENCODER_LAYERS
69
-
70
- # Parameters for FrameLevelClassificationHead:
71
- # ==============================================================================
72
- FrameLevelClassificationHead.model_dim = %MODEL_DIM
73
- FrameLevelClassificationHead.num_streams = %NUM_TOTAL_TARGETS
74
- FrameLevelClassificationHead.num_tokens = %NUM_TARGET_TOKENS
 
1
+ NUM_ENCODEC_TARGETS=8
2
+ NUM_TOTAL_TARGETS=8
3
+ NUM_TARGET_TOKENS=1024
4
+ MASK_AMOUNT=150
5
+ MASK_GAP_SIZE=15
6
+ MASK_PROP=0.5
7
+ MODEL_DIM=768
8
+ NUM_ENCODER_LAYERS=10
9
+ NUM_ENCODER_HEADS=12
10
+ NUM_DECODER_LAYERS=2
11
+ NUM_DECODER_HEADS=12
12
+ MASKED_LOSS_WEIGHT=0.9
13
+ get_model.model=@models.EncodecMAE
14
+ models.EncodecMAE:
15
+ wav_encoder = @models.encodecmae.encoders.EncodecEncoder
16
+ target_encoder = @models.encodecmae.targets.EncodecQuantizer
17
+ masker = @models.encodecmae.masking.TimeGapMask
18
+ visible_encoder = @encoder/models.transformers.TransformerEncoder
19
+ positional_encoder = @models.transformers.SinusoidalPositionalEmbeddings
20
+ decoder = @decoder/models.transformers.TransformerEncoder
21
+ head = @models.encodecmae.heads.FrameLevelClassificationHead
22
+ optimizer=@torch.optim.AdamW
23
+ lr_scheduler=None
24
+ masked_weight=%MASKED_LOSS_WEIGHT
25
+ quantizer_weights=[0.22407463, 0.1759858 , 0.14499009, 0.12150037, 0.10315603, 0.08831368, 0.07608274, 0.06589669]
26
+ torch.optim.AdamW:
27
+ lr=%PRETRAIN_MAX_LR
28
+ betas=(0.9,0.95)
29
+ weight_decay=0.05
30
+ models.encodecmae.targets.EncodecQuantizer:
31
+ n = %NUM_ENCODEC_TARGETS
32
+ models.encodecmae.masking.TimeGapMask:
33
+ mask_amount = %MASK_AMOUNT
34
+ gap_size = %MASK_GAP_SIZE
35
+ mask_prop = %MASK_PROP
36
+ encoder/models.transformers.TransformerEncoder:
37
+ model_dim=%MODEL_DIM
38
+ num_layers=%NUM_ENCODER_LAYERS
39
+ attention_layer=@encoder/models.transformers.MultiHeadAttention
40
+ compile=True
41
+ encoder/models.transformers.MultiHeadAttention:
42
+ model_dim=%MODEL_DIM
43
+ num_heads=%NUM_ENCODER_HEADS
44
+ decoder/models.transformers.TransformerEncoder:
45
+ model_dim=%MODEL_DIM
46
+ num_layers=%NUM_DECODER_LAYERS
47
+ attention_layer=@decoder/models.transformers.MultiHeadAttention
48
+ compile=True
49
+ decoder/models.transformers.MultiHeadAttention:
50
+ model_dim=%MODEL_DIM
51
+ num_heads=%NUM_DECODER_HEADS
52
+ models.transformers.SinusoidalPositionalEmbeddings.embedding_dim = %MODEL_DIM
53
+ models.encodecmae.heads.FrameLevelClassificationHead:
54
+ model_dim=%MODEL_DIM
55
+ num_tokens=%NUM_TARGET_TOKENS
56
+ num_streams=%NUM_TOTAL_TARGETS