ylacombe HF staff committed on
Commit
458f154
1 Parent(s): 9330af7
Files changed (1) hide show
  1. config.json +5 -34
config.json CHANGED
@@ -3,7 +3,6 @@
3
  "activation_function": "relu",
4
  "adaptor_dropout": 0.1,
5
  "adaptor_kernel_size": 8,
6
- "adaptor_layer_norm": true,
7
  "adaptor_stride": 8,
8
  "add_adapter": true,
9
  "architectures": [
@@ -11,35 +10,8 @@
11
  ],
12
  "attention_dropout": 0.1,
13
  "bos_token_id": 2,
14
- "conv_bias": false,
15
  "conv_depthwise_kernel_size": 31,
16
- "conv_dim": [
17
- 512,
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 160
24
- ],
25
- "conv_kernel": [
26
- 10,
27
- 3,
28
- 3,
29
- 3,
30
- 3,
31
- 2,
32
- 2
33
- ],
34
- "conv_stride": [
35
- 5,
36
- 2,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2
42
- ],
43
  "decoder_attention_heads": 16,
44
  "decoder_ffn_dim": 4096,
45
  "decoder_layerdrop": 0.05,
@@ -51,26 +23,22 @@
51
  "encoder_layerdrop": 0.05,
52
  "encoder_layers": 12,
53
  "eos_token_id": 3,
54
- "hidden_act": "gelu",
55
  "hidden_size": 1024,
56
- "init_std": 0.02,
57
  "initializer_range": 0.02,
58
  "is_encoder_decoder": true,
59
  "lang_embed_dim": 256,
60
  "layer_norm_eps": 1e-05,
61
- "layerdrop": 0.1,
62
  "leaky_relu_slope": 0.1,
63
  "max_new_tokens": 256,
64
  "max_position_embeddings": 4096,
65
  "max_source_positions": 4096,
66
- "model_in_dim": 1792,
67
  "model_type": "seamless_m4t",
68
  "num_adapter_layers": 1,
69
  "num_attention_heads": 16,
70
  "num_conv_pos_embedding_groups": 16,
71
  "num_conv_pos_embeddings": 128,
72
  "num_hidden_layers": 12,
73
- "output_hidden_size": null,
74
  "pad_token_id": 0,
75
  "position_embeddings_type": "relative",
76
  "resblock_dilation_sizes": [
@@ -102,6 +70,7 @@
102
  "speech_encoder_dropout": 0.0,
103
  "speech_encoder_hidden_act": "swish",
104
  "speech_encoder_intermediate_size": 4096,
 
105
  "speech_encoder_layers": 12,
106
  "spkr_embed_dim": 256,
107
  "t2u_bos_token_id": 0,
@@ -114,7 +83,9 @@
114
  "t2u_encoder_layers": 4,
115
  "t2u_eos_token_id": 2,
116
  "t2u_max_new_tokens": 1024,
 
117
  "t2u_num_langs": 38,
 
118
  "t2u_pad_token_id": 1,
119
  "torch_dtype": "float32",
120
  "transformers_version": "4.33.0.dev0",
 
3
  "activation_function": "relu",
4
  "adaptor_dropout": 0.1,
5
  "adaptor_kernel_size": 8,
 
6
  "adaptor_stride": 8,
7
  "add_adapter": true,
8
  "architectures": [
 
10
  ],
11
  "attention_dropout": 0.1,
12
  "bos_token_id": 2,
13
+ "control_symbol_vocoder_offset": 4,
14
  "conv_depthwise_kernel_size": 31,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "decoder_attention_heads": 16,
16
  "decoder_ffn_dim": 4096,
17
  "decoder_layerdrop": 0.05,
 
23
  "encoder_layerdrop": 0.05,
24
  "encoder_layers": 12,
25
  "eos_token_id": 3,
26
+ "feature_projection_input_dim": 160,
27
  "hidden_size": 1024,
 
28
  "initializer_range": 0.02,
29
  "is_encoder_decoder": true,
30
  "lang_embed_dim": 256,
31
  "layer_norm_eps": 1e-05,
 
32
  "leaky_relu_slope": 0.1,
33
  "max_new_tokens": 256,
34
  "max_position_embeddings": 4096,
35
  "max_source_positions": 4096,
 
36
  "model_type": "seamless_m4t",
37
  "num_adapter_layers": 1,
38
  "num_attention_heads": 16,
39
  "num_conv_pos_embedding_groups": 16,
40
  "num_conv_pos_embeddings": 128,
41
  "num_hidden_layers": 12,
 
42
  "pad_token_id": 0,
43
  "position_embeddings_type": "relative",
44
  "resblock_dilation_sizes": [
 
70
  "speech_encoder_dropout": 0.0,
71
  "speech_encoder_hidden_act": "swish",
72
  "speech_encoder_intermediate_size": 4096,
73
+ "speech_encoder_layerdrop": 0.1,
74
  "speech_encoder_layers": 12,
75
  "spkr_embed_dim": 256,
76
  "t2u_bos_token_id": 0,
 
83
  "t2u_encoder_layers": 4,
84
  "t2u_eos_token_id": 2,
85
  "t2u_max_new_tokens": 1024,
86
+ "t2u_max_position_embeddings": 2048,
87
  "t2u_num_langs": 38,
88
+ "t2u_offset_tgt_lang": 10005,
89
  "t2u_pad_token_id": 1,
90
  "torch_dtype": "float32",
91
  "transformers_version": "4.33.0.dev0",