fxmarty HF staff committed on
Commit
56121b9
1 Parent(s): 510e35f

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ decoder/whisper_float16_tp1_rank0.engine filter=lfs diff=lfs merge=lfs -text
37
+ encoder/whisper_float16_tp1_rank0.engine filter=lfs diff=lfs merge=lfs -text
decoder/build.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_config": {
3
+ "apply_query_key_layer_scaling": false,
4
+ "cross_attention": true,
5
+ "fp8": false,
6
+ "has_position_embedding": true,
7
+ "has_token_type_embedding": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_size": 768,
10
+ "huggingface": {
11
+ "_name_or_path": "openai/whisper-small",
12
+ "activation_dropout": 0.0,
13
+ "activation_function": "gelu",
14
+ "architectures": [
15
+ "WhisperForConditionalGeneration"
16
+ ],
17
+ "attention_dropout": 0.0,
18
+ "begin_suppress_tokens": [
19
+ 220,
20
+ 50257
21
+ ],
22
+ "bos_token_id": 50257,
23
+ "d_model": 768,
24
+ "decoder_attention_heads": 12,
25
+ "decoder_ffn_dim": 3072,
26
+ "decoder_layerdrop": 0.0,
27
+ "decoder_layers": 12,
28
+ "decoder_start_token_id": 50258,
29
+ "dropout": 0.0,
30
+ "encoder_attention_heads": 12,
31
+ "encoder_ffn_dim": 3072,
32
+ "encoder_layerdrop": 0.0,
33
+ "encoder_layers": 12,
34
+ "eos_token_id": 50257,
35
+ "forced_decoder_ids": [
36
+ [
37
+ 1,
38
+ 50259
39
+ ],
40
+ [
41
+ 2,
42
+ 50359
43
+ ],
44
+ [
45
+ 3,
46
+ 50363
47
+ ]
48
+ ],
49
+ "hidden_size": 768,
50
+ "init_std": 0.02,
51
+ "is_encoder_decoder": true,
52
+ "max_length": 448,
53
+ "max_sequence_length": 448,
54
+ "max_source_positions": 1500,
55
+ "max_target_positions": 448,
56
+ "model_type": "whisper",
57
+ "num_hidden_layers": 12,
58
+ "num_layers": 12,
59
+ "num_mel_bins": 80,
60
+ "pad_token_id": 50257,
61
+ "scale_embedding": false,
62
+ "suppress_tokens": [
63
+ 1,
64
+ 2,
65
+ 7,
66
+ 8,
67
+ 9,
68
+ 10,
69
+ 14,
70
+ 25,
71
+ 26,
72
+ 27,
73
+ 28,
74
+ 29,
75
+ 31,
76
+ 58,
77
+ 59,
78
+ 60,
79
+ 61,
80
+ 62,
81
+ 63,
82
+ 90,
83
+ 91,
84
+ 92,
85
+ 93,
86
+ 359,
87
+ 503,
88
+ 522,
89
+ 542,
90
+ 873,
91
+ 893,
92
+ 902,
93
+ 918,
94
+ 922,
95
+ 931,
96
+ 1350,
97
+ 1853,
98
+ 1982,
99
+ 2460,
100
+ 2627,
101
+ 3246,
102
+ 3253,
103
+ 3268,
104
+ 3536,
105
+ 3846,
106
+ 3961,
107
+ 4183,
108
+ 4667,
109
+ 6585,
110
+ 6647,
111
+ 7273,
112
+ 9061,
113
+ 9383,
114
+ 10428,
115
+ 10929,
116
+ 11938,
117
+ 12033,
118
+ 12331,
119
+ 12562,
120
+ 13793,
121
+ 14157,
122
+ 14635,
123
+ 15265,
124
+ 15618,
125
+ 16553,
126
+ 16604,
127
+ 18362,
128
+ 18956,
129
+ 20075,
130
+ 21675,
131
+ 22520,
132
+ 26130,
133
+ 26161,
134
+ 26435,
135
+ 28279,
136
+ 29464,
137
+ 31650,
138
+ 32302,
139
+ 32470,
140
+ 36865,
141
+ 42863,
142
+ 47425,
143
+ 49870,
144
+ 50254,
145
+ 50258,
146
+ 50360,
147
+ 50361,
148
+ 50362
149
+ ],
150
+ "torch_dtype": "float32",
151
+ "transformers_version": "4.27.0.dev0",
152
+ "use_cache": true,
153
+ "vocab_size": 51865
154
+ },
155
+ "int8": false,
156
+ "max_batch_size": 1,
157
+ "max_input_len": 1,
158
+ "max_output_len": 448,
159
+ "max_position_embeddings": 448,
160
+ "name": "whisper",
161
+ "num_heads": 12,
162
+ "num_layers": 12,
163
+ "precision": "float16",
164
+ "quant_mode": 0,
165
+ "tensor_parallel": 1,
166
+ "tensorrt": "9.2.0.post12.dev5",
167
+ "use_refit": false
168
+ },
169
+ "plugin_config": {
170
+ "attention_qk_half_accumulation": false,
171
+ "bert_attention_plugin": false,
172
+ "context_fmha_type": 1,
173
+ "gemm_plugin": "float16",
174
+ "gpt_attention_plugin": "float16",
175
+ "identity_plugin": false,
176
+ "layernorm_plugin": false,
177
+ "layernorm_quantization_plugin": false,
178
+ "lookup_plugin": false,
179
+ "lora_plugin": false,
180
+ "multi_block_mode": false,
181
+ "nccl_plugin": false,
182
+ "paged_kv_cache": false,
183
+ "quantize_per_token_plugin": false,
184
+ "quantize_tensor_plugin": false,
185
+ "remove_input_padding": true,
186
+ "rmsnorm_plugin": false,
187
+ "rmsnorm_quantization_plugin": false,
188
+ "smooth_quant_gemm_plugin": false,
189
+ "tokens_per_block": 0,
190
+ "use_custom_all_reduce": false,
191
+ "use_paged_context_fmha": false,
192
+ "weight_only_groupwise_quant_matmul_plugin": false,
193
+ "weight_only_quant_matmul_plugin": false
194
+ }
195
+ }
decoder/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_name_or_path": "openai/whisper-small", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 768, "decoder_attention_heads": 12, "decoder_ffn_dim": 3072, "decoder_layerdrop": 0.0, "decoder_layers": 12, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 12, "encoder_ffn_dim": 3072, "encoder_layerdrop": 0.0, "encoder_layers": 12, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 12, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865, "num_layers": 12, "max_sequence_length": 448, "hidden_size": 768}
decoder/timings.cache ADDED
Binary file (102 kB). View file
 
decoder/whisper_float16_tp1_rank0.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932161932241b2e40711ecc96c46350a1d818ce2aac7572581d2eef407494c99
3
+ size 388396564
encoder/build.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_config": {
3
+ "fp8": false,
4
+ "hidden_size": 768,
5
+ "huggingface": {
6
+ "_name_or_path": "openai/whisper-small",
7
+ "activation_dropout": 0.0,
8
+ "activation_function": "gelu",
9
+ "architectures": [
10
+ "WhisperForConditionalGeneration"
11
+ ],
12
+ "attention_dropout": 0.0,
13
+ "begin_suppress_tokens": [
14
+ 220,
15
+ 50257
16
+ ],
17
+ "bos_token_id": 50257,
18
+ "d_model": 768,
19
+ "decoder_attention_heads": 12,
20
+ "decoder_ffn_dim": 3072,
21
+ "decoder_layerdrop": 0.0,
22
+ "decoder_layers": 12,
23
+ "decoder_start_token_id": 50258,
24
+ "dropout": 0.0,
25
+ "encoder_attention_heads": 12,
26
+ "encoder_ffn_dim": 3072,
27
+ "encoder_layerdrop": 0.0,
28
+ "encoder_layers": 12,
29
+ "eos_token_id": 50257,
30
+ "forced_decoder_ids": [
31
+ [
32
+ 1,
33
+ 50259
34
+ ],
35
+ [
36
+ 2,
37
+ 50359
38
+ ],
39
+ [
40
+ 3,
41
+ 50363
42
+ ]
43
+ ],
44
+ "hidden_size": 768,
45
+ "init_std": 0.02,
46
+ "is_encoder_decoder": true,
47
+ "max_length": 448,
48
+ "max_sequence_length": 448,
49
+ "max_source_positions": 1500,
50
+ "max_target_positions": 448,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 12,
53
+ "num_layers": 12,
54
+ "num_mel_bins": 80,
55
+ "pad_token_id": 50257,
56
+ "scale_embedding": false,
57
+ "suppress_tokens": [
58
+ 1,
59
+ 2,
60
+ 7,
61
+ 8,
62
+ 9,
63
+ 10,
64
+ 14,
65
+ 25,
66
+ 26,
67
+ 27,
68
+ 28,
69
+ 29,
70
+ 31,
71
+ 58,
72
+ 59,
73
+ 60,
74
+ 61,
75
+ 62,
76
+ 63,
77
+ 90,
78
+ 91,
79
+ 92,
80
+ 93,
81
+ 359,
82
+ 503,
83
+ 522,
84
+ 542,
85
+ 873,
86
+ 893,
87
+ 902,
88
+ 918,
89
+ 922,
90
+ 931,
91
+ 1350,
92
+ 1853,
93
+ 1982,
94
+ 2460,
95
+ 2627,
96
+ 3246,
97
+ 3253,
98
+ 3268,
99
+ 3536,
100
+ 3846,
101
+ 3961,
102
+ 4183,
103
+ 4667,
104
+ 6585,
105
+ 6647,
106
+ 7273,
107
+ 9061,
108
+ 9383,
109
+ 10428,
110
+ 10929,
111
+ 11938,
112
+ 12033,
113
+ 12331,
114
+ 12562,
115
+ 13793,
116
+ 14157,
117
+ 14635,
118
+ 15265,
119
+ 15618,
120
+ 16553,
121
+ 16604,
122
+ 18362,
123
+ 18956,
124
+ 20075,
125
+ 21675,
126
+ 22520,
127
+ 26130,
128
+ 26161,
129
+ 26435,
130
+ 28279,
131
+ 29464,
132
+ 31650,
133
+ 32302,
134
+ 32470,
135
+ 36865,
136
+ 42863,
137
+ 47425,
138
+ 49870,
139
+ 50254,
140
+ 50258,
141
+ 50360,
142
+ 50361,
143
+ 50362
144
+ ],
145
+ "torch_dtype": "float32",
146
+ "transformers_version": "4.27.0.dev0",
147
+ "use_cache": true,
148
+ "vocab_size": 51865
149
+ },
150
+ "int8": false,
151
+ "max_batch_size": 1,
152
+ "n_mels": 80,
153
+ "name": "whisper",
154
+ "num_heads": 12,
155
+ "num_languages": 99,
156
+ "num_layers": 12,
157
+ "precision": "float16",
158
+ "quant_mode": 0,
159
+ "tensor_parallel": 1,
160
+ "tensorrt": "9.2.0.post12.dev5",
161
+ "use_refit": false
162
+ },
163
+ "plugin_config": {
164
+ "attention_qk_half_accumulation": false,
165
+ "bert_attention_plugin": false,
166
+ "context_fmha_type": 1,
167
+ "gemm_plugin": "float16",
168
+ "gpt_attention_plugin": "float16",
169
+ "identity_plugin": false,
170
+ "layernorm_plugin": false,
171
+ "layernorm_quantization_plugin": false,
172
+ "lookup_plugin": false,
173
+ "lora_plugin": false,
174
+ "multi_block_mode": false,
175
+ "nccl_plugin": false,
176
+ "paged_kv_cache": false,
177
+ "quantize_per_token_plugin": false,
178
+ "quantize_tensor_plugin": false,
179
+ "remove_input_padding": true,
180
+ "rmsnorm_plugin": false,
181
+ "rmsnorm_quantization_plugin": false,
182
+ "smooth_quant_gemm_plugin": false,
183
+ "tokens_per_block": 0,
184
+ "use_custom_all_reduce": false,
185
+ "use_paged_context_fmha": false,
186
+ "weight_only_groupwise_quant_matmul_plugin": false,
187
+ "weight_only_quant_matmul_plugin": false
188
+ }
189
+ }
encoder/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_name_or_path": "openai/whisper-small", "activation_dropout": 0.0, "activation_function": "gelu", "architectures": ["WhisperForConditionalGeneration"], "attention_dropout": 0.0, "begin_suppress_tokens": [220, 50257], "bos_token_id": 50257, "d_model": 768, "decoder_attention_heads": 12, "decoder_ffn_dim": 3072, "decoder_layerdrop": 0.0, "decoder_layers": 12, "decoder_start_token_id": 50258, "dropout": 0.0, "encoder_attention_heads": 12, "encoder_ffn_dim": 3072, "encoder_layerdrop": 0.0, "encoder_layers": 12, "eos_token_id": 50257, "forced_decoder_ids": [[1, 50259], [2, 50359], [3, 50363]], "init_std": 0.02, "is_encoder_decoder": true, "max_length": 448, "max_source_positions": 1500, "max_target_positions": 448, "model_type": "whisper", "num_hidden_layers": 12, "num_mel_bins": 80, "pad_token_id": 50257, "scale_embedding": false, "suppress_tokens": [1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93, 359, 503, 522, 542, 873, 893, 902, 918, 922, 931, 1350, 1853, 1982, 2460, 2627, 3246, 3253, 3268, 3536, 3846, 3961, 4183, 4667, 6585, 6647, 7273, 9061, 9383, 10428, 10929, 11938, 12033, 12331, 12562, 13793, 14157, 14635, 15265, 15618, 16553, 16604, 18362, 18956, 20075, 21675, 22520, 26130, 26161, 26435, 28279, 29464, 31650, 32302, 32470, 36865, 42863, 47425, 49870, 50254, 50258, 50360, 50361, 50362], "torch_dtype": "float32", "transformers_version": "4.27.0.dev0", "use_cache": true, "vocab_size": 51865, "num_layers": 12, "max_sequence_length": 448, "hidden_size": 768}
encoder/timings.cache ADDED
Binary file (217 kB). View file
 
encoder/whisper_float16_tp1_rank0.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec1582bb8aacee7c9005e0a3eb791de8c427825d64d927f4c0461aad071190a
3
+ size 183202884