|
{ |
|
"_name_or_path": "NbAiLab/nb-whisper-small", |
|
"activation_dropout": 0.1, |
|
"activation_function": "gelu", |
|
"alignment_heads": [ |
|
[ |
|
5, |
|
3 |
|
], |
|
[ |
|
5, |
|
9 |
|
], |
|
[ |
|
8, |
|
0 |
|
], |
|
[ |
|
8, |
|
4 |
|
], |
|
[ |
|
8, |
|
7 |
|
], |
|
[ |
|
8, |
|
8 |
|
], |
|
[ |
|
9, |
|
0 |
|
], |
|
[ |
|
9, |
|
7 |
|
], |
|
[ |
|
9, |
|
9 |
|
], |
|
[ |
|
10, |
|
5 |
|
] |
|
], |
|
"apply_spec_augment": false, |
|
"architectures": [ |
|
"WhisperForConditionalGeneration" |
|
], |
|
"attention_dropout": 0, |
|
"begin_suppress_tokens": null, |
|
"bos_token_id": 50257, |
|
"classifier_proj_size": 256, |
|
"d_model": 768, |
|
"decoder_attention_heads": 12, |
|
"decoder_ffn_dim": 3072, |
|
"decoder_layerdrop": 0, |
|
"decoder_layers": 2, |
|
"decoder_start_token_id": 50258, |
|
"dropout": 0, |
|
"encoder_attention_heads": 12, |
|
"encoder_ffn_dim": 3072, |
|
"encoder_layerdrop": 0, |
|
"encoder_layers": 12, |
|
"eos_token_id": 50257, |
|
"forced_decoder_ids": [ |
|
[ |
|
1, |
|
50259 |
|
], |
|
[ |
|
2, |
|
50359 |
|
], |
|
[ |
|
3, |
|
50363 |
|
] |
|
], |
|
"init_std": 0.02, |
|
"is_encoder_decoder": true, |
|
"lang_ids": [ |
|
50259, |
|
50260, |
|
50261, |
|
50262, |
|
50263, |
|
50264, |
|
50265, |
|
50266, |
|
50267, |
|
50268, |
|
50269, |
|
50270, |
|
50271, |
|
50272, |
|
50273, |
|
50274, |
|
50275, |
|
50276, |
|
50277, |
|
50278, |
|
50279, |
|
50280, |
|
50281, |
|
50282, |
|
50283, |
|
50284, |
|
50285, |
|
50286, |
|
50287, |
|
50288, |
|
50289, |
|
50290, |
|
50291, |
|
50292, |
|
50293, |
|
50294, |
|
50295, |
|
50296, |
|
50297, |
|
50298, |
|
50299, |
|
50300, |
|
50301, |
|
50302, |
|
50303, |
|
50304, |
|
50305, |
|
50306, |
|
50307, |
|
50308, |
|
50309, |
|
50310, |
|
50311, |
|
50312, |
|
50313, |
|
50314, |
|
50315, |
|
50316, |
|
50317, |
|
50318, |
|
50319, |
|
50320, |
|
50321, |
|
50322, |
|
50323, |
|
50324, |
|
50325, |
|
50326, |
|
50327, |
|
50328, |
|
50329, |
|
50330, |
|
50331, |
|
50332, |
|
50333, |
|
50334, |
|
50335, |
|
50336, |
|
50337, |
|
50338, |
|
50339, |
|
50340, |
|
50341, |
|
50342, |
|
50343, |
|
50344, |
|
50345, |
|
50346, |
|
50347, |
|
50348, |
|
50349, |
|
50350, |
|
50351, |
|
50352, |
|
50353, |
|
50354, |
|
50355, |
|
50356, |
|
50357 |
|
], |
|
"mask_feature_length": 10, |
|
"mask_feature_min_masks": 0, |
|
"mask_feature_prob": 0, |
|
"mask_time_length": 10, |
|
"mask_time_min_masks": 2, |
|
"mask_time_prob": 0.05, |
|
"max_length": null, |
|
"max_source_positions": 1500, |
|
"max_target_positions": 448, |
|
"median_filter_width": 7, |
|
"model_type": "whisper", |
|
"num_hidden_layers": 12, |
|
"num_mel_bins": 80, |
|
"pad_token_id": 50257, |
|
"scale_embedding": false, |
|
"suppress_ids": [ |
|
1, |
|
2, |
|
7, |
|
8, |
|
9, |
|
10, |
|
14, |
|
25, |
|
26, |
|
27, |
|
28, |
|
29, |
|
31, |
|
58, |
|
59, |
|
60, |
|
61, |
|
62, |
|
63, |
|
90, |
|
91, |
|
92, |
|
93, |
|
359, |
|
503, |
|
522, |
|
542, |
|
873, |
|
893, |
|
902, |
|
918, |
|
922, |
|
931, |
|
1350, |
|
1853, |
|
1982, |
|
2460, |
|
2627, |
|
3246, |
|
3253, |
|
3268, |
|
3536, |
|
3846, |
|
3961, |
|
4183, |
|
4667, |
|
6585, |
|
6647, |
|
7273, |
|
9061, |
|
9383, |
|
10428, |
|
10929, |
|
11938, |
|
12033, |
|
12331, |
|
12562, |
|
13793, |
|
14157, |
|
14635, |
|
15265, |
|
15618, |
|
16553, |
|
16604, |
|
18362, |
|
18956, |
|
20075, |
|
21675, |
|
22520, |
|
26130, |
|
26161, |
|
26435, |
|
28279, |
|
29464, |
|
31650, |
|
32302, |
|
32470, |
|
36865, |
|
42863, |
|
47425, |
|
49870, |
|
50254, |
|
50258, |
|
50358, |
|
50359, |
|
50360, |
|
50361, |
|
50362 |
|
], |
|
"suppress_ids_begin": [ |
|
220, |
|
50257 |
|
], |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.45.2", |
|
"use_cache": true, |
|
"use_weighted_layer_sum": false, |
|
"vocab_size": 51865 |
|
} |
|
|