1 {
2 "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3 "activation_dropout": 0.0,
4 "apply_spec_augment": true,
5 "architectures": [
6 "Wav2Vec2ForCTC"
7 ],
8 "attention_dropout": 0.1,
9 "bos_token_id": 1,
10 "codevector_dim": 768,
11 "contrastive_logits_temperature": 0.1,
12 "conv_bias": true,
13 "conv_dim": [
14 512,
15 512,
16 512,
17 512,
18 512,
19 512,
20 512
21 ],
22 "conv_kernel": [
23 10,
24 3,
25 3,
26 3,
27 3,
28 2,
29 2
30 ],
31 "conv_stride": [
32 5,
33 2,
34 2,
35 2,
36 2,
37 2,
38 2
39 ],
40 "ctc_loss_reduction": "mean",
41 "ctc_zero_infinity": false,
42 "diversity_loss_weight": 0.1,
43 "do_stable_layer_norm": true,
44 "eos_token_id": 2,
45 "feat_extract_activation": "gelu",
46 "feat_extract_dropout": 0.0,
47 "feat_extract_norm": "layer",
48 "feat_proj_dropout": 0.0,
49 "feat_quantizer_dropout": 0.0,
50 "final_dropout": 0.0,
51 "gradient_checkpointing": true,
52 "hidden_act": "gelu",
53 "hidden_dropout": 0.1,
54 "hidden_size": 1024,
55 "initializer_range": 0.02,
56 "intermediate_size": 4096,
57 "layer_norm_eps": 1e-05,
58 "layerdrop": 0.1,
59 "mask_channel_length": 10,
60 "mask_channel_min_space": 1,
61 "mask_channel_other": 0.0,
62 "mask_channel_prob": 0.0,
63 "mask_channel_selection": "static",
64 "mask_feature_length": 10,
65 "mask_feature_prob": 0.0,
66 "mask_time_length": 10,
67 "mask_time_min_space": 1,
68 "mask_time_other": 0.0,
69 "mask_time_prob": 0.05,
70 "mask_time_selection": "static",
71 "model_type": "wav2vec2",
72 "num_attention_heads": 16,
73 "num_codevector_groups": 2,
74 "num_codevectors_per_group": 320,
75 "num_conv_pos_embedding_groups": 16,
76 "num_conv_pos_embeddings": 128,
77 "num_feat_extract_layers": 7,
78 "num_hidden_layers": 24,
79 "num_negatives": 100,
80 "pad_token_id": 69,
81 "proj_codevector_dim": 768,
82 "torch_dtype": "float32",
83 "transformers_version": "4.9.1",
84 "vocab_size": 70
85 }
86