{
  "_name_or_path": "./",
  "activation_dropout": 0.1,
  "apply_spec_augment": true,
  "architectures": [
    "UniSpeechSatForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 1,
  "classifier_proj_size": 256,
  "codevector_dim": 256,
  "contrastive_logits_temperature": 0.1,
  "conv_bias": false,
  "conv_dim": [
    32,
    32,
    32
  ],
  "conv_kernel": [
    8,
    8,
    8
  ],
  "conv_stride": [
    4,
    4,
    4
  ],
  "ctc_loss_reduction": "sum",
  "ctc_zero_infinity": false,
  "diversity_loss_weight": 0.1,
  "do_stable_layer_norm": true,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_dropout": 0.0,
  "feat_extract_norm": "layer",
  "feat_proj_dropout": 0.1,
  "feat_quantizer_dropout": 0.0,
  "final_dropout": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
  "hidden_dropout_prob": 0.1,
  "hidden_size": 16,
  "initializer_range": 0.02,
  "intermediate_size": 20,
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.1,
  "mask_feature_length": 10,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_prob": 0.05,
  "model_type": "unispeech-sat",
  "num_attention_heads": 2,
  "num_clusters": 504,
  "num_codevector_groups": 2,
  "num_codevectors_per_group": 320,
  "num_conv_pos_embedding_groups": 2,
  "num_conv_pos_embeddings": 16,
  "num_feat_extract_layers": 3,
  "num_hidden_layers": 4,
  "num_negatives": 10,
  "pad_token_id": 0,
  "proj_codevector_dim": 256,
  "replace_prob": 0.5,
  "tdnn_dilation": [
    1,
    2,
    3,
    1,
    1
  ],
  "tdnn_dim": [
    512,
    512,
    512,
    512,
    1500
  ],
  "tdnn_kernel": [
    5,
    3,
    3,
    1,
    1
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.16.0.dev0",
  "use_weighted_layer_sum": false,
  "vocab_size": 32,
  "xvector_output_dim": 512
}
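
For reference, a configuration like the one above can be loaded with the Hugging Face transformers library to instantiate the architecture named in "architectures" with randomly initialized weights. This is a minimal sketch; the local filename config.json is an assumption, and no pretrained checkpoint is loaded.

# Minimal sketch, assuming this file is saved locally as config.json
# and a transformers version compatible with UniSpeechSat (>= 4.16) is installed.
from transformers import UniSpeechSatConfig, UniSpeechSatForSequenceClassification

# Parse the JSON config into a UniSpeechSatConfig object.
config = UniSpeechSatConfig.from_json_file("config.json")

# Build the model listed under "architectures" from the config alone;
# weights are randomly initialized (no checkpoint is downloaded here).
model = UniSpeechSatForSequenceClassification(config)

print(model.config.hidden_size)        # 16
print(model.config.num_hidden_layers)  # 4
print(model.config.classifier_proj_size)  # 256

The small dimensions in this config (hidden_size 16, 3 conv feature-extractor layers, 4 transformer layers) suggest it is intended as a tiny testing configuration rather than a full-size UniSpeechSat model.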