patrickvonplaten commited on
Commit
c43348b
1 Parent(s): 9aa9e5f
config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "apply_spec_augment": true,
4
+ "architectures": [
5
+ "Wav2Vec2ForCTC"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 1,
9
+ "classifier_proj_size": 256,
10
+ "codevector_dim": 768,
11
+ "contrastive_logits_temperature": 0.1,
12
+ "conv_bias": true,
13
+ "conv_dim": [
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512
21
+ ],
22
+ "conv_kernel": [
23
+ 10,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 3,
28
+ 2,
29
+ 2
30
+ ],
31
+ "conv_stride": [
32
+ 5,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2
39
+ ],
40
+ "ctc_loss_reduction": "sum",
41
+ "ctc_zero_infinity": false,
42
+ "diversity_loss_weight": 0.1,
43
+ "do_stable_layer_norm": true,
44
+ "eos_token_id": 2,
45
+ "feat_extract_activation": "gelu",
46
+ "feat_extract_dropout": 0.0,
47
+ "feat_extract_norm": "layer",
48
+ "feat_proj_dropout": 0.1,
49
+ "feat_quantizer_dropout": 0.0,
50
+ "final_dropout": 0.0,
51
+ "gradient_checkpointing": false,
52
+ "hidden_act": "gelu",
53
+ "hidden_dropout": 0.1,
54
+ "hidden_size": 1024,
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 4096,
57
+ "layer_norm_eps": 1e-05,
58
+ "layerdrop": 0.1,
59
+ "mask_channel_length": 10,
60
+ "mask_channel_min_space": 1,
61
+ "mask_channel_other": 0.0,
62
+ "mask_channel_prob": 0.0,
63
+ "mask_channel_selection": "static",
64
+ "mask_feature_length": 10,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_space": 1,
68
+ "mask_time_other": 0.0,
69
+ "mask_time_prob": 0.075,
70
+ "mask_time_selection": "static",
71
+ "model_type": "wav2vec2",
72
+ "num_attention_heads": 16,
73
+ "num_codevector_groups": 2,
74
+ "num_codevectors_per_group": 320,
75
+ "num_conv_pos_embedding_groups": 16,
76
+ "num_conv_pos_embeddings": 128,
77
+ "num_feat_extract_layers": 7,
78
+ "num_hidden_layers": 24,
79
+ "num_negatives": 100,
80
+ "pad_token_id": 0,
81
+ "proj_codevector_dim": 768,
82
+ "torch_dtype": "float32",
83
+ "transformers_version": "4.13.0.dev0",
84
+ "use_weighted_layer_sum": false,
85
+ "vocab_size": 392
86
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3173bde9e9ce490fa0f989e413c42f25bc1820c020adc1e6b9b87025b3cfcc5e
3
+ size 1263535127
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"<s>": 1, "<pad>": 0, "</s>": 2, "<unk>": 3, "n": 4, "s": 5, "t": 6, "ə": 7, "l": 8, "a": 9, "i": 10, "k": 11, "d": 12, "m": 13, "ɛ": 14, "ɾ": 15, "e": 16, "ɪ": 17, "p": 18, "o": 19, "ɐ": 20, "z": 21, "ð": 22, "f": 23, "j": 24, "v": 25, "b": 26, "ɹ": 27, "ʁ": 28, "ʊ": 29, "iː": 30, "r": 31, "w": 32, "ʌ": 33, "u": 34, "ɡ": 35, "æ": 36, "aɪ": 37, "ʃ": 38, "h": 39, "ɔ": 40, "ɑː": 41, "ŋ": 42, "ɚ": 43, "eɪ": 44, "β": 45, "uː": 46, "y": 47, "ɑ̃": 48, "oʊ": 49, "ᵻ": 50, "eː": 51, "θ": 52, "aʊ": 53, "ts": 54, "oː": 55, "ɔ̃": 56, "ɣ": 57, "ɜ": 58, "ɑ": 59, "dʒ": 60, "əl": 61, "x": 62, "ɜː": 63, "ç": 64, "ʒ": 65, "tʃ": 66, "ɔː": 67, "ɑːɹ": 68, "ɛ̃": 69, "ʎ": 70, "ɔːɹ": 71, "ʋ": 72, "aː": 73, "ɕ": 74, "œ": 75, "ø": 76, "oːɹ": 77, "ɲ": 78, "yː": 79, "ʔ": 80, "iə": 81, "i5": 82, "s.": 83, "tɕ": 84, "??": 85, "nʲ": 86, "ɛː": 87, "œ̃": 88, "ɭ": 89, "ɔø": 90, "ʑ": 91, "tʲ": 92, "ɨ": 93, "ɛɹ": 94, "ts.": 95, "rʲ": 96, "ɪɹ": 97, "ɭʲ": 98, "i.5": 99, "ɔɪ": 100, "q": 101, "sʲ": 102, "u5": 103, "ʊɹ": 104, "iɜ": 105, "a5": 106, "iɛ5": 107, "øː": 108, "ʕ": 109, "ja": 110, "əɜ": 111, "th": 112, "ɑ5": 113, "oɪ": 114, "dʲ": 115, "ə5": 116, "tɕh": 117, "ts.h": 118, "mʲ": 119, "ɯ": 120, "dʑ": 121, "vʲ": 122, "e̞": 123, "tʃʲ": 124, "ei5": 125, "o5": 126, "onɡ5": 127, "ɑu5": 128, "iɑ5": 129, "ai5": 130, "aɪɚ": 131, "kh": 132, "ə1": 133, "ʐ": 134, "i2": 135, "ʉ": 136, "ħ": 137, "t[": 138, "aɪə": 139, "ʲ": 140, "ju": 141, "ə2": 142, "u2": 143, "oɜ": 144, "pː": 145, "iɛɜ": 146, "ou5": 147, "y5": 148, "uɜ": 149, "tː": 150, "uo5": 151, "d[": 152, "uoɜ": 153, "tsh": 154, "ɑɜ": 155, "ɵ": 156, "i̪5": 157, "uei5": 158, "ɟ": 159, "aɜ": 160, "ɑɨ": 161, "i.ɜ": 162, "eʊ": 163, "o2": 164, "ɐ̃": 165, "ä": 166, "pʲ": 167, "kʲ": 168, "n̩": 169, "ɒ": 170, "ph": 171, "ɑu2": 172, "uɨ": 173, "əɪ": 174, "ɫ": 175, "ɬ": 176, "yɜ": 177, "bʲ": 178, "ɑ2": 179, "s̪": 180, "aiɜ": 181, "χ": 182, "ɐ̃ʊ̃": 183, "1": 184, "ə4": 185, "yæɜ": 186, "a2": 187, "ɨː": 188, "t̪": 189, "iouɜ": 190, "ũ": 191, "onɡɜ": 192, "aɨ": 193, "iɛ2": 194, "ɔɨ": 195, "ɑuɜ": 196, "o̞": 197, "ei2": 198, "iou2": 199, "c": 200, "kː": 201, "y2": 202, "ɖ": 203, "oe": 204, "dˤ": 205, "yɛɜ": 206, "əʊ": 207, "S": 208, "ɡʲ": 209, "onɡ2": 210, "u\"": 211, "eiɜ": 212, "ʈ": 213, "ɯᵝ": 214, "iou5": 215, "dZ": 216, "r̝̊": 217, "i.2": 218, "tS": 219, "s^": 220, "ʝ": 221, "yə5": 222, "iɑɜ": 223, "uə5": 224, "pf": 225, "ɨu": 226, "iɑ2": 227, "ou2": 228, "ər2": 229, "fʲ": 230, "ai2": 231, "r̝": 232, "uəɜ": 233, "ɳ": 234, "əɨ": 235, "ua5": 236, "uɪ": 237, "ɽ": 238, "bː": 239, "yu5": 240, "uo2": 241, "yɛ5": 242, "l̩": 243, "ɻ": 244, "ərɜ": 245, "ʂ": 246, "i̪2": 247, "ouɜ": 248, "uaɜ": 249, "a.": 250, "a.ː": 251, "yæ5": 252, "dː": 253, "r̩": 254, "ee": 255, "ɪu": 256, "ər5": 257, "i̪ɜ": 258, "æi": 259, "u:": 260, "i.ː": 261, "t^": 262, "o1": 263, "ɪ^": 264, "ai": 265, "ueiɜ": 266, "æː": 267, "ɛɪ": 268, "eə": 269, "i.": 270, "ɴ": 271, "ie": 272, "ua2": 273, "ɑ1": 274, "o4": 275, "tʃː": 276, "o:": 277, "ɑ:": 278, "u1": 279, "N": 280, "i̪1": 281, "au": 282, "yæ2": 283, "u.": 284, "qː": 285, "yəɜ": 286, "y:": 287, "kʰ": 288, "tʃʰ": 289, "iʊ": 290, "sx": 291, "õ": 292, "uo": 293, "tʰ": 294, "uai5": 295, "bʰ": 296, "u.ː": 297, "uə2": 298, "ʊə": 299, "d^": 300, "s̪ː": 301, "yiɜ": 302, "dʰ": 303, "r.": 304, "oe:": 305, "i1": 306, "ɟː": 307, "yu2": 308, "nʲʲ": 309, "i̪4": 310, "uei2": 311, "tsʲ": 312, "ɸ": 313, "ĩ": 314, "ɑ4": 315, "t̪ː": 316, "eɑ": 317, "u4": 318, "e:": 319, "tsː": 320, "ʈʰ": 321, "ɡʰ": 322, "ɯɯ": 323, "dʒʲ": 324, "ʂʲ": 325, "X": 326, "ɵː": 327, "uaiɜ": 328, "tɕʲ": 329, "ã": 330, "t^ː": 331, "ẽː": 332, "yɛ2": 333, "cː": 334, "i.1": 335, "ɛʊ": 336, "dˤdˤ": 337, "dʒː": 338, "i4": 339, "ɡː": 340, "yi": 341, "ɕʲ": 342, "ɟʰ": 343, "pʰ": 344, "dʑʲ": 345, "yuɜ": 346, "ua1": 347, "ua4": 348, "æiː": 349, "ɐɐ": 350, "ui": 351, "iou1": 352, "ʊː": 353, "a1": 354, "iou4": 355, "cʰ": 356, "iɛ1": 357, "yə2": 358, "ɖʰ": 359, "ẽ": 360, "ʒʲ": 361, "ää": 362, "ər4": 363, "iːː": 364, "ɪː": 365, "iɑ1": 366, "ər1": 367, "œː": 368, "øi": 369, "ɪuː": 370, "cʰcʰ": 371, "əː1": 372, "iː1": 373, "ũ": 374, "kʰː": 375, "o̞o̞": 376, "xʲ": 377, "ou1": 378, "iɛ4": 379, "e̞e̞": 380, "y1": 381, "dzː": 382, "dʲʲ": 383, "dʰː": 384, "ɯᵝɯᵝ": 385, "lː": 386, "uo1": 387, "i.4": 388, "i:": 389, "yɛ5ʲ": 390, "a4": 391}