patrickvonplaten committed on
Commit
2598051
1 Parent(s): eb8c880
config.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "apply_spec_augment": true,
4
+ "architectures": [
5
+ "Wav2Vec2ForCTC"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 1,
9
+ "classifier_proj_size": 256,
10
+ "codevector_dim": 768,
11
+ "contrastive_logits_temperature": 0.1,
12
+ "conv_bias": true,
13
+ "conv_dim": [
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512
21
+ ],
22
+ "conv_kernel": [
23
+ 10,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 3,
28
+ 2,
29
+ 2
30
+ ],
31
+ "conv_stride": [
32
+ 5,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2
39
+ ],
40
+ "ctc_loss_reduction": "sum",
41
+ "ctc_zero_infinity": false,
42
+ "diversity_loss_weight": 0.1,
43
+ "do_stable_layer_norm": true,
44
+ "eos_token_id": 2,
45
+ "feat_extract_activation": "gelu",
46
+ "feat_extract_dropout": 0.0,
47
+ "feat_extract_norm": "layer",
48
+ "feat_proj_dropout": 0.1,
49
+ "feat_quantizer_dropout": 0.0,
50
+ "final_dropout": 0.0,
51
+ "gradient_checkpointing": false,
52
+ "hidden_act": "gelu",
53
+ "hidden_dropout": 0.1,
54
+ "hidden_size": 1024,
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 4096,
57
+ "layer_norm_eps": 1e-05,
58
+ "layerdrop": 0.1,
59
+ "mask_channel_length": 10,
60
+ "mask_channel_min_space": 1,
61
+ "mask_channel_other": 0.0,
62
+ "mask_channel_prob": 0.0,
63
+ "mask_channel_selection": "static",
64
+ "mask_feature_length": 10,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_space": 1,
68
+ "mask_time_other": 0.0,
69
+ "mask_time_prob": 0.075,
70
+ "mask_time_selection": "static",
71
+ "model_type": "wav2vec2",
72
+ "num_attention_heads": 16,
73
+ "num_codevector_groups": 2,
74
+ "num_codevectors_per_group": 320,
75
+ "num_conv_pos_embedding_groups": 16,
76
+ "num_conv_pos_embeddings": 128,
77
+ "num_feat_extract_layers": 7,
78
+ "num_hidden_layers": 24,
79
+ "num_negatives": 100,
80
+ "pad_token_id": 0,
81
+ "proj_codevector_dim": 768,
82
+ "torch_dtype": "float32",
83
+ "transformers_version": "4.13.0.dev0",
84
+ "use_weighted_layer_sum": false,
85
+ "vocab_size": 446
86
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e4fbdebd17772979cc892d15fbf831a3f4715fb9ab08586325ef57eaddc8868
3
+ size 1263756567
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"<s>": 1, "<pad>": 0, "</s>": 2, "<unk>": 3, "a": 4, "n": 5, "t": 6, "s": 7, "l": 8, "i": 9, "e": 10, "k": 11, "ɪ": 12, "d": 13, "m": 14, "ə": 15, "ɛ": 16, "o": 17, "p": 18, "u": 19, "j": 20, "b": 21, "z": 22, "r": 23, "h": 24, "ɔ": 25, "w": 26, "ʁ": 27, "f": 28, "v": 29, "ɾ": 30, "ŋ": 31, "ʊ": 32, "ɑ": 33, "ʃ": 34, "ɡ": 35, "ɻ": 36, "aː": 37, "æ": 38, "iː": 39, "ʔ": 40, "ɒ": 41, "θ": 42, "ɜ": 43, "y": 44, "eɪ": 45, "ð": 46, "uː": 47, "R": 48, "x": 49, "ɨ": 50, "g": 51, "ɔː": 52, "aɪ": 53, "dʒ": 54, "eː": 55, "ɲ": 56, "ʌ": 57, "əʊ": 58, "ʈ": 59, "ʒ": 60, "tʃ": 61, "n̩": 62, "oː": 63, "ɘ": 64, "ɐ": 65, "ɑː": 66, "ɜː": 67, "ɔ̃": 68, "ai": 69, "l̩": 70, "ã": 71, "ɑ̃": 72, "tʰ": 73, "ts": 74, "β": 75, "ɫ": 76, "ʋ": 77, "ẽ": 78, "œ": 79, "ɓ": 80, "ɣ": 81, "aʊ": 82, "c": 83, "ɗ": 84, "ɥ": 85, "m̩": 86, "tɕ": 87, "ʏ": 88, "ɪə": 89, "kʰ": 90, "ʂ": 91, "ɯ": 92, "ɽ": 93, "ɛə": 94, "ɖ": 95, "q": 96, "au": 97, "ɠ": 98, "ɕ": 99, "ø": 100, "tʃʰ": 101, "ɳ": 102, "õ": 103, "ɭ": 104, "pʰ": 105, "əː": 106, "’": 107, "ɛ̃": 108, "ɹ": 109, "aːɪ": 110, "ʈʂ": 111, "ɨə": 112, "ɔɪ": 113, "ɟ": 114, "lʲ": 115, "ɡj": 116, "əɪ": 117, "oɪ": 118, "ʎ": 119, "ʉ": 120, "oi": 121, "ʊə": 122, "tʲ": 123, "aːʊ": 124, "ɦ": 125, "dⁿ": 126, "iə": 127, "rʲ": 128, "kw": 129, "nʲ": 130, "ĩ": 131, "æ̃": 132, "sʲ": 133, "aj": 134, "nd": 135, "əːɪ": 136, "ç": 137, "ʕ": 138, "ʐ": 139, "ˌi": 140, "bʰ": 141, "tɕʰ": 142, "ˌa": 143, "ŋɡ": 144, "ʈʂʰ": 145, "ʝ": 146, "uə": 147, "b̻": 148, "dʲ": 149, "ɔy": 150, "tʂ": 151, "ŋw": 152, "dz": 153, "ũ": 154, "ʄ": 155, "iəʊ": 156, "ɕʲ": 157, "vʲ": 158, "ɝ": 159, "ow": 160, "ɬ": 161, "mʲ": 162, "mb": 163, "ħ": 164, "dʰ": 165, "ɡb": 166, "ɰ": 167, "uo": 168, "tⁿ": 169, "ɨəɪ": 170, "iʲ": 171, "tsʰ": 172, "ʐʲ": 173, "aw": 174, "ej": 175, "ie": 176, "ɤ": 177, "ɛː": 178, "ɡʰ": 179, "ɰ̃": 180, "kp": 181, "ɡ̼": 182, "pʲ": 183, "bʲ": 184, "ɡw": 185, "r̝": 186, "ʈʰ": 187, "ɚ": 188, "aʲ": 189, "sˤ": 190, "oʲ": 191, "ɐ̃": 192, "ɲɟ": 193, "tˤ": 194, "ǀ": 195, "øː": 196, "ʂʲ": 197, 
"ˌu": 198, "ɔaː": 199, "yː": 200, "uɪə": 201, "ɮ": 202, "bj": 203, "pf": 204, "dˤ": 205, "ʲ": 206, "ǃ": 207, "ɨʲ": 208, "ɱ": 209, "eʲ": 210, "ˌe": 211, "dʒʰ": 212, "oaɪ": 213, "ou": 214, "ɨ̃": 215, "ʉː": 216, "ɐj": 217, "uɪ": 218, "oj": 219, "ǁ": 220, "ɡǀ̤": 221, "uəɪ": 222, "ù": 223, "ɑ̃ː": 224, "ɛʊ": 225, "xw": 226, "sw": 227, "æʲ": 228, "kʲ": 229, "ɡʲ": 230, "ðˤ": 231, "fʲ": 232, "dʱ": 233, "yʲ": 234, "̇": 235, "ǀʰ": 236, "ɧ": 237, "ɨː": 238, "ː": 239, "hw": 240, "uiː": 241, "rj": 242, "ɔɛ": 243, "zʲ": 244, "ˌo": 245, "á": 246, "n̪": 247, "ɔ̃ː": 248, "ɑː̌": 249, "iʊ": 250, "zj": 251, "tw": 252, "bʱ": 253, "ˌã": 254, "í": 255, "ɥj": 256, "ŋ̩": 257, "sj": 258, "ɡǃ̤": 259, "ǃʰ": 260, "ɵ": 261, "ü": 262, "â": 263, "ö": 264, "ue": 265, "ɨɪ": 266, "ó": 267, "ɒ̃": 268, "ʃʲ": 269, "ˌə": 270, "ɖʰ": 271, "ˌɨ": 272, "ř": 273, "zʱ": 274, "œ̃": 275, "ō": 276, "t̪": 277, "ɪ̌": 278, "ɥi": 279, "ë": 280, "ia": 281, "iː̂": 282, "ʊ̃": 283, "ˌẽ": 284, "ˌɛ": 285, "oː̂": 286, "ɕː": 287, "ɨʊ": 288, "uː̌": 289, "ʒʲ": 290, "ɪ̂": 291, "ɔa": 292, "õ": 293, "lw": 294, "rw": 295, "ndʒ": 296, "tʃʲ": 297, "æː̌": 298, "ˌɑ": 299, "mw": 300, "d̪": 301, "ě": 302, "eː̂": 303, "oː̌": 304, "iː̌": 305, "tsʲ": 306, "ʊ̂": 307, "ˌĩ": 308, "oũ": 309, "nw": 310, "∅": 311, "ň": 312, "ã": 313, "ɨəʊ": 314, "ɛ̂": 315, "æː": 316, "ˌɐ": 317, "ʊ̌": 318, "ua": 319, "dj": 320, "kx": 321, "t̠ʃ": 322, "ˌuː": 323, "æe̯": 324, "uː̂": 325, "iw": 326, "ɔ̌": 327, "ʊ̯": 328, "ǁʰ": 329, "ʒʱ": 330, "pj": 331, "ˌõ": 332, "xʲ": 333, "eː̌": 334, "dʒʲ": 335, "dw": 336, "mː": 337, "eʊ": 338, "fj": 339, "nʲ̌": 340, "rʲ̌": 341, "tʃw": 342, "ɔ̂": 343, "zw": 344, "ɡǁ̤": 345, "bw": 346, "â": 347, "ɡʱ": 348, "dʒw": 349, "ʰ": 350, "lj": 351, "vʱ": 352, "oaɪː": 353, "ʃʰ": 354, "kh": 355, "ɫ̌": 356, "ʌi": 357, "uj": 358, "æ̃ː": 359, "th": 360, "ɖʱ": 361, "fw": 362, "ui": 363, "ˌũ": 364, "lʲ̌": 365, "zː": 366, "ʝʰ": 367, "ʱ": 368, "lː": 369, "nː": 370, "ʔʷa": 371, "nð": 372, "ǀ̃": 373, "ʃw": 374, "ɮʱ": 375, "ɐw": 376, "lʰ": 377, "ɑː̂": 
378, "gʱ": 379, "ǀʱ": 380, "dː": 381, "fʷa": 382, "ˌiː": 383, "ˌej": 384, "ɣʲ": 385, "jʲ": 386, "kj": 387, "mʱ": 388, "m̌": 389, "mʲ̌": 390, "ǃʱ": 391, "rː": 392, "ˌɐw": 393, "̃": 394, "oi̯": 395, "ˌaj": 396, "dʒʱ": 397, "ǃ̃": 398, "ʊi": 399, "2": 400, "ɽʱ": 401, "ɟː": 402, "tsʷa": 403, "ɓ̥": 404, ":": 405, "tsː": 406, "jː": 407, "ɲw": 408, "ʌʋ": 409, "ˌʌ": 410, "ˌɐj": 411, "ˌoj": 412, "ǁʱ": 413, "ɗʒ": 414, "uəː": 415, "tj": 416, "vj": 417, "ǐ": 418, "۷": 419, "3": 420, "tː": 421, "ʃː": 422, "vw": 423, "mj": 424, "ˌuj": 425, "v̤": 426, "æː̂": 427, "gʰ": 428, "4": 429, "hː": 430, "dzː": 431, "ɲː": 432, "tʃː": 433, "ʉʲ": 434, "ʒw": 435, "kʷa": 436, "tʷa": 437, "pw": 438, "ɔi": 439, "yʉ": 440, "ˌɜ": 441, "ɮ̈": 442, "ᵑǀʱ": 443, "dʒ̈": 444, "ᵑǀ": 445}