ntinouldinho commited on
Commit
6f5f44b
1 Parent(s): c1b4e4e

Upload decoder with 6-gram lm

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 55,
3
+ "<s>": 54
4
+ }
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": [" ", "a", "e", "g", "h", "m", "n", "o", "r", "t", "v", "\u00ab", "\u00b4", "\u00b7", "\u00bb", "\u0301", "\u0390", "\u03ac", "\u03ad", "\u03ae", "\u03af", "\u03b1", "\u03b2", "\u03b3", "\u03b4", "\u03b5", "\u03b6", "\u03b7", "\u03b8", "\u03b9", "\u03ba", "\u03bb", "\u03bc", "\u03bd", "\u03be", "\u03bf", "\u03c0", "\u03c1", "\u03c2", "\u03c3", "\u03c4", "\u03c5", "\u03c6", "\u03c7", "\u03c8", "\u03c9", "\u03ca", "\u03cb", "\u03cc", "\u03cd", "\u03ce", "\u2019", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
language_model/6gram.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76a745bf0a1ea09b2e4b40e680adf45f75c8136189a44958d2e74410c37eb461
3
+ size 2220116062
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
File without changes
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<s>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "</s>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s>",
33
+ "lstrip": false,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "</s>",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<s>",
47
+ "lstrip": false,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "</s>",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<s>",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "</s>",
68
+ "lstrip": false,
69
+ "normalized": true,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<s>",
75
+ "lstrip": false,
76
+ "normalized": true,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "</s>",
82
+ "lstrip": false,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<s>",
89
+ "lstrip": false,
90
+ "normalized": true,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "</s>",
96
+ "lstrip": false,
97
+ "normalized": true,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<s>",
103
+ "lstrip": false,
104
+ "normalized": true,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "</s>",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<s>",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "</s>",
124
+ "lstrip": false,
125
+ "normalized": true,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "<s>",
131
+ "lstrip": false,
132
+ "normalized": true,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "</s>",
138
+ "lstrip": false,
139
+ "normalized": true,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "<s>",
145
+ "lstrip": false,
146
+ "normalized": true,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "</s>",
152
+ "lstrip": false,
153
+ "normalized": true,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ }
157
+ ],
158
+ "bos_token": "<s>",
159
+ "eos_token": "</s>",
160
+ "pad_token": "[PAD]",
161
+ "unk_token": "[UNK]"
162
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "name_or_path": "/content/drive/MyDrive/GREEK-HUBERT/checkpoint-4960",
6
+ "pad_token": "[PAD]",
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "replace_word_delimiter_char": " ",
9
+ "special_tokens_map_file": null,
10
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "unk_token": "[UNK]",
12
+ "word_delimiter_token": "|"
13
+ }
vocab.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 53,
3
+ "[UNK]": 52,
4
+ "a": 1,
5
+ "e": 2,
6
+ "g": 3,
7
+ "h": 4,
8
+ "m": 5,
9
+ "n": 6,
10
+ "o": 7,
11
+ "r": 8,
12
+ "t": 9,
13
+ "v": 10,
14
+ "|": 0,
15
+ "«": 11,
16
+ "´": 12,
17
+ "·": 13,
18
+ "»": 14,
19
+ "́": 15,
20
+ "ΐ": 16,
21
+ "ά": 17,
22
+ "έ": 18,
23
+ "ή": 19,
24
+ "ί": 20,
25
+ "α": 21,
26
+ "β": 22,
27
+ "γ": 23,
28
+ "δ": 24,
29
+ "ε": 25,
30
+ "ζ": 26,
31
+ "η": 27,
32
+ "θ": 28,
33
+ "ι": 29,
34
+ "κ": 30,
35
+ "λ": 31,
36
+ "μ": 32,
37
+ "ν": 33,
38
+ "ξ": 34,
39
+ "ο": 35,
40
+ "π": 36,
41
+ "ρ": 37,
42
+ "ς": 38,
43
+ "σ": 39,
44
+ "τ": 40,
45
+ "υ": 41,
46
+ "φ": 42,
47
+ "χ": 43,
48
+ "ψ": 44,
49
+ "ω": 45,
50
+ "ϊ": 46,
51
+ "ϋ": 47,
52
+ "ό": 48,
53
+ "ύ": 49,
54
+ "ώ": 50,
55
+ "’": 51
56
+ }