Anas2000 committed on
Commit
4989633
1 Parent(s): ec77feb

Upload lm-boosted decoder

Browse files
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
  *.zstandard filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ language_model/unigrams.txt filter=lfs diff=lfs merge=lfs -text
added_tokens.json CHANGED
@@ -1 +1,4 @@
1
- {"<s>": 110, "</s>": 111}
 
 
 
 
1
+ {
2
+ "</s>": 111,
3
+ "<s>": 110
4
+ }
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": [" ", "_", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\u0093", "\u0094", "\u0153", "\u0964", "\u0981", "\u0982", "\u0983", "\u0985", "\u0986", "\u0987", "\u0988", "\u0989", "\u098a", "\u098b", "\u098f", "\u0990", "\u0993", "\u0994", "\u0995", "\u0996", "\u0997", "\u0998", "\u0999", "\u099a", "\u099b", "\u099c", "\u099d", "\u099e", "\u099f", "\u09a0", "\u09a1", "\u09a2", "\u09a3", "\u09a4", "\u09a5", "\u09a6", "\u09a7", "\u09a8", "\u09aa", "\u09ab", "\u09ac", "\u09ad", "\u09ae", "\u09af", "\u09b0", "\u09b2", "\u09b6", "\u09b7", "\u09b8", "\u09b9", "\u09bc", "\u09be", "\u09bf", "\u09c0", "\u09c1", "\u09c2", "\u09c3", "\u09c7", "\u09c8", "\u09cb", "\u09cc", "\u09cd", "\u09ce", "\u09d7", "\u09dc", "\u09dd", "\u09df", "\u09e6", "\u09e7", "\u09e8", "\u09e9", "\u09ea", "\u09eb", "\u09ec", "\u09ed", "\u09ee", "\u09ef", "\u09f0", "\u200c", "\u200d", "\u200e", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
language_model/6gram.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9529db6e465c8d7f27cbeec9d3dc16a77513383e923d404770908bb21753ee74
3
+ size 3005190229
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50128d5d0c760a8c8e2095916414c14bbb54e72c1859bfbf18c5214bb99fd7cb
3
+ size 29505850
special_tokens_map.json CHANGED
@@ -1 +1,106 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<s>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "</s>",
26
+ "lstrip": false,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s>",
33
+ "lstrip": false,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "</s>",
40
+ "lstrip": false,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<s>",
47
+ "lstrip": false,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "</s>",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<s>",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "</s>",
68
+ "lstrip": false,
69
+ "normalized": true,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<s>",
75
+ "lstrip": false,
76
+ "normalized": true,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "</s>",
82
+ "lstrip": false,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<s>",
89
+ "lstrip": false,
90
+ "normalized": true,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "</s>",
96
+ "lstrip": false,
97
+ "normalized": true,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ }
101
+ ],
102
+ "bos_token": "<s>",
103
+ "eos_token": "</s>",
104
+ "pad_token": "[PAD]",
105
+ "unk_token": "[UNK]"
106
+ }
tokenizer_config.json CHANGED
@@ -1 +1,14 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "Anas2000/hope", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2ProcessorWithLM"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "name_or_path": "Anas2000/hope",
6
+ "pad_token": "[PAD]",
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "replace_word_delimiter_char": " ",
9
+ "special_tokens_map_file": null,
10
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "tokenizer_file": null,
12
+ "unk_token": "[UNK]",
13
+ "word_delimiter_token": "|"
14
+ }
vocab.json CHANGED
@@ -1 +1,112 @@
1
- {"_": 1, "a": 2, "b": 3, "c": 4, "d": 5, "e": 6, "f": 7, "g": 8, "h": 9, "i": 10, "j": 11, "k": 12, "l": 13, "m": 14, "n": 15, "o": 16, "p": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "“": 27, "”": 28, "œ": 29, "।": 30, "ঁ": 31, "ং": 32, "ঃ": 33, "অ": 34, "আ": 35, "ই": 36, "ঈ": 37, "উ": 38, "ঊ": 39, "ঋ": 40, "এ": 41, "ঐ": 42, "ও": 43, "ঔ": 44, "ক": 45, "খ": 46, "গ": 47, "ঘ": 48, "ঙ": 49, "চ": 50, "ছ": 51, "জ": 52, "ঝ": 53, "ঞ": 54, "ট": 55, "ঠ": 56, "ড": 57, "ঢ": 58, "ণ": 59, "ত": 60, "থ": 61, "দ": 62, "ধ": 63, "ন": 64, "প": 65, "ফ": 66, "ব": 67, "ভ": 68, "ম": 69, "য": 70, "র": 71, "ল": 72, "শ": 73, "ষ": 74, "স": 75, "হ": 76, "়": 77, "া": 78, "ি": 79, "ী": 80, "ু": 81, "ূ": 82, "ৃ": 83, "ে": 84, "ৈ": 85, "ো": 86, "ৌ": 87, "্": 88, "ৎ": 89, "ৗ": 90, "ড়": 91, "ঢ়": 92, "য়": 93, "০": 94, "১": 95, "২": 96, "৩": 97, "৪": 98, "৫": 99, "৬": 100, "৭": 101, "৮": 102, "৯": 103, "ৰ": 104, "‌": 105, "‍": 106, "‎": 107, "|": 0, "[UNK]": 108, "[PAD]": 109}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 109,
3
+ "[UNK]": 108,
4
+ "_": 1,
5
+ "a": 2,
6
+ "b": 3,
7
+ "c": 4,
8
+ "d": 5,
9
+ "e": 6,
10
+ "f": 7,
11
+ "g": 8,
12
+ "h": 9,
13
+ "i": 10,
14
+ "j": 11,
15
+ "k": 12,
16
+ "l": 13,
17
+ "m": 14,
18
+ "n": 15,
19
+ "o": 16,
20
+ "p": 17,
21
+ "r": 18,
22
+ "s": 19,
23
+ "t": 20,
24
+ "u": 21,
25
+ "v": 22,
26
+ "w": 23,
27
+ "x": 24,
28
+ "y": 25,
29
+ "z": 26,
30
+ "|": 0,
31
+ "“": 27,
32
+ "”": 28,
33
+ "œ": 29,
34
+ "।": 30,
35
+ "ঁ": 31,
36
+ "ং": 32,
37
+ "ঃ": 33,
38
+ "অ": 34,
39
+ "আ": 35,
40
+ "ই": 36,
41
+ "ঈ": 37,
42
+ "উ": 38,
43
+ "ঊ": 39,
44
+ "ঋ": 40,
45
+ "এ": 41,
46
+ "ঐ": 42,
47
+ "ও": 43,
48
+ "ঔ": 44,
49
+ "ক": 45,
50
+ "খ": 46,
51
+ "গ": 47,
52
+ "ঘ": 48,
53
+ "ঙ": 49,
54
+ "চ": 50,
55
+ "ছ": 51,
56
+ "জ": 52,
57
+ "ঝ": 53,
58
+ "ঞ": 54,
59
+ "ট": 55,
60
+ "ঠ": 56,
61
+ "ড": 57,
62
+ "ঢ": 58,
63
+ "ণ": 59,
64
+ "ত": 60,
65
+ "থ": 61,
66
+ "দ": 62,
67
+ "ধ": 63,
68
+ "ন": 64,
69
+ "প": 65,
70
+ "ফ": 66,
71
+ "ব": 67,
72
+ "ভ": 68,
73
+ "ম": 69,
74
+ "য": 70,
75
+ "র": 71,
76
+ "ল": 72,
77
+ "শ": 73,
78
+ "ষ": 74,
79
+ "স": 75,
80
+ "হ": 76,
81
+ "়": 77,
82
+ "া": 78,
83
+ "ি": 79,
84
+ "ী": 80,
85
+ "ু": 81,
86
+ "ূ": 82,
87
+ "ৃ": 83,
88
+ "ে": 84,
89
+ "ৈ": 85,
90
+ "ো": 86,
91
+ "ৌ": 87,
92
+ "্": 88,
93
+ "ৎ": 89,
94
+ "ৗ": 90,
95
+ "ড়": 91,
96
+ "ঢ়": 92,
97
+ "য়": 93,
98
+ "০": 94,
99
+ "১": 95,
100
+ "২": 96,
101
+ "৩": 97,
102
+ "৪": 98,
103
+ "৫": 99,
104
+ "৬": 100,
105
+ "৭": 101,
106
+ "৮": 102,
107
+ "৯": 103,
108
+ "ৰ": 104,
109
+ "‌": 105,
110
+ "‍": 106,
111
+ "‎": 107
112
+ }