jonatasgrosman
committed on
Commit
•
a9676c0
1
Parent(s):
b5385a8
add LM
Browse files
- alphabet.json +1 -0
- language_model/attrs.json +3 -0
- language_model/lm.binary +3 -0
- language_model/unigrams.txt +3 -0
- preprocessor_config.json +2 -1
- vocab.json +1 -1
alphabet.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"labels": ["", "<s>", "</s>", "⁇", " ", "-", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "v", "w", "x", "y", "z", "ó", "ą", "ć", "ę", "ł", "ń", "ś", "ź", "ż"], "is_bpe": false}
|
language_model/attrs.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5ffd02e1ceef6517476e72ebe7997ddef7e92d27cb5a23d6695d64c4317d6ad
|
3 |
+
size 78
|
language_model/lm.binary
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1df471209afd17b6476d8ff10707e55444b77c99609ade5d75e65dd2bf464b5
|
3 |
+
size 1628858942
|
language_model/unigrams.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5618b3acd674a33040b05dc3f4b6d4e27971d7817b3a323a9365d38a7477b2e7
|
3 |
+
size 16663622
|
preprocessor_config.json
CHANGED
@@ -5,5 +5,6 @@
|
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
7 |
"return_attention_mask": true,
|
8 |
-
"sampling_rate": 16000
|
|
|
9 |
}
|
|
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0.0,
|
7 |
"return_attention_mask": true,
|
8 |
+
"sampling_rate": 16000,
|
9 |
+
"processor_class": "Wav2Vec2ProcessorWithLM"
|
10 |
}
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "-": 5, "
|
|
|
1 |
+
{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "-": 5, "a": 6, "b": 7, "c": 8, "d": 9, "e": 10, "f": 11, "g": 12, "h": 13, "i": 14, "j": 15, "k": 16, "l": 17, "m": 18, "n": 19, "o": 20, "p": 21, "r": 22, "s": 23, "t": 24, "u": 25, "v": 26, "w": 27, "x": 28, "y": 29, "z": 30, "ó": 31, "ą": 32, "ć": 33, "ę": 34, "ł": 35, "ń": 36, "ś": 37, "ź": 38, "ż": 39}
|