Michael Hansen
committed on
Commit
•
87f72f6
1
Parent(s):
e7ac210
Add vocab
Browse files
- _script/print-vocabulary.sh +50 -0
- _vocab/.gitattributes +1 -0
- _vocab/vosk-model-ar-mgb2-0.4.txt +3 -0
- _vocab/vosk-model-br-0.8.txt +3 -0
- _vocab/vosk-model-en-us-0.22-lgraph.txt +3 -0
- _vocab/vosk-model-nl-spraakherkenning-0.6-lgraph.txt +3 -0
- _vocab/vosk-model-small-ca-0.4.txt +3 -0
- _vocab/vosk-model-small-cn-0.22.txt +3 -0
- _vocab/vosk-model-small-cs-0.4-rhasspy.txt +3 -0
- _vocab/vosk-model-small-de-0.15.txt +3 -0
- _vocab/vosk-model-small-en-us-0.15.txt +3 -0
- _vocab/vosk-model-small-eo-0.42.txt +3 -0
- _vocab/vosk-model-small-es-0.42.txt +3 -0
- _vocab/vosk-model-small-fa-0.5.txt +3 -0
- _vocab/vosk-model-small-fr-0.22.txt +3 -0
- _vocab/vosk-model-small-hi-0.22.txt +3 -0
- _vocab/vosk-model-small-it-0.22.txt +3 -0
- _vocab/vosk-model-small-ja-0.22.txt +3 -0
- _vocab/vosk-model-small-ko-0.22.txt +3 -0
- _vocab/vosk-model-small-kz-0.15.txt +3 -0
- _vocab/vosk-model-small-nl-0.22.txt +3 -0
- _vocab/vosk-model-small-pl-0.22.txt +3 -0
- _vocab/vosk-model-small-pt-0.3.txt +3 -0
- _vocab/vosk-model-small-ru-0.22.txt +3 -0
- _vocab/vosk-model-small-sv-rhasspy-0.15.txt +3 -0
- _vocab/vosk-model-small-tr-0.3.txt +3 -0
- _vocab/vosk-model-small-uk-v3-small.txt +3 -0
- _vocab/vosk-model-small-uz-0.22.txt +3 -0
- _vocab/vosk-model-small-vn-0.4.txt +3 -0
- _vocab/vosk-model-tl-ph-generic-0.6.txt +3 -0
- _vocab/vosk-model-vn-0.4.txt +3 -0
_script/print-vocabulary.sh
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env bash
#
# Print out the vocabulary (from words.txt or Gr.fst) for all zipped models
# in a directory.
# Assumes fstprint is in PATH and ngramfst.so is in LD_LIBRARY_PATH.
#
# Usage: print-vocabulary <MODEL_DIR> <VOCAB_DIR>
#
# Every *.zip file found under MODEL_DIR is treated as an archive named
# <model_name>.zip whose members live under a top-level <model_name>/
# directory. The vocabulary is written to VOCAB_DIR/<model_name>.txt, one
# entry per line, sorted and de-duplicated. Models whose vocabulary file
# already exists and is non-empty are skipped, so the script can be re-run.
# A model whose vocabulary cannot be produced is reported on stderr and the
# script moves on to the next one.
set -e

if [ -z "${2:-}" ]; then
    echo 'Usage: print-vocabulary <MODEL_DIR> <VOCAB_DIR>' >&2
    exit 1
fi

model_dir="$1"
vocab_dir="$2"

mkdir -p "${vocab_dir}"

# Scratch space for extracted archive members; removed on any exit path.
temp_dir="$(mktemp -d)"
finish() {
    rm -rf -- "${temp_dir}"
}

trap finish EXIT

#######################################
# Write the sorted, unique vocabulary found in an extraction directory.
# Arguments: $1 - directory holding an extracted words.txt or Gr.fst
# Outputs:   vocabulary on stdout, one entry per line
# Returns:   status of the extraction pipeline, or 2 when neither file exists
#######################################
emit_vocabulary() {
    local extract_dir="$1"

    if [ -f "${extract_dir}/words.txt" ]; then
        # First space-separated column of each words.txt line.
        cut -d' ' -f1 < "${extract_dir}/words.txt" | sort | uniq
    elif [ -f "${extract_dir}/Gr.fst" ]; then
        # Third tab-separated column of fstprint's text output
        # (presumably the arc's input label -- confirm against OpenFst docs).
        fstprint "${extract_dir}/Gr.fst" | cut -f3 | sort | uniq
    else
        return 2
    fi
}

# NUL-delimited paths and IFS= keep file names with whitespace intact.
# Process substitution keeps the loop in the current shell.
while IFS= read -r -d '' zip_file; do
    model_name="$(basename -- "${zip_file}" .zip)"
    vocab_file="${vocab_dir}/${model_name}.txt"

    if [ -s "${vocab_file}" ]; then
        echo "Skipping ${model_name} (${vocab_file})"
        continue
    fi

    # Separate name so the MODEL_DIR argument is not overwritten.
    extract_dir="${temp_dir}/${model_name}"
    rm -rf -- "${extract_dir}"
    mkdir -p "${extract_dir}"

    # Try the known member locations in order and stop at the first one
    # that extracts. Failing to find any of them is handled below.
    # stdin is detached so unzip can never read from the file list.
    for member in graph/Gr.fst Gr.fst words.txt graph/words.txt; do
        if unzip -j "${zip_file}" "${model_name}/${member}" \
            -d "${extract_dir}" < /dev/null; then
            break
        fi
    done

    # Build the vocabulary in the scratch directory and publish it only when
    # every pipeline stage succeeded. Otherwise a truncated, non-empty file
    # would be left behind and skipped as "already done" on the next run.
    pending_file="${extract_dir}/vocab.pending"
    if (set -o pipefail; emit_vocabulary "${extract_dir}") > "${pending_file}"; then
        mv -f -- "${pending_file}" "${vocab_file}"
    else
        echo "ERROR: can't get vocabulary for ${model_name}" >&2
    fi

    # Extracted graphs can be large; free the space before the next model.
    rm -rf -- "${extract_dir}"
done < <(find "${model_dir}" -name '*.zip' -type f -print0)
|
_vocab/.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*.txt filter=lfs diff=lfs merge=lfs -text
|
_vocab/vosk-model-ar-mgb2-0.4.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9322358024475930043374b723698799c47d8e99fe91d2c50d618108d0f3c354
|
3 |
+
size 13671773
|
_vocab/vosk-model-br-0.8.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcedd286aaa4b5a278ac918421684478fafe4ce2a69c8a1fe4b065c91b76295c
|
3 |
+
size 408890
|
_vocab/vosk-model-en-us-0.22-lgraph.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e122967bc04600777afde3957aed0a4610e690f507861ea556334a0c48a9781
|
3 |
+
size 2035553
|
_vocab/vosk-model-nl-spraakherkenning-0.6-lgraph.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7b8b4aafab5d0679baaecc76f16fcd98e13d08fc22ab32bfbfeb693b7f67c0f
|
3 |
+
size 1649265
|
_vocab/vosk-model-small-ca-0.4.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d733c70577a4283c20ef2197a1dc8b0b2d8bb1167f57ae9e67272ed75d864ba9
|
3 |
+
size 1871468
|
_vocab/vosk-model-small-cn-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c9dcbb6386472cada9446707c854f3fbf3064739aac99f1e7b7b95302026d6d
|
3 |
+
size 857113
|
_vocab/vosk-model-small-cs-0.4-rhasspy.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d426881cb0c6e82f56bcf78411ac5668e64b0e04a3457ae2187a586fa935281
|
3 |
+
size 299968
|
_vocab/vosk-model-small-de-0.15.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd017e93fe10248d7e291f8ca698dea3e12b51120cbf2e09468fe56d460c2bab
|
3 |
+
size 2696357
|
_vocab/vosk-model-small-en-us-0.15.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0889b2edb3d5e2a6d184c6d0d511758b07df6eb7550dee1df81e1e28ec33fe77
|
3 |
+
size 1308647
|
_vocab/vosk-model-small-eo-0.42.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d55aff7fdc52215418b3c0617a165beb89460030e2e4fbaeb426546cff9ef351
|
3 |
+
size 839991
|
_vocab/vosk-model-small-es-0.42.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9475ff1400cf94b97561bb6c86de8b0144b4091c4deced411164fd5f758b2f3f
|
3 |
+
size 926038
|
_vocab/vosk-model-small-fa-0.5.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e2aaa51eeae0a44f9d585edab5d9c9c7d726bcb3a8451c04ffdb595516ef348
|
3 |
+
size 6033850
|
_vocab/vosk-model-small-fr-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32a0ae1bb2f38d2916e325e0d9df49164d515644bb9200d371ebde9724a87161
|
3 |
+
size 1332818
|
_vocab/vosk-model-small-hi-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b5aaf7ab3c2a4d053cb984f877d24b5f9a2a52b93dcc9db47e7403a5d2f3912
|
3 |
+
size 2845641
|
_vocab/vosk-model-small-it-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7843544d72e2589be787e144915d3e3d46c57f285a3810a824f726a648bece89
|
3 |
+
size 1985053
|
_vocab/vosk-model-small-ja-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8231817838e979a97f7cce140e7c07faf82b6511b756ce7ec918fb6773fd3182
|
3 |
+
size 2381760
|
_vocab/vosk-model-small-ko-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9555df5e8671b6ec105ef66be50f769ad9c9c07f058de37be08fee18616375f2
|
3 |
+
size 20775443
|
_vocab/vosk-model-small-kz-0.15.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04dd8fc25bad5a88f389435bdd8dc20be90edaf03016da30b76e454873755b08
|
3 |
+
size 4381500
|
_vocab/vosk-model-small-nl-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5f184771234aa534852bd6b74593190c1f18b55d5f2250ad75429b38ec68b12
|
3 |
+
size 1034007
|
_vocab/vosk-model-small-pl-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7065c6e3fc661d678f351c6f932dd995698902498ba069aee13d63c231c2a752
|
3 |
+
size 2649311
|
_vocab/vosk-model-small-pt-0.3.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19fe70f8d5a4793bd594eb2fb5b351577c01c72aec190ca2c22b54516db56de5
|
3 |
+
size 958387
|
_vocab/vosk-model-small-ru-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c4a9500541cc728f2481515e9bac57dbf5867e60d8b6afb1c167e11d517ef2e
|
3 |
+
size 4550334
|
_vocab/vosk-model-small-sv-rhasspy-0.15.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d554d4ec48bf6122d9b1e165b50641cb33f9e75b316b40f4b007b85a170bde2
|
3 |
+
size 1003448
|
_vocab/vosk-model-small-tr-0.3.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95663cb9b3a6b14289c34d066f9b65a5818c17d5a99347d891e607d55e02f4e3
|
3 |
+
size 1107458
|
_vocab/vosk-model-small-uk-v3-small.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02a68adb184d803ea03a3f87858fe003686d5767078b8049e4f351f08845204e
|
3 |
+
size 12414593
|
_vocab/vosk-model-small-uz-0.22.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7b73d79c57a8b1675780d76e877171eb7200c62ca32f9887c535df651f1c2d7
|
3 |
+
size 2671687
|
_vocab/vosk-model-small-vn-0.4.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff6f90769d74bcae37cef627bad7fa939f14a2c409cc3b94e347c465c7fe936f
|
3 |
+
size 128639
|
_vocab/vosk-model-tl-ph-generic-0.6.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b436e856c174ff360da9a53bf08de79661b6f7fe815f0db4dad7e854474e785
|
3 |
+
size 2290976
|
_vocab/vosk-model-vn-0.4.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c615174246f5832d957aac9b7b0e7fb99fc4dfe9c30264bba253cc2817826430
|
3 |
+
size 131182
|