{ "english": [ { "vosk_model_id": "vosk-model-small-en-us-0.15", "vosk_model_language": "en", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip", "vosk_model_size": "1.8G", "vosk_model_word_error_rate_and_speed": "9.85 (librispeech test-clean) 10.38 (tedlium)", "vosk_model_notes": "Lightweight wideband model for Android and RPi ", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-en-us-0.22", "vosk_model_language": "en", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip", "vosk_model_size": "40M", "vosk_model_word_error_rate_and_speed": "5.69 (librispeech test-clean) 6.05 (tedlium) 29.78(callcenter)", "vosk_model_notes": "Accurate generic US English model", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-en-us-0.22-lgraph", "vosk_model_language": "en", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.22-lgraph.zip", "vosk_model_size": "128M", "vosk_model_word_error_rate_and_speed": "7.82 (librispeech) 8.20 (tedlium)", "vosk_model_notes": "Big US English model with dynamic graph", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-en-us-0.42-gigaspeech", "vosk_model_language": "en", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip", "vosk_model_size": "2.3G", "vosk_model_word_error_rate_and_speed": "5.64 (librispeech test-clean) 6.24 (tedlium) 30.17 (callcenter)", "vosk_model_notes": "Accurate generic US English model trained by Kaldi on Gigaspeech. Mostly for podcasts, not for telephony", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-en-in-0.5", "vosk_model_language": "en-indian", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-en-in-0.5.zip", "vosk_model_size": "1G", "vosk_model_word_error_rate_and_speed": "36.12 (NPTEL Pure)", "vosk_model_notes": "Generic Indian English model for telecom and broadcast", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-small-en-in-0.4", "vosk_model_language": "en-indian", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-en-in-0.4.zip", "vosk_model_size": "36M", "vosk_model_word_error_rate_and_speed": "49.05 (NPTEL Pure)", "vosk_model_notes": "Lightweight Indian English model for mobile applications", "vosk_model_licenses": "Apache 2.0" } ], "chinese": [ { "vosk_model_id": "vosk-model-small-cn-0.22", "vosk_model_language": "cn", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip", "vosk_model_size": "42M", "vosk_model_word_error_rate_and_speed": "23.54 (SpeechIO-02) 38.29 (SpeechIO-06) 17.15 (THCHS)", "vosk_model_notes": "Lightweight model for Android and RPi", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-cn-0.22", "vosk_model_language": "cn", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-0.22.zip", "vosk_model_size": "1.3G", "vosk_model_word_error_rate_and_speed": "13.98 (SpeechIO-02) 27.30 (SpeechIO-06) 7.43 (THCHS)", "vosk_model_notes": "Big generic Chinese model for server processing", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-cn-kaldi-multicn-0.15", "vosk_model_language": "cn", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-cn-kaldi-multicn-0.15.zip", "vosk_model_size": "1.5G", "vosk_model_word_error_rate_and_speed": "17.44 (SpeechIO-02) 9.56 (THCHS)", "vosk_model_notes": "Original Wideband Kaldi multi-cn model from Kaldi with Vosk LM", "vosk_model_licenses": "Apache 2.0" } ], "french": [ { "vosk_model_id": "vosk-model-small-fr-0.22", "vosk_model_language": "fr", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip", "vosk_model_size": "41M", "vosk_model_word_error_rate_and_speed": "23.95 (cv test) 19.30 (mtedx) 27.25 (podcast)", "vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-fr-0.22", "vosk_model_language": "fr", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fr-0.22.zip", "vosk_model_size": "1.4G", "vosk_model_word_error_rate_and_speed": "14.72 (cv test) 11.64 (mls) 13.10 (mtedx) 21.61 (podcast) 13.22 (voxpopuli)", "vosk_model_notes": "Big accurate model for servers", "vosk_model_licenses": "Apache 2.0" } ], "spanish": [ { "vosk_model_id": "vosk-model-small-es-0.42", "vosk_model_language": "es", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip", "vosk_model_size": "39M", "vosk_model_word_error_rate_and_speed": "16.02 (cv test) 16.72 (mtedx test) 11.21 (mls)", "vosk_model_notes": "Lightweight wideband model for Android/iOS and RPi", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-es-0.42", "vosk_model_language": "es", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-es-0.42.zip", "vosk_model_size": "1.4G", "vosk_model_word_error_rate_and_speed": "7.50 (cv test) 10.05 (mtedx test) 5.84 (mls)", "vosk_model_notes": "Big model for Spanish", "vosk_model_licenses": "Apache 2.0" } ], "german": [ { "vosk_model_id": "vosk-model-de-0.21", "vosk_model_language": "de", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip", "vosk_model_size": "1.9G", "vosk_model_word_error_rate_and_speed": "9.83 (Tuda-de test), 24.00 (podcast) 12.82 (cv-test) 12.42 (mls) 33.26 (mtedx)", "vosk_model_notes": "Big German model for telephony and server", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-small-de-zamia-0.3", "vosk_model_language": "de", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-zamia-0.3.zip", "vosk_model_size": "1.4G", "vosk_model_word_error_rate_and_speed": "14.81 (Tuda-de test, 37.46 (podcast)", "vosk_model_notes": "Zamia f_250 small model repackaged (not recommended)", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-small-de-0.15", "vosk_model_language": "de", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-de-zamia-0.3.zip", "vosk_model_size": "45M", "vosk_model_word_error_rate_and_speed": "13.75 (Tuda-de test), 30.67 (podcast)", "vosk_model_notes": "Lightweight wideband model for Android and RPi", "vosk_model_licenses": "Apache 2.0" } ], "portuguese": [ { "vosk_model_id": "vosk-model-small-pt-0.3", "vosk_model_language": "pt", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pt-0.3.zip", "vosk_model_size": "1.9G", "vosk_model_word_error_rate_and_speed": "68.92 (coraa dev) 32.60 (cv test)", "vosk_model_notes": "Lightweight wideband model for Android and RPi", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-pt-fb-v0.1.1-20220516_2113", "vosk_model_language": "pt", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-pt-fb-v0.1.1-20220516_2113.zip", "vosk_model_size": "1.6G", "vosk_model_word_error_rate_and_speed": "54.34 (coraa dev) 27.70 (cv test)", "vosk_model_notes": "Big model from FalaBrazil", "vosk_model_licenses": "Apache 2.0" } ], "greek": [ { "vosk_model_id": "vosk-model-el-gr-0.7", "vosk_model_language": "gr", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-el-gr-0.7.zip", "vosk_model_size": "1.1G", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Big narrowband Greek model for server processing, not extremely accurate though", "vosk_model_licenses": "Apache 2.0" } ], "vietnamese": [ { "vosk_model_id": "vosk-model-small-vn-0.4", "vosk_model_language": "vn", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-vn-0.4.zip", "vosk_model_size": "32M", "vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)", "vosk_model_notes": "Lightweight Vietnamese model", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-vn-0.4", "vosk_model_language": "vn", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-vn-0.4.zip", "vosk_model_size": "78M", "vosk_model_word_error_rate_and_speed": "15.70 (Vivos test)", "vosk_model_notes": "Bigger Vietnamese model for server", "vosk_model_licenses": "Apache 2.0" } ], "italian": [ { "vosk_model_id": "vosk-model-small-it-0.22", "vosk_model_language": "it", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-it-0.22.zip", "vosk_model_size": "48M", "vosk_model_word_error_rate_and_speed": "16.88 (cv test) 25.87 (mls) 17.01 (mtedx)", "vosk_model_notes": "Lightweight model for Android and RPi", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-it-0.22", "vosk_model_language": "it", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-vn-0.4.zip", "vosk_model_size": "1.2G", "vosk_model_word_error_rate_and_speed": "8.10 (cv test) 15.68 (mls) 11.23 (mtedx)", "vosk_model_notes": "Big generic Italian model for servers", "vosk_model_licenses": "Apache 2.0" } ], "dutch": [ { "vosk_model_id": "vosk-model-small-nl-0.22", "vosk_model_language": "nl", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-nl-0.22.zip", "vosk_model_size": "39M", "vosk_model_word_error_rate_and_speed": "22.45 (cv test) 26.80 (tv) 25.84 (mls) 24.09 (voxpopuli)", "vosk_model_notes": "Lightweight model for Dutch", "vosk_model_licenses": "Apache 2.0" } ], "arabic": [ { "vosk_model_id": "vosk-model-ar-mgb2-0.4", "vosk_model_language": "ar", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-mgb2-0.4.zip", "vosk_model_size": "318M", "vosk_model_word_error_rate_and_speed": "16.40 (MGB-2 dev set)", "vosk_model_notes": "Repackaged Arabic model trained on MGB2 dataset from Kaldi", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-ar-0.22-linto-1.1.0", "vosk_model_language": "ar", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ar-0.22-linto-1.1.0.zip", "vosk_model_size": "1.3G", "vosk_model_word_error_rate_and_speed": "16.40 (MGB-2 dev set)", "vosk_model_notes": "52.87 (cv test) 28.50 (MBG-2 dev set) 1.0xRT", "vosk_model_licenses": "Apache 2.0" } ], "farsi": [ { "vosk_model_id": "vosk-model-small-fa-0.4", "vosk_model_language": "ph", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-fa-0.4.zip", "vosk_model_size": "47M", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Lightweight wideband model for Android and RPi for Farsi (Persian)", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-fa-0.5", "vosk_model_language": "ph", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-fa-0.5.zip", "vosk_model_size": "1G", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Model with large vocabulary, not yet accurate but better than before (Persian)", "vosk_model_licenses": "Apache 2.0" } ], "filipino": [ { "vosk_model_id": "vosk-model-tl-ph-generic-0.6", "vosk_model_language": "ph", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-tl-ph-generic-0.6.zip", "vosk_model_size": "320M", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Medium wideband model for Filipino (Tagalog) by feddybear", "vosk_model_licenses": "CC-BY-NC-SA 4.0" } ], "ukrainian": [ { "vosk_model_id": "vosk-model-small-uk-v3-small", "vosk_model_language": "uk", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-small.zip", "vosk_model_size": "133M", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Small model from Speech Recognition for Ukrainian", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-uk-v3", "vosk_model_language": "uk", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-uk-v3.zip", "vosk_model_size": "343M", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Bigger model from Speech Recognition for Ukrainian", "vosk_model_licenses": "Apache 2.0" } ], "swedish": [ { "vosk_model_id": "vosk-model-small-sv-rhasspy-0.15", "vosk_model_language": "sv", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-sv-rhasspy-0.15.zip", "vosk_model_size": "289M", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Repackaged model from Rhasspy project", "vosk_model_licenses": "Apache 2.0" } ], "japanese": [ { "vosk_model_id": "vosk-model-small-ja-0.22", "vosk_model_language": "ja", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ja-0.22.zip", "vosk_model_size": "48M", "vosk_model_word_error_rate_and_speed": "9.52(csj CER) 17.07(ted10k CER)", "vosk_model_notes": "Lightweight wideband model for Japanese", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-ja-0.22", "vosk_model_language": "ja", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-ja-0.22.zip", "vosk_model_size": "1Gb", "vosk_model_word_error_rate_and_speed": "8.40(csj CER) 13.91(ted10k CER)", "vosk_model_notes": "Big model for Japanese", "vosk_model_licenses": "Apache 2.0" } ], "hindi": [ { "vosk_model_id": "vosk-model-small-hi-0.22", "vosk_model_language": "hi", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-hi-0.22.zip", "vosk_model_size": "42M", "vosk_model_word_error_rate_and_speed": "20.89 (IITM Challenge) 24.72 (MUCS Challenge)", "vosk_model_notes": "Lightweight model for Hindi", "vosk_model_licenses": "Apache 2.0" }, { "vosk_model_id": "vosk-model-hi-0.22", "vosk_model_language": "hi", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-hi-0.22.zip", "vosk_model_size": "1.5Gb", "vosk_model_word_error_rate_and_speed": "14.85 (CV Test) 14.83 (IITM Challenge) 13.11 (MUCS Challenge)", "vosk_model_notes": "Big accurate model for servers", "vosk_model_licenses": "Apache 2.0" } ], "czech": [ { "vosk_model_id": "vosk-model-small-cs-0.4-rhasspy", "vosk_model_language": "cs", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-cs-0.4-rhasspy.zip", "vosk_model_size": "44M", "vosk_model_word_error_rate_and_speed": "21.29 (CV Test)", "vosk_model_notes": "Lightweight model for Czech from Rhasspy project", "vosk_model_licenses": "Apache 2.0" } ], "polish": [ { "vosk_model_id": "vosk-model-small-pl-0.22", "vosk_model_language": "pl", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-pl-0.22.zip", "vosk_model_size": "50M", "vosk_model_word_error_rate_and_speed": "18.36 (CV Test) 16.88 (MLS Test) 11.55 (Voxpopuli Test)", "vosk_model_notes": "Lightweight model for Polish", "vosk_model_licenses": "Apache 2.0" } ], "uzbek": [ { "vosk_model_id": "vosk-model-small-uz-0.22", "vosk_model_language": "uz", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-uz-0.22.zip", "vosk_model_size": "82M", "vosk_model_word_error_rate_and_speed": "13.54 (CV Test) 12.92 (IS2AI USC test)", "vosk_model_notes": "Lightweight model for Uzbek", "vosk_model_licenses": "Apache 2.0" } ], "korean": [ { "vosk_model_id": "vosk-model-small-ko-0.22", "vosk_model_language": "ko", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-small-ko-0.22.zip", "vosk_model_size": "82M", "vosk_model_word_error_rate_and_speed": "28.1 (Zeroth Test)", "vosk_model_notes": "Lightweight model for Korean", "vosk_model_licenses": "Apache 2.0" } ], "speaker_identification": [ { "vosk_model_id": "vosk-model-spk-0.4", "vosk_model_language": "TBD", "vosk_model_file_url": "https://alphacephei.com/vosk/models/vosk-model-spk-0.4.zip", "vosk_model_size": "13M", "vosk_model_word_error_rate_and_speed": "TBD", "vosk_model_notes": "Model for speaker identification, should work for all languages", "vosk_model_licenses": "Apache 2.0" } ] }