Update app.py
Browse files
app.py
CHANGED
|
@@ -21,6 +21,10 @@ models_ru = {
|
|
| 21 |
for name in [
|
| 22 |
"gigaam-v2-ctc",
|
| 23 |
"gigaam-v2-rnnt",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
"nemo-fastconformer-ru-ctc",
|
| 25 |
"nemo-fastconformer-ru-rnnt",
|
| 26 |
"alphacep/vosk-model-ru",
|
|
@@ -99,7 +103,7 @@ def recognize_with_vad(audio: tuple[int, np.ndarray], name: str):
|
|
| 99 |
|
| 100 |
|
| 101 |
with gr.Blocks() as recognize_short:
|
| 102 |
-
audio = gr.Audio(min_length=1, max_length=30)
|
| 103 |
with gr.Row():
|
| 104 |
gr.ClearButton(audio)
|
| 105 |
btn_ru = gr.Button("Recognize (ru)", variant="primary")
|
|
@@ -112,7 +116,7 @@ with gr.Blocks() as recognize_short:
|
|
| 112 |
with gr.Blocks() as recognize_long:
|
| 113 |
gr.Markdown("The default VAD parameters are used. For best results, you should adjust the VAD parameters in your app.")
|
| 114 |
name = gr.Dropdown(models_vad.keys(), label="Model")
|
| 115 |
-
audio = gr.Audio(min_length=1, max_length=300)
|
| 116 |
with gr.Row():
|
| 117 |
gr.ClearButton(audio)
|
| 118 |
btn = gr.Button("Recognize", variant="primary")
|
|
@@ -135,7 +139,7 @@ with gr.Blocks(title="onnx-asr demo") as demo:
|
|
| 135 |
# ASR demo using onnx-asr
|
| 136 |
**[onnx-asr](https://github.com/istupakov/onnx-asr)** is a Python package for Automatic Speech Recognition using ONNX models.
|
| 137 |
The package is written in pure Python with minimal dependencies (no `pytorch` or `transformers`).
|
| 138 |
-
Supports Parakeet TDT 0.6B V2 (En), Parakeet TDT 0.6B V3 (Multilingual) and GigaAM v2 (Ru) models
|
| 139 |
(and many other modern [models](https://github.com/istupakov/onnx-asr?tab=readme-ov-file#supported-model-names)).
|
| 140 |
You can also use it with your own model if it has a supported architecture.
|
| 141 |
""")
|
|
@@ -151,6 +155,10 @@ with gr.Blocks(title="onnx-asr demo") as demo:
|
|
| 151 |
## Russian ASR models
|
| 152 |
* `gigaam-v2-ctc` - Sber GigaAM v2 CTC ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
|
| 153 |
* `gigaam-v2-rnnt` - Sber GigaAM v2 RNN-T ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
* `nemo-fastconformer-ru-ctc` - Nvidia FastConformer-Hybrid Large (ru) with CTC decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
|
| 155 |
* `nemo-fastconformer-ru-rnnt` - Nvidia FastConformer-Hybrid Large (ru) with RNN-T decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
|
| 156 |
* `nemo-parakeet-tdt-0.6b-v3` - Nvidia Parakeet TDT 0.6B V3 (multilingual) ([origin](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3), [onnx](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx))
|
|
|
|
| 21 |
for name in [
|
| 22 |
"gigaam-v2-ctc",
|
| 23 |
"gigaam-v2-rnnt",
|
| 24 |
+
"gigaam-v3-ctc",
|
| 25 |
+
"gigaam-v3-rnnt",
|
| 26 |
+
"gigaam-v3-e2e-ctc",
|
| 27 |
+
"gigaam-v3-e2e-rnnt",
|
| 28 |
"nemo-fastconformer-ru-ctc",
|
| 29 |
"nemo-fastconformer-ru-rnnt",
|
| 30 |
"alphacep/vosk-model-ru",
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
with gr.Blocks() as recognize_short:
|
| 106 |
+
audio = gr.Audio(validator=lambda audio: gr.validators.is_audio_correct_length(audio, min_length=1, max_length=30))
|
| 107 |
with gr.Row():
|
| 108 |
gr.ClearButton(audio)
|
| 109 |
btn_ru = gr.Button("Recognize (ru)", variant="primary")
|
|
|
|
| 116 |
with gr.Blocks() as recognize_long:
|
| 117 |
gr.Markdown("The default VAD parameters are used. For best results, you should adjust the VAD parameters in your app.")
|
| 118 |
name = gr.Dropdown(models_vad.keys(), label="Model")
|
| 119 |
+
audio = gr.Audio(validator=lambda audio: gr.validators.is_audio_correct_length(audio, min_length=1, max_length=300))
|
| 120 |
with gr.Row():
|
| 121 |
gr.ClearButton(audio)
|
| 122 |
btn = gr.Button("Recognize", variant="primary")
|
|
|
|
| 139 |
# ASR demo using onnx-asr
|
| 140 |
**[onnx-asr](https://github.com/istupakov/onnx-asr)** is a Python package for Automatic Speech Recognition using ONNX models.
|
| 141 |
The package is written in pure Python with minimal dependencies (no `pytorch` or `transformers`).
|
| 142 |
+
Supports Parakeet TDT 0.6B V2 (En), Parakeet TDT 0.6B V3 (Multilingual) and GigaAM v2/v3 (Ru) models
|
| 143 |
(and many other modern [models](https://github.com/istupakov/onnx-asr?tab=readme-ov-file#supported-model-names)).
|
| 144 |
You can also use it with your own model if it has a supported architecture.
|
| 145 |
""")
|
|
|
|
| 155 |
## Russian ASR models
|
| 156 |
* `gigaam-v2-ctc` - Sber GigaAM v2 CTC ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
|
| 157 |
* `gigaam-v2-rnnt` - Sber GigaAM v2 RNN-T ([origin](https://github.com/salute-developers/GigaAM), [onnx](https://huggingface.co/istupakov/gigaam-v2-onnx))
|
| 158 |
+
* `gigaam-v3-ctc` - Sber GigaAM v3 CTC ([origin](https://huggingface.co/ai-sage/GigaAM-v3), [onnx](https://huggingface.co/istupakov/gigaam-v3-onnx))
|
| 159 |
+
* `gigaam-v3-rnnt` - Sber GigaAM v3 RNN-T ([origin](https://huggingface.co/ai-sage/GigaAM-v3), [onnx](https://huggingface.co/istupakov/gigaam-v3-onnx))
|
| 160 |
+
* `gigaam-v3-e2e-ctc` - Sber GigaAM v3 E2E CTC ([origin](https://huggingface.co/ai-sage/GigaAM-v3), [onnx](https://huggingface.co/istupakov/gigaam-v3-onnx))
|
| 161 |
+
* `gigaam-v3-e2e-rnnt` - Sber GigaAM v3 E2E RNN-T ([origin](https://huggingface.co/ai-sage/GigaAM-v3), [onnx](https://huggingface.co/istupakov/gigaam-v3-onnx))
|
| 162 |
* `nemo-fastconformer-ru-ctc` - Nvidia FastConformer-Hybrid Large (ru) with CTC decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
|
| 163 |
* `nemo-fastconformer-ru-rnnt` - Nvidia FastConformer-Hybrid Large (ru) with RNN-T decoder ([origin](https://huggingface.co/nvidia/stt_ru_fastconformer_hybrid_large_pc), [onnx](https://huggingface.co/istupakov/stt_ru_fastconformer_hybrid_large_pc_onnx))
|
| 164 |
* `nemo-parakeet-tdt-0.6b-v3` - Nvidia Parakeet TDT 0.6B V3 (multilingual) ([origin](https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3), [onnx](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx))
|